From 48c5e1a5fe0ba2175c8d6eaac07dd9bee9ee8c09 Mon Sep 17 00:00:00 2001
From: Koushik Dutta <koushd@gmail.com>
Date: Sat, 28 Dec 2024 14:24:12 -0800
Subject: [PATCH] tensorflow-lite: quantization cleanups

---
 plugins/tensorflow-lite/package-lock.json     |  78 ++++++-----
 .../tensorflow-lite/src/tflite/__init__.py    | 126 ++++++++++--------
 .../src/tflite/yolo_separate_outputs.py       |   8 +-
 3 files changed, 116 insertions(+), 96 deletions(-)

diff --git a/plugins/tensorflow-lite/package-lock.json b/plugins/tensorflow-lite/package-lock.json
index 0c8f617ea..d0baebf52 100644
--- a/plugins/tensorflow-lite/package-lock.json
+++ b/plugins/tensorflow-lite/package-lock.json
@@ -13,39 +13,44 @@
       },
       "../../sdk": {
          "name": "@scrypted/sdk",
-         "version": "0.2.39",
+         "version": "0.3.102",
          "dev": true,
          "license": "ISC",
          "dependencies": {
-            "@babel/preset-typescript": "^7.16.7",
-            "adm-zip": "^0.4.13",
-            "axios": "^0.21.4",
-            "babel-loader": "^8.2.3",
-            "babel-plugin-const-enum": "^1.1.0",
-            "esbuild": "^0.15.9",
+            "@babel/preset-typescript": "^7.26.0",
+            "@rollup/plugin-commonjs": "^28.0.1",
+            "@rollup/plugin-json": "^6.1.0",
+            "@rollup/plugin-node-resolve": "^15.3.0",
+            "@rollup/plugin-typescript": "^12.1.1",
+            "@rollup/plugin-virtual": "^3.0.2",
+            "adm-zip": "^0.5.16",
+            "axios": "^1.7.8",
+            "babel-loader": "^9.2.1",
+            "babel-plugin-const-enum": "^1.2.0",
             "ncp": "^2.0.0",
             "raw-loader": "^4.0.2",
-            "rimraf": "^3.0.2",
-            "tmp": "^0.2.1",
-            "typescript": "^4.9.3",
-            "webpack": "^5.74.0",
-            "webpack-bundle-analyzer": "^4.5.0"
+            "rimraf": "^6.0.1",
+            "rollup": "^4.27.4",
+            "tmp": "^0.2.3",
+            "ts-loader": "^9.5.1",
+            "tslib": "^2.8.1",
+            "typescript": "^5.6.3",
+            "webpack": "^5.96.1",
+            "webpack-bundle-analyzer": "^4.10.2"
          },
          "bin": {
+            "scrypted-changelog": "bin/scrypted-changelog.js",
             "scrypted-debug": "bin/scrypted-debug.js",
             "scrypted-deploy": "bin/scrypted-deploy.js",
             "scrypted-deploy-debug": "bin/scrypted-deploy-debug.js",
             "scrypted-package-json": "bin/scrypted-package-json.js",
-            "scrypted-readme": "bin/scrypted-readme.js",
             "scrypted-setup-project": "bin/scrypted-setup-project.js",
             "scrypted-webpack": "bin/scrypted-webpack.js"
          },
          "devDependencies": {
-            "@types/node": "^18.11.9",
-            "@types/stringify-object": "^4.0.0",
-            "stringify-object": "^3.3.0",
-            "ts-node": "^10.4.0",
-            "typedoc": "^0.23.21"
+            "@types/node": "^22.10.1",
+            "ts-node": "^10.9.2",
+            "typedoc": "^0.26.11"
          }
       },
       "../sdk": {
@@ -60,24 +65,29 @@
       "@scrypted/sdk": {
          "version": "file:../../sdk",
          "requires": {
-            "@babel/preset-typescript": "^7.16.7",
-            "@types/node": "^18.11.9",
-            "@types/stringify-object": "^4.0.0",
-            "adm-zip": "^0.4.13",
-            "axios": "^0.21.4",
-            "babel-loader": "^8.2.3",
-            "babel-plugin-const-enum": "^1.1.0",
-            "esbuild": "^0.15.9",
+            "@babel/preset-typescript": "^7.26.0",
+            "@rollup/plugin-commonjs": "^28.0.1",
+            "@rollup/plugin-json": "^6.1.0",
+            "@rollup/plugin-node-resolve": "^15.3.0",
+            "@rollup/plugin-typescript": "^12.1.1",
+            "@rollup/plugin-virtual": "^3.0.2",
+            "@types/node": "^22.10.1",
+            "adm-zip": "^0.5.16",
+            "axios": "^1.7.8",
+            "babel-loader": "^9.2.1",
+            "babel-plugin-const-enum": "^1.2.0",
             "ncp": "^2.0.0",
             "raw-loader": "^4.0.2",
-            "rimraf": "^3.0.2",
-            "stringify-object": "^3.3.0",
-            "tmp": "^0.2.1",
-            "ts-node": "^10.4.0",
-            "typedoc": "^0.23.21",
-            "typescript": "^4.9.3",
-            "webpack": "^5.74.0",
-            "webpack-bundle-analyzer": "^4.5.0"
+            "rimraf": "^6.0.1",
+            "rollup": "^4.27.4",
+            "tmp": "^0.2.3",
+            "ts-loader": "^9.5.1",
+            "ts-node": "^10.9.2",
+            "tslib": "^2.8.1",
+            "typedoc": "^0.26.11",
+            "typescript": "^5.6.3",
+            "webpack": "^5.96.1",
+            "webpack-bundle-analyzer": "^4.10.2"
          }
       }
    }
diff --git a/plugins/tensorflow-lite/src/tflite/__init__.py b/plugins/tensorflow-lite/src/tflite/__init__.py
index 73c78e8be..975d16534 100644
--- a/plugins/tensorflow-lite/src/tflite/__init__.py
+++ b/plugins/tensorflow-lite/src/tflite/__init__.py
@@ -223,67 +223,81 @@ class TensorFlowLitePlugin(
     async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
         def predict():
             interpreter = self.interpreters[threading.current_thread().name]
-            if self.yolo:
-                tensor_index = input_details(interpreter, "index")
-
-                im = np.stack([input])
-                i = interpreter.get_input_details()[0]
-                if i["dtype"] == np.int8:
-                    scale, zero_point = i["quantization"]
-                    if scale == 0.003986024297773838 and zero_point == -128:
-                        # fast path for quantization 1/255 = 0.003986024297773838
-                        im = im.view(np.int8)
-                        im -= 128
-                    else:
-                        im = im.astype(np.float32) / (255.0 * scale)
-                        im = (im + zero_point).astype(np.int8)  # de-scale
-                else:
-                    # this code path is unused.
-                    im = im.astype(np.float32) / 255.0
-                interpreter.set_tensor(tensor_index, im)
-                interpreter.invoke()
-                output_details = interpreter.get_output_details()
-                input_scale = self.get_input_details()[0]
-                if self.scrypted_yolo_sep:
-                    outputs = []
-                    for index, output in enumerate(output_details):
-                        o = interpreter.get_tensor(output["index"]).astype(np.float32)
-                        scale, zero_point = output["quantization"]
-                        o -= zero_point
-                        o *= scale
-                        outputs.append(o)
-
-                    output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
-                    objs = yolo.parse_yolov9(output[0])
-                else:
-                    output = output_details[0]
-                    x = interpreter.get_tensor(output["index"])
-                    if x.dtype == np.int8:
-                        scale, zero_point = output["quantization"]
-                        combined_scale = scale * input_scale
-                        if self.scrypted_yolov10:
-                            objs = yolo.parse_yolov10(
-                                x[0],
-                                scale=lambda v: (v - zero_point) * combined_scale,
-                                confidence_scale=lambda v: (v - zero_point) * scale,
-                                threshold_scale=lambda v: (v - zero_point) * scale,
-                            )
-                        else:
-                            objs = yolo.parse_yolov9(
-                                x[0],
-                                scale=lambda v: (v - zero_point) * combined_scale,
-                                confidence_scale=lambda v: (v - zero_point) * scale,
-                                threshold_scale=lambda v: (v - zero_point) * scale,
-                            )
-                    else:
-                        # this code path is unused.
-                        objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
-            else:
+            if not self.yolo:
                 tflite_common.set_input(interpreter, input)
                 interpreter.invoke()
                 objs = detect.get_objects(
                     interpreter, score_threshold=0.2, image_scale=(1, 1)
                 )
+                return objs
+
+            tensor_index = input_details(interpreter, "index")
+
+            im = np.stack([input])
+            i = interpreter.get_input_details()[0]
+            if i["dtype"] == np.int8:
+                scale, zero_point = i["quantization"]
+                if scale == 0.003986024297773838 and zero_point == -128:
+                    # fast path: scale 0.003986024297773838 is roughly 1/255, so quantization reduces to uint8 - 128
+                    im = im.view(np.int8)
+                    im -= 128
+                else:
+                    im = im.astype(np.float32) / (255.0 * scale)
+                    im = (im + zero_point).astype(np.int8)  # de-scale
+            else:
+                # this code path is unused.
+                im = im.astype(np.float32) / 255.0
+            interpreter.set_tensor(tensor_index, im)
+            interpreter.invoke()
+            output_details = interpreter.get_output_details()
+
+            # handle separate outputs for quantization accuracy
+            if self.scrypted_yolo_sep:
+                outputs = []
+                for output in output_details:
+                    o = interpreter.get_tensor(output["index"]).astype(np.float32)
+                    scale, zero_point = output["quantization"]
+                    o -= zero_point
+                    o *= scale
+                    outputs.append(o)
+
+                output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
+                if self.scrypted_yolov10:
+                    objs = yolo.parse_yolov10(output[0])
+                else:
+                    objs = yolo.parse_yolov9(output[0])
+                return objs
+
+            # this scale stuff can probably be optimized to dequantize ahead of time...
+            output = output_details[0]
+            x = interpreter.get_tensor(output["index"])
+            input_scale = self.get_input_details()[0]
+
+            # this non-quantized code path is unused but here for reference.
+            if x.dtype != np.int8 and x.dtype != np.int16:
+                if self.scrypted_yolov10:
+                    objs = yolo.parse_yolov10(x[0], scale=lambda v: v * input_scale)
+                else:
+                    objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
+                return objs
+
+            # this scale stuff can probably be optimized to dequantize ahead of time...
+            scale, zero_point = output["quantization"]
+            combined_scale = scale * input_scale
+            if self.scrypted_yolov10:
+                objs = yolo.parse_yolov10(
+                    x[0],
+                    scale=lambda v: (v - zero_point) * combined_scale,
+                    confidence_scale=lambda v: (v - zero_point) * scale,
+                    threshold_scale=lambda v: (v - zero_point) * scale,
+                )
+            else:
+                objs = yolo.parse_yolov9(
+                    x[0],
+                    scale=lambda v: (v - zero_point) * combined_scale,
+                    confidence_scale=lambda v: (v - zero_point) * scale,
+                    threshold_scale=lambda v: (v - zero_point) * scale,
+                )
             return objs
 
         objs = await asyncio.get_event_loop().run_in_executor(self.executor, predict)
diff --git a/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py b/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py
index 1c08c2641..2f5ab034c 100644
--- a/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py
+++ b/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py
@@ -1,5 +1,5 @@
 import numpy as np
-
+from common.softmax import softmax
 class DFL:
     def __init__(self, c1=16):
         self.c1 = c1
@@ -12,10 +12,6 @@ class DFL:
         x = np.sum(self.conv_weights * x, axis=1)
         return x.reshape(b, 4, a)
 
-def softmax(x, axis=-1):
-    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
-    return e_x / np.sum(e_x, axis=axis, keepdims=True)
-
 def make_anchors(feats, strides, grid_cell_offset=0.5):
     anchor_points, stride_tensor = [], []
     assert feats is not None
@@ -59,7 +55,7 @@ def decode_bbox(preds, img_shape):
         int(np.sqrt(img_shape[-2] * img_shape[-1] / preds[p].shape[1])) for p in pos if preds[p].shape[2] != 64]
     dims = [(img_h // s, img_w // s) for s in strides]
     fake_feats = [np.zeros((1, 1, h, w), dtype=preds[0].dtype) for h, w in dims]
-    anchors, strides = [x.transpose(0, 1) for x in make_anchors(fake_feats, strides, 0.5)]  # generate anchors and strides
+    anchors, strides = make_anchors(fake_feats, strides, 0.5)
 
     strides_tensor = strides.transpose(1, 0)
     strides_tensor = np.expand_dims(strides_tensor, 0)