From 48c5e1a5fe0ba2175c8d6eaac07dd9bee9ee8c09 Mon Sep 17 00:00:00 2001 From: Koushik Dutta Date: Sat, 28 Dec 2024 14:24:12 -0800 Subject: [PATCH] tensorflow-lite: quantization cleanups --- plugins/tensorflow-lite/package-lock.json | 78 ++++++----- .../tensorflow-lite/src/tflite/__init__.py | 126 ++++++++++-------- .../src/tflite/yolo_separate_outputs.py | 8 +- 3 files changed, 116 insertions(+), 96 deletions(-) diff --git a/plugins/tensorflow-lite/package-lock.json b/plugins/tensorflow-lite/package-lock.json index 0c8f617ea..d0baebf52 100644 --- a/plugins/tensorflow-lite/package-lock.json +++ b/plugins/tensorflow-lite/package-lock.json @@ -13,39 +13,44 @@ }, "../../sdk": { "name": "@scrypted/sdk", - "version": "0.2.39", + "version": "0.3.102", "dev": true, "license": "ISC", "dependencies": { - "@babel/preset-typescript": "^7.16.7", - "adm-zip": "^0.4.13", - "axios": "^0.21.4", - "babel-loader": "^8.2.3", - "babel-plugin-const-enum": "^1.1.0", - "esbuild": "^0.15.9", + "@babel/preset-typescript": "^7.26.0", + "@rollup/plugin-commonjs": "^28.0.1", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^15.3.0", + "@rollup/plugin-typescript": "^12.1.1", + "@rollup/plugin-virtual": "^3.0.2", + "adm-zip": "^0.5.16", + "axios": "^1.7.8", + "babel-loader": "^9.2.1", + "babel-plugin-const-enum": "^1.2.0", "ncp": "^2.0.0", "raw-loader": "^4.0.2", - "rimraf": "^3.0.2", - "tmp": "^0.2.1", - "typescript": "^4.9.3", - "webpack": "^5.74.0", - "webpack-bundle-analyzer": "^4.5.0" + "rimraf": "^6.0.1", + "rollup": "^4.27.4", + "tmp": "^0.2.3", + "ts-loader": "^9.5.1", + "tslib": "^2.8.1", + "typescript": "^5.6.3", + "webpack": "^5.96.1", + "webpack-bundle-analyzer": "^4.10.2" }, "bin": { + "scrypted-changelog": "bin/scrypted-changelog.js", "scrypted-debug": "bin/scrypted-debug.js", "scrypted-deploy": "bin/scrypted-deploy.js", "scrypted-deploy-debug": "bin/scrypted-deploy-debug.js", "scrypted-package-json": "bin/scrypted-package-json.js", - "scrypted-readme": 
"bin/scrypted-readme.js", "scrypted-setup-project": "bin/scrypted-setup-project.js", "scrypted-webpack": "bin/scrypted-webpack.js" }, "devDependencies": { - "@types/node": "^18.11.9", - "@types/stringify-object": "^4.0.0", - "stringify-object": "^3.3.0", - "ts-node": "^10.4.0", - "typedoc": "^0.23.21" + "@types/node": "^22.10.1", + "ts-node": "^10.9.2", + "typedoc": "^0.26.11" } }, "../sdk": { @@ -60,24 +65,29 @@ "@scrypted/sdk": { "version": "file:../../sdk", "requires": { - "@babel/preset-typescript": "^7.16.7", - "@types/node": "^18.11.9", - "@types/stringify-object": "^4.0.0", - "adm-zip": "^0.4.13", - "axios": "^0.21.4", - "babel-loader": "^8.2.3", - "babel-plugin-const-enum": "^1.1.0", - "esbuild": "^0.15.9", + "@babel/preset-typescript": "^7.26.0", + "@rollup/plugin-commonjs": "^28.0.1", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^15.3.0", + "@rollup/plugin-typescript": "^12.1.1", + "@rollup/plugin-virtual": "^3.0.2", + "@types/node": "^22.10.1", + "adm-zip": "^0.5.16", + "axios": "^1.7.8", + "babel-loader": "^9.2.1", + "babel-plugin-const-enum": "^1.2.0", "ncp": "^2.0.0", "raw-loader": "^4.0.2", - "rimraf": "^3.0.2", - "stringify-object": "^3.3.0", - "tmp": "^0.2.1", - "ts-node": "^10.4.0", - "typedoc": "^0.23.21", - "typescript": "^4.9.3", - "webpack": "^5.74.0", - "webpack-bundle-analyzer": "^4.5.0" + "rimraf": "^6.0.1", + "rollup": "^4.27.4", + "tmp": "^0.2.3", + "ts-loader": "^9.5.1", + "ts-node": "^10.9.2", + "tslib": "^2.8.1", + "typedoc": "^0.26.11", + "typescript": "^5.6.3", + "webpack": "^5.96.1", + "webpack-bundle-analyzer": "^4.10.2" } } } diff --git a/plugins/tensorflow-lite/src/tflite/__init__.py b/plugins/tensorflow-lite/src/tflite/__init__.py index 73c78e8be..975d16534 100644 --- a/plugins/tensorflow-lite/src/tflite/__init__.py +++ b/plugins/tensorflow-lite/src/tflite/__init__.py @@ -223,67 +223,81 @@ class TensorFlowLitePlugin( async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss): def 
predict(): interpreter = self.interpreters[threading.current_thread().name] - if self.yolo: - tensor_index = input_details(interpreter, "index") - - im = np.stack([input]) - i = interpreter.get_input_details()[0] - if i["dtype"] == np.int8: - scale, zero_point = i["quantization"] - if scale == 0.003986024297773838 and zero_point == -128: - # fast path for quantization 1/255 = 0.003986024297773838 - im = im.view(np.int8) - im -= 128 - else: - im = im.astype(np.float32) / (255.0 * scale) - im = (im + zero_point).astype(np.int8) # de-scale - else: - # this code path is unused. - im = im.astype(np.float32) / 255.0 - interpreter.set_tensor(tensor_index, im) - interpreter.invoke() - output_details = interpreter.get_output_details() - input_scale = self.get_input_details()[0] - if self.scrypted_yolo_sep: - outputs = [] - for index, output in enumerate(output_details): - o = interpreter.get_tensor(output["index"]).astype(np.float32) - scale, zero_point = output["quantization"] - o -= zero_point - o *= scale - outputs.append(o) - - output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height]) - objs = yolo.parse_yolov9(output[0]) - else: - output = output_details[0] - x = interpreter.get_tensor(output["index"]) - if x.dtype == np.int8: - scale, zero_point = output["quantization"] - combined_scale = scale * input_scale - if self.scrypted_yolov10: - objs = yolo.parse_yolov10( - x[0], - scale=lambda v: (v - zero_point) * combined_scale, - confidence_scale=lambda v: (v - zero_point) * scale, - threshold_scale=lambda v: (v - zero_point) * scale, - ) - else: - objs = yolo.parse_yolov9( - x[0], - scale=lambda v: (v - zero_point) * combined_scale, - confidence_scale=lambda v: (v - zero_point) * scale, - threshold_scale=lambda v: (v - zero_point) * scale, - ) - else: - # this code path is unused. 
- objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale) - else: + if not self.yolo: tflite_common.set_input(interpreter, input) interpreter.invoke() objs = detect.get_objects( interpreter, score_threshold=0.2, image_scale=(1, 1) ) + return objs + + tensor_index = input_details(interpreter, "index") + + im = np.stack([input]) + i = interpreter.get_input_details()[0] + if i["dtype"] == np.int8: + scale, zero_point = i["quantization"] + if scale == 0.003986024297773838 and zero_point == -128: + # fast path for quantization 1/255 = 0.003986024297773838 + im = im.view(np.int8) + im -= 128 + else: + im = im.astype(np.float32) / (255.0 * scale) + im = (im + zero_point).astype(np.int8) # de-scale + else: + # this code path is unused. + im = im.astype(np.float32) / 255.0 + interpreter.set_tensor(tensor_index, im) + interpreter.invoke() + output_details = interpreter.get_output_details() + + # handle separate outputs for quantization accuracy + if self.scrypted_yolo_sep: + outputs = [] + for output in output_details: + o = interpreter.get_tensor(output["index"]).astype(np.float32) + scale, zero_point = output["quantization"] + o -= zero_point + o *= scale + outputs.append(o) + + output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height]) + if self.scrypted_yolov10: + objs = yolo.parse_yolov10(output[0]) + else: + objs = yolo.parse_yolov9(output[0]) + return objs + + # this scale stuff can probably be optimized to dequantize ahead of time... + output = output_details[0] + x = interpreter.get_tensor(output["index"]) + input_scale = self.get_input_details()[0] + + # this non-quantized code path is unused but here for reference. + if x.dtype != np.int8 and x.dtype != np.int16: + if self.scrypted_yolov10: + objs = yolo.parse_yolov10(x[0], scale=lambda v: v * input_scale) + else: + objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale) + return objs + + # this scale stuff can probably be optimized to dequantize ahead of time... 
+ scale, zero_point = output["quantization"] + combined_scale = scale * input_scale + if self.scrypted_yolov10: + objs = yolo.parse_yolov10( + x[0], + scale=lambda v: (v - zero_point) * combined_scale, + confidence_scale=lambda v: (v - zero_point) * scale, + threshold_scale=lambda v: (v - zero_point) * scale, + ) + else: + objs = yolo.parse_yolov9( + x[0], + scale=lambda v: (v - zero_point) * combined_scale, + confidence_scale=lambda v: (v - zero_point) * scale, + threshold_scale=lambda v: (v - zero_point) * scale, + ) return objs objs = await asyncio.get_event_loop().run_in_executor(self.executor, predict) diff --git a/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py b/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py index 1c08c2641..2f5ab034c 100644 --- a/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py +++ b/plugins/tensorflow-lite/src/tflite/yolo_separate_outputs.py @@ -1,5 +1,5 @@ import numpy as np - +from common.softmax import softmax class DFL: def __init__(self, c1=16): self.c1 = c1 @@ -12,10 +12,6 @@ class DFL: x = np.sum(self.conv_weights * x, axis=1) return x.reshape(b, 4, a) -def softmax(x, axis=-1): - e_x = np.exp(x - np.max(x, axis=axis, keepdims=True)) - return e_x / np.sum(e_x, axis=axis, keepdims=True) - def make_anchors(feats, strides, grid_cell_offset=0.5): anchor_points, stride_tensor = [], [] assert feats is not None @@ -59,7 +55,7 @@ def decode_bbox(preds, img_shape): int(np.sqrt(img_shape[-2] * img_shape[-1] / preds[p].shape[1])) for p in pos if preds[p].shape[2] != 64] dims = [(img_h // s, img_w // s) for s in strides] fake_feats = [np.zeros((1, 1, h, w), dtype=preds[0].dtype) for h, w in dims] - anchors, strides = [x.transpose(0, 1) for x in make_anchors(fake_feats, strides, 0.5)] # generate anchors and strides + anchors, strides = make_anchors(fake_feats, strides, 0.5) strides_tensor = strides.transpose(1, 0) strides_tensor = np.expand_dims(strides_tensor, 0)