tensorflow-lite: quantization cleanups

This commit is contained in:
Koushik Dutta
2024-12-28 14:24:12 -08:00
parent a6a986a8ac
commit 48c5e1a5fe
3 changed files with 116 additions and 96 deletions

View File

@@ -13,39 +13,44 @@
},
"../../sdk": {
"name": "@scrypted/sdk",
"version": "0.2.39",
"version": "0.3.102",
"dev": true,
"license": "ISC",
"dependencies": {
"@babel/preset-typescript": "^7.16.7",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"@babel/preset-typescript": "^7.26.0",
"@rollup/plugin-commonjs": "^28.0.1",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.3.0",
"@rollup/plugin-typescript": "^12.1.1",
"@rollup/plugin-virtual": "^3.0.2",
"adm-zip": "^0.5.16",
"axios": "^1.7.8",
"babel-loader": "^9.2.1",
"babel-plugin-const-enum": "^1.2.0",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"tmp": "^0.2.1",
"typescript": "^4.9.3",
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.5.0"
"rimraf": "^6.0.1",
"rollup": "^4.27.4",
"tmp": "^0.2.3",
"ts-loader": "^9.5.1",
"tslib": "^2.8.1",
"typescript": "^5.6.3",
"webpack": "^5.96.1",
"webpack-bundle-analyzer": "^4.10.2"
},
"bin": {
"scrypted-changelog": "bin/scrypted-changelog.js",
"scrypted-debug": "bin/scrypted-debug.js",
"scrypted-deploy": "bin/scrypted-deploy.js",
"scrypted-deploy-debug": "bin/scrypted-deploy-debug.js",
"scrypted-package-json": "bin/scrypted-package-json.js",
"scrypted-readme": "bin/scrypted-readme.js",
"scrypted-setup-project": "bin/scrypted-setup-project.js",
"scrypted-webpack": "bin/scrypted-webpack.js"
},
"devDependencies": {
"@types/node": "^18.11.9",
"@types/stringify-object": "^4.0.0",
"stringify-object": "^3.3.0",
"ts-node": "^10.4.0",
"typedoc": "^0.23.21"
"@types/node": "^22.10.1",
"ts-node": "^10.9.2",
"typedoc": "^0.26.11"
}
},
"../sdk": {
@@ -60,24 +65,29 @@
"@scrypted/sdk": {
"version": "file:../../sdk",
"requires": {
"@babel/preset-typescript": "^7.16.7",
"@types/node": "^18.11.9",
"@types/stringify-object": "^4.0.0",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"@babel/preset-typescript": "^7.26.0",
"@rollup/plugin-commonjs": "^28.0.1",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.3.0",
"@rollup/plugin-typescript": "^12.1.1",
"@rollup/plugin-virtual": "^3.0.2",
"@types/node": "^22.10.1",
"adm-zip": "^0.5.16",
"axios": "^1.7.8",
"babel-loader": "^9.2.1",
"babel-plugin-const-enum": "^1.2.0",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"stringify-object": "^3.3.0",
"tmp": "^0.2.1",
"ts-node": "^10.4.0",
"typedoc": "^0.23.21",
"typescript": "^4.9.3",
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.5.0"
"rimraf": "^6.0.1",
"rollup": "^4.27.4",
"tmp": "^0.2.3",
"ts-loader": "^9.5.1",
"ts-node": "^10.9.2",
"tslib": "^2.8.1",
"typedoc": "^0.26.11",
"typescript": "^5.6.3",
"webpack": "^5.96.1",
"webpack-bundle-analyzer": "^4.10.2"
}
}
}

View File

@@ -223,67 +223,81 @@ class TensorFlowLitePlugin(
async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
def predict():
interpreter = self.interpreters[threading.current_thread().name]
if self.yolo:
tensor_index = input_details(interpreter, "index")
im = np.stack([input])
i = interpreter.get_input_details()[0]
if i["dtype"] == np.int8:
scale, zero_point = i["quantization"]
if scale == 0.003986024297773838 and zero_point == -128:
# fast path for quantization 1/255 = 0.003986024297773838
im = im.view(np.int8)
im -= 128
else:
im = im.astype(np.float32) / (255.0 * scale)
im = (im + zero_point).astype(np.int8) # de-scale
else:
# this code path is unused.
im = im.astype(np.float32) / 255.0
interpreter.set_tensor(tensor_index, im)
interpreter.invoke()
output_details = interpreter.get_output_details()
input_scale = self.get_input_details()[0]
if self.scrypted_yolo_sep:
outputs = []
for index, output in enumerate(output_details):
o = interpreter.get_tensor(output["index"]).astype(np.float32)
scale, zero_point = output["quantization"]
o -= zero_point
o *= scale
outputs.append(o)
output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
objs = yolo.parse_yolov9(output[0])
else:
output = output_details[0]
x = interpreter.get_tensor(output["index"])
if x.dtype == np.int8:
scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
# this code path is unused.
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
else:
if not self.yolo:
tflite_common.set_input(interpreter, input)
interpreter.invoke()
objs = detect.get_objects(
interpreter, score_threshold=0.2, image_scale=(1, 1)
)
return objs
tensor_index = input_details(interpreter, "index")
im = np.stack([input])
i = interpreter.get_input_details()[0]
if i["dtype"] == np.int8:
scale, zero_point = i["quantization"]
if scale == 0.003986024297773838 and zero_point == -128:
# fast path: scale 0.003986024297773838 (approximately 1/255) with zero_point -128 lets the input bytes be reinterpreted as int8 and shifted by 128
im = im.view(np.int8)
im -= 128
else:
im = im.astype(np.float32) / (255.0 * scale)
im = (im + zero_point).astype(np.int8) # de-scale
else:
# this code path is unused.
im = im.astype(np.float32) / 255.0
interpreter.set_tensor(tensor_index, im)
interpreter.invoke()
output_details = interpreter.get_output_details()
# handle separate outputs for quantization accuracy
if self.scrypted_yolo_sep:
outputs = []
for output in output_details:
o = interpreter.get_tensor(output["index"]).astype(np.float32)
scale, zero_point = output["quantization"]
o -= zero_point
o *= scale
outputs.append(o)
output = yolo_separate_outputs.decode_bbox(outputs, [input.width, input.height])
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(output[0])
else:
objs = yolo.parse_yolov9(output[0])
return objs
# this scale stuff can probably be optimized to dequantize ahead of time...
output = output_details[0]
x = interpreter.get_tensor(output["index"])
input_scale = self.get_input_details()[0]
# this non-quantized code path is unused but here for reference.
if x.dtype != np.int8 and x.dtype != np.int16:
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(x[0], scale=lambda v: v * input_scale)
else:
objs = yolo.parse_yolov9(x[0], scale=lambda v: v * input_scale)
return objs
# this scale stuff can probably be optimized to dequantize ahead of time...
scale, zero_point = output["quantization"]
combined_scale = scale * input_scale
if self.scrypted_yolov10:
objs = yolo.parse_yolov10(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
else:
objs = yolo.parse_yolov9(
x[0],
scale=lambda v: (v - zero_point) * combined_scale,
confidence_scale=lambda v: (v - zero_point) * scale,
threshold_scale=lambda v: (v - zero_point) * scale,
)
return objs
objs = await asyncio.get_event_loop().run_in_executor(self.executor, predict)

View File

@@ -1,5 +1,5 @@
import numpy as np
from common.softmax import softmax
class DFL:
def __init__(self, c1=16):
self.c1 = c1
@@ -12,10 +12,6 @@ class DFL:
x = np.sum(self.conv_weights * x, axis=1)
return x.reshape(b, 4, a)
def softmax(x, axis=-1):
e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
return e_x / np.sum(e_x, axis=axis, keepdims=True)
def make_anchors(feats, strides, grid_cell_offset=0.5):
anchor_points, stride_tensor = [], []
assert feats is not None
@@ -59,7 +55,7 @@ def decode_bbox(preds, img_shape):
int(np.sqrt(img_shape[-2] * img_shape[-1] / preds[p].shape[1])) for p in pos if preds[p].shape[2] != 64]
dims = [(img_h // s, img_w // s) for s in strides]
fake_feats = [np.zeros((1, 1, h, w), dtype=preds[0].dtype) for h, w in dims]
anchors, strides = [x.transpose(0, 1) for x in make_anchors(fake_feats, strides, 0.5)] # generate anchors and strides
anchors, strides = make_anchors(fake_feats, strides, 0.5)
strides_tensor = strides.transpose(1, 0)
strides_tensor = np.expand_dims(strides_tensor, 0)