From 758adf8022eb084329f59cc6275c3bdc1dbe2a44 Mon Sep 17 00:00:00 2001 From: Koushik Dutta Date: Tue, 27 Sep 2022 20:34:52 -0700 Subject: [PATCH] tensorflow-lite/opencv/videoanalysis: refactor to support detection snapshots --- plugins/objectdetector/.vscode/settings.json | 2 +- plugins/objectdetector/src/main.ts | 26 ++-- plugins/opencv/package-lock.json | 19 ++- plugins/opencv/src/opencv/__init__.py | 6 +- plugins/tensorflow-lite/.vscode/settings.json | 8 +- .../tensorflow-lite/src/pipeline/__init__.py | 90 +++++++----- .../tensorflow-lite/src/tflite/__init__.py | 130 ++++++++++++++---- 7 files changed, 185 insertions(+), 96 deletions(-) diff --git a/plugins/objectdetector/.vscode/settings.json b/plugins/objectdetector/.vscode/settings.json index aed953a86..79c896063 100644 --- a/plugins/objectdetector/.vscode/settings.json +++ b/plugins/objectdetector/.vscode/settings.json @@ -1,3 +1,3 @@ { - "scrypted.debugHost": "raspberrypi", + "scrypted.debugHost": "127.0.0.1", } \ No newline at end of file diff --git a/plugins/objectdetector/src/main.ts b/plugins/objectdetector/src/main.ts index 272112462..f4044f86c 100644 --- a/plugins/objectdetector/src/main.ts +++ b/plugins/objectdetector/src/main.ts @@ -5,7 +5,7 @@ import { alertRecommendedPlugins } from '@scrypted/common/src/alert-recommended- import { DenoisedDetectionEntry, DenoisedDetectionState, denoiseDetections } from './denoise'; import { AutoenableMixinProvider } from "../../../common/src/autoenable-mixin-provider" import { safeParseJson } from './util'; -import fs from 'fs'; +import crypto from 'crypto'; const polygonOverlap = require('polygon-overlap'); @@ -150,7 +150,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase { @@ -302,10 +305,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase { this.detections.delete(detectionId); @@ -465,7 +469,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase { const detection = this.detections.get(detectionId); if (detection) - return; + return detection; if (this.mixinDeviceInterfaces.includes(ScryptedInterface.ObjectDetector)) return this.mixinDevice.getDetectionInput(detectionId); throw new Error('Detection not found. It may have expired.'); diff --git a/plugins/opencv/package-lock.json b/plugins/opencv/package-lock.json index 620fd8c67..1a4d9da8c 100644 --- a/plugins/opencv/package-lock.json +++ b/plugins/opencv/package-lock.json @@ -7,14 +7,13 @@ "": { "name": "@scrypted/opencv", "version": "0.0.46", - "hasInstallScript": true, "devDependencies": { "@scrypted/sdk": "file:../../sdk" } }, "../../sdk": { "name": "@scrypted/sdk", - "version": "0.0.199", + "version": "0.1.17", "dev": true, "license": "ISC", "dependencies": { @@ -23,12 +22,13 @@ "axios": "^0.21.4", "babel-loader": "^8.2.3", "babel-plugin-const-enum": "^1.1.0", - "esbuild": "^0.13.8", + "esbuild": "^0.15.9", "ncp": "^2.0.0", "raw-loader": "^4.0.2", "rimraf": "^3.0.2", "tmp": "^0.2.1", - "webpack": "^5.59.0" + "webpack": "^5.74.0", + "webpack-bundle-analyzer": "^4.5.0" }, "bin": { "scrypted-debug": "bin/scrypted-debug.js", @@ -44,9 +44,7 @@ "@types/stringify-object": "^4.0.0", "stringify-object": "^3.3.0", "ts-node": "^10.4.0", - "typedoc": "^0.22.8", - "typescript-json-schema": "^0.50.1", - "webpack-bundle-analyzer": "^4.5.0" + "typedoc": "^0.23.15" } }, "../sdk": { @@ -68,16 +66,15 @@ "axios": "^0.21.4", "babel-loader": "^8.2.3", "babel-plugin-const-enum": "^1.1.0", - "esbuild": "^0.13.8", + "esbuild": "^0.15.9", "ncp": "^2.0.0", "raw-loader": "^4.0.2", "rimraf": "^3.0.2", "stringify-object": "^3.3.0", "tmp": "^0.2.1", "ts-node": "^10.4.0", - "typedoc": "^0.22.8", - "typescript-json-schema": "^0.50.1", - "webpack": "^5.59.0", + "typedoc": "^0.23.15", + "webpack": "^5.74.0", "webpack-bundle-analyzer": "^4.5.0" } } diff --git a/plugins/opencv/src/opencv/__init__.py b/plugins/opencv/src/opencv/__init__.py index e9ce5f8aa..665894129 100644 --- a/plugins/opencv/src/opencv/__init__.py +++ b/plugins/opencv/src/opencv/__init__.py @@ -207,7 +207,7 @@ class OpenCVPlugin(DetectPlugin): width = caps.get_structure(0).get_value('width') result, info = buf.map(Gst.MapFlags.READ) if not result: - return + return None, None try: mat = np.ndarray( (height, @@ -223,8 +223,8 @@ class OpenCVPlugin(DetectPlugin): if not detections or not len(detections['detections']): self.detection_sleep(settings) - return None - return detections + return None, None + return detections, None def create_detection_session(self): return OpenCVDetectionSession() diff --git a/plugins/tensorflow-lite/.vscode/settings.json b/plugins/tensorflow-lite/.vscode/settings.json index 89ced671e..6c1c35006 100644 --- a/plugins/tensorflow-lite/.vscode/settings.json +++ b/plugins/tensorflow-lite/.vscode/settings.json @@ -1,16 +1,16 @@ { // docker installation - "scrypted.debugHost": "raspberrypi", - "scrypted.serverRoot": "/server", + // "scrypted.debugHost": "raspberrypi", + // "scrypted.serverRoot": "/server", // pi local installation // "scrypted.debugHost": "192.168.2.119", // "scrypted.serverRoot": "/home/pi/.scrypted", // local checkout - // "scrypted.debugHost": "127.0.0.1", - // "scrypted.serverRoot": "/Users/koush/.scrypted", + "scrypted.debugHost": "127.0.0.1", + "scrypted.serverRoot": "/Users/koush/.scrypted", "scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip", "python.analysis.extraPaths": [ diff --git a/plugins/tensorflow-lite/src/pipeline/__init__.py b/plugins/tensorflow-lite/src/pipeline/__init__.py index 5a13f34cd..ba59feb61 100644 --- a/plugins/tensorflow-lite/src/pipeline/__init__.py +++ b/plugins/tensorflow-lite/src/pipeline/__init__.py @@ -66,8 +66,9 @@ class GstPipelineBase: self.watchId = None self.gst = None + class GstPipeline(GstPipelineBase): - def __init__(self, loop: AbstractEventLoop, finished: Future, appsink_name: str, user_callback, crop = False): + def __init__(self, loop: AbstractEventLoop, finished: Future, appsink_name: str, user_callback, crop=False): super().__init__(loop, finished) self.appsink_name = appsink_name self.user_callback = user_callback @@ -115,13 +116,15 @@ class GstPipeline(GstPipelineBase): def get_src_size(self): if not self.src_size: videoconvert = self.gst.get_by_name('videoconvert') - structure = videoconvert.srcpads[0].get_current_caps().get_structure(0) + structure = videoconvert.srcpads[0].get_current_caps( + ).get_structure(0) _, w = structure.get_int('width') _, h = structure.get_int('height') self.src_size = (w, h) videoscale = self.gst.get_by_name('videoscale') - structure = videoscale.srcpads[0].get_current_caps().get_structure(0) + structure = videoscale.srcpads[0].get_current_caps( + ).get_structure(0) _, w = structure.get_int('width') _, h = structure.get_int('height') self.dst_size = (w, h) @@ -134,7 +137,8 @@ class GstPipeline(GstPipelineBase): # the dimension with the higher scale value got cropped or boxed. # use the other dimension to figure out the crop/box amount. - scales = (self.dst_size[0] / self.src_size[0], self.dst_size[1] / self.src_size[1]) + scales = (self.dst_size[0] / self.src_size[0], + self.dst_size[1] / self.src_size[1]) if self.crop: scale = max(scales[0], scales[1]) else: @@ -148,10 +152,10 @@ class GstPipeline(GstPipelineBase): py = math.ceil((self.dst_size[1] - dy) / 2) self.pad_size = (px, py) - + return self.src_size - def convert_to_src_size(self, point, normalize = False): + def convert_to_src_size(self, point, normalize=False): valid = True px, py = self.pad_size x, y = point @@ -189,49 +193,61 @@ class GstPipeline(GstPipelineBase): break gstsample = self.gstsample self.gstsample = None - self.user_callback(gstsample, self.get_src_size(), lambda p, normalize=False: self.convert_to_src_size(p, normalize)) + self.user_callback(gstsample, self.get_src_size( + ), lambda p, normalize=False: self.convert_to_src_size(p, normalize)) + def get_dev_board_model(): - try: - model = open('/sys/firmware/devicetree/base/model').read().lower() - if 'mx8mq' in model: - return 'mx8mq' - if 'mt8167' in model: - return 'mt8167' - except: pass - return None + try: + model = open('/sys/firmware/devicetree/base/model').read().lower() + if 'mx8mq' in model: + return 'mx8mq' + if 'mt8167' in model: + return 'mt8167' + except: + pass + return None + def create_pipeline_sink( - appsink_name, - appsink_size, - pixel_format, - crop = False): - SINK_ELEMENT = 'appsink name={appsink_name} emit-signals=true max-buffers=0 drop=true sync=false'.format(appsink_name=appsink_name) + appsink_name, + appsink_size, + pixel_format, + crop=False): + SINK_ELEMENT = 'appsink name={appsink_name} emit-signals=true max-buffers=0 drop=true sync=false'.format( + appsink_name=appsink_name) - (width, height)= appsink_size + (width, height) = appsink_size - SINK_CAPS = 'video/x-raw,format={pixel_format},width={width},height={height},pixel-aspect-ratio=1/1' - sink_caps = SINK_CAPS.format(width=width, height=height, pixel_format=pixel_format) + SINK_CAPS = 'video/x-raw,format={pixel_format}' + if width and height: + SINK_CAPS += ',width={width},height={height},pixel-aspect-ratio=1/1' + + sink_caps = SINK_CAPS.format( + width=width, height=height, pixel_format=pixel_format) pipeline = " {sink_caps} ! {sink_element}".format( sink_caps=sink_caps, sink_element=SINK_ELEMENT) return pipeline + def create_pipeline( - appsink_name, - appsink_size, - video_input, - pixel_format, - crop = False, - parse_only = False): + appsink_name, + appsink_size, + video_input, + pixel_format, + crop=False, + parse_only=False): if parse_only: - sink = 'appsink name={appsink_name} emit-signals=true sync=false'.format(appsink_name=appsink_name) + sink = 'appsink name={appsink_name} emit-signals=true sync=false'.format( + appsink_name=appsink_name) PIPELINE = """ {video_input} ! {sink} """ else: - sink = create_pipeline_sink(appsink_name, appsink_size, pixel_format, crop = crop) + sink = create_pipeline_sink( + appsink_name, appsink_size, pixel_format, crop=crop) if crop: PIPELINE = """ {video_input} ! videoconvert name=videoconvert ! aspectratiocrop aspect-ratio=1/1 ! videoscale name=videoscale ! queue leaky=downstream max-size-buffers=0 ! {sink} @@ -240,19 +256,21 @@ def create_pipeline( PIPELINE = """ {video_input} ! queue leaky=downstream max-size-buffers=0 ! videoconvert name=videoconvert ! videoscale name=videoscale ! {sink} """ - pipeline = PIPELINE.format(video_input = video_input, sink = sink) + pipeline = PIPELINE.format(video_input=video_input, sink=sink) print('Gstreamer pipeline:\n', pipeline) return pipeline + def run_pipeline(loop, finished, user_callback, appsink_name, appsink_size, video_input, pixel_format, - crop = False, - parse_only = False): - gst = GstPipeline(loop, finished, appsink_name, user_callback, crop = crop) - pipeline = create_pipeline(appsink_name, appsink_size, video_input, pixel_format, crop = crop, parse_only = parse_only) + crop=False, + parse_only=False): + gst = GstPipeline(loop, finished, appsink_name, user_callback, crop=crop) + pipeline = create_pipeline( + appsink_name, appsink_size, video_input, pixel_format, crop=crop, parse_only=parse_only) gst.parse_launch(pipeline) return gst diff --git a/plugins/tensorflow-lite/src/tflite/__init__.py b/plugins/tensorflow-lite/src/tflite/__init__.py index 926f6d9a7..e0263267c 100644 --- a/plugins/tensorflow-lite/src/tflite/__init__.py +++ b/plugins/tensorflow-lite/src/tflite/__init__.py @@ -23,6 +23,7 @@ import scrypted_sdk from typing import Any, List, Tuple from gi.repository import Gst import asyncio +import numpy from detect import DetectionSession, DetectPlugin @@ -50,13 +51,14 @@ def parse_label_contents(contents: str): defaultThreshold = .4 +defaultSecondThreshold = .7 class RawImage: - jpeg: scrypted_sdk.MediaObject + jpegMediaObject: scrypted_sdk.MediaObject def __init__(self, image: Image.Image): self.image = image - self.jpeg = None + self.jpegMediaObject = None MIME_TYPE = 'x-scrypted-tensorflow-lite/x-raw-image' @@ -67,7 +69,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): self.fromMimeType = MIME_TYPE self.toMimeType = scrypted_sdk.ScryptedMimeTypes.MediaObject.value - self.crop = True + self.crop = False labels_contents = scrypted_sdk.zip.open( 'fs/coco_labels.txt').read().decode('utf8') @@ -117,10 +119,10 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): detection_session.image = image else: image.close() - data.jpeg = None + data.jpegMediaObject = None async def convert(self, data: RawImage, fromMimeType: str, toMimeType: str, options: scrypted_sdk.BufferConvertorOptions = None) -> Any: - mo = data.jpeg + mo = data.jpegMediaObject if not mo: image = data.image if not image: @@ -130,8 +132,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): image.save(bio, format='JPEG') jpegBytes = bio.getvalue() mo = await scrypted_sdk.mediaManager.createMediaObject(jpegBytes, 'image/jpeg') - data.jpeg = jpegBytes - data.image = None + data.jpegMediaObject = mo return mo def requestRestart(self): @@ -155,6 +156,14 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): 'value': defaultThreshold, 'placeholder': defaultThreshold, } + secondConfidence: Setting = { + 'title': 'Second Pass Confidence', + 'description': 'Scale, crop, and reanalyze the results from the initial detection pass to get more accurate results. This will exponentially increase complexity, so using an allow list is recommended', + 'key': 'second_score_threshold', + 'type': 'number', + 'value': defaultSecondThreshold, + 'placeholder': defaultSecondThreshold, + } decoderSetting: Setting = { 'title': "Decoder", 'description': "The gstreamer element used to decode the stream", @@ -174,7 +183,9 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): 'choices': list(self.labels.values()), 'multiple': True, 'key': 'allowList', - 'value': [], + 'value': [ + 'person', + ], } coral: Setting = { 'title': 'Detected Edge TPU', @@ -184,10 +195,10 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): 'key': 'coral', } - d['settings'] = [coral, confidence, decoderSetting, allowList] + d['settings'] = [coral, confidence, secondConfidence, decoderSetting, allowList] return d - def create_detection_result(self, objs, size, allowList, convert_to_src_size=None): + def create_detection_result(self, objs, size, allowList, convert_to_src_size=None) -> ObjectsDetected: detections: List[ObjectDetectionResult] = [] detection_result: ObjectsDetected = {} detection_result['detections'] = detections @@ -232,25 +243,93 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): stream = io.BytesIO(image_bytes) image = Image.open(stream) - score_threshold = self.parse_settings(settings) + return self.run_detection_image(self, settings, image.size) + + def get_detection_input_size(self, src_size): + return (None, None) with self.mutex: - _, scale = common.set_resized_input( - self.interpreter, image.size, lambda size: image.resize(size, Image.ANTIALIAS)) + return input_size(self.interpreter) + + def run_detection_image(self, image: Image.Image, settings: Any, src_size, convert_to_src_size: Any = None, second_pass_crop: Tuple[float, float, float, float] = None): + score_threshold = defaultThreshold + second_score_threshold = None + if settings: + score_threshold = float(settings.get( + 'score_threshold', score_threshold) or score_threshold) + check = settings.get( + 'second_score_threshold', None) + if check: + second_score_threshold = float(check) + + if second_pass_crop: + score_threshold = second_score_threshold + + (w, h) = input_size(self.interpreter) + if not second_pass_crop: + (iw, ih) = image.size + ws = w / iw + hs = h / ih + s = max(ws, hs) + scaled = image.resize((round(s * iw), round(s * ih)), Image.ANTIALIAS) + ow = round((scaled.width - w) / 2) + oh = round((scaled.height - h) / 2) + input = scaled.crop((ow, oh, ow + w, oh + h)) + + def cvss(point, normalize=False): + converted = convert_to_src_size(point, normalize) + return ((converted[0] + ow) / s, (converted[1] + oh) / s, converted[2]) + else: + (l, t, r, b) = second_pass_crop + cropped = image.crop(second_pass_crop) + (cw, ch) = cropped.size + input = cropped.resize((w, h), Image.ANTIALIAS) + + def cvss(point, normalize=False): + converted = convert_to_src_size(point, normalize) + return ((converted[0] / w) * cw + l, (converted[1] / h) * ch + t, converted[2]) + + with self.mutex: + common.set_input( + self.interpreter, input) + scale = (1, 1) + # _, scale = common.set_resized_input( + # self.interpreter, cropped.size, lambda size: cropped.resize(size, Image.ANTIALIAS)) self.interpreter.invoke() objs = detect.get_objects( self.interpreter, score_threshold=score_threshold, image_scale=scale) - allowList = settings and settings.get('allowList', None) + + allowList = settings.get('allowList', None) + ret = self.create_detection_result(objs, src_size, allowList, cvss) - return self.create_detection_result(objs, image.size, allowList) + if second_pass_crop or not second_score_threshold or not len(ret['detections']): + return ret, RawImage(image) + + secondPassDetections: List[ObjectDetectionResult] = [] + detections = ret['detections'] + ret['detections'] = [] + for detection in detections: + if detection['score'] >= second_score_threshold: + ret['detections'].append(detection) + continue + (x, y, w, h) = detection['boundingBox'] + cx = x + w / 2 + cy = y + h / 2 + d = round(max(w, h) * 1.5) + x = round(cx - d / 2) + y = round(cy - d / 2) + x = max(0, x) + y = max(0, y) + x2 = x + d + y2 = y + d - def get_detection_input_size(self, src_size): - with self.mutex: - return input_size(self.interpreter) + secondPassResult, _ = self.run_detection_image(image, settings, src_size, convert_to_src_size, (x, y, x2, y2)) + ret['detections'].extend(secondPassResult['detections']) + + return ret, RawImage(image) def run_detection_gstsample(self, detection_session: TensorFlowLiteSession, gstsample, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Image.Image]: - score_threshold = self.parse_settings(settings) - + # todo reenable this if detection images aren't needed. if False and loaded_py_coral: with self.mutex: gst_buffer = gstsample.get_buffer() @@ -280,16 +359,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter): finally: gst_buffer.unmap(info) - with self.mutex: - _, scale = common.set_resized_input( - self.interpreter, image.size, lambda size: image.resize(size, Image.ANTIALIAS)) - self.interpreter.invoke() - objs = detect.get_objects( - self.interpreter, score_threshold=score_threshold, image_scale=scale) - - allowList = settings.get('allowList', None) - - return self.create_detection_result(objs, src_size, allowList, convert_to_src_size), RawImage(image) + return self.run_detection_image(image, settings, src_size, convert_to_src_size) def create_detection_session(self): return TensorFlowLiteSession()