detect: support motion on new pipeline

2026-02-03 14:13:28 +00:00 · 2023-03-16 20:37:12 -07:00
parent 9ba22e4058
commit aedb985941
6 changed files with 120 additions and 53 deletions
--- a/plugins/objectdetector/src/main.ts
+++ b/plugins/objectdetector/src/main.ts
@@ -1,4 +1,4 @@
-import sdk, { ScryptedMimeTypes, Image, VideoFrame, VideoFrameGenerator, Camera, DeviceState, EventListenerRegister, MediaObject, MixinDeviceBase, MixinProvider, MotionSensor, ObjectDetection, ObjectDetectionCallbacks, ObjectDetectionModel, ObjectDetectionResult, ObjectDetectionTypes, ObjectDetector, ObjectsDetected, ScryptedDevice, ScryptedDeviceType, ScryptedInterface, ScryptedNativeId, Setting, Settings, SettingValue, VideoCamera } from '@scrypted/sdk';
+import sdk, { ScryptedMimeTypes, Image, VideoFrame, VideoFrameGenerator, Camera, DeviceState, EventListenerRegister, MediaObject, MixinDeviceBase, MixinProvider, MotionSensor, ObjectDetection, ObjectDetectionCallbacks, ObjectDetectionModel, ObjectDetectionResult, ObjectDetectionTypes, ObjectDetector, ObjectsDetected, ScryptedDevice, ScryptedDeviceType, ScryptedInterface, ScryptedNativeId, Setting, Settings, SettingValue, VideoCamera, MediaStreamDestination } from '@scrypted/sdk';
 import { StorageSettings } from '@scrypted/sdk/storage-settings';
 import crypto from 'crypto';
 import cloneDeep from 'lodash/cloneDeep';
@@ -53,13 +53,15 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
    newPipeline: {
      title: 'Video Pipeline',
      description: 'Configure how frames are provided to the video analysis pipeline.',
-      async onGet() {
+      onGet: async () => {
+        const choices = [
+          'Default',
+          ...getAllDevices().filter(d => d.interfaces.includes(ScryptedInterface.VideoFrameGenerator)).map(d => d.name),
+        ];
+        if (!this.hasMotionType)
+          choices.push('Snapshot');
        return {
-          choices: [
-            'Default',
-            'Snapshot',
-            ...getAllDevices().filter(d => d.interfaces.includes(ScryptedInterface.VideoFrameGenerator)).map(d => d.name),
-          ],
+          choices,
        }
      },
      defaultValue: 'Default',
@@ -142,7 +144,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
  analyzeStop = 0;
  lastDetectionInput = 0;

-  constructor(public plugin: ObjectDetectionPlugin, mixinDevice: VideoCamera & Camera & MotionSensor & ObjectDetector & Settings, mixinDeviceInterfaces: ScryptedInterface[], mixinDeviceState: { [key: string]: any }, providerNativeId: string, public objectDetection: ObjectDetection & ScryptedDevice, modelName: string, group: string, public hasMotionType: boolean, public settings: Setting[]) {
+  constructor(public plugin: ObjectDetectionPlugin, mixinDevice: VideoCamera & Camera & MotionSensor & ObjectDetector & Settings, mixinDeviceInterfaces: ScryptedInterface[], mixinDeviceState: { [key: string]: any }, providerNativeId: string, public objectDetection: ObjectDetection & ScryptedDevice, public model: ObjectDetectionModel, group: string, public hasMotionType: boolean, public settings: Setting[]) {
    super({
      mixinDevice, mixinDeviceState,
      mixinProviderNativeId: providerNativeId,
@@ -153,7 +155,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
    });

    this.cameraDevice = systemManager.getDeviceById<Camera & VideoCamera & MotionSensor & ObjectDetector>(this.id);
-    this.detectionId = modelName + '-' + this.cameraDevice.id;
+    this.detectionId = model.name + '-' + this.cameraDevice.id;

    this.bindObjectDetection();
    this.register();
@@ -171,7 +173,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
      if (this.hasMotionType) {
        // force a motion detection restart if it quit
        if (this.motionSensorSupplementation === BUILTIN_MOTION_SENSOR_REPLACE)
-          await this.startVideoDetection();
+          await this.startStreamAnalysis();
        return;
      }
    }, this.storageSettings.values.detectionInterval * 1000);
@@ -224,7 +226,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
      return;
    if (this.motionSensorSupplementation !== BUILTIN_MOTION_SENSOR_REPLACE)
      return;
-    await this.startVideoDetection();
+    await this.startStreamAnalysis();
  }

  endObjectDetection() {
@@ -310,7 +312,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
            return;
          if (!this.detectorRunning)
            this.console.log('built in motion sensor started motion, starting video detection.');
-          await this.startVideoDetection();
+          await this.startStreamAnalysis();
          return;
        }

@@ -491,8 +493,8 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
    this.analyzeStop = Date.now() + this.getDetectionDuration();

    const newPipeline = this.newPipeline;
-    let generator : () => Promise<AsyncGenerator<VideoFrame & MediaObject>>;
-    if (newPipeline === 'Snapshot') {
+    let generator: () => Promise<AsyncGenerator<VideoFrame & MediaObject>>;
+    if (newPipeline === 'Snapshot' && !this.hasMotionType) {
      const self = this;
      generator = async () => (async function* gen() {
        try {
@@ -528,20 +530,26 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
      })();
    }
    else {
+      const destination: MediaStreamDestination = this.hasMotionType ? 'low-resolution' : 'local-recorder';
      const videoFrameGenerator = systemManager.getDeviceById<VideoFrameGenerator>(newPipeline);
      if (!videoFrameGenerator)
        throw new Error('invalid VideoFrameGenerator');
      const stream = await this.cameraDevice.getVideoStream({
-        destination: 'local-recorder',
+        destination,
        // ask rebroadcast to mute audio, not needed.
        audio: null,
      });

-      generator = async () => videoFrameGenerator.generateVideoFrames(stream);
+      generator = async () => videoFrameGenerator.generateVideoFrames(stream, {
+        resize: this.model?.inputSize ? {
+          width: this.model.inputSize[0],
+          height: this.model.inputSize[1],
+        } : undefined,
+        format: this.model?.inputFormat,
+      });
    }

    try {
-      const start = Date.now();
      let detections = 0;
      for await (const detected
        of await this.objectDetection.generateObjectDetections(await generator(), {
@@ -588,6 +596,9 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
        // this.handleDetectionEvent(detected.detected);
      }
    }
+    catch (e) {
+      this.console.error('video pipeline ended with error', e);
+    }
    finally {
      this.endObjectDetection();
    }
@@ -1190,7 +1201,7 @@ class ObjectDetectorMixin extends MixinDeviceBase<ObjectDetection> implements Mi

    const settings = this.model.settings;

-    const ret = new ObjectDetectionMixin(this.plugin, mixinDevice, mixinDeviceInterfaces, mixinDeviceState, this.mixinProviderNativeId, objectDetection, this.model.name, group, hasMotionType, settings);
+    const ret = new ObjectDetectionMixin(this.plugin, mixinDevice, mixinDeviceInterfaces, mixinDeviceState, this.mixinProviderNativeId, objectDetection, this.model, group, hasMotionType, settings);
    this.currentMixins.add(ret);
    return ret;
  }
--- a/plugins/opencv/.vscode/settings.json
+++ b/plugins/opencv/.vscode/settings.json
@@ -16,6 +16,6 @@

    "scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip",
    "python.analysis.extraPaths": [
-        "./node_modules/@scrypted/sdk/scrypted_python"
+        "./node_modules/@scrypted/sdk/types/scrypted_python"
    ]
 }
--- a/plugins/opencv/package-lock.json
+++ b/plugins/opencv/package-lock.json
@@ -13,38 +13,40 @@
      },
      "../../sdk": {
         "name": "@scrypted/sdk",
-         "version": "0.1.17",
+         "version": "0.2.85",
         "dev": true,
         "license": "ISC",
         "dependencies": {
-            "@babel/preset-typescript": "^7.16.7",
+            "@babel/preset-typescript": "^7.18.6",
            "adm-zip": "^0.4.13",
            "axios": "^0.21.4",
-            "babel-loader": "^8.2.3",
+            "babel-loader": "^9.1.0",
            "babel-plugin-const-enum": "^1.1.0",
            "esbuild": "^0.15.9",
            "ncp": "^2.0.0",
            "raw-loader": "^4.0.2",
            "rimraf": "^3.0.2",
            "tmp": "^0.2.1",
-            "webpack": "^5.74.0",
+            "ts-loader": "^9.4.2",
+            "typescript": "^4.9.4",
+            "webpack": "^5.75.0",
            "webpack-bundle-analyzer": "^4.5.0"
         },
         "bin": {
+            "scrypted-changelog": "bin/scrypted-changelog.js",
            "scrypted-debug": "bin/scrypted-debug.js",
            "scrypted-deploy": "bin/scrypted-deploy.js",
            "scrypted-deploy-debug": "bin/scrypted-deploy-debug.js",
            "scrypted-package-json": "bin/scrypted-package-json.js",
-            "scrypted-readme": "bin/scrypted-readme.js",
            "scrypted-setup-project": "bin/scrypted-setup-project.js",
            "scrypted-webpack": "bin/scrypted-webpack.js"
         },
         "devDependencies": {
-            "@types/node": "^16.11.1",
+            "@types/node": "^18.11.18",
            "@types/stringify-object": "^4.0.0",
            "stringify-object": "^3.3.0",
            "ts-node": "^10.4.0",
-            "typedoc": "^0.23.15"
+            "typedoc": "^0.23.21"
         }
      },
      "../sdk": {
@@ -59,12 +61,12 @@
      "@scrypted/sdk": {
         "version": "file:../../sdk",
         "requires": {
-            "@babel/preset-typescript": "^7.16.7",
-            "@types/node": "^16.11.1",
+            "@babel/preset-typescript": "^7.18.6",
+            "@types/node": "^18.11.18",
            "@types/stringify-object": "^4.0.0",
            "adm-zip": "^0.4.13",
            "axios": "^0.21.4",
-            "babel-loader": "^8.2.3",
+            "babel-loader": "^9.1.0",
            "babel-plugin-const-enum": "^1.1.0",
            "esbuild": "^0.15.9",
            "ncp": "^2.0.0",
@@ -72,9 +74,11 @@
            "rimraf": "^3.0.2",
            "stringify-object": "^3.3.0",
            "tmp": "^0.2.1",
+            "ts-loader": "^9.4.2",
            "ts-node": "^10.4.0",
-            "typedoc": "^0.23.15",
-            "webpack": "^5.74.0",
+            "typedoc": "^0.23.21",
+            "typescript": "^4.9.4",
+            "webpack": "^5.75.0",
            "webpack-bundle-analyzer": "^4.5.0"
         }
      }
--- a/plugins/opencv/src/opencv/init.py
+++ b/plugins/opencv/src/opencv/init.py
@@ -3,6 +3,7 @@ from time import sleep
 from detect import DetectionSession, DetectPlugin
 from typing import Any, List, Tuple
 import numpy as np
+import asyncio
 import cv2
 import imutils
 Gst = None
@@ -10,7 +11,7 @@ try:
    from gi.repository import Gst
 except:
    pass
-from scrypted_sdk.types import ObjectDetectionModel, ObjectDetectionResult, ObjectsDetected, Setting
+from scrypted_sdk.types import ObjectDetectionModel, ObjectDetectionResult, ObjectsDetected, Setting, VideoFrame
 from PIL import Image

 class OpenCVDetectionSession(DetectionSession):
@@ -93,6 +94,9 @@ class OpenCVPlugin(DetectPlugin):

    def get_pixel_format(self):
        return self.pixelFormat
+    
+    def get_input_format(self) -> str:
+        return 'gray'

    def parse_settings(self, settings: Any):
        area = defaultArea
@@ -106,7 +110,8 @@ class OpenCVPlugin(DetectPlugin):
            blur = int(settings.get('blur', blur))
        return area, threshold, interval, blur

-    def detect(self, detection_session: OpenCVDetectionSession, frame, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
+    def detect(self, detection_session: OpenCVDetectionSession, frame, src_size, convert_to_src_size) -> ObjectsDetected:
+        settings = detection_session.settings
        area, threshold, interval, blur = self.parse_settings(settings)

        # see get_detection_input_size on undocumented size requirements for GRAY8
@@ -119,10 +124,15 @@ class OpenCVPlugin(DetectPlugin):
        detection_session.curFrame = cv2.GaussianBlur(
            gray, (blur, blur), 0, dst=detection_session.curFrame)

+        detections: List[ObjectDetectionResult] = []
+        detection_result: ObjectsDetected = {}
+        detection_result['detections'] = detections
+        detection_result['inputDimensions'] = src_size
+
        if detection_session.previous_frame is None:
            detection_session.previous_frame = detection_session.curFrame
            detection_session.curFrame = None
-            return
+            return detection_result

        detection_session.frameDelta = cv2.absdiff(
            detection_session.previous_frame, detection_session.curFrame, dst=detection_session.frameDelta)
@@ -138,10 +148,6 @@ class OpenCVPlugin(DetectPlugin):
            detection_session.dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = imutils.grab_contours(fcontours)

-        detections: List[ObjectDetectionResult] = []
-        detection_result: ObjectsDetected = {}
-        detection_result['detections'] = detections
-        detection_result['inputDimensions'] = src_size

        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
@@ -163,6 +169,9 @@ class OpenCVPlugin(DetectPlugin):
                detections.append(detection)

        return detection_result
+    
+    def get_input_details(self) -> Tuple[int, int, int]:
+        return (300, 300, 1)

    def get_detection_input_size(self, src_size):
        # The initial implementation of this plugin used BGRA
@@ -197,11 +206,45 @@ class OpenCVPlugin(DetectPlugin):
            detection_session.cap = None
        return super().end_session(detection_session)

-    def run_detection_image(self, detection_session: DetectionSession, image: Image.Image, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
+    async def run_detection_image(self, detection_session: DetectionSession, image: Image.Image, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
        # todo
        raise Exception('can not run motion detection on image')
+    
+    async def run_detection_videoframe(self, videoFrame: VideoFrame, detection_session: OpenCVDetectionSession) -> ObjectsDetected:
+        width = videoFrame.width
+        height = videoFrame.height

-    def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
+        aspectRatio = width / height
+        
+        # dont bother resizing if its already fairly small
+        if width <= 640 and height < 640:
+            scale = 1
+            resize = None
+        elif aspectRatio > 1:
+            scale = height / 300
+            resize = {
+                'height': 300,
+                'width': int(300 * aspectRatio)
+            }
+        else:
+            scale = width / 300
+            resize = {
+                'width': 300,
+                'height': int(300 / aspectRatio)
+            }
+
+        buffer = await videoFrame.toBuffer({
+            'resize': resize,
+        })
+
+        def convert_to_src_size(point, normalize = False):
+            return point[0] * scale, point[1] * scale, True
+        mat = np.ndarray((videoFrame.height, videoFrame.width, self.pixelFormatChannelCount), buffer=buffer, dtype=np.uint8)
+        detections = self.detect(
+            detection_session, mat, (width, height), convert_to_src_size)
+        return detections
+
+    async def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
        if avframe.format.name != 'yuv420p' and avframe.format.name != 'yuvj420p':
            mat = avframe.to_ndarray(format='gray8')
        else:
@@ -209,11 +252,11 @@ class OpenCVPlugin(DetectPlugin):
        detections = self.detect(
            detection_session, mat, settings, src_size, convert_to_src_size)
        if not detections or not len(detections['detections']):
-            self.detection_sleep(settings)
+            await self.detection_sleep(settings)
            return None, None
        return detections, None

-    def run_detection_gstsample(self, detection_session: OpenCVDetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
+    async def run_detection_gstsample(self, detection_session: OpenCVDetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
        buf = gst_sample.get_buffer()
        caps = gst_sample.get_caps()
        # can't trust the width value, compute the stride
@@ -236,18 +279,18 @@ class OpenCVPlugin(DetectPlugin):
            buf.unmap(info)

        if not detections or not len(detections['detections']):
-            self.detection_sleep(settings)
+            await self.detection_sleep(settings)
            return None, None
        return detections, None

    def create_detection_session(self):
        return OpenCVDetectionSession()

-    def detection_sleep(self, settings: Any):
+    async def detection_sleep(self, settings: Any):
        area, threshold, interval, blur = self.parse_settings(settings)
        # it is safe to block here because gstreamer creates a queue thread
-        sleep(interval / 1000)
+        await asyncio.sleep(interval / 1000)

-    def detection_event_notified(self, settings: Any):
-        self.detection_sleep(settings)
-        return super().detection_event_notified(settings)
+    async def detection_event_notified(self, settings: Any):
+        await self.detection_sleep(settings)
+        return await super().detection_event_notified(settings)
--- a/plugins/tensorflow-lite/src/detect/init.py
+++ b/plugins/tensorflow-lite/src/detect/init.py
@@ -122,6 +122,9 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
    def get_input_details(self) -> Tuple[int, int, int]:
        pass

+    def get_input_format(self) -> str:
+        pass
+
    def getModelSettings(self, settings: Any = None) -> list[Setting]:
        return []

@@ -131,6 +134,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
            'classes': self.getClasses(),
            'triggerClasses': self.getTriggerClasses(),
            'inputSize': self.get_input_details(),
+            'inputFormat': self.get_input_format(),
            'settings': [],
        }

@@ -206,7 +210,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
    def run_detection_gstsample(self, detection_session: DetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
        pass

-    async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame) -> ObjectsDetected:
+    async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, detection_session: DetectionSession) -> ObjectsDetected:
        pass

    async def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
@@ -288,13 +292,17 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
    async def generateObjectDetections(self, videoFrames: Any, session: ObjectDetectionGeneratorSession = None) -> Any:
        try:
            videoFrames = await scrypted_sdk.sdk.connectRPCObject(videoFrames)
+            detection_session = self.create_detection_session()
+            detection_session.plugin = self
+            detection_session.settings = session and session.get('settings')
            async for videoFrame in videoFrames:
-               detected = await self.run_detection_videoframe(videoFrame, session and session.get('settings'))
+               detected = await self.run_detection_videoframe(videoFrame, detection_session)
               yield {
                   '__json_copy_serialize_children': True,
                   'detected': detected,
                   'videoFrame': videoFrame,
               }
+               await self.detection_event_notified(detection_session.settings)
        except:
            raise
        finally:
@@ -456,7 +464,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):

        return ret

-    def detection_event_notified(self, settings: Any):
+    async def detection_event_notified(self, settings: Any):
        pass

    async def createMedia(self, data: Any) -> MediaObject:
@@ -527,7 +535,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
                        self.invalidateMedia(detection_session, data)

                    # asyncio.run_coroutine_threadsafe(, loop = self.loop).result()
-                    self.detection_event_notified(detection_session.settings)
+                    await self.detection_event_notified(detection_session.settings)

                if not detection_session or duration == None:
                    safe_set_result(detection_session.loop,
--- a/plugins/tensorflow-lite/src/predict/init.py
+++ b/plugins/tensorflow-lite/src/predict/init.py
@@ -272,7 +272,8 @@ class PredictPlugin(DetectPlugin, scrypted_sdk.BufferConverter, scrypted_sdk.Set
    async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss) -> ObjectsDetected:
        pass

-    async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, settings: Any) -> ObjectsDetected:
+    async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, detection_session: PredictSession) -> ObjectsDetected:
+        settings = detection_session.settings
        src_size = videoFrame.width, videoFrame.height
        w, h = self.get_input_size()
        iw, ih = src_size