detect: support motion on new pipeline

This commit is contained in:
Koushik Dutta
2023-03-16 20:37:12 -07:00
parent 9ba22e4058
commit aedb985941
6 changed files with 120 additions and 53 deletions

View File

@@ -1,4 +1,4 @@
import sdk, { ScryptedMimeTypes, Image, VideoFrame, VideoFrameGenerator, Camera, DeviceState, EventListenerRegister, MediaObject, MixinDeviceBase, MixinProvider, MotionSensor, ObjectDetection, ObjectDetectionCallbacks, ObjectDetectionModel, ObjectDetectionResult, ObjectDetectionTypes, ObjectDetector, ObjectsDetected, ScryptedDevice, ScryptedDeviceType, ScryptedInterface, ScryptedNativeId, Setting, Settings, SettingValue, VideoCamera } from '@scrypted/sdk';
import sdk, { ScryptedMimeTypes, Image, VideoFrame, VideoFrameGenerator, Camera, DeviceState, EventListenerRegister, MediaObject, MixinDeviceBase, MixinProvider, MotionSensor, ObjectDetection, ObjectDetectionCallbacks, ObjectDetectionModel, ObjectDetectionResult, ObjectDetectionTypes, ObjectDetector, ObjectsDetected, ScryptedDevice, ScryptedDeviceType, ScryptedInterface, ScryptedNativeId, Setting, Settings, SettingValue, VideoCamera, MediaStreamDestination } from '@scrypted/sdk';
import { StorageSettings } from '@scrypted/sdk/storage-settings';
import crypto from 'crypto';
import cloneDeep from 'lodash/cloneDeep';
@@ -53,13 +53,15 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
newPipeline: {
title: 'Video Pipeline',
description: 'Configure how frames are provided to the video analysis pipeline.',
async onGet() {
onGet: async () => {
const choices = [
'Default',
...getAllDevices().filter(d => d.interfaces.includes(ScryptedInterface.VideoFrameGenerator)).map(d => d.name),
];
if (!this.hasMotionType)
choices.push('Snapshot');
return {
choices: [
'Default',
'Snapshot',
...getAllDevices().filter(d => d.interfaces.includes(ScryptedInterface.VideoFrameGenerator)).map(d => d.name),
],
choices,
}
},
defaultValue: 'Default',
@@ -142,7 +144,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
analyzeStop = 0;
lastDetectionInput = 0;
constructor(public plugin: ObjectDetectionPlugin, mixinDevice: VideoCamera & Camera & MotionSensor & ObjectDetector & Settings, mixinDeviceInterfaces: ScryptedInterface[], mixinDeviceState: { [key: string]: any }, providerNativeId: string, public objectDetection: ObjectDetection & ScryptedDevice, modelName: string, group: string, public hasMotionType: boolean, public settings: Setting[]) {
constructor(public plugin: ObjectDetectionPlugin, mixinDevice: VideoCamera & Camera & MotionSensor & ObjectDetector & Settings, mixinDeviceInterfaces: ScryptedInterface[], mixinDeviceState: { [key: string]: any }, providerNativeId: string, public objectDetection: ObjectDetection & ScryptedDevice, public model: ObjectDetectionModel, group: string, public hasMotionType: boolean, public settings: Setting[]) {
super({
mixinDevice, mixinDeviceState,
mixinProviderNativeId: providerNativeId,
@@ -153,7 +155,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
});
this.cameraDevice = systemManager.getDeviceById<Camera & VideoCamera & MotionSensor & ObjectDetector>(this.id);
this.detectionId = modelName + '-' + this.cameraDevice.id;
this.detectionId = model.name + '-' + this.cameraDevice.id;
this.bindObjectDetection();
this.register();
@@ -171,7 +173,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
if (this.hasMotionType) {
// force a motion detection restart if it quit
if (this.motionSensorSupplementation === BUILTIN_MOTION_SENSOR_REPLACE)
await this.startVideoDetection();
await this.startStreamAnalysis();
return;
}
}, this.storageSettings.values.detectionInterval * 1000);
@@ -224,7 +226,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
return;
if (this.motionSensorSupplementation !== BUILTIN_MOTION_SENSOR_REPLACE)
return;
await this.startVideoDetection();
await this.startStreamAnalysis();
}
endObjectDetection() {
@@ -310,7 +312,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
return;
if (!this.detectorRunning)
this.console.log('built in motion sensor started motion, starting video detection.');
await this.startVideoDetection();
await this.startStreamAnalysis();
return;
}
@@ -491,8 +493,8 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
this.analyzeStop = Date.now() + this.getDetectionDuration();
const newPipeline = this.newPipeline;
let generator : () => Promise<AsyncGenerator<VideoFrame & MediaObject>>;
if (newPipeline === 'Snapshot') {
let generator: () => Promise<AsyncGenerator<VideoFrame & MediaObject>>;
if (newPipeline === 'Snapshot' && !this.hasMotionType) {
const self = this;
generator = async () => (async function* gen() {
try {
@@ -528,20 +530,26 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
})();
}
else {
const destination: MediaStreamDestination = this.hasMotionType ? 'low-resolution' : 'local-recorder';
const videoFrameGenerator = systemManager.getDeviceById<VideoFrameGenerator>(newPipeline);
if (!videoFrameGenerator)
throw new Error('invalid VideoFrameGenerator');
const stream = await this.cameraDevice.getVideoStream({
destination: 'local-recorder',
destination,
// ask rebroadcast to mute audio, not needed.
audio: null,
});
generator = async () => videoFrameGenerator.generateVideoFrames(stream);
generator = async () => videoFrameGenerator.generateVideoFrames(stream, {
resize: this.model?.inputSize ? {
width: this.model.inputSize[0],
height: this.model.inputSize[1],
} : undefined,
format: this.model?.inputFormat,
});
}
try {
const start = Date.now();
let detections = 0;
for await (const detected
of await this.objectDetection.generateObjectDetections(await generator(), {
@@ -588,6 +596,9 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
// this.handleDetectionEvent(detected.detected);
}
}
catch (e) {
this.console.error('video pipeline ended with error', e);
}
finally {
this.endObjectDetection();
}
@@ -1190,7 +1201,7 @@ class ObjectDetectorMixin extends MixinDeviceBase<ObjectDetection> implements Mi
const settings = this.model.settings;
const ret = new ObjectDetectionMixin(this.plugin, mixinDevice, mixinDeviceInterfaces, mixinDeviceState, this.mixinProviderNativeId, objectDetection, this.model.name, group, hasMotionType, settings);
const ret = new ObjectDetectionMixin(this.plugin, mixinDevice, mixinDeviceInterfaces, mixinDeviceState, this.mixinProviderNativeId, objectDetection, this.model, group, hasMotionType, settings);
this.currentMixins.add(ret);
return ret;
}

View File

@@ -16,6 +16,6 @@
"scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip",
"python.analysis.extraPaths": [
"./node_modules/@scrypted/sdk/scrypted_python"
"./node_modules/@scrypted/sdk/types/scrypted_python"
]
}

View File

@@ -13,38 +13,40 @@
},
"../../sdk": {
"name": "@scrypted/sdk",
"version": "0.1.17",
"version": "0.2.85",
"dev": true,
"license": "ISC",
"dependencies": {
"@babel/preset-typescript": "^7.16.7",
"@babel/preset-typescript": "^7.18.6",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-loader": "^9.1.0",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"tmp": "^0.2.1",
"webpack": "^5.74.0",
"ts-loader": "^9.4.2",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-bundle-analyzer": "^4.5.0"
},
"bin": {
"scrypted-changelog": "bin/scrypted-changelog.js",
"scrypted-debug": "bin/scrypted-debug.js",
"scrypted-deploy": "bin/scrypted-deploy.js",
"scrypted-deploy-debug": "bin/scrypted-deploy-debug.js",
"scrypted-package-json": "bin/scrypted-package-json.js",
"scrypted-readme": "bin/scrypted-readme.js",
"scrypted-setup-project": "bin/scrypted-setup-project.js",
"scrypted-webpack": "bin/scrypted-webpack.js"
},
"devDependencies": {
"@types/node": "^16.11.1",
"@types/node": "^18.11.18",
"@types/stringify-object": "^4.0.0",
"stringify-object": "^3.3.0",
"ts-node": "^10.4.0",
"typedoc": "^0.23.15"
"typedoc": "^0.23.21"
}
},
"../sdk": {
@@ -59,12 +61,12 @@
"@scrypted/sdk": {
"version": "file:../../sdk",
"requires": {
"@babel/preset-typescript": "^7.16.7",
"@types/node": "^16.11.1",
"@babel/preset-typescript": "^7.18.6",
"@types/node": "^18.11.18",
"@types/stringify-object": "^4.0.0",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-loader": "^9.1.0",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
@@ -72,9 +74,11 @@
"rimraf": "^3.0.2",
"stringify-object": "^3.3.0",
"tmp": "^0.2.1",
"ts-loader": "^9.4.2",
"ts-node": "^10.4.0",
"typedoc": "^0.23.15",
"webpack": "^5.74.0",
"typedoc": "^0.23.21",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-bundle-analyzer": "^4.5.0"
}
}

View File

@@ -3,6 +3,7 @@ from time import sleep
from detect import DetectionSession, DetectPlugin
from typing import Any, List, Tuple
import numpy as np
import asyncio
import cv2
import imutils
Gst = None
@@ -10,7 +11,7 @@ try:
from gi.repository import Gst
except:
pass
from scrypted_sdk.types import ObjectDetectionModel, ObjectDetectionResult, ObjectsDetected, Setting
from scrypted_sdk.types import ObjectDetectionModel, ObjectDetectionResult, ObjectsDetected, Setting, VideoFrame
from PIL import Image
class OpenCVDetectionSession(DetectionSession):
@@ -93,6 +94,9 @@ class OpenCVPlugin(DetectPlugin):
def get_pixel_format(self):
return self.pixelFormat
def get_input_format(self) -> str:
return 'gray'
def parse_settings(self, settings: Any):
area = defaultArea
@@ -106,7 +110,8 @@ class OpenCVPlugin(DetectPlugin):
blur = int(settings.get('blur', blur))
return area, threshold, interval, blur
def detect(self, detection_session: OpenCVDetectionSession, frame, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
def detect(self, detection_session: OpenCVDetectionSession, frame, src_size, convert_to_src_size) -> ObjectsDetected:
settings = detection_session.settings
area, threshold, interval, blur = self.parse_settings(settings)
# see get_detection_input_size on undocumented size requirements for GRAY8
@@ -119,10 +124,15 @@ class OpenCVPlugin(DetectPlugin):
detection_session.curFrame = cv2.GaussianBlur(
gray, (blur, blur), 0, dst=detection_session.curFrame)
detections: List[ObjectDetectionResult] = []
detection_result: ObjectsDetected = {}
detection_result['detections'] = detections
detection_result['inputDimensions'] = src_size
if detection_session.previous_frame is None:
detection_session.previous_frame = detection_session.curFrame
detection_session.curFrame = None
return
return detection_result
detection_session.frameDelta = cv2.absdiff(
detection_session.previous_frame, detection_session.curFrame, dst=detection_session.frameDelta)
@@ -138,10 +148,6 @@ class OpenCVPlugin(DetectPlugin):
detection_session.dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(fcontours)
detections: List[ObjectDetectionResult] = []
detection_result: ObjectsDetected = {}
detection_result['detections'] = detections
detection_result['inputDimensions'] = src_size
for c in contours:
x, y, w, h = cv2.boundingRect(c)
@@ -163,6 +169,9 @@ class OpenCVPlugin(DetectPlugin):
detections.append(detection)
return detection_result
def get_input_details(self) -> Tuple[int, int, int]:
return (300, 300, 1)
def get_detection_input_size(self, src_size):
# The initial implementation of this plugin used BGRA
@@ -197,11 +206,45 @@ class OpenCVPlugin(DetectPlugin):
detection_session.cap = None
return super().end_session(detection_session)
def run_detection_image(self, detection_session: DetectionSession, image: Image.Image, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
async def run_detection_image(self, detection_session: DetectionSession, image: Image.Image, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
# todo
raise Exception('can not run motion detection on image')
async def run_detection_videoframe(self, videoFrame: VideoFrame, detection_session: OpenCVDetectionSession) -> ObjectsDetected:
width = videoFrame.width
height = videoFrame.height
def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
aspectRatio = width / height
# dont bother resizing if its already fairly small
if width <= 640 and height < 640:
scale = 1
resize = None
elif aspectRatio > 1:
scale = height / 300
resize = {
'height': 300,
'width': int(300 * aspectRatio)
}
else:
scale = width / 300
resize = {
'width': 300,
'height': int(300 / aspectRatio)
}
buffer = await videoFrame.toBuffer({
'resize': resize,
})
def convert_to_src_size(point, normalize = False):
return point[0] * scale, point[1] * scale, True
mat = np.ndarray((videoFrame.height, videoFrame.width, self.pixelFormatChannelCount), buffer=buffer, dtype=np.uint8)
detections = self.detect(
detection_session, mat, (width, height), convert_to_src_size)
return detections
async def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
if avframe.format.name != 'yuv420p' and avframe.format.name != 'yuvj420p':
mat = avframe.to_ndarray(format='gray8')
else:
@@ -209,11 +252,11 @@ class OpenCVPlugin(DetectPlugin):
detections = self.detect(
detection_session, mat, settings, src_size, convert_to_src_size)
if not detections or not len(detections['detections']):
self.detection_sleep(settings)
await self.detection_sleep(settings)
return None, None
return detections, None
def run_detection_gstsample(self, detection_session: OpenCVDetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
async def run_detection_gstsample(self, detection_session: OpenCVDetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> ObjectsDetected:
buf = gst_sample.get_buffer()
caps = gst_sample.get_caps()
# can't trust the width value, compute the stride
@@ -236,18 +279,18 @@ class OpenCVPlugin(DetectPlugin):
buf.unmap(info)
if not detections or not len(detections['detections']):
self.detection_sleep(settings)
await self.detection_sleep(settings)
return None, None
return detections, None
def create_detection_session(self):
return OpenCVDetectionSession()
def detection_sleep(self, settings: Any):
async def detection_sleep(self, settings: Any):
area, threshold, interval, blur = self.parse_settings(settings)
# it is safe to block here because gstreamer creates a queue thread
sleep(interval / 1000)
await asyncio.sleep(interval / 1000)
def detection_event_notified(self, settings: Any):
self.detection_sleep(settings)
return super().detection_event_notified(settings)
async def detection_event_notified(self, settings: Any):
await self.detection_sleep(settings)
return await super().detection_event_notified(settings)

View File

@@ -122,6 +122,9 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
def get_input_details(self) -> Tuple[int, int, int]:
pass
def get_input_format(self) -> str:
pass
def getModelSettings(self, settings: Any = None) -> list[Setting]:
return []
@@ -131,6 +134,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
'classes': self.getClasses(),
'triggerClasses': self.getTriggerClasses(),
'inputSize': self.get_input_details(),
'inputFormat': self.get_input_format(),
'settings': [],
}
@@ -206,7 +210,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
def run_detection_gstsample(self, detection_session: DetectionSession, gst_sample, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
pass
async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame) -> ObjectsDetected:
async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, detection_session: DetectionSession) -> ObjectsDetected:
pass
async def run_detection_avframe(self, detection_session: DetectionSession, avframe, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Any]:
@@ -288,13 +292,17 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
async def generateObjectDetections(self, videoFrames: Any, session: ObjectDetectionGeneratorSession = None) -> Any:
try:
videoFrames = await scrypted_sdk.sdk.connectRPCObject(videoFrames)
detection_session = self.create_detection_session()
detection_session.plugin = self
detection_session.settings = session and session.get('settings')
async for videoFrame in videoFrames:
detected = await self.run_detection_videoframe(videoFrame, session and session.get('settings'))
detected = await self.run_detection_videoframe(videoFrame, detection_session)
yield {
'__json_copy_serialize_children': True,
'detected': detected,
'videoFrame': videoFrame,
}
await self.detection_event_notified(detection_session.settings)
except:
raise
finally:
@@ -456,7 +464,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
return ret
def detection_event_notified(self, settings: Any):
async def detection_event_notified(self, settings: Any):
pass
async def createMedia(self, data: Any) -> MediaObject:
@@ -527,7 +535,7 @@ class DetectPlugin(scrypted_sdk.ScryptedDeviceBase, ObjectDetection):
self.invalidateMedia(detection_session, data)
# asyncio.run_coroutine_threadsafe(, loop = self.loop).result()
self.detection_event_notified(detection_session.settings)
await self.detection_event_notified(detection_session.settings)
if not detection_session or duration == None:
safe_set_result(detection_session.loop,

View File

@@ -272,7 +272,8 @@ class PredictPlugin(DetectPlugin, scrypted_sdk.BufferConverter, scrypted_sdk.Set
async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss) -> ObjectsDetected:
pass
async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, settings: Any) -> ObjectsDetected:
async def run_detection_videoframe(self, videoFrame: scrypted_sdk.VideoFrame, detection_session: PredictSession) -> ObjectsDetected:
settings = detection_session.settings
src_size = videoFrame.width, videoFrame.height
w, h = self.get_input_size()
iw, ih = src_size