tensorflow-lite/opencv/videoanalysis: refactor to support detection snapshots

This commit is contained in:
Koushik Dutta
2022-09-27 20:34:52 -07:00
parent a13991183d
commit 758adf8022
7 changed files with 185 additions and 96 deletions

View File

@@ -1,3 +1,3 @@
{
"scrypted.debugHost": "raspberrypi",
"scrypted.debugHost": "127.0.0.1",
}

View File

@@ -5,7 +5,7 @@ import { alertRecommendedPlugins } from '@scrypted/common/src/alert-recommended-
import { DenoisedDetectionEntry, DenoisedDetectionState, denoiseDetections } from './denoise';
import { AutoenableMixinProvider } from "../../../common/src/autoenable-mixin-provider"
import { safeParseJson } from './util';
import fs from 'fs';
import crypto from 'crypto';
const polygonOverlap = require('polygon-overlap');
@@ -150,7 +150,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
settings: await this.getCurrentSettings(),
});
this.objectsDetected(detections, true);
this.reportObjectDetections(detections, undefined);
this.reportObjectDetections(detections);
}
bindObjectDetection() {
@@ -170,7 +170,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
if (eventData?.detectionId !== this.detectionId)
return;
this.objectsDetected(eventData);
this.reportObjectDetections(eventData, undefined);
this.reportObjectDetections(eventData);
this.running = eventData.running;
});
@@ -194,7 +194,8 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
settings: await this.getCurrentSettings(),
});
this.objectsDetected(detections, true);
this.reportObjectDetections(detections, eventData.detectionId);
this.setDetection(detections, mo);
this.reportObjectDetections(detections);
});
}
}
@@ -228,7 +229,9 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
this.running = detection.running;
const newOrBetterDetection = this.objectsDetected(detection);
this.reportObjectDetections(detection, newOrBetterDetection ? mediaObject : undefined);
if (newOrBetterDetection)
this.setDetection(detection, mediaObject);
this.reportObjectDetections(detection);
// if (newOrBetterDetection) {
// mediaManager.convertMediaObjectToBuffer(mediaObject, 'image/jpeg')
// .then(jpeg => {
@@ -302,10 +305,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
return this.hasMotionType ? this.detectionInterval * 1000 * 5 : this.detectionDuration * 1000;
}
reportObjectDetections(detection: ObjectsDetected, detectionInput?: MediaObject) {
if (detectionInput)
this.setDetection(detection.detectionId, detectionInput);
reportObjectDetections(detection: ObjectsDetected) {
// determine zones of the objects, if configured.
if (detection.detections && Object.keys(this.zones).length) {
for (const o of detection.detections) {
@@ -441,7 +441,11 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
return newOrBetterDetection;
}
setDetection(detectionId: string, detectionInput: MediaObject) {
setDetection(detection: ObjectsDetected, detectionInput: MediaObject) {
if (!detection.detectionId)
detection.detectionId = crypto.randomBytes(4).toString('hex');
const { detectionId } = detection;
this.detections.set(detectionId, detectionInput);
setTimeout(() => {
this.detections.delete(detectionId);
@@ -465,7 +469,7 @@ class ObjectDetectionMixin extends SettingsMixinDeviceBase<VideoCamera & Camera
async getDetectionInput(detectionId: any): Promise<MediaObject> {
const detection = this.detections.get(detectionId);
if (detection)
return;
return detection;
if (this.mixinDeviceInterfaces.includes(ScryptedInterface.ObjectDetector))
return this.mixinDevice.getDetectionInput(detectionId);
throw new Error('Detection not found. It may have expired.');

View File

@@ -7,14 +7,13 @@
"": {
"name": "@scrypted/opencv",
"version": "0.0.46",
"hasInstallScript": true,
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
}
},
"../../sdk": {
"name": "@scrypted/sdk",
"version": "0.0.199",
"version": "0.1.17",
"dev": true,
"license": "ISC",
"dependencies": {
@@ -23,12 +22,13 @@
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.13.8",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"tmp": "^0.2.1",
"webpack": "^5.59.0"
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.5.0"
},
"bin": {
"scrypted-debug": "bin/scrypted-debug.js",
@@ -44,9 +44,7 @@
"@types/stringify-object": "^4.0.0",
"stringify-object": "^3.3.0",
"ts-node": "^10.4.0",
"typedoc": "^0.22.8",
"typescript-json-schema": "^0.50.1",
"webpack-bundle-analyzer": "^4.5.0"
"typedoc": "^0.23.15"
}
},
"../sdk": {
@@ -68,16 +66,15 @@
"axios": "^0.21.4",
"babel-loader": "^8.2.3",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.13.8",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"stringify-object": "^3.3.0",
"tmp": "^0.2.1",
"ts-node": "^10.4.0",
"typedoc": "^0.22.8",
"typescript-json-schema": "^0.50.1",
"webpack": "^5.59.0",
"typedoc": "^0.23.15",
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.5.0"
}
}

View File

@@ -207,7 +207,7 @@ class OpenCVPlugin(DetectPlugin):
width = caps.get_structure(0).get_value('width')
result, info = buf.map(Gst.MapFlags.READ)
if not result:
return
return None, None
try:
mat = np.ndarray(
(height,
@@ -223,8 +223,8 @@ class OpenCVPlugin(DetectPlugin):
if not detections or not len(detections['detections']):
self.detection_sleep(settings)
return None
return detections
return None, None
return detections, None
def create_detection_session(self):
return OpenCVDetectionSession()

View File

@@ -1,16 +1,16 @@
{
// docker installation
"scrypted.debugHost": "raspberrypi",
"scrypted.serverRoot": "/server",
// "scrypted.debugHost": "raspberrypi",
// "scrypted.serverRoot": "/server",
// pi local installation
// "scrypted.debugHost": "192.168.2.119",
// "scrypted.serverRoot": "/home/pi/.scrypted",
// local checkout
// "scrypted.debugHost": "127.0.0.1",
// "scrypted.serverRoot": "/Users/koush/.scrypted",
"scrypted.debugHost": "127.0.0.1",
"scrypted.serverRoot": "/Users/koush/.scrypted",
"scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip",
"python.analysis.extraPaths": [

View File

@@ -66,8 +66,9 @@ class GstPipelineBase:
self.watchId = None
self.gst = None
class GstPipeline(GstPipelineBase):
def __init__(self, loop: AbstractEventLoop, finished: Future, appsink_name: str, user_callback, crop = False):
def __init__(self, loop: AbstractEventLoop, finished: Future, appsink_name: str, user_callback, crop=False):
super().__init__(loop, finished)
self.appsink_name = appsink_name
self.user_callback = user_callback
@@ -115,13 +116,15 @@ class GstPipeline(GstPipelineBase):
def get_src_size(self):
if not self.src_size:
videoconvert = self.gst.get_by_name('videoconvert')
structure = videoconvert.srcpads[0].get_current_caps().get_structure(0)
structure = videoconvert.srcpads[0].get_current_caps(
).get_structure(0)
_, w = structure.get_int('width')
_, h = structure.get_int('height')
self.src_size = (w, h)
videoscale = self.gst.get_by_name('videoscale')
structure = videoscale.srcpads[0].get_current_caps().get_structure(0)
structure = videoscale.srcpads[0].get_current_caps(
).get_structure(0)
_, w = structure.get_int('width')
_, h = structure.get_int('height')
self.dst_size = (w, h)
@@ -134,7 +137,8 @@ class GstPipeline(GstPipelineBase):
# the dimension with the higher scale value got cropped or boxed.
# use the other dimension to figure out the crop/box amount.
scales = (self.dst_size[0] / self.src_size[0], self.dst_size[1] / self.src_size[1])
scales = (self.dst_size[0] / self.src_size[0],
self.dst_size[1] / self.src_size[1])
if self.crop:
scale = max(scales[0], scales[1])
else:
@@ -148,10 +152,10 @@ class GstPipeline(GstPipelineBase):
py = math.ceil((self.dst_size[1] - dy) / 2)
self.pad_size = (px, py)
return self.src_size
def convert_to_src_size(self, point, normalize = False):
def convert_to_src_size(self, point, normalize=False):
valid = True
px, py = self.pad_size
x, y = point
@@ -189,49 +193,61 @@ class GstPipeline(GstPipelineBase):
break
gstsample = self.gstsample
self.gstsample = None
self.user_callback(gstsample, self.get_src_size(), lambda p, normalize=False: self.convert_to_src_size(p, normalize))
self.user_callback(gstsample, self.get_src_size(
), lambda p, normalize=False: self.convert_to_src_size(p, normalize))
def get_dev_board_model():
try:
model = open('/sys/firmware/devicetree/base/model').read().lower()
if 'mx8mq' in model:
return 'mx8mq'
if 'mt8167' in model:
return 'mt8167'
except: pass
return None
try:
model = open('/sys/firmware/devicetree/base/model').read().lower()
if 'mx8mq' in model:
return 'mx8mq'
if 'mt8167' in model:
return 'mt8167'
except:
pass
return None
def create_pipeline_sink(
appsink_name,
appsink_size,
pixel_format,
crop = False):
SINK_ELEMENT = 'appsink name={appsink_name} emit-signals=true max-buffers=0 drop=true sync=false'.format(appsink_name=appsink_name)
appsink_name,
appsink_size,
pixel_format,
crop=False):
SINK_ELEMENT = 'appsink name={appsink_name} emit-signals=true max-buffers=0 drop=true sync=false'.format(
appsink_name=appsink_name)
(width, height)= appsink_size
(width, height) = appsink_size
SINK_CAPS = 'video/x-raw,format={pixel_format},width={width},height={height},pixel-aspect-ratio=1/1'
sink_caps = SINK_CAPS.format(width=width, height=height, pixel_format=pixel_format)
SINK_CAPS = 'video/x-raw,format={pixel_format}'
if width and height:
SINK_CAPS += ',width={width},height={height},pixel-aspect-ratio=1/1'
sink_caps = SINK_CAPS.format(
width=width, height=height, pixel_format=pixel_format)
pipeline = " {sink_caps} ! {sink_element}".format(
sink_caps=sink_caps,
sink_element=SINK_ELEMENT)
return pipeline
def create_pipeline(
appsink_name,
appsink_size,
video_input,
pixel_format,
crop = False,
parse_only = False):
appsink_name,
appsink_size,
video_input,
pixel_format,
crop=False,
parse_only=False):
if parse_only:
sink = 'appsink name={appsink_name} emit-signals=true sync=false'.format(appsink_name=appsink_name)
sink = 'appsink name={appsink_name} emit-signals=true sync=false'.format(
appsink_name=appsink_name)
PIPELINE = """ {video_input}
! {sink}
"""
else:
sink = create_pipeline_sink(appsink_name, appsink_size, pixel_format, crop = crop)
sink = create_pipeline_sink(
appsink_name, appsink_size, pixel_format, crop=crop)
if crop:
PIPELINE = """ {video_input} ! videoconvert name=videoconvert ! aspectratiocrop aspect-ratio=1/1 ! videoscale name=videoscale ! queue leaky=downstream max-size-buffers=0
! {sink}
@@ -240,19 +256,21 @@ def create_pipeline(
PIPELINE = """ {video_input} ! queue leaky=downstream max-size-buffers=0 ! videoconvert name=videoconvert ! videoscale name=videoscale
! {sink}
"""
pipeline = PIPELINE.format(video_input = video_input, sink = sink)
pipeline = PIPELINE.format(video_input=video_input, sink=sink)
print('Gstreamer pipeline:\n', pipeline)
return pipeline
def run_pipeline(loop, finished,
user_callback,
appsink_name,
appsink_size,
video_input,
pixel_format,
crop = False,
parse_only = False):
gst = GstPipeline(loop, finished, appsink_name, user_callback, crop = crop)
pipeline = create_pipeline(appsink_name, appsink_size, video_input, pixel_format, crop = crop, parse_only = parse_only)
crop=False,
parse_only=False):
gst = GstPipeline(loop, finished, appsink_name, user_callback, crop=crop)
pipeline = create_pipeline(
appsink_name, appsink_size, video_input, pixel_format, crop=crop, parse_only=parse_only)
gst.parse_launch(pipeline)
return gst

View File

@@ -23,6 +23,7 @@ import scrypted_sdk
from typing import Any, List, Tuple
from gi.repository import Gst
import asyncio
import numpy
from detect import DetectionSession, DetectPlugin
@@ -50,13 +51,14 @@ def parse_label_contents(contents: str):
defaultThreshold = .4
defaultSecondThreshold = .7
class RawImage:
jpeg: scrypted_sdk.MediaObject
jpegMediaObject: scrypted_sdk.MediaObject
def __init__(self, image: Image.Image):
self.image = image
self.jpeg = None
self.jpegMediaObject = None
MIME_TYPE = 'x-scrypted-tensorflow-lite/x-raw-image'
@@ -67,7 +69,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
self.fromMimeType = MIME_TYPE
self.toMimeType = scrypted_sdk.ScryptedMimeTypes.MediaObject.value
self.crop = True
self.crop = False
labels_contents = scrypted_sdk.zip.open(
'fs/coco_labels.txt').read().decode('utf8')
@@ -117,10 +119,10 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
detection_session.image = image
else:
image.close()
data.jpeg = None
data.jpegMediaObject = None
async def convert(self, data: RawImage, fromMimeType: str, toMimeType: str, options: scrypted_sdk.BufferConvertorOptions = None) -> Any:
mo = data.jpeg
mo = data.jpegMediaObject
if not mo:
image = data.image
if not image:
@@ -130,8 +132,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
image.save(bio, format='JPEG')
jpegBytes = bio.getvalue()
mo = await scrypted_sdk.mediaManager.createMediaObject(jpegBytes, 'image/jpeg')
data.jpeg = jpegBytes
data.image = None
data.jpegMediaObject = mo
return mo
def requestRestart(self):
@@ -155,6 +156,14 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
'value': defaultThreshold,
'placeholder': defaultThreshold,
}
secondConfidence: Setting = {
'title': 'Second Pass Confidence',
'description': 'Scale, crop, and reanalyze the results from the initial detection pass to get more accurate results. This will exponentially increase complexity, so using an allow list is recommended',
'key': 'second_score_threshold',
'type': 'number',
'value': defaultSecondThreshold,
'placeholder': defaultSecondThreshold,
}
decoderSetting: Setting = {
'title': "Decoder",
'description': "The gstreamer element used to decode the stream",
@@ -174,7 +183,9 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
'choices': list(self.labels.values()),
'multiple': True,
'key': 'allowList',
'value': [],
'value': [
'person',
],
}
coral: Setting = {
'title': 'Detected Edge TPU',
@@ -184,10 +195,10 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
'key': 'coral',
}
d['settings'] = [coral, confidence, decoderSetting, allowList]
d['settings'] = [coral, confidence, secondConfidence, decoderSetting, allowList]
return d
def create_detection_result(self, objs, size, allowList, convert_to_src_size=None):
def create_detection_result(self, objs, size, allowList, convert_to_src_size=None) -> ObjectsDetected:
detections: List[ObjectDetectionResult] = []
detection_result: ObjectsDetected = {}
detection_result['detections'] = detections
@@ -232,25 +243,93 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
stream = io.BytesIO(image_bytes)
image = Image.open(stream)
score_threshold = self.parse_settings(settings)
return self.run_detection_image(self, settings, image.size)
def get_detection_input_size(self, src_size):
return (None, None)
with self.mutex:
_, scale = common.set_resized_input(
self.interpreter, image.size, lambda size: image.resize(size, Image.ANTIALIAS))
return input_size(self.interpreter)
def run_detection_image(self, image: Image.Image, settings: Any, src_size, convert_to_src_size: Any = None, second_pass_crop: Tuple[float, float, float, float] = None):
score_threshold = defaultThreshold
second_score_threshold = None
if settings:
score_threshold = float(settings.get(
'score_threshold', score_threshold) or score_threshold)
check = settings.get(
'second_score_threshold', None)
if check:
second_score_threshold = float(check)
if second_pass_crop:
score_threshold = second_score_threshold
(w, h) = input_size(self.interpreter)
if not second_pass_crop:
(iw, ih) = image.size
ws = w / iw
hs = h / ih
s = max(ws, hs)
scaled = image.resize((round(s * iw), round(s * ih)), Image.ANTIALIAS)
ow = round((scaled.width - w) / 2)
oh = round((scaled.height - h) / 2)
input = scaled.crop((ow, oh, ow + w, oh + h))
def cvss(point, normalize=False):
converted = convert_to_src_size(point, normalize)
return ((converted[0] + ow) / s, (converted[1] + oh) / s, converted[2])
else:
(l, t, r, b) = second_pass_crop
cropped = image.crop(second_pass_crop)
(cw, ch) = cropped.size
input = cropped.resize((w, h), Image.ANTIALIAS)
def cvss(point, normalize=False):
converted = convert_to_src_size(point, normalize)
return ((converted[0] / w) * cw + l, (converted[1] / h) * ch + t, converted[2])
with self.mutex:
common.set_input(
self.interpreter, input)
scale = (1, 1)
# _, scale = common.set_resized_input(
# self.interpreter, cropped.size, lambda size: cropped.resize(size, Image.ANTIALIAS))
self.interpreter.invoke()
objs = detect.get_objects(
self.interpreter, score_threshold=score_threshold, image_scale=scale)
allowList = settings and settings.get('allowList', None)
allowList = settings.get('allowList', None)
ret = self.create_detection_result(objs, src_size, allowList, cvss)
return self.create_detection_result(objs, image.size, allowList)
if second_pass_crop or not second_score_threshold or not len(ret['detections']):
return ret, RawImage(image)
secondPassDetections: List[ObjectDetectionResult] = []
detections = ret['detections']
ret['detections'] = []
for detection in detections:
if detection['score'] >= second_score_threshold:
ret['detections'].append(detection)
continue
(x, y, w, h) = detection['boundingBox']
cx = x + w / 2
cy = y + h / 2
d = round(max(w, h) * 1.5)
x = round(cx - d / 2)
y = round(cy - d / 2)
x = max(0, x)
y = max(0, y)
x2 = x + d
y2 = y + d
def get_detection_input_size(self, src_size):
with self.mutex:
return input_size(self.interpreter)
secondPassResult, _ = self.run_detection_image(image, settings, src_size, convert_to_src_size, (x, y, x2, y2))
ret['detections'].extend(secondPassResult['detections'])
return ret, RawImage(image)
def run_detection_gstsample(self, detection_session: TensorFlowLiteSession, gstsample, settings: Any, src_size, convert_to_src_size) -> Tuple[ObjectsDetected, Image.Image]:
score_threshold = self.parse_settings(settings)
# todo reenable this if detection images aren't needed.
if False and loaded_py_coral:
with self.mutex:
gst_buffer = gstsample.get_buffer()
@@ -280,16 +359,7 @@ class TensorFlowLitePlugin(DetectPlugin, scrypted_sdk.BufferConverter):
finally:
gst_buffer.unmap(info)
with self.mutex:
_, scale = common.set_resized_input(
self.interpreter, image.size, lambda size: image.resize(size, Image.ANTIALIAS))
self.interpreter.invoke()
objs = detect.get_objects(
self.interpreter, score_threshold=score_threshold, image_scale=scale)
allowList = settings.get('allowList', None)
return self.create_detection_result(objs, src_size, allowList, convert_to_src_size), RawImage(image)
return self.run_detection_image(image, settings, src_size, convert_to_src_size)
def create_detection_session(self):
return TensorFlowLiteSession()