vision-framework: initial release

This commit is contained in:
Koushik Dutta
2024-04-01 10:20:49 -07:00
parent 97b09442e8
commit 2e72366d41
15 changed files with 405 additions and 13 deletions

View File

@@ -27,7 +27,7 @@ async def to_thread(f):
return await loop.run_in_executor(toThreadExecutor, f)
async def ensureRGBData(data: bytes, size: Tuple[int, int], format: str):
if format != 'rgba':
if format == 'rgb':
return Image.frombuffer('RGB', size, data)
def convert():
@@ -38,6 +38,19 @@ async def ensureRGBData(data: bytes, size: Tuple[int, int], format: str):
rgba.close()
return await to_thread(convert)
async def ensureRGBAData(data: bytes, size: Tuple[int, int], format: str):
    """Wrap a raw pixel buffer in a PIL image that is in RGBA mode.

    Args:
        data: Raw pixel bytes.
        size: (width, height) of the buffer in pixels.
        format: Pixel layout of ``data``; ``'rgba'`` is wrapped directly,
            anything else is read as RGB and converted.

    Returns:
        A PIL image in ``'RGBA'`` mode.
    """
    if format != 'rgba':
        # this path should never be possible as all the image sources should be capable of rgba.
        def to_rgba():
            source = Image.frombuffer('RGB', size, data)
            try:
                converted = source.convert('RGBA')
            finally:
                # The intermediate RGB image is released whether or not the
                # conversion succeeds.
                source.close()
            return converted

        # The conversion is handed to the to_thread helper instead of being
        # run inline in this coroutine.
        return await to_thread(to_rgba)

    return Image.frombuffer('RGBA', size, data)
def parse_label_contents(contents: str):
lines = contents.splitlines()
ret = {}
@@ -154,23 +167,36 @@ class PredictPlugin(DetectPlugin, scrypted_sdk.BufferConverter):
iw, ih = image.width, image.height
w, h = self.get_input_size()
resize = None
xs = w / iw
ys = h / ih
def cvss(point):
return point[0] / xs, point[1] / ys
if w is None or h is None:
resize = None
w = image.width
h = image.height
def cvss(point):
return point
else:
resize = None
xs = w / iw
ys = h / ih
def cvss(point):
return point[0] / xs, point[1] / ys
if iw != w or ih != h:
resize = {
'width': w,
'height': h,
}
if iw != w or ih != h:
resize = {
'width': w,
'height': h,
}
format = image.format or self.get_input_format()
b = await image.toBuffer({
'resize': resize,
'format': image.format or 'rgb',
'format': format,
})
data = await ensureRGBData(b, (w, h), image.format)
if self.get_input_format() == 'rgb':
data = await ensureRGBData(b, (w, h), format)
elif self.get_input_format() == 'rgba':
data = await ensureRGBAData(b, (w, h), format)
try:
ret = await self.safe_detect_once(data, settings, (iw, ih), cvss)
return ret

6
plugins/vision-framework/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
.DS_Store
out/
node_modules/
dist/
.venv
all_models*

View File

@@ -0,0 +1,12 @@
.DS_Store
out/
node_modules/
*.map
fs
src
.vscode
dist/*.js
dist/*.txt
__pycache__
all_models
.venv

View File

@@ -0,0 +1,30 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Scrypted Debugger",
"type": "python",
"request": "attach",
"connect": {
"host": "${config:scrypted.debugHost}",
"port": 10081
},
"justMyCode": false,
"preLaunchTask": "scrypted: deploy+debug",
"pathMappings": [
{
"localRoot": "/Volumes/Dev/scrypted/server/python/",
"remoteRoot": "/Volumes/Dev/scrypted/server/python/",
},
{
"localRoot": "${workspaceFolder}/src",
"remoteRoot": "${config:scrypted.pythonRemoteRoot}"
},
]
}
]
}

View File

@@ -0,0 +1,19 @@
{
// docker installation
// "scrypted.debugHost": "koushik-thin",
// "scrypted.serverRoot": "/server",
// pi local installation
// "scrypted.debugHost": "192.168.2.119",
// "scrypted.serverRoot": "/home/pi/.scrypted",
// local checkout
"scrypted.debugHost": "127.0.0.1",
"scrypted.serverRoot": "/Users/koush/.scrypted",
"scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip",
"python.analysis.extraPaths": [
"./node_modules/@scrypted/sdk/types/scrypted_python"
]
}

View File

@@ -0,0 +1,20 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "scrypted: deploy+debug",
"type": "shell",
"presentation": {
"echo": true,
"reveal": "silent",
"focus": false,
"panel": "shared",
"showReuseMessage": true,
"clear": false
},
"command": "npm run scrypted-vscode-launch ${config:scrypted.debugHost}",
},
]
}

View File

@@ -0,0 +1,6 @@
# Vision Framework Object Detection for Scrypted
This plugin adds object detection capabilities to any camera in Scrypted. It requires the macOS Vision framework, which utilizes the CPU, GPU, and Neural Engine (Apple Silicon).
The Vision Framework Plugin should only be used if you are a Scrypted NVR user. It will provide no
benefits to HomeKit, which does its own detection processing.

View File

@@ -0,0 +1,86 @@
{
"name": "@scrypted/vision-framework",
"version": "0.0.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@scrypted/vision-framework",
"version": "0.0.2",
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
}
},
"../../sdk": {
"name": "@scrypted/sdk",
"version": "0.2.101",
"dev": true,
"license": "ISC",
"dependencies": {
"@babel/preset-typescript": "^7.18.6",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^9.1.0",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"tmp": "^0.2.1",
"ts-loader": "^9.4.2",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-bundle-analyzer": "^4.5.0"
},
"bin": {
"scrypted-changelog": "bin/scrypted-changelog.js",
"scrypted-debug": "bin/scrypted-debug.js",
"scrypted-deploy": "bin/scrypted-deploy.js",
"scrypted-deploy-debug": "bin/scrypted-deploy-debug.js",
"scrypted-package-json": "bin/scrypted-package-json.js",
"scrypted-setup-project": "bin/scrypted-setup-project.js",
"scrypted-webpack": "bin/scrypted-webpack.js"
},
"devDependencies": {
"@types/node": "^18.11.18",
"@types/stringify-object": "^4.0.0",
"stringify-object": "^3.3.0",
"ts-node": "^10.4.0",
"typedoc": "^0.23.21"
}
},
"../sdk": {
"extraneous": true
},
"node_modules/@scrypted/sdk": {
"resolved": "../../sdk",
"link": true
}
},
"dependencies": {
"@scrypted/sdk": {
"version": "file:../../sdk",
"requires": {
"@babel/preset-typescript": "^7.18.6",
"@types/node": "^18.11.18",
"@types/stringify-object": "^4.0.0",
"adm-zip": "^0.4.13",
"axios": "^0.21.4",
"babel-loader": "^9.1.0",
"babel-plugin-const-enum": "^1.1.0",
"esbuild": "^0.15.9",
"ncp": "^2.0.0",
"raw-loader": "^4.0.2",
"rimraf": "^3.0.2",
"stringify-object": "^3.3.0",
"tmp": "^0.2.1",
"ts-loader": "^9.4.2",
"ts-node": "^10.4.0",
"typedoc": "^0.23.21",
"typescript": "^4.9.4",
"webpack": "^5.75.0",
"webpack-bundle-analyzer": "^4.5.0"
}
}
}
}

View File

@@ -0,0 +1,44 @@
{
"name": "@scrypted/vision-framework",
"description": "Scrypted Vision Framework Object Detection",
"keywords": [
"scrypted",
"plugin",
"vision framework",
"neural",
"object",
"detect",
"detection",
"people",
"person"
],
"scripts": {
"scrypted-setup-project": "scrypted-setup-project",
"prescrypted-setup-project": "scrypted-package-json",
"build": "scrypted-webpack",
"prepublishOnly": "NODE_ENV=production scrypted-webpack",
"prescrypted-vscode-launch": "scrypted-webpack",
"scrypted-vscode-launch": "scrypted-deploy-debug",
"scrypted-deploy-debug": "scrypted-deploy-debug",
"scrypted-debug": "scrypted-debug",
"scrypted-deploy": "scrypted-deploy",
"scrypted-readme": "scrypted-readme",
"scrypted-package-json": "scrypted-package-json"
},
"scrypted": {
"name": "Vision Framework Object Detection",
"pluginDependencies": [
"@scrypted/objectdetector"
],
"runtime": "python",
"type": "API",
"interfaces": [
"ObjectDetection",
"ObjectDetectionPreview"
]
},
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.0.2"
}

View File

@@ -0,0 +1 @@
../../tensorflow-lite/src/detect

View File

@@ -0,0 +1,4 @@
from vision import VisionPlugin
def create_scrypted_plugin():
    """Build and return a new ``VisionPlugin`` instance."""
    plugin = VisionPlugin()
    return plugin

View File

@@ -0,0 +1 @@
../../tensorflow-lite/src/predict

View File

@@ -0,0 +1,2 @@
pyobjc
Pillow>=5.4.1

View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import asyncio
import concurrent.futures
import os
from typing import Any, Tuple
import scrypted_sdk
from PIL import Image
from scrypted_sdk import Setting, SettingValue
import Vision
import Quartz
from Foundation import NSData, NSMakeSize
from predict import Prediction, PredictPlugin, from_bounding_box
# Shared worker pool used to run Vision requests off the event loop thread.
predictExecutor = concurrent.futures.ThreadPoolExecutor(
    max_workers=8,
    thread_name_prefix="Vision-Predict",
)
class VisionPlugin(PredictPlugin, scrypted_sdk.BufferConverter, scrypted_sdk.Settings):
    """Face detection plugin backed by the macOS Vision framework.

    Each frame is wrapped in a ``CIImage`` and run through a
    ``VNDetectFaceRectanglesRequest``; every returned observation is reported
    as a ``"face"`` prediction.
    """

    def __init__(self, nativeId: str | None = None):
        super().__init__(nativeId=nativeId)

        # No fixed input size is configured, so get_input_size() reports
        # (None, None). Presumably the base class then feeds frames at their
        # native resolution; verify against PredictPlugin.
        self.inputheight = None
        self.inputwidth = None

        # Single class: every observation is labelled as a face.
        self.labels = {
            0: "face",
        }
        # Loop the plugin was constructed on; detect_once() compares against
        # it to decide whether the blocking Vision call must be offloaded.
        self.loop = asyncio.get_event_loop()
        self.minThreshold = 0.2

    async def getSettings(self) -> list[Setting]:
        """Return the plugin settings (there are none)."""
        # Return an empty list rather than None so the result matches the
        # annotated return type and can be iterated by callers.
        return []

    async def putSetting(self, key: str, value: SettingValue):
        """Persist a setting, announce the change and request a restart."""
        self.storage.setItem(key, value)
        await self.onDeviceEvent(scrypted_sdk.ScryptedInterface.Settings.value, None)
        await scrypted_sdk.deviceManager.requestRestart()

    # width, height, channels
    def get_input_details(self) -> Tuple[int, int, int]:
        """Return (width, height, channels); channels is 4 for RGBA input."""
        return (self.inputwidth, self.inputheight, 4)

    def get_input_size(self) -> Tuple[float, float]:
        """Return (width, height); both are None because no size is fixed."""
        return (self.inputwidth, self.inputheight)

    def get_input_format(self) -> str:
        """Return the pixel format this plugin consumes."""
        return "rgba"

    def predictVision(
        self,
        input: Image.Image,
        loop: asyncio.AbstractEventLoop | None = None,
    ) -> asyncio.Future:
        """Run Vision face-rectangle detection on an RGBA image.

        Args:
            input: Image whose raw bytes are interpreted as 8-bit RGBA
                (4 bytes per pixel).
            loop: Event loop that will own and resolve the returned future.
                Defaults to the loop captured at construction time.

        Returns:
            A future bound to ``loop`` that resolves to the request's
            observations, or fails with an ``Exception`` describing the
            Vision error.
        """
        loop = self.loop if loop is None else loop

        buffer = input.tobytes()
        pixel_data = NSData.alloc().initWithBytes_length_(buffer, len(buffer))

        input_image = (
            Quartz.CIImage.imageWithBitmapData_bytesPerRow_size_format_options_(
                pixel_data,
                4 * input.width,
                NSMakeSize(input.width, input.height),
                Quartz.kCIFormatRGBA8,
                None,
            )
        )

        request_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            input_image, None
        )

        future = loop.create_future()

        def detect_face_handler(request, error):
            # Results are always marshalled through call_soon_threadsafe so the
            # future is only touched from its owning loop, whichever thread
            # this completion handler runs on.
            if error:
                loop.call_soon_threadsafe(
                    future.set_exception,
                    Exception(f"Vision face detection failed: {error}"),
                )
            else:
                loop.call_soon_threadsafe(future.set_result, request.results())

        request = (
            Vision.VNDetectFaceRectanglesRequest.alloc().initWithCompletionHandler_(
                detect_face_handler
            )
        )

        # NOTE(review): failures are expected to reach detect_face_handler via
        # its error argument; confirm the handler is invoked on every failure.
        request_handler.performRequests_error_([request], None)
        return future

    async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
        """Detect faces in ``input`` and build the detection result.

        Args:
            input: RGBA image to analyse.
            settings: Detection settings (unused here).
            src_size: Source image size, forwarded to
                ``create_detection_result``.
            cvss: Coordinate conversion callback, forwarded to
                ``create_detection_result``.

        Returns:
            The value produced by ``create_detection_result``.
        """
        loop = asyncio.get_running_loop()
        if loop is self.loop:
            # On the plugin loop: push the blocking Vision call onto the
            # worker pool so the loop stays responsive.
            future = await loop.run_in_executor(
                predictExecutor,
                lambda: self.predictVision(input, loop),
            )
        else:
            # On any other loop: run inline, with the future bound to *this*
            # loop so that awaiting it below is valid. predictVision returns
            # the future itself, so it must not be awaited here.
            future = self.predictVision(input, loop)

        observations = await future

        width, height = input.width, input.height
        objs = []
        for observation in observations:
            bounds = observation.boundingBox()
            origin = bounds.origin
            size = bounds.size

            # Bounding boxes are scaled by the image size, and the y axis is
            # flipped from a bottom-left to a top-left origin.
            box = from_bounding_box(
                (
                    origin.x * width,
                    (1 - origin.y - size.height) * height,
                    size.width * width,
                    size.height * height,
                )
            )
            objs.append(Prediction(0, observation.confidence(), box))

        return self.create_detection_result(objs, src_size, cvss)

View File

@@ -0,0 +1,13 @@
{
"compilerOptions": {
"module": "commonjs",
"target": "ES2021",
"resolveJsonModule": true,
"moduleResolution": "Node16",
"esModuleInterop": true,
"sourceMap": true
},
"include": [
"src/**/*"
]
}