async def ensureRGBAData(data: bytes, size: Tuple[int, int], format: str):
    """Return a PIL image in RGBA mode built from a raw pixel buffer.

    Args:
        data: Raw pixel bytes handed to ``Image.frombuffer``.
        size: ``(width, height)`` passed through to ``Image.frombuffer``.
        format: Pixel layout of ``data``. ``'rgba'`` is wrapped directly;
            any other value is read as RGB and converted.

    Returns:
        An RGBA ``Image``.
    """
    needs_conversion = format != 'rgba'
    if not needs_conversion:
        return Image.frombuffer('RGBA', size, data)

    # Fallback only: every image source is expected to be able to supply
    # rgba itself, so this conversion path should never actually be taken.
    def rgb_to_rgba():
        source = Image.frombuffer('RGB', size, data)
        try:
            converted = source.convert('RGBA')
        finally:
            # Release the intermediate RGB image whether or not convert() succeeded.
            source.close()
        return converted

    # The conversion is handed to to_thread rather than run inline.
    result = await to_thread(rgb_to_rgba)
    return result
== 'rgb': + data = await ensureRGBData(b, (w, h), format) + elif self.get_input_format() == 'rgba': + data = await ensureRGBAData(b, (w, h), format) + try: ret = await self.safe_detect_once(data, settings, (iw, ih), cvss) return ret diff --git a/plugins/vision-framework/.gitignore b/plugins/vision-framework/.gitignore new file mode 100644 index 000000000..a3e781b00 --- /dev/null +++ b/plugins/vision-framework/.gitignore @@ -0,0 +1,6 @@ +.DS_Store +out/ +node_modules/ +dist/ +.venv +all_models* diff --git a/plugins/vision-framework/.npmignore b/plugins/vision-framework/.npmignore new file mode 100644 index 000000000..3ef013488 --- /dev/null +++ b/plugins/vision-framework/.npmignore @@ -0,0 +1,12 @@ +.DS_Store +out/ +node_modules/ +*.map +fs +src +.vscode +dist/*.js +dist/*.txt +__pycache__ +all_models +.venv diff --git a/plugins/vision-framework/.vscode/launch.json b/plugins/vision-framework/.vscode/launch.json new file mode 100644 index 000000000..ee46b594f --- /dev/null +++ b/plugins/vision-framework/.vscode/launch.json @@ -0,0 +1,30 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Scrypted Debugger", + "type": "python", + "request": "attach", + "connect": { + "host": "${config:scrypted.debugHost}", + "port": 10081 + }, + "justMyCode": false, + "preLaunchTask": "scrypted: deploy+debug", + "pathMappings": [ + { + "localRoot": "/Volumes/Dev/scrypted/server/python/", + "remoteRoot": "/Volumes/Dev/scrypted/server/python/", + }, + { + "localRoot": "${workspaceFolder}/src", + "remoteRoot": "${config:scrypted.pythonRemoteRoot}" + }, + + ] + } + ] +} \ No newline at end of file diff --git a/plugins/vision-framework/.vscode/settings.json b/plugins/vision-framework/.vscode/settings.json new file mode 100644 index 000000000..d71b6ca7d --- /dev/null +++ b/plugins/vision-framework/.vscode/settings.json @@ -0,0 +1,19 @@ + +{ + // docker installation + // "scrypted.debugHost": "koushik-thin", + // "scrypted.serverRoot": "/server", + + // pi local installation + // "scrypted.debugHost": "192.168.2.119", + // "scrypted.serverRoot": "/home/pi/.scrypted", + + // local checkout + "scrypted.debugHost": "127.0.0.1", + "scrypted.serverRoot": "/Users/koush/.scrypted", + + "scrypted.pythonRemoteRoot": "${config:scrypted.serverRoot}/volume/plugin.zip", + "python.analysis.extraPaths": [ + "./node_modules/@scrypted/sdk/types/scrypted_python" + ] +} \ No newline at end of file diff --git a/plugins/vision-framework/.vscode/tasks.json b/plugins/vision-framework/.vscode/tasks.json new file mode 100644 index 000000000..4d922a539 --- /dev/null +++ b/plugins/vision-framework/.vscode/tasks.json @@ -0,0 +1,20 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "scrypted: deploy+debug", + "type": "shell", + "presentation": { + "echo": true, + "reveal": "silent", + "focus": false, + "panel": "shared", + "showReuseMessage": true, + 
"clear": false + }, + "command": "npm run scrypted-vscode-launch ${config:scrypted.debugHost}", + }, + ] +} diff --git a/plugins/vision-framework/README.md b/plugins/vision-framework/README.md new file mode 100644 index 000000000..435560e03 --- /dev/null +++ b/plugins/vision-framework/README.md @@ -0,0 +1,6 @@ +# Vision Framework Object Detection for Scrypted + +This plugin adds object detection capabilities to any camera in Scrypted. This plugin requires MacOS Vision Framework. Vision Framework utilizes the CPU, GPU, and Neural Cores (Apple Silicon). + +The Vision Framework Plugin should only be used if you are a Scrypted NVR user. It will provide no +benefits to HomeKit, which does its own detection processing. \ No newline at end of file diff --git a/plugins/vision-framework/package-lock.json b/plugins/vision-framework/package-lock.json new file mode 100644 index 000000000..7c1413fe2 --- /dev/null +++ b/plugins/vision-framework/package-lock.json @@ -0,0 +1,86 @@ +{ + "name": "@scrypted/coreml", + "version": "0.0.2", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "@scrypted/coreml", + "version": "0.0.2", + "devDependencies": { + "@scrypted/sdk": "file:../../sdk" + } + }, + "../../sdk": { + "name": "@scrypted/sdk", + "version": "0.2.101", + "dev": true, + "license": "ISC", + "dependencies": { + "@babel/preset-typescript": "^7.18.6", + "adm-zip": "^0.4.13", + "axios": "^0.21.4", + "babel-loader": "^9.1.0", + "babel-plugin-const-enum": "^1.1.0", + "esbuild": "^0.15.9", + "ncp": "^2.0.0", + "raw-loader": "^4.0.2", + "rimraf": "^3.0.2", + "tmp": "^0.2.1", + "ts-loader": "^9.4.2", + "typescript": "^4.9.4", + "webpack": "^5.75.0", + "webpack-bundle-analyzer": "^4.5.0" + }, + "bin": { + "scrypted-changelog": "bin/scrypted-changelog.js", + "scrypted-debug": "bin/scrypted-debug.js", + "scrypted-deploy": "bin/scrypted-deploy.js", + "scrypted-deploy-debug": "bin/scrypted-deploy-debug.js", + "scrypted-package-json": 
"bin/scrypted-package-json.js", + "scrypted-setup-project": "bin/scrypted-setup-project.js", + "scrypted-webpack": "bin/scrypted-webpack.js" + }, + "devDependencies": { + "@types/node": "^18.11.18", + "@types/stringify-object": "^4.0.0", + "stringify-object": "^3.3.0", + "ts-node": "^10.4.0", + "typedoc": "^0.23.21" + } + }, + "../sdk": { + "extraneous": true + }, + "node_modules/@scrypted/sdk": { + "resolved": "../../sdk", + "link": true + } + }, + "dependencies": { + "@scrypted/sdk": { + "version": "file:../../sdk", + "requires": { + "@babel/preset-typescript": "^7.18.6", + "@types/node": "^18.11.18", + "@types/stringify-object": "^4.0.0", + "adm-zip": "^0.4.13", + "axios": "^0.21.4", + "babel-loader": "^9.1.0", + "babel-plugin-const-enum": "^1.1.0", + "esbuild": "^0.15.9", + "ncp": "^2.0.0", + "raw-loader": "^4.0.2", + "rimraf": "^3.0.2", + "stringify-object": "^3.3.0", + "tmp": "^0.2.1", + "ts-loader": "^9.4.2", + "ts-node": "^10.4.0", + "typedoc": "^0.23.21", + "typescript": "^4.9.4", + "webpack": "^5.75.0", + "webpack-bundle-analyzer": "^4.5.0" + } + } + } +} diff --git a/plugins/vision-framework/package.json b/plugins/vision-framework/package.json new file mode 100644 index 000000000..dd541199b --- /dev/null +++ b/plugins/vision-framework/package.json @@ -0,0 +1,44 @@ +{ + "name": "@scrypted/vision-framework", + "description": "Scrypted Vision Framework Object Detection", + "keywords": [ + "scrypted", + "plugin", + "vision framework", + "neural", + "object", + "detect", + "detection", + "people", + "person" + ], + "scripts": { + "scrypted-setup-project": "scrypted-setup-project", + "prescrypted-setup-project": "scrypted-package-json", + "build": "scrypted-webpack", + "prepublishOnly": "NODE_ENV=production scrypted-webpack", + "prescrypted-vscode-launch": "scrypted-webpack", + "scrypted-vscode-launch": "scrypted-deploy-debug", + "scrypted-deploy-debug": "scrypted-deploy-debug", + "scrypted-debug": "scrypted-debug", + "scrypted-deploy": "scrypted-deploy", + 
def create_scrypted_plugin():
    """Construct and return a new ``VisionPlugin`` with its default arguments.

    NOTE(review): presumably this is the factory the Scrypted python runtime
    looks up by name when loading the plugin -- confirm against the SDK.
    """
    plugin = VisionPlugin()
    return plugin
class VisionPlugin(PredictPlugin, scrypted_sdk.BufferConverter, scrypted_sdk.Settings):
    """Face detection plugin that runs ``VNDetectFaceRectanglesRequest`` on RGBA frames."""

    def __init__(self, nativeId: str | None = None):
        super().__init__(nativeId=nativeId)

        # No fixed model input size: both stay None and nothing in this class
        # assigns them afterwards.
        self.inputheight = None
        self.inputwidth = None

        # Single class; detect_once always reports id 0.
        self.labels = {
            0: "face",
        }
        # Loop that owns the futures created by predictVision.
        self.loop = asyncio.get_event_loop()
        # Not read anywhere inside this class.
        self.minThreshold = 0.2

    async def getSettings(self) -> list[Setting]:
        """Return the plugin's settings; this plugin defines none.

        An empty list is returned (rather than falling through to ``None``)
        so the result always matches the annotated ``list[Setting]``.
        """
        return []

    async def putSetting(self, key: str, value: SettingValue):
        """Store ``value`` under ``key``, emit a Settings event, then request a restart."""
        self.storage.setItem(key, value)
        await self.onDeviceEvent(scrypted_sdk.ScryptedInterface.Settings.value, None)
        await scrypted_sdk.deviceManager.requestRestart()

    # width, height, channels
    def get_input_details(self) -> Tuple[int, int, int]:
        """Return ``(inputwidth, inputheight, 4)``; width/height are None as initialised."""
        return (self.inputwidth, self.inputheight, 4)

    def get_input_size(self) -> Tuple[float, float]:
        """Return ``(inputwidth, inputheight)``; both are None as initialised."""
        return (self.inputwidth, self.inputheight)

    def get_input_format(self) -> str:
        """Return the pixel format this plugin consumes: ``"rgba"``."""
        return "rgba"

    def predictVision(self, input: Image.Image) -> asyncio.Future:
        """Run a face-rectangle request on ``input`` and return a future for its results.

        Args:
            input: Image whose ``tobytes()`` output is handed to Core Image as
                RGBA8 with ``4 * width`` bytes per row, so it must be in RGBA mode.

        Returns:
            A future created on ``self.loop``. It is resolved from the request's
            completion handler with ``request.results()`` (Vision observations),
            or fails with an ``Exception`` carrying the reported error.
        """
        buffer = input.tobytes()
        pixel_data = NSData.alloc().initWithBytes_length_(buffer, len(buffer))

        input_image = (
            Quartz.CIImage.imageWithBitmapData_bytesPerRow_size_format_options_(
                pixel_data,
                4 * input.width,
                NSMakeSize(input.width, input.height),
                Quartz.kCIFormatRGBA8,
                None,
            )
        )

        request_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            input_image, None
        )

        loop = self.loop
        future = loop.create_future()

        def detect_face_handler(request, error):
            # This may run on a thread other than the loop's, so the future is
            # only ever touched through call_soon_threadsafe.
            if error:
                failure = Exception(f"Vision face detection request failed: {error}")
                loop.call_soon_threadsafe(future.set_exception, failure)
            else:
                loop.call_soon_threadsafe(future.set_result, request.results())

        request = (
            Vision.VNDetectFaceRectanglesRequest.alloc().initWithCompletionHandler_(
                detect_face_handler
            )
        )

        # The outcome is reported through detect_face_handler, so the return
        # value is not inspected here.
        # NOTE(review): confirm the completion handler is always invoked when
        # performRequests fails; otherwise the future would never resolve.
        request_handler.performRequests_error_([request], None)
        return future

    async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
        """Detect faces in ``input`` and return the detection result.

        Args:
            input: RGBA image to analyse.
            settings: Unused by this implementation.
            src_size: Passed through to ``create_detection_result``.
            cvss: Passed through to ``create_detection_result``.

        Returns:
            Whatever ``self.create_detection_result(objs, src_size, cvss)`` returns.
        """
        running_loop = asyncio.get_running_loop()
        if running_loop is self.loop:
            # predictVision performs the request inline in the calling thread,
            # so keep it off the plugin loop by using the prediction executor.
            future = await running_loop.run_in_executor(
                predictExecutor,
                lambda: self.predictVision(input),
            )
            observations = await future
        else:
            # predictVision is a plain function returning a future, so it must
            # not be awaited itself. That future belongs to self.loop and cannot
            # be awaited from this loop, so wait for it on its own loop and
            # bridge the result back here.
            future = self.predictVision(input)

            async def wait_on_owner_loop():
                return await future

            observations = await asyncio.wrap_future(
                asyncio.run_coroutine_threadsafe(wait_on_owner_loop(), self.loop)
            )

        width, height = input.width, input.height
        objs = []
        # Tolerate a missing (nil) result by treating it as "no faces".
        for o in observations or ():
            confidence = o.confidence()
            bb = o.boundingBox()
            origin = bb.origin
            size = bb.size
            # The box is scaled from fractions of the image to pixels, and y is
            # flipped (1 - y - height), which matches a bottom-left-origin box
            # -- presumably Vision's convention; verify against Apple's docs.
            prediction = Prediction(
                0,
                confidence,
                from_bounding_box(
                    (
                        origin.x * width,
                        (1 - origin.y - size.height) * height,
                        size.width * width,
                        size.height * height,
                    )
                ),
            )
            objs.append(prediction)

        return self.create_detection_result(objs, src_size, cvss)