diff --git a/plugins/coreml/package-lock.json b/plugins/coreml/package-lock.json index d61f394f0..00b845fa8 100644 --- a/plugins/coreml/package-lock.json +++ b/plugins/coreml/package-lock.json @@ -1,12 +1,12 @@ { "name": "@scrypted/coreml", - "version": "0.1.50", + "version": "0.1.51", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@scrypted/coreml", - "version": "0.1.50", + "version": "0.1.51", "devDependencies": { "@scrypted/sdk": "file:../../sdk" } diff --git a/plugins/coreml/package.json b/plugins/coreml/package.json index c0ab66b53..70fc35f4b 100644 --- a/plugins/coreml/package.json +++ b/plugins/coreml/package.json @@ -42,5 +42,5 @@ "devDependencies": { "@scrypted/sdk": "file:../../sdk" }, - "version": "0.1.50" + "version": "0.1.51" } diff --git a/plugins/coreml/src/coreml/__init__.py b/plugins/coreml/src/coreml/__init__.py index 76a559c29..2d29b0bba 100644 --- a/plugins/coreml/src/coreml/__init__.py +++ b/plugins/coreml/src/coreml/__init__.py @@ -134,6 +134,8 @@ class CoreMLPlugin(PredictPlugin, scrypted_sdk.Settings, scrypted_sdk.DeviceProv self.loop = asyncio.get_event_loop() self.minThreshold = 0.2 + self.faceDevice = None + self.textDevice = None asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop) async def prepareRecognitionModels(self): @@ -171,9 +173,11 @@ class CoreMLPlugin(PredictPlugin, scrypted_sdk.Settings, scrypted_sdk.DeviceProv async def getDevice(self, nativeId: str) -> Any: if nativeId == "facerecognition": - return CoreMLFaceRecognition(nativeId) + self.faceDevice = self.faceDevice or CoreMLFaceRecognition(nativeId) + return self.faceDevice if nativeId == "textrecognition": - return CoreMLTextRecognition(nativeId) + self.textDevice = self.textDevice or CoreMLTextRecognition(nativeId) + return self.textDevice raise Exception("unknown device") async def getSettings(self) -> list[Setting]: diff --git a/plugins/coreml/src/coreml/face_recognition.py b/plugins/coreml/src/coreml/face_recognition.py index 93a777fec..1552a7efe 100644 --- a/plugins/coreml/src/coreml/face_recognition.py +++ b/plugins/coreml/src/coreml/face_recognition.py @@ -3,6 +3,7 @@ from __future__ import annotations import concurrent.futures import os +import asyncio import coremltools as ct import numpy as np # import Quartz @@ -10,6 +11,7 @@ import numpy as np # import Vision from predict.face_recognize import FaceRecognizeDetection +from PIL import Image def euclidean_distance(arr1, arr2): @@ -29,6 +31,8 @@ predictExecutor = concurrent.futures.ThreadPoolExecutor(8, "Vision-Predict") class CoreMLFaceRecognition(FaceRecognizeDetection): def __init__(self, nativeId: str | None = None): super().__init__(nativeId=nativeId) + self.detectExecutor = concurrent.futures.ThreadPoolExecutor(1, "detect-face") + self.recogExecutor = concurrent.futures.ThreadPoolExecutor(1, "recog-face") def downloadModel(self, model: str): model_version = "v7" @@ -51,23 +55,29 @@ class CoreMLFaceRecognition(FaceRecognizeDetection): inputName = model.get_spec().description.input[0].name return model, inputName - def predictDetectModel(self, input): - model, inputName = self.detectModel - out_dict = model.predict({inputName: input}) - results = list(out_dict.values())[0][0] + async def predictDetectModel(self, input: Image.Image): + def predict(): + model, inputName = self.detectModel + out_dict = model.predict({inputName: input}) + results = list(out_dict.values())[0][0] + return results + + results = await asyncio.get_event_loop().run_in_executor( + self.detectExecutor, lambda: predict() + ) return results - def predictFaceModel(self, input): - model, inputName = self.faceModel - out_dict = model.predict({inputName: input}) - return out_dict["var_2167"][0] + async def predictFaceModel(self, input: np.ndarray): + def predict(): + model, inputName = self.faceModel + out_dict = model.predict({inputName: input}) + results = out_dict["var_2167"][0] + return results + results = await asyncio.get_event_loop().run_in_executor( + self.recogExecutor, lambda: predict() + ) + return results - def predictTextModel(self, input): - model, inputName = self.textModel - out_dict = model.predict({inputName: input}) - preds = out_dict["linear_2"] - return preds - # def predictVision(self, input: Image.Image) -> asyncio.Future[list[Prediction]]: # buffer = input.tobytes() # myData = NSData.alloc().initWithBytes_length_(buffer, len(buffer)) diff --git a/plugins/coreml/src/coreml/text_recognition.py b/plugins/coreml/src/coreml/text_recognition.py index fe192d9f4..f70af2cdd 100644 --- a/plugins/coreml/src/coreml/text_recognition.py +++ b/plugins/coreml/src/coreml/text_recognition.py @@ -1,8 +1,13 @@ from __future__ import annotations +import concurrent.futures import os +import asyncio + import coremltools as ct +import numpy as np +from PIL import Image from predict.text_recognize import TextRecognition @@ -11,6 +16,9 @@ class CoreMLTextRecognition(TextRecognition): def __init__(self, nativeId: str | None = None): super().__init__(nativeId=nativeId) + self.detectExecutor = concurrent.futures.ThreadPoolExecutor(1, "detect-text") + self.recogExecutor = concurrent.futures.ThreadPoolExecutor(1, "recog-text") + def downloadModel(self, model: str): model_version = "v7" mlmodel = "model" @@ -32,14 +40,24 @@ class CoreMLTextRecognition(TextRecognition): inputName = model.get_spec().description.input[0].name return model, inputName - def predictDetectModel(self, input): - model, inputName = self.detectModel - out_dict = model.predict({inputName: input}) - results = list(out_dict.values())[0] + async def predictDetectModel(self, input: Image.Image): + def predict(): + model, inputName = self.detectModel + out_dict = model.predict({inputName: input}) + results = list(out_dict.values())[0] + return results + results = await asyncio.get_event_loop().run_in_executor( + self.detectExecutor, lambda: predict() + ) return results - def predictTextModel(self, input): - model, inputName = self.textModel - out_dict = model.predict({inputName: input}) - preds = out_dict["linear_2"] + async def predictTextModel(self, input: np.ndarray): + def predict(): + model, inputName = self.textModel + out_dict = model.predict({inputName: input}) + preds = out_dict["linear_2"] + return preds + preds = await asyncio.get_event_loop().run_in_executor( + self.recogExecutor, lambda: predict() + ) return preds diff --git a/plugins/onnx/package-lock.json b/plugins/onnx/package-lock.json index a8d332079..a04691601 100644 --- a/plugins/onnx/package-lock.json +++ b/plugins/onnx/package-lock.json @@ -1,12 +1,12 @@ { "name": "@scrypted/openvino", - "version": "0.1.87", + "version": "0.1.88", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@scrypted/openvino", - "version": "0.1.87", + "version": "0.1.88", "devDependencies": { "@scrypted/sdk": "file:../../sdk" } diff --git a/plugins/onnx/package.json b/plugins/onnx/package.json index 1c4abf9cd..32734e313 100644 --- a/plugins/onnx/package.json +++ b/plugins/onnx/package.json @@ -33,6 +33,7 @@ "runtime": "python", "type": "API", "interfaces": [ + "DeviceProvider", "Settings", "ObjectDetection", "ObjectDetectionPreview" @@ -41,5 +42,5 @@ "devDependencies": { "@scrypted/sdk": "file:../../sdk" }, - "version": "0.1.87" + "version": "0.1.88" } diff --git a/plugins/onnx/src/ort/__init__.py b/plugins/onnx/src/ort/__init__.py index 9c0e1942b..06c8884d5 100644 --- a/plugins/onnx/src/ort/__init__.py +++ b/plugins/onnx/src/ort/__init__.py @@ -20,6 +20,13 @@ from scrypted_sdk.types import Setting import common.yolo as yolo from predict import PredictPlugin +from .face_recognition import ONNXFaceRecognition + +try: + from .text_recognition import ONNXTextRecognition +except: + ONNXTextRecognition = None + availableModels = [ "Default", "scrypted_yolo_nas_s_320", @@ -72,6 +79,7 @@ class ONNXPlugin( deviceIds = json.loads(deviceIds) if not len(deviceIds): deviceIds = ["0"] + self.deviceIds = deviceIds compiled_models = [] self.compiled_models = {} @@ -124,6 +132,52 @@ class ONNXPlugin( thread_name_prefix="onnx-prepare", ) + self.faceDevice = None + self.textDevice = None + asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop) + + async def prepareRecognitionModels(self): + try: + devices = [ + { + "nativeId": "facerecognition", + "type": scrypted_sdk.ScryptedDeviceType.Builtin.value, + "interfaces": [ + scrypted_sdk.ScryptedInterface.ObjectDetection.value, + ], + "name": "ONNX Face Recognition", + }, + ] + + if ONNXTextRecognition: + devices.append( + { + "nativeId": "textrecognition", + "type": scrypted_sdk.ScryptedDeviceType.Builtin.value, + "interfaces": [ + scrypted_sdk.ScryptedInterface.ObjectDetection.value, + ], + "name": "ONNX Text Recognition", + }, + ) + + await scrypted_sdk.deviceManager.onDevicesChanged( + { + "devices": devices, + } + ) + except: + pass + + async def getDevice(self, nativeId: str) -> Any: + if nativeId == "facerecognition": + self.faceDevice = self.faceDevice or ONNXFaceRecognition(self, nativeId) + return self.faceDevice + elif nativeId == "textrecognition": + self.textDevice = self.textDevice or ONNXTextRecognition(self, nativeId) + return self.textDevice + raise Exception("unknown device") + async def getSettings(self) -> list[Setting]: model = self.storage.getItem("model") or "Default" deviceIds = self.storage.getItem("deviceIds") or '["0"]' diff --git a/plugins/onnx/src/ort/face_recognition.py b/plugins/onnx/src/ort/face_recognition.py new file mode 100644 index 000000000..06ddac882 --- /dev/null +++ b/plugins/onnx/src/ort/face_recognition.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import asyncio +import concurrent.futures +import platform +import sys +import threading + +import numpy as np +import onnxruntime +from PIL import Image + +from predict.face_recognize import FaceRecognizeDetection + + +class ONNXFaceRecognition(FaceRecognizeDetection): + def __init__(self, plugin, nativeId: str | None = None): + self.plugin = plugin + + super().__init__(nativeId=nativeId) + + def downloadModel(self, model: str): + onnxmodel = "best" if "scrypted" in model else model + model_version = "v1" + onnxfile = self.downloadFile( + f"https://raw.githubusercontent.com/koush/onnx-models/main/{model}/{onnxmodel}.onnx", + f"{model_version}/{model}/{onnxmodel}.onnx", + ) + print(onnxfile) + + compiled_models_array = [] + compiled_models = {} + deviceIds = self.plugin.deviceIds + + for deviceId in deviceIds: + sess_options = onnxruntime.SessionOptions() + + providers: list[str] = [] + if sys.platform == "darwin": + providers.append("CoreMLExecutionProvider") + + if "linux" in sys.platform and platform.machine() == "x86_64": + deviceId = int(deviceId) + providers.append(("CUDAExecutionProvider", {"device_id": deviceId})) + + providers.append("CPUExecutionProvider") + + compiled_model = onnxruntime.InferenceSession( + onnxfile, sess_options=sess_options, providers=providers + ) + compiled_models_array.append(compiled_model) + + input = compiled_model.get_inputs()[0] + input_name = input.name + + def executor_initializer(): + thread_name = threading.current_thread().name + interpreter = compiled_models_array.pop() + compiled_models[thread_name] = interpreter + print("Runtime initialized on thread {}".format(thread_name)) + + executor = concurrent.futures.ThreadPoolExecutor( + initializer=executor_initializer, + max_workers=len(compiled_models_array), + thread_name_prefix="face", + ) + + prepareExecutor = concurrent.futures.ThreadPoolExecutor( + max_workers=len(compiled_models_array), + thread_name_prefix="face-prepare", + ) + + return compiled_models, input_name, prepareExecutor, executor + + async def predictDetectModel(self, input: Image.Image): + compiled_models, input_name, prepareExecutor, executor = self.detectModel + + def prepare(): + im = np.array(input) + im = np.expand_dims(input, axis=0) + im = im.transpose((0, 3, 1, 2)) # BHWC to BCHW, (n, 3, h, w) + im = im.astype(np.float32) / 255.0 + im = np.ascontiguousarray(im) # contiguous + return im + + def predict(input_tensor): + compiled_model = compiled_models[threading.current_thread().name] + output_tensors = compiled_model.run(None, {input_name: input_tensor}) + return output_tensors + + input_tensor = await asyncio.get_event_loop().run_in_executor( + prepareExecutor, lambda: prepare() + ) + objs = await asyncio.get_event_loop().run_in_executor( + executor, lambda: predict(input_tensor) + ) + + return objs[0][0] + + async def predictFaceModel(self, input: np.ndarray): + compiled_models, input_name, prepareExecutor, executor = self.faceModel + + def predict(): + compiled_model = compiled_models[threading.current_thread().name] + output_tensors = compiled_model.run(None, {input_name: input}) + return output_tensors + + objs = await asyncio.get_event_loop().run_in_executor( + executor, lambda: predict() + ) + + return objs[0] diff --git a/plugins/onnx/src/ort/text_recognition.py b/plugins/onnx/src/ort/text_recognition.py new file mode 100644 index 000000000..744eb37ec --- /dev/null +++ b/plugins/onnx/src/ort/text_recognition.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import asyncio +import concurrent.futures +import platform +import sys +import threading + +import numpy as np +import onnxruntime +from PIL import Image + +from predict.text_recognize import TextRecognition + + +class ONNXTextRecognition(TextRecognition): + def __init__(self, plugin, nativeId: str | None = None): + self.plugin = plugin + + super().__init__(nativeId=nativeId) + + def downloadModel(self, model: str): + onnxmodel = model + model_version = "v3" + onnxfile = self.downloadFile( + f"https://raw.githubusercontent.com/koush/onnx-models/main/{model}/{onnxmodel}.onnx", + f"{model_version}/{model}/{onnxmodel}.onnx", + ) + print(onnxfile) + + compiled_models_array = [] + compiled_models = {} + deviceIds = self.plugin.deviceIds + + for deviceId in deviceIds: + sess_options = onnxruntime.SessionOptions() + + providers: list[str] = [] + if sys.platform == "darwin": + providers.append("CoreMLExecutionProvider") + + if "linux" in sys.platform and platform.machine() == "x86_64": + deviceId = int(deviceId) + providers.append(("CUDAExecutionProvider", {"device_id": deviceId})) + + providers.append("CPUExecutionProvider") + + compiled_model = onnxruntime.InferenceSession( + onnxfile, sess_options=sess_options, providers=providers + ) + compiled_models_array.append(compiled_model) + + input = compiled_model.get_inputs()[0] + input_name = input.name + + def executor_initializer(): + thread_name = threading.current_thread().name + interpreter = compiled_models_array.pop() + compiled_models[thread_name] = interpreter + print("Runtime initialized on thread {}".format(thread_name)) + + executor = concurrent.futures.ThreadPoolExecutor( + initializer=executor_initializer, + max_workers=len(compiled_models_array), + thread_name_prefix="face", + ) + + prepareExecutor = concurrent.futures.ThreadPoolExecutor( + max_workers=len(compiled_models_array), + thread_name_prefix="face-prepare", + ) + + return compiled_models, input_name, prepareExecutor, executor + + async def predictDetectModel(self, input: Image.Image): + compiled_models, input_name, prepareExecutor, executor = self.detectModel + + def predict(): + compiled_model = compiled_models[threading.current_thread().name] + output_tensors = compiled_model.run(None, {input_name: input}) + return output_tensors + + objs = await asyncio.get_event_loop().run_in_executor( + executor, lambda: predict() + ) + + return objs[0] + + async def predictTextModel(self, input: np.ndarray): + input = input.astype(np.float32) + compiled_models, input_name, prepareExecutor, executor = self.textModel + + def predict(): + compiled_model = compiled_models[threading.current_thread().name] + output_tensors = compiled_model.run(None, {input_name: input}) + return output_tensors + + objs = await asyncio.get_event_loop().run_in_executor( + executor, lambda: predict() + ) + + return objs[0] diff --git a/plugins/onnx/src/requirements.optional.txt b/plugins/onnx/src/requirements.optional.txt new file mode 100644 index 000000000..0dd006bbc --- /dev/null +++ b/plugins/onnx/src/requirements.optional.txt @@ -0,0 +1 @@ +opencv-python diff --git a/plugins/onnx/src/requirements.txt b/plugins/onnx/src/requirements.txt index 93b4e20bf..5b29cc757 100644 --- a/plugins/onnx/src/requirements.txt +++ b/plugins/onnx/src/requirements.txt @@ -4,6 +4,7 @@ onnxruntime-gpu; 'linux' in sys_platform and platform_machine == 'x86_64' # cpu and coreml execution provider onnxruntime; 'linux' not in sys_platform or platform_machine != 'x86_64' +# nightly? # ort-nightly-gpu==1.17.3.dev20240409002 # pillow-simd is available on x64 linux diff --git a/plugins/openvino/package-lock.json b/plugins/openvino/package-lock.json index 1933feaf7..13b64c87c 100644 --- a/plugins/openvino/package-lock.json +++ b/plugins/openvino/package-lock.json @@ -1,12 +1,12 @@ { "name": "@scrypted/openvino", - "version": "0.1.85", + "version": "0.1.86", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@scrypted/openvino", - "version": "0.1.85", + "version": "0.1.86", "devDependencies": { "@scrypted/sdk": "file:../../sdk" } diff --git a/plugins/openvino/package.json b/plugins/openvino/package.json index a9bf2d900..b74ff526f 100644 --- a/plugins/openvino/package.json +++ b/plugins/openvino/package.json @@ -42,5 +42,5 @@ "devDependencies": { "@scrypted/sdk": "file:../../sdk" }, - "version": "0.1.85" + "version": "0.1.86" } diff --git a/plugins/openvino/src/ov/__init__.py b/plugins/openvino/src/ov/__init__.py index d4ee3c46b..180c3b91f 100644 --- a/plugins/openvino/src/ov/__init__.py +++ b/plugins/openvino/src/ov/__init__.py @@ -204,6 +204,8 @@ class OpenVINOPlugin( labels_contents = open(labelsFile, "r").read() self.labels = parse_label_contents(labels_contents) + self.faceDevice = None + self.textDevice = None asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop) async def getSettings(self) -> list[Setting]: @@ -396,7 +398,9 @@ class OpenVINOPlugin( async def getDevice(self, nativeId: str) -> Any: if nativeId == "facerecognition": - return OpenVINOFaceRecognition(self, nativeId) + self.faceDevice = self.faceDevice or OpenVINOFaceRecognition(self, nativeId) + return self.faceDevice elif nativeId == "textrecognition": - return OpenVINOTextRecognition(self, nativeId) + self.textDevice = self.textDevice or OpenVINOTextRecognition(self, nativeId) + return self.textDevice raise Exception("unknown device") diff --git a/plugins/openvino/src/ov/async_infer.py b/plugins/openvino/src/ov/async_infer.py new file mode 100644 index 000000000..9d0c88e33 --- /dev/null +++ b/plugins/openvino/src/ov/async_infer.py @@ -0,0 +1,9 @@ +import asyncio + +async def start_async(infer_request): + future = asyncio.Future(loop = asyncio.get_event_loop()) + def callback(status = None, result = None): + future.set_result(None) + infer_request.set_callback(callback, None) + infer_request.start_async() + await future diff --git a/plugins/openvino/src/ov/face_recognition.py b/plugins/openvino/src/ov/face_recognition.py index f99a164f6..54d77ea86 100644 --- a/plugins/openvino/src/ov/face_recognition.py +++ b/plugins/openvino/src/ov/face_recognition.py @@ -1,24 +1,13 @@ from __future__ import annotations -import concurrent.futures import openvino.runtime as ov +from ov import async_infer +from PIL import Image import numpy as np from predict.face_recognize import FaceRecognizeDetection - -def euclidean_distance(arr1, arr2): - return np.linalg.norm(arr1 - arr2) - - -def cosine_similarity(vector_a, vector_b): - dot_product = np.dot(vector_a, vector_b) - norm_a = np.linalg.norm(vector_a) - norm_b = np.linalg.norm(vector_b) - similarity = dot_product / (norm_a * norm_b) - return similarity - class OpenVINOFaceRecognition(FaceRecognizeDetection): def __init__(self, plugin, nativeId: str | None = None): self.plugin = plugin @@ -40,32 +29,20 @@ class OpenVINOFaceRecognition(FaceRecognizeDetection): print(xmlFile, binFile) return self.plugin.core.compile_model(xmlFile, self.plugin.mode) - def predictDetectModel(self, input): + async def predictDetectModel(self, input: Image.Image): infer_request = self.detectModel.create_infer_request() - im = np.stack([input]) + im = np.expand_dims(input, axis=0) im = im.transpose((0, 3, 1, 2)) # BHWC to BCHW, (n, 3, h, w) im = im.astype(np.float32) / 255.0 im = np.ascontiguousarray(im) # contiguous im = ov.Tensor(array=im) - input_tensor = im - infer_request.set_input_tensor(input_tensor) - infer_request.start_async() - infer_request.wait() + infer_request.set_input_tensor(im) + await async_infer.start_async(infer_request) return infer_request.output_tensors[0].data[0] - def predictFaceModel(self, input): + async def predictFaceModel(self, input: np.ndarray): im = ov.Tensor(array=input) infer_request = self.faceModel.create_infer_request() infer_request.set_input_tensor(im) - infer_request.start_async() - infer_request.wait() + await async_infer.start_async(infer_request) return infer_request.output_tensors[0].data[0] - - def predictTextModel(self, input): - input = input.astype(np.float32) - im = ov.Tensor(array=input) - infer_request = self.textModel.create_infer_request() - infer_request.set_input_tensor(im) - infer_request.start_async() - infer_request.wait() - return infer_request.output_tensors[0].data diff --git a/plugins/openvino/src/ov/text_recognition.py b/plugins/openvino/src/ov/text_recognition.py index fb4ec136e..eede0c69c 100644 --- a/plugins/openvino/src/ov/text_recognition.py +++ b/plugins/openvino/src/ov/text_recognition.py @@ -1,7 +1,8 @@ from __future__ import annotations -import openvino.runtime as ov import numpy as np +import openvino.runtime as ov +from ov import async_infer from predict.text_recognize import TextRecognition @@ -27,20 +28,18 @@ class OpenVINOTextRecognition(TextRecognition): print(xmlFile, binFile) return self.plugin.core.compile_model(xmlFile, self.plugin.mode) - def predictDetectModel(self, input): + async def predictDetectModel(self, input: np.ndarray): infer_request = self.detectModel.create_infer_request() im = ov.Tensor(array=input) input_tensor = im infer_request.set_input_tensor(input_tensor) - infer_request.start_async() - infer_request.wait() + await async_infer.start_async(infer_request) return infer_request.output_tensors[0].data - def predictTextModel(self, input): + async def predictTextModel(self, input: np.ndarray): input = input.astype(np.float32) im = ov.Tensor(array=input) infer_request = self.textModel.create_infer_request() infer_request.set_input_tensor(im) - infer_request.start_async() - infer_request.wait() + await async_infer.start_async(infer_request) return infer_request.output_tensors[0].data diff --git a/plugins/tensorflow-lite/src/predict/face_recognize.py b/plugins/tensorflow-lite/src/predict/face_recognize.py index e8bbde165..ecf4a8f71 100644 --- a/plugins/tensorflow-lite/src/predict/face_recognize.py +++ b/plugins/tensorflow-lite/src/predict/face_recognize.py @@ -3,14 +3,10 @@ from __future__ import annotations import asyncio from asyncio import Future import base64 -import concurrent.futures -import os from typing import Any, Tuple, List import numpy as np -# import Quartz import scrypted_sdk -# from Foundation import NSData, NSMakeSize from PIL import Image from scrypted_sdk import ( Setting, @@ -21,10 +17,8 @@ from scrypted_sdk import ( ) import traceback -# import Vision from predict import PredictPlugin from common import yolo -from common.text import prepare_text_result, process_text_result def euclidean_distance(arr1, arr2): return np.linalg.norm(arr1 - arr2) @@ -37,9 +31,6 @@ def cosine_similarity(vector_a, vector_b): similarity = dot_product / (norm_a * norm_b) return similarity - -predictExecutor = concurrent.futures.ThreadPoolExecutor(1, "Recognize") - class FaceRecognizeDetection(PredictPlugin): def __init__(self, nativeId: str | None = None): super().__init__(nativeId=nativeId) @@ -56,7 +47,6 @@ class FaceRecognizeDetection(PredictPlugin): self.minThreshold = 0.7 self.detectModel = self.downloadModel("scrypted_yolov9c_flt") - self.textModel = self.downloadModel("vgg_english_g2") self.faceModel = self.downloadModel("inception_resnet_v1") def downloadModel(self, model: str): @@ -81,9 +71,7 @@ class FaceRecognizeDetection(PredictPlugin): return "rgb" async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss): - results = await asyncio.get_event_loop().run_in_executor( - predictExecutor, lambda: self.predictDetectModel(input) - ) + results = await self.predictDetectModel(input) objs = yolo.parse_yolov9(results) ret = self.create_detection_result(objs, src_size, cvss) return ret @@ -112,10 +100,7 @@ class FaceRecognizeDetection(PredictPlugin): processed_tensor = (image_tensor - 127.5) / 128.0 processed_tensor = np.expand_dims(processed_tensor, axis=0) - output = await asyncio.get_event_loop().run_in_executor( - predictExecutor, - lambda: self.predictFaceModel(processed_tensor) - ) + output = await self.predictFaceModel(processed_tensor) b = output.tobytes() embedding = base64.b64encode(b).decode("utf-8") @@ -125,29 +110,12 @@ class FaceRecognizeDetection(PredictPlugin): traceback.print_exc() pass - def predictTextModel(self, input): + async def predictDetectModel(self, input: Image.Image): pass - def predictDetectModel(self, input): + async def predictFaceModel(self, input: np.ndarray): pass - def predictFaceModel(self, input): - pass - - async def setLabel(self, d: ObjectDetectionResult, image: scrypted_sdk.Image): - try: - - image_tensor = await prepare_text_result(d, image) - preds = await asyncio.get_event_loop().run_in_executor( - predictExecutor, - lambda: self.predictTextModel(image_tensor), - ) - d['label'] = process_text_result(preds) - - except Exception as e: - traceback.print_exc() - pass - async def run_detection_image( self, image: scrypted_sdk.Image, detection_session: ObjectDetectionSession ) -> ObjectsDetected: @@ -206,10 +174,6 @@ class FaceRecognizeDetection(PredictPlugin): for d in ret["detections"]: if d["className"] == "face": futures.append(asyncio.ensure_future(self.setEmbedding(d, image))) - # elif d["className"] == "plate": - # futures.append(asyncio.ensure_future(self.setLabel(d, image))) - # elif d['className'] == 'text': - # futures.append(asyncio.ensure_future(self.setLabel(d, image))) if len(futures): await asyncio.wait(futures) diff --git a/plugins/tensorflow-lite/src/predict/text_recognize.py b/plugins/tensorflow-lite/src/predict/text_recognize.py index 7ab980b7f..1a2c146c7 100644 --- a/plugins/tensorflow-lite/src/predict/text_recognize.py +++ b/plugins/tensorflow-lite/src/predict/text_recognize.py @@ -41,10 +41,10 @@ class TextRecognition(PredictPlugin): def downloadModel(self, model: str): pass - def predictDetectModel(self, input): + async def predictDetectModel(self, input: np.ndarray): pass - def predictTextModel(self, input): + async def predictTextModel(self, input: np.ndarray): pass async def detect_once( @@ -56,9 +56,7 @@ class TextRecognition(PredictPlugin): # add extra dimension to tensor image_tensor = np.expand_dims(image_tensor, axis=0) - y = await asyncio.get_event_loop().run_in_executor( - predictExecutor, lambda: self.predictDetectModel(image_tensor) - ) + y = await self.predictDetectModel(image_tensor) estimate_num_chars = False ratio_h = ratio_w = 1 @@ -158,10 +156,7 @@ class TextRecognition(PredictPlugin): try: image_tensor = await prepare_text_result(d, image, skew_angle) - preds = await asyncio.get_event_loop().run_in_executor( - predictExecutor, - lambda: self.predictTextModel(image_tensor), - ) + preds = await self.predictTextModel(image_tensor) d["label"] = process_text_result(preds) except Exception as e: