mirror of
https://github.com/koush/scrypted.git
synced 2026-05-05 05:40:27 +01:00
predict plugins: refactor recog, add onnx, fix spurious model leaks
This commit is contained in:
4
plugins/coreml/package-lock.json
generated
4
plugins/coreml/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@scrypted/coreml",
|
||||
"version": "0.1.50",
|
||||
"version": "0.1.51",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@scrypted/coreml",
|
||||
"version": "0.1.50",
|
||||
"version": "0.1.51",
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
}
|
||||
|
||||
@@ -42,5 +42,5 @@
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
},
|
||||
"version": "0.1.50"
|
||||
"version": "0.1.51"
|
||||
}
|
||||
|
||||
@@ -134,6 +134,8 @@ class CoreMLPlugin(PredictPlugin, scrypted_sdk.Settings, scrypted_sdk.DeviceProv
|
||||
self.loop = asyncio.get_event_loop()
|
||||
self.minThreshold = 0.2
|
||||
|
||||
self.faceDevice = None
|
||||
self.textDevice = None
|
||||
asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
|
||||
|
||||
async def prepareRecognitionModels(self):
|
||||
@@ -171,9 +173,11 @@ class CoreMLPlugin(PredictPlugin, scrypted_sdk.Settings, scrypted_sdk.DeviceProv
|
||||
|
||||
async def getDevice(self, nativeId: str) -> Any:
|
||||
if nativeId == "facerecognition":
|
||||
return CoreMLFaceRecognition(nativeId)
|
||||
self.faceDevice = self.faceDevice or CoreMLFaceRecognition(nativeId)
|
||||
return self.faceDevice
|
||||
if nativeId == "textrecognition":
|
||||
return CoreMLTextRecognition(nativeId)
|
||||
self.textDevice = self.textDevice or CoreMLTextRecognition(nativeId)
|
||||
return self.textDevice
|
||||
raise Exception("unknown device")
|
||||
|
||||
async def getSettings(self) -> list[Setting]:
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import concurrent.futures
|
||||
import os
|
||||
|
||||
import asyncio
|
||||
import coremltools as ct
|
||||
import numpy as np
|
||||
# import Quartz
|
||||
@@ -10,6 +11,7 @@ import numpy as np
|
||||
|
||||
# import Vision
|
||||
from predict.face_recognize import FaceRecognizeDetection
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def euclidean_distance(arr1, arr2):
|
||||
@@ -29,6 +31,8 @@ predictExecutor = concurrent.futures.ThreadPoolExecutor(8, "Vision-Predict")
|
||||
class CoreMLFaceRecognition(FaceRecognizeDetection):
|
||||
def __init__(self, nativeId: str | None = None):
|
||||
super().__init__(nativeId=nativeId)
|
||||
self.detectExecutor = concurrent.futures.ThreadPoolExecutor(1, "detect-face")
|
||||
self.recogExecutor = concurrent.futures.ThreadPoolExecutor(1, "recog-face")
|
||||
|
||||
def downloadModel(self, model: str):
|
||||
model_version = "v7"
|
||||
@@ -51,23 +55,29 @@ class CoreMLFaceRecognition(FaceRecognizeDetection):
|
||||
inputName = model.get_spec().description.input[0].name
|
||||
return model, inputName
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
model, inputName = self.detectModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
results = list(out_dict.values())[0][0]
|
||||
async def predictDetectModel(self, input: Image.Image):
|
||||
def predict():
|
||||
model, inputName = self.detectModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
results = list(out_dict.values())[0][0]
|
||||
return results
|
||||
|
||||
results = await asyncio.get_event_loop().run_in_executor(
|
||||
self.detectExecutor, lambda: predict()
|
||||
)
|
||||
return results
|
||||
|
||||
def predictFaceModel(self, input):
|
||||
model, inputName = self.faceModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
return out_dict["var_2167"][0]
|
||||
async def predictFaceModel(self, input: np.ndarray):
|
||||
def predict():
|
||||
model, inputName = self.faceModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
results = out_dict["var_2167"][0]
|
||||
return results
|
||||
results = await asyncio.get_event_loop().run_in_executor(
|
||||
self.recogExecutor, lambda: predict()
|
||||
)
|
||||
return results
|
||||
|
||||
def predictTextModel(self, input):
|
||||
model, inputName = self.textModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
preds = out_dict["linear_2"]
|
||||
return preds
|
||||
|
||||
# def predictVision(self, input: Image.Image) -> asyncio.Future[list[Prediction]]:
|
||||
# buffer = input.tobytes()
|
||||
# myData = NSData.alloc().initWithBytes_length_(buffer, len(buffer))
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import os
|
||||
|
||||
import asyncio
|
||||
|
||||
import coremltools as ct
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from predict.text_recognize import TextRecognition
|
||||
|
||||
@@ -11,6 +16,9 @@ class CoreMLTextRecognition(TextRecognition):
|
||||
def __init__(self, nativeId: str | None = None):
|
||||
super().__init__(nativeId=nativeId)
|
||||
|
||||
self.detectExecutor = concurrent.futures.ThreadPoolExecutor(1, "detect-text")
|
||||
self.recogExecutor = concurrent.futures.ThreadPoolExecutor(1, "recog-text")
|
||||
|
||||
def downloadModel(self, model: str):
|
||||
model_version = "v7"
|
||||
mlmodel = "model"
|
||||
@@ -32,14 +40,24 @@ class CoreMLTextRecognition(TextRecognition):
|
||||
inputName = model.get_spec().description.input[0].name
|
||||
return model, inputName
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
model, inputName = self.detectModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
results = list(out_dict.values())[0]
|
||||
async def predictDetectModel(self, input: Image.Image):
|
||||
def predict():
|
||||
model, inputName = self.detectModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
results = list(out_dict.values())[0]
|
||||
return results
|
||||
results = await asyncio.get_event_loop().run_in_executor(
|
||||
self.detectExecutor, lambda: predict()
|
||||
)
|
||||
return results
|
||||
|
||||
def predictTextModel(self, input):
|
||||
model, inputName = self.textModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
preds = out_dict["linear_2"]
|
||||
async def predictTextModel(self, input: np.ndarray):
|
||||
def predict():
|
||||
model, inputName = self.textModel
|
||||
out_dict = model.predict({inputName: input})
|
||||
preds = out_dict["linear_2"]
|
||||
return preds
|
||||
preds = await asyncio.get_event_loop().run_in_executor(
|
||||
self.recogExecutor, lambda: predict()
|
||||
)
|
||||
return preds
|
||||
|
||||
4
plugins/onnx/package-lock.json
generated
4
plugins/onnx/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@scrypted/openvino",
|
||||
"version": "0.1.87",
|
||||
"version": "0.1.88",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@scrypted/openvino",
|
||||
"version": "0.1.87",
|
||||
"version": "0.1.88",
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
"runtime": "python",
|
||||
"type": "API",
|
||||
"interfaces": [
|
||||
"DeviceProvider",
|
||||
"Settings",
|
||||
"ObjectDetection",
|
||||
"ObjectDetectionPreview"
|
||||
@@ -41,5 +42,5 @@
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
},
|
||||
"version": "0.1.87"
|
||||
"version": "0.1.88"
|
||||
}
|
||||
|
||||
@@ -20,6 +20,13 @@ from scrypted_sdk.types import Setting
|
||||
import common.yolo as yolo
|
||||
from predict import PredictPlugin
|
||||
|
||||
from .face_recognition import ONNXFaceRecognition
|
||||
|
||||
try:
|
||||
from .text_recognition import ONNXTextRecognition
|
||||
except:
|
||||
ONNXTextRecognition = None
|
||||
|
||||
availableModels = [
|
||||
"Default",
|
||||
"scrypted_yolo_nas_s_320",
|
||||
@@ -72,6 +79,7 @@ class ONNXPlugin(
|
||||
deviceIds = json.loads(deviceIds)
|
||||
if not len(deviceIds):
|
||||
deviceIds = ["0"]
|
||||
self.deviceIds = deviceIds
|
||||
|
||||
compiled_models = []
|
||||
self.compiled_models = {}
|
||||
@@ -124,6 +132,52 @@ class ONNXPlugin(
|
||||
thread_name_prefix="onnx-prepare",
|
||||
)
|
||||
|
||||
self.faceDevice = None
|
||||
self.textDevice = None
|
||||
asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
|
||||
|
||||
async def prepareRecognitionModels(self):
    """Announce the recognition sub-devices to the Scrypted device manager.

    The face recognition device is always announced. The text recognition
    device is only announced when ``ONNXTextRecognition`` is truthy, i.e. its
    guarded import succeeded.

    Registration is best effort: a failure is printed and otherwise ignored
    so it cannot take the plugin down.
    """
    # Imported locally so this method does not depend on a module-level import.
    import traceback

    try:
        devices = [
            {
                "nativeId": "facerecognition",
                "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                "interfaces": [
                    scrypted_sdk.ScryptedInterface.ObjectDetection.value,
                ],
                "name": "ONNX Face Recognition",
            },
        ]

        if ONNXTextRecognition:
            devices.append(
                {
                    "nativeId": "textrecognition",
                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                    "interfaces": [
                        scrypted_sdk.ScryptedInterface.ObjectDetection.value,
                    ],
                    "name": "ONNX Text Recognition",
                },
            )

        await scrypted_sdk.deviceManager.onDevicesChanged(
            {
                "devices": devices,
            }
        )
    except Exception:
        # Still best effort, but leave a trace instead of failing silently.
        # Catching Exception (not a bare except) lets task cancellation
        # propagate.
        traceback.print_exc()
|
||||
|
||||
async def getDevice(self, nativeId: str) -> Any:
    """Return the recognition device for ``nativeId``, creating it on first use.

    Instances are cached on the plugin (``faceDevice`` / ``textDevice``) so
    repeated lookups reuse the same device object.

    Raises:
        Exception: if ``nativeId`` is not a known recognition device, or if
            text recognition was requested but ``ONNXTextRecognition`` is
            ``None`` because its import failed.
    """
    if nativeId == "facerecognition":
        self.faceDevice = self.faceDevice or ONNXFaceRecognition(self, nativeId)
        return self.faceDevice

    if nativeId == "textrecognition":
        if ONNXTextRecognition is None:
            # Without this guard the call below would fail with an opaque
            # "'NoneType' object is not callable" TypeError.
            raise Exception("text recognition is unavailable")
        self.textDevice = self.textDevice or ONNXTextRecognition(self, nativeId)
        return self.textDevice

    raise Exception("unknown device")
|
||||
|
||||
async def getSettings(self) -> list[Setting]:
|
||||
model = self.storage.getItem("model") or "Default"
|
||||
deviceIds = self.storage.getItem("deviceIds") or '["0"]'
|
||||
|
||||
112
plugins/onnx/src/ort/face_recognition.py
Normal file
112
plugins/onnx/src/ort/face_recognition.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import platform
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
from PIL import Image
|
||||
|
||||
from predict.face_recognize import FaceRecognizeDetection
|
||||
|
||||
|
||||
class ONNXFaceRecognition(FaceRecognizeDetection):
    """Face detection and face embedding backed by ONNX Runtime.

    One inference session is created per entry in ``plugin.deviceIds``. Each
    session is claimed by one worker thread of the prediction executor and is
    looked up by that thread's name when a prediction runs.
    """

    def __init__(self, plugin, nativeId: str | None = None):
        # Assigned before the base initializer runs; downloadModel() reads
        # self.plugin.deviceIds (presumably invoked from the base __init__).
        self.plugin = plugin

        super().__init__(nativeId=nativeId)

    def downloadModel(self, model: str):
        """Download ``model`` and create one inference session per device id.

        Returns:
            A tuple ``(compiled_models, input_name, prepareExecutor,
            executor)``. ``compiled_models`` maps a worker-thread name to its
            session and is filled in as the executor starts its threads.
        """
        onnxmodel = "best" if "scrypted" in model else model
        model_version = "v1"
        onnxfile = self.downloadFile(
            f"https://raw.githubusercontent.com/koush/onnx-models/main/{model}/{onnxmodel}.onnx",
            f"{model_version}/{model}/{onnxmodel}.onnx",
        )
        print(onnxfile)

        compiled_models_array = []
        compiled_models = {}
        deviceIds = self.plugin.deviceIds

        for deviceId in deviceIds:
            sess_options = onnxruntime.SessionOptions()

            # Entries are either a provider name or a (name, options) tuple.
            providers: list[str | tuple[str, dict]] = []
            if sys.platform == "darwin":
                providers.append("CoreMLExecutionProvider")

            if "linux" in sys.platform and platform.machine() == "x86_64":
                deviceId = int(deviceId)
                providers.append(("CUDAExecutionProvider", {"device_id": deviceId}))

            # The CPU provider is always listed last.
            providers.append("CPUExecutionProvider")

            compiled_model = onnxruntime.InferenceSession(
                onnxfile, sess_options=sess_options, providers=providers
            )
            compiled_models_array.append(compiled_model)

            # Every session loads the same file, so the name of the first
            # input is the same for each of them.
            input_name = compiled_model.get_inputs()[0].name

        def executor_initializer():
            # Runs once in each new worker thread: claim one session and
            # register it under this thread's name.
            thread_name = threading.current_thread().name
            interpreter = compiled_models_array.pop()
            compiled_models[thread_name] = interpreter
            print("Runtime initialized on thread {}".format(thread_name))

        # Captured now, before any worker thread pops from the list.
        worker_count = len(compiled_models_array)

        executor = concurrent.futures.ThreadPoolExecutor(
            initializer=executor_initializer,
            max_workers=worker_count,
            thread_name_prefix="face",
        )

        prepareExecutor = concurrent.futures.ThreadPoolExecutor(
            max_workers=worker_count,
            thread_name_prefix="face-prepare",
        )

        return compiled_models, input_name, prepareExecutor, executor

    async def predictDetectModel(self, input: Image.Image):
        """Run the detection model on ``input`` and return ``outputs[0][0]``.

        The image is converted to a tensor on the prepare executor and the
        inference itself runs on the prediction executor.
        """
        compiled_models, input_name, prepareExecutor, executor = self.detectModel

        def prepare():
            # Convert once and keep using the converted array.
            im = np.array(input)
            im = np.expand_dims(im, axis=0)
            im = im.transpose((0, 3, 1, 2))  # BHWC to BCHW, (n, 3, h, w)
            im = im.astype(np.float32) / 255.0
            im = np.ascontiguousarray(im)  # contiguous
            return im

        def predict(input_tensor):
            # Use the session registered for the current worker thread.
            compiled_model = compiled_models[threading.current_thread().name]
            output_tensors = compiled_model.run(None, {input_name: input_tensor})
            return output_tensors

        loop = asyncio.get_event_loop()
        input_tensor = await loop.run_in_executor(prepareExecutor, prepare)
        objs = await loop.run_in_executor(executor, predict, input_tensor)

        return objs[0][0]

    async def predictFaceModel(self, input: np.ndarray):
        """Run the face model on ``input`` and return its first output."""
        compiled_models, input_name, prepareExecutor, executor = self.faceModel

        def predict():
            # Use the session registered for the current worker thread.
            compiled_model = compiled_models[threading.current_thread().name]
            output_tensors = compiled_model.run(None, {input_name: input})
            return output_tensors

        objs = await asyncio.get_event_loop().run_in_executor(executor, predict)

        return objs[0]
|
||||
102
plugins/onnx/src/ort/text_recognition.py
Normal file
102
plugins/onnx/src/ort/text_recognition.py
Normal file
@@ -0,0 +1,102 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import platform
|
||||
import sys
|
||||
import threading
|
||||
|
||||
import numpy as np
|
||||
import onnxruntime
|
||||
from PIL import Image
|
||||
|
||||
from predict.text_recognize import TextRecognition
|
||||
|
||||
|
||||
class ONNXTextRecognition(TextRecognition):
    """Text detection and text recognition backed by ONNX Runtime.

    One inference session is created per entry in ``plugin.deviceIds``. Each
    session is claimed by one worker thread of the prediction executor and is
    looked up by that thread's name when a prediction runs.
    """

    def __init__(self, plugin, nativeId: str | None = None):
        # Assigned before the base initializer runs; downloadModel() reads
        # self.plugin.deviceIds (presumably invoked from the base __init__).
        self.plugin = plugin

        super().__init__(nativeId=nativeId)

    def downloadModel(self, model: str):
        """Download ``model`` and create one inference session per device id.

        Returns:
            A tuple ``(compiled_models, input_name, prepareExecutor,
            executor)``. ``compiled_models`` maps a worker-thread name to its
            session and is filled in as the executor starts its threads.
        """
        onnxmodel = model
        model_version = "v3"
        onnxfile = self.downloadFile(
            f"https://raw.githubusercontent.com/koush/onnx-models/main/{model}/{onnxmodel}.onnx",
            f"{model_version}/{model}/{onnxmodel}.onnx",
        )
        print(onnxfile)

        compiled_models_array = []
        compiled_models = {}
        deviceIds = self.plugin.deviceIds

        for deviceId in deviceIds:
            sess_options = onnxruntime.SessionOptions()

            # Entries are either a provider name or a (name, options) tuple.
            providers: list[str | tuple[str, dict]] = []
            if sys.platform == "darwin":
                providers.append("CoreMLExecutionProvider")

            if "linux" in sys.platform and platform.machine() == "x86_64":
                deviceId = int(deviceId)
                providers.append(("CUDAExecutionProvider", {"device_id": deviceId}))

            # The CPU provider is always listed last.
            providers.append("CPUExecutionProvider")

            compiled_model = onnxruntime.InferenceSession(
                onnxfile, sess_options=sess_options, providers=providers
            )
            compiled_models_array.append(compiled_model)

            # Every session loads the same file, so the name of the first
            # input is the same for each of them.
            input_name = compiled_model.get_inputs()[0].name

        def executor_initializer():
            # Runs once in each new worker thread: claim one session and
            # register it under this thread's name.
            thread_name = threading.current_thread().name
            interpreter = compiled_models_array.pop()
            compiled_models[thread_name] = interpreter
            print("Runtime initialized on thread {}".format(thread_name))

        # Captured now, before any worker thread pops from the list.
        worker_count = len(compiled_models_array)

        # Thread names are prefixed with "text" so they can be told apart from
        # the face recognition workers in logs. Sessions are keyed by whatever
        # name the worker thread has, so the prefix does not affect lookup.
        executor = concurrent.futures.ThreadPoolExecutor(
            initializer=executor_initializer,
            max_workers=worker_count,
            thread_name_prefix="text",
        )

        prepareExecutor = concurrent.futures.ThreadPoolExecutor(
            max_workers=worker_count,
            thread_name_prefix="text-prepare",
        )

        return compiled_models, input_name, prepareExecutor, executor

    async def predictDetectModel(self, input: np.ndarray):
        """Run the detection model on ``input`` and return its first output.

        ``input`` is fed to the session as is; no conversion happens here.
        """
        compiled_models, input_name, prepareExecutor, executor = self.detectModel

        def predict():
            # Use the session registered for the current worker thread.
            compiled_model = compiled_models[threading.current_thread().name]
            output_tensors = compiled_model.run(None, {input_name: input})
            return output_tensors

        objs = await asyncio.get_event_loop().run_in_executor(executor, predict)

        return objs[0]

    async def predictTextModel(self, input: np.ndarray):
        """Run the text model on ``input`` (cast to float32); return its first output."""
        input = input.astype(np.float32)
        compiled_models, input_name, prepareExecutor, executor = self.textModel

        def predict():
            # Use the session registered for the current worker thread.
            compiled_model = compiled_models[threading.current_thread().name]
            output_tensors = compiled_model.run(None, {input_name: input})
            return output_tensors

        objs = await asyncio.get_event_loop().run_in_executor(executor, predict)

        return objs[0]
|
||||
1
plugins/onnx/src/requirements.optional.txt
Normal file
1
plugins/onnx/src/requirements.optional.txt
Normal file
@@ -0,0 +1 @@
|
||||
opencv-python
|
||||
@@ -4,6 +4,7 @@
|
||||
onnxruntime-gpu; 'linux' in sys_platform and platform_machine == 'x86_64'
|
||||
# cpu and coreml execution provider
|
||||
onnxruntime; 'linux' not in sys_platform or platform_machine != 'x86_64'
|
||||
# nightly?
|
||||
# ort-nightly-gpu==1.17.3.dev20240409002
|
||||
|
||||
# pillow-simd is available on x64 linux
|
||||
|
||||
4
plugins/openvino/package-lock.json
generated
4
plugins/openvino/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@scrypted/openvino",
|
||||
"version": "0.1.85",
|
||||
"version": "0.1.86",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@scrypted/openvino",
|
||||
"version": "0.1.85",
|
||||
"version": "0.1.86",
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
}
|
||||
|
||||
@@ -42,5 +42,5 @@
|
||||
"devDependencies": {
|
||||
"@scrypted/sdk": "file:../../sdk"
|
||||
},
|
||||
"version": "0.1.85"
|
||||
"version": "0.1.86"
|
||||
}
|
||||
|
||||
@@ -204,6 +204,8 @@ class OpenVINOPlugin(
|
||||
labels_contents = open(labelsFile, "r").read()
|
||||
self.labels = parse_label_contents(labels_contents)
|
||||
|
||||
self.faceDevice = None
|
||||
self.textDevice = None
|
||||
asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
|
||||
|
||||
async def getSettings(self) -> list[Setting]:
|
||||
@@ -396,7 +398,9 @@ class OpenVINOPlugin(
|
||||
|
||||
async def getDevice(self, nativeId: str) -> Any:
|
||||
if nativeId == "facerecognition":
|
||||
return OpenVINOFaceRecognition(self, nativeId)
|
||||
self.faceDevice = self.faceDevice or OpenVINOFaceRecognition(self, nativeId)
|
||||
return self.faceDevice
|
||||
elif nativeId == "textrecognition":
|
||||
return OpenVINOTextRecognition(self, nativeId)
|
||||
self.textDevice = self.textDevice or OpenVINOTextRecognition(self, nativeId)
|
||||
return self.textDevice
|
||||
raise Exception("unknown device")
|
||||
|
||||
9
plugins/openvino/src/ov/async_infer.py
Normal file
9
plugins/openvino/src/ov/async_infer.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import asyncio
|
||||
|
||||
async def start_async(infer_request):
    """Start ``infer_request`` asynchronously and wait until it completes.

    A completion callback is registered on the request, the request is
    started, and the coroutine suspends until the callback fires. Nothing is
    returned; the caller reads the outputs from ``infer_request`` afterwards.
    """
    loop = asyncio.get_event_loop()
    future = loop.create_future()

    def finish():
        # The awaiting task may have been cancelled in the meantime; setting a
        # result on a finished future would raise InvalidStateError.
        if not future.done():
            future.set_result(None)

    def callback(status = None, result = None):
        # NOTE(review): the runtime presumably invokes this from one of its
        # own threads. asyncio futures are not thread-safe, so the completion
        # is handed to the event loop instead of being set directly here.
        loop.call_soon_threadsafe(finish)

    infer_request.set_callback(callback, None)
    infer_request.start_async()
    await future
|
||||
@@ -1,24 +1,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import openvino.runtime as ov
|
||||
from ov import async_infer
|
||||
from PIL import Image
|
||||
|
||||
import numpy as np
|
||||
|
||||
from predict.face_recognize import FaceRecognizeDetection
|
||||
|
||||
|
||||
def euclidean_distance(arr1, arr2):
|
||||
return np.linalg.norm(arr1 - arr2)
|
||||
|
||||
|
||||
def cosine_similarity(vector_a, vector_b):
|
||||
dot_product = np.dot(vector_a, vector_b)
|
||||
norm_a = np.linalg.norm(vector_a)
|
||||
norm_b = np.linalg.norm(vector_b)
|
||||
similarity = dot_product / (norm_a * norm_b)
|
||||
return similarity
|
||||
|
||||
class OpenVINOFaceRecognition(FaceRecognizeDetection):
|
||||
def __init__(self, plugin, nativeId: str | None = None):
|
||||
self.plugin = plugin
|
||||
@@ -40,32 +29,20 @@ class OpenVINOFaceRecognition(FaceRecognizeDetection):
|
||||
print(xmlFile, binFile)
|
||||
return self.plugin.core.compile_model(xmlFile, self.plugin.mode)
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
async def predictDetectModel(self, input: Image.Image):
|
||||
infer_request = self.detectModel.create_infer_request()
|
||||
im = np.stack([input])
|
||||
im = np.expand_dims(input, axis=0)
|
||||
im = im.transpose((0, 3, 1, 2)) # BHWC to BCHW, (n, 3, h, w)
|
||||
im = im.astype(np.float32) / 255.0
|
||||
im = np.ascontiguousarray(im) # contiguous
|
||||
im = ov.Tensor(array=im)
|
||||
input_tensor = im
|
||||
infer_request.set_input_tensor(input_tensor)
|
||||
infer_request.start_async()
|
||||
infer_request.wait()
|
||||
infer_request.set_input_tensor(im)
|
||||
await async_infer.start_async(infer_request)
|
||||
return infer_request.output_tensors[0].data[0]
|
||||
|
||||
def predictFaceModel(self, input):
|
||||
async def predictFaceModel(self, input: np.ndarray):
|
||||
im = ov.Tensor(array=input)
|
||||
infer_request = self.faceModel.create_infer_request()
|
||||
infer_request.set_input_tensor(im)
|
||||
infer_request.start_async()
|
||||
infer_request.wait()
|
||||
await async_infer.start_async(infer_request)
|
||||
return infer_request.output_tensors[0].data[0]
|
||||
|
||||
def predictTextModel(self, input):
|
||||
input = input.astype(np.float32)
|
||||
im = ov.Tensor(array=input)
|
||||
infer_request = self.textModel.create_infer_request()
|
||||
infer_request.set_input_tensor(im)
|
||||
infer_request.start_async()
|
||||
infer_request.wait()
|
||||
return infer_request.output_tensors[0].data
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import openvino.runtime as ov
|
||||
import numpy as np
|
||||
import openvino.runtime as ov
|
||||
from ov import async_infer
|
||||
|
||||
from predict.text_recognize import TextRecognition
|
||||
|
||||
@@ -27,20 +28,18 @@ class OpenVINOTextRecognition(TextRecognition):
|
||||
print(xmlFile, binFile)
|
||||
return self.plugin.core.compile_model(xmlFile, self.plugin.mode)
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
async def predictDetectModel(self, input: np.ndarray):
|
||||
infer_request = self.detectModel.create_infer_request()
|
||||
im = ov.Tensor(array=input)
|
||||
input_tensor = im
|
||||
infer_request.set_input_tensor(input_tensor)
|
||||
infer_request.start_async()
|
||||
infer_request.wait()
|
||||
await async_infer.start_async(infer_request)
|
||||
return infer_request.output_tensors[0].data
|
||||
|
||||
def predictTextModel(self, input):
|
||||
async def predictTextModel(self, input: np.ndarray):
|
||||
input = input.astype(np.float32)
|
||||
im = ov.Tensor(array=input)
|
||||
infer_request = self.textModel.create_infer_request()
|
||||
infer_request.set_input_tensor(im)
|
||||
infer_request.start_async()
|
||||
infer_request.wait()
|
||||
await async_infer.start_async(infer_request)
|
||||
return infer_request.output_tensors[0].data
|
||||
|
||||
@@ -3,14 +3,10 @@ from __future__ import annotations
|
||||
import asyncio
|
||||
from asyncio import Future
|
||||
import base64
|
||||
import concurrent.futures
|
||||
import os
|
||||
from typing import Any, Tuple, List
|
||||
|
||||
import numpy as np
|
||||
# import Quartz
|
||||
import scrypted_sdk
|
||||
# from Foundation import NSData, NSMakeSize
|
||||
from PIL import Image
|
||||
from scrypted_sdk import (
|
||||
Setting,
|
||||
@@ -21,10 +17,8 @@ from scrypted_sdk import (
|
||||
)
|
||||
import traceback
|
||||
|
||||
# import Vision
|
||||
from predict import PredictPlugin
|
||||
from common import yolo
|
||||
from common.text import prepare_text_result, process_text_result
|
||||
|
||||
def euclidean_distance(arr1, arr2):
|
||||
return np.linalg.norm(arr1 - arr2)
|
||||
@@ -37,9 +31,6 @@ def cosine_similarity(vector_a, vector_b):
|
||||
similarity = dot_product / (norm_a * norm_b)
|
||||
return similarity
|
||||
|
||||
|
||||
predictExecutor = concurrent.futures.ThreadPoolExecutor(1, "Recognize")
|
||||
|
||||
class FaceRecognizeDetection(PredictPlugin):
|
||||
def __init__(self, nativeId: str | None = None):
|
||||
super().__init__(nativeId=nativeId)
|
||||
@@ -56,7 +47,6 @@ class FaceRecognizeDetection(PredictPlugin):
|
||||
self.minThreshold = 0.7
|
||||
|
||||
self.detectModel = self.downloadModel("scrypted_yolov9c_flt")
|
||||
self.textModel = self.downloadModel("vgg_english_g2")
|
||||
self.faceModel = self.downloadModel("inception_resnet_v1")
|
||||
|
||||
def downloadModel(self, model: str):
|
||||
@@ -81,9 +71,7 @@ class FaceRecognizeDetection(PredictPlugin):
|
||||
return "rgb"
|
||||
|
||||
async def detect_once(self, input: Image.Image, settings: Any, src_size, cvss):
|
||||
results = await asyncio.get_event_loop().run_in_executor(
|
||||
predictExecutor, lambda: self.predictDetectModel(input)
|
||||
)
|
||||
results = await self.predictDetectModel(input)
|
||||
objs = yolo.parse_yolov9(results)
|
||||
ret = self.create_detection_result(objs, src_size, cvss)
|
||||
return ret
|
||||
@@ -112,10 +100,7 @@ class FaceRecognizeDetection(PredictPlugin):
|
||||
processed_tensor = (image_tensor - 127.5) / 128.0
|
||||
processed_tensor = np.expand_dims(processed_tensor, axis=0)
|
||||
|
||||
output = await asyncio.get_event_loop().run_in_executor(
|
||||
predictExecutor,
|
||||
lambda: self.predictFaceModel(processed_tensor)
|
||||
)
|
||||
output = await self.predictFaceModel(processed_tensor)
|
||||
|
||||
b = output.tobytes()
|
||||
embedding = base64.b64encode(b).decode("utf-8")
|
||||
@@ -125,29 +110,12 @@ class FaceRecognizeDetection(PredictPlugin):
|
||||
traceback.print_exc()
|
||||
pass
|
||||
|
||||
def predictTextModel(self, input):
|
||||
async def predictDetectModel(self, input: Image.Image):
|
||||
pass
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
async def predictFaceModel(self, input: np.ndarray):
|
||||
pass
|
||||
|
||||
def predictFaceModel(self, input):
|
||||
pass
|
||||
|
||||
async def setLabel(self, d: ObjectDetectionResult, image: scrypted_sdk.Image):
|
||||
try:
|
||||
|
||||
image_tensor = await prepare_text_result(d, image)
|
||||
preds = await asyncio.get_event_loop().run_in_executor(
|
||||
predictExecutor,
|
||||
lambda: self.predictTextModel(image_tensor),
|
||||
)
|
||||
d['label'] = process_text_result(preds)
|
||||
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
pass
|
||||
|
||||
async def run_detection_image(
|
||||
self, image: scrypted_sdk.Image, detection_session: ObjectDetectionSession
|
||||
) -> ObjectsDetected:
|
||||
@@ -206,10 +174,6 @@ class FaceRecognizeDetection(PredictPlugin):
|
||||
for d in ret["detections"]:
|
||||
if d["className"] == "face":
|
||||
futures.append(asyncio.ensure_future(self.setEmbedding(d, image)))
|
||||
# elif d["className"] == "plate":
|
||||
# futures.append(asyncio.ensure_future(self.setLabel(d, image)))
|
||||
# elif d['className'] == 'text':
|
||||
# futures.append(asyncio.ensure_future(self.setLabel(d, image)))
|
||||
|
||||
if len(futures):
|
||||
await asyncio.wait(futures)
|
||||
|
||||
@@ -41,10 +41,10 @@ class TextRecognition(PredictPlugin):
|
||||
def downloadModel(self, model: str):
|
||||
pass
|
||||
|
||||
def predictDetectModel(self, input):
|
||||
async def predictDetectModel(self, input: np.ndarray):
|
||||
pass
|
||||
|
||||
def predictTextModel(self, input):
|
||||
async def predictTextModel(self, input: np.ndarray):
|
||||
pass
|
||||
|
||||
async def detect_once(
|
||||
@@ -56,9 +56,7 @@ class TextRecognition(PredictPlugin):
|
||||
# add extra dimension to tensor
|
||||
image_tensor = np.expand_dims(image_tensor, axis=0)
|
||||
|
||||
y = await asyncio.get_event_loop().run_in_executor(
|
||||
predictExecutor, lambda: self.predictDetectModel(image_tensor)
|
||||
)
|
||||
y = await self.predictDetectModel(image_tensor)
|
||||
|
||||
estimate_num_chars = False
|
||||
ratio_h = ratio_w = 1
|
||||
@@ -158,10 +156,7 @@ class TextRecognition(PredictPlugin):
|
||||
try:
|
||||
|
||||
image_tensor = await prepare_text_result(d, image, skew_angle)
|
||||
preds = await asyncio.get_event_loop().run_in_executor(
|
||||
predictExecutor,
|
||||
lambda: self.predictTextModel(image_tensor),
|
||||
)
|
||||
preds = await self.predictTextModel(image_tensor)
|
||||
d["label"] = process_text_result(preds)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user