mirror of
https://github.com/koush/scrypted.git
synced 2026-02-03 06:03:27 +00:00
predict: add segmentation models to onnx/coreml and refactor openvino
This commit is contained in:
@@ -16,6 +16,7 @@ from common import yolo
|
||||
from coreml.face_recognition import CoreMLFaceRecognition
|
||||
from coreml.custom_detection import CoreMLCustomDetection
|
||||
from coreml.clip_embedding import CoreMLClipEmbedding
|
||||
from coreml.segment import CoreMLSegmentation
|
||||
|
||||
try:
|
||||
from coreml.text_recognition import CoreMLTextRecognition
|
||||
@@ -105,6 +106,7 @@ class CoreMLPlugin(
|
||||
self.faceDevice = None
|
||||
self.textDevice = None
|
||||
self.clipDevice = None
|
||||
self.segmentDevice = None
|
||||
|
||||
if not self.forked:
|
||||
asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
|
||||
@@ -149,6 +151,18 @@ class CoreMLPlugin(
|
||||
"name": "CoreML CLIP Embedding",
|
||||
}
|
||||
)
|
||||
|
||||
await scrypted_sdk.deviceManager.onDeviceDiscovered(
|
||||
{
|
||||
"nativeId": "segment",
|
||||
"type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
|
||||
"interfaces": [
|
||||
scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
|
||||
scrypted_sdk.ScryptedInterface.ObjectDetection.value,
|
||||
],
|
||||
"name": "CoreML Segmentation",
|
||||
}
|
||||
)
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -162,6 +176,9 @@ class CoreMLPlugin(
|
||||
elif nativeId == "clipembedding":
|
||||
self.clipDevice = self.clipDevice or CoreMLClipEmbedding(self, nativeId)
|
||||
return self.clipDevice
|
||||
elif nativeId == "segment":
|
||||
self.segmentDevice = self.segmentDevice or CoreMLSegmentation(self, nativeId)
|
||||
return self.segmentDevice
|
||||
custom_model = self.custom_models.get(nativeId, None)
|
||||
if custom_model:
|
||||
return custom_model
|
||||
|
||||
48
plugins/coreml/src/coreml/segment.py
Normal file
48
plugins/coreml/src/coreml/segment.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import numpy as np
|
||||
|
||||
import coremltools as ct
|
||||
from common import async_infer
|
||||
from common import yolov9_seg
|
||||
from predict.segment import Segmentation
|
||||
|
||||
prepareExecutor, predictExecutor = async_infer.create_executors("Segment")
|
||||
|
||||
|
||||
class CoreMLSegmentation(Segmentation):
    """YOLOv9 instance segmentation backed by a Core ML model package."""

    def __init__(self, plugin, nativeId: str):
        super().__init__(plugin=plugin, nativeId=nativeId)

    def loadModel(self, name):
        """Resolve *name* via the plugin's HuggingFace download (with local
        fallback) and load the corresponding .mlpackage."""
        model_dir = self.plugin.downloadHuggingFaceModelLocalFallback(name)
        package_path = os.path.join(model_dir, f"{name}.mlpackage")
        return ct.models.MLModel(package_path)

    async def detect_once(self, input, settings, src_size, cvss):
        def run_inference():
            # Core ML exposes the input tensor name only through the model spec.
            tensor_name = self.model.get_spec().description.input[0].name
            results = self.model.predict({tensor_name: input})

            values = list(results.values())
            detections, prototypes = values[0], values[1]
            # nm=32: YOLOv9-seg emits 32 mask coefficients per detection.
            detections = yolov9_seg.non_max_suppression(detections, nm=32)

            return self.process_segmentation_output(detections, prototypes)

        try:
            # Keep inference off the event loop; the dedicated predict
            # executor serializes access to the model.
            objs = await asyncio.get_event_loop().run_in_executor(
                predictExecutor, run_inference
            )
        except:
            traceback.print_exc()
            raise

        return self.create_detection_result(objs, src_size, cvss)
|
||||
@@ -1 +1 @@
|
||||
../../../openvino/src/ov/async_infer.py
|
||||
../../../openvino/src/common/async_infer.py
|
||||
@@ -24,6 +24,7 @@ from predict import PredictPlugin
|
||||
|
||||
from .face_recognition import ONNXFaceRecognition
|
||||
from .clip_embedding import ONNXClipEmbedding
|
||||
from .segment import ONNXSegmentation
|
||||
|
||||
try:
|
||||
from .text_recognition import ONNXTextRecognition
|
||||
@@ -155,6 +156,7 @@ class ONNXPlugin(
|
||||
self.faceDevice = None
|
||||
self.textDevice = None
|
||||
self.clipDevice = None
|
||||
self.segmentDevice = None
|
||||
|
||||
if not self.forked:
|
||||
asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
|
||||
@@ -199,6 +201,18 @@ class ONNXPlugin(
|
||||
"name": "ONNX CLIP Embedding",
|
||||
}
|
||||
)
|
||||
|
||||
await scrypted_sdk.deviceManager.onDeviceDiscovered(
|
||||
{
|
||||
"nativeId": "segment",
|
||||
"type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
|
||||
"interfaces": [
|
||||
scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
|
||||
scrypted_sdk.ScryptedInterface.ObjectDetection.value,
|
||||
],
|
||||
"name": "ONNX Segmentation",
|
||||
}
|
||||
)
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -212,6 +226,9 @@ class ONNXPlugin(
|
||||
elif nativeId == "clipembedding":
|
||||
self.clipDevice = self.clipDevice or ONNXClipEmbedding(self, nativeId)
|
||||
return self.clipDevice
|
||||
elif nativeId == "segment":
|
||||
self.segmentDevice = self.segmentDevice or ONNXSegmentation(self, nativeId)
|
||||
return self.segmentDevice
|
||||
custom_model = self.custom_models.get(nativeId, None)
|
||||
if custom_model:
|
||||
return custom_model
|
||||
|
||||
55
plugins/onnx/src/ort/segment.py
Normal file
55
plugins/onnx/src/ort/segment.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import traceback
|
||||
|
||||
import numpy as np
|
||||
|
||||
import onnxruntime
|
||||
from predict.segment import Segmentation
|
||||
from common import yolov9_seg
|
||||
from common import async_infer
|
||||
|
||||
prepareExecutor, predictExecutor = async_infer.create_executors("Segment")
|
||||
|
||||
|
||||
|
||||
class ONNXSegmentation(Segmentation):
    """YOLOv9 instance segmentation backed by an ONNX Runtime session."""

    def __init__(self, plugin, nativeId: str):
        super().__init__(plugin=plugin, nativeId=nativeId)

    def loadModel(self, name):
        """Resolve *name* via the plugin's HuggingFace download (with local
        fallback) and open an InferenceSession on the .onnx file."""
        model_dir = self.plugin.downloadHuggingFaceModelLocalFallback(name)
        onnx_path = os.path.join(model_dir, f"{name}.onnx")
        return onnxruntime.InferenceSession(onnx_path)

    async def detect_once(self, input, settings, src_size, cvss):
        def to_tensor():
            # HWC uint8 image -> BCHW float32 in [0, 1], contiguous for ORT.
            batch = np.expand_dims(input, axis=0)
            batch = batch.transpose((0, 3, 1, 2))  # BHWC to BCHW, (n, 3, h, w)
            batch = batch.astype(np.float32) / 255.0
            return np.ascontiguousarray(batch)

        def run_inference():
            tensor = to_tensor()
            outputs = self.model.run(None, {self.input_name: tensor})

            detections, prototypes = outputs[0], outputs[1]
            # nm=32: YOLOv9-seg emits 32 mask coefficients per detection.
            detections = yolov9_seg.non_max_suppression(detections, nm=32)

            return self.process_segmentation_output(detections, prototypes)

        try:
            # Keep inference off the event loop; the dedicated predict
            # executor serializes access to the session.
            objs = await asyncio.get_event_loop().run_in_executor(
                predictExecutor, run_inference
            )
        except:
            traceback.print_exc()
            raise

        return self.create_detection_result(objs, src_size, cvss)
|
||||
@@ -1,6 +1,5 @@
|
||||
import concurrent.futures
|
||||
|
||||
|
||||
def create_executors(name: str):
|
||||
prepare = concurrent.futures.ThreadPoolExecutor(1, f"{name}Prepare")
|
||||
predict = concurrent.futures.ThreadPoolExecutor(1, f"{name}Predict")
|
||||
@@ -7,6 +7,7 @@ that are equivalent to their torch counterparts in utils/segment/general.py.
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
import time
|
||||
|
||||
def crop_mask_numpy(masks, boxes):
|
||||
"""
|
||||
@@ -197,4 +198,158 @@ def masks2polygons_numpy(masks):
|
||||
"""
|
||||
segments = masks2segments_numpy(masks)
|
||||
# Convert to list of [x, y] pairs
|
||||
return [segment.tolist() for segment in segments]
|
||||
return [segment.tolist() for segment in segments]
|
||||
|
||||
|
||||
def xywh2xyxy(x):
    """Convert boxes from [x_center, y_center, width, height] to [x1, y1, x2, y2].

    Operates on a copy; the input array is left untouched.
    """
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w  # left
    out[:, 1] = x[:, 1] - half_h  # top
    out[:, 2] = x[:, 0] + half_w  # right
    out[:, 3] = x[:, 1] + half_h  # bottom
    return out
|
||||
|
||||
|
||||
def box_iou(box1, box2):
    """Pairwise IoU between two sets of xyxy boxes.

    Args:
        box1: (n, 4) array of [x1, y1, x2, y2] boxes.
        box2: (m, 4) array of [x1, y1, x2, y2] boxes.

    Returns:
        (n, m) float32 matrix of IoU values; 0 where the union is empty
        (matches the original element-wise guard).

    Vectorized with broadcasting: replaces the previous O(n*m) Python
    double loop of scalar np.maximum/np.minimum calls with a few whole-array
    operations, producing identical values.
    """
    box1 = np.asarray(box1, dtype=np.float32)
    box2 = np.asarray(box2, dtype=np.float32)

    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # Broadcast (n, 1) against (1, m) to get all pairwise intersections.
    inter_w = np.minimum(box1[:, None, 2], box2[None, :, 2]) - np.maximum(box1[:, None, 0], box2[None, :, 0])
    inter_h = np.minimum(box1[:, None, 3], box2[None, :, 3]) - np.maximum(box1[:, None, 1], box2[None, :, 1])
    inter_area = np.maximum(inter_w, 0) * np.maximum(inter_h, 0)

    union = area1[:, None] + area2[None, :] - inter_area
    # Avoid divide-by-zero: where union <= 0 the IoU is defined as 0.
    safe_union = np.maximum(union, np.finfo(np.float32).tiny)
    iou = np.where(union > 0, inter_area / safe_union, 0.0)
    return iou.astype(np.float32)
|
||||
|
||||
|
||||
def nms(boxes, scores, iou_thres):
    """Greedy Non-Maximum Suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other box whose IoU with it reaches *iou_thres*. Returns the kept
    indices as an int32 array.
    """
    if len(boxes) == 0:
        return np.array([], dtype=np.int32)

    # Process candidates from highest to lowest score.
    order = np.argsort(-scores)

    keep = []
    while len(order) > 0:
        winner = order[0]
        keep.append(winner)

        if len(order) == 1:
            break

        # Overlap of the winner against everything still in the queue;
        # survivors are those below the suppression threshold.
        overlaps = box_iou(boxes[order[0:1]], boxes[order[1:]])[0]
        order = order[1:][overlaps < iou_thres]

    return np.array(keep, dtype=np.int32)
|
||||
|
||||
|
||||
def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
    max_det=300,
    nm=0,  # number of masks
):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections.

    NumPy port of the Ultralytics YOLO NMS. Expects raw model output of shape
    (batch, 4 + num_classes + nm, num_anchors); boxes are xywh.

    Returns:
        list of length batch; each entry is an (n, 6 + nm) float32 array of
        [x1, y1, x2, y2, conf, cls, *mask_coeffs] rows.
    """

    if isinstance(prediction, (list, tuple)):  # model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[1] - nm - 4  # number of classes
    mi = 4 + nc  # mask start index
    xc = np.max(prediction[:, 4:mi], axis=1) > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into nms()
    time_limit = 2.5 + 0.05 * bs  # seconds to quit after
    redundant = True  # require redundant detections (only used with merge-NMS)
    multi_label &= nc > 1  # multiple labels per box
    merge = False  # use merge-NMS

    t = time.time()
    output = [np.zeros((0, 6 + nm), dtype=np.float32)] * bs
    for xi, pred_x in enumerate(prediction):  # image index, image inference
        # Transpose to (anchors, channels) and keep confident candidates only.
        x = pred_x.T[xc[xi]]

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = np.zeros((len(lb), nc + nm + 5), dtype=x.dtype)
            v[:, :4] = lb[:, 1:5]  # box
            v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0  # cls one-hot
            x = np.concatenate((x, v), 0)

        # If none remain process next image
        if x.shape[0] == 0:
            continue

        # Split into box / class-score / mask-coefficient columns.
        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:] if nm > 0 else np.zeros((x.shape[0], nm), dtype=x.dtype)

        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)

        if multi_label:
            # One output row per (box, class) pair above threshold.
            i, j = np.where(cls > conf_thres)
            x = np.concatenate((box[i], x[i, 4 + j][:, None], j[:, None].astype(np.float32), mask[i]), 1)
        else:  # best class only
            j = np.argmax(cls, axis=1, keepdims=True)
            conf = cls[np.arange(len(cls)), j.flatten()][:, None]
            x = np.concatenate((box, conf, j.astype(np.float32), mask), 1)[conf.flatten() > conf_thres]

        # Filter by class
        if classes is not None:
            class_tensor = np.array(classes, dtype=np.float32)
            mask = np.any(x[:, 5:6] == class_tensor, axis=1)
            x = x[mask]

        # Check shape
        n = x.shape[0]  # number of boxes
        if n == 0:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence and truncate
        else:
            x = x[x[:, 4].argsort()[::-1]]  # sort by confidence

        # Batched NMS: offset boxes per class so classes never suppress each other.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = np.dot(weights, x[:, :4]).astype(np.float32) / weights.sum(1, keepdims=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            import warnings
            warnings.warn(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded

    return output
|
||||
@@ -7,7 +7,7 @@ import numpy as np
|
||||
import openvino as ov
|
||||
from PIL import Image
|
||||
|
||||
from ov import async_infer
|
||||
from common import async_infer
|
||||
from predict.clip import ClipEmbedding
|
||||
from scrypted_sdk import ObjectsDetected
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import numpy as np
|
||||
import openvino as ov
|
||||
from PIL import Image
|
||||
|
||||
from ov import async_infer
|
||||
from common import async_infer
|
||||
from predict.custom_detect import CustomDetection
|
||||
from scrypted_sdk import ObjectsDetected
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
import openvino as ov
|
||||
from ov import async_infer
|
||||
from common import async_infer
|
||||
from predict.face_recognize import FaceRecognizeDetection
|
||||
|
||||
faceDetectPrepare, faceDetectPredict = async_infer.create_executors("FaceDetect")
|
||||
|
||||
@@ -6,195 +6,14 @@ import traceback
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ov import async_infer
|
||||
import openvino as ov
|
||||
from predict.segment import Segmentation
|
||||
from predict import Prediction
|
||||
from predict.rectangle import Rectangle
|
||||
from common import yolo
|
||||
import time
|
||||
from common import yolov9_seg
|
||||
from common import async_infer
|
||||
|
||||
prepareExecutor, predictExecutor = async_infer.create_executors("Segment")
|
||||
|
||||
|
||||
def xywh2xyxy(x):
|
||||
"""Convert [x_center, y_center, width, height] to [x1, y1, x2, y2]"""
|
||||
y = np.copy(x)
|
||||
y[:, 0] = x[:, 0] - x[:, 2] / 2 # x1
|
||||
y[:, 1] = x[:, 1] - x[:, 3] / 2 # y1
|
||||
y[:, 2] = x[:, 0] + x[:, 2] / 2 # x2
|
||||
y[:, 3] = x[:, 1] + x[:, 3] / 2 # y2
|
||||
return y
|
||||
|
||||
|
||||
def box_iou(box1, box2):
|
||||
"""Calculate IoU between two sets of boxes"""
|
||||
# box1 shape: (n, 4), box2 shape: (m, 4)
|
||||
# Compute intersection areas
|
||||
area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
|
||||
area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
|
||||
|
||||
iou = np.zeros((len(box1), len(box2)), dtype=np.float32)
|
||||
|
||||
for i in range(len(box1)):
|
||||
for j in range(len(box2)):
|
||||
# Intersection
|
||||
inter_x1 = np.maximum(box1[i, 0], box2[j, 0])
|
||||
inter_y1 = np.maximum(box1[i, 1], box2[j, 1])
|
||||
inter_x2 = np.minimum(box1[i, 2], box2[j, 2])
|
||||
inter_y2 = np.minimum(box1[i, 3], box2[j, 3])
|
||||
|
||||
inter_w = np.maximum(0, inter_x2 - inter_x1)
|
||||
inter_h = np.maximum(0, inter_y2 - inter_y1)
|
||||
inter_area = inter_w * inter_h
|
||||
|
||||
# Union
|
||||
union = area1[i] + area2[j] - inter_area
|
||||
iou[i, j] = inter_area / union if union > 0 else 0
|
||||
|
||||
return iou
|
||||
|
||||
|
||||
def nms(boxes, scores, iou_thres):
|
||||
"""Non-Maximum Suppression implementation in NumPy"""
|
||||
if len(boxes) == 0:
|
||||
return np.array([], dtype=np.int32)
|
||||
|
||||
# Sort by scores in descending order
|
||||
indices = np.argsort(-scores)
|
||||
|
||||
keep = []
|
||||
while len(indices) > 0:
|
||||
i = indices[0]
|
||||
keep.append(i)
|
||||
|
||||
if len(indices) == 1:
|
||||
break
|
||||
|
||||
# Calculate IoU between the current box and all remaining boxes
|
||||
iou_scores = box_iou(boxes[indices[0:1]], boxes[indices[1:]])[0]
|
||||
|
||||
# Keep boxes with IoU below threshold
|
||||
indices = indices[1:][iou_scores < iou_thres]
|
||||
|
||||
return np.array(keep, dtype=np.int32)
|
||||
|
||||
|
||||
def non_max_suppression(
|
||||
prediction,
|
||||
conf_thres=0.25,
|
||||
iou_thres=0.45,
|
||||
classes=None,
|
||||
agnostic=False,
|
||||
multi_label=False,
|
||||
labels=(),
|
||||
max_det=300,
|
||||
nm=0, # number of masks
|
||||
):
|
||||
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
|
||||
|
||||
Returns:
|
||||
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
|
||||
"""
|
||||
|
||||
if isinstance(prediction, (list, tuple)): # YOLO model in validation model, output = (inference_out, loss_out)
|
||||
prediction = prediction[0] # select only inference output
|
||||
|
||||
bs = prediction.shape[0] # batch size
|
||||
nc = prediction.shape[1] - nm - 4 # number of classes
|
||||
mi = 4 + nc # mask start index
|
||||
xc = np.max(prediction[:, 4:mi], axis=1) > conf_thres # candidates
|
||||
|
||||
# Checks
|
||||
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
||||
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
||||
|
||||
# Settings
|
||||
# min_wh = 2 # (pixels) minimum box width and height
|
||||
max_wh = 7680 # (pixels) maximum box width and height
|
||||
max_nms = 30000 # maximum number of boxes into NMS()
|
||||
time_limit = 2.5 + 0.05 * bs # seconds to quit after
|
||||
redundant = True # require redundant detections
|
||||
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
||||
merge = False # use merge-NMS
|
||||
|
||||
t = time.time()
|
||||
output = [np.zeros((0, 6 + nm), dtype=np.float32)] * bs
|
||||
for xi, pred_x in enumerate(prediction): # image index, image inference
|
||||
# Apply constraints
|
||||
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
||||
x = pred_x.T[xc[xi]] # confidence
|
||||
|
||||
# Cat apriori labels if autolabelling
|
||||
if labels and len(labels[xi]):
|
||||
lb = labels[xi]
|
||||
v = np.zeros((len(lb), nc + nm + 5), dtype=x.dtype)
|
||||
v[:, :4] = lb[:, 1:5] # box
|
||||
v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0 # cls
|
||||
x = np.concatenate((x, v), 0)
|
||||
|
||||
# If none remain process next image
|
||||
if x.shape[0] == 0:
|
||||
continue
|
||||
|
||||
# Detections matrix nx6 (xyxy, conf, cls)
|
||||
box = x[:, :4]
|
||||
cls = x[:, 4:4 + nc]
|
||||
mask = x[:, 4 + nc:] if nm > 0 else np.zeros((x.shape[0], nm), dtype=x.dtype)
|
||||
|
||||
box = xywh2xyxy(box) # center_x, center_y, width, height) to (x1, y1, x2, y2)
|
||||
|
||||
if multi_label:
|
||||
i, j = np.where(cls > conf_thres)
|
||||
x = np.concatenate((box[i], x[i, 4 + j][:, None], j[:, None].astype(np.float32), mask[i]), 1)
|
||||
else: # best class only
|
||||
j = np.argmax(cls, axis=1, keepdims=True)
|
||||
conf = cls[np.arange(len(cls)), j.flatten()][:, None]
|
||||
x = np.concatenate((box, conf, j.astype(np.float32), mask), 1)[conf.flatten() > conf_thres]
|
||||
|
||||
# Filter by class
|
||||
if classes is not None:
|
||||
class_tensor = np.array(classes, dtype=np.float32)
|
||||
mask = np.any(x[:, 5:6] == class_tensor, axis=1)
|
||||
x = x[mask]
|
||||
|
||||
# Apply finite constraint
|
||||
# if not np.isfinite(x).all():
|
||||
# x = x[np.isfinite(x).all(1)]
|
||||
|
||||
# Check shape
|
||||
n = x.shape[0] # number of boxes
|
||||
if n == 0: # no boxes
|
||||
continue
|
||||
elif n > max_nms: # excess boxes
|
||||
x = x[x[:, 4].argsort()[::-1][:max_nms]] # sort by confidence
|
||||
else:
|
||||
x = x[x[:, 4].argsort()[::-1]] # sort by confidence
|
||||
|
||||
# Batched NMS
|
||||
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
||||
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
||||
i = nms(boxes, scores, iou_thres) # NMS
|
||||
if i.shape[0] > max_det: # limit detections
|
||||
i = i[:max_det]
|
||||
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
||||
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
||||
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
||||
weights = iou * scores[None] # box weights
|
||||
x[i, :4] = np.dot(weights, x[:, :4]).astype(np.float32) / weights.sum(1, keepdims=True) # merged boxes
|
||||
if redundant:
|
||||
i = i[iou.sum(1) > 1] # require redundancy
|
||||
|
||||
output[xi] = x[i]
|
||||
if (time.time() - t) > time_limit:
|
||||
import warnings
|
||||
warnings.warn(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
|
||||
break # time limit exceeded
|
||||
|
||||
return output
|
||||
|
||||
|
||||
|
||||
class OpenVINOSegmentation(Segmentation):
|
||||
def __init__(self, plugin, nativeId: str):
|
||||
@@ -222,41 +41,9 @@ class OpenVINOSegmentation(Segmentation):
|
||||
|
||||
pred = output_tensors[0]
|
||||
proto = output_tensors[1]
|
||||
pred = non_max_suppression(pred, nm=32)
|
||||
pred = yolov9_seg.non_max_suppression(pred, nm=32)
|
||||
|
||||
objs = []
|
||||
for det in pred:
|
||||
if not len(det):
|
||||
continue
|
||||
# Upsample masks to input image space (320x320)
|
||||
masks = yolov9_seg.process_mask_numpy(proto.squeeze(0), det[:, 6:], det[:, :4], (320, 320), upsample=True)
|
||||
# Convert masks to contour points
|
||||
segments = yolov9_seg.masks2segments_numpy(masks)
|
||||
# Create Prediction instances
|
||||
for i in range(len(det)):
|
||||
# Convert all contours for this detection to list of [x, y] tuples
|
||||
mask_contours = segments[i]
|
||||
clip_paths = []
|
||||
for contour in mask_contours:
|
||||
if len(contour) > 0 and contour.shape[1] == 2:
|
||||
single_path = [(float(contour[j, 0]), float(contour[j, 1])) for j in range(len(contour))]
|
||||
clip_paths.append(single_path)
|
||||
|
||||
prediction = Prediction(
|
||||
id=int(det[i, 5]), # class_id
|
||||
score=float(det[i, 4]), # confidence
|
||||
bbox=Rectangle(
|
||||
xmin=float(det[i, 0]), # x1
|
||||
ymin=float(det[i, 1]), # y1
|
||||
xmax=float(det[i, 2]), # x2
|
||||
ymax=float(det[i, 3]), # y2
|
||||
),
|
||||
embedding=None, # no embedding for segmentation
|
||||
clipPaths=clip_paths # list of polygon outlines [[[x, y], ...], ...] at 320x320
|
||||
)
|
||||
objs.append(prediction)
|
||||
|
||||
return objs
|
||||
return self.process_segmentation_output(pred, proto)
|
||||
|
||||
try:
|
||||
objs = await asyncio.get_event_loop().run_in_executor(
|
||||
@@ -268,3 +55,4 @@ class OpenVINOSegmentation(Segmentation):
|
||||
|
||||
ret = self.create_detection_result(objs, src_size, cvss)
|
||||
return ret
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import numpy as np
|
||||
|
||||
import openvino as ov
|
||||
from ov import async_infer
|
||||
from common import async_infer
|
||||
from predict.text_recognize import TextRecognition
|
||||
|
||||
textDetectPrepare, textDetectPredict = async_infer.create_executors("TextDetect")
|
||||
|
||||
@@ -424,6 +424,8 @@ class PredictPlugin(DetectPlugin, scrypted_sdk.ClusterForkInterface, scrypted_sd
|
||||
ret = await result.getFaceRecognition()
|
||||
elif self.nativeId == "clipembedding":
|
||||
ret = await result.getClipEmbedding()
|
||||
elif self.nativeId == "segmentation":
|
||||
ret = await result.getSegmentation()
|
||||
else:
|
||||
ret = await result.getCustomDetection(self.nativeId)
|
||||
return ret
|
||||
@@ -559,6 +561,9 @@ class Fork:
|
||||
|
||||
async def getClipEmbedding(self):
|
||||
return await self.plugin.getDevice("clipembedding")
|
||||
|
||||
async def getSegmentation(self):
|
||||
return await self.plugin.getDevice("segmentation")
|
||||
|
||||
async def getCustomDetection(self, nativeId: str):
|
||||
return await self.plugin.getDevice(nativeId)
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
import numpy as np
|
||||
|
||||
|
||||
from ov import async_infer
|
||||
from common import async_infer
|
||||
from common import yolov9_seg
|
||||
from predict import PredictPlugin
|
||||
from predict import Prediction
|
||||
from predict.rectangle import Rectangle
|
||||
import asyncio
|
||||
from common import coco
|
||||
import traceback
|
||||
|
||||
customDetectPrepare, customDetectPredict = async_infer.create_executors("CustomDetect")
|
||||
customDetectPrepare, customDetectPredict = async_infer.create_executors("Segment")
|
||||
|
||||
class Segmentation(PredictPlugin):
|
||||
def __init__(self, plugin, nativeId: str):
|
||||
@@ -20,8 +24,9 @@ class Segmentation(PredictPlugin):
|
||||
self.labels = coco.COCO_LABELS
|
||||
|
||||
try:
|
||||
self.model = self.loadModel('yolov9c_seg')
|
||||
self.model = self.loadModel('scrypted_yolov9t_seg_relu')
|
||||
except:
|
||||
traceback.print_exc()
|
||||
raise
|
||||
|
||||
def loadModel(self, name: str):
|
||||
@@ -36,4 +41,49 @@ class Segmentation(PredictPlugin):
|
||||
return (self.inputwidth, self.inputheight)
|
||||
|
||||
def get_input_format(self) -> str:
|
||||
return "rgb"
|
||||
return "rgb"
|
||||
|
||||
def process_segmentation_output(self, pred, proto):
    """
    Process segmentation model outputs into a list of Prediction objects.

    Args:
        pred: Predictions output from NMS (list of per-image detection arrays,
              each row [x1, y1, x2, y2, conf, cls, *mask_coeffs])
        proto: Prototype masks for segmentation

    Returns:
        List of Prediction objects with segmentation masks (clipPaths)
    """
    objs = []
    for det in pred:
        if not len(det):
            continue
        # Upsample masks to input image space (320x320)
        masks = yolov9_seg.process_mask_numpy(proto.squeeze(0), det[:, 6:], det[:, :4], (320, 320), upsample=True)
        # Convert masks to contour points
        segments = yolov9_seg.masks2segments_numpy(masks)
        # Create Prediction instances
        for i in range(len(det)):
            # Convert all contours for this detection to list of [x, y] tuples
            mask_contours = segments[i]
            clip_paths = []
            for contour in mask_contours:
                # Only keep non-empty contours with (n, 2) point layout.
                if len(contour) > 0 and contour.shape[1] == 2:
                    single_path = [(float(contour[j, 0]), float(contour[j, 1])) for j in range(len(contour))]
                    clip_paths.append(single_path)

            prediction = Prediction(
                id=int(det[i, 5]),  # class_id
                score=float(det[i, 4]),  # confidence
                bbox=Rectangle(
                    xmin=float(det[i, 0]),  # x1
                    ymin=float(det[i, 1]),  # y1
                    xmax=float(det[i, 2]),  # x2
                    ymax=float(det[i, 3]),  # y2
                ),
                embedding=None,  # no embedding for segmentation
                clipPaths=clip_paths  # list of polygon outlines [[[x, y], ...], ...] at 320x320
            )
            objs.append(prediction)

    return objs
|
||||
Reference in New Issue
Block a user