predict: add segmentation models to onnx/coreml and refactor openvino

Koushik Dutta
2026-01-18 13:58:28 -08:00
parent f415e4f2e1
commit 2fff8b0044
14 changed files with 362 additions and 228 deletions

View File

@@ -16,6 +16,7 @@ from common import yolo
 from coreml.face_recognition import CoreMLFaceRecognition
 from coreml.custom_detection import CoreMLCustomDetection
 from coreml.clip_embedding import CoreMLClipEmbedding
+from coreml.segment import CoreMLSegmentation

 try:
     from coreml.text_recognition import CoreMLTextRecognition
@@ -105,6 +106,7 @@ class CoreMLPlugin(
         self.faceDevice = None
         self.textDevice = None
         self.clipDevice = None
+        self.segmentDevice = None

         if not self.forked:
             asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
@@ -149,6 +151,18 @@ class CoreMLPlugin(
                     "name": "CoreML CLIP Embedding",
                 }
             )
+            await scrypted_sdk.deviceManager.onDeviceDiscovered(
+                {
+                    "nativeId": "segment",
+                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                    "interfaces": [
+                        scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
+                        scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                    ],
+                    "name": "CoreML Segmentation",
+                }
+            )
         except:
             pass
@@ -162,6 +176,9 @@ class CoreMLPlugin(
         elif nativeId == "clipembedding":
            self.clipDevice = self.clipDevice or CoreMLClipEmbedding(self, nativeId)
            return self.clipDevice
+        elif nativeId == "segment":
+            self.segmentDevice = self.segmentDevice or CoreMLSegmentation(self, nativeId)
+            return self.segmentDevice

         custom_model = self.custom_models.get(nativeId, None)
         if custom_model:
             return custom_model
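
A condensed sketch of the discovery/instantiation handshake above (not scrypted API documentation): the "nativeId" announced via onDeviceDiscovered is the exact key that scrypted later passes back to getDevice, so the two strings must match ("segment" here).

async def discover(plugin):
    # announce the device; scrypted persists nativeId and hands it back later
    await scrypted_sdk.deviceManager.onDeviceDiscovered(
        {
            "nativeId": "segment",
            "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
            "interfaces": [scrypted_sdk.ScryptedInterface.ObjectDetection.value],
            "name": "CoreML Segmentation",
        }
    )

def getDevice(plugin, nativeId: str):
    if nativeId == "segment":  # must match the discovered nativeId exactly
        # lazy singleton: create on first request, reuse afterwards
        plugin.segmentDevice = plugin.segmentDevice or CoreMLSegmentation(plugin, nativeId)
        return plugin.segmentDevice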

View File

@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import asyncio
+import os
+import traceback
+
+import numpy as np
+import coremltools as ct
+
+from common import async_infer
+from common import yolov9_seg
+from predict.segment import Segmentation
+
+prepareExecutor, predictExecutor = async_infer.create_executors("Segment")
+
+
+class CoreMLSegmentation(Segmentation):
+    def __init__(self, plugin, nativeId: str):
+        super().__init__(plugin=plugin, nativeId=nativeId)
+
+    def loadModel(self, name):
+        model_path = self.plugin.downloadHuggingFaceModelLocalFallback(name)
+        modelFile = os.path.join(model_path, f"{name}.mlpackage")
+        model = ct.models.MLModel(modelFile)
+        return model
+
+    async def detect_once(self, input, settings, src_size, cvss):
+        def predict():
+            input_name = self.model.get_spec().description.input[0].name
+            out_dict = self.model.predict({input_name: input})
+            outputs = list(out_dict.values())
+            pred = outputs[0]
+            proto = outputs[1]
+            pred = yolov9_seg.non_max_suppression(pred, nm=32)
+            return self.process_segmentation_output(pred, proto)
+
+        try:
+            objs = await asyncio.get_event_loop().run_in_executor(
+                predictExecutor, lambda: predict()
+            )
+        except:
+            traceback.print_exc()
+            raise
+
+        ret = self.create_detection_result(objs, src_size, cvss)
+        return ret
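
A note on the expected CoreML I/O, since predict() takes the first two outputs positionally: for a YOLOv9 segmentation export at 320x320 with 80 classes and 32 mask coefficients (an assumption consistent with nm=32 and the 320x320 mask upsampling elsewhere in this commit), the tensors would look roughly like this sketch.

# illustrative shapes only; sizes are assumptions, not read from the model
import numpy as np

nc, nm = 80, 32
anchors = 40 * 40 + 20 * 20 + 10 * 10        # = 2100 grid cells at 320x320
pred = np.zeros((1, 4 + nc + nm, anchors))   # outputs[0]: boxes + class scores + mask coeffs
proto = np.zeros((1, nm, 80, 80))            # outputs[1]: prototype masks at 1/4 input size
# non_max_suppression(pred, nm=32) then yields one (n, 6 + nm) array per image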

View File

@@ -1 +1 @@
-../../../openvino/src/ov/async_infer.py
+../../../openvino/src/common/async_infer.py

View File

@@ -24,6 +24,7 @@ from predict import PredictPlugin
 from .face_recognition import ONNXFaceRecognition
 from .clip_embedding import ONNXClipEmbedding
+from .segment import ONNXSegmentation

 try:
     from .text_recognition import ONNXTextRecognition
@@ -155,6 +156,7 @@ class ONNXPlugin(
         self.faceDevice = None
         self.textDevice = None
         self.clipDevice = None
+        self.segmentDevice = None

         if not self.forked:
             asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
@@ -199,6 +201,18 @@ class ONNXPlugin(
                     "name": "ONNX CLIP Embedding",
                 }
             )
+            await scrypted_sdk.deviceManager.onDeviceDiscovered(
+                {
+                    "nativeId": "segment",
+                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                    "interfaces": [
+                        scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
+                        scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                    ],
+                    "name": "ONNX Segmentation",
+                }
+            )
         except:
             pass
@@ -212,6 +226,9 @@ class ONNXPlugin(
         elif nativeId == "clipembedding":
            self.clipDevice = self.clipDevice or ONNXClipEmbedding(self, nativeId)
            return self.clipDevice
+        elif nativeId == "segment":
+            self.segmentDevice = self.segmentDevice or ONNXSegmentation(self, nativeId)
+            return self.segmentDevice

         custom_model = self.custom_models.get(nativeId, None)
         if custom_model:
             return custom_model

View File

@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import asyncio
+import os
+import traceback
+
+import numpy as np
+import onnxruntime
+
+from predict.segment import Segmentation
+from common import yolov9_seg
+from common import async_infer
+
+prepareExecutor, predictExecutor = async_infer.create_executors("Segment")
+
+
+class ONNXSegmentation(Segmentation):
+    def __init__(self, plugin, nativeId: str):
+        super().__init__(plugin=plugin, nativeId=nativeId)
+
+    def loadModel(self, name):
+        model_path = self.plugin.downloadHuggingFaceModelLocalFallback(name)
+        onnxfile = os.path.join(model_path, f"{name}.onnx")
+        model = onnxruntime.InferenceSession(onnxfile)
+        return model
+
+    async def detect_once(self, input, settings, src_size, cvss):
+        def prepare():
+            im = np.expand_dims(input, axis=0)
+            im = im.transpose((0, 3, 1, 2))  # BHWC to BCHW, (n, 3, h, w)
+            im = im.astype(np.float32) / 255.0
+            im = np.ascontiguousarray(im)  # contiguous
+            return im
+
+        def predict():
+            input_tensor = prepare()
+            output_tensors = self.model.run(None, {self.input_name: input_tensor})
+            pred = output_tensors[0]
+            proto = output_tensors[1]
+            pred = yolov9_seg.non_max_suppression(pred, nm=32)
+            return self.process_segmentation_output(pred, proto)
+
+        try:
+            objs = await asyncio.get_event_loop().run_in_executor(
+                predictExecutor, lambda: predict()
+            )
+        except:
+            traceback.print_exc()
+            raise

+        ret = self.create_detection_result(objs, src_size, cvss)
+        return ret
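
A standalone sketch of the prepare() steps above, runnable with numpy alone; the 320x320 input size matches the mask upsampling used elsewhere in this commit but is otherwise an assumption.

import numpy as np

frame = np.random.randint(0, 255, (320, 320, 3), dtype=np.uint8)  # HWC, RGB

im = np.expand_dims(frame, axis=0)   # (1, 320, 320, 3)
im = im.transpose((0, 3, 1, 2))      # BHWC -> BCHW: (1, 3, 320, 320)
im = im.astype(np.float32) / 255.0   # normalize to [0, 1]
im = np.ascontiguousarray(im)        # ONNX Runtime expects contiguous input

assert im.shape == (1, 3, 320, 320) and im.dtype == np.float32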

View File

@@ -1,6 +1,5 @@
 import concurrent.futures

 def create_executors(name: str):
     prepare = concurrent.futures.ThreadPoolExecutor(1, f"{name}Prepare")
     predict = concurrent.futures.ThreadPoolExecutor(1, f"{name}Predict")
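
A minimal usage sketch of the create_executors() pattern: each stage gets a single-thread pool, so inference calls serialize per model while the asyncio event loop stays responsive. The blocking_infer function here is a hypothetical stand-in for a real model call.

import asyncio
from common import async_infer

prepareExecutor, predictExecutor = async_infer.create_executors("Demo")

def blocking_infer(x):
    return x * 2  # placeholder for a CPU/GPU-bound model call

async def detect(x):
    loop = asyncio.get_event_loop()
    # runs on the single "DemoPredict" thread; concurrent calls queue up
    return await loop.run_in_executor(predictExecutor, lambda: blocking_infer(x))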

View File

@@ -7,6 +7,7 @@ that are equivalent to their torch counterparts in utils/segment/general.py.
 import numpy as np
 import cv2
+import time

 def crop_mask_numpy(masks, boxes):
     """
@@ -197,4 +198,158 @@ def masks2polygons_numpy(masks):
     """
     segments = masks2segments_numpy(masks)
     # Convert to list of [x, y] pairs
     return [segment.tolist() for segment in segments]
+
+
+def xywh2xyxy(x):
+    """Convert [x_center, y_center, width, height] to [x1, y1, x2, y2]"""
+    y = np.copy(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
+    return y
+
+
+def box_iou(box1, box2):
+    """Calculate IoU between two sets of boxes"""
+    # box1 shape: (n, 4), box2 shape: (m, 4)
+    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
+    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
+    iou = np.zeros((len(box1), len(box2)), dtype=np.float32)
+    for i in range(len(box1)):
+        for j in range(len(box2)):
+            # Intersection
+            inter_x1 = np.maximum(box1[i, 0], box2[j, 0])
+            inter_y1 = np.maximum(box1[i, 1], box2[j, 1])
+            inter_x2 = np.minimum(box1[i, 2], box2[j, 2])
+            inter_y2 = np.minimum(box1[i, 3], box2[j, 3])
+            inter_w = np.maximum(0, inter_x2 - inter_x1)
+            inter_h = np.maximum(0, inter_y2 - inter_y1)
+            inter_area = inter_w * inter_h
+            # Union
+            union = area1[i] + area2[j] - inter_area
+            iou[i, j] = inter_area / union if union > 0 else 0
+    return iou
+
+
+def nms(boxes, scores, iou_thres):
+    """Non-Maximum Suppression implementation in NumPy"""
+    if len(boxes) == 0:
+        return np.array([], dtype=np.int32)
+    # Sort by scores in descending order
+    indices = np.argsort(-scores)
+    keep = []
+    while len(indices) > 0:
+        i = indices[0]
+        keep.append(i)
+        if len(indices) == 1:
+            break
+        # Calculate IoU between the current box and all remaining boxes,
+        # then keep only boxes with IoU below the threshold
+        iou_scores = box_iou(boxes[indices[0:1]], boxes[indices[1:]])[0]
+        indices = indices[1:][iou_scores < iou_thres]
+    return np.array(keep, dtype=np.int32)
+
+
+def non_max_suppression(
+    prediction,
+    conf_thres=0.25,
+    iou_thres=0.45,
+    classes=None,
+    agnostic=False,
+    multi_label=False,
+    labels=(),
+    max_det=300,
+    nm=0,  # number of masks
+):
+    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
+
+    Returns:
+        list of detections, one (n, 6 + nm) array per image [xyxy, conf, cls, masks]
+    """
+    if isinstance(prediction, (list, tuple)):  # YOLO model in validation mode, output = (inference_out, loss_out)
+        prediction = prediction[0]  # select only inference output
+
+    bs = prediction.shape[0]  # batch size
+    nc = prediction.shape[1] - nm - 4  # number of classes
+    mi = 4 + nc  # mask start index
+    xc = np.max(prediction[:, 4:mi], axis=1) > conf_thres  # candidates
+
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+
+    # Settings
+    max_wh = 7680  # (pixels) maximum box width and height
+    max_nms = 30000  # maximum number of boxes into nms()
+    time_limit = 2.5 + 0.05 * bs  # seconds to quit after
+    redundant = True  # require redundant detections
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+    merge = False  # use merge-NMS
+
+    t = time.time()
+    output = [np.zeros((0, 6 + nm), dtype=np.float32)] * bs
+    for xi, pred_x in enumerate(prediction):  # image index, image inference
+        x = pred_x.T[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = np.zeros((len(lb), nc + nm + 5), dtype=x.dtype)
+            v[:, :4] = lb[:, 1:5]  # box
+            v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0  # cls
+            x = np.concatenate((x, v), 0)
+
+        # If none remain, process next image
+        if x.shape[0] == 0:
+            continue
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        box = x[:, :4]
+        cls = x[:, 4:4 + nc]
+        mask = x[:, 4 + nc:] if nm > 0 else np.zeros((x.shape[0], nm), dtype=x.dtype)
+        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
+
+        if multi_label:
+            i, j = np.where(cls > conf_thres)
+            x = np.concatenate((box[i], x[i, 4 + j][:, None], j[:, None].astype(np.float32), mask[i]), 1)
+        else:  # best class only
+            j = np.argmax(cls, axis=1, keepdims=True)
+            conf = cls[np.arange(len(cls)), j.flatten()][:, None]
+            x = np.concatenate((box, conf, j.astype(np.float32), mask), 1)[conf.flatten() > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            class_tensor = np.array(classes, dtype=np.float32)
+            mask = np.any(x[:, 5:6] == class_tensor, axis=1)
+            x = x[mask]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if n == 0:  # no boxes
+            continue
+        elif n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence
+        else:
+            x = x[x[:, 4].argsort()[::-1]]  # sort by confidence
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # class offsets
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = nms(boxes, scores, iou_thres)  # NMS
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
+            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+            weights = iou * scores[None]  # box weights
+            x[i, :4] = np.dot(weights, x[:, :4]).astype(np.float32) / weights.sum(1, keepdims=True)  # merged boxes
+            if redundant:
+                i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            import warnings
+            warnings.warn(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
+            break  # time limit exceeded
+
+    return output
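
A synthetic smoke test for non_max_suppression(), assuming 80 classes and 32 mask coefficients; the input layout is (batch, 4 + nc + nm, anchors) with xywh boxes, matching the transpose inside the loop above.

import numpy as np
from common import yolov9_seg

nc, nm, anchors = 80, 32, 2100
prediction = np.zeros((1, 4 + nc + nm, anchors), dtype=np.float32)
prediction[0, :4, 0] = [160, 160, 100, 50]  # one box: cx, cy, w, h
prediction[0, 4 + 17, 0] = 0.9              # class 17 ("horse" in COCO80) score

out = yolov9_seg.non_max_suppression(prediction, nm=nm)
det = out[0]                  # (n, 6 + nm): x1, y1, x2, y2, conf, cls, coeffs
assert det.shape == (1, 38)   # 6 + 32 columns
assert int(det[0, 5]) == 17 and det[0, 4] > 0.25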

View File

@@ -7,7 +7,7 @@ import numpy as np
 import openvino as ov
 from PIL import Image

-from ov import async_infer
+from common import async_infer

 from predict.clip import ClipEmbedding
 from scrypted_sdk import ObjectsDetected

View File

@@ -6,7 +6,7 @@ import numpy as np
 import openvino as ov
 from PIL import Image

-from ov import async_infer
+from common import async_infer

 from predict.custom_detect import CustomDetection
 from scrypted_sdk import ObjectsDetected

View File

@@ -7,7 +7,7 @@ import numpy as np
 from PIL import Image
 import openvino as ov

-from ov import async_infer
+from common import async_infer

 from predict.face_recognize import FaceRecognizeDetection

 faceDetectPrepare, faceDetectPredict = async_infer.create_executors("FaceDetect")

View File

@@ -6,195 +6,14 @@ import traceback
 import numpy as np

-from ov import async_infer
 import openvino as ov

 from predict.segment import Segmentation
-from predict import Prediction
-from predict.rectangle import Rectangle
-from common import yolo
-import time
 from common import yolov9_seg
+from common import async_infer

 prepareExecutor, predictExecutor = async_infer.create_executors("Segment")

-def xywh2xyxy(x):
-    """Convert [x_center, y_center, width, height] to [x1, y1, x2, y2]"""
-    y = np.copy(x)
-    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
-    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
-    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
-    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
-    return y
-
-def box_iou(box1, box2):
-    """Calculate IoU between two sets of boxes"""
-    # box1 shape: (n, 4), box2 shape: (m, 4)
-    # Compute intersection areas
-    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
-    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
-    iou = np.zeros((len(box1), len(box2)), dtype=np.float32)
-    for i in range(len(box1)):
-        for j in range(len(box2)):
-            # Intersection
-            inter_x1 = np.maximum(box1[i, 0], box2[j, 0])
-            inter_y1 = np.maximum(box1[i, 1], box2[j, 1])
-            inter_x2 = np.minimum(box1[i, 2], box2[j, 2])
-            inter_y2 = np.minimum(box1[i, 3], box2[j, 3])
-            inter_w = np.maximum(0, inter_x2 - inter_x1)
-            inter_h = np.maximum(0, inter_y2 - inter_y1)
-            inter_area = inter_w * inter_h
-            # Union
-            union = area1[i] + area2[j] - inter_area
-            iou[i, j] = inter_area / union if union > 0 else 0
-    return iou
-
-def nms(boxes, scores, iou_thres):
-    """Non-Maximum Suppression implementation in NumPy"""
-    if len(boxes) == 0:
-        return np.array([], dtype=np.int32)
-    # Sort by scores in descending order
-    indices = np.argsort(-scores)
-    keep = []
-    while len(indices) > 0:
-        i = indices[0]
-        keep.append(i)
-        if len(indices) == 1:
-            break
-        # Calculate IoU between the current box and all remaining boxes
-        iou_scores = box_iou(boxes[indices[0:1]], boxes[indices[1:]])[0]
-        # Keep boxes with IoU below threshold
-        indices = indices[1:][iou_scores < iou_thres]
-    return np.array(keep, dtype=np.int32)
-
-def non_max_suppression(
-    prediction,
-    conf_thres=0.25,
-    iou_thres=0.45,
-    classes=None,
-    agnostic=False,
-    multi_label=False,
-    labels=(),
-    max_det=300,
-    nm=0,  # number of masks
-):
-    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
-
-    Returns:
-        list of detections, one (n, 6 + nm) array per image [xyxy, conf, cls, masks]
-    """
-    if isinstance(prediction, (list, tuple)):  # YOLO model in validation mode, output = (inference_out, loss_out)
-        prediction = prediction[0]  # select only inference output
-
-    bs = prediction.shape[0]  # batch size
-    nc = prediction.shape[1] - nm - 4  # number of classes
-    mi = 4 + nc  # mask start index
-    xc = np.max(prediction[:, 4:mi], axis=1) > conf_thres  # candidates
-
-    # Checks
-    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
-    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
-
-    # Settings
-    # min_wh = 2  # (pixels) minimum box width and height
-    max_wh = 7680  # (pixels) maximum box width and height
-    max_nms = 30000  # maximum number of boxes into nms()
-    time_limit = 2.5 + 0.05 * bs  # seconds to quit after
-    redundant = True  # require redundant detections
-    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
-    merge = False  # use merge-NMS
-
-    t = time.time()
-    output = [np.zeros((0, 6 + nm), dtype=np.float32)] * bs
-    for xi, pred_x in enumerate(prediction):  # image index, image inference
-        # Apply constraints
-        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
-        x = pred_x.T[xc[xi]]  # confidence
-
-        # Cat apriori labels if autolabelling
-        if labels and len(labels[xi]):
-            lb = labels[xi]
-            v = np.zeros((len(lb), nc + nm + 5), dtype=x.dtype)
-            v[:, :4] = lb[:, 1:5]  # box
-            v[np.arange(len(lb)), lb[:, 0].astype(int) + 4] = 1.0  # cls
-            x = np.concatenate((x, v), 0)
-
-        # If none remain, process next image
-        if x.shape[0] == 0:
-            continue
-
-        # Detections matrix nx6 (xyxy, conf, cls)
-        box = x[:, :4]
-        cls = x[:, 4:4 + nc]
-        mask = x[:, 4 + nc:] if nm > 0 else np.zeros((x.shape[0], nm), dtype=x.dtype)
-        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
-
-        if multi_label:
-            i, j = np.where(cls > conf_thres)
-            x = np.concatenate((box[i], x[i, 4 + j][:, None], j[:, None].astype(np.float32), mask[i]), 1)
-        else:  # best class only
-            j = np.argmax(cls, axis=1, keepdims=True)
-            conf = cls[np.arange(len(cls)), j.flatten()][:, None]
-            x = np.concatenate((box, conf, j.astype(np.float32), mask), 1)[conf.flatten() > conf_thres]
-
-        # Filter by class
-        if classes is not None:
-            class_tensor = np.array(classes, dtype=np.float32)
-            mask = np.any(x[:, 5:6] == class_tensor, axis=1)
-            x = x[mask]
-
-        # Apply finite constraint
-        # if not np.isfinite(x).all():
-        #     x = x[np.isfinite(x).all(1)]
-
-        # Check shape
-        n = x.shape[0]  # number of boxes
-        if n == 0:  # no boxes
-            continue
-        elif n > max_nms:  # excess boxes
-            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence
-        else:
-            x = x[x[:, 4].argsort()[::-1]]  # sort by confidence
-
-        # Batched NMS
-        c = x[:, 5:6] * (0 if agnostic else max_wh)  # class offsets
-        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
-        i = nms(boxes, scores, iou_thres)  # NMS
-        if i.shape[0] > max_det:  # limit detections
-            i = i[:max_det]
-        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
-            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
-            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
-            weights = iou * scores[None]  # box weights
-            x[i, :4] = np.dot(weights, x[:, :4]).astype(np.float32) / weights.sum(1, keepdims=True)  # merged boxes
-            if redundant:
-                i = i[iou.sum(1) > 1]  # require redundancy
-
-        output[xi] = x[i]
-        if (time.time() - t) > time_limit:
-            import warnings
-            warnings.warn(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
-            break  # time limit exceeded
-
-    return output
-
 class OpenVINOSegmentation(Segmentation):
     def __init__(self, plugin, nativeId: str):
@@ -222,41 +41,9 @@ class OpenVINOSegmentation(Segmentation):
             pred = output_tensors[0]
             proto = output_tensors[1]
-            pred = non_max_suppression(pred, nm=32)
-            objs = []
-            for det in pred:
-                if not len(det):
-                    continue
-
-                # Upsample masks to input image space (320x320)
-                masks = yolov9_seg.process_mask_numpy(proto.squeeze(0), det[:, 6:], det[:, :4], (320, 320), upsample=True)
-
-                # Convert masks to contour points
-                segments = yolov9_seg.masks2segments_numpy(masks)
-
-                # Create Prediction instances
-                for i in range(len(det)):
-                    # Convert all contours for this detection to a list of [x, y] tuples
-                    mask_contours = segments[i]
-                    clip_paths = []
-                    for contour in mask_contours:
-                        if len(contour) > 0 and contour.shape[1] == 2:
-                            single_path = [(float(contour[j, 0]), float(contour[j, 1])) for j in range(len(contour))]
-                            clip_paths.append(single_path)
-
-                    prediction = Prediction(
-                        id=int(det[i, 5]),  # class_id
-                        score=float(det[i, 4]),  # confidence
-                        bbox=Rectangle(
-                            xmin=float(det[i, 0]),  # x1
-                            ymin=float(det[i, 1]),  # y1
-                            xmax=float(det[i, 2]),  # x2
-                            ymax=float(det[i, 3]),  # y2
-                        ),
-                        embedding=None,  # no embedding for segmentation
-                        clipPaths=clip_paths,  # list of polygon outlines [[[x, y], ...], ...] at 320x320
-                    )
-                    objs.append(prediction)
-            return objs
+            pred = yolov9_seg.non_max_suppression(pred, nm=32)
+            return self.process_segmentation_output(pred, proto)

         try:
             objs = await asyncio.get_event_loop().run_in_executor(
@@ -268,3 +55,4 @@ class OpenVINOSegmentation(Segmentation):
         ret = self.create_detection_result(objs, src_size, cvss)
         return ret

View File

@@ -6,7 +6,7 @@ import os
 import numpy as np
 import openvino as ov

-from ov import async_infer
+from common import async_infer

 from predict.text_recognize import TextRecognition

 textDetectPrepare, textDetectPredict = async_infer.create_executors("TextDetect")

View File

@@ -424,6 +424,8 @@ class PredictPlugin(DetectPlugin, scrypted_sdk.ClusterForkInterface, scrypted_sd
             ret = await result.getFaceRecognition()
         elif self.nativeId == "clipembedding":
             ret = await result.getClipEmbedding()
+        elif self.nativeId == "segment":
+            ret = await result.getSegmentation()
         else:
             ret = await result.getCustomDetection(self.nativeId)
         return ret
@@ -559,6 +561,9 @@ class Fork:
     async def getClipEmbedding(self):
         return await self.plugin.getDevice("clipembedding")

+    async def getSegmentation(self):
+        return await self.plugin.getDevice("segment")
+
     async def getCustomDetection(self, nativeId: str):
         return await self.plugin.getDevice(nativeId)

View File

@@ -1,14 +1,18 @@
 from __future__ import annotations
 from typing import Tuple

+import numpy as np
+
-from ov import async_infer
+from common import async_infer
+from common import yolov9_seg
 from predict import PredictPlugin
+from predict import Prediction
+from predict.rectangle import Rectangle
 import asyncio
 from common import coco
+import traceback

-customDetectPrepare, customDetectPredict = async_infer.create_executors("CustomDetect")
+customDetectPrepare, customDetectPredict = async_infer.create_executors("Segment")

 class Segmentation(PredictPlugin):
     def __init__(self, plugin, nativeId: str):
@@ -20,8 +24,9 @@ class Segmentation(PredictPlugin):
         self.labels = coco.COCO_LABELS

         try:
-            self.model = self.loadModel('yolov9c_seg')
+            self.model = self.loadModel('scrypted_yolov9t_seg_relu')
         except:
+            traceback.print_exc()
             raise

     def loadModel(self, name: str):
@@ -36,4 +41,49 @@ class Segmentation(PredictPlugin):
         return (self.inputwidth, self.inputheight)

     def get_input_format(self) -> str:
         return "rgb"
+
+    def process_segmentation_output(self, pred, proto):
+        """
+        Process segmentation model outputs into a list of Prediction objects.
+
+        Args:
+            pred: Predictions output from NMS (list of detections)
+            proto: Prototype masks for segmentation
+
+        Returns:
+            List of Prediction objects with segmentation masks (clipPaths)
+        """
+        objs = []
+        for det in pred:
+            if not len(det):
+                continue
+
+            # Upsample masks to input image space (320x320)
+            masks = yolov9_seg.process_mask_numpy(proto.squeeze(0), det[:, 6:], det[:, :4], (320, 320), upsample=True)
+
+            # Convert masks to contour points
+            segments = yolov9_seg.masks2segments_numpy(masks)
+
+            # Create Prediction instances
+            for i in range(len(det)):
+                # Convert all contours for this detection to a list of [x, y] tuples
+                mask_contours = segments[i]
+                clip_paths = []
+                for contour in mask_contours:
+                    if len(contour) > 0 and contour.shape[1] == 2:
+                        single_path = [(float(contour[j, 0]), float(contour[j, 1])) for j in range(len(contour))]
+                        clip_paths.append(single_path)
+
+                prediction = Prediction(
+                    id=int(det[i, 5]),  # class_id
+                    score=float(det[i, 4]),  # confidence
+                    bbox=Rectangle(
+                        xmin=float(det[i, 0]),  # x1
+                        ymin=float(det[i, 1]),  # y1
+                        xmax=float(det[i, 2]),  # x2
+                        ymax=float(det[i, 3]),  # y2
+                    ),
+                    embedding=None,  # no embedding for segmentation
+                    clipPaths=clip_paths,  # list of polygon outlines [[[x, y], ...], ...] at 320x320
+                )
+                objs.append(prediction)
+        return objs
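
For reference, a hedged usage sketch of process_segmentation_output() on synthetic tensors, assuming process_mask_numpy() follows the usual YOLO mask protocol (sigmoid of coefficients times prototypes, cropped to each box) and an 80x80 prototype grid for a 320x320 input; seg stands in for any concrete Segmentation instance.

import numpy as np

det = np.zeros((1, 38), dtype=np.float32)          # [x1, y1, x2, y2, conf, cls] + 32 coeffs
det[0, :6] = [40.0, 40.0, 200.0, 220.0, 0.9, 0.0]  # one confident "person" box
det[0, 6:] = np.random.randn(32)                   # arbitrary mask coefficients
proto = np.random.randn(1, 32, 80, 80).astype(np.float32)

objs = seg.process_segmentation_output([det], proto)
# each Prediction carries a bbox in 320x320 input space plus clipPaths,
# polygon outlines traced from the upsampled, thresholded mask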