detect: finish segmentation support

2026-03-16 15:12:06 +00:00 · 2026-03-07 17:26:08 -08:00
parent baa9368199
commit f2f8fb265f
5 changed files with 101 additions and 6 deletions
--- a/plugins/coreml/src/coreml/init.py
+++ b/plugins/coreml/src/coreml/init.py
@@ -157,7 +157,7 @@ class CoreMLPlugin(

            await scrypted_sdk.deviceManager.onDeviceDiscovered(
                {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                    "interfaces": [
                        scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -179,7 +179,7 @@ class CoreMLPlugin(
        elif nativeId == "clipembedding":
            self.clipDevice = self.clipDevice or CoreMLClipEmbedding(self, nativeId)
            return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
            self.segmentDevice = self.segmentDevice or CoreMLSegmentation(self, nativeId)
            return self.segmentDevice
        custom_model = self.custom_models.get(nativeId, None)
--- a/plugins/ncnn/src/nc/init.py
+++ b/plugins/ncnn/src/nc/init.py
@@ -25,6 +25,10 @@ try:
    from nc.text_recognition import NCNNTextRecognition
 except:
    NCNNTextRecognition = None
+try:
+    from nc.segment import NCNNSegmentation
+except:
+    NCNNSegmentation = None
 from predict import Prediction, PredictPlugin
 from predict.rectangle import Rectangle

@@ -128,6 +132,7 @@ class NCNNPlugin(

        self.faceDevice = None
        self.textDevice = None
+        self.segmentDevice = None

        if not self.forked:
            asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
@@ -158,6 +163,19 @@ class NCNNPlugin(
                        "name": "NCNN Text Recognition",
                    },
                )
+
+            if NCNNSegmentation:
+                await scrypted_sdk.deviceManager.onDeviceDiscovered(
+                    {
+                        "nativeId": "segmentation",
+                        "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                        "interfaces": [
+                            scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
+                            scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                        ],
+                        "name": "NCNN Segmentation",
+                    },
+                )
        except:
            pass

@@ -168,6 +186,9 @@ class NCNNPlugin(
        if nativeId == "textrecognition":
            self.textDevice = self.textDevice or NCNNTextRecognition(self, nativeId)
            return self.textDevice
+        if nativeId == "segmentation":
+            self.segmentDevice = self.segmentDevice or NCNNSegmentation(self, nativeId)
+            return self.segmentDevice
        custom_model = self.custom_models.get(nativeId, None)
        if custom_model:
            return custom_model
--- a/plugins/ncnn/src/nc/segment.py
+++ b/plugins/ncnn/src/nc/segment.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import asyncio
+import os
+import traceback
+
+import numpy as np
+
+import ncnn
+from nc import async_infer
+from common import yolov9_seg
+from predict.segment import Segmentation
+
+prepareExecutor, predictExecutor = async_infer.create_executors("NCNN-Segment")
+
+
+class NCNNSegmentation(Segmentation):
+    """Segmentation device whose model is loaded and run through ``ncnn.Net``."""

+    def __init__(self, plugin, nativeId: str):
+        super().__init__(plugin=plugin, nativeId=nativeId)

+    def loadModel(self, name):
+        """Load ``<name>.ncnn.param``/``.bin`` and return ``[net, input_name]``."""
+        model_path = self.downloadHuggingFaceModelLocalFallback(name)
+        binFile = os.path.join(model_path, f"{name}.ncnn.bin")
+        paramFile = os.path.join(model_path, f"{name}.ncnn.param")

+        net = ncnn.Net()
+        net.opt.use_vulkan_compute = True
+        net.load_param(paramFile)
+        net.load_model(binFile)

+        # Only the first input name reported by the net is kept.
+        return [net, net.input_names()[0]]

+    async def detect_once(self, input, settings, src_size, cvss):
+        """Run one inference pass on ``input`` and build the detection result.

+        prepare() and predict() are dispatched to prepareExecutor and
+        predictExecutor; any exception has its traceback printed and re-raised.
+        """

+        def prepare():
+            # HWC -> CHW float32 divided by 255; the reshape raises unless the
+            # input holds exactly 3 * 320 * 320 elements.
+            im = np.asarray(input).transpose((2, 0, 1))
+            im = im.astype(np.float32) / 255.0
+            # Contiguous copy: predict() wraps this array in ncnn.Mat.
+            return np.ascontiguousarray(im.reshape((3, 320, 320)))

+        def predict(input_tensor):
+            net, input_name = self.model
+            ex = net.create_extractor()
+            ex.input(input_name, ncnn.Mat(input_tensor))

+            out0 = ncnn.Mat()
+            out1 = ncnn.Mat()
+            ex.extract("out0", out0)
+            ex.extract("out1", out1)

+            # out0 goes through NMS; out1 is forwarded as the proto tensor.
+            pred = yolov9_seg.non_max_suppression(np.array(out0), nm=32)
+            return self.process_segmentation_output(pred, np.array(out1))

+        loop = asyncio.get_running_loop()
+        try:
+            input_tensor = await loop.run_in_executor(prepareExecutor, prepare)
+            objs = await loop.run_in_executor(predictExecutor, predict, input_tensor)
+        except Exception:
+            traceback.print_exc()
+            raise

+        return self.create_detection_result(objs, src_size, cvss)
--- a/plugins/onnx/src/ort/init.py
+++ b/plugins/onnx/src/ort/init.py
@@ -207,7 +207,7 @@ class ONNXPlugin(

            await scrypted_sdk.deviceManager.onDeviceDiscovered(
                {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                    "interfaces": [
                        scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -229,7 +229,7 @@ class ONNXPlugin(
        elif nativeId == "clipembedding":
            self.clipDevice = self.clipDevice or ONNXClipEmbedding(self, nativeId)
            return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
            self.segmentDevice = self.segmentDevice or ONNXSegmentation(self, nativeId)
            return self.segmentDevice
        custom_model = self.custom_models.get(nativeId, None)
--- a/plugins/openvino/src/ov/init.py
+++ b/plugins/openvino/src/ov/init.py
@@ -343,7 +343,7 @@ class OpenVINOPlugin(

            await scrypted_sdk.deviceManager.onDeviceDiscovered(
                {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                    "interfaces": [
                        scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -365,7 +365,7 @@ class OpenVINOPlugin(
        elif nativeId == "clipembedding":
            self.clipDevice = self.clipDevice or OpenVINOClipEmbedding(self, nativeId)
            return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
            self.segmentDevice = self.segmentDevice or OpenVINOSegmentation(self, nativeId)
            return self.segmentDevice
        custom_model = self.custom_models.get(nativeId, None)