From f2f8fb265f363eebf79e21a58011b23d3e325557 Mon Sep 17 00:00:00 2001
From: Koushik Dutta
Date: Sat, 7 Mar 2026 17:26:08 -0800
Subject: [PATCH] detect: finish segmentation support

---
Review notes (this section is not part of the commit message):
* Line structure and indentation were rebuilt from a whitespace-collapsed
  copy of this patch. Context-line indentation is inferred from Python
  nesting and the hunk line counts; verify it against the target tree, or
  apply with whitespace differences ignored.
* plugins/ncnn/src/nc/segment.py differs from the reviewed copy: docstrings
  and comments were added, asyncio.get_running_loop() replaces
  get_event_loop(), the executor callables are passed directly instead of
  through lambdas, and the bare except is narrowed to Exception. The blob id
  on that file's index line predates these edits.
* The From: header carried no e-mail address in the reviewed copy.

 plugins/coreml/src/coreml/__init__.py |  4 +-
 plugins/ncnn/src/nc/__init__.py       | 21 ++++++++
 plugins/ncnn/src/nc/segment.py        | 88 +++++++++++++++++++++++++++
 plugins/onnx/src/ort/__init__.py      |  4 +-
 plugins/openvino/src/ov/__init__.py   |  4 +-
 5 files changed, 115 insertions(+), 6 deletions(-)
 create mode 100644 plugins/ncnn/src/nc/segment.py

diff --git a/plugins/coreml/src/coreml/__init__.py b/plugins/coreml/src/coreml/__init__.py
index 0f42797ae..3540d6ac1 100644
--- a/plugins/coreml/src/coreml/__init__.py
+++ b/plugins/coreml/src/coreml/__init__.py
@@ -157,7 +157,7 @@ class CoreMLPlugin(
 
             await scrypted_sdk.deviceManager.onDeviceDiscovered(
                 {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                     "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                     "interfaces": [
                         scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -179,7 +179,7 @@ class CoreMLPlugin(
         elif nativeId == "clipembedding":
             self.clipDevice = self.clipDevice or CoreMLClipEmbedding(self, nativeId)
             return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
             self.segmentDevice = self.segmentDevice or CoreMLSegmentation(self, nativeId)
             return self.segmentDevice
         custom_model = self.custom_models.get(nativeId, None)
diff --git a/plugins/ncnn/src/nc/__init__.py b/plugins/ncnn/src/nc/__init__.py
index 01d6ebcee..d9dcc5162 100644
--- a/plugins/ncnn/src/nc/__init__.py
+++ b/plugins/ncnn/src/nc/__init__.py
@@ -25,6 +25,10 @@ try:
     from nc.text_recognition import NCNNTextRecognition
 except:
     NCNNTextRecognition = None
+try:
+    from nc.segment import NCNNSegmentation
+except:
+    NCNNSegmentation = None
 
 from predict import Prediction, PredictPlugin
 from predict.rectangle import Rectangle
@@ -128,6 +132,7 @@ class NCNNPlugin(
 
         self.faceDevice = None
         self.textDevice = None
+        self.segmentDevice = None
 
         if not self.forked:
             asyncio.ensure_future(self.prepareRecognitionModels(), loop=self.loop)
@@ -158,6 +163,19 @@ class NCNNPlugin(
                         "name": "NCNN Text Recognition",
                     },
                 )
+
+            if NCNNSegmentation:
+                await scrypted_sdk.deviceManager.onDeviceDiscovered(
+                    {
+                        "nativeId": "segmentation",
+                        "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                        "interfaces": [
+                            scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
+                            scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                        ],
+                        "name": "NCNN Segmentation",
+                    },
+                )
         except:
             pass
 
@@ -168,6 +186,9 @@ class NCNNPlugin(
         if nativeId == "textrecognition":
             self.textDevice = self.textDevice or NCNNTextRecognition(self, nativeId)
             return self.textDevice
+        if nativeId == "segmentation":
+            self.segmentDevice = self.segmentDevice or NCNNSegmentation(self, nativeId)
+            return self.segmentDevice
         custom_model = self.custom_models.get(nativeId, None)
         if custom_model:
             return custom_model
diff --git a/plugins/ncnn/src/nc/segment.py b/plugins/ncnn/src/nc/segment.py
new file mode 100644
index 000000000..ef77cc944
--- /dev/null
+++ b/plugins/ncnn/src/nc/segment.py
@@ -0,0 +1,88 @@
+"""Segmentation device backed by ncnn, with YOLOv9-seg post-processing."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import traceback
+
+import numpy as np
+
+import ncnn
+from nc import async_infer
+from common import yolov9_seg
+from predict.segment import Segmentation
+
+prepareExecutor, predictExecutor = async_infer.create_executors("NCNN-Segment")
+
+
+class NCNNSegmentation(Segmentation):
+    """Segmentation device that runs its model through ncnn."""
+
+    def __init__(self, plugin, nativeId: str):
+        super().__init__(plugin=plugin, nativeId=nativeId)
+
+    def loadModel(self, name):
+        """Load the ncnn param/bin pair for ``name``; return ``[net, input_name]``."""
+        model_path = self.downloadHuggingFaceModelLocalFallback(name)
+        binFile = os.path.join(model_path, f"{name}.ncnn.bin")
+        paramFile = os.path.join(model_path, f"{name}.ncnn.param")
+
+        net = ncnn.Net()
+        net.opt.use_vulkan_compute = True
+
+        net.load_param(paramFile)
+        net.load_model(binFile)
+
+        input_name = net.input_names()[0]
+
+        return [net, input_name]
+
+    async def detect_once(self, input, settings, src_size, cvss):
+        """Run one inference on ``input`` and return the detection result.
+
+        Preprocessing and inference are dispatched to ``prepareExecutor`` and
+        ``predictExecutor``. Any exception is printed and then re-raised.
+        """
+
+        def prepare():
+            im = np.expand_dims(input, axis=0)
+            im = im.transpose((0, 3, 1, 2))  # BHWC to BCHW
+            im = im.astype(np.float32) / 255.0
+            # The reshape only succeeds when the frame holds 3 * 320 * 320
+            # values, which pins this device to a 320x320, 3-channel input.
+            im = im.reshape((1, 3, 320, 320)).squeeze(0)
+            im = np.ascontiguousarray(im)
+            return im
+
+        def predict(input_tensor):
+            # NOTE(review): self.model is presumably the [net, input_name]
+            # pair returned by loadModel -- confirm against the base class.
+            net, input_name = self.model
+            input_ncnn = ncnn.Mat(input_tensor)
+            ex = net.create_extractor()
+            ex.input(input_name, input_ncnn)
+
+            out0 = ncnn.Mat()
+            out1 = ncnn.Mat()
+            ex.extract("out0", out0)
+            ex.extract("out1", out1)
+
+            pred = np.array(out0)
+            proto = np.array(out1)
+            pred = yolov9_seg.non_max_suppression(pred, nm=32)
+
+            return self.process_segmentation_output(pred, proto)
+
+        # get_running_loop() is the supported way to reach the loop from
+        # inside a coroutine.
+        loop = asyncio.get_running_loop()
+        try:
+            input_tensor = await loop.run_in_executor(prepareExecutor, prepare)
+            objs = await loop.run_in_executor(predictExecutor, predict, input_tensor)
+        except Exception:
+            # Cancellation is a BaseException and passes through unlogged.
+            traceback.print_exc()
+            raise
+
+        return self.create_detection_result(objs, src_size, cvss)
diff --git a/plugins/onnx/src/ort/__init__.py b/plugins/onnx/src/ort/__init__.py
index 0ac50f047..171b6bb40 100644
--- a/plugins/onnx/src/ort/__init__.py
+++ b/plugins/onnx/src/ort/__init__.py
@@ -207,7 +207,7 @@ class ONNXPlugin(
 
             await scrypted_sdk.deviceManager.onDeviceDiscovered(
                 {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                     "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                     "interfaces": [
                         scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -229,7 +229,7 @@ class ONNXPlugin(
         elif nativeId == "clipembedding":
             self.clipDevice = self.clipDevice or ONNXClipEmbedding(self, nativeId)
             return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
             self.segmentDevice = self.segmentDevice or ONNXSegmentation(self, nativeId)
             return self.segmentDevice
         custom_model = self.custom_models.get(nativeId, None)
diff --git a/plugins/openvino/src/ov/__init__.py b/plugins/openvino/src/ov/__init__.py
index 525e5415b..cd7cb02f7 100644
--- a/plugins/openvino/src/ov/__init__.py
+++ b/plugins/openvino/src/ov/__init__.py
@@ -343,7 +343,7 @@ class OpenVINOPlugin(
 
             await scrypted_sdk.deviceManager.onDeviceDiscovered(
                 {
-                    "nativeId": "segment",
+                    "nativeId": "segmentation",
                     "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
                     "interfaces": [
                         scrypted_sdk.ScryptedInterface.ClusterForkInterface.value,
@@ -365,7 +365,7 @@ class OpenVINOPlugin(
         elif nativeId == "clipembedding":
             self.clipDevice = self.clipDevice or OpenVINOClipEmbedding(self, nativeId)
             return self.clipDevice
-        elif nativeId == "segment":
+        elif nativeId == "segmentation":
             self.segmentDevice = self.segmentDevice or OpenVINOSegmentation(self, nativeId)
             return self.segmentDevice
         custom_model = self.custom_models.get(nativeId, None)