detect: move clip to huggingface

This commit is contained in:
Koushik Dutta
2026-02-27 20:49:33 -08:00
parent e66ea8e794
commit 46dd4006c7
10 changed files with 37 additions and 68 deletions

View File

@@ -1,12 +1,12 @@
{
"name": "@scrypted/coreml",
"version": "0.1.89",
"version": "0.1.90",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@scrypted/coreml",
"version": "0.1.89",
"version": "0.1.90",
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
}

View File

@@ -50,5 +50,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.89"
"version": "0.1.90"
}

View File

@@ -29,21 +29,12 @@ class CoreMLClipEmbedding(ClipEmbedding):
"vision.mlpackage/Data/com.apple.CoreML/model.mlmodel",
]
# NOTE(review): this span appears to interleave two versions of the CoreML
# model loader (loadModel and initModel) from a diff whose +/- markers and
# indentation were lost — confirm against the repository before editing.
#
# loadModel(files): selects the first path in `files` that ends
# (case-insensitively) with 'text.mlpackage/manifest.json' and the first that
# ends with 'vision.mlpackage/manifest.json', raising ValueError when either
# is missing, then opens the directory containing each manifest as a
# ct.models.MLModel. The ValueError messages say "XML" although the search is
# for manifest.json files.
def loadModel(self, files):
# find the text and vision mlpackage manifest.json files in the files list
text_manifest = [f for f in files if f.lower().endswith('text.mlpackage/manifest.json')]
if not text_manifest:
raise ValueError("No XML model file found in the provided files list")
text_manifest = text_manifest[0]
vision_manifest = [f for f in files if f.lower().endswith('vision.mlpackage/manifest.json')]
if not vision_manifest:
raise ValueError("No XML model file found in the provided files list")
vision_manifest = vision_manifest[0]
# the model is opened from the manifest's parent directory, not the manifest itself
textModel = ct.models.MLModel(os.path.dirname(text_manifest))
visionModel = ct.models.MLModel(os.path.dirname(vision_manifest))
# initModel(): takes the path returned by
# self.downloadHuggingFaceModelLocalFallback("clip") and opens its
# "text.mlpackage" and "vision.mlpackage" entries as ct.models.MLModel.
def initModel(self):
model_path = self.downloadHuggingFaceModelLocalFallback("clip")
text = os.path.join(model_path, "text.mlpackage")
vision = os.path.join(model_path, "vision.mlpackage")
textModel = ct.models.MLModel(text)
visionModel = ct.models.MLModel(vision)
# returns the (text model, vision model) pair
return textModel, visionModel

View File

@@ -1,12 +1,12 @@
{
"name": "@scrypted/onnx",
"version": "0.1.130",
"version": "0.1.131",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@scrypted/onnx",
"version": "0.1.130",
"version": "0.1.131",
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
}

View File

@@ -50,5 +50,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.130"
"version": "0.1.131"
}

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio
from typing import Any
import os
import numpy as np
import onnxruntime
@@ -19,24 +20,12 @@ class ONNXClipEmbedding(ClipEmbedding):
# Constructor: forwards plugin and nativeId unchanged to the base-class
# initializer; no additional state is set here.
def __init__(self, plugin, nativeId: str):
super().__init__(plugin=plugin, nativeId=nativeId)
# Returns the names of the two ONNX model files: "text.onnx" and "vision.onnx".
# NOTE(review): presumably consumed by a base-class initModel that iterates
# self.getFiles() to fetch each file — confirm against the caller.
def getFiles(self):
return [
"text.onnx",
"vision.onnx",
]
def initModel(self):
model_path = self.downloadHuggingFaceModelLocalFallback("clip")
def loadModel(self, files):
# find the text and vision onnx files in the files list
text_onnx = [f for f in files if f.lower().endswith('text.onnx')]
if not text_onnx:
raise ValueError("No onnx model file found in the provided files list")
text_onnx = text_onnx[0]
vision_onnx = [f for f in files if f.lower().endswith('vision.onnx')]
if not vision_onnx:
raise ValueError("No onnx model file found in the provided files list")
vision_onnx = vision_onnx[0]
text_onnx = os.path.join(model_path, 'text.onnx')
vision_onnx = os.path.join(model_path, 'vision.onnx')
compiled_models_array = []
compiled_models = {}

View File

@@ -1,12 +1,12 @@
{
"name": "@scrypted/openvino",
"version": "0.1.194",
"version": "0.1.195",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "@scrypted/openvino",
"version": "0.1.194",
"version": "0.1.195",
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
}

View File

@@ -50,5 +50,5 @@
"devDependencies": {
"@scrypted/sdk": "file:../../sdk"
},
"version": "0.1.194"
"version": "0.1.195"
}

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import asyncio
from typing import Any
import os
import numpy as np
import openvino as ov
@@ -31,17 +32,12 @@ class OpenVINOClipEmbedding(ClipEmbedding):
f"openvino/vision{model_suffix}.bin"
]
def loadModel(self, files):
# find the xml file in the files list
text_xml = [f for f in files if f.lower().endswith(text_xml_name)]
if not text_xml:
raise ValueError("No XML model file found in the provided files list")
text_xml = text_xml[0]
def initModel(self):
model_path = self.downloadHuggingFaceModelLocalFallback("clip")
vision_xml = [f for f in files if f.lower().endswith(vision_xml_name)]
if not vision_xml:
raise ValueError("No XML model file found in the provided files list")
vision_xml = vision_xml[0]
# build the text and vision xml paths under model_path
text_xml = os.path.join(model_path, "text.xml")
vision_xml = os.path.join(model_path, "vision.xml")
textModel = self.plugin.core.compile_model(text_xml, self.plugin.mode)
model = self.plugin.core.read_model(vision_xml)

View File

@@ -23,45 +23,38 @@ class ClipEmbedding(PredictPlugin, scrypted_sdk.TextEmbedding, scrypted_sdk.Imag
self.loop = asyncio.get_event_loop()
self.minThreshold = 0.5
self.model = self.initModel()
try:
self.model = self.initModel()
except Exception as e:
self.print("Error initializing CLIP model:", e)
raise
self.processor = None
print("Loading CLIP processor from local cache.")
self.print("Loading CLIP processor from local cache.")
try:
self.processor = CLIPProcessor.from_pretrained(
hf_id,
local_files_only=True,
)
print("Loaded CLIP processor from local cache.")
self.print("Loaded CLIP processor from local cache.")
except Exception:
print("CLIP processor not available in local cache yet.")
self.print("CLIP processor not available in local cache yet.")
asyncio.ensure_future(self.refreshClipProcessor(hf_id), loop=self.loop)
# Reloads the CLIP processor for hf_id by running
# CLIPProcessor.from_pretrained(hf_id) in a worker thread via
# asyncio.to_thread (no local_files_only argument is passed here), then stores
# the result on self.processor. Any Exception is caught and only logged, so a
# failed refresh leaves self.processor as it was.
# NOTE(review): each log call appears twice (print / self.print) and a
# getFiles stub sits between the except line and its self.print — this looks
# like old and new diff lines interleaved with markers lost; confirm against
# the repository.
async def refreshClipProcessor(self, hf_id: str):
try:
print("Refreshing CLIP processor cache (online).")
self.print("Refreshing CLIP processor cache (online).")
processor = await asyncio.to_thread(
CLIPProcessor.from_pretrained,
hf_id,
)
self.processor = processor
print("Refreshed CLIP processor cache.")
self.print("Refreshed CLIP processor cache.")
except Exception:
print("CLIP processor cache refresh failed.")
# getFiles stub: does nothing and returns None
def getFiles(self):
pass
self.print("CLIP processor cache refresh failed.")
# For every name returned by self.getFiles(), builds a URL under
# https://huggingface.co/koushd/clip/resolve/main/ and passes it to
# self.downloadFile together with the key f"{self.id}/{file}"; the values
# downloadFile returns are collected in order and handed to self.loadModel,
# whose result is returned.
# NOTE(review): downloadFile presumably returns a local filesystem path —
# confirm against its definition.
def initModel(self):
local_files: list[str] = []
for file in self.getFiles():
remote_file = "https://huggingface.co/koushd/clip/resolve/main/" + file
localFile = self.downloadFile(remote_file, f"{self.id}/{file}")
local_files.append(localFile)
return self.loadModel(local_files)
# Placeholder: does nothing and returns None for any `files` list.
# NOTE(review): presumably meant to be overridden by backend-specific
# subclasses — confirm.
def loadModel(self, files: list[str]):
pass
async def getImageEmbedding(self, input):