
feat(ml): ML on Rockchip NPUs (#15241)

This commit is contained in:
Yoni Yang 2025-03-18 00:04:08 +08:00 committed by GitHub
parent 1e184a70f1
commit 14c3b99c0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 2417 additions and 4726 deletions


@ -49,7 +49,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: ["", "-cuda", "-openvino", "-armnn"]
suffix: ["", "-cuda", "-openvino", "-armnn","-rknn"]
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
@ -129,6 +129,9 @@ jobs:
runner: ubuntu-24.04-arm
device: armnn
suffix: -armnn
- platforms: linux/arm64
device: rknn
suffix: -rknn
steps:
- name: Prepare
@ -454,4 +457,4 @@ jobs:
run: exit 1
- name: All jobs passed or skipped
if: ${{ !(contains(needs.*.result, 'failure')) }}
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"


@ -95,12 +95,12 @@ services:
image: immich-machine-learning-dev:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:


@ -38,12 +38,12 @@ services:
image: immich-machine-learning:latest
# extends:
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
build:
context: ../machine-learning
dockerfile: Dockerfile
args:
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
- DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
ports:
- 3003:3003
volumes:
@ -77,22 +77,12 @@ services:
- 5432:5432
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always
# set IMMICH_TELEMETRY_INCLUDE=all in .env to enable metrics
@ -109,7 +99,7 @@ services:
# add data source for http://immich-prometheus:9090 to get started
immich-grafana:
container_name: immich_grafana
command: ['./run.sh', '-disable-reporting']
command: [ './run.sh', '-disable-reporting' ]
ports:
- 3000:3000
image: grafana/grafana:11.5.2-ubuntu@sha256:8b5858c447e06fd7a89006b562ba7bba7c4d5813600c7982374c41852adefaeb


@ -33,12 +33,12 @@ services:
immich-machine-learning:
container_name: immich_machine_learning
# For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
# For hardware acceleration, add one of -[armnn, cuda, openvino, rknn] to the image tag.
# Example tag: ${IMMICH_VERSION:-release}-cuda
image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
# extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
# file: hwaccel.ml.yml
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
# service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference - use the `-wsl` version for WSL2 where applicable
volumes:
- model-cache:/cache
env_file:
@ -67,22 +67,12 @@ services:
- ${DB_DATA_LOCATION}:/var/lib/postgresql/data
healthcheck:
test: >-
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
--command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
echo "checksum failure count is $$Chksum";
[ "$$Chksum" = '0' ] || exit 1
pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
interval: 5m
start_interval: 30s
start_period: 5m
command: >-
postgres
-c shared_preload_libraries=vectors.so
-c 'search_path="$$user", public, vectors'
-c logging_collector=on
-c max_wal_size=2GB
-c shared_buffers=512MB
-c wal_compression=on
postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
restart: always
volumes:


@ -13,6 +13,13 @@ services:
volumes:
- /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
- /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)
rknn:
security_opt:
- systempaths=unconfined
- apparmor=unconfined
devices:
- /dev/dri:/dev/dri
cpu: {}


@ -12,6 +12,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- ARM NN (Mali)
- CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher)
- OpenVINO (Intel GPUs such as Iris Xe and Arc)
- RKNN (Rockchip)
## Limitations
@ -19,6 +20,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- Only Linux and Windows (through WSL2) servers are supported.
- ARM NN is only supported on devices with Mali GPUs. Other Arm devices are not supported.
- Some models may not be compatible with certain backends. CUDA is the most reliable.
- Search latency isn't improved by ARM NN due to model compatibility issues preventing its use. However, smart search jobs do make use of ARM NN.
## Prerequisites
@ -33,6 +35,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- The `hwaccel.ml.yml` file assumes the path to it is `/usr/lib/libmali.so`, so update accordingly if it is elsewhere
- The `hwaccel.ml.yml` file assumes an additional file `/lib/firmware/mali_csffw.bin`, so update accordingly if your device's driver does not require this file
- Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for ARM NN specific settings
- In particular, setting `MACHINE_LEARNING_ANN_FP16_TURBO` can significantly improve performance at the cost of very slightly lower accuracy
#### CUDA
@ -47,6 +50,16 @@ You do not need to redo any machine learning jobs after enabling hardware accele
- Ensure the server's kernel version is new enough to use the device for hardware acceleration.
- Expect higher RAM usage when using OpenVINO compared to CPU processing.
#### RKNN
- You must have a supported Rockchip SoC: only RK3566, RK3568, RK3576 and RK3588 are currently supported (the SoC check is sketched after this list).
- Make sure you have the appropriate Linux kernel driver installed
- This is usually pre-installed on the device vendor's Linux images
- RKNPU driver V0.9.8 or later must be available on the host server
- You may confirm this by running `cat /sys/kernel/debug/rknpu/version` to check the version
- Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for RKNN specific settings
- In particular, setting `MACHINE_LEARNING_RKNN_THREADS` to 2 or 3 can _dramatically_ improve performance for RK3576 and RK3588 compared to the default of 1, at the expense of multiplying the amount of RAM each model uses by that amount.
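
The new `rknnpool.py` in this commit decides availability by matching the device tree compatible string against the supported SoC list. A minimal standalone sketch of that check, which you could run on the host to confirm support (it assumes the standard `/proc/device-tree/compatible` path used in the diff below):

```python
from pathlib import Path

RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]


def get_soc(device_tree_path: str = "/proc/device-tree/compatible") -> str | None:
    # The compatible string embeds the SoC name, e.g. "rockchip,rk3588".
    compatible = Path(device_tree_path).read_text()
    return next((soc for soc in RKNN_SUPPORTED_SOCS if soc in compatible), None)


if __name__ == "__main__":
    soc = get_soc()
    print(f"Supported Rockchip SoC detected: {soc}" if soc else "No supported Rockchip SoC found")
```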
## Setup
1. If you do not already have it, download the latest [`hwaccel.ml.yml`][hw-file] file and ensure it's in the same folder as the `docker-compose.yml`.
@ -127,3 +140,12 @@ Note that you should increase job concurrencies to increase overall utilization
- If you encounter an error when a model is running, try a different model to see if the issue is model-specific.
- You may want to increase concurrency past the default for higher utilization. However, keep in mind that this will also increase VRAM consumption.
- Larger models benefit more from hardware acceleration, if you have the VRAM for them.
- Compared to ARM NN, RKNPU has:
- Wider model support (including for search, which ARM NN does not accelerate)
- Less heat generation
- Very slightly lower accuracy (RKNPU always uses FP16, while ARM NN by default uses higher precision FP32 unless `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled)
- Varying speed (tested on RK3588):
- If `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, RKNPU will have substantially lower throughput for ML jobs than ARM NN in most cases, but similar latency (such as when searching)
- If `MACHINE_LEARNING_RKNN_THREADS` is set to 3, it will be somewhat faster than ARM NN at FP32, but somewhat slower than ARM NN if `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled
- When other tasks also use the GPU (like transcoding), RKNPU has a significant advantage over ARM NN as it uses the otherwise idle NPU instead of competing for GPU usage
- Lower RAM usage if `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, but significantly higher if greater than 1 (which is necessary for it to fully utilize the NPU and hence be comparable in speed to ARM NN)


@ -170,6 +170,8 @@ Redis (Sentinel) URL example JSON before encoding:
| `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning |
| `MACHINE_LEARNING_PING_TIMEOUT` | How long (ms) to wait for a PING response when checking if an ML server is available | `2000` | server |
| `MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME` | How long to ignore ML servers that are offline before trying again | `30000` | server |
| `MACHINE_LEARNING_RKNN` | Enable RKNN hardware acceleration if supported | `True` | machine learning |
| `MACHINE_LEARNING_RKNN_THREADS` | How many RKNN runtime threads to spin up for inference | `1` | machine learning |
\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.


@ -1,5 +1,24 @@
*.zip
*.onnx
*.rknn
*.npy
*_attr__value
*.weight
*.bias
onnx__*
*in_proj_bias
*.proj
*.latent
*.pos_embed
vocab.txt
export/immich_model_exporter/models/**/README.md
tokenizer.json
tokenizer_config.json
special_tokens_map.json
preprocess_cfg.json
config.json
merges.txt
vocab.json
upload/
venv/
__pycache__/


@ -15,6 +15,8 @@ RUN mkdir /opt/armnn && \
cd /opt/ann && \
sh build.sh
FROM builder-cpu AS builder-rknn
FROM builder-${DEVICE} AS builder
ARG DEVICE
@ -77,6 +79,10 @@ COPY --from=builder-armnn \
/opt/ann/build.sh \
/opt/armnn/
FROM prod-cpu AS prod-rknn
ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/v2.3.0/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so /usr/lib/
FROM prod-${DEVICE} AS prod
ARG DEVICE
@ -123,4 +129,4 @@ ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]
HEALTHCHECK CMD python3 healthcheck.py
HEALTHCHECK CMD python3 healthcheck.py


@ -64,6 +64,8 @@ class Settings(BaseSettings):
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
rknn_threads: int = 1
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None
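
These two fields presumably map to the `MACHINE_LEARNING_RKNN` and `MACHINE_LEARNING_RKNN_THREADS` variables documented earlier. A standalone pydantic-settings sketch of that wiring (the `MACHINE_LEARNING_` env prefix is an assumption inferred from the variable names; Immich's real `Settings` class configures its environment handling elsewhere):

```python
from pydantic_settings import BaseSettings, SettingsConfigDict


class MlSettings(BaseSettings):
    # Assumed prefix, inferred from the documented variable names.
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_")

    rknn: bool = True        # MACHINE_LEARNING_RKNN
    rknn_threads: int = 1    # MACHINE_LEARNING_RKNN_THREADS


# With MACHINE_LEARNING_RKNN_THREADS=3 in the environment:
# MlSettings().rknn_threads == 3
```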


@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
yield mocked
@pytest.fixture(scope="function")
def rknn_session() -> Iterator[mock.Mock]:
with mock.patch("app.sessions.rknn.RknnPoolExecutor") as mocked:
yield mocked
@pytest.fixture(scope="function")
def rmtree() -> Iterator[mock.Mock]:
with mock.patch("app.models.base.rmtree", autospec=True) as mocked:


@ -226,9 +226,9 @@ async def load(model: InferenceModel) -> InferenceModel:
except FileNotFoundError as e:
if model.model_format == ModelFormat.ONNX:
raise e
log.exception(e)
log.warning(
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it."
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
exc_info=e,
)
model.model_format = ModelFormat.ONNX
model.load()


@ -8,6 +8,7 @@ from typing import Any, ClassVar
from huggingface_hub import snapshot_download
import ann.ann
import app.sessions.rknn as rknn
from app.sessions.ort import OrtSession
from ..config import clean_name, log, settings
@ -66,12 +67,17 @@ class InferenceModel(ABC):
pass
def _download(self) -> None:
ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
ignored_patterns: dict[ModelFormat, list[str]] = {
ModelFormat.ONNX: ["*.armnn", "*.rknn"],
ModelFormat.ARMNN: ["*.rknn"],
ModelFormat.RKNN: ["*.armnn"],
}
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
local_dir=self.cache_dir,
ignore_patterns=ignore_patterns,
ignore_patterns=ignored_patterns.get(self.model_format, []),
)
def _load(self) -> ModelSession:
@ -108,17 +114,25 @@ class InferenceModel(ABC):
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = rknn.RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
def model_path_for_format(self, model_format: ModelFormat) -> Path:
model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
if model_path_prefix:
return self.model_dir / model_path_prefix / f"model.{model_format}"
return self.model_dir / f"model.{model_format}"
@property
def model_dir(self) -> Path:
return self.cache_dir / self.model_type.value
@property
def model_path(self) -> Path:
return self.model_dir / f"model.{self.model_format}"
return self.model_path_for_format(self.model_format)
@property
def model_task(self) -> ModelTask:
@ -155,4 +169,9 @@ class InferenceModel(ABC):
@property
def _model_format_default(self) -> ModelFormat:
return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
if rknn.is_available:
return ModelFormat.RKNN
elif ann.ann.is_available and settings.ann:
return ModelFormat.ARMNN
else:
return ModelFormat.ONNX
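
The practical effect: ONNX and ARM NN weights stay at the top of the model directory, while RKNN weights resolve under a per-SoC `rknpu/<soc>/` prefix. A rough illustration (the cache layout shown is hypothetical, not taken from the diff):

```python
from pathlib import Path

# Assuming soc_name == "rk3588", so model_prefix == Path("rknpu") / "rk3588".
model_dir = Path("/cache/clip/ViT-B-32__openai/textual")   # hypothetical model_dir
onnx_path = model_dir / "model.onnx"                        # ModelFormat.ONNX
armnn_path = model_dir / "model.armnn"                      # ModelFormat.ARMNN
rknn_path = model_dir / "rknpu" / "rk3588" / "model.rknn"   # ModelFormat.RKNN adds the SoC prefix
```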


@ -44,6 +44,18 @@ _OPENCLIP_MODELS = {
"nllb-clip-base-siglip__v1",
"nllb-clip-large-siglip__mrl",
"nllb-clip-large-siglip__v1",
"ViT-B-16-SigLIP2__webli",
"ViT-B-32-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-384__webli",
"ViT-L-16-SigLIP2-512__webli",
"ViT-SO400M-14-SigLIP2-378__webli",
"ViT-SO400M-14-SigLIP2__webli",
"ViT-SO400M-16-SigLIP2-256__webli",
"ViT-SO400M-16-SigLIP2-384__webli",
"ViT-SO400M-16-SigLIP2-512__webli",
"ViT-gopt-16-SigLIP2-256__webli",
"ViT-gopt-16-SigLIP2-384__webli",
}
@ -65,6 +77,9 @@ _INSIGHTFACE_MODELS = {
SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]
RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
def get_model_source(model_name: str) -> ModelSource | None:
cleaned_name = clean_name(model_name)


@ -31,7 +31,7 @@ class FaceRecognizer(InferenceModel):
self._add_batch_axis(self.model_path)
session = self._make_session(self.model_path)
self.model = ArcFaceONNX(
self.model_path.with_suffix(".onnx").as_posix(),
self.model_path_for_format(ModelFormat.ONNX).as_posix(),
session=session,
)
return session


@ -35,6 +35,7 @@ class ModelType(StrEnum):
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
RKNN = "rknn"
class ModelSource(StrEnum):


@ -0,0 +1,76 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from app.config import log, settings
from app.schemas import SessionNode
from .rknnpool import RknnPoolExecutor, is_available, soc_name
is_available = is_available and settings.rknn
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
return outputs
input_output_mapping: dict[str, dict[str, Any]] = {
"detection": {
"input": {"norm_tensor:0": (1, 3, 640, 640)},
"output": {
"norm_tensor:1": (12800, 1),
"norm_tensor:2": (3200, 1),
"norm_tensor:3": (800, 1),
"norm_tensor:4": (12800, 4),
"norm_tensor:5": (3200, 4),
"norm_tensor:6": (800, 4),
"norm_tensor:7": (12800, 10),
"norm_tensor:8": (3200, 10),
"norm_tensor:9": (800, 10),
},
},
"recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}
class RknnSession:
def __init__(self, model_path: Path) -> None:
self.model_type = "detection" if "detection" in model_path.parts else "recognition"
self.tpe = settings.rknn_threads
log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")
def get_inputs(self) -> list[SessionNode]:
return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["input"].items()]
def get_outputs(self) -> list[SessionNode]:
return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["output"].items()]
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[NDArray[np.float32]]:
input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
self.rknnpool.put(input_data)
res = self.rknnpool.get()
if res is None:
raise RuntimeError("RKNN inference failed!")
return res
class RknnNode(NamedTuple):
name: str | None
shape: tuple[int, ...]
__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]
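
A rough usage sketch for the new session class (not part of the commit; the model path is hypothetical, and a converted `model.rknn` plus the RKNN runtime are assumed to be present):

```python
from pathlib import Path

import numpy as np

from app.sessions.rknn import RknnSession

# "recognition" in the path selects the (1, 3, 112, 112) input mapping defined above.
session = RknnSession(Path("/cache/facial-recognition/buffalo_l/recognition/rknpu/rk3588/model.rknn"))
face = np.random.rand(1, 3, 112, 112).astype(np.float32)
outputs = session.run(None, {"norm_tensor:0": face})
print(outputs[0].shape)  # expected (1, 512) per the output mapping above
```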


@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0
import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable
import numpy as np
from numpy.typing import NDArray
from app.config import log
from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
def get_soc(device_tree_path: Path | str) -> str | None:
try:
with Path(device_tree_path).open() as f:
device_compatible_str = f.read()
for soc in RKNN_SUPPORTED_SOCS:
if soc in device_compatible_str:
return soc
log.warning("Device is not supported for RKNN")
except OSError as e:
log.warning(f"Could not read {device_tree_path}. Reason: %s", e)
return None
soc_name = None
is_available = False
try:
from rknnlite.api import RKNNLite
soc_name = get_soc("/proc/device-tree/compatible")
is_available = soc_name is not None
except ImportError:
log.debug("RKNN is not available")
def init_rknn(model_path: str) -> "RKNNLite":
if not is_available:
raise RuntimeError("rknn is not available!")
rknn_lite = RKNNLite()
rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
ret = rknn_lite.load_rknn(model_path)
if ret != 0:
raise RuntimeError("Failed to load RKNN model")
if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
else:
ret = rknn_lite.init_runtime() # Please do not set this parameter on other platforms.
if ret != 0:
raise RuntimeError("Failed to inititalize RKNN runtime environment")
return rknn_lite
class RknnPoolExecutor:
def __init__(
self,
model_path: str,
tpes: int,
func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
) -> None:
self.tpes = tpes
self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
self.pool = ThreadPoolExecutor(max_workers=tpes)
self.func = func
self.num = 0
def put(self, inputs: list[NDArray[np.float32]]) -> None:
self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
self.num += 1
def get(self) -> list[NDArray[np.float32]] | None:
if self.queue.empty():
return None
fut = self.queue.get()
return fut.result()
def release(self) -> None:
self.pool.shutdown()
for rknn_lite in self.rknn_pool:
rknn_lite.release()
def __del__(self) -> None:
self.release()
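
The pool round-robins submissions across `tpes` independently loaded `RKNNLite` instances (`self.rknn_pool[self.num % self.tpes]`), which is why raising `MACHINE_LEARNING_RKNN_THREADS` multiplies per-model RAM usage. A minimal usage sketch (hypothetical model path; assumes a supported SoC and the `rknnlite` runtime):

```python
import numpy as np

from app.sessions.rknn import run_inference
from app.sessions.rknnpool import RknnPoolExecutor

pool = RknnPoolExecutor(model_path="/models/recognition/model.rknn", tpes=2, func=run_inference)
try:
    pool.put([np.random.rand(1, 3, 112, 112).astype(np.float32)])  # submit one batch
    result = pool.get()  # blocks on the matching Future; returns None if nothing was queued
finally:
    pool.release()  # shut down the executor and release every RKNNLite instance
```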


@ -25,6 +25,7 @@ from app.models.facial_recognition.detection import FaceDetector
from app.models.facial_recognition.recognition import FaceRecognizer
from app.sessions.ann import AnnSession
from app.sessions.ort import OrtSession
from app.sessions.rknn import RknnSession, run_inference
from .config import Settings, settings
from .models.base import InferenceModel
@ -69,6 +70,14 @@ class TestBase:
assert encoder.model_format == ModelFormat.ARMNN
def test_sets_default_model_format_to_rknn_if_available(self, mocker: MockerFixture) -> None:
mocker.patch.object(settings, "rknn", True)
mocker.patch("app.sessions.rknn.is_available", True)
encoder = OpenClipTextualEncoder("ViT-B-32__openai")
assert encoder.model_format == ModelFormat.RKNN
def test_casts_cache_dir_string_to_path(self) -> None:
cache_dir = "/test_cache"
encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir)
@ -125,7 +134,7 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
ignore_patterns=["*.armnn", "*.rknn"],
)
def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
@ -136,7 +145,18 @@ class TestBase:
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=[],
ignore_patterns=["*.rknn"],
)
def test_download_downloads_rknn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.RKNN)
encoder.download()
snapshot_download.assert_called_once_with(
"immich-app/ViT-B-32__openai",
cache_dir=encoder.cache_dir,
local_dir=encoder.cache_dir,
ignore_patterns=["*.armnn"],
)
def test_throws_exception_if_model_path_does_not_exist(
@ -328,6 +348,33 @@ class TestAnnSession:
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestRknnSession:
def test_creates_rknn_session(self, rknn_session: mock.Mock, info: mock.Mock, mocker: MockerFixture) -> None:
model_path = mock.MagicMock(spec=Path)
tpe = 1
mocker.patch("app.sessions.rknn.soc_name", "rk3566")
mocker.patch("app.sessions.rknn.is_available", True)
RknnSession(model_path)
rknn_session.assert_called_once_with(model_path=model_path.as_posix(), tpes=tpe, func=run_inference)
info.assert_has_calls([mock.call(f"Loaded RKNN model from {model_path} with {tpe} threads.")])
def test_run_rknn(self, rknn_session: mock.Mock, mocker: MockerFixture) -> None:
rknn_session.return_value.load.return_value = 123
np_spy = mocker.spy(np, "ascontiguousarray")
mocker.patch("app.sessions.rknn.soc_name", "rk3566")
session = RknnSession(Path("ViT-B-32__openai"))
[input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
input_feed = {"input.1": input1, "input.2": input2}
session.run(None, input_feed)
rknn_session.return_value.put.assert_called_once_with([input1, input2])
assert np_spy.call_count == 2
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@ -829,9 +876,7 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
mock_model.load.assert_not_called()
async def test_falls_back_to_onnx_if_other_format_does_not_exist(
self, exception: mock.Mock, warning: mock.Mock
) -> None:
async def test_falls_back_to_onnx_if_other_format_does_not_exist(self, warning: mock.Mock) -> None:
mock_model = mock.Mock(spec=InferenceModel)
mock_model.model_name = "test_model_name"
mock_model.model_type = ModelType.VISUAL
@ -846,8 +891,9 @@ class TestLoad:
mock_model.clear_cache.assert_not_called()
assert mock_model.load.call_count == 2
exception.assert_called_once_with(error)
warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.")
warning.assert_called_once_with(
"ARMNN is available, but model 'test_model_name' does not support it.", exc_info=error
)
mock_model.model_format = ModelFormat.ONNX


@ -0,0 +1 @@
3.12


@ -1,20 +0,0 @@
FROM mambaorg/micromamba:bookworm-slim@sha256:e3797091302382ea841498bc93a7b0a50f7c1448333d5e946d2d1608d0c5f43d AS builder
ENV TRANSFORMERS_CACHE=/cache \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PATH="/opt/venv/bin:$PATH" \
PYTHONPATH=/usr/src
COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml /tmp/conda-lock.yml
RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
micromamba remove -y -n base cxx-compiler && \
micromamba clean --all --yes
WORKDIR /usr/src/app
COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
COPY --chown=$MAMBA_USER:$MAMBA_USER app .
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["./start.sh"]

File diff suppressed because it is too large.


@ -1,15 +0,0 @@
name: base
channels:
- conda-forge
platforms:
- linux-64
- linux-aarch64
dependencies:
- black
- conda-lock
- mypy
- pytest
- pytest-cov
- pytest-mock
- ruff
category: dev


@ -1,25 +0,0 @@
name: base
channels:
- conda-forge
- nvidia
- pytorch
platforms:
- linux-64
dependencies:
- cxx-compiler
- onnx==1.*
- onnxruntime==1.*
- open-clip-torch==2.*
- orjson==3.*
- pip
- python==3.11.*
- pytorch>=2.3
- rich==13.*
- safetensors==0.*
- setuptools==68.*
- torchvision
- transformers==4.*
- pip:
- multilingual-clip
- onnxsim
category: main


@ -0,0 +1,98 @@
from pathlib import Path
import typer
from tenacity import retry, stop_after_attempt, wait_fixed
from typing_extensions import Annotated
from .exporters.constants import DELETE_PATTERNS, SOURCE_TO_METADATA, ModelSource
from .exporters.onnx import export as onnx_export
from .exporters.rknn import export as rknn_export
app = typer.Typer(pretty_exceptions_show_locals=False)
def generate_readme(model_name: str, model_source: ModelSource) -> str:
(name, link, type) = SOURCE_TO_METADATA[model_source]
match model_source:
case ModelSource.MCLIP:
tags = ["immich", "clip", "multilingual"]
case ModelSource.OPENCLIP:
tags = ["immich", "clip"]
lowered = model_name.lower()
if "xlm" in lowered or "nllb" in lowered:
tags.append("multilingual")
case ModelSource.INSIGHTFACE:
tags = ["immich", "facial-recognition"]
case _:
raise ValueError(f"Unsupported model source {model_source}")
return f"""---
tags:
{" - " + "\n - ".join(tags)}
---
# Model Description
This repo contains ONNX exports for the associated {type} model by {name}. See the [{name}]({link}) repo for more info.
This repo is specifically intended for use with [Immich](https://immich.app/), a self-hosted photo library.
"""
@app.command()
def main(
model_name: str,
model_source: ModelSource,
output_dir: Path = Path("./models"),
no_cache: bool = False,
hf_organization: str = "immich-app",
hf_auth_token: Annotated[str | None, typer.Option(envvar="HF_AUTH_TOKEN")] = None,
) -> None:
hf_model_name = model_name.split("/")[-1]
hf_model_name = hf_model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
hf_model_name = hf_model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
output_dir = output_dir / hf_model_name
match model_source:
case ModelSource.MCLIP | ModelSource.OPENCLIP:
output_dir.mkdir(parents=True, exist_ok=True)
onnx_export(model_name, model_source, output_dir, no_cache=no_cache)
case ModelSource.INSIGHTFACE:
from huggingface_hub import snapshot_download
# TODO: start from insightface dump instead of downloading from HF
snapshot_download(f"immich-app/{hf_model_name}", local_dir=output_dir)
case _:
raise ValueError(f"Unsupported model source {model_source}")
try:
rknn_export(output_dir, no_cache=no_cache)
except Exception as e:
print(f"Failed to export model {model_name} to rknn: {e}")
(output_dir / "rknpu").unlink(missing_ok=True)
readme_path = output_dir / "README.md"
if no_cache or not readme_path.exists():
with open(readme_path, "w") as f:
f.write(generate_readme(model_name, model_source))
if hf_auth_token is not None:
from huggingface_hub import create_repo, upload_folder
repo_id = f"{hf_organization}/{hf_model_name}"
@retry(stop=stop_after_attempt(5), wait=wait_fixed(5))
def upload_model() -> None:
create_repo(repo_id, exist_ok=True, token=hf_auth_token)
upload_folder(
repo_id=repo_id,
folder_path=output_dir,
# remote repo files to be deleted before uploading
# deletion is in the same commit as the upload, so it's atomic
delete_patterns=DELETE_PATTERNS,
token=hf_auth_token,
)
upload_model()
if __name__ == "__main__":
typer.run(main)


@ -0,0 +1,42 @@
from enum import StrEnum
from typing import NamedTuple
class ModelSource(StrEnum):
INSIGHTFACE = "insightface"
MCLIP = "mclip"
OPENCLIP = "openclip"
class SourceMetadata(NamedTuple):
name: str
link: str
type: str
SOURCE_TO_METADATA = {
ModelSource.MCLIP: SourceMetadata("M-CLIP", "https://huggingface.co/M-CLIP", "CLIP"),
ModelSource.OPENCLIP: SourceMetadata("OpenCLIP", "https://github.com/mlfoundations/open_clip", "CLIP"),
ModelSource.INSIGHTFACE: SourceMetadata(
"InsightFace", "https://github.com/deepinsight/insightface/tree/master", "facial recognition"
),
}
RKNN_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
# glob to delete old UUID blobs when reuploading models
_uuid_char = "[a-fA-F0-9]"
_uuid_glob = _uuid_char * 8 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 12
DELETE_PATTERNS = [
"**/*onnx*",
"**/Constant*",
"**/*.weight",
"**/*.bias",
"**/*.proj",
"**/*in_proj_bias",
"**/*.npy",
"**/*.latent",
"**/*.pos_embed",
f"**/{_uuid_glob}",
]


@ -0,0 +1,20 @@
from pathlib import Path
from ..constants import ModelSource
from .models import mclip, openclip
def export(
model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
) -> None:
visual_dir = output_dir / "visual"
textual_dir = output_dir / "textual"
match model_source:
case ModelSource.MCLIP:
mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
case ModelSource.OPENCLIP:
name, _, pretrained = model_name.partition("__")
config = openclip.OpenCLIPModelConfig(name, pretrained)
openclip.to_onnx(config, opset_version, visual_dir, textual_dir, no_cache=no_cache)
case _:
raise ValueError(f"Unsupported model source {model_source}")


@ -1,11 +1,6 @@
import os
import tempfile
import warnings
from pathlib import Path
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
from transformers import AutoTokenizer
from typing import Any
from .openclip import OpenCLIPModelConfig
from .openclip import to_onnx as openclip_to_onnx
@ -21,25 +16,40 @@ _MCLIP_TO_OPENCLIP = {
def to_onnx(
model_name: str,
opset_version: int,
output_dir_visual: Path | str,
output_dir_textual: Path | str,
no_cache: bool = False,
) -> tuple[Path, Path]:
textual_path = get_model_path(output_dir_textual)
with tempfile.TemporaryDirectory() as tmpdir:
model = MultilingualCLIP.from_pretrained(model_name, cache_dir=os.environ.get("CACHE_DIR", tmpdir))
if no_cache or not textual_path.exists():
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
from transformers import AutoTokenizer
torch.backends.mha.set_fastpath_enabled(False)
model = MultilingualCLIP.from_pretrained(model_name)
AutoTokenizer.from_pretrained(model_name).save_pretrained(output_dir_textual)
model.eval()
for param in model.parameters():
param.requires_grad_(False)
export_text_encoder(model, textual_path)
visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
assert visual_path is not None, "Visual model export failed"
_export_text_encoder(model, textual_path, opset_version)
else:
print(f"Model {textual_path} already exists, skipping")
visual_path, _ = openclip_to_onnx(
_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache
)
assert visual_path is not None, "Visual model export failed"
return visual_path, textual_path
def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> None:
def _export_text_encoder(model: Any, output_path: Path | str, opset_version: int) -> None:
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
output_path = Path(output_path)
def forward(self: MultilingualCLIP, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
@ -61,7 +71,7 @@ def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> Non
output_path.as_posix(),
input_names=["input_ids", "attention_mask"],
output_names=["embedding"],
opset_version=17,
opset_version=opset_version,
# dynamic_axes={
# "input_ids": {0: "batch_size", 1: "sequence_length"},
# "attention_mask": {0: "batch_size", 1: "sequence_length"},


@ -0,0 +1,153 @@
import warnings
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Any
from .util import get_model_path, save_config
@dataclass
class OpenCLIPModelConfig:
name: str
pretrained: str
@cached_property
def model_config(self) -> dict[str, Any]:
import open_clip
config: dict[str, Any] | None = open_clip.get_model_config(self.name)
if config is None:
raise ValueError(f"Unknown model {self.name}")
return config
@property
def image_size(self) -> int:
image_size: int = self.model_config["vision_cfg"]["image_size"]
return image_size
@property
def sequence_length(self) -> int:
context_length: int = self.model_config["text_cfg"].get("context_length", 77)
return context_length
def to_onnx(
model_cfg: OpenCLIPModelConfig,
opset_version: int,
output_dir_visual: Path | str | None = None,
output_dir_textual: Path | str | None = None,
no_cache: bool = False,
) -> tuple[Path | None, Path | None]:
visual_path = None
textual_path = None
if output_dir_visual is not None:
output_dir_visual = Path(output_dir_visual)
visual_path = get_model_path(output_dir_visual)
if output_dir_textual is not None:
output_dir_textual = Path(output_dir_textual)
textual_path = get_model_path(output_dir_textual)
if not no_cache and (
(textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())
):
print(f"Models {textual_path} and {visual_path} already exist, skipping")
return visual_path, textual_path
import open_clip
import torch
from transformers import AutoTokenizer
torch.backends.mha.set_fastpath_enabled(False)
model = open_clip.create_model(
model_cfg.name,
pretrained=model_cfg.pretrained,
jit=False,
require_pretrained=True,
)
text_vision_cfg = open_clip.get_model_config(model_cfg.name)
model.eval()
for param in model.parameters():
param.requires_grad_(False)
if visual_path is not None and output_dir_visual is not None:
if no_cache or not visual_path.exists():
save_config(
open_clip.get_model_preprocess_cfg(model),
output_dir_visual / "preprocess_cfg.json",
)
save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
_export_image_encoder(model, model_cfg, visual_path, opset_version)
else:
print(f"Model {visual_path} already exists, skipping")
if textual_path is not None and output_dir_textual is not None:
if no_cache or not textual_path.exists():
tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
_export_text_encoder(model, model_cfg, textual_path, opset_version)
else:
print(f"Model {textual_path} already exists, skipping")
return visual_path, textual_path
def _export_image_encoder(
model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
import torch
output_path = Path(output_path)
def encode_image(image: torch.Tensor) -> torch.Tensor:
output = model.encode_image(image, normalize=True)
assert isinstance(output, torch.Tensor)
return output
model.forward = encode_image
args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
model,
args,
output_path.as_posix(),
input_names=["image"],
output_names=["embedding"],
opset_version=opset_version,
# dynamic_axes={"image": {0: "batch_size"}},
)
def _export_text_encoder(
model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
import torch
output_path = Path(output_path)
def encode_text(text: torch.Tensor) -> torch.Tensor:
output = model.encode_text(text, normalize=True)
assert isinstance(output, torch.Tensor)
return output
model.forward = encode_text
args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
model,
args,
output_path.as_posix(),
input_names=["text"],
output_names=["embedding"],
opset_version=opset_version,
# dynamic_axes={"text": {0: "batch_size"}},
)


@ -0,0 +1,96 @@
from pathlib import Path
from .constants import RKNN_SOCS
def _export_platform(
model_dir: Path,
target_platform: str,
inputs: list[str] | None = None,
input_size_list: list[list[int]] | None = None,
fuse_matmul_softmax_matmul_to_sdpa: bool = True,
no_cache: bool = False,
) -> None:
from rknn.api import RKNN
input_path = model_dir / "model.onnx"
output_path = model_dir / "rknpu" / target_platform / "model.rknn"
if not no_cache and output_path.exists():
print(f"Model {input_path} already exists at {output_path}, skipping")
return
print(f"Exporting model {input_path} to {output_path}")
rknn = RKNN(verbose=False)
rknn.config(
target_platform=target_platform,
disable_rules=["fuse_matmul_softmax_matmul_to_sdpa"] if not fuse_matmul_softmax_matmul_to_sdpa else [],
enable_flash_attention=False,
model_pruning=True,
)
ret = rknn.load_onnx(model=input_path.as_posix(), inputs=inputs, input_size_list=input_size_list)
if ret != 0:
raise RuntimeError("Load failed!")
ret = rknn.build(do_quantization=False)
if ret != 0:
raise RuntimeError("Build failed!")
output_path.parent.mkdir(parents=True, exist_ok=True)
ret = rknn.export_rknn(output_path.as_posix())
if ret != 0:
raise RuntimeError("Export rknn model failed!")
def _export_platforms(
model_dir: Path,
inputs: list[str] | None = None,
input_size_list: list[list[int]] | None = None,
no_cache: bool = False,
) -> None:
fuse_matmul_softmax_matmul_to_sdpa = True
for soc in RKNN_SOCS:
try:
_export_platform(
model_dir,
soc,
inputs=inputs,
input_size_list=input_size_list,
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
no_cache=no_cache,
)
except Exception as e:
print(f"Failed to export model for {soc}: {e}")
if "inputs or 'outputs' must be set" in str(e):
print("Retrying without fuse_matmul_softmax_matmul_to_sdpa")
fuse_matmul_softmax_matmul_to_sdpa = False
_export_platform(
model_dir,
soc,
inputs=inputs,
input_size_list=input_size_list,
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
no_cache=no_cache,
)
def export(model_dir: Path, no_cache: bool = False) -> None:
textual = model_dir / "textual"
visual = model_dir / "visual"
detection = model_dir / "detection"
recognition = model_dir / "recognition"
if textual.is_dir():
_export_platforms(textual, no_cache=no_cache)
if visual.is_dir():
_export_platforms(visual, no_cache=no_cache)
if detection.is_dir():
_export_platforms(detection, inputs=["input.1"], input_size_list=[[1, 3, 640, 640]], no_cache=no_cache)
if recognition.is_dir():
_export_platforms(recognition, inputs=["input.1"], input_size_list=[[1, 3, 112, 112]], no_cache=no_cache)
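
For every supported SoC the exporter writes `rknpu/<soc>/model.rknn` alongside the existing `model.onnx`. A sketch of calling it directly (the module path is assumed from the imports in `export.py` above; `rknn-toolkit2` must be installed and the ONNX exports must already exist):

```python
from pathlib import Path

from immich_model_exporter.exporters.rknn import export

# Hypothetical layout: models/ViT-B-32__openai/{textual,visual}/model.onnx already exported.
export(Path("models/ViT-B-32__openai"), no_cache=False)
# -> models/ViT-B-32__openai/textual/rknpu/rk3566/model.rknn ... rk3588/model.rknn,
#    and likewise under visual/.
```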


@ -0,0 +1,88 @@
import subprocess
from exporters.constants import ModelSource
mclip = [
"M-CLIP/LABSE-Vit-L-14",
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
"M-CLIP/XLM-Roberta-Large-Vit-B-32",
"M-CLIP/XLM-Roberta-Large-Vit-L-14",
]
openclip = [
"RN101__openai",
"RN101__yfcc15m",
"RN50__cc12m",
"RN50__openai",
"RN50__yfcc15m",
"RN50x16__openai",
"RN50x4__openai",
"RN50x64__openai",
"ViT-B-16-SigLIP-256__webli",
"ViT-B-16-SigLIP-384__webli",
"ViT-B-16-SigLIP-512__webli",
"ViT-B-16-SigLIP-i18n-256__webli",
"ViT-B-16-SigLIP2__webli",
"ViT-B-16-SigLIP__webli",
"ViT-B-16-plus-240__laion400m_e31",
"ViT-B-16-plus-240__laion400m_e32",
"ViT-B-16__laion400m_e31",
"ViT-B-16__laion400m_e32",
"ViT-B-16__openai",
"ViT-B-32-SigLIP2-256__webli",
"ViT-B-32__laion2b-s34b-b79k",
"ViT-B-32__laion2b_e16",
"ViT-B-32__laion400m_e31",
"ViT-B-32__laion400m_e32",
"ViT-B-32__openai",
"ViT-H-14-378-quickgelu__dfn5b",
"ViT-H-14-quickgelu__dfn5b",
"ViT-H-14__laion2b-s32b-b79k",
"ViT-L-14-336__openai",
"ViT-L-14-quickgelu__dfn2b",
"ViT-L-14__laion2b-s32b-b82k",
"ViT-L-14__laion400m_e31",
"ViT-L-14__laion400m_e32",
"ViT-L-14__openai",
"ViT-L-16-SigLIP-256__webli",
"ViT-L-16-SigLIP-384__webli",
"ViT-L-16-SigLIP2-256__webli",
"ViT-L-16-SigLIP2-384__webli",
"ViT-L-16-SigLIP2-512__webli",
"ViT-SO400M-14-SigLIP-384__webli",
"ViT-SO400M-14-SigLIP2-378__webli",
"ViT-SO400M-14-SigLIP2__webli",
"ViT-SO400M-16-SigLIP2-256__webli",
"ViT-SO400M-16-SigLIP2-384__webli",
"ViT-SO400M-16-SigLIP2-512__webli",
"ViT-gopt-16-SigLIP2-256__webli",
"ViT-gopt-16-SigLIP2-384__webli",
"nllb-clip-base-siglip__mrl",
"nllb-clip-base-siglip__v1",
"nllb-clip-large-siglip__mrl",
"nllb-clip-large-siglip__v1",
"xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k",
"xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k",
]
insightface = [
"antelopev2",
"buffalo_l",
"buffalo_m",
"buffalo_s",
]
def export_models(models: list[str], source: ModelSource) -> None:
for model in models:
try:
print(f"Exporting model {model}")
subprocess.check_call(["python", "-m", "immich_model_exporter.export", model, source])
except Exception as e:
print(f"Failed to export model {model}: {e}")
if __name__ == "__main__":
export_models(mclip, ModelSource.MCLIP)
export_models(openclip, ModelSource.OPENCLIP)
export_models(insightface, ModelSource.INSIGHTFACE)


@ -1,114 +0,0 @@
import os
import tempfile
import warnings
from dataclasses import dataclass, field
from pathlib import Path
import open_clip
import torch
from transformers import AutoTokenizer
from .util import get_model_path, save_config
@dataclass
class OpenCLIPModelConfig:
name: str
pretrained: str
image_size: int = field(init=False)
sequence_length: int = field(init=False)
def __post_init__(self) -> None:
open_clip_cfg = open_clip.get_model_config(self.name)
if open_clip_cfg is None:
raise ValueError(f"Unknown model {self.name}")
self.image_size = open_clip_cfg["vision_cfg"]["image_size"]
self.sequence_length = open_clip_cfg["text_cfg"].get("context_length", 77)
def to_onnx(
model_cfg: OpenCLIPModelConfig,
output_dir_visual: Path | str | None = None,
output_dir_textual: Path | str | None = None,
) -> tuple[Path | None, Path | None]:
visual_path = None
textual_path = None
with tempfile.TemporaryDirectory() as tmpdir:
model = open_clip.create_model(
model_cfg.name,
pretrained=model_cfg.pretrained,
jit=False,
cache_dir=os.environ.get("CACHE_DIR", tmpdir),
require_pretrained=True,
)
text_vision_cfg = open_clip.get_model_config(model_cfg.name)
model.eval()
for param in model.parameters():
param.requires_grad_(False)
if output_dir_visual is not None:
output_dir_visual = Path(output_dir_visual)
visual_path = get_model_path(output_dir_visual)
save_config(open_clip.get_model_preprocess_cfg(model), output_dir_visual / "preprocess_cfg.json")
save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
export_image_encoder(model, model_cfg, visual_path)
if output_dir_textual is not None:
output_dir_textual = Path(output_dir_textual)
textual_path = get_model_path(output_dir_textual)
tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
export_text_encoder(model, model_cfg, textual_path)
return visual_path, textual_path
def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
output_path = Path(output_path)
def encode_image(image: torch.Tensor) -> torch.Tensor:
output = model.encode_image(image, normalize=True)
assert isinstance(output, torch.Tensor)
return output
args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)
traced = torch.jit.trace(encode_image, args) # type: ignore[no-untyped-call]
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
traced,
args,
output_path.as_posix(),
input_names=["image"],
output_names=["embedding"],
opset_version=17,
# dynamic_axes={"image": {0: "batch_size"}},
)
def export_text_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
output_path = Path(output_path)
def encode_text(text: torch.Tensor) -> torch.Tensor:
output = model.encode_text(text, normalize=True)
assert isinstance(output, torch.Tensor)
return output
args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)
traced = torch.jit.trace(encode_text, args) # type: ignore[no-untyped-call]
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
torch.onnx.export(
traced,
args,
output_path.as_posix(),
input_names=["text"],
output_names=["embedding"],
opset_version=17,
# dynamic_axes={"text": {0: "batch_size"}},
)


@ -1,49 +0,0 @@
from pathlib import Path
import onnx
import onnxruntime as ort
import onnxsim
def save_onnx(model: onnx.ModelProto, output_path: Path | str) -> None:
try:
onnx.save(model, output_path)
except ValueError as e:
if "The proto size is larger than the 2 GB limit." in str(e):
onnx.save(model, output_path, save_as_external_data=True, size_threshold=1_000_000)
else:
raise e
def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None:
model_path = Path(model_path)
output_path = Path(output_path)
model = onnx.load(model_path.as_posix())
model, check = onnxsim.simplify(model)
assert check, "Simplified ONNX model could not be validated"
for file in model_path.parent.iterdir():
if file.name.startswith("Constant") or "onnx" in file.name or file.suffix == ".weight":
file.unlink()
save_onnx(model, output_path)
def optimize_ort(
model_path: Path | str,
output_path: Path | str,
level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
) -> None:
model_path = Path(model_path)
output_path = Path(output_path)
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = level
sess_options.optimized_model_filepath = output_path.as_posix()
ort.InferenceSession(model_path.as_posix(), providers=["CPUExecutionProvider"], sess_options=sess_options)
def optimize(model_path: Path | str) -> None:
model_path = Path(model_path)
optimize_ort(model_path, model_path)
optimize_onnxsim(model_path, model_path)


@ -0,0 +1,67 @@
[project]
name = "immich_model_exporter"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10, <4.0"
dependencies = [
"huggingface-hub>=0.29.3",
"multilingual-clip>=1.0.10",
"onnx>=1.14.1",
"onnxruntime>=1.16.0",
"open-clip-torch>=2.31.0",
"typer>=0.15.2",
"rknn-toolkit2>=2.3.0",
"transformers>=4.49.0",
"tenacity>=9.0.0",
]
[dependency-groups]
dev = ["black>=23.3.0", "mypy>=1.3.0", "ruff>=0.0.272"]
[tool.uv]
override-dependencies = [
"onnx>=1.16.0,<2",
"onnxruntime>=1.18.2,<2",
"torch>=2.4",
"torchvision>=0.21",
]
[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.hatch.build.targets.sdist]
include = ["immich_model_exporter"]
[tool.hatch.build.targets.wheel]
include = ["immich_model_exporter"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.mypy]
python_version = "3.12"
follow_imports = "silent"
warn_redundant_casts = true
disallow_any_generics = true
check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true
[tool.ruff]
line-length = 120
target-version = "py312"
[tool.ruff.lint]
select = ["E", "F", "I"]
[tool.black]
line-length = 120
target-version = ['py312']


@ -1,113 +0,0 @@
import gc
import os
from pathlib import Path
from tempfile import TemporaryDirectory
import torch
from huggingface_hub import create_repo, upload_folder
from models import mclip, openclip
from models.optimize import optimize
from rich.progress import Progress
models = [
"M-CLIP/LABSE-Vit-L-14",
"M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
"M-CLIP/XLM-Roberta-Large-Vit-B-32",
"M-CLIP/XLM-Roberta-Large-Vit-L-14",
"RN101::openai",
"RN101::yfcc15m",
"RN50::cc12m",
"RN50::openai",
"RN50::yfcc15m",
"RN50x16::openai",
"RN50x4::openai",
"RN50x64::openai",
"ViT-B-16-SigLIP-256::webli",
"ViT-B-16-SigLIP-384::webli",
"ViT-B-16-SigLIP-512::webli",
"ViT-B-16-SigLIP-i18n-256::webli",
"ViT-B-16-SigLIP::webli",
"ViT-B-16-plus-240::laion400m_e31",
"ViT-B-16-plus-240::laion400m_e32",
"ViT-B-16::laion400m_e31",
"ViT-B-16::laion400m_e32",
"ViT-B-16::openai",
"ViT-B-32::laion2b-s34b-b79k",
"ViT-B-32::laion2b_e16",
"ViT-B-32::laion400m_e31",
"ViT-B-32::laion400m_e32",
"ViT-B-32::openai",
"ViT-H-14-378-quickgelu::dfn5b",
"ViT-H-14-quickgelu::dfn5b",
"ViT-H-14::laion2b-s32b-b79k",
"ViT-L-14-336::openai",
"ViT-L-14-quickgelu::dfn2b",
"ViT-L-14::laion2b-s32b-b82k",
"ViT-L-14::laion400m_e31",
"ViT-L-14::laion400m_e32",
"ViT-L-14::openai",
"ViT-L-16-SigLIP-256::webli",
"ViT-L-16-SigLIP-384::webli",
"ViT-SO400M-14-SigLIP-384::webli",
"ViT-g-14::laion2b-s12b-b42k",
"nllb-clip-base-siglip::mrl",
"nllb-clip-base-siglip::v1",
"nllb-clip-large-siglip::mrl",
"nllb-clip-large-siglip::v1",
"xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k",
"xlm-roberta-large-ViT-H-14::frozen_laion5b_s13b_b90k",
]
# glob to delete old UUID blobs when reuploading models
uuid_char = "[a-fA-F0-9]"
uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12
# remote repo files to be deleted before uploading
# deletion is in the same commit as the upload, so it's atomic
delete_patterns = ["**/*onnx*", "**/Constant*", "**/*.weight", "**/*.bias", f"**/{uuid_glob}"]
with Progress() as progress:
task = progress.add_task("[green]Exporting models...", total=len(models))
token = os.environ.get("HF_AUTH_TOKEN")
torch.backends.mha.set_fastpath_enabled(False)
with TemporaryDirectory() as tmp:
tmpdir = Path(tmp)
for model in models:
model_name = model.split("/")[-1].replace("::", "__")
hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
hf_model_name = model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
config_path = tmpdir / model_name / "config.json"
def export() -> None:
progress.update(task, description=f"[green]Exporting {hf_model_name}")
visual_dir = tmpdir / hf_model_name / "visual"
textual_dir = tmpdir / hf_model_name / "textual"
if model.startswith("M-CLIP"):
visual_path, textual_path = mclip.to_onnx(model, visual_dir, textual_dir)
else:
name, _, pretrained = model_name.partition("__")
config = openclip.OpenCLIPModelConfig(name, pretrained)
visual_path, textual_path = openclip.to_onnx(config, visual_dir, textual_dir)
progress.update(task, description=f"[green]Optimizing {hf_model_name} (visual)")
optimize(visual_path)
progress.update(task, description=f"[green]Optimizing {hf_model_name} (textual)")
optimize(textual_path)
gc.collect()
def upload() -> None:
progress.update(task, description=f"[yellow]Uploading {hf_model_name}")
repo_id = f"immich-app/{hf_model_name}"
create_repo(repo_id, exist_ok=True)
upload_folder(
repo_id=repo_id,
folder_path=tmpdir / hf_model_name,
delete_patterns=delete_patterns,
token=token,
)
export()
if token is not None:
upload()
progress.update(task, advance=1)

machine-learning/export/uv.lock (generated, new file, 1395 lines)

File diff suppressed because it is too large.


@ -51,6 +51,7 @@ cpu = ["onnxruntime>=1.15.0,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
armnn = ["onnxruntime>=1.15.0,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
[tool.uv]
compile-bytecode = true


@ -1109,6 +1109,10 @@ cuda = [
openvino = [
{ name = "onnxruntime-openvino" },
]
rknn = [
{ name = "onnxruntime" },
{ name = "rknn-toolkit-lite2" },
]
[package.dev-dependencies]
dev = [
@ -1162,6 +1166,7 @@ requires-dist = [
{ name = "insightface", specifier = ">=0.7.3,<1.0" },
{ name = "onnxruntime", marker = "extra == 'armnn'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime", marker = "extra == 'cpu'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime", marker = "extra == 'rknn'", specifier = ">=1.15.0,<2" },
{ name = "onnxruntime-gpu", marker = "extra == 'cuda'", specifier = ">=1.17.0,<2", index = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/" },
{ name = "onnxruntime-openvino", marker = "extra == 'openvino'", specifier = ">=1.17.1,<1.19.0" },
{ name = "opencv-python-headless", specifier = ">=4.7.0.72,<5.0" },
@ -1171,10 +1176,11 @@ requires-dist = [
{ name = "pydantic-settings", specifier = ">=2.5.2,<3" },
{ name = "python-multipart", specifier = ">=0.0.6,<1.0" },
{ name = "rich", specifier = ">=13.4.2" },
{ name = "rknn-toolkit-lite2", marker = "extra == 'rknn'", specifier = ">=2.3.0,<3" },
{ name = "tokenizers", specifier = ">=0.15.0,<1.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.22.0,<1.0" },
]
provides-extras = ["cpu", "cuda", "openvino", "armnn"]
provides-extras = ["cpu", "cuda", "openvino", "armnn", "rknn"]
[package.metadata.requires-dev]
dev = [
@ -2131,6 +2137,77 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 },
]
[[package]]
name = "rknn-toolkit-lite2"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "numpy" },
{ name = "psutil" },
{ name = "ruamel-yaml" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ed/77/6af374a4a8cd2aee762a1fb8a3050dcf3f129134bbdc4bb6bed755c4325b/rknn_toolkit_lite2-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b6733689bd09a262bcb6ba4744e690dd4b37ebeac4ed427cf45242c4b4ce9a4", size = 559372 },
{ url = "https://files.pythonhosted.org/packages/9b/0c/76ff1eb09d09ce4394a6959d2343a321d28dd9e604348ffdafceafdc344c/rknn_toolkit_lite2-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3e4fefe355dc34a155680e4bcb9e4abb37ebc271f045ec9e0a4a3a018bc5beb", size = 569149 },
{ url = "https://files.pythonhosted.org/packages/0d/6e/8679562028051b02312212defc6e8c07248953f10dd7ad506e941b575bf3/rknn_toolkit_lite2-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37394371d1561f470c553f39869d7c35ff93405dffe3d0d72babf297a2b0aee9", size = 527457 },
]
[[package]]
name = "ruamel-yaml"
version = "0.18.10"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ruamel-yaml-clib", marker = "python_full_version < '3.13' and platform_python_implementation == 'CPython'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ea/46/f44d8be06b85bc7c4d8c95d658be2b68f27711f279bf9dd0612a5e4794f5/ruamel.yaml-0.18.10.tar.gz", hash = "sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58", size = 143447 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/36/dfc1ebc0081e6d39924a2cc53654497f967a084a436bb64402dfce4254d9/ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1", size = 117729 },
]
[[package]]
name = "ruamel-yaml-clib"
version = "0.2.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/70/57/40a958e863e299f0c74ef32a3bde9f2d1ea8d69669368c0c502a0997f57f/ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5", size = 131301 },
{ url = "https://files.pythonhosted.org/packages/98/a8/29a3eb437b12b95f50a6bcc3d7d7214301c6c529d8fdc227247fa84162b5/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969", size = 633728 },
{ url = "https://files.pythonhosted.org/packages/35/6d/ae05a87a3ad540259c3ad88d71275cbd1c0f2d30ae04c65dcbfb6dcd4b9f/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd5415dded15c3822597455bc02bcd66e81ef8b7a48cb71a33628fc9fdde39df", size = 722230 },
{ url = "https://files.pythonhosted.org/packages/7f/b7/20c6f3c0b656fe609675d69bc135c03aac9e3865912444be6339207b6648/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76", size = 686712 },
{ url = "https://files.pythonhosted.org/packages/cd/11/d12dbf683471f888d354dac59593873c2b45feb193c5e3e0f2ebf85e68b9/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6", size = 663936 },
{ url = "https://files.pythonhosted.org/packages/72/14/4c268f5077db5c83f743ee1daeb236269fa8577133a5cfa49f8b382baf13/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd", size = 696580 },
{ url = "https://files.pythonhosted.org/packages/30/fc/8cd12f189c6405a4c1cf37bd633aa740a9538c8e40497c231072d0fef5cf/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a", size = 663393 },
{ url = "https://files.pythonhosted.org/packages/80/29/c0a017b704aaf3cbf704989785cd9c5d5b8ccec2dae6ac0c53833c84e677/ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da", size = 100326 },
{ url = "https://files.pythonhosted.org/packages/3a/65/fa39d74db4e2d0cd252355732d966a460a41cd01c6353b820a0952432839/ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28", size = 118079 },
{ url = "https://files.pythonhosted.org/packages/fb/8f/683c6ad562f558cbc4f7c029abcd9599148c51c54b5ef0f24f2638da9fbb/ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6", size = 132224 },
{ url = "https://files.pythonhosted.org/packages/3c/d2/b79b7d695e2f21da020bd44c782490578f300dd44f0a4c57a92575758a76/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d84318609196d6bd6da0edfa25cedfbabd8dbde5140a0a23af29ad4b8f91fb1e", size = 641480 },
{ url = "https://files.pythonhosted.org/packages/68/6e/264c50ce2a31473a9fdbf4fa66ca9b2b17c7455b31ef585462343818bd6c/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb43a269eb827806502c7c8efb7ae7e9e9d0573257a46e8e952f4d4caba4f31e", size = 739068 },
{ url = "https://files.pythonhosted.org/packages/86/29/88c2567bc893c84d88b4c48027367c3562ae69121d568e8a3f3a8d363f4d/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52", size = 703012 },
{ url = "https://files.pythonhosted.org/packages/11/46/879763c619b5470820f0cd6ca97d134771e502776bc2b844d2adb6e37753/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642", size = 704352 },
{ url = "https://files.pythonhosted.org/packages/02/80/ece7e6034256a4186bbe50dee28cd032d816974941a6abf6a9d65e4228a7/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2", size = 737344 },
{ url = "https://files.pythonhosted.org/packages/f0/ca/e4106ac7e80efbabdf4bf91d3d32fc424e41418458251712f5672eada9ce/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3", size = 714498 },
{ url = "https://files.pythonhosted.org/packages/67/58/b1f60a1d591b771298ffa0428237afb092c7f29ae23bad93420b1eb10703/ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4", size = 100205 },
{ url = "https://files.pythonhosted.org/packages/b4/4f/b52f634c9548a9291a70dfce26ca7ebce388235c93588a1068028ea23fcc/ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb", size = 118185 },
{ url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433 },
{ url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362 },
{ url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118 },
{ url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497 },
{ url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042 },
{ url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831 },
{ url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692 },
{ url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777 },
{ url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523 },
{ url = "https://files.pythonhosted.org/packages/29/00/4864119668d71a5fa45678f380b5923ff410701565821925c69780356ffa/ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a", size = 132011 },
{ url = "https://files.pythonhosted.org/packages/7f/5e/212f473a93ae78c669ffa0cb051e3fee1139cb2d385d2ae1653d64281507/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475", size = 642488 },
{ url = "https://files.pythonhosted.org/packages/1f/8f/ecfbe2123ade605c49ef769788f79c38ddb1c8fa81e01f4dbf5cf1a44b16/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef", size = 745066 },
{ url = "https://files.pythonhosted.org/packages/e2/a9/28f60726d29dfc01b8decdb385de4ced2ced9faeb37a847bd5cf26836815/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6", size = 701785 },
{ url = "https://files.pythonhosted.org/packages/84/7e/8e7ec45920daa7f76046578e4f677a3215fe8f18ee30a9cb7627a19d9b4c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf", size = 693017 },
{ url = "https://files.pythonhosted.org/packages/c5/b3/d650eaade4ca225f02a648321e1ab835b9d361c60d51150bac49063b83fa/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1", size = 741270 },
{ url = "https://files.pythonhosted.org/packages/87/b8/01c29b924dcbbed75cc45b30c30d565d763b9c4d540545a0eeecffb8f09c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01", size = 709059 },
{ url = "https://files.pythonhosted.org/packages/30/8c/ed73f047a73638257aa9377ad356bea4d96125b305c34a28766f4445cc0f/ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6", size = 98583 },
{ url = "https://files.pythonhosted.org/packages/b0/85/e8e751d8791564dd333d5d9a4eab0a7a115f7e349595417fd50ecae3395c/ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3", size = 115190 },
]
[[package]]
name = "ruff"
version = "0.9.9"

View file

@ -96,6 +96,18 @@ export const CLIP_MODEL_INFO: Record<string, ModelInfo> = {
'ViT-SO400M-14-SigLIP-384__webli': { dimSize: 1152 },
'nllb-clip-large-siglip__mrl': { dimSize: 1152 },
'nllb-clip-large-siglip__v1': { dimSize: 1152 },
'ViT-B-16-SigLIP2__webli': { dimSize: 768 },
'ViT-B-32-SigLIP2-256__webli': { dimSize: 768 },
'ViT-L-16-SigLIP2-256__webli': { dimSize: 1024 },
'ViT-L-16-SigLIP2-384__webli': { dimSize: 1024 },
'ViT-L-16-SigLIP2-512__webli': { dimSize: 1024 },
'ViT-SO400M-14-SigLIP2__webli': { dimSize: 1152 },
'ViT-SO400M-14-SigLIP2-378__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-256__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-384__webli': { dimSize: 1152 },
'ViT-SO400M-16-SigLIP2-512__webli': { dimSize: 1152 },
'ViT-gopt-16-SigLIP2-256__webli': { dimSize: 1536 },
'ViT-gopt-16-SigLIP2-384__webli': { dimSize: 1536 },
};
type SharpRotationData = {
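The new entries register the SigLIP2 checkpoints together with their embedding widths (dimSize). Purely to illustrate what dimSize encodes, a hypothetical check (the helper and dict below are illustrative, not part of the server code) would compare an embedding returned by the ML service against that width:

# Mirrors a few rows of the TypeScript table above; values are expected embedding lengths.
SIGLIP2_DIMS = {
    "ViT-B-16-SigLIP2__webli": 768,
    "ViT-L-16-SigLIP2-384__webli": 1024,
    "ViT-gopt-16-SigLIP2-384__webli": 1536,
}

def assert_dim(model_name: str, embedding: list[float]) -> None:
    expected = SIGLIP2_DIMS[model_name]
    if len(embedding) != expected:
        raise ValueError(f"{model_name}: expected {expected}-dim embedding, got {len(embedding)}")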