mirror of https://github.com/immich-app/immich.git
synced 2025-03-18 02:31:28 -05:00

feat(ml): ML on Rockchip NPUs (#15241)

parent 1e184a70f1
commit 14c3b99c0f

43 changed files with 2417 additions and 4726 deletions
.github/workflows/docker.yml (7 changes, vendored)
@@ -49,7 +49,7 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-       suffix: ["", "-cuda", "-openvino", "-armnn"]
+       suffix: ["", "-cuda", "-openvino", "-armnn","-rknn"]
    steps:
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3

@@ -129,6 +129,9 @@ jobs:
          runner: ubuntu-24.04-arm
          device: armnn
          suffix: -armnn
+       - platforms: linux/arm64
+         device: rknn
+         suffix: -rknn

    steps:
      - name: Prepare

@@ -454,4 +457,4 @@ jobs:
        run: exit 1
      - name: All jobs passed or skipped
        if: ${{ !(contains(needs.*.result, 'failure')) }}
-       run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
+       run: echo "All jobs passed or skipped" && echo "${{ toJSON(needs.*.result) }}"
@@ -95,12 +95,12 @@ services:
    image: immich-machine-learning-dev:latest
    # extends:
    #   file: hwaccel.ml.yml
-   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
+   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
    build:
      context: ../machine-learning
      dockerfile: Dockerfile
      args:
-       - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
+       - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
    ports:
      - 3003:3003
    volumes:
@@ -38,12 +38,12 @@ services:
    image: immich-machine-learning:latest
    # extends:
    #   file: hwaccel.ml.yml
-   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
+   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
    build:
      context: ../machine-learning
      dockerfile: Dockerfile
      args:
-       - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
+       - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference
    ports:
      - 3003:3003
    volumes:

@@ -77,22 +77,12 @@ services:
      - 5432:5432
    healthcheck:
      test: >-
-       pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
-       Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
-       --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
-       echo "checksum failure count is $$Chksum";
-       [ "$$Chksum" = '0' ] || exit 1
+       pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
      interval: 5m
      start_interval: 30s
      start_period: 5m
    command: >-
-     postgres
-     -c shared_preload_libraries=vectors.so
-     -c 'search_path="$$user", public, vectors'
-     -c logging_collector=on
-     -c max_wal_size=2GB
-     -c shared_buffers=512MB
-     -c wal_compression=on
+     postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
    restart: always

  # set IMMICH_TELEMETRY_INCLUDE=all in .env to enable metrics

@@ -109,7 +99,7 @@ services:
  # add data source for http://immich-prometheus:9090 to get started
  immich-grafana:
    container_name: immich_grafana
-   command: ['./run.sh', '-disable-reporting']
+   command: [ './run.sh', '-disable-reporting' ]
    ports:
      - 3000:3000
    image: grafana/grafana:11.5.2-ubuntu@sha256:8b5858c447e06fd7a89006b562ba7bba7c4d5813600c7982374c41852adefaeb
@@ -33,12 +33,12 @@ services:

  immich-machine-learning:
    container_name: immich_machine_learning
-   # For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
+   # For hardware acceleration, add one of -[armnn, cuda, openvino, rknn] to the image tag.
    # Example tag: ${IMMICH_VERSION:-release}-cuda
    image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
    # extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
    #   file: hwaccel.ml.yml
-   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
+   #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl, rknn] for accelerated inference - use the `-wsl` version for WSL2 where applicable
    volumes:
      - model-cache:/cache
    env_file:

@@ -67,22 +67,12 @@ services:
      - ${DB_DATA_LOCATION}:/var/lib/postgresql/data
    healthcheck:
      test: >-
-       pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1;
-       Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align
-       --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')";
-       echo "checksum failure count is $$Chksum";
-       [ "$$Chksum" = '0' ] || exit 1
+       pg_isready --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" || exit 1; Chksum="$$(psql --dbname="$${POSTGRES_DB}" --username="$${POSTGRES_USER}" --tuples-only --no-align --command='SELECT COALESCE(SUM(checksum_failures), 0) FROM pg_stat_database')"; echo "checksum failure count is $$Chksum"; [ "$$Chksum" = '0' ] || exit 1
      interval: 5m
      start_interval: 30s
      start_period: 5m
    command: >-
-     postgres
-     -c shared_preload_libraries=vectors.so
-     -c 'search_path="$$user", public, vectors'
-     -c logging_collector=on
-     -c max_wal_size=2GB
-     -c shared_buffers=512MB
-     -c wal_compression=on
+     postgres -c shared_preload_libraries=vectors.so -c 'search_path="$$user", public, vectors' -c logging_collector=on -c max_wal_size=2GB -c shared_buffers=512MB -c wal_compression=on
    restart: always

volumes:
@@ -13,6 +13,13 @@ services:
    volumes:
      - /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
      - /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)
+
+ rknn:
+   security_opt:
+     - systempaths=unconfined
+     - apparmor=unconfined
+   devices:
+     - /dev/dri:/dev/dri

  cpu: {}
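As a usage sketch (abridged; the service name and the `-rknn` image-tag convention come from elsewhere in this diff), the new `rknn` profile would be consumed from `docker-compose.yml` like so:

```yaml
# Sketch: enabling RKNN acceleration by extending the new hwaccel profile.
services:
  immich-machine-learning:
    container_name: immich_machine_learning
    # per the docker-compose.yml comment, add the -rknn suffix to the image tag
    image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}-rknn
    extends:
      file: hwaccel.ml.yml
      service: rknn # grants /dev/dri plus the unconfined systempaths/apparmor options above
    volumes:
      - model-cache:/cache
```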
@@ -12,6 +12,7 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
 - ARM NN (Mali)
 - CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher)
 - OpenVINO (Intel GPUs such as Iris Xe and Arc)
+- RKNN (Rockchip)

 ## Limitations

@@ -19,6 +20,7 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
 - Only Linux and Windows (through WSL2) servers are supported.
 - ARM NN is only supported on devices with Mali GPUs. Other Arm devices are not supported.
 - Some models may not be compatible with certain backends. CUDA is the most reliable.
 - Search latency isn't improved by ARM NN due to model compatibility issues preventing its use. However, smart search jobs do make use of ARM NN.

 ## Prerequisites

@@ -33,6 +35,7 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
 - The `hwaccel.ml.yml` file assumes the path to it is `/usr/lib/libmali.so`, so update accordingly if it is elsewhere
 - The `hwaccel.ml.yml` file assumes an additional file `/lib/firmware/mali_csffw.bin`, so update accordingly if your device's driver does not require this file
 - Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for ARM NN specific settings
   - In particular, the `MACHINE_LEARNING_ANN_FP16_TURBO` setting can significantly improve performance at the cost of very slightly lower accuracy

 #### CUDA
@@ -47,6 +50,16 @@ You do not need to redo any machine learning jobs after enabling hardware acceleration.
 - Ensure the server's kernel version is new enough to use the device for hardware acceleration.
 - Expect higher RAM usage when using OpenVINO compared to CPU processing.

+#### RKNN
+
+- You must have a supported Rockchip SoC: only RK3566, RK3568, RK3576 and RK3588 are supported at this moment.
+- Make sure you have the appropriate Linux kernel driver installed
+  - This is usually pre-installed on the device vendor's Linux images
+- RKNPU driver V0.9.8 or later must be available in the host server
+  - You may confirm this by running `cat /sys/kernel/debug/rknpu/version` to check the version
+- Optional: Configure your `.env` file, see [environment variables](/docs/install/environment-variables) for RKNN specific settings
+  - In particular, setting `MACHINE_LEARNING_RKNN_THREADS` to 2 or 3 can _dramatically_ improve performance for RK3576 and RK3588 compared to the default of 1, at the expense of multiplying the amount of RAM each model uses by that amount.
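A minimal `.env` sketch applying these settings (values illustrative; the variables are defined in the environment-variables table later in this diff):

```ini
# .env (sketch) - RKNN knobs for the machine-learning container
MACHINE_LEARNING_RKNN=true
MACHINE_LEARNING_RKNN_THREADS=3  # 2-3 helps RK3576/RK3588; multiplies per-model RAM by the thread count
```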
 ## Setup

 1. If you do not already have it, download the latest [`hwaccel.ml.yml`][hw-file] file and ensure it's in the same folder as the `docker-compose.yml`.
@@ -127,3 +140,12 @@ Note that you should increase job concurrencies to increase overall utilization
 - If you encounter an error when a model is running, try a different model to see if the issue is model-specific.
 - You may want to increase concurrency past the default for higher utilization. However, keep in mind that this will also increase VRAM consumption.
 - Larger models benefit more from hardware acceleration, if you have the VRAM for them.
+- Compared to ARM NN, RKNPU has:
+  - Wider model support (including for search, which ARM NN does not accelerate)
+  - Less heat generation
+  - Very slightly lower accuracy (RKNPU always uses FP16, while ARM NN by default uses higher precision FP32 unless `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled)
+  - Varying speed (tested on RK3588):
+    - If `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, RKNPU will have substantially lower throughput for ML jobs than ARM NN in most cases, but similar latency (such as when searching)
+    - If `MACHINE_LEARNING_RKNN_THREADS` is set to 3, it will be somewhat faster than ARM NN at FP32, but somewhat slower than ARM NN if `MACHINE_LEARNING_ANN_FP16_TURBO` is enabled
+  - When other tasks also use the GPU (like transcoding), RKNPU has a significant advantage over ARM NN as it uses the otherwise idle NPU instead of competing for GPU usage
+  - Lower RAM usage if `MACHINE_LEARNING_RKNN_THREADS` is at the default of 1, but significantly higher if greater than 1 (which is necessary for it to fully utilize the NPU and hence be comparable in speed to ARM NN)
@@ -170,6 +170,8 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning |
 | `MACHINE_LEARNING_PING_TIMEOUT` | How long (ms) to wait for a PING response when checking if an ML server is available | `2000` | server |
 | `MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME` | How long to ignore ML servers that are offline before trying again | `30000` | server |
+| `MACHINE_LEARNING_RKNN` | Enable RKNN hardware acceleration if supported | `True` | machine learning |
+| `MACHINE_LEARNING_RKNN_THREADS` | How many RKNN runtime threads to spin up for inference | `1` | machine learning |

 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
machine-learning/.gitignore (19 changes, vendored)

@@ -1,5 +1,24 @@
 *.zip
 *.onnx
+*.rknn
+*.npy
+*_attr__value
+*.weight
+*.bias
+onnx__*
+*in_proj_bias
+*.proj
+*.latent
+*.pos_embed
+vocab.txt
+export/immich_model_exporter/models/**/README.md
+tokenizer.json
+tokenizer_config.json
+special_tokens_map.json
+preprocess_cfg.json
+config.json
+merges.txt
+vocab.json
 upload/
 venv/
 __pycache__/
@@ -15,6 +15,8 @@ RUN mkdir /opt/armnn && \
    cd /opt/ann && \
    sh build.sh

+FROM builder-cpu AS builder-rknn
+
FROM builder-${DEVICE} AS builder

ARG DEVICE

@@ -77,6 +79,10 @@ COPY --from=builder-armnn \
    /opt/ann/build.sh \
    /opt/armnn/

+FROM prod-cpu AS prod-rknn
+
+ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/v2.3.0/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so /usr/lib/
+
FROM prod-${DEVICE} AS prod
ARG DEVICE

@@ -123,4 +129,4 @@ ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]

-HEALTHCHECK CMD python3 healthcheck.py
+HEALTHCHECK CMD python3 healthcheck.py
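A build sketch for the new stages (tag and invocation illustrative; `DEVICE` values follow the compose comments earlier in this diff):

```sh
# Builds through builder-rknn and prod-rknn; the prod stage pulls librknnrt.so with the pinned checksum
docker build --build-arg DEVICE=rknn -t immich-machine-learning:rknn machine-learning/
```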
@@ -64,6 +64,8 @@ class Settings(BaseSettings):
     ann: bool = True
     ann_fp16_turbo: bool = False
     ann_tuning_level: int = 2
+    rknn: bool = True
+    rknn_threads: int = 1
     preload: PreloadModelData | None = None
     max_batch_size: MaxBatchSize | None = None
@@ -136,6 +136,12 @@ def ann_session() -> Iterator[mock.Mock]:
         yield mocked


+@pytest.fixture(scope="function")
+def rknn_session() -> Iterator[mock.Mock]:
+    with mock.patch("app.sessions.rknn.RknnPoolExecutor") as mocked:
+        yield mocked
+
+
 @pytest.fixture(scope="function")
 def rmtree() -> Iterator[mock.Mock]:
     with mock.patch("app.models.base.rmtree", autospec=True) as mocked:
@@ -226,9 +226,9 @@ async def load(model: InferenceModel) -> InferenceModel:
     except FileNotFoundError as e:
         if model.model_format == ModelFormat.ONNX:
             raise e
-        log.exception(e)
         log.warning(
-            f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it."
+            f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
+            exc_info=e,
         )
         model.model_format = ModelFormat.ONNX
         model.load()
@@ -8,6 +8,7 @@ from typing import Any, ClassVar
 from huggingface_hub import snapshot_download

 import ann.ann
+import app.sessions.rknn as rknn
 from app.sessions.ort import OrtSession

 from ..config import clean_name, log, settings
@@ -66,12 +67,17 @@ class InferenceModel(ABC):
         pass

     def _download(self) -> None:
-        ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
+        ignored_patterns: dict[ModelFormat, list[str]] = {
+            ModelFormat.ONNX: ["*.armnn", "*.rknn"],
+            ModelFormat.ARMNN: ["*.rknn"],
+            ModelFormat.RKNN: ["*.armnn"],
+        }
+
         snapshot_download(
             f"immich-app/{clean_name(self.model_name)}",
             cache_dir=self.cache_dir,
             local_dir=self.cache_dir,
-            ignore_patterns=ignore_patterns,
+            ignore_patterns=ignored_patterns.get(self.model_format, []),
         )

     def _load(self) -> ModelSession:
@@ -108,17 +114,25 @@ class InferenceModel(ABC):
                 session: ModelSession = AnnSession(model_path)
             case ".onnx":
                 session = OrtSession(model_path)
+            case ".rknn":
+                session = rknn.RknnSession(model_path)
             case _:
                 raise ValueError(f"Unsupported model file type: {model_path.suffix}")
         return session

+    def model_path_for_format(self, model_format: ModelFormat) -> Path:
+        model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
+        if model_path_prefix:
+            return self.model_dir / model_path_prefix / f"model.{model_format}"
+        return self.model_dir / f"model.{model_format}"
+
     @property
     def model_dir(self) -> Path:
         return self.cache_dir / self.model_type.value

     @property
     def model_path(self) -> Path:
-        return self.model_dir / f"model.{self.model_format}"
+        return self.model_path_for_format(self.model_format)

     @property
     def model_task(self) -> ModelTask:
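For illustration (hypothetical cache root, model, and SoC; `model_prefix` is `Path("rknpu") / soc_name` from `app.sessions.rknn`), the two branches of `model_path_for_format` resolve to paths like:

```python
# Hypothetical layout under the model cache:
# model.model_path_for_format(ModelFormat.ONNX)  -> <cache_dir>/textual/model.onnx
# model.model_path_for_format(ModelFormat.RKNN)  -> <cache_dir>/textual/rknpu/rk3588/model.rknn
```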
@@ -155,4 +169,9 @@ class InferenceModel(ABC):

     @property
     def _model_format_default(self) -> ModelFormat:
-        return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
+        if rknn.is_available:
+            return ModelFormat.RKNN
+        elif ann.ann.is_available and settings.ann:
+            return ModelFormat.ARMNN
+        else:
+            return ModelFormat.ONNX
@@ -44,6 +44,18 @@ _OPENCLIP_MODELS = {
     "nllb-clip-base-siglip__v1",
     "nllb-clip-large-siglip__mrl",
     "nllb-clip-large-siglip__v1",
+    "ViT-B-16-SigLIP2__webli",
+    "ViT-B-32-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-384__webli",
+    "ViT-L-16-SigLIP2-512__webli",
+    "ViT-SO400M-14-SigLIP2-378__webli",
+    "ViT-SO400M-14-SigLIP2__webli",
+    "ViT-SO400M-16-SigLIP2-256__webli",
+    "ViT-SO400M-16-SigLIP2-384__webli",
+    "ViT-SO400M-16-SigLIP2-512__webli",
+    "ViT-gopt-16-SigLIP2-256__webli",
+    "ViT-gopt-16-SigLIP2-384__webli",
 }


@@ -65,6 +77,9 @@ _INSIGHTFACE_MODELS = {

 SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"]

+RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
+RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
+

 def get_model_source(model_name: str) -> ModelSource | None:
     cleaned_name = clean_name(model_name)
@@ -31,7 +31,7 @@ class FaceRecognizer(InferenceModel):
             self._add_batch_axis(self.model_path)
         session = self._make_session(self.model_path)
         self.model = ArcFaceONNX(
-            self.model_path.with_suffix(".onnx").as_posix(),
+            self.model_path_for_format(ModelFormat.ONNX).as_posix(),
             session=session,
         )
         return session
@@ -35,6 +35,7 @@ class ModelType(StrEnum):
 class ModelFormat(StrEnum):
     ARMNN = "armnn"
     ONNX = "onnx"
+    RKNN = "rknn"


 class ModelSource(StrEnum):
machine-learning/app/sessions/rknn/__init__.py (new file, 76 lines)

@@ -0,0 +1,76 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, NamedTuple

import numpy as np
from numpy.typing import NDArray

from app.config import log, settings
from app.schemas import SessionNode

from .rknnpool import RknnPoolExecutor, is_available, soc_name

is_available = is_available and settings.rknn
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None


def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
    outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
    return outputs


input_output_mapping: dict[str, dict[str, Any]] = {
    "detection": {
        "input": {"norm_tensor:0": (1, 3, 640, 640)},
        "output": {
            "norm_tensor:1": (12800, 1),
            "norm_tensor:2": (3200, 1),
            "norm_tensor:3": (800, 1),
            "norm_tensor:4": (12800, 4),
            "norm_tensor:5": (3200, 4),
            "norm_tensor:6": (800, 4),
            "norm_tensor:7": (12800, 10),
            "norm_tensor:8": (3200, 10),
            "norm_tensor:9": (800, 10),
        },
    },
    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}


class RknnSession:
    def __init__(self, model_path: Path) -> None:
        self.model_type = "detection" if "detection" in model_path.parts else "recognition"
        self.tpe = settings.rknn_threads

        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
        self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")

    def get_inputs(self) -> list[SessionNode]:
        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["input"].items()]

    def get_outputs(self) -> list[SessionNode]:
        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["output"].items()]

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
        self.rknnpool.put(input_data)
        res = self.rknnpool.get()
        if res is None:
            raise RuntimeError("RKNN inference failed!")
        return res


class RknnNode(NamedTuple):
    name: str | None
    shape: tuple[int, ...]


__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]
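A minimal usage sketch of the session API above (model path illustrative; the input name and shape come from `input_output_mapping`):

```python
# Sketch: running a facial-recognition model through RknnSession.
from pathlib import Path

import numpy as np

from app.sessions.rknn import RknnSession

session = RknnSession(Path("/cache/recognition/rknpu/rk3588/model.rknn"))  # path illustrative
face = np.random.rand(1, 3, 112, 112).astype(np.float32)  # NCHW, per input_output_mapping["recognition"]
[embedding] = session.run(None, {"norm_tensor:0": face})  # single (1, 512) output
```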
machine-learning/app/sessions/rknn/rknnpool.py (new file, 91 lines)

@@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0

import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable

import numpy as np
from numpy.typing import NDArray

from app.config import log
from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS


def get_soc(device_tree_path: Path | str) -> str | None:
    try:
        with Path(device_tree_path).open() as f:
            device_compatible_str = f.read()
            for soc in RKNN_SUPPORTED_SOCS:
                if soc in device_compatible_str:
                    return soc
            log.warning("Device is not supported for RKNN")
    except OSError as e:
        log.warning(f"Could not read {device_tree_path}. Reason: %s", e)
    return None


soc_name = None
is_available = False
try:
    from rknnlite.api import RKNNLite

    soc_name = get_soc("/proc/device-tree/compatible")
    is_available = soc_name is not None
except ImportError:
    log.debug("RKNN is not available")


def init_rknn(model_path: str) -> "RKNNLite":
    if not is_available:
        raise RuntimeError("rknn is not available!")
    rknn_lite = RKNNLite()
    rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
    ret = rknn_lite.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError("Failed to load RKNN model")

    if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
        ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
    else:
        ret = rknn_lite.init_runtime()  # Please do not set this parameter on other platforms.

    if ret != 0:
        raise RuntimeError("Failed to initialize RKNN runtime environment")

    return rknn_lite


class RknnPoolExecutor:
    def __init__(
        self,
        model_path: str,
        tpes: int,
        func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
    ) -> None:
        self.tpes = tpes
        self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
        self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
        self.pool = ThreadPoolExecutor(max_workers=tpes)
        self.func = func
        self.num = 0

    def put(self, inputs: list[NDArray[np.float32]]) -> None:
        self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
        self.num += 1

    def get(self) -> list[NDArray[np.float32]] | None:
        if self.queue.empty():
            return None
        fut = self.queue.get()
        return fut.result()

    def release(self) -> None:
        self.pool.shutdown()
        for rknn_lite in self.rknn_pool:
            rknn_lite.release()

    def __del__(self) -> None:
        self.release()
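A sketch of the pool's pipelining pattern (model path and batch shapes illustrative): `put` round-robins submissions across the `tpes` RKNNLite instances, and `get` returns results in submission order:

```python
# Sketch: pipelined inference across multiple RKNNLite runtimes.
import numpy as np

from app.sessions.rknn import run_inference
from app.sessions.rknn.rknnpool import RknnPoolExecutor

pool = RknnPoolExecutor(model_path="model.rknn", tpes=3, func=run_inference)
batches = [np.random.rand(1, 3, 112, 112).astype(np.float32) for _ in range(6)]
for batch in batches:
    pool.put([batch])  # dispatched to rknn_pool[num % tpes]
results = [pool.get() for _ in batches]  # FIFO queue preserves submission order
pool.release()
```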
@@ -25,6 +25,7 @@ from app.models.facial_recognition.detection import FaceDetector
 from app.models.facial_recognition.recognition import FaceRecognizer
 from app.sessions.ann import AnnSession
 from app.sessions.ort import OrtSession
+from app.sessions.rknn import RknnSession, run_inference

 from .config import Settings, settings
 from .models.base import InferenceModel

@@ -69,6 +70,14 @@ class TestBase:

         assert encoder.model_format == ModelFormat.ARMNN

+    def test_sets_default_model_format_to_rknn_if_available(self, mocker: MockerFixture) -> None:
+        mocker.patch.object(settings, "rknn", True)
+        mocker.patch("app.sessions.rknn.is_available", True)
+
+        encoder = OpenClipTextualEncoder("ViT-B-32__openai")
+
+        assert encoder.model_format == ModelFormat.RKNN
+
     def test_casts_cache_dir_string_to_path(self) -> None:
         cache_dir = "/test_cache"
         encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=cache_dir)

@@ -125,7 +134,7 @@ class TestBase:
             "immich-app/ViT-B-32__openai",
             cache_dir=encoder.cache_dir,
             local_dir=encoder.cache_dir,
-            ignore_patterns=["*.armnn"],
+            ignore_patterns=["*.armnn", "*.rknn"],
         )

     def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:

@@ -136,7 +145,18 @@ class TestBase:
             "immich-app/ViT-B-32__openai",
             cache_dir=encoder.cache_dir,
             local_dir=encoder.cache_dir,
-            ignore_patterns=[],
+            ignore_patterns=["*.rknn"],
         )

+    def test_download_downloads_rknn_if_preferred_format(self, snapshot_download: mock.Mock) -> None:
+        encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.RKNN)
+        encoder.download()
+
+        snapshot_download.assert_called_once_with(
+            "immich-app/ViT-B-32__openai",
+            cache_dir=encoder.cache_dir,
+            local_dir=encoder.cache_dir,
+            ignore_patterns=["*.armnn"],
+        )
+
     def test_throws_exception_if_model_path_does_not_exist(

@@ -328,6 +348,33 @@ class TestAnnSession:
         np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])


+class TestRknnSession:
+    def test_creates_rknn_session(self, rknn_session: mock.Mock, info: mock.Mock, mocker: MockerFixture) -> None:
+        model_path = mock.MagicMock(spec=Path)
+        tpe = 1
+        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
+        mocker.patch("app.sessions.rknn.is_available", True)
+        RknnSession(model_path)
+
+        rknn_session.assert_called_once_with(model_path=model_path.as_posix(), tpes=tpe, func=run_inference)
+
+        info.assert_has_calls([mock.call(f"Loaded RKNN model from {model_path} with {tpe} threads.")])
+
+    def test_run_rknn(self, rknn_session: mock.Mock, mocker: MockerFixture) -> None:
+        rknn_session.return_value.load.return_value = 123
+        np_spy = mocker.spy(np, "ascontiguousarray")
+        mocker.patch("app.sessions.rknn.soc_name", "rk3566")
+        session = RknnSession(Path("ViT-B-32__openai"))
+        [input1, input2] = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(2)]
+        input_feed = {"input.1": input1, "input.2": input2}
+
+        session.run(None, input_feed)
+
+        rknn_session.return_value.put.assert_called_once_with([input1, input2])
+        assert np_spy.call_count == 2
+        np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
+
+
 class TestCLIP:
     embedding = np.random.rand(512).astype(np.float32)
     cache_dir = Path("test_cache")

@@ -829,9 +876,7 @@ class TestLoad:
         mock_model.clear_cache.assert_not_called()
         mock_model.load.assert_not_called()

-    async def test_falls_back_to_onnx_if_other_format_does_not_exist(
-        self, exception: mock.Mock, warning: mock.Mock
-    ) -> None:
+    async def test_falls_back_to_onnx_if_other_format_does_not_exist(self, warning: mock.Mock) -> None:
         mock_model = mock.Mock(spec=InferenceModel)
         mock_model.model_name = "test_model_name"
         mock_model.model_type = ModelType.VISUAL

@@ -846,8 +891,9 @@ class TestLoad:

         mock_model.clear_cache.assert_not_called()
         assert mock_model.load.call_count == 2
-        exception.assert_called_once_with(error)
-        warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.")
+        warning.assert_called_once_with(
+            "ARMNN is available, but model 'test_model_name' does not support it.", exc_info=error
+        )
         mock_model.model_format = ModelFormat.ONNX
machine-learning/export/.python-version (new file, 1 line)

@@ -0,0 +1 @@
3.12
(deleted file, 20 lines)

@@ -1,20 +0,0 @@
FROM mambaorg/micromamba:bookworm-slim@sha256:e3797091302382ea841498bc93a7b0a50f7c1448333d5e946d2d1608d0c5f43d AS builder

ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH=/usr/src

COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml /tmp/conda-lock.yml
RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
    micromamba remove -y -n base cxx-compiler && \
    micromamba clean --all --yes

WORKDIR /usr/src/app

COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
COPY --chown=$MAMBA_USER:$MAMBA_USER app .

ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["./start.sh"]
(File diff suppressed because it is too large.)
(deleted file, 15 lines)

@@ -1,15 +0,0 @@
name: base
channels:
  - conda-forge
platforms:
  - linux-64
  - linux-aarch64
dependencies:
  - black
  - conda-lock
  - mypy
  - pytest
  - pytest-cov
  - pytest-mock
  - ruff
category: dev
(deleted file, 25 lines)

@@ -1,25 +0,0 @@
name: base
channels:
  - conda-forge
  - nvidia
  - pytorch
platforms:
  - linux-64
dependencies:
  - cxx-compiler
  - onnx==1.*
  - onnxruntime==1.*
  - open-clip-torch==2.*
  - orjson==3.*
  - pip
  - python==3.11.*
  - pytorch>=2.3
  - rich==13.*
  - safetensors==0.*
  - setuptools==68.*
  - torchvision
  - transformers==4.*
  - pip:
      - multilingual-clip
      - onnxsim
category: main
machine-learning/export/immich_model_exporter/export.py (new file, 98 lines)

@@ -0,0 +1,98 @@
from pathlib import Path

import typer
from tenacity import retry, stop_after_attempt, wait_fixed
from typing_extensions import Annotated

from .exporters.constants import DELETE_PATTERNS, SOURCE_TO_METADATA, ModelSource
from .exporters.onnx import export as onnx_export
from .exporters.rknn import export as rknn_export

app = typer.Typer(pretty_exceptions_show_locals=False)


def generate_readme(model_name: str, model_source: ModelSource) -> str:
    (name, link, type) = SOURCE_TO_METADATA[model_source]
    match model_source:
        case ModelSource.MCLIP:
            tags = ["immich", "clip", "multilingual"]
        case ModelSource.OPENCLIP:
            tags = ["immich", "clip"]
            lowered = model_name.lower()
            if "xlm" in lowered or "nllb" in lowered:
                tags.append("multilingual")
        case ModelSource.INSIGHTFACE:
            tags = ["immich", "facial-recognition"]
        case _:
            raise ValueError(f"Unsupported model source {model_source}")

    return f"""---
tags:
{" - " + "\n - ".join(tags)}
---
# Model Description

This repo contains ONNX exports for the associated {type} model by {name}. See the [{name}]({link}) repo for more info.

This repo is specifically intended for use with [Immich](https://immich.app/), a self-hosted photo library.
"""


@app.command()
def main(
    model_name: str,
    model_source: ModelSource,
    output_dir: Path = Path("./models"),
    no_cache: bool = False,
    hf_organization: str = "immich-app",
    hf_auth_token: Annotated[str | None, typer.Option(envvar="HF_AUTH_TOKEN")] = None,
) -> None:
    hf_model_name = model_name.split("/")[-1]
    hf_model_name = hf_model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
    hf_model_name = hf_model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
    output_dir = output_dir / hf_model_name
    match model_source:
        case ModelSource.MCLIP | ModelSource.OPENCLIP:
            output_dir.mkdir(parents=True, exist_ok=True)
            onnx_export(model_name, model_source, output_dir, no_cache=no_cache)
        case ModelSource.INSIGHTFACE:
            from huggingface_hub import snapshot_download

            # TODO: start from insightface dump instead of downloading from HF
            snapshot_download(f"immich-app/{hf_model_name}", local_dir=output_dir)
        case _:
            raise ValueError(f"Unsupported model source {model_source}")

    try:
        rknn_export(output_dir, no_cache=no_cache)
    except Exception as e:
        print(f"Failed to export model {model_name} to rknn: {e}")
        (output_dir / "rknpu").unlink(missing_ok=True)

    readme_path = output_dir / "README.md"
    if no_cache or not readme_path.exists():
        with open(readme_path, "w") as f:
            f.write(generate_readme(model_name, model_source))

    if hf_auth_token is not None:
        from huggingface_hub import create_repo, upload_folder

        repo_id = f"{hf_organization}/{hf_model_name}"

        @retry(stop=stop_after_attempt(5), wait=wait_fixed(5))
        def upload_model() -> None:
            create_repo(repo_id, exist_ok=True, token=hf_auth_token)
            upload_folder(
                repo_id=repo_id,
                folder_path=output_dir,
                # remote repo files to be deleted before uploading
                # deletion is in the same commit as the upload, so it's atomic
                delete_patterns=DELETE_PATTERNS,
                token=hf_auth_token,
            )

        upload_model()


if __name__ == "__main__":
    typer.run(main)
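A usage sketch matching the Typer signature above and the `subprocess` invocation in `run.py` later in this diff:

```sh
# Export one model (positional args per main(); --output-dir defaults to ./models)
python -m immich_model_exporter.export ViT-B-32__openai openclip --output-dir ./models
# With HF_AUTH_TOKEN set, the exporter also uploads the result to the hub
HF_AUTH_TOKEN=... python -m immich_model_exporter.export buffalo_l insightface
```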
(new file, 42 lines)

@@ -0,0 +1,42 @@
from enum import StrEnum
from typing import NamedTuple


class ModelSource(StrEnum):
    INSIGHTFACE = "insightface"
    MCLIP = "mclip"
    OPENCLIP = "openclip"


class SourceMetadata(NamedTuple):
    name: str
    link: str
    type: str


SOURCE_TO_METADATA = {
    ModelSource.MCLIP: SourceMetadata("M-CLIP", "https://huggingface.co/M-CLIP", "CLIP"),
    ModelSource.OPENCLIP: SourceMetadata("OpenCLIP", "https://github.com/mlfoundations/open_clip", "CLIP"),
    ModelSource.INSIGHTFACE: SourceMetadata(
        "InsightFace", "https://github.com/deepinsight/insightface/tree/master", "facial recognition"
    ),
}

RKNN_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]


# glob to delete old UUID blobs when reuploading models
_uuid_char = "[a-fA-F0-9]"
_uuid_glob = _uuid_char * 8 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 12
DELETE_PATTERNS = [
    "**/*onnx*",
    "**/Constant*",
    "**/*.weight",
    "**/*.bias",
    "**/*.proj",
    "**/*in_proj_bias",
    "**/*.npy",
    "**/*.latent",
    "**/*.pos_embed",
    f"**/{_uuid_glob}",
]
(new file, 20 lines)

@@ -0,0 +1,20 @@
from pathlib import Path

from ..constants import ModelSource
from .models import mclip, openclip


def export(
    model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
) -> None:
    visual_dir = output_dir / "visual"
    textual_dir = output_dir / "textual"
    match model_source:
        case ModelSource.MCLIP:
            mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        case ModelSource.OPENCLIP:
            name, _, pretrained = model_name.partition("__")
            config = openclip.OpenCLIPModelConfig(name, pretrained)
            openclip.to_onnx(config, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        case _:
            raise ValueError(f"Unsupported model source {model_source}")
@@ -1,11 +1,6 @@
-import os
-import tempfile
-import warnings
 from pathlib import Path
-
-import torch
-from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
-from transformers import AutoTokenizer
+from typing import Any

 from .openclip import OpenCLIPModelConfig
 from .openclip import to_onnx as openclip_to_onnx

@@ -21,25 +16,40 @@ _MCLIP_TO_OPENCLIP = {

 def to_onnx(
     model_name: str,
+    opset_version: int,
     output_dir_visual: Path | str,
     output_dir_textual: Path | str,
+    no_cache: bool = False,
 ) -> tuple[Path, Path]:
     textual_path = get_model_path(output_dir_textual)
-    with tempfile.TemporaryDirectory() as tmpdir:
-        model = MultilingualCLIP.from_pretrained(model_name, cache_dir=os.environ.get("CACHE_DIR", tmpdir))
+    if no_cache or not textual_path.exists():
+        import torch
+        from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
+        from transformers import AutoTokenizer
+
+        torch.backends.mha.set_fastpath_enabled(False)
+
+        model = MultilingualCLIP.from_pretrained(model_name)
         AutoTokenizer.from_pretrained(model_name).save_pretrained(output_dir_textual)

         model.eval()
         for param in model.parameters():
             param.requires_grad_(False)

-        export_text_encoder(model, textual_path)
-        visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
-        assert visual_path is not None, "Visual model export failed"
+        _export_text_encoder(model, textual_path, opset_version)
+    else:
+        print(f"Model {textual_path} already exists, skipping")
+    visual_path, _ = openclip_to_onnx(
+        _MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache
+    )
+    assert visual_path is not None, "Visual model export failed"
     return visual_path, textual_path


-def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> None:
+def _export_text_encoder(model: Any, output_path: Path | str, opset_version: int) -> None:
+    import torch
+    from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
+
     output_path = Path(output_path)

     def forward(self: MultilingualCLIP, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:

@@ -61,7 +71,7 @@ def _export_text_encoder(model: Any, output_path: Path | str, opset_version: int) -> None:
         output_path.as_posix(),
         input_names=["input_ids", "attention_mask"],
         output_names=["embedding"],
-        opset_version=17,
+        opset_version=opset_version,
         # dynamic_axes={
         #     "input_ids": {0: "batch_size", 1: "sequence_length"},
         #     "attention_mask": {0: "batch_size", 1: "sequence_length"},
(new file, 153 lines)

@@ -0,0 +1,153 @@
import warnings
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Any

from .util import get_model_path, save_config


@dataclass
class OpenCLIPModelConfig:
    name: str
    pretrained: str

    @cached_property
    def model_config(self) -> dict[str, Any]:
        import open_clip

        config: dict[str, Any] | None = open_clip.get_model_config(self.name)
        if config is None:
            raise ValueError(f"Unknown model {self.name}")
        return config

    @property
    def image_size(self) -> int:
        image_size: int = self.model_config["vision_cfg"]["image_size"]
        return image_size

    @property
    def sequence_length(self) -> int:
        context_length: int = self.model_config["text_cfg"].get("context_length", 77)
        return context_length


def to_onnx(
    model_cfg: OpenCLIPModelConfig,
    opset_version: int,
    output_dir_visual: Path | str | None = None,
    output_dir_textual: Path | str | None = None,
    no_cache: bool = False,
) -> tuple[Path | None, Path | None]:
    visual_path = None
    textual_path = None
    if output_dir_visual is not None:
        output_dir_visual = Path(output_dir_visual)
        visual_path = get_model_path(output_dir_visual)

    if output_dir_textual is not None:
        output_dir_textual = Path(output_dir_textual)
        textual_path = get_model_path(output_dir_textual)

    if not no_cache and (
        (textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())
    ):
        print(f"Models {textual_path} and {visual_path} already exist, skipping")
        return visual_path, textual_path

    import open_clip
    import torch
    from transformers import AutoTokenizer

    torch.backends.mha.set_fastpath_enabled(False)

    model = open_clip.create_model(
        model_cfg.name,
        pretrained=model_cfg.pretrained,
        jit=False,
        require_pretrained=True,
    )

    text_vision_cfg = open_clip.get_model_config(model_cfg.name)

    model.eval()
    for param in model.parameters():
        param.requires_grad_(False)

    if visual_path is not None and output_dir_visual is not None:
        if no_cache or not visual_path.exists():
            save_config(
                open_clip.get_model_preprocess_cfg(model),
                output_dir_visual / "preprocess_cfg.json",
            )
            save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
            _export_image_encoder(model, model_cfg, visual_path, opset_version)
        else:
            print(f"Model {visual_path} already exists, skipping")

    if textual_path is not None and output_dir_textual is not None:
        if no_cache or not textual_path.exists():
            tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
            AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
            _export_text_encoder(model, model_cfg, textual_path, opset_version)
        else:
            print(f"Model {textual_path} already exists, skipping")
    return visual_path, textual_path


def _export_image_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    import torch

    output_path = Path(output_path)

    def encode_image(image: torch.Tensor) -> torch.Tensor:
        output = model.encode_image(image, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    model.forward = encode_image

    args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            output_path.as_posix(),
            input_names=["image"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"image": {0: "batch_size"}},
        )


def _export_text_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    import torch

    output_path = Path(output_path)

    def encode_text(text: torch.Tensor) -> torch.Tensor:
        output = model.encode_text(text, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    model.forward = encode_text

    args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            output_path.as_posix(),
            input_names=["text"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"text": {0: "batch_size"}},
        )
(new file, 96 lines)

@@ -0,0 +1,96 @@
from pathlib import Path

from .constants import RKNN_SOCS


def _export_platform(
    model_dir: Path,
    target_platform: str,
    inputs: list[str] | None = None,
    input_size_list: list[list[int]] | None = None,
    fuse_matmul_softmax_matmul_to_sdpa: bool = True,
    no_cache: bool = False,
) -> None:
    from rknn.api import RKNN

    input_path = model_dir / "model.onnx"
    output_path = model_dir / "rknpu" / target_platform / "model.rknn"
    if not no_cache and output_path.exists():
        print(f"Model {input_path} already exists at {output_path}, skipping")
        return

    print(f"Exporting model {input_path} to {output_path}")

    rknn = RKNN(verbose=False)

    rknn.config(
        target_platform=target_platform,
        disable_rules=["fuse_matmul_softmax_matmul_to_sdpa"] if not fuse_matmul_softmax_matmul_to_sdpa else [],
        enable_flash_attention=False,
        model_pruning=True,
    )
    ret = rknn.load_onnx(model=input_path.as_posix(), inputs=inputs, input_size_list=input_size_list)

    if ret != 0:
        raise RuntimeError("Load failed!")

    ret = rknn.build(do_quantization=False)

    if ret != 0:
        raise RuntimeError("Build failed!")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    ret = rknn.export_rknn(output_path.as_posix())
    if ret != 0:
        raise RuntimeError("Export rknn model failed!")


def _export_platforms(
    model_dir: Path,
    inputs: list[str] | None = None,
    input_size_list: list[list[int]] | None = None,
    no_cache: bool = False,
) -> None:
    fuse_matmul_softmax_matmul_to_sdpa = True
    for soc in RKNN_SOCS:
        try:
            _export_platform(
                model_dir,
                soc,
                inputs=inputs,
                input_size_list=input_size_list,
                fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
                no_cache=no_cache,
            )
        except Exception as e:
            print(f"Failed to export model for {soc}: {e}")
            if "inputs or 'outputs' must be set" in str(e):
                print("Retrying without fuse_matmul_softmax_matmul_to_sdpa")
                fuse_matmul_softmax_matmul_to_sdpa = False
                _export_platform(
                    model_dir,
                    soc,
                    inputs=inputs,
                    input_size_list=input_size_list,
                    fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
                    no_cache=no_cache,
                )


def export(model_dir: Path, no_cache: bool = False) -> None:
    textual = model_dir / "textual"
    visual = model_dir / "visual"
    detection = model_dir / "detection"
    recognition = model_dir / "recognition"

    if textual.is_dir():
        _export_platforms(textual, no_cache=no_cache)

    if visual.is_dir():
        _export_platforms(visual, no_cache=no_cache)

    if detection.is_dir():
        _export_platforms(detection, inputs=["input.1"], input_size_list=[[1, 3, 640, 640]], no_cache=no_cache)

    if recognition.is_dir():
        _export_platforms(recognition, inputs=["input.1"], input_size_list=[[1, 3, 112, 112]], no_cache=no_cache)
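A usage sketch for this module (paths illustrative; requires the full `rknn-toolkit2` conversion package pinned in `pyproject.toml` below):

```python
# Sketch: convert an already-exported ONNX model directory for every supported SoC.
from pathlib import Path

from immich_model_exporter.exporters.rknn import export

export(Path("models/ViT-B-32__openai"), no_cache=False)
# writes e.g. models/ViT-B-32__openai/visual/rknpu/rk3588/model.rknn for each SoC in RKNN_SOCS
```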
machine-learning/export/immich_model_exporter/run.py (new file, 88 lines)

@@ -0,0 +1,88 @@
import subprocess

from exporters.constants import ModelSource

mclip = [
    "M-CLIP/LABSE-Vit-L-14",
    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
]

openclip = [
    "RN101__openai",
    "RN101__yfcc15m",
    "RN50__cc12m",
    "RN50__openai",
    "RN50__yfcc15m",
    "RN50x16__openai",
    "RN50x4__openai",
    "RN50x64__openai",
    "ViT-B-16-SigLIP-256__webli",
    "ViT-B-16-SigLIP-384__webli",
    "ViT-B-16-SigLIP-512__webli",
    "ViT-B-16-SigLIP-i18n-256__webli",
    "ViT-B-16-SigLIP2__webli",
    "ViT-B-16-SigLIP__webli",
    "ViT-B-16-plus-240__laion400m_e31",
    "ViT-B-16-plus-240__laion400m_e32",
    "ViT-B-16__laion400m_e31",
    "ViT-B-16__laion400m_e32",
    "ViT-B-16__openai",
    "ViT-B-32-SigLIP2-256__webli",
    "ViT-B-32__laion2b-s34b-b79k",
    "ViT-B-32__laion2b_e16",
    "ViT-B-32__laion400m_e31",
    "ViT-B-32__laion400m_e32",
    "ViT-B-32__openai",
    "ViT-H-14-378-quickgelu__dfn5b",
    "ViT-H-14-quickgelu__dfn5b",
    "ViT-H-14__laion2b-s32b-b79k",
    "ViT-L-14-336__openai",
    "ViT-L-14-quickgelu__dfn2b",
    "ViT-L-14__laion2b-s32b-b82k",
    "ViT-L-14__laion400m_e31",
    "ViT-L-14__laion400m_e32",
    "ViT-L-14__openai",
    "ViT-L-16-SigLIP-256__webli",
    "ViT-L-16-SigLIP-384__webli",
    "ViT-L-16-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-384__webli",
    "ViT-L-16-SigLIP2-512__webli",
    "ViT-SO400M-14-SigLIP-384__webli",
    "ViT-SO400M-14-SigLIP2-378__webli",
    "ViT-SO400M-14-SigLIP2__webli",
    "ViT-SO400M-16-SigLIP2-256__webli",
    "ViT-SO400M-16-SigLIP2-384__webli",
    "ViT-SO400M-16-SigLIP2-512__webli",
    "ViT-gopt-16-SigLIP2-256__webli",
    "ViT-gopt-16-SigLIP2-384__webli",
    "nllb-clip-base-siglip__mrl",
    "nllb-clip-base-siglip__v1",
    "nllb-clip-large-siglip__mrl",
    "nllb-clip-large-siglip__v1",
    "xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k",
    "xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k",
]

insightface = [
    "antelopev2",
    "buffalo_l",
    "buffalo_m",
    "buffalo_s",
]


def export_models(models: list[str], source: ModelSource) -> None:
    for model in models:
        try:
            print(f"Exporting model {model}")
            subprocess.check_call(["python", "-m", "immich_model_exporter.export", model, source])
        except Exception as e:
            print(f"Failed to export model {model}: {e}")


if __name__ == "__main__":
    export_models(mclip, ModelSource.MCLIP)
    export_models(openclip, ModelSource.OPENCLIP)
    export_models(insightface, ModelSource.INSIGHTFACE)
@ -1,114 +0,0 @@
|
|||
import os
|
||||
import tempfile
|
||||
import warnings
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
import open_clip
|
||||
import torch
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from .util import get_model_path, save_config


@dataclass
class OpenCLIPModelConfig:
    name: str
    pretrained: str
    image_size: int = field(init=False)
    sequence_length: int = field(init=False)

    def __post_init__(self) -> None:
        open_clip_cfg = open_clip.get_model_config(self.name)
        if open_clip_cfg is None:
            raise ValueError(f"Unknown model {self.name}")
        self.image_size = open_clip_cfg["vision_cfg"]["image_size"]
        self.sequence_length = open_clip_cfg["text_cfg"].get("context_length", 77)


def to_onnx(
    model_cfg: OpenCLIPModelConfig,
    output_dir_visual: Path | str | None = None,
    output_dir_textual: Path | str | None = None,
) -> tuple[Path | None, Path | None]:
    visual_path = None
    textual_path = None
    with tempfile.TemporaryDirectory() as tmpdir:
        model = open_clip.create_model(
            model_cfg.name,
            pretrained=model_cfg.pretrained,
            jit=False,
            cache_dir=os.environ.get("CACHE_DIR", tmpdir),
            require_pretrained=True,
        )

        text_vision_cfg = open_clip.get_model_config(model_cfg.name)

        model.eval()
        for param in model.parameters():
            param.requires_grad_(False)

        if output_dir_visual is not None:
            output_dir_visual = Path(output_dir_visual)
            visual_path = get_model_path(output_dir_visual)

            save_config(open_clip.get_model_preprocess_cfg(model), output_dir_visual / "preprocess_cfg.json")
            save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
            export_image_encoder(model, model_cfg, visual_path)

        if output_dir_textual is not None:
            output_dir_textual = Path(output_dir_textual)
            textual_path = get_model_path(output_dir_textual)

            tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
            AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
            export_text_encoder(model, model_cfg, textual_path)
    return visual_path, textual_path


def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
    output_path = Path(output_path)

    def encode_image(image: torch.Tensor) -> torch.Tensor:
        output = model.encode_image(image, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)
    traced = torch.jit.trace(encode_image, args)  # type: ignore[no-untyped-call]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            traced,
            args,
            output_path.as_posix(),
            input_names=["image"],
            output_names=["embedding"],
            opset_version=17,
            # dynamic_axes={"image": {0: "batch_size"}},
        )


def export_text_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
    output_path = Path(output_path)

    def encode_text(text: torch.Tensor) -> torch.Tensor:
        output = model.encode_text(text, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)
    traced = torch.jit.trace(encode_text, args)  # type: ignore[no-untyped-call]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            traced,
            args,
            output_path.as_posix(),
            input_names=["text"],
            output_names=["embedding"],
            opset_version=17,
            # dynamic_axes={"text": {0: "batch_size"}},
        )
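For orientation, a minimal illustrative driver for this module, mirroring how the export script later in this diff calls it (the model/pretrained pair and the output paths are hypothetical, not taken from the diff):

# Illustrative only: export one OpenCLIP model to ONNX.
from models import openclip

config = openclip.OpenCLIPModelConfig("ViT-B-32", "openai")  # hypothetical choice
visual_path, textual_path = openclip.to_onnx(
    config,
    output_dir_visual="out/ViT-B-32__openai/visual",    # illustrative paths
    output_dir_textual="out/ViT-B-32__openai/textual",
)
print(visual_path, textual_path)  # paths to the exported ONNX encoders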
@ -1,49 +0,0 @@
from pathlib import Path

import onnx
import onnxruntime as ort
import onnxsim


def save_onnx(model: onnx.ModelProto, output_path: Path | str) -> None:
    try:
        onnx.save(model, output_path)
    except ValueError as e:
        if "The proto size is larger than the 2 GB limit." in str(e):
            onnx.save(model, output_path, save_as_external_data=True, size_threshold=1_000_000)
        else:
            raise e


def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None:
    model_path = Path(model_path)
    output_path = Path(output_path)
    model = onnx.load(model_path.as_posix())
    model, check = onnxsim.simplify(model)
    assert check, "Simplified ONNX model could not be validated"
    for file in model_path.parent.iterdir():
        if file.name.startswith("Constant") or "onnx" in file.name or file.suffix == ".weight":
            file.unlink()
    save_onnx(model, output_path)


def optimize_ort(
    model_path: Path | str,
    output_path: Path | str,
    level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
) -> None:
    model_path = Path(model_path)
    output_path = Path(output_path)

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = level
    sess_options.optimized_model_filepath = output_path.as_posix()

    ort.InferenceSession(model_path.as_posix(), providers=["CPUExecutionProvider"], sess_options=sess_options)


def optimize(model_path: Path | str) -> None:
    model_path = Path(model_path)

    optimize_ort(model_path, model_path)
    optimize_onnxsim(model_path, model_path)
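Note that optimize() first applies ONNX Runtime's basic graph optimizations in place and then simplifies the result with onnxsim. A minimal usage sketch (the file path is illustrative):

# Illustrative only: optimize an exported encoder in place.
from models.optimize import optimize

optimize("out/ViT-B-32__openai/visual/model.onnx")  # hypothetical path from the export step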
67
machine-learning/export/pyproject.toml
Normal file
@ -0,0 +1,67 @@
[project]
name = "immich_model_exporter"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10, <4.0"
dependencies = [
    "huggingface-hub>=0.29.3",
    "multilingual-clip>=1.0.10",
    "onnx>=1.14.1",
    "onnxruntime>=1.16.0",
    "open-clip-torch>=2.31.0",
    "typer>=0.15.2",
    "rknn-toolkit2>=2.3.0",
    "transformers>=4.49.0",
    "tenacity>=9.0.0",
]

[dependency-groups]
dev = ["black>=23.3.0", "mypy>=1.3.0", "ruff>=0.0.272"]

[tool.uv]
override-dependencies = [
    "onnx>=1.16.0,<2",
    "onnxruntime>=1.18.2,<2",
    "torch>=2.4",
    "torchvision>=0.21",
]

[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[tool.hatch.build.targets.sdist]
include = ["immich_model_exporter"]

[tool.hatch.build.targets.wheel]
include = ["immich_model_exporter"]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.mypy]
python_version = "3.12"
follow_imports = "silent"
warn_redundant_casts = true
disallow_any_generics = true
check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true

[tool.ruff]
line-length = 120
target-version = "py312"

[tool.ruff.lint]
select = ["E", "F", "I"]

[tool.black]
line-length = 120
target-version = ['py312']
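The exporter now depends on rknn-toolkit2, which converts ONNX models into Rockchip's .rknn format for the NPU. A hedged sketch of what such a conversion step could look like, following the toolkit's documented config/load_onnx/build/export_rknn flow (the target platform, paths, and options here are assumptions, not taken from this diff):

# Illustrative only: convert an exported ONNX encoder to RKNN format.
from rknn.api import RKNN  # shipped with rknn-toolkit2

rknn = RKNN()
rknn.config(target_platform="rk3566")  # hypothetical target SoC
if rknn.load_onnx(model="out/ViT-B-32__openai/visual/model.onnx") != 0:  # illustrative path
    raise RuntimeError("failed to load ONNX model")
if rknn.build(do_quantization=False) != 0:
    raise RuntimeError("failed to build RKNN model")
rknn.export_rknn("out/ViT-B-32__openai/visual/model.rknn")
rknn.release()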
@ -1,113 +0,0 @@
import gc
import os
from pathlib import Path
from tempfile import TemporaryDirectory

import torch
from huggingface_hub import create_repo, upload_folder
from models import mclip, openclip
from models.optimize import optimize
from rich.progress import Progress

models = [
    "M-CLIP/LABSE-Vit-L-14",
    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
    "RN101::openai",
    "RN101::yfcc15m",
    "RN50::cc12m",
    "RN50::openai",
    "RN50::yfcc15m",
    "RN50x16::openai",
    "RN50x4::openai",
    "RN50x64::openai",
    "ViT-B-16-SigLIP-256::webli",
    "ViT-B-16-SigLIP-384::webli",
    "ViT-B-16-SigLIP-512::webli",
    "ViT-B-16-SigLIP-i18n-256::webli",
    "ViT-B-16-SigLIP::webli",
    "ViT-B-16-plus-240::laion400m_e31",
    "ViT-B-16-plus-240::laion400m_e32",
    "ViT-B-16::laion400m_e31",
    "ViT-B-16::laion400m_e32",
    "ViT-B-16::openai",
    "ViT-B-32::laion2b-s34b-b79k",
    "ViT-B-32::laion2b_e16",
    "ViT-B-32::laion400m_e31",
    "ViT-B-32::laion400m_e32",
    "ViT-B-32::openai",
    "ViT-H-14-378-quickgelu::dfn5b",
    "ViT-H-14-quickgelu::dfn5b",
    "ViT-H-14::laion2b-s32b-b79k",
    "ViT-L-14-336::openai",
    "ViT-L-14-quickgelu::dfn2b",
    "ViT-L-14::laion2b-s32b-b82k",
    "ViT-L-14::laion400m_e31",
    "ViT-L-14::laion400m_e32",
    "ViT-L-14::openai",
    "ViT-L-16-SigLIP-256::webli",
    "ViT-L-16-SigLIP-384::webli",
    "ViT-SO400M-14-SigLIP-384::webli",
    "ViT-g-14::laion2b-s12b-b42k",
    "nllb-clip-base-siglip::mrl",
    "nllb-clip-base-siglip::v1",
    "nllb-clip-large-siglip::mrl",
    "nllb-clip-large-siglip::v1",
    "xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k",
    "xlm-roberta-large-ViT-H-14::frozen_laion5b_s13b_b90k",
]

# glob to delete old UUID blobs when reuploading models
uuid_char = "[a-fA-F0-9]"
uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12

# remote repo files to be deleted before uploading
# deletion is in the same commit as the upload, so it's atomic
delete_patterns = ["**/*onnx*", "**/Constant*", "**/*.weight", "**/*.bias", f"**/{uuid_glob}"]

with Progress() as progress:
    task = progress.add_task("[green]Exporting models...", total=len(models))
    token = os.environ.get("HF_AUTH_TOKEN")
    torch.backends.mha.set_fastpath_enabled(False)
    with TemporaryDirectory() as tmp:
        tmpdir = Path(tmp)
        for model in models:
            model_name = model.split("/")[-1].replace("::", "__")
            hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
            hf_model_name = hf_model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
            config_path = tmpdir / model_name / "config.json"

            def export() -> None:
                progress.update(task, description=f"[green]Exporting {hf_model_name}")
                visual_dir = tmpdir / hf_model_name / "visual"
                textual_dir = tmpdir / hf_model_name / "textual"
                if model.startswith("M-CLIP"):
                    visual_path, textual_path = mclip.to_onnx(model, visual_dir, textual_dir)
                else:
                    name, _, pretrained = model_name.partition("__")
                    config = openclip.OpenCLIPModelConfig(name, pretrained)
                    visual_path, textual_path = openclip.to_onnx(config, visual_dir, textual_dir)
                progress.update(task, description=f"[green]Optimizing {hf_model_name} (visual)")
                optimize(visual_path)
                progress.update(task, description=f"[green]Optimizing {hf_model_name} (textual)")
                optimize(textual_path)

                gc.collect()

            def upload() -> None:
                progress.update(task, description=f"[yellow]Uploading {hf_model_name}")
                repo_id = f"immich-app/{hf_model_name}"

                create_repo(repo_id, exist_ok=True)
                upload_folder(
                    repo_id=repo_id,
                    folder_path=tmpdir / hf_model_name,
                    delete_patterns=delete_patterns,
                    token=token,
                )

            export()
            if token is not None:
                upload()
            progress.update(task, advance=1)
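The UUID pattern above is a plain glob-style character-class pattern. A quick, illustrative sanity check with Python's fnmatch (which accepts the same [seq] syntax) shows it matches UUID-shaped blob names while leaving model files alone; whether the remote side applies identical matching semantics is an assumption here:

# Illustrative check that the pattern matches a UUID-shaped filename.
from fnmatch import fnmatch

uuid_char = "[a-fA-F0-9]"
uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12
assert fnmatch("1f2e3d4c-9a8b-4c7d-9e0f-0123456789ab", uuid_glob)
assert not fnmatch("model.onnx", uuid_glob)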
1395
machine-learning/export/uv.lock
generated
Normal file
File diff suppressed because it is too large
@ -51,6 +51,7 @@ cpu = ["onnxruntime>=1.15.0,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
armnn = ["onnxruntime>=1.15.0,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]

[tool.uv]
compile-bytecode = true
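The serving side gains a matching rknn extra that pulls in rknn-toolkit-lite2, the slim on-device runtime (the full toolkit is only needed at export time). A hedged sketch of the lite runtime's basic load/init/infer flow — the model path and input shape are illustrative, and Immich's actual session handling is not shown in this hunk:

# Illustrative only: run an .rknn model on the NPU with rknn-toolkit-lite2.
import numpy as np
from rknnlite.api import RKNNLite

runtime = RKNNLite()
if runtime.load_rknn("visual/model.rknn") != 0:  # hypothetical path
    raise RuntimeError("failed to load RKNN model")
if runtime.init_runtime() != 0:
    raise RuntimeError("failed to initialize NPU runtime")
dummy = np.zeros((1, 3, 224, 224), dtype=np.float32)  # illustrative input shape
outputs = runtime.inference(inputs=[dummy])
runtime.release()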
79
machine-learning/uv.lock
generated
@ -1109,6 +1109,10 @@ cuda = [
openvino = [
    { name = "onnxruntime-openvino" },
]
rknn = [
    { name = "onnxruntime" },
    { name = "rknn-toolkit-lite2" },
]

[package.dev-dependencies]
dev = [
@ -1162,6 +1166,7 @@ requires-dist = [
    { name = "insightface", specifier = ">=0.7.3,<1.0" },
    { name = "onnxruntime", marker = "extra == 'armnn'", specifier = ">=1.15.0,<2" },
    { name = "onnxruntime", marker = "extra == 'cpu'", specifier = ">=1.15.0,<2" },
    { name = "onnxruntime", marker = "extra == 'rknn'", specifier = ">=1.15.0,<2" },
    { name = "onnxruntime-gpu", marker = "extra == 'cuda'", specifier = ">=1.17.0,<2", index = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/" },
    { name = "onnxruntime-openvino", marker = "extra == 'openvino'", specifier = ">=1.17.1,<1.19.0" },
    { name = "opencv-python-headless", specifier = ">=4.7.0.72,<5.0" },
@ -1171,10 +1176,11 @@ requires-dist = [
    { name = "pydantic-settings", specifier = ">=2.5.2,<3" },
    { name = "python-multipart", specifier = ">=0.0.6,<1.0" },
    { name = "rich", specifier = ">=13.4.2" },
    { name = "rknn-toolkit-lite2", marker = "extra == 'rknn'", specifier = ">=2.3.0,<3" },
    { name = "tokenizers", specifier = ">=0.15.0,<1.0" },
    { name = "uvicorn", extras = ["standard"], specifier = ">=0.22.0,<1.0" },
]
provides-extras = ["cpu", "cuda", "openvino", "armnn"]
provides-extras = ["cpu", "cuda", "openvino", "armnn", "rknn"]

[package.metadata.requires-dev]
dev = [
@ -2131,6 +2137,77 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 },
]

[[package]]
name = "rknn-toolkit-lite2"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "numpy" },
    { name = "psutil" },
    { name = "ruamel-yaml" },
]
wheels = [
    { url = "https://files.pythonhosted.org/packages/ed/77/6af374a4a8cd2aee762a1fb8a3050dcf3f129134bbdc4bb6bed755c4325b/rknn_toolkit_lite2-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b6733689bd09a262bcb6ba4744e690dd4b37ebeac4ed427cf45242c4b4ce9a4", size = 559372 },
    { url = "https://files.pythonhosted.org/packages/9b/0c/76ff1eb09d09ce4394a6959d2343a321d28dd9e604348ffdafceafdc344c/rknn_toolkit_lite2-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3e4fefe355dc34a155680e4bcb9e4abb37ebc271f045ec9e0a4a3a018bc5beb", size = 569149 },
    { url = "https://files.pythonhosted.org/packages/0d/6e/8679562028051b02312212defc6e8c07248953f10dd7ad506e941b575bf3/rknn_toolkit_lite2-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37394371d1561f470c553f39869d7c35ff93405dffe3d0d72babf297a2b0aee9", size = 527457 },
]

[[package]]
name = "ruamel-yaml"
version = "0.18.10"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "ruamel-yaml-clib", marker = "python_full_version < '3.13' and platform_python_implementation == 'CPython'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ea/46/f44d8be06b85bc7c4d8c95d658be2b68f27711f279bf9dd0612a5e4794f5/ruamel.yaml-0.18.10.tar.gz", hash = "sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58", size = 143447 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/c2/36/dfc1ebc0081e6d39924a2cc53654497f967a084a436bb64402dfce4254d9/ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1", size = 117729 },
]

[[package]]
name = "ruamel-yaml-clib"
version = "0.2.12"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/70/57/40a958e863e299f0c74ef32a3bde9f2d1ea8d69669368c0c502a0997f57f/ruamel.yaml.clib-0.2.12-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:11f891336688faf5156a36293a9c362bdc7c88f03a8a027c2c1d8e0bcde998e5", size = 131301 },
    { url = "https://files.pythonhosted.org/packages/98/a8/29a3eb437b12b95f50a6bcc3d7d7214301c6c529d8fdc227247fa84162b5/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969", size = 633728 },
    { url = "https://files.pythonhosted.org/packages/35/6d/ae05a87a3ad540259c3ad88d71275cbd1c0f2d30ae04c65dcbfb6dcd4b9f/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd5415dded15c3822597455bc02bcd66e81ef8b7a48cb71a33628fc9fdde39df", size = 722230 },
    { url = "https://files.pythonhosted.org/packages/7f/b7/20c6f3c0b656fe609675d69bc135c03aac9e3865912444be6339207b6648/ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76", size = 686712 },
    { url = "https://files.pythonhosted.org/packages/cd/11/d12dbf683471f888d354dac59593873c2b45feb193c5e3e0f2ebf85e68b9/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6", size = 663936 },
    { url = "https://files.pythonhosted.org/packages/72/14/4c268f5077db5c83f743ee1daeb236269fa8577133a5cfa49f8b382baf13/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd", size = 696580 },
    { url = "https://files.pythonhosted.org/packages/30/fc/8cd12f189c6405a4c1cf37bd633aa740a9538c8e40497c231072d0fef5cf/ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a52d48f4e7bf9005e8f0a89209bf9a73f7190ddf0489eee5eb51377385f59f2a", size = 663393 },
    { url = "https://files.pythonhosted.org/packages/80/29/c0a017b704aaf3cbf704989785cd9c5d5b8ccec2dae6ac0c53833c84e677/ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da", size = 100326 },
    { url = "https://files.pythonhosted.org/packages/3a/65/fa39d74db4e2d0cd252355732d966a460a41cd01c6353b820a0952432839/ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28", size = 118079 },
    { url = "https://files.pythonhosted.org/packages/fb/8f/683c6ad562f558cbc4f7c029abcd9599148c51c54b5ef0f24f2638da9fbb/ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6", size = 132224 },
    { url = "https://files.pythonhosted.org/packages/3c/d2/b79b7d695e2f21da020bd44c782490578f300dd44f0a4c57a92575758a76/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d84318609196d6bd6da0edfa25cedfbabd8dbde5140a0a23af29ad4b8f91fb1e", size = 641480 },
    { url = "https://files.pythonhosted.org/packages/68/6e/264c50ce2a31473a9fdbf4fa66ca9b2b17c7455b31ef585462343818bd6c/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb43a269eb827806502c7c8efb7ae7e9e9d0573257a46e8e952f4d4caba4f31e", size = 739068 },
    { url = "https://files.pythonhosted.org/packages/86/29/88c2567bc893c84d88b4c48027367c3562ae69121d568e8a3f3a8d363f4d/ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52", size = 703012 },
    { url = "https://files.pythonhosted.org/packages/11/46/879763c619b5470820f0cd6ca97d134771e502776bc2b844d2adb6e37753/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642", size = 704352 },
    { url = "https://files.pythonhosted.org/packages/02/80/ece7e6034256a4186bbe50dee28cd032d816974941a6abf6a9d65e4228a7/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2", size = 737344 },
    { url = "https://files.pythonhosted.org/packages/f0/ca/e4106ac7e80efbabdf4bf91d3d32fc424e41418458251712f5672eada9ce/ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1492a6051dab8d912fc2adeef0e8c72216b24d57bd896ea607cb90bb0c4981d3", size = 714498 },
    { url = "https://files.pythonhosted.org/packages/67/58/b1f60a1d591b771298ffa0428237afb092c7f29ae23bad93420b1eb10703/ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4", size = 100205 },
    { url = "https://files.pythonhosted.org/packages/b4/4f/b52f634c9548a9291a70dfce26ca7ebce388235c93588a1068028ea23fcc/ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb", size = 118185 },
    { url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433 },
    { url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362 },
    { url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118 },
    { url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497 },
    { url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042 },
    { url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831 },
    { url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692 },
    { url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777 },
    { url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523 },
    { url = "https://files.pythonhosted.org/packages/29/00/4864119668d71a5fa45678f380b5923ff410701565821925c69780356ffa/ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a", size = 132011 },
    { url = "https://files.pythonhosted.org/packages/7f/5e/212f473a93ae78c669ffa0cb051e3fee1139cb2d385d2ae1653d64281507/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475", size = 642488 },
    { url = "https://files.pythonhosted.org/packages/1f/8f/ecfbe2123ade605c49ef769788f79c38ddb1c8fa81e01f4dbf5cf1a44b16/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef", size = 745066 },
    { url = "https://files.pythonhosted.org/packages/e2/a9/28f60726d29dfc01b8decdb385de4ced2ced9faeb37a847bd5cf26836815/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6", size = 701785 },
    { url = "https://files.pythonhosted.org/packages/84/7e/8e7ec45920daa7f76046578e4f677a3215fe8f18ee30a9cb7627a19d9b4c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf", size = 693017 },
    { url = "https://files.pythonhosted.org/packages/c5/b3/d650eaade4ca225f02a648321e1ab835b9d361c60d51150bac49063b83fa/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1", size = 741270 },
    { url = "https://files.pythonhosted.org/packages/87/b8/01c29b924dcbbed75cc45b30c30d565d763b9c4d540545a0eeecffb8f09c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01", size = 709059 },
    { url = "https://files.pythonhosted.org/packages/30/8c/ed73f047a73638257aa9377ad356bea4d96125b305c34a28766f4445cc0f/ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6", size = 98583 },
    { url = "https://files.pythonhosted.org/packages/b0/85/e8e751d8791564dd333d5d9a4eab0a7a115f7e349595417fd50ecae3395c/ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3", size = 115190 },
]

[[package]]
name = "ruff"
version = "0.9.9"
@ -96,6 +96,18 @@ export const CLIP_MODEL_INFO: Record<string, ModelInfo> = {
  'ViT-SO400M-14-SigLIP-384__webli': { dimSize: 1152 },
  'nllb-clip-large-siglip__mrl': { dimSize: 1152 },
  'nllb-clip-large-siglip__v1': { dimSize: 1152 },
  'ViT-B-16-SigLIP2__webli': { dimSize: 768 },
  'ViT-B-32-SigLIP2-256__webli': { dimSize: 768 },
  'ViT-L-16-SigLIP2-256__webli': { dimSize: 1024 },
  'ViT-L-16-SigLIP2-384__webli': { dimSize: 1024 },
  'ViT-L-16-SigLIP2-512__webli': { dimSize: 1024 },
  'ViT-SO400M-14-SigLIP2__webli': { dimSize: 1152 },
  'ViT-SO400M-14-SigLIP2-378__webli': { dimSize: 1152 },
  'ViT-SO400M-16-SigLIP2-256__webli': { dimSize: 1152 },
  'ViT-SO400M-16-SigLIP2-384__webli': { dimSize: 1152 },
  'ViT-SO400M-16-SigLIP2-512__webli': { dimSize: 1152 },
  'ViT-gopt-16-SigLIP2-256__webli': { dimSize: 1536 },
  'ViT-gopt-16-SigLIP2-384__webli': { dimSize: 1536 },
};

type SharpRotationData = {