From ae80def7f2f22dfb8893b4efa107c4f439513b09 Mon Sep 17 00:00:00 2001
From: mertalev <101130780+mertalev@users.noreply.github.com>
Date: Sun, 12 Nov 2023 18:01:12 -0500
Subject: [PATCH] export cli

---
 machine-learning/export/__init__.py         |   0
 machine-learning/export/models/constants.py |   9 +
 machine-learning/export/models/mclip.py     |  13 +-
 machine-learning/export/models/util.py      |   7 +
 machine-learning/export/run.py              | 177 +++++++++++++-------
 5 files changed, 135 insertions(+), 71 deletions(-)
 create mode 100644 machine-learning/export/__init__.py
 create mode 100644 machine-learning/export/models/constants.py

diff --git a/machine-learning/export/__init__.py b/machine-learning/export/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/machine-learning/export/models/constants.py b/machine-learning/export/models/constants.py
new file mode 100644
index 0000000000..be062b8643
--- /dev/null
+++ b/machine-learning/export/models/constants.py
@@ -0,0 +1,9 @@
+from export.models.openclip import OpenCLIPModelConfig
+
+
+MCLIP_TO_OPENCLIP = {
+    "XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
+    "XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
+    "LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
+    "XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
+}
diff --git a/machine-learning/export/models/mclip.py b/machine-learning/export/models/mclip.py
index 565539016a..80ca1579e0 100644
--- a/machine-learning/export/models/mclip.py
+++ b/machine-learning/export/models/mclip.py
@@ -1,22 +1,15 @@
 import tempfile
 import warnings
 from pathlib import Path
+from export.models.constants import MCLIP_TO_OPENCLIP
 
 import torch
 from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
 from transformers import AutoTokenizer
 
-from .openclip import OpenCLIPModelConfig
 from .openclip import to_onnx as openclip_to_onnx
 from .optimize import optimize
-from .util import get_model_path
-
-_MCLIP_TO_OPENCLIP = {
-    "M-CLIP/XLM-Roberta-Large-Vit-B-32": OpenCLIPModelConfig("ViT-B-32", "openai"),
-    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus": OpenCLIPModelConfig("ViT-B-16-plus-240", "laion400m_e32"),
-    "M-CLIP/LABSE-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
-    "M-CLIP/XLM-Roberta-Large-Vit-L-14": OpenCLIPModelConfig("ViT-L-14", "openai"),
-}
+from .util import get_model_path, clean_name
 
 
 def to_onnx(
@@ -33,7 +26,7 @@ def to_onnx(
             param.requires_grad_(False)
 
         export_text_encoder(model, textual_path)
-        openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual)
+        openclip_to_onnx(MCLIP_TO_OPENCLIP[clean_name(model_name)], output_dir_visual)
         optimize(textual_path)
 
 
diff --git a/machine-learning/export/models/util.py b/machine-learning/export/models/util.py
index 67e582af46..585fa824da 100644
--- a/machine-learning/export/models/util.py
+++ b/machine-learning/export/models/util.py
@@ -3,6 +3,9 @@
 from pathlib import Path
 from typing import Any
 
+_clean_name = str.maketrans(":\\/", "___", ".")
+
+
 def get_model_path(output_dir: Path | str) -> Path:
     output_dir = Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
@@ -13,3 +16,7 @@ def save_config(config: Any, output_path: Path | str) -> None:
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
     json.dump(config, output_path.open("w"))
+
+
+def clean_name(model_name: str) -> str:
+    return model_name.split("/")[-1].translate(_clean_name)
diff --git a/machine-learning/export/run.py b/machine-learning/export/run.py
index 5ce32189e2..d2810a2455 100644
--- a/machine-learning/export/run.py
+++ b/machine-learning/export/run.py
@@ -1,76 +1,131 @@
+from enum import StrEnum
 import gc
 import os
 from pathlib import Path
 from tempfile import TemporaryDirectory
+from typing import Optional
 
-from huggingface_hub import create_repo, login, upload_folder
-from models import mclip, openclip
+from huggingface_hub import create_repo, upload_folder
+from export.models import mclip, openclip, insightface
+from export.models.util import clean_name
 from rich.progress import Progress
+import typer
 
-models = [
-    "RN50::openai",
-    "RN50::yfcc15m",
-    "RN50::cc12m",
-    "RN101::openai",
-    "RN101::yfcc15m",
-    "RN50x4::openai",
-    "RN50x16::openai",
-    "RN50x64::openai",
-    "ViT-B-32::openai",
-    "ViT-B-32::laion2b_e16",
-    "ViT-B-32::laion400m_e31",
-    "ViT-B-32::laion400m_e32",
-    "ViT-B-32::laion2b-s34b-b79k",
-    "ViT-B-16::openai",
-    "ViT-B-16::laion400m_e31",
-    "ViT-B-16::laion400m_e32",
-    "ViT-B-16-plus-240::laion400m_e31",
-    "ViT-B-16-plus-240::laion400m_e32",
-    "ViT-L-14::openai",
-    "ViT-L-14::laion400m_e31",
-    "ViT-L-14::laion400m_e32",
-    "ViT-L-14::laion2b-s32b-b82k",
-    "ViT-L-14-336::openai",
-    "ViT-H-14::laion2b-s32b-b79k",
-    "ViT-g-14::laion2b-s12b-b42k",
-    "M-CLIP/LABSE-Vit-L-14",
-    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
-    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
-    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
-]
 
-login(token=os.environ["HF_AUTH_TOKEN"])
+app = typer.Typer()
 
-with Progress() as progress:
-    task1 = progress.add_task("[green]Exporting models...", total=len(models))
-    task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
-    with TemporaryDirectory() as tmp:
-        tmpdir = Path(tmp)
-        for model in models:
-            model_name = model.split("/")[-1].replace("::", "__")
-            config_path = tmpdir / model_name / "config.json"
+
+class ModelLibrary(StrEnum):
+    MCLIP = "mclip"
+    OPENCLIP = "openclip"
+    INSIGHTFACE = "insightface"
 
-            def upload() -> None:
-                progress.update(task2, description=f"[yellow]Uploading {model_name}")
-                repo_id = f"immich-app/{model_name}"
-                create_repo(repo_id, exist_ok=True)
-                upload_folder(repo_id=repo_id, folder_path=tmpdir / model_name)
-                progress.update(task2, advance=1)
+
+def _export(model_name: str, library: ModelLibrary, export_dir: Path) -> None:
+    visual_dir = export_dir / "visual"
+    textual_dir = export_dir / "textual"
+    match library:
+        case ModelLibrary.MCLIP:
+            mclip.to_onnx(model_name, visual_dir, textual_dir)
+        case ModelLibrary.OPENCLIP:
+            name, _, pretrained = model_name.partition("__")
+            openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
+        case ModelLibrary.INSIGHTFACE:
+            insightface.to_onnx(model_name, visual_dir, textual_dir)
 
-            def export() -> None:
-                progress.update(task1, description=f"[green]Exporting {model_name}")
-                visual_dir = tmpdir / model_name / "visual"
-                textual_dir = tmpdir / model_name / "textual"
-                if model.startswith("M-CLIP"):
-                    mclip.to_onnx(model, visual_dir, textual_dir)
-                else:
-                    name, _, pretrained = model_name.partition("__")
-                    openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir)
+    gc.collect()
 
-                progress.update(task1, advance=1)
 
-            gc.collect()
-            export()
-            upload()
+def _upload(repo_id: str, upload_dir: Path, auth_token: str | None = os.environ.get("HF_AUTH_TOKEN", None)) -> None:
+    create_repo(repo_id, exist_ok=True, token=auth_token)
+    upload_folder(repo_id=repo_id, folder_path=upload_dir, token=auth_token)
+
+
+@app.command()
+def export(
+    models: list[str] = typer.Argument(
+        ..., help="The model(s) to be exported. Model names should be the same as used in the associated library."
+    ),
+    library: ModelLibrary = typer.Option(
+        ..., "--library", "-l", help="The library associated with the models to be exported."
+    ),
+    output_dir: Optional[Path] = typer.Option(
+        None,
+        "--output-dir",
+        "-o",
+        help="Directory where exported models will be stored. Defaults to a temporary directory.",
+    ),
+    should_upload: bool = typer.Option(False, "--upload", "-u", help="Whether to upload the exported models."),
+    auth_token: Optional[str] = typer.Option(
+        os.environ.get("HF_AUTH_TOKEN", None),
+        "--auth_token",
+        "-t",
+        help="If uploading models to Hugging Face, the auth token of the user or organisation.",
+    ),
+    repo_prefix: str = typer.Option(
+        "immich-app",
+        "--repo_prefix",
+        "-p",
+        help="If uploading models to Hugging Face, the prefix to put before the model name. Can be a username or organisation.",
+    ),
+) -> None:
+    if not models:
+        raise ValueError("No models specified")
+
+    with Progress() as progress:
+        task1 = progress.add_task("[green]Exporting model(s)...", total=len(models))
+
+        with TemporaryDirectory() as tmp:
+            output_dir = output_dir if output_dir else Path(tmp)
+            for model_name in models:
+                cleaned_name = clean_name(model_name)
+                model_dir = output_dir / cleaned_name
+                progress.update(task1, description=f"[green]Exporting {cleaned_name}")
+                _export(model_name, library, model_dir)
+                progress.update(task1, advance=1, description=f"[green]Exported {cleaned_name}")
+
+            if should_upload:
+                upload(models, output_dir, auth_token, repo_prefix)
+
+
+@app.command()
+def upload(
+    models: list[str] = typer.Argument(
+        ..., help="The model(s) to be uploaded. Model names should be the same as used in the associated library."
+    ),
+    output_dir: Path = typer.Option(
+        ...,
+        "--output-dir",
+        "-o",
+        help="Directory containing the previously exported models to upload.",
+    ),
+    auth_token: Optional[str] = typer.Option(
+        os.environ.get("HF_AUTH_TOKEN", None),
+        "--auth_token",
+        "-t",
+        help="The Hugging Face auth token of the user or organisation.",
+    ),
+    repo_prefix: str = typer.Option(
+        "immich-app",
+        "--repo_prefix",
+        "-p",
+        help="The name to put before the model name to form the Hugging Face repo name. Can be a username or organisation.",
+    ),
+) -> None:
+    if not models:
+        raise ValueError("No models specified")
+
+    with Progress() as progress:
+        task2 = progress.add_task("[yellow]Uploading models...", total=len(models))
+        for model_name in models:
+            cleaned_name = clean_name(model_name)
+            repo_id = f"{repo_prefix}/{cleaned_name}"
+            model_dir = output_dir / cleaned_name
+
+            progress.update(task2, description=f"[yellow]Uploading {cleaned_name}")
+            _upload(repo_id, model_dir, auth_token)
+            progress.update(task2, advance=1, description=f"[yellow]Uploaded {cleaned_name}")
+
+
+if __name__ == "__main__":
+    app()
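
For reviewers, a minimal sketch of how the new commands can be exercised with Typer's test runner. It assumes the machine-learning directory is on the import path so the package resolves as "export" (matching the absolute imports in the patch), that HF_AUTH_TOKEN is set when uploading, and that the M-CLIP weights get downloaded during the export; treat it as illustrative rather than part of the change.

    from typer.testing import CliRunner

    from export.run import app

    runner = CliRunner()

    # Export an M-CLIP model to ./models/XLM-Roberta-Large-Vit-B-32/{visual,textual}.
    result = runner.invoke(
        app,
        ["export", "M-CLIP/XLM-Roberta-Large-Vit-B-32", "--library", "mclip", "--output-dir", "models"],
    )
    assert result.exit_code == 0, result.output

    # Push the exported folder to Hugging Face as immich-app/XLM-Roberta-Large-Vit-B-32.
    result = runner.invoke(app, ["upload", "M-CLIP/XLM-Roberta-Large-Vit-B-32", "--output-dir", "models"])
    assert result.exit_code == 0, result.output

The same flow is available from a shell, e.g. python -m export.run export <model> --library mclip --output-dir models --upload, via the app() entry point at the end of run.py.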