mirror of
https://github.com/immich-app/immich.git
synced 2025-01-07 00:50:23 -05:00
34201be74c
* using pydantic BaseSetting * ML API takes image file as input * keeping image in memory * reducing duplicate code * using bytes instead of UploadFile & other small code improvements * removed form-multipart, using HTTP body * format code --------- Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
119 lines
3.4 KiB
Python
119 lines
3.4 KiB
Python
import torch
|
|
from insightface.app import FaceAnalysis
|
|
from pathlib import Path
|
|
|
|
from transformers import pipeline, Pipeline
|
|
from sentence_transformers import SentenceTransformer
|
|
from typing import Any, BinaryIO
|
|
import cv2 as cv
|
|
import numpy as np
|
|
from PIL import Image
|
|
from config import settings
|
|
|
|
# Torch device name used to namespace the model cache: "cuda" when torch
# reports a usable CUDA runtime, otherwise "cpu".
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
|
|
|
|
|
|
def get_model(model_name: str, model_type: str, **model_kwargs):
    """
    Build and return the model identified by `model_name` for a given task.

    Args:
        model_name: Identifier of the model to load.
        model_type: Task the model is used for; it selects the loader.
            `facial-recognition` goes through `_load_facial_recognition`
            (Insightface), `clip` through `SentenceTransformer`, and any
            other value (e.g. `image-classification`) is passed to the
            `transformers` `pipeline` factory as the task name.
        **model_kwargs: Extra options forwarded to the selected loader.

    Returns:
        model: The loaded model object.
    """

    model_dir = _get_cache_dir(model_name, model_type)

    if model_type == "facial-recognition":
        return _load_facial_recognition(
            model_name, cache_dir=model_dir, **model_kwargs
        )

    if model_type == "clip":
        return SentenceTransformer(
            model_name, cache_folder=model_dir, **model_kwargs
        )

    # Fallback: treat `model_type` as a pipeline task. The cache location is
    # merged into the model kwargs, and caller-supplied keys take precedence.
    pipeline_options = {"cache_dir": model_dir, **model_kwargs}
    return pipeline(model_type, model_name, model_kwargs=pipeline_options)
|
|
|
|
|
|
def run_classification(
    model: Pipeline, image: Image.Image, min_score: float | None = None
) -> list[str]:
    """
    Classify an image and return its unique tags.

    Args:
        model: Callable classifier; it is invoked as `model(image)` and must
            return an iterable of dicts carrying `label` and `score` keys.
        image: Image handed to the model unchanged.
        min_score: When given, predictions whose score is below this value
            are dropped. `None` keeps every prediction.

    Returns:
        Unique tags in the order they first appear in the model output.
        A label such as `"tabby, tabby cat"` is split on `", "` and
        contributes one tag per part.
    """
    predictions: list[dict[str, Any]] = model(image)  # type: ignore

    # A dict is used instead of a set so de-duplication keeps first-seen
    # order; set iteration order for strings varies between interpreter runs.
    unique_tags = dict.fromkeys(
        tag
        for pred in predictions
        if min_score is None or pred["score"] >= min_score
        for tag in pred["label"].split(", ")
    )

    return list(unique_tags)
|
|
|
|
|
|
def run_facial_recognition(
    model: FaceAnalysis, image: bytes
) -> list[dict[str, Any]]:
    """
    Detect faces in an encoded image and describe each one.

    Args:
        model: Face analysis model; `model.get(img)` is called on the
            decoded image and must yield objects exposing `bbox`,
            `det_score` and `normed_embedding`.
        image: Raw encoded image file contents (not a decoded pixel array).

    Returns:
        One dict per detected face with the keys `imageWidth`,
        `imageHeight`, `boundingBox` (rounded `x1`, `y1`, `x2`, `y2`),
        `score` and `embedding`. Empty when no face is found.

    Raises:
        ValueError: If `image` is empty or cannot be decoded as an image.
    """
    file_bytes = np.frombuffer(image, dtype=np.uint8)
    if file_bytes.size == 0:
        raise ValueError("Image is empty or could not be decoded")

    img = cv.imdecode(file_bytes, cv.IMREAD_COLOR)
    # imdecode signals undecodable data by returning None instead of raising;
    # fail here with a clear message rather than on the `.shape` access below.
    if img is None:
        raise ValueError("Image is empty or could not be decoded")

    height, width, _ = img.shape

    def describe(face) -> dict[str, Any]:
        # Convert one detection into plain Python values.
        x1, y1, x2, y2 = face.bbox
        return {
            "imageWidth": width,
            "imageHeight": height,
            "boundingBox": {
                "x1": round(x1),
                "y1": round(y1),
                "x2": round(x2),
                "y2": round(y2),
            },
            "score": face.det_score.item(),
            "embedding": face.normed_embedding.tolist(),
        }

    return [describe(face) for face in model.get(img)]
|
|
|
|
|
|
def _load_facial_recognition(
    model_name: str,
    min_face_score: float | None = None,
    cache_dir: Path | str | None = None,
    **model_kwargs,
):
    """
    Create and prepare an Insightface `FaceAnalysis` model.

    Args:
        model_name: Name passed to `FaceAnalysis`.
        min_face_score: Detection threshold given to `prepare`. Falls back
            to `settings.min_face_score` when `None`.
        cache_dir: Root directory for the model files. Falls back to
            `_get_cache_dir(model_name, "facial-recognition")` when `None`.
        **model_kwargs: Extra options forwarded to `FaceAnalysis`.

    Returns:
        The prepared `FaceAnalysis` instance, restricted to the detection
        and recognition modules.
    """
    model_root = (
        _get_cache_dir(model_name, "facial-recognition")
        if cache_dir is None
        else cache_dir
    )
    # FaceAnalysis is given the root as a POSIX-style string, never a Path.
    if isinstance(model_root, Path):
        model_root = model_root.as_posix()

    detection_threshold = (
        settings.min_face_score if min_face_score is None else min_face_score
    )

    face_model = FaceAnalysis(
        name=model_name,
        root=model_root,
        allowed_modules=["detection", "recognition"],
        **model_kwargs,
    )
    face_model.prepare(
        ctx_id=0, det_thresh=detection_threshold, det_size=(640, 640)
    )
    return face_model
|
|
|
|
|
|
def _get_cache_dir(model_name: str, model_type: str) -> Path:
    """
    Return the cache directory for a model.

    The path is `<settings.cache_folder>/<device>/<model_type>/<model_name>`.
    """
    cache_root = Path(settings.cache_folder)
    return cache_root.joinpath(device, model_type, model_name)
|