2023-06-24 23:18:09 -04:00
|
|
|
from pathlib import Path
|
2023-06-27 17:01:24 -04:00
|
|
|
from typing import Any
|
2023-06-24 23:18:09 -04:00
|
|
|
|
|
|
|
from PIL.Image import Image
|
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
from ..schemas import ModelType
|
|
|
|
from .base import InferenceModel
|
|
|
|
|
|
|
|
|
|
|
|
class CLIPSTEncoder(InferenceModel):
    """Inference model that delegates encoding to a ``SentenceTransformer``.

    The class declares its ``_model_type`` as ``ModelType.CLIP``. It reads
    ``self.model_name`` and ``self.cache_dir`` (neither is set in this class;
    presumably both come from ``InferenceModel`` -- confirm against the base
    class).
    """

    _model_type = ModelType.CLIP

    def load(self, **model_kwargs: Any) -> None:
        """Build the ``SentenceTransformer`` and store it on ``self.model``.

        Args:
            **model_kwargs: Extra keyword arguments passed through unchanged
                to the ``SentenceTransformer`` constructor.
        """
        # Attribute reads are kept in the same order as the original call.
        name = self.model_name
        # The cache location is handed over as a POSIX-style string.
        cache_location = self.cache_dir.as_posix()
        encoder = SentenceTransformer(
            name,
            cache_folder=cache_location,
            **model_kwargs,
        )
        self.model = encoder

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode a single image or string with the loaded model.

        Args:
            image_or_text: A PIL image or a text string, passed as-is to
                ``self.model.encode``.

        Returns:
            The result of ``encode`` converted via ``.tolist()``.
        """
        encoded = self.model.encode(image_or_text)
        return encoded.tolist()
|