Mirror of https://github.com/immich-app/immich.git, synced 2025-01-07 00:50:23 -05:00
commit a58482cb2b — added locustfile (#2926)
parent 9dd1d81536

5 changed files with 1301 additions and 258 deletions
machine-learning/README.md

@@ -11,3 +11,12 @@ Running `poetry install --no-root --with dev` will install everything you need i
 To add or remove dependencies, you can use the commands `poetry add $PACKAGE_NAME` and `poetry remove $PACKAGE_NAME`, respectively.
 Be sure to commit the `poetry.lock` and `pyproject.toml` files to reflect any changes in dependencies.
+
+# Load Testing
+
+To measure inference throughput and latency, you can use [Locust](https://locust.io/) with the provided `locustfile.py`.
+
+Locust works by querying the model endpoints and aggregating their statistics, meaning the app must be deployed.
+You can run `load_test.sh` to automatically deploy the app locally and start Locust, adjusting its env variables as needed.
+
+Alternatively, for more customized testing, you may also run `locust` directly: see the [documentation](https://docs.locust.io/en/stable/index.html). Note that in Locust's jargon, concurrency is measured in `users`, and each user runs one task at a time. To achieve a particular per-endpoint concurrency, multiply that number by the number of endpoints to be queried. For example, if there are 3 endpoints and you want each of them to receive 8 requests at a time, you should set the number of users to 24.
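As a concrete sketch of that arithmetic, assuming the app is already serving on the same host that `load_test.sh` uses, a direct headless invocation for 3 endpoints at a per-endpoint concurrency of 8 might look like:

    # 3 endpoints x 8 concurrent requests per endpoint = 24 users
    locust -f machine-learning/locustfile.py --host http://127.0.0.1:3003 \
        --headless --users 24 --run-time 120s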
machine-learning/load_test.sh (executable file, 24 lines)

@@ -0,0 +1,24 @@
export MACHINE_LEARNING_CACHE_FOLDER=/tmp/model_cache
export MACHINE_LEARNING_MIN_FACE_SCORE=0.034 # returns 1 face per request; setting this to 0 blows up the number of faces to the thousands
export MACHINE_LEARNING_MIN_TAG_SCORE=0.0
export PID_FILE=/tmp/locust_pid
export LOG_FILE=/tmp/gunicorn.log
export HEADLESS=false
export HOST=127.0.0.1:3003
export CONCURRENCY=4
export NUM_ENDPOINTS=3
export PYTHONPATH=app

# start the app as a daemon, logging to $LOG_FILE
gunicorn app.main:app --worker-class uvicorn.workers.UvicornWorker \
    --bind $HOST --daemon --error-logfile $LOG_FILE --pid $PID_FILE

# poll the log until the server reports it is ready to accept requests
while true; do
    echo "Loading models..."
    sleep 5
    if grep -q -E "startup complete" $LOG_FILE; then break; fi
done

# "users" are assigned only one task, so multiply concurrency by the number of tasks
locust --host http://$HOST --web-host 127.0.0.1 \
    --run-time 120s --users $(($CONCURRENCY * $NUM_ENDPOINTS)) $(if $HEADLESS; then echo "--headless"; fi)

# tear down the daemonized server
if [[ -e $PID_FILE ]]; then kill $(cat $PID_FILE); fi
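Note that because the script exports fixed values, "adjusting its env variables" means editing the file itself; anything set in the calling shell is overwritten. A minimal variation, assuming you instead want the defaults to be overridable from the environment, is to write each export with bash parameter expansion:

    # hypothetical variation, not part of the committed script:
    # keep the caller's value if set, otherwise fall back to the default
    export CONCURRENCY="${CONCURRENCY:-4}"
    export HEADLESS="${HEADLESS:-false}"

With that change, an invocation such as `HEADLESS=true CONCURRENCY=8 ./load_test.sh` would take effect.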
machine-learning/locustfile.py (new file, 52 lines)

@@ -0,0 +1,52 @@
from io import BytesIO

from locust import HttpUser, events, task
from PIL import Image


@events.test_start.add_listener
def on_test_start(environment, **kwargs):
    global byte_image
    image = Image.new("RGB", (1000, 1000))
    byte_image = BytesIO()
    image.save(byte_image, format="jpeg")


class InferenceLoadTest(HttpUser):
    abstract: bool = True
    host = "http://127.0.0.1:3003"
    data: bytes
    headers: dict[str, str] = {"Content-Type": "image/jpg"}

    # re-use the image across all instances in a process
    def on_start(self):
        global byte_image
        self.data = byte_image.getvalue()


class ClassificationLoadTest(InferenceLoadTest):
    @task
    def classify(self):
        self.client.post(
            "/image-classifier/tag-image", data=self.data, headers=self.headers
        )


class CLIPLoadTest(InferenceLoadTest):
    @task
    def encode_image(self):
        self.client.post(
            "/sentence-transformer/encode-image",
            data=self.data,
            headers=self.headers,
        )


class RecognitionLoadTest(InferenceLoadTest):
    @task
    def recognize(self):
        self.client.post(
            "/facial-recognition/detect-faces",
            data=self.data,
            headers=self.headers,
        )
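Since each user class above maps to exactly one endpoint, you can also load-test a single endpoint by passing the class name positionally to `locust` — a sketch, assuming the server is already running at the host baked into `InferenceLoadTest`:

    # query only the facial-recognition endpoint with 8 concurrent users
    locust -f machine-learning/locustfile.py RecognitionLoadTest \
        --headless --users 8 --run-time 60s

Passing one class this way sidesteps the users-times-endpoints multiplication, since all users run the same task.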
machine-learning/poetry.lock (generated, 1472 lines changed)

File diff suppressed because it is too large.
machine-learning/pyproject.toml

@@ -27,6 +27,8 @@ aiocache = "^0.12.1"
 mypy = "^1.3.0"
 black = "^23.3.0"
 pytest = "^7.3.1"
+locust = "^2.15.1"
+gunicorn = "^20.1.0"

 [[tool.poetry.source]]
 name = "pytorch-cpu"
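For reference, the two new entries sit alongside the dev tools (`mypy`, `black`, `pytest`), so a change like this would typically be produced with Poetry rather than by hand — a sketch, assuming the entries belong to the `dev` dependency group:

    poetry add --group dev locust@^2.15.1 gunicorn@^20.1.0

As the README above notes, the resulting `poetry.lock` and `pyproject.toml` changes should then be committed.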