# Both targets launch containers instead of producing files, so declare them
# phony: a stray file or directory named "triton" or "perf" must not cause
# make to skip the recipe.
.PHONY: triton perf

# Start Triton Inference Server 23.04 as a detached, auto-removed container
# named dinov2_vits14_triton, restricted to GPU 0, serving the models found in
# ./model_repository (mounted at /models inside the container).
# Published ports: 8000, 8001 (the gRPC endpoint the perf target connects to)
# and 8002; in Triton's default configuration 8000 is HTTP and 8002 is metrics.
# $(CURDIR) is used rather than $(PWD) because make sets CURDIR itself, whereas
# PWD is inherited from the environment and can be stale (e.g. `make -C dir`).
# The volume argument is quoted so checkout paths containing spaces still work.
triton:
	docker run --rm -d \
		--name dinov2_vits14_triton \
		--gpus "device=0" \
		-p 8000:8000 -p 8001:8001 -p 8002:8002 \
		-v "$(CURDIR)/model_repository:/models" \
		nvcr.io/nvidia/tritonserver:23.04-py3 \
		tritonserver --model-repository=/models

# Benchmark the dinov2_vits14 model with perf_analyzer from the Triton SDK
# image. Host networking lets the tool reach the server's gRPC port (8001)
# published by the triton target. The run uses a fixed concurrency of 16,
# reports 95th-percentile latency and sends inputs of shape 3x280x280 to the
# tensor named "input". The -it flags require an attached terminal.
perf:
	docker run --gpus all --rm -it --net host \
		nvcr.io/nvidia/tritonserver:23.04-py3-sdk \
		perf_analyzer -m dinov2_vits14 --percentile=95 \
		-i grpc -u 0.0.0.0:8001 \
		--concurrency-range 16:16 \
		--shape input:3,280,280