# Docker Compose definition for the mk-llm-api service.
version: '3.8'

services:
  mk-llm-api:
    # Build the image from the directory that holds this compose file.
    build: .

    # Each ${NAME:-default} entry takes its value from the environment that
    # runs Compose and falls back to the text after ":-" when NAME is unset
    # or empty.
    environment:
      # Model selection (presumably read by the application at startup;
      # verify against the service code).
      - MODEL_PATH=${MODEL_PATH:-./models/mistral-finetuned-mk}
      - MODEL_ID=${MODEL_ID:-mk-llm}
      # Loading options; both flags default to "false", dtype to float16.
      - LOAD_IN_4BIT=${LOAD_IN_4BIT:-false}
      - LOAD_IN_8BIT=${LOAD_IN_8BIT:-false}
      - TORCH_DTYPE=${TORCH_DTYPE:-float16}
      # Fixed values, not overridable from the calling environment.
      # NOTE(review): looks like the address/port the server binds to inside
      # the container - confirm; PORT matches the container side of the
      # mapping under "ports".
      - HOST=0.0.0.0
      - PORT=8000

    # host:container - publishes container port 8000 on host port 8000.
    ports:
      - "8000:8000"

    # Reserve devices that advertise the "gpu" capability. No count or
    # device_ids is given, so the selection is left to Compose's default.
    deploy:
      resources:
        reservations:
          devices:
            - capabilities:
                - gpu