# Spaces: Sleeping
# Docker Compose stack: the avp-rag application plus an optional, GPU-backed
# vLLM server that is only started when the "vllm" profile is enabled.
services:
  avp-rag:
    # Image is built locally from Dockerfile.full in this directory.
    build:
      context: .
      dockerfile: Dockerfile.full
    ports:
      - "8000:8000"
    # Runtime configuration is read from the .env file next to this compose file.
    env_file:
      - .env
    # Bind mount: ./data on the host is visible at /app/data in the container.
    volumes:
      - ./data:/app/data
    depends_on:
      vllm:
        # When vllm is part of the run, wait until its healthcheck passes.
        condition: service_healthy
        # vllm sits behind a profile, so it may not be started at all;
        # required: false lets avp-rag start without it (Compose only warns).
        required: false

  vllm:
    image: vllm/vllm-openai:latest
    # Opt-in service: started only with e.g. `docker compose --profile vllm up`.
    profiles:
      - vllm
    ports:
      - "8080:8080"
    environment:
      # Passed through from the host environment; empty string when unset.
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-}
    volumes:
      # Named volume, so the Hugging Face cache directory outlives the container.
      - huggingface_cache:/root/.cache/huggingface
    # Folded scalar: the lines below are joined into one argument string.
    # --port must stay in sync with the published port and the healthcheck URL.
    command: >
      --model Qwen/Qwen3-8B
      --port 8080
      --max-model-len 8192
      --gpu-memory-utilization 0.90
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Probes /health from inside the container using python3's urllib;
      # urlopen raises on a connection or HTTP error, which makes the probe fail.
      test: ["CMD", "python3", "-c",
             "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
      interval: 30s
      timeout: 10s
      # Failures during the first 300s do not count toward `retries`
      # (presumably to cover model download/load time — confirm for your hardware).
      start_period: 300s
      retries: 5

volumes:
  # Declared here so Compose creates and manages the named volume used by vllm.
  huggingface_cache: