---
# Docker Compose configuration - GPU deployment
version: '3.8'

services:
  # Main RAG application, built from the local Dockerfile.gpu
  adaptive-rag:
    build:
      context: .
      dockerfile: Dockerfile.gpu
    container_name: adaptive-rag-gpu
    restart: unless-stopped
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
      - TOKENIZERS_PARALLELISM=false
      # Cache HuggingFace downloads in the mounted ./models volume
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
    env_file:
      - .env
    ports:
      - "8000:8000"
      - "8001:8001"  # optional: monitoring port
    volumes:
      - ./data:/app/data
      - ./models:/app/models
      - ./logs:/app/logs
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    depends_on:
      - ollama

  # Local LLM server used by the RAG app
  ollama:
    image: ollama/ollama:latest
    container_name: ollama-gpu
    restart: unless-stopped
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # The image's entrypoint is already /bin/ollama, so the command must be
    # only the subcommand. ["ollama", "serve"] would run "ollama ollama serve"
    # and fail on startup.
    command: ["serve"]

  # Optional: GPU monitoring service (Prometheus exporter for nvidia-smi)
  nvidia-smi-exporter:
    image: mindprince/nvidia_gpu_prometheus_exporter:0.1
    container_name: gpu-monitor
    restart: unless-stopped
    ports:
      - "9445:9445"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

volumes:
  # Persists downloaded Ollama models across container restarts
  ollama-data:
    driver: local