# Docker Compose definition for the `inference` service.
version: '3.8'

services:
  inference:
    # Image is built from the Dockerfile in this directory.
    build:
      context: .
      dockerfile: Dockerfile

    # Values are interpolated by Compose from the host environment / .env file.
    # `${VAR:-default}` falls back to `default` when VAR is unset or empty.
    # HF_TOKEN has no fallback, so it resolves to an empty string when unset.
    environment:
      - API_BASE_URL=${API_BASE_URL:-https://router.huggingface.co/v1}
      - MODEL_NAME=${MODEL_NAME:-Qwen/Qwen2.5-72B-Instruct}
      - HF_TOKEN=${HF_TOKEN}
      - TEMPERATURE=${TEMPERATURE:-0.7}
      - MAX_TOKENS=${MAX_TOKENS:-200}
      - MAX_STEPS=${MAX_STEPS:-12}

    volumes:
      # Host ./logs is mounted read-write at /app/logs.
      - ./logs:/app/logs
      # Host ./.env is mounted read-only at /app/.env.
      - ./.env:/app/.env:ro

    # Publish container port 8000 on host port 8000.
    ports:
      - "8000:8000"

    restart: unless-stopped

    # NOTE(review): this command only runs `sys.exit(0)`, so it succeeds
    # whenever the Python interpreter can start; it does not probe the
    # service on port 8000. Consider a real readiness check.
    healthcheck:
      test: ["CMD", "python", "-c", "import sys; sys.exit(0)"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # Optional: local LLM service (LM Studio compatible).
  # Uncomment to use a local LLM instead of the API.
  # lm_studio:
  #   image: ubuntu:22.04
  #   ports:
  #     - "1234:1234"
  #   command: /bin/bash -c "echo 'Run LM Studio on your machine and connect to it'"