# Compose file format version. Docker Compose v2 treats this top-level key as
# obsolete (it is ignored and a warning is printed); it is kept here for
# compatibility with legacy docker-compose v1 tooling.
version: '3.8'

services:
  # Inference service: image built from the Dockerfile in this directory and
  # published on host port 8000.
  inference:
    build:
      context: .
      dockerfile: Dockerfile
    # Values are substituted by Compose from the host environment (or the
    # project-level .env file). The text after `:-` is the fallback used when
    # the variable is unset or empty.
    environment:
      - API_BASE_URL=${API_BASE_URL:-https://router.huggingface.co/v1}
      - MODEL_NAME=${MODEL_NAME:-Qwen/Qwen2.5-72B-Instruct}
      # No fallback on purpose: when HF_TOKEN is unset, Compose substitutes an
      # empty string and prints a warning, which surfaces a missing token.
      - HF_TOKEN=${HF_TOKEN}
      - TEMPERATURE=${TEMPERATURE:-0.7}
      - MAX_TOKENS=${MAX_TOKENS:-200}
      - MAX_STEPS=${MAX_STEPS:-12}
    volumes:
      # Host ./logs directory shared with the container at /app/logs.
      - ./logs:/app/logs
      # Read-only bind mount of the local .env file.
      # NOTE: if ./.env does not exist on the host, Docker creates a
      # *directory* with that name; create the file before starting.
      - ./.env:/app/.env:ro
    ports:
      - "8000:8000"
    restart: unless-stopped
    healthcheck:
      # Probe the service port instead of running a command that always
      # succeeds: the check fails (non-zero exit) when nothing accepts TCP
      # connections on port 8000 inside the container.
      # NOTE(review): assumes the app listens on 8000 in the container, as
      # implied by the port mapping above; adjust if it binds elsewhere.
      test:
        - CMD
        - python
        - "-c"
        - "import socket; socket.create_connection(('127.0.0.1', 8000), timeout=5).close()"
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period during which failed probes do not count toward retries.
      start_period: 40s

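  # Optional: placeholder for a local, LM Studio-compatible LLM endpoint.
  # LM Studio itself runs on your machine, not in this container; the service
  # below only prints a reminder. To use a local LLM instead of the API, start
  # LM Studio on the host and point API_BASE_URL at it.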
  # lm_studio:
  #   image: ubuntu:22.04
  #   ports:
  #     - "1234:1234"
  #   command: /bin/bash -c "echo 'Run LM Studio on your machine and connect to it'"