services:
  vllm:
    # Official ROCm build of vLLM. The original `ollama/rocm` image does not
    # contain the `vllm` CLI that this service's command runs.
    image: rocm/vllm:latest
    container_name: contextforge-vllm
    ports:
      - "8000:8000"
    environment:
      - VLLM_API_KEY=${VLLM_API_KEY:-contextforge-local}
    # NOTE(review): "Qwen/Qwen3.6-35B-A3B" does not look like a published
    # model id — presumably Qwen/Qwen3-30B-A3B was intended. Verify, and keep
    # it in sync with VLLM_MODEL in the apohara service below.
    command: >
      vllm serve Qwen/Qwen3.6-35B-A3B
      --enable-prefix-caching
      --enable-chunked-prefill
      --tensor-parallel-size 1
      --reasoning-parser qwen3
      --trust-remote-code
      --host 0.0.0.0
      --port 8000
    # Mount the declared `models` volume as the Hugging Face cache so the
    # model weights are not re-downloaded on every container restart
    # (the volume was previously declared at top level but never used).
    volumes:
      - models:/root/.cache/huggingface
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Loading a 35B-class model takes minutes; without a start_period the
      # service is flagged unhealthy after 3 x 30s and apohara's
      # `condition: service_healthy` dependency never unblocks.
      start_period: 600s
    # Compose `deploy.resources.reservations.devices` only supports the
    # nvidia/cdi drivers (`driver: amd` is not valid). ROCm containers get
    # GPU access via the AMD kernel devices plus video/render group access.
    devices:
      - /dev/kfd
      - /dev/dri
    group_add:
      - video
      - render

  apohara:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara
    ports:
      - "8001:8001"
    environment:
      - VLLM_BASE_URL=http://vllm:8000
      # Must match the model id passed to `vllm serve` above.
      - VLLM_MODEL=Qwen/Qwen3.6-35B-A3B
      - CONTEXTFORGE_PORT=8001
    depends_on:
      vllm:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  gradio:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: apohara-ui
    ports:
      - "7860:7860"
    environment:
      - CONTEXTFORGE_PORT=8001
    depends_on:
      # apohara defines a healthcheck, so wait until the API is actually
      # serving rather than merely started.
      apohara:
        condition: service_healthy
    command: python demo/app.py

volumes:
  # Shared model-weight cache for the vllm service.
  models: