# Compose stack: llama.cpp inference server + gazet API + Streamlit demo.
# Startup order: llama (must pass healthcheck) -> app -> demo.
services:
  # llama.cpp HTTP server serving the fine-tuned GGUF checkpoint.
  llama:
    image: ghcr.io/ggml-org/llama.cpp:server
    volumes:
      # Checkpoint mounted read-only at the path the command loads it from.
      - ./finetune/models/qwen-base-run/ckpt-001.gguf:/models/model.gguf:ro
    # Folded scalar (>-): joins to one line of server flags, no trailing newline.
    # --host 0.0.0.0 so the port is reachable from the other services.
    command: >-
      -m /models/model.gguf
      --port 9000
      --host 0.0.0.0
      --ctx-size 2048
      -t 4
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/health"]
      interval: 10s
      timeout: 5s
      # Generous retries/start_period: presumably to cover slow GGUF model
      # load before /health goes green — confirm against actual load time.
      retries: 30
      start_period: 30s

  # gazet FastAPI backend; talks to llama over the compose network.
  app:
    build:
      context: .
      dockerfile: Dockerfile.compose
    volumes:
      - ./data:/data:ro
    environment:
      GAZET_DATA_DIR: /data
      LLAMA_SERVER_URL: http://llama:9000
    ports:
      # Quoted to avoid YAML's sexagesimal/number parsing of host:container.
      - "8000:8000"
    command: uvicorn gazet.api:app --host 0.0.0.0 --port 8000
    depends_on:
      # Wait for the model server to actually be serving, not just started.
      llama:
        condition: service_healthy

  # Streamlit front-end for the API.
  demo:
    build:
      context: .
      dockerfile: Dockerfile.compose
    environment:
      GAZET_API_URL: http://app:8000
    ports:
      - "8501:8501"
    command: streamlit run gazet_demo.py --server.port 8501 --server.address 0.0.0.0
    depends_on:
      # Short form: app has no healthcheck, so only service_started is possible.
      - app