# Spaces: Aaryan17 / MAC (Sleeping) — File size: 8,048 Bytes — 0e76632

# ═══════════════════════════════════════════════════════════
#  MAC — MBM AI Cloud  |  Local Server Setup (12GB GPU)
# ═══════════════════════════════════════════════════════════
#  RTX 3060 12GB VRAM — single model at a time strategy.
#  GPU: Qwen2.5-7B chat/code      ~ 10GB cap (gpu_memory_utilization=0.85)
#  CPU: Whisper STT + Piper TTS   ~ 1.5GB RAM (no VRAM)
#  Infra: PostgreSQL + Redis + Nginx + Qdrant + SearXNG
# ═══════════════════════════════════════════════════════════

services:

  # ── MAC API Server ──────────────────────────────────────
  # Built from the local build context (`build: .`). Container port 8000 is
  # published on host port 8001 of the APP_HOST interface (default 0.0.0.0).
  # Startup waits until the postgres and redis healthchecks pass.
  mac:
    build: .
    container_name: mac-api
    ports:
      - "${APP_HOST:-0.0.0.0}:8001:8000"
    env_file: .env
    # Entries listed here take precedence over same-named variables in .env.
    environment:
      # User, password and database name must match the `postgres` service.
      - DATABASE_URL=postgresql+asyncpg://mac:mac_password@postgres:5432/mac_db
      - REDIS_URL=redis://redis:6379/0
      # Every model role is routed to the single vllm-speed container. The
      # port tracks VLLM_SPEED_PORT because that is the port vLLM is told to
      # listen on inside its container (`--port` in the vllm-speed command);
      # hard-coding 8001 here would break as soon as that variable is changed.
      - VLLM_BASE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - VLLM_SPEED_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - VLLM_CODE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - VLLM_REASONING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - VLLM_INTELLIGENCE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - WHISPER_URL=http://whisper:8000
      # NOTE(review): the `tts` service is currently commented out in this
      # file, so this hostname does not resolve until it is re-enabled.
      - TTS_URL=http://tts:8000
      - EMBEDDING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
      - QDRANT_URL=http://qdrant:6333
      - SEARXNG_URL=http://searxng:8080
      - MAC_ENABLED_MODELS=qwen2.5:7b,whisper-small,tts-piper
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - mac-net

  # ═══════════════════════════════════════════════════════
  #  vLLM GPU INFERENCE — Single model for 12GB GPU
  # ═══════════════════════════════════════════════════════

  # ── Speed Model: Qwen2.5-7B (handles ALL chat/code/general) ──
  # vLLM listens on VLLM_SPEED_PORT inside the container; other services reach
  # it over mac-net as http://vllm-speed:<port>, so the host-side publication
  # below is only for debugging from the Docker host.
  #
  # The host port has its own variable (VLLM_SPEED_HOST_PORT, default 8010)
  # because the previous default, host port 8001, is also claimed by the `mac`
  # service and Docker cannot bind two containers to the same host port.
  # It is bound to 127.0.0.1 because no API key is configured for vLLM here.
  vllm-speed:
    image: vllm/vllm-openai:latest
    container_name: mac-vllm-speed
    ports:
      - "127.0.0.1:${VLLM_SPEED_HOST_PORT:-8010}:${VLLM_SPEED_PORT:-8001}"
    environment:
      - HF_HOME=/root/.cache/huggingface
    volumes:
      # Keeps downloaded model weights across container re-creation.
      - hf-cache:/root/.cache/huggingface
    # Arguments passed to the image's entrypoint:
    #   --gpu-memory-utilization 0.85  fraction of GPU memory vLLM may use
    #   --max-model-len 8192           maximum context length in tokens
    #   --enforce-eager                run in eager mode (no CUDA graph capture)
    command: >-
      --model ${VLLM_SPEED_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
      --port ${VLLM_SPEED_PORT:-8001}
      --gpu-memory-utilization 0.85
      --max-model-len 8192
      --trust-remote-code
      --enforce-eager
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - mac-net

  # ── Code/Reasoning/Intelligence models DISABLED (12GB GPU) ──
  # Uncomment when upgrading to 24GB+ GPU
  # vllm-code:
  #   ...
  # vllm-reason:
  #   ...
  # vllm-intel:
  #   ...

  # ═══════════════════════════════════════════════════════
  #  SPEECH & AUDIO SERVICES (CPU — saves GPU for LLM)
  # ═══════════════════════════════════════════════════════

  # ── Whisper — Speech-to-Text (CPU mode) ────────────────
  # Uses the CPU image tag and forces the CPU device so no VRAM is taken.
  # Container port 8000 is published on host port WHISPER_PORT (default 8005).
  whisper:
    container_name: mac-whisper
    image: fedirz/faster-whisper-server:latest-cpu
    restart: unless-stopped
    networks:
      - mac-net
    ports:
      - "${WHISPER_PORT:-8005}:8000"
    environment:
      WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-small}"
      WHISPER__DEVICE: "cpu"

  # ── Piper TTS — Text-to-Speech (CPU, lightweight) ─────
  # TEMPORARILY DISABLED — image still downloading on slow WiFi
  # tts:
  #   image: ghcr.io/matatonic/openedai-speech:latest
  #   container_name: mac-tts
  #   ports:
  #     - "${TTS_PORT:-8006}:8000"
  #   volumes:
  #     - tts-voices:/app/voices
  #   restart: unless-stopped
  #   networks:
  #     - mac-net

  # ═══════════════════════════════════════════════════════
  #  INFRASTRUCTURE SERVICES
  # ═══════════════════════════════════════════════════════

  # ── PostgreSQL — Persistent data store ─────────────────
  # Other services connect over mac-net as postgres:5432. The host-side port
  # (5433) is bound to loopback by default because the credentials below are
  # hard-coded in this file; set POSTGRES_BIND_HOST (e.g. 0.0.0.0) only if
  # remote access is really required.
  postgres:
    image: postgres:16-alpine
    container_name: mac-postgres
    environment:
      # Must stay in sync with DATABASE_URL in the `mac` service.
      POSTGRES_USER: mac
      POSTGRES_PASSWORD: mac_password
      POSTGRES_DB: mac_db
    ports:
      - "${POSTGRES_BIND_HOST:-127.0.0.1}:5433:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
    # Services that depend on postgres with `condition: service_healthy`
    # wait for this check to succeed.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U mac -d mac_db"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - mac-net

  # ── pgAdmin — PostgreSQL admin UI (local-only by default) ──
  # Published on 127.0.0.1 only (host port PGADMIN_PORT, default 5051).
  # The login email/password fall back to the defaults below when the
  # corresponding variables are unset; override them in the environment.
  pgadmin:
    container_name: mac-pgadmin
    image: dpage/pgadmin4:8
    restart: unless-stopped
    depends_on:
      postgres:
        condition: service_healthy
    networks:
      - mac-net
    ports:
      - "127.0.0.1:${PGADMIN_PORT:-5051}:80"
    volumes:
      - pgadmin-data:/var/lib/pgadmin
    environment:
      PGADMIN_DEFAULT_EMAIL: "${PGADMIN_DEFAULT_EMAIL:-admin@mbm.ac.in}"
      PGADMIN_DEFAULT_PASSWORD: "${PGADMIN_DEFAULT_PASSWORD:-ChangeThisStrongPassword!}"
      PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "True"

  # ── Redis — Rate limiting & caching ────────────────────
  # Other services connect over mac-net as redis:6379. No password is
  # configured, so the host-side port (6380) is bound to loopback by default;
  # set REDIS_BIND_HOST (e.g. 0.0.0.0) only if remote access is required.
  redis:
    image: redis:7-alpine
    container_name: mac-redis
    ports:
      - "${REDIS_BIND_HOST:-127.0.0.1}:6380:6379"
    volumes:
      - redisdata:/data
    # Services that depend on redis with `condition: service_healthy`
    # wait for this check to succeed.
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - mac-net

  # ── Nginx — Reverse proxy + SvelteKit frontend ─────────
  # Container port 80 is published on APP_HOST:APP_PORT (default 0.0.0.0:80).
  # Both mounts are read-only.
  nginx:
    container_name: mac-nginx
    image: nginx:alpine
    restart: unless-stopped
    # Start after the `mac` container has been started (no health gate).
    depends_on:
      mac:
        condition: service_started
    networks:
      - mac-net
    ports:
      - "${APP_HOST:-0.0.0.0}:${APP_PORT:-80}:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      # SvelteKit static build output
      - ./frontend/build:/app:ro

  # ── Qdrant — Vector DB for RAG ─────────────────────────
  # Other services connect over mac-net as qdrant:6333. No API key is
  # configured here, so the host-side port is bound to loopback by default;
  # set QDRANT_BIND_HOST (e.g. 0.0.0.0) only if remote access is required.
  qdrant:
    image: qdrant/qdrant:latest
    container_name: mac-qdrant
    ports:
      - "${QDRANT_BIND_HOST:-127.0.0.1}:6333:6333"
    volumes:
      - qdrantdata:/qdrant/storage
    restart: unless-stopped
    networks:
      - mac-net

  # ── SearXNG — Self-hosted web search ───────────────────
  # Container port 8080 is published on host port 8888; the base URL below
  # points at that host-side address.
  searxng:
    container_name: mac-searxng
    image: searxng/searxng:latest
    restart: unless-stopped
    networks:
      - mac-net
    ports:
      - "8888:8080"
    volumes:
      - searxngdata:/etc/searxng
    environment:
      SEARXNG_BASE_URL: "http://localhost:8888/"

# Named volumes with default settings, one per stateful mount used above.
volumes:
  pgdata: {}
  pgadmin-data: {}
  redisdata: {}
  qdrantdata: {}
  searxngdata: {}
  # HuggingFace model cache, mounted by vllm-speed
  hf-cache: {}
  # TTS voice models; only referenced by the commented-out tts service
  tts-voices: {}

# Single bridge network that every service in this file attaches to.
networks:
  mac-net:
    driver: "bridge"