# MAC / docker-compose.yml
# Aaryan17's picture
# chore: upload MAC codebase to HF Space
# 0e76632 verified
# ═══════════════════════════════════════════════════════════
# MAC — MBM AI Cloud | Local Server Setup (12GB GPU)
# ═══════════════════════════════════════════════════════════
# RTX 3060 12GB VRAM — single model at a time strategy.
# GPU: Qwen2.5-7B (AWQ) chat/code ~ 5GB (vllm-speed runs with --gpu-memory-utilization 0.85)
# CPU: Whisper STT + Piper TTS ~ 1.5GB RAM (no VRAM)
# Infra: PostgreSQL + Redis + Nginx + Qdrant + SearXNG
# ═══════════════════════════════════════════════════════════
services:
# ── MAC API Server ──────────────────────────────────────
mac:
  # MAC API server; the image is built from the build context in this directory.
  build: .
  container_name: mac-api
  ports:
    # Host port 8001 on ${APP_HOST} (all interfaces by default) -> container port 8000.
    - "${APP_HOST:-0.0.0.0}:8001:8000"
  env_file: .env
  environment:
    # User, password and database name mirror the postgres service definition.
    - DATABASE_URL=postgresql+asyncpg://mac:mac_password@postgres:5432/mac_db
    - REDIS_URL=redis://redis:6379/0
    # Every LLM role targets the single vllm-speed container. The port follows
    # VLLM_SPEED_PORT, the same variable vllm-speed passes to --port, so
    # overriding it cannot leave these URLs pointing at a closed port.
    - VLLM_BASE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_SPEED_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_CODE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_REASONING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_INTELLIGENCE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - WHISPER_URL=http://whisper:8000
    # NOTE(review): the tts service is commented out in this file, so nothing
    # answers at this hostname while it stays disabled. tts-piper is also still
    # listed in MAC_ENABLED_MODELS below; confirm the API tolerates that.
    - TTS_URL=http://tts:8000
    - EMBEDDING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - QDRANT_URL=http://qdrant:6333
    - SEARXNG_URL=http://searxng:8080
    - MAC_ENABLED_MODELS=qwen2.5:7b,whisper-small,tts-piper
  depends_on:
    # Start only after both stores report healthy via their healthchecks.
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  restart: unless-stopped
  networks:
    - mac-net
# ═══════════════════════════════════════════════════════
# vLLM GPU INFERENCE — Single model for 12GB GPU
# ═══════════════════════════════════════════════════════
# ── Speed Model: Qwen2.5-7B (handles ALL chat/code/general) ──
vllm-speed:
  # Single vLLM inference container; reserves one NVIDIA GPU (see deploy below).
  image: vllm/vllm-openai:latest
  container_name: mac-vllm-speed
  ports:
    # The host-side port is configured separately from the in-container port.
    # The mac service publishes host port 8001, so reusing the VLLM_SPEED_PORT
    # default (8001) on the host would make one of the two containers fail to
    # bind. Containers on mac-net still reach this service on VLLM_SPEED_PORT.
    - "${VLLM_SPEED_HOST_PORT:-8011}:${VLLM_SPEED_PORT:-8001}"
  environment:
    - HF_HOME=/root/.cache/huggingface
  volumes:
    # Named volume mounted at HF_HOME so the cache outlives the container.
    - hf-cache:/root/.cache/huggingface
  # Arguments for the image entrypoint; the folded scalar joins them into one line.
  command: >-
    --model ${VLLM_SPEED_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
    --port ${VLLM_SPEED_PORT:-8001}
    --gpu-memory-utilization 0.85
    --max-model-len 8192
    --trust-remote-code
    --enforce-eager
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: 1
            capabilities: [gpu]
  restart: unless-stopped
  networks:
    - mac-net
# ── Code/Reasoning/Intelligence models DISABLED (12GB GPU) ──
# Uncomment when upgrading to 24GB+ GPU
# vllm-code:
# ...
# vllm-reason:
# ...
# vllm-intel:
# ...
# ═══════════════════════════════════════════════════════
# SPEECH & AUDIO SERVICES (CPU — saves GPU for LLM)
# ═══════════════════════════════════════════════════════
# ── Whisper — Speech-to-Text (CPU mode) ────────────────
whisper:
  # Speech-to-text using the CPU image variant, configured for the cpu device.
  image: fedirz/faster-whisper-server:latest-cpu
  container_name: mac-whisper
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Host port defaults to 8005 and maps to container port 8000.
    - "${WHISPER_PORT:-8005}:8000"
  environment:
    WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-small}"
    WHISPER__DEVICE: "cpu"
# ── Piper TTS — Text-to-Speech (CPU, lightweight) ─────
# TEMPORARILY DISABLED — image still downloading on slow WiFi
# tts:
# image: ghcr.io/matatonic/openedai-speech:latest
# container_name: mac-tts
# ports:
# - "${TTS_PORT:-8006}:8000"
# volumes:
# - tts-voices:/app/voices
# restart: unless-stopped
# networks:
# - mac-net
# ═══════════════════════════════════════════════════════
# INFRASTRUCTURE SERVICES
# ═══════════════════════════════════════════════════════
# ── PostgreSQL — Persistent data store ─────────────────
postgres:
  # Primary relational store; data lives in the pgdata named volume.
  image: postgres:16-alpine
  container_name: mac-postgres
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Host 5433 -> container 5432.
    - "5433:5432"
  volumes:
    - pgdata:/var/lib/postgresql/data
  environment:
    # NOTE(review): credentials are hard-coded here and repeated in the mac
    # service's DATABASE_URL; the two must be kept in sync by hand.
    - POSTGRES_USER=mac
    - POSTGRES_PASSWORD=mac_password
    - POSTGRES_DB=mac_db
  healthcheck:
    # Dependants that use condition: service_healthy wait for this probe.
    test:
      - CMD-SHELL
      - pg_isready -U mac -d mac_db
    interval: 5s
    timeout: 5s
    retries: 5
# ── pgAdmin — PostgreSQL admin UI (local-only by default) ──
pgadmin:
  # Web admin UI for PostgreSQL; published on the loopback interface only.
  image: dpage/pgadmin4:8
  container_name: mac-pgadmin
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # 127.0.0.1:<PGADMIN_PORT, default 5051> -> container port 80.
    - "127.0.0.1:${PGADMIN_PORT:-5051}:80"
  volumes:
    - pgadmin-data:/var/lib/pgadmin
  environment:
    # Login defaults apply only when the variables are unset; override them.
    - 'PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL:-admin@mbm.ac.in}'
    - 'PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD:-ChangeThisStrongPassword!}'
    - 'PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION=True'
  depends_on:
    # Wait for the postgres healthcheck to pass before starting.
    postgres:
      condition: service_healthy
# ── Redis — Rate limiting & caching ────────────────────
redis:
  # Redis instance; /data is kept in the redisdata named volume.
  image: redis:7-alpine
  container_name: mac-redis
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Host 6380 -> container 6379.
    - "6380:6379"
  volumes:
    - redisdata:/data
  healthcheck:
    # Dependants that use condition: service_healthy wait for this probe.
    test:
      - CMD
      - redis-cli
      - ping
    interval: 5s
    timeout: 5s
    retries: 5
# ── Nginx — Reverse proxy + SvelteKit frontend ─────────
nginx:
  # Front door: publishes ${APP_PORT} (default 80) on ${APP_HOST}.
  image: nginx:alpine
  container_name: mac-nginx
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    - "${APP_HOST:-0.0.0.0}:${APP_PORT:-80}:80"
  volumes:
    # Both mounts are read-only inside the container.
    - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    - ./frontend/build:/app:ro  # SvelteKit static build output
  depends_on:
    # Long-form equivalent of the short list syntax: wait for start, not health.
    mac:
      condition: service_started
# ── Qdrant — Vector DB for RAG ─────────────────────────
qdrant:
  # Vector database; storage is kept in the qdrantdata named volume.
  image: qdrant/qdrant:latest
  container_name: mac-qdrant
  restart: unless-stopped
  networks:
    - mac-net
  volumes:
    - qdrantdata:/qdrant/storage
  ports:
    # Same port on host and container; mac reaches it as qdrant:6333.
    - "6333:6333"
# ── SearXNG — Self-hosted web search ───────────────────
searxng:
  # Self-hosted search; configuration is kept in the searxngdata volume.
  image: searxng/searxng:latest
  container_name: mac-searxng
  restart: unless-stopped
  networks:
    - mac-net
  volumes:
    - searxngdata:/etc/searxng
  ports:
    # Host 8888 -> container 8080; mac reaches it as searxng:8080.
    - "8888:8080"
  environment:
    # Uses the host-side port (8888) published above.
    SEARXNG_BASE_URL: "http://localhost:8888/"
# Named volumes; an empty mapping means default settings for each.
volumes:
  pgdata: {}
  pgadmin-data: {}
  redisdata: {}
  qdrantdata: {}
  searxngdata: {}
  hf-cache: {}  # Shared HuggingFace model cache across all vLLM instances
  tts-voices: {}  # Persisted TTS voice models
# Bridge network that every service in this file attaches to.
networks:
  mac-net:
    driver: bridge