# ═══════════════════════════════════════════════════════════
# MAC — MBM AI Cloud | Local Server Setup (12GB GPU)
# ═══════════════════════════════════════════════════════════
# RTX 3060 12GB VRAM — single-model-at-a-time strategy.
# GPU: Qwen2.5-7B chat/code, weights ~5GB; vLLM may reserve up to
#      ~10GB of VRAM (gpu_memory_utilization=0.85).
# CPU: Whisper STT + Piper TTS ~1.5GB RAM (no VRAM); the TTS
#      service is currently commented out.
# Infra: PostgreSQL + pgAdmin + Redis + Nginx + Qdrant + SearXNG
# ═══════════════════════════════════════════════════════════
services:
  # ── MAC API Server ──────────────────────────────────────
  # Application API, built from the Dockerfile in this directory.
  # It reaches every backend by Compose service name over mac-net.
  mac:
    build: .
    container_name: mac-api
    ports:
      # Host port 8001 -> container port 8000. Host port 8001 must stay
      # unique in this file (vllm-speed is published on 8002 by default).
      - "${APP_HOST:-0.0.0.0}:8001:8000"
    env_file: .env
    # Keys set here take precedence over the same keys loaded from .env.
    environment:
      DATABASE_URL: "postgresql+asyncpg://mac:mac_password@postgres:5432/mac_db"
      REDIS_URL: "redis://redis:6379/0"
      # Single-GPU setup: every model tier is routed to the one vLLM
      # instance. 8001 is the port vLLM listens on INSIDE the network and
      # is independent of the host port published by vllm-speed.
      VLLM_BASE_URL: "http://vllm-speed:8001"
      VLLM_SPEED_URL: "http://vllm-speed:8001"
      VLLM_CODE_URL: "http://vllm-speed:8001"
      VLLM_REASONING_URL: "http://vllm-speed:8001"
      VLLM_INTELLIGENCE_URL: "http://vllm-speed:8001"
      WHISPER_URL: "http://whisper:8000"
      # The tts service is commented out below, so this host name does not
      # resolve until that service is re-enabled.
      TTS_URL: "http://tts:8000"
      # NOTE(review): vllm-speed is started with a chat model only; confirm
      # it can actually serve embedding requests before relying on this.
      EMBEDDING_URL: "http://vllm-speed:8001"
      QDRANT_URL: "http://qdrant:6333"
      SEARXNG_URL: "http://searxng:8080"
      # tts-piper is left out while the tts service is disabled; add it
      # back together with the service. (Presumably this list controls
      # which models the API offers — confirm against the app config.)
      MAC_ENABLED_MODELS: "qwen2.5:7b,whisper-small"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped
    networks:
      - mac-net

  # ═══════════════════════════════════════════════════════
  # vLLM GPU INFERENCE — Single model for 12GB GPU
  # ═══════════════════════════════════════════════════════

  # ── Speed Model: Qwen2.5-7B (handles ALL chat/code/general) ──
  vllm-speed:
    image: vllm/vllm-openai:latest
    container_name: mac-vllm-speed
    ports:
      # VLLM_SPEED_PORT selects the HOST port only. The container port is
      # pinned to 8001 because the mac service hard-codes
      # http://vllm-speed:8001. The default host port is 8002 so it no
      # longer collides with mac's host port 8001; an explicit
      # VLLM_SPEED_PORT=8001 would reintroduce that collision.
      - "${VLLM_SPEED_PORT:-8002}:8001"
    environment:
      HF_HOME: "/root/.cache/huggingface"
    volumes:
      # Persist downloaded model weights across container restarts.
      - hf-cache:/root/.cache/huggingface
    # Folded scalar: the lines below are joined into one argument string.
    # --gpu-memory-utilization 0.85 : fraction of VRAM vLLM may reserve.
    # --max-model-len 8192          : maximum context length.
    # --enforce-eager               : run in eager mode.
    # --trust-remote-code           : allows model-repo code to execute;
    #                                 only use with trusted model repos.
    command: >-
      --model ${VLLM_SPEED_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
      --port 8001
      --gpu-memory-utilization 0.85
      --max-model-len 8192
      --trust-remote-code
      --enforce-eager
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    networks:
      - mac-net

  # ── Code/Reasoning/Intelligence models DISABLED (12GB GPU) ──
  # Uncomment when upgrading to 24GB+ GPU
  # vllm-code:
  #   ...
  # vllm-reason:
  #   ...
  # vllm-intel:
  #   ...

  # ═══════════════════════════════════════════════════════
  # SPEECH & AUDIO SERVICES (CPU — saves GPU for LLM)
  # ═══════════════════════════════════════════════════════

  # ── Whisper — Speech-to-Text (CPU mode) ────────────────
  whisper:
    image: fedirz/faster-whisper-server:latest-cpu
    container_name: mac-whisper
    ports:
      - "${WHISPER_PORT:-8005}:8000"
    environment:
      WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-small}"
      WHISPER__DEVICE: "cpu"
    restart: unless-stopped
    networks:
      - mac-net

  # ── Piper TTS — Text-to-Speech (CPU, lightweight) ─────
  # TEMPORARILY DISABLED — image still downloading on slow WiFi
  # tts:
  #   image: ghcr.io/matatonic/openedai-speech:latest
  #   container_name: mac-tts
  #   ports:
  #     - "${TTS_PORT:-8006}:8000"
  #   volumes:
  #     - tts-voices:/app/voices
  #   restart: unless-stopped
  #   networks:
  #     - mac-net

  # ═══════════════════════════════════════════════════════
  # INFRASTRUCTURE SERVICES
  # ═══════════════════════════════════════════════════════

  # ── PostgreSQL — Persistent data store ─────────────────
  # NOTE(review): published on all host interfaces with the default
  # password below; restrict the binding or change the password before
  # exposing this host to an untrusted network.
  postgres:
    image: postgres:16-alpine
    container_name: mac-postgres
    environment:
      POSTGRES_USER: "mac"
      POSTGRES_PASSWORD: "mac_password"
      POSTGRES_DB: "mac_db"
    ports:
      - "5433:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
    # mac and pgadmin wait for this check (condition: service_healthy).
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U mac -d mac_db"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - mac-net

  # ── pgAdmin — PostgreSQL admin UI (local-only by default) ──
  pgadmin:
    image: dpage/pgadmin4:8
    container_name: mac-pgadmin
    ports:
      # Bound to loopback so the admin UI is not reachable from the LAN.
      - "127.0.0.1:${PGADMIN_PORT:-5051}:80"
    environment:
      PGADMIN_DEFAULT_EMAIL: "${PGADMIN_DEFAULT_EMAIL:-admin@mbm.ac.in}"
      PGADMIN_DEFAULT_PASSWORD: "${PGADMIN_DEFAULT_PASSWORD:-ChangeThisStrongPassword!}"
      PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "True"
    depends_on:
      postgres:
        condition: service_healthy
    volumes:
      - pgadmin-data:/var/lib/pgadmin
    restart: unless-stopped
    networks:
      - mac-net

  # ── Redis — Rate limiting & caching ────────────────────
  # NOTE(review): published on all host interfaces and no password is
  # configured in this file; confirm that is acceptable for this host.
  redis:
    image: redis:7-alpine
    container_name: mac-redis
    ports:
      - "6380:6379"
    volumes:
      - redisdata:/data
    # mac waits for this check (condition: service_healthy).
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5
    restart: unless-stopped
    networks:
      - mac-net

  # ── Nginx — Reverse proxy + SvelteKit frontend ─────────
  nginx:
    image: nginx:alpine
    container_name: mac-nginx
    ports:
      - "${APP_HOST:-0.0.0.0}:${APP_PORT:-80}:80"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      # SvelteKit static build output
      - ./frontend/build:/app:ro
    depends_on:
      # Equivalent to the short form "- mac": wait for start, not health.
      mac:
        condition: service_started
    restart: unless-stopped
    networks:
      - mac-net

  # ── Qdrant — Vector DB for RAG ─────────────────────────
  qdrant:
    image: qdrant/qdrant:latest
    container_name: mac-qdrant
    ports:
      - "6333:6333"
    volumes:
      - qdrantdata:/qdrant/storage
    restart: unless-stopped
    networks:
      - mac-net

  # ── SearXNG — Self-hosted web search ───────────────────
  searxng:
    image: searxng/searxng:latest
    container_name: mac-searxng
    ports:
      - "8888:8080"
    environment:
      # Matches the host port published above.
      SEARXNG_BASE_URL: "http://localhost:8888/"
    volumes:
      - searxngdata:/etc/searxng
    restart: unless-stopped
    networks:
      - mac-net
# Named volumes, all using the default driver.
volumes:
  # PostgreSQL data directory
  pgdata: {}
  # pgAdmin settings and saved server definitions
  pgadmin-data: {}
  # Redis /data directory
  redisdata: {}
  # Qdrant storage
  qdrantdata: {}
  # SearXNG configuration (/etc/searxng)
  searxngdata: {}
  # Shared HuggingFace model cache across all vLLM instances
  hf-cache: {}
  # Persisted TTS voice models
  tts-voices: {}
# Single bridge network that every service in this file attaches to.
networks:
  mac-net:
    driver: "bridge"