Spaces:

Aaryan17
/

MAC

Sleeping

App Files Files Community

MAC / docker-compose.yml

Aaryan17

chore: upload MAC codebase to HF Space

0e76632 verified 26 days ago

raw

history blame contribute delete

8.05 kB

	# ═══════════════════════════════════════════════════════════
	# MAC — MBM AI Cloud | Local Server Setup (12GB GPU)
	# ═══════════════════════════════════════════════════════════
	# RTX 3060 12GB VRAM — single model at a time strategy.
	# GPU: Qwen2.5-7B chat/code ~ 10GB (gpu_memory_utilization=0.85)
	# CPU: Whisper STT + Piper TTS ~ 1.5GB RAM (no VRAM)
	# Infra: PostgreSQL + Redis + Nginx + Qdrant + SearXNG
	# ═══════════════════════════════════════════════════════════

	services:

	# ── MAC API Server ──────────────────────────────────────
	  # Application container, built from the Dockerfile in this directory.
	  # It reaches every backing service by compose service name over mac-net.
	  mac:
	    build: .
	    container_name: mac-api
	    ports:
	      # Host port is configurable through MAC_API_PORT and defaults to 8000.
	      # It used to be hard-coded to 8001, which is also the default host
	      # port published by vllm-speed, so the two services could not both
	      # bind their ports on a default `docker compose up`.
	      - "${APP_HOST:-0.0.0.0}:${MAC_API_PORT:-8000}:8000"
	    env_file: .env
	    environment:
	      # Credentials and database name mirror the POSTGRES_* values set on
	      # the postgres service in this file.
	      - DATABASE_URL=postgresql+asyncpg://mac:mac_password@postgres:5432/mac_db
	      - REDIS_URL=redis://redis:6379/0
	      # Every model-role URL targets the single vllm-speed instance.
	      - VLLM_BASE_URL=http://vllm-speed:8001
	      - VLLM_SPEED_URL=http://vllm-speed:8001
	      - VLLM_CODE_URL=http://vllm-speed:8001
	      - VLLM_REASONING_URL=http://vllm-speed:8001
	      - VLLM_INTELLIGENCE_URL=http://vllm-speed:8001
	      - WHISPER_URL=http://whisper:8000
	      # NOTE(review): the tts service is commented out in this file, so this
	      # hostname does not resolve until it is re-enabled, while
	      # MAC_ENABLED_MODELS below still lists tts-piper — confirm the API
	      # tolerates that.
	      - TTS_URL=http://tts:8000
	      - EMBEDDING_URL=http://vllm-speed:8001
	      - QDRANT_URL=http://qdrant:6333
	      - SEARXNG_URL=http://searxng:8080
	      - MAC_ENABLED_MODELS=qwen2.5:7b,whisper-small,tts-piper
	    depends_on:
	      # Start only after both stores report healthy via their healthchecks.
	      postgres:
	        condition: service_healthy
	      redis:
	        condition: service_healthy
	    restart: unless-stopped
	    networks:
	      - mac-net

	# ═══════════════════════════════════════════════════════
	# vLLM GPU INFERENCE — Single model for 12GB GPU
	# ═══════════════════════════════════════════════════════

	# ── Speed Model: Qwen2.5-7B (handles ALL chat/code/general) ──
	  # The only GPU inference server in this stack; it reserves one NVIDIA GPU.
	  vllm-speed:
	    image: vllm/vllm-openai:latest
	    container_name: mac-vllm-speed
	    ports:
	      # The container-side port is pinned to 8001 because other services in
	      # this file address it as http://vllm-speed:8001. VLLM_SPEED_PORT now
	      # only moves the host-side port; previously it also moved the port the
	      # server listened on, which broke those fixed in-network URLs.
	      - "${VLLM_SPEED_PORT:-8001}:8001"
	    environment:
	      - HF_HOME=/root/.cache/huggingface
	    volumes:
	      # Named volume mounted at HF_HOME so the cache directory persists
	      # across container recreation.
	      - hf-cache:/root/.cache/huggingface
	    # Arguments passed to the image's entrypoint. The folded scalar joins
	    # the lines below into a single space-separated argument string.
	    command: >
	      --model ${VLLM_SPEED_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
	      --port 8001
	      --gpu-memory-utilization 0.85
	      --max-model-len 8192
	      --trust-remote-code
	      --enforce-eager
	    deploy:
	      resources:
	        reservations:
	          devices:
	            - driver: nvidia
	              count: 1
	              capabilities: [gpu]
	    restart: unless-stopped
	    networks:
	      - mac-net

	# ── Code/Reasoning/Intelligence models DISABLED (12GB GPU) ──
	# Uncomment when upgrading to 24GB+ GPU
	# vllm-code:
	# ...
	# vllm-reason:
	# ...
	# vllm-intel:
	# ...

	# ═══════════════════════════════════════════════════════
	# SPEECH & AUDIO SERVICES (CPU — saves GPU for LLM)
	# ═══════════════════════════════════════════════════════

	# ── Whisper — Speech-to-Text (CPU mode) ────────────────
	  # Uses the CPU image variant and sets the device to cpu explicitly.
	  whisper:
	    container_name: mac-whisper
	    image: fedirz/faster-whisper-server:latest-cpu
	    restart: unless-stopped
	    networks:
	      - mac-net
	    ports:
	      # Host port defaults to 8005; the container side is 8000.
	      - "${WHISPER_PORT:-8005}:8000"
	    environment:
	      WHISPER__MODEL: "${WHISPER_MODEL:-Systran/faster-whisper-small}"
	      WHISPER__DEVICE: "cpu"

	# ── Piper TTS — Text-to-Speech (CPU, lightweight) ─────
	# TEMPORARILY DISABLED — image still downloading on slow WiFi
	# tts:
	# image: ghcr.io/matatonic/openedai-speech:latest
	# container_name: mac-tts
	# ports:
	# - "${TTS_PORT:-8006}:8000"
	# volumes:
	# - tts-voices:/app/voices
	# restart: unless-stopped
	# networks:
	# - mac-net

	# ═══════════════════════════════════════════════════════
	# INFRASTRUCTURE SERVICES
	# ═══════════════════════════════════════════════════════

	# ── PostgreSQL — Persistent data store ─────────────────
	  # Services that declare `condition: service_healthy` on postgres wait for
	  # the pg_isready healthcheck below to pass.
	  # NOTE(review): the port is published on all host interfaces and the
	  # password is written inline — confirm this is intended outside a trusted
	  # network.
	  postgres:
	    container_name: mac-postgres
	    image: postgres:16-alpine
	    restart: unless-stopped
	    networks:
	      - mac-net
	    ports:
	      # Host 5433 -> container 5432.
	      - "5433:5432"
	    volumes:
	      - pgdata:/var/lib/postgresql/data
	    environment:
	      - POSTGRES_USER=mac
	      - POSTGRES_PASSWORD=mac_password
	      - POSTGRES_DB=mac_db
	    healthcheck:
	      test:
	        - CMD-SHELL
	        - pg_isready -U mac -d mac_db
	      interval: 5s
	      timeout: 5s
	      retries: 5

	# ── pgAdmin — PostgreSQL admin UI (local-only by default) ──
	  # Published on the loopback interface only; starts once postgres is healthy.
	  pgadmin:
	    container_name: mac-pgadmin
	    image: dpage/pgadmin4:8
	    restart: unless-stopped
	    networks:
	      - mac-net
	    depends_on:
	      postgres:
	        condition: service_healthy
	    ports:
	      # Host port defaults to 5051 and is bound to 127.0.0.1.
	      - "127.0.0.1:${PGADMIN_PORT:-5051}:80"
	    volumes:
	      - pgadmin-data:/var/lib/pgadmin
	    environment:
	      # The login defaults below apply only when the variables are unset.
	      - "PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL:-admin@mbm.ac.in}"
	      - "PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD:-ChangeThisStrongPassword!}"
	      - "PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION=True"

	# ── Redis — Rate limiting & caching ────────────────────
	  # Services that declare `condition: service_healthy` on redis wait for
	  # the `redis-cli ping` healthcheck below to pass.
	  # NOTE(review): no password is configured here and the port is published
	  # on all host interfaces — confirm the host is on a trusted network.
	  redis:
	    container_name: mac-redis
	    image: redis:7-alpine
	    restart: unless-stopped
	    networks:
	      - mac-net
	    ports:
	      # Host 6380 -> container 6379.
	      - "6380:6379"
	    volumes:
	      - redisdata:/data
	    healthcheck:
	      test:
	        - CMD
	        - redis-cli
	        - ping
	      interval: 5s
	      timeout: 5s
	      retries: 5

	# ── Nginx — Reverse proxy + SvelteKit frontend ─────────
	  # Public entry point. Both bind mounts are read-only.
	  nginx:
	    container_name: mac-nginx
	    image: nginx:alpine
	    restart: unless-stopped
	    networks:
	      - mac-net
	    depends_on:
	      # Long form of the short `- mac` syntax: wait for mac to be started.
	      mac:
	        condition: service_started
	    ports:
	      - "${APP_HOST:-0.0.0.0}:${APP_PORT:-80}:80"
	    volumes:
	      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
	      # SvelteKit static build output
	      - ./frontend/build:/app:ro

	# ── Qdrant — Vector DB for RAG ─────────────────────────
	  # Storage lives in the qdrantdata named volume.
	  qdrant:
	    container_name: mac-qdrant
	    image: qdrant/qdrant:latest
	    restart: unless-stopped
	    networks:
	      - mac-net
	    volumes:
	      - qdrantdata:/qdrant/storage
	    ports:
	      # Same port number on host and container.
	      - "6333:6333"

	# ── SearXNG — Self-hosted web search ───────────────────
	  # Configuration directory /etc/searxng is kept in the searxngdata volume.
	  searxng:
	    container_name: mac-searxng
	    image: searxng/searxng:latest
	    restart: unless-stopped
	    networks:
	      - mac-net
	    volumes:
	      - searxngdata:/etc/searxng
	    ports:
	      # Host 8888 -> container 8080.
	      - "8888:8080"
	    environment:
	      # Matches the host-side port published above.
	      SEARXNG_BASE_URL: "http://localhost:8888/"

	volumes:
	  # Named volumes with default settings, written as explicit empty mappings.
	  pgdata: {}
	  pgadmin-data: {}
	  redisdata: {}
	  qdrantdata: {}
	  searxngdata: {}
	  # Shared HuggingFace model cache across all vLLM instances
	  hf-cache: {}
	  # Persisted TTS voice models
	  tts-voices: {}

	networks:
	  # Single bridge network that every service in this file joins.
	  mac-net:
	    driver: "bridge"