# ═══════════════════════════════════════════════════════════
# MAC — MBM AI Cloud | Local Server Setup (12GB GPU)
# ═══════════════════════════════════════════════════════════
# RTX 3060 12GB VRAM — single model at a time strategy.
# GPU: Qwen2.5-7B chat/code ~ 5GB (vLLM runs with --gpu-memory-utilization 0.85)
# CPU: Whisper STT + Piper TTS ~ 1.5GB RAM (no VRAM)
# Infra: PostgreSQL + Redis + Nginx + Qdrant + SearXNG
# ═══════════════════════════════════════════════════════════
services:
# ── MAC API Server ──────────────────────────────────────
mac:
  # MAC API container, built from the build context in this directory.
  build: .
  container_name: mac-api
  ports:
    # The container listens on 8000. The host side defaults to 8001, which is
    # also the default host port published by vllm-speed; set MAC_API_PORT or
    # VLLM_SPEED_PORT to a different value so the two bindings do not collide.
    - "${APP_HOST:-0.0.0.0}:${MAC_API_PORT:-8001}:8000"
  env_file: .env
  environment:
    # Credentials here must match POSTGRES_USER / POSTGRES_PASSWORD /
    # POSTGRES_DB on the postgres service.
    - DATABASE_URL=postgresql+asyncpg://mac:mac_password@postgres:5432/mac_db
    - REDIS_URL=redis://redis:6379/0
    # All model roles share the single vllm-speed container. The port follows
    # VLLM_SPEED_PORT because vllm-speed is started with --port set from the
    # same variable; a hard-coded 8001 would break as soon as it is overridden.
    - VLLM_BASE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_SPEED_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_CODE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_REASONING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - VLLM_INTELLIGENCE_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - WHISPER_URL=http://whisper:8000
    # NOTE(review): the tts service is commented out in this file, so TTS_URL
    # has no backing container while tts-piper is still listed in
    # MAC_ENABLED_MODELS. Confirm the API tolerates that.
    - TTS_URL=http://tts:8000
    - EMBEDDING_URL=http://vllm-speed:${VLLM_SPEED_PORT:-8001}
    - QDRANT_URL=http://qdrant:6333
    - SEARXNG_URL=http://searxng:8080
    - MAC_ENABLED_MODELS=qwen2.5:7b,whisper-small,tts-piper
  depends_on:
    # Wait for the healthchecks defined on postgres and redis to pass.
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  restart: unless-stopped
  networks:
    - mac-net
# ═══════════════════════════════════════════════════════
# vLLM GPU INFERENCE — Single model for 12GB GPU
# ═══════════════════════════════════════════════════════
# ── Speed Model: Qwen2.5-7B (handles ALL chat/code/general) ──
vllm-speed:
  # The only vLLM server in this stack; every VLLM_*_URL and EMBEDDING_URL
  # on the mac service points at this container.
  container_name: mac-vllm-speed
  image: vllm/vllm-openai:latest
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Same port number on the host and inside the container.
    - "${VLLM_SPEED_PORT:-8001}:${VLLM_SPEED_PORT:-8001}"
  environment:
    HF_HOME: /root/.cache/huggingface
  volumes:
    # Named volume mounted at the HF_HOME path set above.
    - hf-cache:/root/.cache/huggingface
  # Folded scalar: the lines below are joined into one argument string.
  command: >-
    --model ${VLLM_SPEED_MODEL:-Qwen/Qwen2.5-7B-Instruct-AWQ}
    --port ${VLLM_SPEED_PORT:-8001}
    --gpu-memory-utilization 0.85
    --max-model-len 8192
    --trust-remote-code
    --enforce-eager
  deploy:
    resources:
      reservations:
        # Reserve one NVIDIA GPU for this container.
        devices:
          - driver: nvidia
            count: 1
            capabilities:
              - gpu
# ── Code/Reasoning/Intelligence models DISABLED (12GB GPU) ──
# Uncomment when upgrading to 24GB+ GPU
# vllm-code:
# ...
# vllm-reason:
# ...
# vllm-intel:
# ...
# ═══════════════════════════════════════════════════════
# SPEECH & AUDIO SERVICES (CPU — saves GPU for LLM)
# ═══════════════════════════════════════════════════════
# ── Whisper — Speech-to-Text (CPU mode) ────────────────
whisper:
  # Speech-to-text server using the CPU image tag with the device set to cpu.
  container_name: mac-whisper
  image: fedirz/faster-whisper-server:latest-cpu
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Container port 8000 is the one the mac service reaches via WHISPER_URL.
    - "${WHISPER_PORT:-8005}:8000"
  environment:
    WHISPER__MODEL: ${WHISPER_MODEL:-Systran/faster-whisper-small}
    WHISPER__DEVICE: cpu
# ── Piper TTS — Text-to-Speech (CPU, lightweight) ─────
# TEMPORARILY DISABLED — image still downloading on slow WiFi
# tts:
# image: ghcr.io/matatonic/openedai-speech:latest
# container_name: mac-tts
# ports:
# - "${TTS_PORT:-8006}:8000"
# volumes:
# - tts-voices:/app/voices
# restart: unless-stopped
# networks:
# - mac-net
# ═══════════════════════════════════════════════════════
# INFRASTRUCTURE SERVICES
# ═══════════════════════════════════════════════════════
# ── PostgreSQL — Persistent data store ─────────────────
postgres:
  image: postgres:16-alpine
  container_name: mac-postgres
  environment:
    # These three values must stay in sync with DATABASE_URL on the mac service.
    POSTGRES_USER: mac
    POSTGRES_PASSWORD: mac_password
    POSTGRES_DB: mac_db
  ports:
    # Published on loopback by default: the password above is a fixed, known
    # value, and containers on mac-net reach the database at postgres:5432
    # without this mapping. Set POSTGRES_BIND_HOST=0.0.0.0 to expose host
    # port 5433 to other machines.
    - "${POSTGRES_BIND_HOST:-127.0.0.1}:5433:5432"
  volumes:
    - pgdata:/var/lib/postgresql/data
  healthcheck:
    # Gates the services that declare `condition: service_healthy` on postgres.
    test: ["CMD-SHELL", "pg_isready -U mac -d mac_db"]
    interval: 5s
    timeout: 5s
    retries: 5
  restart: unless-stopped
  networks:
    - mac-net
# ── pgAdmin — PostgreSQL admin UI (local-only by default) ──
pgadmin:
  container_name: mac-pgadmin
  image: dpage/pgadmin4:8
  restart: unless-stopped
  networks:
    - mac-net
  depends_on:
    # Start only after the postgres healthcheck passes.
    postgres:
      condition: service_healthy
  ports:
    # Bound to loopback, so the UI is reachable from the Docker host only.
    - "127.0.0.1:${PGADMIN_PORT:-5051}:80"
  environment:
    # Login credentials; the fallbacks apply when the variables are unset.
    PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@mbm.ac.in}
    PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-ChangeThisStrongPassword!}
    # Quoted so the value is passed as the string "True", not a YAML boolean.
    PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "True"
  volumes:
    - pgadmin-data:/var/lib/pgadmin
# ── Redis — Rate limiting & caching ────────────────────
redis:
  image: redis:7-alpine
  container_name: mac-redis
  ports:
    # Published on loopback by default: no password is configured for this
    # Redis instance, and containers on mac-net reach it at redis:6379
    # without this mapping. Set REDIS_BIND_HOST=0.0.0.0 to expose host
    # port 6380 to other machines.
    - "${REDIS_BIND_HOST:-127.0.0.1}:6380:6379"
  volumes:
    - redisdata:/data
  healthcheck:
    # Gates the services that declare `condition: service_healthy` on redis.
    test: ["CMD", "redis-cli", "ping"]
    interval: 5s
    timeout: 5s
    retries: 5
  restart: unless-stopped
  networks:
    - mac-net
# ── Nginx — Reverse proxy + SvelteKit frontend ─────────
nginx:
  container_name: mac-nginx
  image: nginx:alpine
  restart: unless-stopped
  networks:
    - mac-net
  depends_on:
    # Long form of the short `- mac` entry: wait for the container to start,
    # not for a healthcheck.
    mac:
      condition: service_started
  ports:
    - "${APP_HOST:-0.0.0.0}:${APP_PORT:-80}:80"
  volumes:
    # Both mounts are read-only.
    - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
    # SvelteKit static build output
    - ./frontend/build:/app:ro
# ── Qdrant — Vector DB for RAG ─────────────────────────
qdrant:
  image: qdrant/qdrant:latest
  container_name: mac-qdrant
  ports:
    # Published on loopback by default: no API key is configured for this
    # Qdrant instance, and the mac service reaches it at qdrant:6333 over
    # mac-net without this mapping. Set QDRANT_BIND_HOST=0.0.0.0 to expose
    # host port 6333 to other machines.
    - "${QDRANT_BIND_HOST:-127.0.0.1}:6333:6333"
  volumes:
    - qdrantdata:/qdrant/storage
  restart: unless-stopped
  networks:
    - mac-net
# ── SearXNG — Self-hosted web search ───────────────────
searxng:
  container_name: mac-searxng
  image: searxng/searxng:latest
  restart: unless-stopped
  networks:
    - mac-net
  ports:
    # Host port 8888 maps to container port 8080, the port used in SEARXNG_URL
    # on the mac service.
    - "8888:8080"
  environment:
    # Matches the host-side port published above.
    SEARXNG_BASE_URL: http://localhost:8888/
  volumes:
    - searxngdata:/etc/searxng
volumes:
  # Named volumes with no options set; `{}` is the explicit empty form.
  pgdata: {}
  pgadmin-data: {}
  redisdata: {}
  qdrantdata: {}
  searxngdata: {}
  # Shared HuggingFace model cache across all vLLM instances
  hf-cache: {}
  # Persisted TTS voice models
  tts-voices: {}
networks:
  # Single bridge network that every service in this file joins.
  mac-net:
    driver: bridge