# SimpleRAGPipeline / docker-compose.yml
# Last commit 83fb11e by Aditya-1301:
#   Multi-document upload support and Docker/GitHub Actions deployment ready
# Compose file format version. Kept quoted so it is read as a string, not a
# float. Recent Docker Compose releases treat this key as informational only.
version: "3.8"
# Two services: `ollama` serves the LLM API on 11434, and `rag-app` is the
# Gradio front end on 7860. `rag-app` is only started once `ollama` reports
# healthy (see depends_on below).
services:
  # Ollama service for local LLM inference
  ollama:
    image: ollama/ollama:latest
    container_name: rag-ollama
    volumes:
      # Named volume backing /root/.ollama so its contents survive
      # container re-creation.
      - ollama_data:/root/.ollama
    ports:
      - "11434:11434"
    environment:
      # Listen on all interfaces so other containers can reach the API.
      - OLLAMA_HOST=0.0.0.0
    healthcheck:
      # The ollama/ollama image does not ship curl, so a curl-based probe
      # always fails and rag-app (which waits for service_healthy) would
      # never start. Probe the server with the bundled CLI instead.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    # Uncomment for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]

  # RAG Application
  rag-app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rag-terminal
    ports:
      - "7860:7860"
    volumes:
      # Persist embeddings and vector store
      - ./rag_data:/app/rag_data
      - ./.embedding_cache:/app/.embedding_cache
      # Mount documents directory (read-only). The host path is specific to
      # the author's machine; adjust it to wherever your documents live.
      - ~/Documents/Books:/app/documents:ro
    environment:
      # Ollama configuration
      # `ollama` resolves to the service above on the compose network.
      - OLLAMA_BASE_URL=http://ollama:11434
      - OLLAMA_MODEL=${OLLAMA_MODEL:-smollm2:360m}
      - OLLAMA_MODEL_CLOUD=${OLLAMA_MODEL_CLOUD:-gpt-oss:20b-cloud}
      # Optional secrets use `:-` with an empty default so Compose does not
      # warn about unset variables; the resulting value is unchanged (empty).
      - OLLAMA_API_KEY=${OLLAMA_API_KEY:-}
      # Embedding configuration
      - EMBEDDING_METHOD=${EMBEDDING_METHOD:-huggingface}
      - HF_TOKEN=${HF_TOKEN:-}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - VOYAGE_API_KEY=${VOYAGE_API_KEY:-}
      - HF_EMBEDDING_MODEL=${HF_EMBEDDING_MODEL:-BAAI/bge-base-en-v1.5}
      # Document configuration
      - SAMPLE_DOCUMENT_PATH=${SAMPLE_DOCUMENT_PATH:-/app/documents/Atomic_Habits_James_Clear.pdf}
      # Matches the ./rag_data bind mount above.
      - SAVE_DIR=/app/rag_data
      - CHUNK_SIZE=${CHUNK_SIZE:-1000}
      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
      - TOP_K=${TOP_K:-5}
      # Token limits
      - TOKEN_LIMIT_BASE=${TOKEN_LIMIT_BASE:-512}
      - TOKEN_LIMIT_PER_SOURCE=${TOKEN_LIMIT_PER_SOURCE:-200}
      - TOKEN_LIMIT_MAX=${TOKEN_LIMIT_MAX:-2048}
      # Gradio configuration
      # Port must stay in sync with the published port and healthcheck URL.
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
      - GRADIO_SHARE=${GRADIO_SHARE:-false}
    depends_on:
      ollama:
        # Wait for the ollama healthcheck to pass, not just container start.
        condition: service_healthy
    healthcheck:
      # NOTE(review): requires curl inside the image built from ./Dockerfile;
      # confirm it is installed there.
      test: ["CMD", "curl", "-f", "http://localhost:7860/"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    restart: unless-stopped

# Named volumes managed by Docker. `ollama_data` is mounted at /root/.ollama
# in the `ollama` service.
volumes:
  ollama_data:
    driver: local

# Give the project's default network a fixed name instead of the
# project-prefixed one Compose would otherwise generate.
networks:
  default:
    name: rag-network