# Spaces: Sleeping (status text from the hosting page — scrape residue, not part of this compose file)
---
# Docker Compose stack for the RAG terminal application:
#   - ollama:  local LLM inference server (persists models in a named volume)
#   - rag-app: Gradio front end, started only after Ollama reports healthy
version: '3.8'  # NOTE(review): obsolete under Compose v2 (ignored with a warning); kept for older CLIs

services:
  # Ollama service for local LLM inference
  ollama:
    image: ollama/ollama:latest
    container_name: rag-ollama
    volumes:
      # Persist downloaded models across container restarts
      - ollama_data:/root/.ollama
    ports:
      - "11434:11434"
    environment:
      # Bind to all interfaces so rag-app can reach it over the compose network
      - OLLAMA_HOST=0.0.0.0
    healthcheck:
      # NOTE(review): verify curl exists in the ollama/ollama image — if it
      # doesn't, this check never passes and rag-app (which waits on
      # service_healthy) never starts. Fallback: ["CMD", "ollama", "list"].
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    # Uncomment for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]

  # RAG Application
  rag-app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rag-terminal
    ports:
      - "7860:7860"
    volumes:
      # Persist embeddings and vector store
      - ./rag_data:/app/rag_data
      - ./.embedding_cache:/app/.embedding_cache
      # Mount documents directory (read-only)
      - ~/Documents/Books:/app/documents:ro
    environment:
      # Ollama configuration — talks to the ollama service over the compose network
      - OLLAMA_BASE_URL=http://ollama:11434
      - OLLAMA_MODEL=${OLLAMA_MODEL:-smollm2:360m}
      - OLLAMA_MODEL_CLOUD=${OLLAMA_MODEL_CLOUD:-gpt-oss:20b-cloud}
      - OLLAMA_API_KEY=${OLLAMA_API_KEY}
      # Embedding configuration
      - EMBEDDING_METHOD=${EMBEDDING_METHOD:-huggingface}
      - HF_TOKEN=${HF_TOKEN}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - VOYAGE_API_KEY=${VOYAGE_API_KEY}
      - HF_EMBEDDING_MODEL=${HF_EMBEDDING_MODEL:-BAAI/bge-base-en-v1.5}
      # Document configuration
      - SAMPLE_DOCUMENT_PATH=${SAMPLE_DOCUMENT_PATH:-/app/documents/Atomic_Habits_James_Clear.pdf}
      - SAVE_DIR=/app/rag_data
      - CHUNK_SIZE=${CHUNK_SIZE:-1000}
      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
      - TOP_K=${TOP_K:-5}
      # Token limits
      - TOKEN_LIMIT_BASE=${TOKEN_LIMIT_BASE:-512}
      - TOKEN_LIMIT_PER_SOURCE=${TOKEN_LIMIT_PER_SOURCE:-200}
      - TOKEN_LIMIT_MAX=${TOKEN_LIMIT_MAX:-2048}
      # Gradio configuration
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
      - GRADIO_SHARE=${GRADIO_SHARE:-false}
    depends_on:
      # Gate startup on the Ollama healthcheck above
      ollama:
        condition: service_healthy
    healthcheck:
      # NOTE(review): assumes curl is installed by the Dockerfile — confirm
      test: ["CMD", "curl", "-f", "http://localhost:7860/"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Allow time for model/embedding warm-up before counting failures
      start_period: 60s
    restart: unless-stopped

volumes:
  ollama_data:
    driver: local

networks:
  default:
    name: rag-network