# Spaces: Sleeping (status text from the hosting page — scrape residue, not part of this compose file)
---
# Docker Compose stack for the RAG terminal application:
#   - ollama:  local LLM inference server (persists models in a named volume)
#   - rag-app: Gradio front end, started only after Ollama reports healthy
version: '3.8'  # NOTE(review): obsolete under Compose v2 (ignored with a warning); kept for older CLIs

services:
  # Ollama service for local LLM inference
  ollama:
    image: ollama/ollama:latest
    container_name: rag-ollama
    volumes:
      # Persist downloaded models across container restarts
      - ollama_data:/root/.ollama
    ports:
      - "11434:11434"
    environment:
      # Bind to all interfaces so rag-app can reach it over the compose network
      - OLLAMA_HOST=0.0.0.0
    healthcheck:
      # NOTE(review): verify curl exists in the ollama/ollama image — if it
      # doesn't, this check never passes and rag-app (which waits on
      # service_healthy) never starts. Fallback: ["CMD", "ollama", "list"].
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 5
    restart: unless-stopped
    # Uncomment for GPU support
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]

  # RAG Application
  rag-app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: rag-terminal
    ports:
      - "7860:7860"
    volumes:
      # Persist embeddings and vector store
      - ./rag_data:/app/rag_data
      - ./.embedding_cache:/app/.embedding_cache
      # Mount documents directory (read-only)
      - ~/Documents/Books:/app/documents:ro
    environment:
      # Ollama configuration — talks to the ollama service over the compose network
      - OLLAMA_BASE_URL=http://ollama:11434
      - OLLAMA_MODEL=${OLLAMA_MODEL:-smollm2:360m}
      - OLLAMA_MODEL_CLOUD=${OLLAMA_MODEL_CLOUD:-gpt-oss:20b-cloud}
      - OLLAMA_API_KEY=${OLLAMA_API_KEY}
      # Embedding configuration
      - EMBEDDING_METHOD=${EMBEDDING_METHOD:-huggingface}
      - HF_TOKEN=${HF_TOKEN}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - VOYAGE_API_KEY=${VOYAGE_API_KEY}
      - HF_EMBEDDING_MODEL=${HF_EMBEDDING_MODEL:-BAAI/bge-base-en-v1.5}
      # Document configuration
      - SAMPLE_DOCUMENT_PATH=${SAMPLE_DOCUMENT_PATH:-/app/documents/Atomic_Habits_James_Clear.pdf}
      - SAVE_DIR=/app/rag_data
      - CHUNK_SIZE=${CHUNK_SIZE:-1000}
      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-200}
      - TOP_K=${TOP_K:-5}
      # Token limits
      - TOKEN_LIMIT_BASE=${TOKEN_LIMIT_BASE:-512}
      - TOKEN_LIMIT_PER_SOURCE=${TOKEN_LIMIT_PER_SOURCE:-200}
      - TOKEN_LIMIT_MAX=${TOKEN_LIMIT_MAX:-2048}
      # Gradio configuration
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
      - GRADIO_SHARE=${GRADIO_SHARE:-false}
    depends_on:
      # Gate startup on the Ollama healthcheck above
      ollama:
        condition: service_healthy
    healthcheck:
      # NOTE(review): assumes curl is installed by the Dockerfile — confirm
      test: ["CMD", "curl", "-f", "http://localhost:7860/"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Allow time for model/embedding warm-up before counting failures
      start_period: 60s
    restart: unless-stopped

volumes:
  ollama_data:
    driver: local

networks:
  default:
    name: rag-network