# ============================================
# AllyCAT GraphRAG Configuration
# ============================================
# This file contains all configuration options for AllyCAT GraphRAG.
# Copy this file to .env and customize the values.
# ============================================
# Deployment Mode
# ============================================
# Automatically run the complete pipeline on startup (Docker deployments)
# Set to true for Heroku, AWS, Google Cloud Run, etc.
AUTO_RUN_PIPELINE=false
# Memory Optimization: Remove pipeline dependencies after completion
# Saves ~350-500 MB RAM - recommended for 1GB containers (DigitalOcean, etc.)
# Set to true to automatically clean up heavy packages after pipeline completes
CLEANUP_PIPELINE_DEPS=false
# ============================================
# Website Crawling Configuration
# ============================================
# Website to crawl (required if AUTO_RUN_PIPELINE=true)
WEBSITE_URL=https://example.com
CRAWL_MAX_DOWNLOADS=100
CRAWL_MAX_DEPTH=3
WAITTIME_BETWEEN_REQUESTS=0.1
# ============================================
# LLM Configuration (Cloud-First)
# ============================================
# LLM Runtime Environment
# Options: cloud, local_ollama
LLM_RUN_ENV=cloud
# LLM Model Selection
# Cloud providers: cerebras/llama3.1-8b, gemini/gemini-1.5-flash, nebius/meta-llama/Meta-Llama-3.1-8B-Instruct
# Local: ollama/gemma3:1b
LLM_MODEL=cerebras/llama3.1-8b
# ============================================
# LLM API Keys (Cloud Providers)
# ============================================
# Get your FREE API keys:
# - Cerebras: https://cerebras.ai/ (recommended)
# - Gemini: https://aistudio.google.com/
# - Nebius: https://studio.nebius.ai/
CEREBRAS_API_KEY=your_cerebras_api_key
GEMINI_API_KEY=your_gemini_api_key
NEBIUS_API_KEY=your_nebius_api_key
# ============================================
# Local Ollama Configuration (Optional)
# ============================================
# Only needed if LLM_RUN_ENV=local_ollama
# OLLAMA_MODEL=gemma3:1b
# OLLAMA_BASE_URL=http://localhost:11434
# ============================================
# Vector Database Configuration
# ============================================
# Options: cloud_zilliz (recommended), local
VECTOR_DB_TYPE=cloud_zilliz
# Zilliz Cloud Configuration (https://cloud.zilliz.com/)
ZILLIZ_CLUSTER_ENDPOINT=https://your-cluster.zilliz.cloud
ZILLIZ_TOKEN=your_zilliz_token
# Local Milvus Configuration (only if VECTOR_DB_TYPE=local)
# MILVUS_URI=./workspace/milvus_lite.db
# ============================================
# Graph Database Configuration (Neo4j)
# ============================================
# Neo4j Aura (Cloud) - Recommended: https://neo4j.com/cloud/aura/
NEO4J_URI=neo4j+s://your-instance.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=your_neo4j_password
NEO4J_DATABASE=neo4j
# Local Neo4j (only for development)
# NEO4J_URI=bolt://localhost:7687
# ============================================
# Graph Extraction LLM Provider
# ============================================
# Provider for entity/relationship extraction
# Options: gemini (recommended, 1500 free requests/day), cerebras
GRAPH_LLM_PROVIDER=gemini
# API keys are shared from LLM Configuration section above
# ============================================
# Embedding Model Configuration
# ============================================
# Embedding model for semantic search
# Options:
# - ibm-granite/granite-embedding-30m-english (61 MB, fastest)
# - BAAI/bge-small-en-v1.5 (129 MB, balanced)
# - ibm-granite/granite-embedding-107m-multilingual (219 MB, multilingual)
EMBEDDING_MODEL=ibm-granite/granite-embedding-30m-english
EMBEDDING_LENGTH=384
# ============================================
# Chunking Configuration
# ============================================
CHUNK_SIZE=512
CHUNK_OVERLAP=20
# ============================================
# Graph Extraction Configuration
# ============================================
# Entity and relationship extraction parameters
GRAPH_MIN_ENTITIES=5
GRAPH_MAX_ENTITIES=15
GRAPH_MIN_RELATIONSHIPS=3
GRAPH_MAX_RELATIONSHIPS=8
GRAPH_MIN_CONFIDENCE=0.8
GRAPH_MAX_CONTENT_CHARS=12000
GRAPH_SENTENCE_BOUNDARY_RATIO=0.7
# ============================================
# Graph Community Detection (Phase 2)
# ============================================
# Leiden algorithm parameters for community detection
GRAPH_MIN_COMMUNITY_SIZE=5
GRAPH_LEIDEN_RESOLUTION=1.0
GRAPH_LEIDEN_ITERATIONS=-1
GRAPH_LEIDEN_SEED=42
GRAPH_TARGET_COVERAGE_MIN=5.0
GRAPH_TARGET_COVERAGE_MAX=8.0
GRAPH_RESOLUTION_CANDIDATES=0.1,0.5,1.0,2.0,5.0,10.0,20.0,30.0,50.0,100.0
GRAPH_MIN_NODES_FOR_OPTIMIZATION=50
# ============================================
# Application Configuration
# ============================================
# Application type for Docker deployment
# Options: flask_graph (default), chainlit_graph, flask
APP_TYPE=flask_graph
# Flask server port
PORT=8080
# UI starter prompts (pipe-separated)
UI_STARTER_PROMPTS=What is this website? | What are upcoming events? | Who are some of the partners?
# ============================================
# Port Configuration
# ============================================
# NOTE: comments are kept on their own lines — trailing inline comments are
# not portable across dotenv consumers (e.g. docker --env-file keeps them
# as part of the value).
# Flask apps (Vector RAG vs GraphRAG) - Auto-configured via MY_CONFIG
# app_flask.py (vector-only RAG)
FLASK_VECTOR_PORT=8081
# app_flask_graph.py (GraphRAG)
FLASK_GRAPH_PORT=8080
# Chainlit apps (interactive UI) - Default port: 8000, custom ports for Docker
# app_chainlit.py (Docker only; native Python uses 8000)
CHAINLIT_VECTOR_PORT=8082
# app_chainlit_graph.py (Docker only; native Python uses 8000)
CHAINLIT_GRAPH_PORT=8083
# Docker and external services
# External Docker exposed port (host side)
DOCKER_PORT=8080
# Internal container port (container side, matches APP_TYPE)
DOCKER_APP_PORT=8080
# Ollama server port (for local LLM)
OLLAMA_PORT=11434
# ============================================
# Workspace Configuration
# ============================================
# For native execution: use relative path 'workspace'
# For Docker: use absolute path '/allycat/workspace'
WORKSPACE_DIR=workspace
# ============================================
# Advanced Configuration
# ============================================
# Hugging Face endpoint (for Chinese users or custom mirrors)
HF_ENDPOINT=https://huggingface.co