allycat / env.sample.txt
niloydebbarma's picture
Upload 50 files
a7d2416 verified
# ============================================
# AllyCAT GraphRAG Configuration
# ============================================
# This file contains all configuration options for AllyCAT GraphRAG.
# Copy this file to .env and customize the values.
# ============================================
# Deployment Mode
# ============================================
# Automatically run the complete pipeline on startup (Docker deployments)
# Set to true for Heroku, AWS, Google Cloud Run, etc.
AUTO_RUN_PIPELINE=false
# Memory Optimization: Remove pipeline dependencies after completion
# Saves ~350-500 MB RAM - recommended for 1GB containers (DigitalOcean, etc.)
# Set to true to automatically clean up heavy packages after pipeline completes
CLEANUP_PIPELINE_DEPS=false
# ============================================
# Website Crawling Configuration
# ============================================
# Website to crawl (required if AUTO_RUN_PIPELINE=true)
WEBSITE_URL=https://example.com
CRAWL_MAX_DOWNLOADS=100
CRAWL_MAX_DEPTH=3
WAITTIME_BETWEEN_REQUESTS=0.1
# ============================================
# LLM Configuration (Cloud-First)
# ============================================
# LLM Runtime Environment
# Options: cloud, local_ollama
LLM_RUN_ENV=cloud
# LLM Model Selection
# Cloud providers: cerebras/llama3.1-8b, gemini/gemini-1.5-flash, nebius/meta-llama/Meta-Llama-3.1-8B-Instruct
# Local: ollama/gemma3:1b
LLM_MODEL=cerebras/llama3.1-8b
# ============================================
# LLM API Keys (Cloud Providers)
# ============================================
# Get your FREE API keys:
# - Cerebras: https://cerebras.ai/ (recommended)
# - Gemini: https://aistudio.google.com/
# - Nebius: https://studio.nebius.ai/
CEREBRAS_API_KEY=your_cerebras_api_key
GEMINI_API_KEY=your_gemini_api_key
NEBIUS_API_KEY=your_nebius_api_key
# ============================================
# Local Ollama Configuration (Optional)
# ============================================
# Only needed if LLM_RUN_ENV=local_ollama
# OLLAMA_MODEL=gemma3:1b
# OLLAMA_BASE_URL=http://localhost:11434
# ============================================
# Vector Database Configuration
# ============================================
# Options: cloud_zilliz (recommended), local
VECTOR_DB_TYPE=cloud_zilliz
# Zilliz Cloud Configuration (https://cloud.zilliz.com/)
ZILLIZ_CLUSTER_ENDPOINT=https://your-cluster.zilliz.cloud
ZILLIZ_TOKEN=your_zilliz_token
# Local Milvus Configuration (only if VECTOR_DB_TYPE=local)
# MILVUS_URI=./workspace/milvus_lite.db
# ============================================
# Graph Database Configuration (Neo4j)
# ============================================
# Neo4j Aura (Cloud) - Recommended: https://neo4j.com/cloud/aura/
NEO4J_URI=neo4j+s://your-instance.databases.neo4j.io
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=your_neo4j_password
NEO4J_DATABASE=neo4j
# Local Neo4j (only for development)
# NEO4J_URI=bolt://localhost:7687
# ============================================
# Graph Extraction LLM Provider
# ============================================
# Provider for entity/relationship extraction
# Options: gemini (recommended, 1500 free requests/day), cerebras
GRAPH_LLM_PROVIDER=gemini
# API keys are shared from LLM Configuration section above
# ============================================
# Embedding Model Configuration
# ============================================
# Embedding model for semantic search
# Options:
# - ibm-granite/granite-embedding-30m-english (61 MB, fastest)
# - BAAI/bge-small-en-v1.5 (129 MB, balanced)
# - ibm-granite/granite-embedding-107m-multilingual (219 MB, multilingual)
EMBEDDING_MODEL=ibm-granite/granite-embedding-30m-english
EMBEDDING_LENGTH=384
# ============================================
# Chunking Configuration
# ============================================
CHUNK_SIZE=512
CHUNK_OVERLAP=20
# ============================================
# Graph Extraction Configuration
# ============================================
# Entity and relationship extraction parameters
GRAPH_MIN_ENTITIES=5
GRAPH_MAX_ENTITIES=15
GRAPH_MIN_RELATIONSHIPS=3
GRAPH_MAX_RELATIONSHIPS=8
GRAPH_MIN_CONFIDENCE=0.8
GRAPH_MAX_CONTENT_CHARS=12000
GRAPH_SENTENCE_BOUNDARY_RATIO=0.7
# ============================================
# Graph Community Detection (Phase 2)
# ============================================
# Leiden algorithm parameters for community detection
GRAPH_MIN_COMMUNITY_SIZE=5
GRAPH_LEIDEN_RESOLUTION=1.0
GRAPH_LEIDEN_ITERATIONS=-1
GRAPH_LEIDEN_SEED=42
GRAPH_TARGET_COVERAGE_MIN=5.0
GRAPH_TARGET_COVERAGE_MAX=8.0
GRAPH_RESOLUTION_CANDIDATES=0.1,0.5,1.0,2.0,5.0,10.0,20.0,30.0,50.0,100.0
GRAPH_MIN_NODES_FOR_OPTIMIZATION=50
# ============================================
# Application Configuration
# ============================================
# Application type for Docker deployment
# Options: flask_graph (default), chainlit_graph, flask
APP_TYPE=flask_graph
# Flask server port
PORT=8080
# UI starter prompts (pipe-separated)
UI_STARTER_PROMPTS=What is this website? | What are upcoming events? | Who are some of the partners?
# ============================================
# Port Configuration
# ============================================
# NOTE: keep comments on their own lines in this file. Some env-file loaders
# (e.g. `docker run --env-file`) do not strip trailing "# ..." text, so an
# inline comment would become part of the value and break port parsing.
# Flask apps (Vector RAG vs GraphRAG) - Auto-configured via MY_CONFIG
# app_flask.py (vector-only RAG)
FLASK_VECTOR_PORT=8081
# app_flask_graph.py (GraphRAG)
FLASK_GRAPH_PORT=8080
# Chainlit apps (interactive UI) - Default port: 8000, custom ports for Docker
# app_chainlit.py (Docker only; native Python uses 8000)
CHAINLIT_VECTOR_PORT=8082
# app_chainlit_graph.py (Docker only; native Python uses 8000)
CHAINLIT_GRAPH_PORT=8083
# Docker and external services
# External Docker exposed port (host side)
DOCKER_PORT=8080
# Internal container port (container side, matches APP_TYPE)
DOCKER_APP_PORT=8080
# Ollama server port (for local LLM)
OLLAMA_PORT=11434
# ============================================
# Workspace Configuration
# ============================================
# For native execution: use relative path 'workspace'
# For Docker: use absolute path '/allycat/workspace'
WORKSPACE_DIR=workspace
# ============================================
# Advanced Configuration
# ============================================
# Hugging Face endpoint (override to use a mirror, e.g. for users in mainland China)
HF_ENDPOINT=https://huggingface.co