Spaces:

niloydebbarma
/

allycat

Runtime error

App Files Files Community

allycat / env.sample.txt

niloydebbarma

Upload 50 files

a7d2416 verified 6 months ago

raw

history blame contribute delete

6.56 kB

	# ============================================
	# AllyCAT GraphRAG Configuration
	# ============================================
	# This file contains all configuration options for AllyCAT GraphRAG.
	# Copy this file to .env and customize the values.

	# ============================================
	# Deployment Mode
	# ============================================
	# Automatically run the complete pipeline on startup (Docker deployments)
	# Set to true for Heroku, AWS, Google Cloud Run, etc.
	# When true, WEBSITE_URL must also be set (see Website Crawling Configuration).
	AUTO_RUN_PIPELINE=false

	# Memory Optimization: Remove pipeline dependencies after completion
	# Saves ~350-500 MB RAM - recommended for 1GB containers (DigitalOcean, etc.)
	# Set to true to automatically clean up heavy packages after pipeline completes
	CLEANUP_PIPELINE_DEPS=false

	# ============================================
	# Website Crawling Configuration
	# ============================================
	# Website to crawl (required if AUTO_RUN_PIPELINE=true)
	WEBSITE_URL=https://example.com
	# Crawl limits.
	# NOTE(review): presumably the maximum number of downloaded pages/files and the
	# maximum link depth followed from WEBSITE_URL - confirm against the crawler.
	CRAWL_MAX_DOWNLOADS=100
	CRAWL_MAX_DEPTH=3
	# Pause between crawl requests.
	# NOTE(review): unit is presumably seconds - confirm.
	WAITTIME_BETWEEN_REQUESTS=0.1

	# ============================================
	# LLM Configuration (Cloud-First)
	# ============================================
	# LLM Runtime Environment
	# Options: cloud, local_ollama
	# For local_ollama, also see "Local Ollama Configuration (Optional)" below.
	LLM_RUN_ENV=cloud

	# LLM Model Selection
	# The examples follow a <provider>/<model id> pattern; the provider prefix
	# presumably selects which API key is used - confirm.
	# Cloud providers: cerebras/llama3.1-8b, gemini/gemini-1.5-flash, nebius/meta-llama/Meta-Llama-3.1-8B-Instruct
	# Local: ollama/gemma3:1b
	LLM_MODEL=cerebras/llama3.1-8b

	# ============================================
	# LLM API Keys (Cloud Providers)
	# ============================================
	# Get your FREE API keys:
	# - Cerebras: https://cerebras.ai/ (recommended)
	# - Gemini: https://aistudio.google.com/
	# - Nebius: https://studio.nebius.ai/
	#
	# The values below are placeholders - replace them in your .env copy and keep
	# that file out of version control.
	# NOTE(review): presumably only the keys of the providers you actually select
	# (LLM_MODEL, GRAPH_LLM_PROVIDER) are required - confirm.

	CEREBRAS_API_KEY=your_cerebras_api_key
	GEMINI_API_KEY=your_gemini_api_key
	NEBIUS_API_KEY=your_nebius_api_key

	# ============================================
	# Local Ollama Configuration (Optional)
	# ============================================
	# Only needed if LLM_RUN_ENV=local_ollama
	# OLLAMA_MODEL=gemma3:1b
	# OLLAMA_BASE_URL=http://localhost:11434

	# ============================================
	# Vector Database Configuration
	# ============================================
	# Options: cloud_zilliz (recommended), local
	VECTOR_DB_TYPE=cloud_zilliz

	# Zilliz Cloud Configuration (https://cloud.zilliz.com/)
	# Placeholder values - replace with your cluster endpoint and token.
	# NOTE(review): presumably only read when VECTOR_DB_TYPE=cloud_zilliz - confirm.
	ZILLIZ_CLUSTER_ENDPOINT=https://your-cluster.zilliz.cloud
	ZILLIZ_TOKEN=your_zilliz_token

	# Local Milvus Configuration (only if VECTOR_DB_TYPE=local)
	# Uncomment the line below when switching to the local backend.
	# MILVUS_URI=./workspace/milvus_lite.db

	# ============================================
	# Graph Database Configuration (Neo4j)
	# ============================================
	# Neo4j Aura (Cloud) - Recommended: https://neo4j.com/cloud/aura/
	# Placeholder URI and password - replace with your instance's values.
	NEO4J_URI=neo4j+s://your-instance.databases.neo4j.io
	NEO4J_USERNAME=neo4j
	NEO4J_PASSWORD=your_neo4j_password
	NEO4J_DATABASE=neo4j

	# Local Neo4j (only for development)
	# To use it, comment out the Aura NEO4J_URI above and uncomment the line
	# below, so that NEO4J_URI is defined only once.
	# NEO4J_URI=bolt://localhost:7687

	# ============================================
	# Graph Extraction LLM Provider
	# ============================================
	# Provider for entity/relationship extraction
	# Options: gemini (recommended, 1500 free requests/day), cerebras
	# NOTE(review): with the defaults in this file (LLM_MODEL on cerebras,
	# GRAPH_LLM_PROVIDER=gemini) both CEREBRAS_API_KEY and GEMINI_API_KEY are
	# presumably needed - confirm.
	GRAPH_LLM_PROVIDER=gemini

	# API keys are shared from LLM Configuration section above

	# ============================================
	# Embedding Model Configuration
	# ============================================
	# Embedding model for semantic search
	# Options:
	# - ibm-granite/granite-embedding-30m-english (61 MB, fastest)
	# - BAAI/bge-small-en-v1.5 (129 MB, balanced)
	# - ibm-granite/granite-embedding-107m-multilingual (219 MB, multilingual)
	EMBEDDING_MODEL=ibm-granite/granite-embedding-30m-english
	# Embedding vector dimension.
	# NOTE(review): presumably must equal the output dimension of EMBEDDING_MODEL
	# (384 for the models listed above per their model cards) - verify when
	# switching to a different model.
	EMBEDDING_LENGTH=384

	# ============================================
	# Chunking Configuration
	# ============================================
	# Size of each text chunk and the overlap between consecutive chunks.
	# NOTE(review): the unit (tokens vs. characters) is not stated here - confirm
	# against the chunking code before tuning.
	CHUNK_SIZE=512
	CHUNK_OVERLAP=20

	# ============================================
	# Graph Extraction Configuration
	# ============================================
	# Entity and relationship extraction parameters
	# Keep each MIN value less than or equal to its MAX counterpart.
	# NOTE(review): GRAPH_MIN_CONFIDENCE and GRAPH_SENTENCE_BOUNDARY_RATIO look like
	# fractions in the 0-1 range, and GRAPH_MAX_CONTENT_CHARS a character limit on
	# the text sent for extraction - confirm.
	GRAPH_MIN_ENTITIES=5
	GRAPH_MAX_ENTITIES=15
	GRAPH_MIN_RELATIONSHIPS=3
	GRAPH_MAX_RELATIONSHIPS=8
	GRAPH_MIN_CONFIDENCE=0.8
	GRAPH_MAX_CONTENT_CHARS=12000
	GRAPH_SENTENCE_BOUNDARY_RATIO=0.7

	# ============================================
	# Graph Community Detection (Phase 2)
	# ============================================
	# Leiden algorithm parameters for community detection
	# GRAPH_RESOLUTION_CANDIDATES is a comma-separated list of resolution values.
	# NOTE(review): GRAPH_LEIDEN_ITERATIONS=-1 presumably means "iterate until the
	# partition stops improving" (the leidenalg convention for negative values),
	# and GRAPH_LEIDEN_SEED presumably fixes the random seed for reproducible
	# runs - confirm.
	GRAPH_MIN_COMMUNITY_SIZE=5
	GRAPH_LEIDEN_RESOLUTION=1.0
	GRAPH_LEIDEN_ITERATIONS=-1
	GRAPH_LEIDEN_SEED=42
	GRAPH_TARGET_COVERAGE_MIN=5.0
	GRAPH_TARGET_COVERAGE_MAX=8.0
	GRAPH_RESOLUTION_CANDIDATES=0.1,0.5,1.0,2.0,5.0,10.0,20.0,30.0,50.0,100.0
	GRAPH_MIN_NODES_FOR_OPTIMIZATION=50

	# ============================================
	# Application Configuration
	# ============================================
	# Application type for Docker deployment
	# Options: flask_graph (default), chainlit_graph, flask
	APP_TYPE=flask_graph

	# Flask server port
	PORT=8080

	# UI starter prompts (pipe-separated)
	# Separate prompts with a plain "|" - do not escape it with a backslash, or the
	# backslash ends up inside the prompt text.
	UI_STARTER_PROMPTS=What is this website? | What are upcoming events? | Who are some of the partners?

	# ============================================
	# Port Configuration
	# ============================================
	# Comments are kept on their own lines: not every .env consumer strips a
	# trailing "# ..." (e.g. `docker run --env-file` keeps it as part of the
	# value), which would make the port values non-numeric.

	# Flask apps (Vector RAG vs GraphRAG) - Auto-configured via MY_CONFIG
	# app_flask.py (vector-only RAG)
	FLASK_VECTOR_PORT=8081
	# app_flask_graph.py (GraphRAG)
	FLASK_GRAPH_PORT=8080

	# Chainlit apps (interactive UI) - Default port: 8000, custom ports for Docker
	# app_chainlit.py (Docker only; native Python uses 8000)
	CHAINLIT_VECTOR_PORT=8082
	# app_chainlit_graph.py (Docker only; native Python uses 8000)
	CHAINLIT_GRAPH_PORT=8083

	# Docker and external services
	# External Docker exposed port (host side)
	DOCKER_PORT=8080
	# Internal container port (container side, matches APP_TYPE)
	DOCKER_APP_PORT=8080
	# Ollama server port (for local LLM)
	OLLAMA_PORT=11434

	# ============================================
	# Workspace Configuration
	# ============================================
	# For native execution: use relative path 'workspace'
	# For Docker: use absolute path '/allycat/workspace'
	# The default below is the native-execution value; change it for Docker.
	WORKSPACE_DIR=workspace

	# ============================================
	# Advanced Configuration
	# ============================================
	# Hugging Face endpoint (for Chinese users or custom mirrors)
	# The default is the official hub; change it only if you need a mirror.
	HF_ENDPOINT=https://huggingface.co