# ============== LLM CONFIGURATION ==============

# Provider: "openai", "anthropic", or "huggingface"
LLM_PROVIDER=openai

# API Keys (at least one required for full LLM analysis)
OPENAI_API_KEY=sk-your-key-here
ANTHROPIC_API_KEY=sk-ant-your-key-here

# Model names (optional - sensible defaults set in config.py)
# OPENAI_MODEL=gpt-5.1
# ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
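
# Illustrative sketch (not part of this file): one way an application might read
# the settings above with python-dotenv and branch on the provider. The loader
# code below is an assumption for illustration, not this project's config.py.
#
#   import os
#   from dotenv import load_dotenv
#
#   load_dotenv()  # copy values from .env into the process environment
#   provider = os.getenv("LLM_PROVIDER", "openai")
#   if provider == "openai":
#       api_key = os.environ["OPENAI_API_KEY"]
#       model = os.getenv("OPENAI_MODEL", "gpt-5.1")
#   elif provider == "anthropic":
#       api_key = os.environ["ANTHROPIC_API_KEY"]
#       model = os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929")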

# ============== HUGGINGFACE CONFIGURATION ==============

# HuggingFace Token - enables gated models and higher rate limits
# Get yours at: https://huggingface.co/settings/tokens
# 
# WITHOUT HF_TOKEN: Falls back to ungated models (zephyr-7b-beta, Qwen2-7B)
# WITH HF_TOKEN: Uses gated models (Llama 3.1, Gemma-2) via inference providers
#
# For HuggingFace Spaces deployment:
#   Set this as a "Secret" in Space Settings -> Variables and secrets
#   Users/judges don't need their own token - the Space secret is used
#
HF_TOKEN=hf_your-token-here
# Alternative: HUGGINGFACE_API_KEY (same as HF_TOKEN)

# Default HuggingFace model for inference (gated, requires auth)
# Can be overridden in UI dropdown
# Latest reasoning models: Qwen3-Next-80B-A3B-Thinking, Qwen3-Next-80B-A3B-Instruct, Llama-3.3-70B-Instruct
HUGGINGFACE_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking

# Fallback models for HuggingFace Inference API (comma-separated)
# Models are tried in order until one succeeds
# Format: model1,model2,model3
# Latest reasoning models first, then reliable fallbacks
# Reasoning models: Qwen3-Next (thinking/instruct), Llama-3.3-70B, Qwen3-235B
# Fallbacks: Llama-3.1-8B, Zephyr-7B (ungated), Qwen2-7B (ungated)
HF_FALLBACK_MODELS=Qwen/Qwen3-Next-80B-A3B-Thinking,Qwen/Qwen3-Next-80B-A3B-Instruct,meta-llama/Llama-3.3-70B-Instruct,meta-llama/Llama-3.1-8B-Instruct,HuggingFaceH4/zephyr-7b-beta,Qwen/Qwen2-7B-Instruct
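
# Illustrative sketch (assumption, not this project's code): walking the fallback
# list in order with huggingface_hub's InferenceClient until one model responds.
#
#   import os
#   from huggingface_hub import InferenceClient
#
#   for model_id in os.getenv("HF_FALLBACK_MODELS", "").split(","):
#       try:
#           client = InferenceClient(model=model_id, token=os.getenv("HF_TOKEN"))
#           reply = client.chat_completion(
#               messages=[{"role": "user", "content": "ping"}], max_tokens=16
#           )
#           break  # first model that answers wins
#       except Exception:
#           continue  # try the next model in the list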

# Override model/provider selection (optional, usually set via UI)
# HF_MODEL=Qwen/Qwen3-Next-80B-A3B-Thinking
# HF_PROVIDER=hyperbolic

# ============== EMBEDDING CONFIGURATION ==============

# Embedding Provider: "openai", "local", or "huggingface"
# Default: "local" (no API key required)
EMBEDDING_PROVIDER=local

# OpenAI Embedding Model (used if EMBEDDING_PROVIDER=openai)
OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# Local Embedding Model (sentence-transformers, used if EMBEDDING_PROVIDER=local)
# BAAI/bge-small-en-v1.5 is newer than all-MiniLM-L6-v2 and generally scores higher
# on retrieval benchmarks while remaining small enough to run comfortably on CPU
LOCAL_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5

# HuggingFace Embedding Model (used if EMBEDDING_PROVIDER=huggingface)
HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
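
# Illustrative sketch (assumption): encoding text locally with sentence-transformers
# when EMBEDDING_PROVIDER=local, using the model configured above.
#
#   import os
#   from sentence_transformers import SentenceTransformer
#
#   embedder = SentenceTransformer(os.getenv("LOCAL_EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5"))
#   vectors = embedder.encode(["some evidence text"], normalize_embeddings=True)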

# ============== AGENT CONFIGURATION ==============

MAX_ITERATIONS=10
SEARCH_TIMEOUT=30
LOG_LEVEL=INFO

# Graph-based execution (experimental)
# USE_GRAPH_EXECUTION=false

# Budget & Rate Limiting
# DEFAULT_TOKEN_LIMIT=100000
# DEFAULT_TIME_LIMIT_MINUTES=10
# DEFAULT_ITERATIONS_LIMIT=10

# ============== WEB SEARCH CONFIGURATION ==============

# Web Search Provider: "serper", "searchxng", "brave", "tavily", or "duckduckgo"
# Default: "duckduckgo" (no API key required)
WEB_SEARCH_PROVIDER=duckduckgo

# Serper API Key (for Google search via Serper)
# SERPER_API_KEY=your-serper-key-here

# SearXNG host URL (for a self-hosted SearXNG instance)
# SEARCHXNG_HOST=http://localhost:8080

# Brave Search API Key
# BRAVE_API_KEY=your-brave-key-here

# Tavily API Key
# TAVILY_API_KEY=your-tavily-key-here
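
# Illustrative sketch (assumption): the key-free default provider can be exercised
# directly with the duckduckgo_search package; the other providers above require
# their respective API keys.
#
#   from duckduckgo_search import DDGS
#
#   results = DDGS().text("example search query", max_results=5)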

# ============== EXTERNAL SERVICES ==============

# NCBI API key for PubMed (optional - raises the E-utilities rate limit from 3 to 10 requests/sec)
NCBI_API_KEY=your-ncbi-key-here

# Modal (optional - for secure code execution sandbox)
# MODAL_TOKEN_ID=your-modal-token-id
# MODAL_TOKEN_SECRET=your-modal-token-secret

# ============== VECTOR DATABASE (ChromaDB) ==============

# ChromaDB storage path
CHROMA_DB_PATH=./chroma_db

# Persist ChromaDB to disk (default: true)
# CHROMA_DB_PERSIST=true

# Remote ChromaDB server (optional)
# CHROMA_DB_HOST=localhost
# CHROMA_DB_PORT=8000

# ============== RAG SERVICE CONFIGURATION ==============

# ChromaDB collection name for RAG
# RAG_COLLECTION_NAME=deepcritical_evidence

# Number of top results to retrieve from RAG
# RAG_SIMILARITY_TOP_K=5

# Automatically ingest evidence into RAG
# RAG_AUTO_INGEST=true
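
# Illustrative sketch (assumption, not this project's RAG service): how the
# ChromaDB path, collection name, and top-k above might be used for retrieval.
#
#   import os
#   import chromadb
#
#   client = chromadb.PersistentClient(path=os.getenv("CHROMA_DB_PATH", "./chroma_db"))
#   collection = client.get_or_create_collection(
#       os.getenv("RAG_COLLECTION_NAME", "deepcritical_evidence")
#   )
#   hits = collection.query(
#       query_texts=["example question"],
#       n_results=int(os.getenv("RAG_SIMILARITY_TOP_K", "5")),
#   )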