File size: 7,694 Bytes
d686bcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a493f04
 
 
d686bcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f19c23
 
 
 
 
 
 
 
 
 
d686bcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a493f04
 
 
 
 
 
 
 
 
d686bcf
 
4d2a2da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# =============================================================================
# KU Doc Assistant — Environment Variables
# Copy this file to .env and adjust as needed.
# =============================================================================
#
# Two Docker usage modes — pick ONE and uncomment the matching block below.
#
# 1) LOCAL MODE  — docker compose --profile local up --build
#    Uses Ollama (in Docker) + local HuggingFace embeddings. No API keys.
#
# 2) CLOUD MODE  — docker compose up --build
#    Uses a cloud LLM (OpenAI / Azure / Anthropic / Google) + cloud or local
#    embeddings. Requires the relevant API key(s) set below.
#
# Container-internal addresses (QDRANT_URL, OLLAMA_BASE_URL, API_BASE_URL)
# are overridden by docker-compose.yml `environment:`. Do NOT change them here
# for Docker — the localhost defaults below are for bare-metal development.
# =============================================================================


# *****************************************************************************
# EXAMPLE 1 — LOCAL MODE (Ollama + HuggingFace, no API keys)
# *****************************************************************************
# Generation runs on Ollama and embeddings on a local HuggingFace model.
LLM_PROVIDER=ollama
EMBEDDING_PROVIDER=local

# Bare-metal default; docker-compose.yml overrides OLLAMA_BASE_URL for Docker.
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=gemma4:e4b
LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# RAGAS judge LLM. NOTE(review): this points at Groq, which presumably needs
# GROQ_API_KEY (see Example 2c) even though this block is labelled "no API
# keys" — confirm. Leave EVALUATOR_LLM_PROVIDER empty to reuse the generation
# LLM instead.
EVALUATOR_LLM_PROVIDER=groq
EVALUATOR_LLM_MODEL=llama-3.3-70b-versatile

# *****************************************************************************
# EXAMPLE 2 — CLOUD MODE (OpenAI) — uncomment & comment out Example 1 above
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=openai
# OPENAI_API_KEY=sk-...
# Optional: custom endpoint for OpenAI-compatible APIs (leave empty for the default).
# OPENAI_BASE_URL=
# OPENAI_MODEL=gpt-4o-mini
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# *****************************************************************************
# EXAMPLE 2a — CLOUD MODE (SiliconFlow, OpenAI-compatible)
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=local
# OPENAI_API_KEY=your-siliconflow-api-key
# OPENAI_BASE_URL=https://api.siliconflow.cn/v1
# OPENAI_MODEL=Qwen/Qwen2.5-72B-Instruct
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2b — CLOUD MODE (Azure OpenAI) — uncomment & comment out above
# *****************************************************************************
# LLM_PROVIDER=azure_openai
# EMBEDDING_PROVIDER=azure_openai
# AZURE_OPENAI_API_KEY=...
# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-01
# AZURE_OPENAI_DEPLOYMENT=<deployment-name>
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT=<embedding-deployment>

# *****************************************************************************
# EXAMPLE 2c — CLOUD MODE (Groq LLM + local embeddings, FREE)
# *****************************************************************************
# LLM_PROVIDER=groq
# EMBEDDING_PROVIDER=local
# GROQ_API_KEY=gsk_...
# GROQ_MODEL=qwen/qwen3-32b
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2d — CLOUD MODE (AWS Bedrock)
# *****************************************************************************
# LLM_PROVIDER=bedrock
# EMBEDDING_PROVIDER=bedrock
# AWS_REGION=eu-west-1
# AWS_BEDROCK_MODEL=anthropic.claude-sonnet-4-20250514-v1:0
# AWS_BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
# Note: Uses default AWS credential chain (env vars, ~/.aws/credentials, or IAM role)

# *****************************************************************************
# EXAMPLE 2e — CLOUD MODE (Anthropic LLM + local embeddings)
# *****************************************************************************
# LLM_PROVIDER=anthropic
# EMBEDDING_PROVIDER=local
# ANTHROPIC_API_KEY=sk-ant-...
# ANTHROPIC_MODEL=claude-sonnet-4-20250514
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2f — CLOUD MODE (Google GenAI)
# *****************************************************************************
# LLM_PROVIDER=google_genai
# EMBEDDING_PROVIDER=google_genai
# GOOGLE_API_KEY=...
# GOOGLE_LLM_MODEL=gemini-2.5-flash
# GOOGLE_EMBEDDING_MODEL=models/embedding-001


# =============================================================================
# Shared settings (apply to all modes)
# =============================================================================

# --- Vector Store / Search ---------------------------------------------------
# Local Qdrant storage path (presumably used in local file mode, i.e. when
# QDRANT_URL is empty — confirm).
QDRANT_PATH=./qdrant_data
# Empty = local file mode; Docker overrides to http://qdrant:6333.
# Keep this note on its own line: with an empty value, a trailing "# ..." on
# the assignment line can be read as the value itself by some dotenv parsers.
QDRANT_URL=
COLLECTION_NAME=ku_documents
# NOTE(review): EMBEDDING_MODEL and GENERATION_MODEL carry the same values as
# LOCAL_EMBEDDING_MODEL and OLLAMA_MODEL in Example 1 — confirm which keys the
# application reads and keep them in sync.
EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# Presumably must match the output size of the embedding model in use — verify
# when switching EMBEDDING_PROVIDER.
EMBEDDING_DIMENSION=384
GENERATION_MODEL=gemma4:e4b
RERANKER_MODEL=cross-encoder/mmarco-mMiniLMv2-L12-H384-v1
CHUNK_SIZE=512
CHUNK_OVERLAP=64
TOP_K=5
# Relative weights of the BM25 and dense (vector) retrieval scores; the
# defaults sum to 1.0.
BM25_WEIGHT=0.3
DENSE_WEIGHT=0.7
LOG_LEVEL=INFO

# --- Query Translation -------------------------------------------------------
# Translate non-Danish queries to Danish before retrieval (BM25 + vector search).
# Default: true when LLM_PROVIDER=ollama, false for cloud providers.
# TRANSLATE_QUERY=true

# --- RAGAS Evaluation Judge --------------------------------------------------
# Use a strong, independent judge LLM for RAGAS scoring. When generation runs
# on a small local model, a stronger judge gives substantially less noisy
# scores. Leave EVALUATOR_LLM_PROVIDER empty to reuse the generation LLM.
#
# Example: generation = local Ollama (gemma), judge = Qwen3-32B via Groq
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL is optional; it defaults to GROQ_MODEL.
# EVALUATOR_LLM_MODEL=qwen/qwen3-32b

# --- Inter-service Communication (bare-metal defaults) -----------------------
# Docker overrides this to http://api:8000. The note sits on its own line
# because parsers without inline-comment support would append it to the URL.
API_BASE_URL=http://localhost:8000

# --- Token Budget (measure-only) ---------------------------------------------
# When true, the routers log estimated prompt token sizes at the three known
# generation points (generate_answer, planner, synthesizer). No truncation is
# applied — this is purely observability. Counts use tiktoken cl100k as a
# baseline with a 1.5x safety factor for non-OpenAI multilingual tokenizers.
# TOKEN_BUDGET_ENABLED=false

# --- LLM Provider Fallback ---------------------------------------------------
# When enabled, the primary LLM is wrapped with LangChain with_fallbacks so
# requests that fail on the primary are retried against each provider in the
# chain (left to right). DEFAULT OFF. Switching from a local privacy-aware
# provider (Ollama) to a cloud provider (OpenAI / Anthropic / ...) has both
# COST and DATA-EXFILTRATION implications: your requests may leave the tenant
# when switching from local to cloud.
#
# Limitations to be aware of:
#  - Disabled automatically when AGENT_MODE=react (RunnableWithFallbacks is
#    incompatible with bind_tools used by the react sub-agent).
#  - Mid-stream failures are NOT covered: with_fallbacks only catches errors
#    raised before the first token; a connection drop mid-generation will
#    surface as an exception to the caller.
#  - Each fallback activation is logged at WARNING level naming the destination
#    provider — check application logs for unexpected switches.
# LLM_FALLBACK_ENABLED=false
# Comma-separated provider chain, tried left to right:
# LLM_FALLBACK_PROVIDERS=openai,anthropic