# =============================================================================
# KU Doc Assistant — Environment Variables
# Copy this file to .env and adjust as needed.
# =============================================================================
#
# Two Docker usage modes — pick ONE and uncomment the matching block below.
# (Example 1, local mode, is the block that is active by default.)
#
#   1) LOCAL MODE  — docker compose --profile local up --build
#      Uses Ollama (in Docker) + local HuggingFace embeddings. No API keys.
#
#   2) CLOUD MODE  — docker compose up --build
#      Uses a cloud LLM (OpenAI / Azure / Groq / AWS Bedrock / Anthropic /
#      Google) + cloud or local embeddings. Requires the relevant API key(s)
#      or cloud credentials set below.
#
# Container-internal addresses (QDRANT_URL, OLLAMA_BASE_URL, API_BASE_URL)
# are overridden by docker-compose.yml `environment:`. Do NOT change them here
# for Docker — the localhost defaults below are for bare-metal development.
#
# Keep every comment on its own line: some env-file readers treat text after
# the value as part of the value.
# =============================================================================

# *****************************************************************************
# EXAMPLE 1 — LOCAL MODE (Ollama + HuggingFace, no API keys)
# *****************************************************************************
LLM_PROVIDER=ollama
EMBEDDING_PROVIDER=local
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=gemma4:e4b
LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# Optional stronger RAGAS judge. Groq is a cloud provider and needs
# GROQ_API_KEY, so these stay commented out to keep local mode key-free.
# With EVALUATOR_LLM_PROVIDER unset, the generation LLM is reused as the judge.
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL=llama-3.3-70b-versatile

# *****************************************************************************
# EXAMPLE 2 — CLOUD MODE (OpenAI) — uncomment & comment out Example 1 above
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=openai
# OPENAI_API_KEY=sk-...
# Optional: custom endpoint for OpenAI-compatible APIs (leave empty for the
# default OpenAI endpoint).
# OPENAI_BASE_URL=
# OPENAI_MODEL=gpt-4o-mini
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small

# *****************************************************************************
# EXAMPLE 2a — CLOUD MODE (SiliconFlow, OpenAI-compatible)
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=local
# OPENAI_API_KEY=your-siliconflow-api-key
# OPENAI_BASE_URL=https://api.siliconflow.cn/v1
# OPENAI_MODEL=Qwen/Qwen2.5-72B-Instruct
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2b — CLOUD MODE (Azure OpenAI) — uncomment & comment out above
# *****************************************************************************
# LLM_PROVIDER=azure_openai
# EMBEDDING_PROVIDER=azure_openai
# AZURE_OPENAI_API_KEY=...
# Replace "your-resource-name" with the name of your Azure OpenAI resource.
# AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-01
# Deployment names — presumably the chat and embedding deployments created in
# your Azure OpenAI resource; confirm against the application's settings.
# AZURE_OPENAI_DEPLOYMENT=
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT=

# *****************************************************************************
# EXAMPLE 2c — CLOUD MODE (Groq LLM + local embeddings, FREE)
# *****************************************************************************
# LLM_PROVIDER=groq
# EMBEDDING_PROVIDER=local
# GROQ_API_KEY=gsk_...
# GROQ_MODEL=qwen/qwen3-32b
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2d — CLOUD MODE (AWS Bedrock)
# *****************************************************************************
# LLM_PROVIDER=bedrock
# EMBEDDING_PROVIDER=bedrock
# AWS_REGION=eu-west-1
# AWS_BEDROCK_MODEL=anthropic.claude-sonnet-4-20250514-v1:0
# AWS_BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
# Note: Uses default AWS credential chain (env vars, ~/.aws/credentials, or IAM role)

# *****************************************************************************
# EXAMPLE 2e — CLOUD MODE (Anthropic LLM + local embeddings)
# *****************************************************************************
# LLM_PROVIDER=anthropic
# EMBEDDING_PROVIDER=local
# ANTHROPIC_API_KEY=sk-ant-...
# ANTHROPIC_MODEL=claude-sonnet-4-20250514
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2

# *****************************************************************************
# EXAMPLE 2f — CLOUD MODE (Google GenAI)
# *****************************************************************************
# LLM_PROVIDER=google_genai
# EMBEDDING_PROVIDER=google_genai
# GOOGLE_API_KEY=...
# GOOGLE_LLM_MODEL=gemini-2.5-flash
# GOOGLE_EMBEDDING_MODEL=models/embedding-001

# =============================================================================
# Shared settings (apply to all modes)
# =============================================================================

# --- Vector Store / Search ---------------------------------------------------
QDRANT_PATH=./qdrant_data
# Empty = local file mode; Docker overrides to http://qdrant:6333
QDRANT_URL=
COLLECTION_NAME=ku_documents
EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
EMBEDDING_DIMENSION=384
GENERATION_MODEL=gemma4:e4b
RERANKER_MODEL=cross-encoder/mmarco-mMiniLMv2-L12-H384-v1
CHUNK_SIZE=512
CHUNK_OVERLAP=64
TOP_K=5
BM25_WEIGHT=0.3
DENSE_WEIGHT=0.7
LOG_LEVEL=INFO

# --- Query Translation -------------------------------------------------------
# Translate non-Danish queries to Danish before retrieval (BM25 + vector search).
# Default: true when LLM_PROVIDER=ollama, false for cloud providers.
# TRANSLATE_QUERY=true

# --- RAGAS Evaluation Judge --------------------------------------------------
# Use a strong, independent judge LLM for RAGAS scoring. When generation runs
# on a small local model, a stronger judge gives substantially less noisy
# scores. Leave EVALUATOR_LLM_PROVIDER empty to reuse the generation LLM.
#
# Example: generation = local Ollama (gemma), judge = Qwen3-32B via Groq
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL is optional; it defaults to GROQ_MODEL.
# EVALUATOR_LLM_MODEL=qwen/qwen3-32b

# --- Inter-service Communication (bare-metal defaults) -----------------------
# Docker overrides to http://api:8000
API_BASE_URL=http://localhost:8000

# --- Token Budget (measure-only) ---------------------------------------------
# When true, the routers log estimated prompt token sizes at the three known
# generation points (generate_answer, planner, synthesizer). No truncation is
# applied — this is purely observability.
# Counts use tiktoken cl100k as a
# baseline with a 1.5x safety factor for non-OpenAI multilingual tokenizers.
# TOKEN_BUDGET_ENABLED=false

# --- LLM Provider Fallback ---------------------------------------------------
# When enabled, the primary LLM is wrapped with LangChain with_fallbacks so
# requests that fail on the primary are retried against each provider in the
# chain (left to right). DEFAULT OFF. Switching from a local privacy-aware
# provider (Ollama) to a cloud provider (OpenAI / Anthropic / ...) has both
# COST and DATA-EXFILTRATION implications.
# Your requests may leave the tenant when switching from local to cloud.
#
# Limitations to be aware of:
#   - Disabled automatically when AGENT_MODE=react (RunnableWithFallbacks is
#     incompatible with bind_tools used by the react sub-agent).
#   - Mid-stream failures are NOT covered: with_fallbacks only catches errors
#     raised before the first token; a connection drop mid-generation will
#     surface as an exception to the caller.
#   - Each fallback activation is logged at WARNING level naming the destination
#     provider — check application logs for unexpected switches.
# LLM_FALLBACK_ENABLED=false
# Comma-separated provider chain, tried left to right.
# LLM_FALLBACK_PROVIDERS=openai,anthropic