# Dokumentassistent / .env.example
# XQ
# Add LLM Provider Fallback
# 4d2a2da
# raw
# history blame
# 7.69 kB
# =============================================================================
# KU Doc Assistant — Environment Variables
# Copy this file to .env and adjust as needed.
# =============================================================================
#
# Two Docker usage modes — pick ONE and uncomment the matching block below.
#
# 1) LOCAL MODE — docker compose --profile local up --build
# Uses Ollama (in Docker) + local HuggingFace embeddings. No API keys.
#
# 2) CLOUD MODE — docker compose up --build
# Uses a cloud LLM (OpenAI / Azure / Anthropic / Google) + cloud or local
# embeddings. Requires the relevant API key(s) set below.
#
# Container-internal addresses (QDRANT_URL, OLLAMA_BASE_URL, API_BASE_URL)
# are overridden by docker-compose.yml `environment:`. Do NOT change them here
# for Docker — the localhost defaults below are for bare-metal development.
# =============================================================================
# *****************************************************************************
# EXAMPLE 1 — LOCAL MODE (Ollama + HuggingFace, no API keys)
# *****************************************************************************
# Generation LLM and embeddings both run locally, so this block needs no API keys.
LLM_PROVIDER=ollama
EMBEDDING_PROVIDER=local
# Bare-metal address; docker-compose.yml overrides it inside containers.
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=gemma4:e4b
LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# Optional RAGAS judge. Left commented out so local mode stays key-free:
# groq is a cloud provider (presumably needs GROQ_API_KEY — confirm). While
# EVALUATOR_LLM_PROVIDER is unset, the generation LLM is reused as the judge.
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL=llama-3.3-70b-versatile
# *****************************************************************************
# EXAMPLE 2 — CLOUD MODE (OpenAI) — uncomment & comment out Example 1 above
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=openai
# OPENAI_API_KEY=sk-...
# Optional: custom endpoint for OpenAI-compatible APIs.
# OPENAI_BASE_URL=
# OPENAI_MODEL=gpt-4o-mini
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# *****************************************************************************
# EXAMPLE 2a — CLOUD MODE (SiliconFlow, OpenAI-compatible)
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=local
# OPENAI_API_KEY=your-siliconflow-api-key
# OPENAI_BASE_URL=https://api.siliconflow.cn/v1
# OPENAI_MODEL=Qwen/Qwen2.5-72B-Instruct
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2b — CLOUD MODE (Azure OpenAI) — uncomment & comment out above
# *****************************************************************************
# LLM_PROVIDER=azure_openai
# EMBEDDING_PROVIDER=azure_openai
# AZURE_OPENAI_API_KEY=...
# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-01
# AZURE_OPENAI_DEPLOYMENT=<deployment-name>
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT=<embedding-deployment>
# *****************************************************************************
# EXAMPLE 2c — CLOUD MODE (Groq LLM + local embeddings, FREE)
# *****************************************************************************
# LLM_PROVIDER=groq
# EMBEDDING_PROVIDER=local
# GROQ_API_KEY=gsk_...
# GROQ_MODEL=qwen/qwen3-32b
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2d — CLOUD MODE (AWS Bedrock)
# *****************************************************************************
# LLM_PROVIDER=bedrock
# EMBEDDING_PROVIDER=bedrock
# AWS_REGION=eu-west-1
# AWS_BEDROCK_MODEL=anthropic.claude-sonnet-4-20250514-v1:0
# AWS_BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
# Note: Uses default AWS credential chain (env vars, ~/.aws/credentials, or IAM role)
# *****************************************************************************
# EXAMPLE 2e — CLOUD MODE (Anthropic LLM + local embeddings)
# *****************************************************************************
# LLM_PROVIDER=anthropic
# EMBEDDING_PROVIDER=local
# ANTHROPIC_API_KEY=sk-ant-...
# ANTHROPIC_MODEL=claude-sonnet-4-20250514
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2f — CLOUD MODE (Google GenAI)
# *****************************************************************************
# LLM_PROVIDER=google_genai
# EMBEDDING_PROVIDER=google_genai
# GOOGLE_API_KEY=...
# GOOGLE_LLM_MODEL=gemini-2.5-flash
# GOOGLE_EMBEDDING_MODEL=models/embedding-001
# =============================================================================
# Shared settings (apply to all modes)
# =============================================================================
# --- Vector Store / Search ---------------------------------------------------
QDRANT_PATH=./qdrant_data
# Empty = local file mode; Docker overrides to http://qdrant:6333.
# The note sits on its own line because not every env-file reader strips
# trailing comments; one that does not would set QDRANT_URL to the comment
# text instead of leaving it empty.
QDRANT_URL=
COLLECTION_NAME=ku_documents
EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# NOTE(review): presumably must equal the vector size of the active embedding
# model — confirm before switching EMBEDDING_PROVIDER or the embedding model.
EMBEDDING_DIMENSION=384
GENERATION_MODEL=gemma4:e4b
RERANKER_MODEL=cross-encoder/mmarco-mMiniLMv2-L12-H384-v1
CHUNK_SIZE=512
CHUNK_OVERLAP=64
TOP_K=5
# BM25 / dense retrieval weights; the defaults sum to 1.0.
BM25_WEIGHT=0.3
DENSE_WEIGHT=0.7
LOG_LEVEL=INFO
# --- Query Translation -------------------------------------------------------
# Translate non-Danish queries to Danish before retrieval (BM25 + vector search).
# Default: true when LLM_PROVIDER=ollama, false for cloud providers.
# TRANSLATE_QUERY=true
# --- RAGAS Evaluation Judge --------------------------------------------------
# Use a strong, independent judge LLM for RAGAS scoring. When generation runs
# on a small local model, a stronger judge gives substantially less noisy
# scores. Leave EVALUATOR_LLM_PROVIDER empty to reuse the generation LLM.
#
# Example: generation = local Ollama (gemma), judge = Qwen3-32B via Groq
# EVALUATOR_LLM_PROVIDER=groq
# Optional; defaults to GROQ_MODEL.
# EVALUATOR_LLM_MODEL=qwen/qwen3-32b
# --- Inter-service Communication (bare-metal defaults) -----------------------
# Bare-metal default; Docker overrides to http://api:8000.
# Kept free of a trailing comment so readers that do not strip inline
# comments still get a clean URL.
API_BASE_URL=http://localhost:8000
# --- Token Budget (measure-only) ---------------------------------------------
# When true, the routers log estimated prompt token sizes at the three known
# generation points (generate_answer, planner, synthesizer). No truncation is
# applied — this is purely for observability. Counts use tiktoken cl100k as a
# baseline with a 1.5x safety factor for non-OpenAI multilingual tokenizers.
# TOKEN_BUDGET_ENABLED=false
# --- LLM Provider Fallback ---------------------------------------------------
# When enabled, the primary LLM is wrapped with LangChain with_fallbacks so
# requests that fail on the primary are retried against each provider in the
# chain (left to right). DEFAULT OFF. Switching from a local privacy-aware
# provider (Ollama) to a cloud provider (OpenAI / Anthropic / ...) has both
# COST and DATA-EXFILTRATION implications.
# Your requests may leave the tenant when switching from local to cloud.
#
# Limitations to be aware of:
# - Disabled automatically when AGENT_MODE=react (RunnableWithFallbacks is
# incompatible with bind_tools used by the react sub-agent).
# - Mid-stream failures are NOT covered: with_fallbacks only catches errors
# raised before the first token; a connection drop mid-generation will
# surface as an exception to the caller.
# - Each fallback activation is logged at WARNING level naming the destination
# provider — check application logs for unexpected switches.
# LLM_FALLBACK_ENABLED=false
# Comma-separated provider chain, tried left to right.
# LLM_FALLBACK_PROVIDERS=openai,anthropic