# =============================================================================
# KU Doc Assistant — Environment Variables
# Copy this file to .env and adjust as needed.
# =============================================================================
#
# Two Docker usage modes — pick ONE and uncomment the matching block below.
#
# 1) LOCAL MODE — docker compose --profile local up --build
# Uses Ollama (in Docker) + local HuggingFace embeddings. No API keys.
#
# 2) CLOUD MODE — docker compose up --build
# Uses a cloud LLM (OpenAI / Azure / Groq / AWS Bedrock / Anthropic / Google)
# + cloud or local embeddings. Requires the relevant API key(s) set below.
#
# Container-internal addresses (QDRANT_URL, OLLAMA_BASE_URL, API_BASE_URL)
# are overridden by docker-compose.yml `environment:`. Do NOT change them here
# for Docker — the localhost defaults below are for bare-metal development.
# =============================================================================
# *****************************************************************************
# EXAMPLE 1 — LOCAL MODE (Ollama + HuggingFace, no API keys)
# *****************************************************************************
# Generation LLM and embeddings both run locally.
LLM_PROVIDER=ollama
EMBEDDING_PROVIDER=local
# Bare-metal default; docker-compose.yml overrides this inside containers.
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=gemma4:e4b
LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# Optional RAGAS judge. Left commented out so this example really needs no API
# keys: a Groq judge presumably requires GROQ_API_KEY (see the Groq example).
# While EVALUATOR_LLM_PROVIDER is unset, the generation LLM is reused as judge.
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL=llama-3.3-70b-versatile
# *****************************************************************************
# EXAMPLE 2 — CLOUD MODE (OpenAI) — uncomment & comment out Example 1 above
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=openai
# OPENAI_API_KEY=sk-...
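# Optional: custom endpoint for OpenAI-compatible APIs (keep this note on its
# own line; an inline "# ..." after an empty value may be read as the value).
# OPENAI_BASE_URL=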
# OPENAI_MODEL=gpt-4o-mini
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# *****************************************************************************
# EXAMPLE 2a — CLOUD MODE (SiliconFlow, OpenAI-compatible)
# *****************************************************************************
# LLM_PROVIDER=openai
# EMBEDDING_PROVIDER=local
# OPENAI_API_KEY=your-siliconflow-api-key
# OPENAI_BASE_URL=https://api.siliconflow.cn/v1
# OPENAI_MODEL=Qwen/Qwen2.5-72B-Instruct
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2b — CLOUD MODE (Azure OpenAI) — uncomment & comment out above
# *****************************************************************************
# LLM_PROVIDER=azure_openai
# EMBEDDING_PROVIDER=azure_openai
# AZURE_OPENAI_API_KEY=...
# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com/
# AZURE_OPENAI_API_VERSION=2024-02-01
# AZURE_OPENAI_DEPLOYMENT=<deployment-name>
# AZURE_OPENAI_EMBEDDING_DEPLOYMENT=<embedding-deployment>
# *****************************************************************************
# EXAMPLE 2c — CLOUD MODE (Groq LLM + local embeddings, FREE)
# *****************************************************************************
# LLM_PROVIDER=groq
# EMBEDDING_PROVIDER=local
# GROQ_API_KEY=gsk_...
# GROQ_MODEL=qwen/qwen3-32b
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2d — CLOUD MODE (AWS Bedrock)
# *****************************************************************************
# LLM_PROVIDER=bedrock
# EMBEDDING_PROVIDER=bedrock
# AWS_REGION=eu-west-1
# AWS_BEDROCK_MODEL=anthropic.claude-sonnet-4-20250514-v1:0
# AWS_BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
# Note: Uses default AWS credential chain (env vars, ~/.aws/credentials, or IAM role)
# *****************************************************************************
# EXAMPLE 2e — CLOUD MODE (Anthropic LLM + local embeddings)
# *****************************************************************************
# LLM_PROVIDER=anthropic
# EMBEDDING_PROVIDER=local
# ANTHROPIC_API_KEY=sk-ant-...
# ANTHROPIC_MODEL=claude-sonnet-4-20250514
# LOCAL_EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# *****************************************************************************
# EXAMPLE 2f — CLOUD MODE (Google GenAI)
# *****************************************************************************
# LLM_PROVIDER=google_genai
# EMBEDDING_PROVIDER=google_genai
# GOOGLE_API_KEY=...
# GOOGLE_LLM_MODEL=gemini-2.5-flash
# GOOGLE_EMBEDDING_MODEL=models/embedding-001
# =============================================================================
# Shared settings (apply to all modes)
# =============================================================================
# --- Vector Store / Search ---------------------------------------------------
# On-disk Qdrant location; presumably used only while QDRANT_URL is empty.
QDRANT_PATH=./qdrant_data
# Empty = local file mode; Docker overrides to http://qdrant:6333.
# This note is deliberately on its own line: readers that do not strip inline
# comments (e.g. `docker run --env-file`) would otherwise take the comment text
# as the value of QDRANT_URL.
QDRANT_URL=
COLLECTION_NAME=ku_documents
EMBEDDING_MODEL=paraphrase-multilingual-MiniLM-L12-v2
# 384 is the output size of paraphrase-multilingual-MiniLM-L12-v2; revisit this
# value if the embedding model is changed.
EMBEDDING_DIMENSION=384
GENERATION_MODEL=gemma4:e4b
RERANKER_MODEL=cross-encoder/mmarco-mMiniLMv2-L12-H384-v1
# Chunking parameters (units are defined by the application; not shown here).
CHUNK_SIZE=512
CHUNK_OVERLAP=64
# Number of results to retrieve.
TOP_K=5
# Relative weights of BM25 and dense (vector) retrieval; they sum to 1.0 here.
BM25_WEIGHT=0.3
DENSE_WEIGHT=0.7
LOG_LEVEL=INFO
# --- Query Translation -------------------------------------------------------
# Translate non-Danish queries to Danish before retrieval (BM25 + vector search).
# Default: true when LLM_PROVIDER=ollama, false for cloud providers.
# TRANSLATE_QUERY=true
# --- RAGAS Evaluation Judge --------------------------------------------------
# Use a strong, independent judge LLM for RAGAS scoring. When generation runs
# on a small local model, a stronger judge gives substantially less noisy
# scores. Leave EVALUATOR_LLM_PROVIDER empty to reuse the generation LLM.
#
# Example: generation = local Ollama (gemma), judge = Qwen3-32B via Groq
# EVALUATOR_LLM_PROVIDER=groq
# EVALUATOR_LLM_MODEL is optional; it defaults to GROQ_MODEL.
# EVALUATOR_LLM_MODEL=qwen/qwen3-32b
# --- Inter-service Communication (bare-metal defaults) -----------------------
# Bare-metal default; Docker overrides this to http://api:8000.
# Kept on its own line because not every env-file reader strips inline
# comments, and a trailing "# ..." could end up inside the URL.
API_BASE_URL=http://localhost:8000
# --- Token Budget (measure-only) ---------------------------------------------
# When true, the routers log estimated prompt token sizes at the three known
# generation points (generate_answer, planner, synthesizer). No truncation is
# applied — this is purely observability. Counts use tiktoken cl100k as a
# baseline with a 1.5x safety factor for non-OpenAI multilingual tokenizers.
# TOKEN_BUDGET_ENABLED=false
# --- LLM Provider Fallback ---------------------------------------------------
# When enabled, the primary LLM is wrapped with LangChain with_fallbacks so
# requests that fail on the primary are retried against each provider in the
# chain (left to right). DEFAULT OFF. Switching from a local privacy-aware
# provider (Ollama) to a cloud provider (OpenAI / Anthropic / ...) has both
# COST and DATA-EXFILTRATION implications.
# Your requests may leave the tenant when switching from local to cloud.
#
# Limitations to be aware of:
# - Disabled automatically when AGENT_MODE=react (RunnableWithFallbacks is
# incompatible with bind_tools used by the react sub-agent).
# - Mid-stream failures are NOT covered: with_fallbacks only catches errors
# raised before the first token; a connection drop mid-generation will
# surface as an exception to the caller.
# - Each fallback activation is logged at WARNING level naming the destination
# provider — check application logs for unexpected switches.
# LLM_FALLBACK_ENABLED=false
# LLM_FALLBACK_PROVIDERS is a comma-separated provider chain, tried left to right.
# LLM_FALLBACK_PROVIDERS=openai,anthropic