#  Document AI Analyst — Environment Configuration
#  Copy this file to backend/.env and fill in your values:
#    cp .env.example backend/.env


# ── Application Config ──────────────────────────────────────────────

# Secret key used to sign JWTs and Flask sessions.
# Generate one: python -c "import secrets; print(secrets.token_urlsafe(32))"
# Required — replace the placeholder below before deploying.
SECRET_KEY=change-me-in-production

# ── Environment & CORS ──────────────────────────────

# Runtime environment. Use "production" for deployed instances.
# In production, set ALLOWED_ORIGINS explicitly; requests from any other origin are rejected by CORS.
# Optional — defaults to "development"
ENVIRONMENT=development

# Debug mode. Enables detailed error pages and auto-reload.
# Do NOT enable in production.
# Optional — defaults to False
# DEBUG=False

# Comma-separated list of allowed CORS origins.
# Only enforced when ENVIRONMENT=production. In development, or when this value is empty,
# all origins are allowed — so always set it explicitly in production.
# Optional — defaults to "http://localhost:3000,http://localhost:7860"
ALLOWED_ORIGINS=http://localhost:3000,http://localhost:7860

# ── Database ─────────────────────────────────────────────────

# SQLAlchemy database connection string.
# Default: SQLite file stored at ./data/app.db
# For Postgres: postgresql+asyncpg://user:pass@host:5432/dbname
#   (the "+asyncpg" suffix selects the asyncpg driver, which must be installed)
# Optional — defaults to sqlite:///./data/app.db
# DATABASE_URL=sqlite:///./data/app.db

# ── Authentication ──────────────────────────────────────────

# JWT signing algorithm. Leave as default unless you know what you're doing.
# Optional — defaults to "HS256"
# JWT_ALGORITHM=HS256

# JWT lifetime in hours. Once a token expires, the user must log in again.
# Optional — defaults to 72
# JWT_EXPIRY_HOURS=72

# ── File Upload ─────────────────────────────────────────────

# Directory where uploaded documents (PDFs, DOCXs, etc.) are stored.
# Optional — defaults to "./data/uploads"
# UPLOAD_DIR=./data/uploads

# Maximum upload file size in megabytes.
# Optional — defaults to 50
# MAX_FILE_SIZE_MB=50

# Comma-separated list of allowed file extensions for upload.
# Optional — defaults to "pdf,docx,txt,md"
# ALLOWED_EXTENSIONS=pdf,docx,txt,md

# ── HuggingFace (Required for LLM inference) ────────────────

# HuggingFace API token. Used to call the Inference API for LLM responses.
# Get yours: https://huggingface.co/settings/tokens (free tier available)
# Required (app won't generate answers without it)
HF_TOKEN=your_huggingface_token_here

# ── LLM Configuration ───────────────────────────────────────

# HuggingFace model ID used for answer generation.
# Check available models: https://huggingface.co/models?inference=warm&sort=trending
# Optional — defaults to "mistralai/Mistral-7B-Instruct-v0.3"
# LLM_MODEL=mistralai/Mistral-7B-Instruct-v0.3

# Sampling temperature (0.0 = deterministic, 1.0 = very creative).
# Optional — defaults to 0.3
# LLM_TEMPERATURE=0.3

# Maximum number of tokens the LLM can generate per response.
# Optional — defaults to 1024
# LLM_MAX_NEW_TOKENS=1024

# ── Embeddings (Optional — defaults shown) ──────────────────────────

# SentenceTransformer model ID for generating document embeddings.
# Model is downloaded once and cached locally. No external API call.
# Optional — defaults to "sentence-transformers/all-MiniLM-L6-v2"
# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Dimension of the embedding vectors (must match the model output).
# Optional — defaults to 384
# EMBEDDING_DIMENSION=384

# ── RAG Config (Optional — defaults shown) ───────────

# ── ChromaDB (Vector Store) ─────────────────────────────────

# Directory where ChromaDB persists its vector index to disk.
# Optional — defaults to "./data/chroma_db"
# CHROMA_PERSIST_DIR=./data/chroma_db

# ── Document Chunking ───────────────────────────────────────

# Number of characters per document chunk.
# Larger chunks give more context; smaller chunks improve retrieval precision.
# Optional — defaults to 1000
# CHUNK_SIZE=1000

# Character overlap between consecutive chunks. Helps maintain context at boundaries.
# Optional — defaults to 200
# CHUNK_OVERLAP=200

# ── Retrieval ───────────────────────────────────────────────

# Number of candidate chunks retrieved from the vector store during semantic search.
# Optional — defaults to 10
# TOP_K_RETRIEVAL=10

# Number of top chunks passed to the LLM after cross-encoder reranking.
# Must be ≤ TOP_K_RETRIEVAL.
# Optional — defaults to 5
# TOP_K_RERANK=5

# Cross-encoder model used for reranking retrieved chunks by relevance.
# Optional — defaults to "cross-encoder/ms-marco-MiniLM-L-6-v2"
# RERANKER_MODEL=cross-encoder/ms-marco-MiniLM-L-6-v2

# ── (Legacy) Flask-Only Variables ───────────────────────────
# These are only used if you run the old Flask app (app.py) instead of FastAPI.
# They are ignored by the new FastAPI backend.

# MONGO_URI=mongodb://localhost:27017/pdf_assistant
# GOOGLE_CLIENT_ID=your_google_client_id
# GOOGLE_CLIENT_SECRET=your_google_client_secret