InsuranceBot / backend /config.py
rohitsar567's picture
Revert "Revert "feat(#52): PDF upload → persistent JSON + marketplace card + restart-survival""
afdb7c7
Raw
History Blame Contribute Delete
5.3 kB
"""Centralized settings loaded from .env via python-dotenv.
All API keys + tunables live here. Never read os.environ directly elsewhere.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
# Repository root: the parent of the directory that holds this file.
ROOT = Path(__file__).resolve().parent.parent
# Read the .env file at the repository root before any setting below
# consults os.environ.
load_dotenv(dotenv_path=ROOT / ".env")
class Settings:
    """Process-wide configuration, resolved once when the class body runs.

    Every field is a class attribute. Fields backed by an environment
    variable are read from ``os.environ`` at class-creation time, so later
    changes to the environment are not reflected here.

    For the integer tunables and ``UPLOADED_DOCS_DIR``, an environment
    variable that is set but blank is treated the same as an unset one and
    the built-in default is used.
    """

    # Provider keys
    SARVAM_API_KEY: str = os.environ.get("SARVAM_API_KEY", "")
    VOYAGE_API_KEY: str = os.environ.get("VOYAGE_API_KEY", "")
    # NVIDIA NIM — single provider hosting the reasoning stack (brain +
    # judge; concrete model IDs are set on NVIDIA_NIM_*_MODEL below).
    # Free tier: 40 req/min, no daily cap, no card.
    NVIDIA_NIM_API_KEY: str = os.environ.get("NVIDIA_NIM_API_KEY", "")
    # CROSS-PROVIDER FALLBACKS — last-resort entries appended to BRAIN_CHAIN +
    # FAST_BRAIN_CHAIN + JUDGE_CHAIN so the brain + judge survive a full NIM
    # outage (regional ingress brownout, full-pool 5xx, etc.). NIM remains
    # the PRIMARY provider — these only get hit after every NIM candidate in
    # the chain has failed. Both keys are optional: if unset the fallback is
    # simply skipped by NimChainLLM and the chain continues.
    #   OPENROUTER_API_KEY — https://openrouter.ai/keys (free-tier OSS models)
    #   GROQ_API_KEY — https://console.groq.com/keys (LPU inference, lowest TTFT)
    OPENROUTER_API_KEY: str = os.environ.get("OPENROUTER_API_KEY", "")
    GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "")

    # Sarvam endpoints (voice STT/TTS + Indic translation only)
    SARVAM_BASE_URL: str = "https://api.sarvam.ai"
    SARVAM_STT_PATH: str = "/speech-to-text"
    SARVAM_TTS_PATH: str = "/text-to-speech"
    SARVAM_CHAT_PATH: str = "/v1/chat/completions"
    # Sarvam model identifiers
    SARVAM_STT_MODEL: str = "saarika:v2.5"
    SARVAM_TTS_MODEL: str = "bulbul:v2"
    SARVAM_TTS_SPEAKER: str = "anushka"  # natural female advisor voice
    SARVAM_LLM_MODEL: str = "sarvam-m"  # Sarvam model for Indic translation

    # Voyage — embeddings run on local BGE; this is kept for back-compat
    # with existing extracted/ artifacts.
    VOYAGE_MODEL: str = "voyage-3"

    # NVIDIA NIM (single source of truth for brain + judge — tiered
    # routing). Qwen 3-Next 80B + Mistral Large 3 are the production
    # models on NIM free tier.
    NVIDIA_NIM_BASE_URL: str = "https://integrate.api.nvidia.com/v1"
    NVIDIA_NIM_BRAIN_MODEL: str = "qwen/qwen3-next-80b-a3b-instruct"
    NVIDIA_NIM_FAST_BRAIN_MODEL: str = "qwen/qwen3-next-80b-a3b-instruct"
    NVIDIA_NIM_JUDGE_MODEL: str = "mistralai/mistral-large-3-675b-instruct-2512"

    # Storage paths
    CORPUS_DIR: Path = ROOT / "rag" / "corpus"
    EXTRACTED_DIR: Path = ROOT / "rag" / "extracted"
    VECTORS_DIR: Path = ROOT / "rag" / "vectors"
    STRUCTURED_DB: Path = ROOT / "rag" / "policies.duckdb"
    # Single source of truth for the curated-facts directory. Resolves to
    # the 40-data directory under the repo root; the directory name is
    # intentionally kept (parallel to 70-docs/80-audit).
    DATA_DIR: Path = ROOT / "40-data"

    # #52 — PERSISTENT store for user-uploaded policy docs (raw PDF + the
    # curated-facts JSON record we derive + the chunk payload to re-index).
    #
    # On the HF Space, rag/vectors lives on the EPHEMERAL container FS by
    # design (KI-119 / entrypoint.sh) so every rebuild pulls a fresh Chroma
    # snapshot — an uploaded doc indexed only there would vanish on restart.
    # There IS a persistent `/data` disk on the Space; entrypoint.sh exports
    # UPLOADED_DOCS_DIR=/data/uploaded_docs when /data is writable. We honour
    # that env var here so persisted uploads survive a Space rebuild.
    #
    # Locally (no /data, env unset or blank) it falls back under DATA_DIR so
    # the exact same code path works without any HF-specific branching.
    # The fallback is derived from DATA_DIR itself so the two cannot drift.
    UPLOADED_DOCS_DIR: Path = Path(
        os.environ.get("UPLOADED_DOCS_DIR") or DATA_DIR / "uploaded_docs"
    )

    # Tunables (overrideable via env vars so the hyperparameter sweep can
    # iterate). `or` rather than a .get() default: a variable that is set
    # but blank would otherwise reach int("") and raise at import time.
    CHUNK_TOKENS: int = int(os.environ.get("CHUNK_TOKENS") or 800)
    CHUNK_OVERLAP_TOKENS: int = int(os.environ.get("CHUNK_OVERLAP_TOKENS") or 120)
    RAG_TOP_K: int = int(os.environ.get("RAG_TOP_K") or 5)

    # Quarantine TTL — user-uploaded PDFs live in the SEPARATE
    # `user_uploads_quarantine` Chroma collection. They are NOT durable
    # corpus; a session's upload is auto-purged after this many seconds of
    # no further uploads from that session, so the quarantine index can't
    # grow unbounded and stale private docs don't linger. Default 24h.
    # The periodic purge task sweeps every QUARANTINE_PURGE_INTERVAL_SEC
    # (default 30 minutes).
    QUARANTINE_TTL_SECONDS: int = int(
        os.environ.get("QUARANTINE_TTL_SECONDS") or 24 * 3600
    )
    QUARANTINE_PURGE_INTERVAL_SEC: int = int(
        os.environ.get("QUARANTINE_PURGE_INTERVAL_SEC") or 30 * 60
    )

    @classmethod
    def validate(cls) -> list[str]:
        """Return the names of required keys that are missing.

        A key counts as missing when its class attribute is falsy (for the
        string keys checked here, that means empty). An empty list means
        every required key is present.
        """
        required = ("SARVAM_API_KEY", "NVIDIA_NIM_API_KEY")
        return [name for name in required if not getattr(cls, name)]
# Module-level Settings instance. Every field is a class attribute, so this
# object exposes the same values as the Settings class itself.
settings = Settings()