Spaces:
Sleeping
Sleeping
Revert "Revert "feat(#52): PDF upload — persistent JSON + marketplace card + restart-survival""
afdb7c7 | """Centralized settings loaded from .env via python-dotenv. | |
| All API keys + tunables live here. Never read os.environ directly elsewhere. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from pathlib import Path | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
# Repository root: two directory levels above this file's resolved location.
ROOT = Path(__file__).resolve().parent.parent

# Load <ROOT>/.env before the Settings class body below reads os.environ.
load_dotenv(ROOT.joinpath(".env"))
class Settings:
    """Provider API keys, endpoints, model IDs, storage paths and tunables.

    Every value is a class attribute evaluated once, when this class body
    runs at import time. Values backed by ``os.environ`` are therefore a
    snapshot of the environment at that moment; changing the environment
    afterwards does not update them.
    """

    # Provider keys
    SARVAM_API_KEY: str = os.environ.get("SARVAM_API_KEY", "")
    VOYAGE_API_KEY: str = os.environ.get("VOYAGE_API_KEY", "")
    # NVIDIA NIM — single provider hosting the reasoning stack (brain +
    # judge; concrete model IDs are set on NVIDIA_NIM_*_MODEL below).
    # Free tier: 40 req/min, no daily cap, no card.
    NVIDIA_NIM_API_KEY: str = os.environ.get("NVIDIA_NIM_API_KEY", "")
    # CROSS-PROVIDER FALLBACKS — last-resort entries appended to BRAIN_CHAIN +
    # FAST_BRAIN_CHAIN + JUDGE_CHAIN so the brain + judge survive a full NIM
    # outage (regional ingress brownout, full-pool 5xx, etc.). NIM remains
    # the PRIMARY provider — these only get hit after every NIM candidate in
    # the chain has failed. Both keys are optional: if unset the fallback is
    # simply skipped by NimChainLLM and the chain continues.
    #   OPENROUTER_API_KEY — https://openrouter.ai/keys (free-tier OSS models)
    #   GROQ_API_KEY — https://console.groq.com/keys (LPU inference, lowest TTFT)
    OPENROUTER_API_KEY: str = os.environ.get("OPENROUTER_API_KEY", "")
    GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "")

    # Sarvam endpoints (voice STT/TTS + Indic translation only)
    SARVAM_BASE_URL: str = "https://api.sarvam.ai"
    SARVAM_STT_PATH: str = "/speech-to-text"
    SARVAM_TTS_PATH: str = "/text-to-speech"
    SARVAM_CHAT_PATH: str = "/v1/chat/completions"

    # Sarvam model identifiers
    SARVAM_STT_MODEL: str = "saarika:v2.5"
    SARVAM_TTS_MODEL: str = "bulbul:v2"
    SARVAM_TTS_SPEAKER: str = "anushka"  # natural female advisor voice
    SARVAM_LLM_MODEL: str = "sarvam-m"  # Sarvam model for Indic translation

    # Voyage — embeddings run on local BGE; this is kept for back-compat
    # with existing extracted/ artifacts.
    VOYAGE_MODEL: str = "voyage-3"

    # NVIDIA NIM (single source of truth for brain + judge — tiered
    # routing). Qwen 3-Next 80B + Mistral Large 3 are the production
    # models on NIM free tier.
    NVIDIA_NIM_BASE_URL: str = "https://integrate.api.nvidia.com/v1"
    NVIDIA_NIM_BRAIN_MODEL: str = "qwen/qwen3-next-80b-a3b-instruct"
    NVIDIA_NIM_FAST_BRAIN_MODEL: str = "qwen/qwen3-next-80b-a3b-instruct"
    NVIDIA_NIM_JUDGE_MODEL: str = "mistralai/mistral-large-3-675b-instruct-2512"

    # Storage paths
    CORPUS_DIR: Path = ROOT / "rag" / "corpus"
    EXTRACTED_DIR: Path = ROOT / "rag" / "extracted"
    VECTORS_DIR: Path = ROOT / "rag" / "vectors"
    STRUCTURED_DB: Path = ROOT / "rag" / "policies.duckdb"
    # Single source of truth for the curated-facts directory. Resolves to
    # <repo_root>/40-data; the directory name is intentionally kept
    # (parallel to 70-docs/80-audit).
    DATA_DIR: Path = ROOT / "40-data"

    # #52 — PERSISTENT store for user-uploaded policy docs (raw PDF + the
    # curated-facts JSON record we derive + the chunk payload to re-index).
    #
    # On the HF Space, rag/vectors lives on the EPHEMERAL container FS by
    # design (KI-119 / entrypoint.sh) so every rebuild pulls a fresh Chroma
    # snapshot — an uploaded doc indexed only there would vanish on restart.
    # There IS a persistent `/data` disk on the Space; entrypoint.sh exports
    # UPLOADED_DOCS_DIR=/data/uploaded_docs when /data is writable. We honour
    # that env var here so persisted uploads survive a Space rebuild.
    #
    # Locally (no /data, env unset) it falls back under DATA_DIR so the exact
    # same code path works without any HF-specific branching. The fallback is
    # derived from DATA_DIR itself rather than repeating the "40-data" literal.
    UPLOADED_DOCS_DIR: Path = Path(
        os.environ.get("UPLOADED_DOCS_DIR", "") or str(DATA_DIR / "uploaded_docs")
    )

    # Tunables (overrideable via env vars so the hyperparameter sweep can iterate).
    # `get(...) or default` treats a blank value (e.g. `CHUNK_TOKENS=` in .env)
    # the same as unset instead of crashing import with int("") -> ValueError,
    # matching how UPLOADED_DOCS_DIR handles an empty override.
    CHUNK_TOKENS: int = int(os.environ.get("CHUNK_TOKENS") or "800")
    CHUNK_OVERLAP_TOKENS: int = int(os.environ.get("CHUNK_OVERLAP_TOKENS") or "120")
    RAG_TOP_K: int = int(os.environ.get("RAG_TOP_K") or "5")

    # Quarantine TTL — user-uploaded PDFs live in the SEPARATE
    # `user_uploads_quarantine` Chroma collection. They are NOT durable
    # corpus; a session's upload is auto-purged after this many seconds of
    # no further uploads from that session, so the quarantine index can't
    # grow unbounded and stale private docs don't linger. Default 24h.
    # The periodic purge task sweeps every QUARANTINE_PURGE_INTERVAL_SEC.
    QUARANTINE_TTL_SECONDS: int = int(
        os.environ.get("QUARANTINE_TTL_SECONDS") or 24 * 3600
    )
    QUARANTINE_PURGE_INTERVAL_SEC: int = int(
        os.environ.get("QUARANTINE_PURGE_INTERVAL_SEC") or 30 * 60
    )

    @classmethod
    def validate(cls) -> list[str]:
        """Return the names of required API keys that are unset or empty.

        An empty list means every required key has a non-empty value.

        Declared as a classmethod (the parameter was already named ``cls``)
        so that ``Settings.validate()`` works in addition to calling it on
        an instance. It reads the class-level values, not per-instance
        overrides.
        """
        required = ("SARVAM_API_KEY", "NVIDIA_NIM_API_KEY")
        return [name for name in required if not getattr(cls, name)]


# Module-level instance, created once at import time.
settings = Settings()