from functools import lru_cache
from typing import Literal, Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Central application configuration, populated from the environment.

    Values are read from process env vars and an optional ``.env`` file;
    unrecognised variables are ignored. Fields declared without a default
    (``LLM_PROVIDER``, ``QDRANT_URL``, ``ENVIRONMENT``) are required and
    validation fails at startup if they are missing.
    """

    # ---- LLM providers ----------------------------------------------------
    LLM_PROVIDER: Literal["groq", "ollama"]
    GROQ_API_KEY: Optional[str] = None
    OLLAMA_BASE_URL: Optional[str] = None
    OLLAMA_MODEL: Optional[str] = None
    GROQ_MODEL_DEFAULT: str = "llama-3.1-8b-instant"
    GROQ_MODEL_LARGE: str = "llama-3.3-70b-versatile"
    GROQ_TRANSCRIBE_MODEL: str = "whisper-large-v3-turbo"

    # ---- Vector store (Qdrant) --------------------------------------------
    QDRANT_URL: str
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION: str = "knowledge_base"
    # Interval for a keepalive ping that touches Qdrant so the instance is
    # never idle long enough to expire. 518400 s = 6 days, i.e. strictly
    # under one week, guaranteeing at least weekly contact.
    QDRANT_KEEPALIVE_SECONDS: int = 518400

    # ---- In-memory semantic cache -----------------------------------------
    # Stands in for Redis; runs in-process, so no external service is needed.
    SEMANTIC_CACHE_SIZE: int = 512
    SEMANTIC_CACHE_TTL_SECONDS: int = 3600
    SEMANTIC_CACHE_SIMILARITY_THRESHOLD: float = 0.92

    # ---- Security ---------------------------------------------------------
    ALLOWED_ORIGIN: str = "*"
    RATE_LIMIT_PER_MINUTE: int = 20
    JWT_SECRET: Optional[str] = None
    JWT_ALGORITHM: str = "HS256"
    # Dedicated token for admin endpoints (DB export used by the retraining
    # workflow). Use any strong random string and mirror it in the
    # ADMIN_TOKEN GitHub Actions secret.
    ADMIN_TOKEN: Optional[str] = None

    # ---- MLOps (enabled only when DAGSHUB_TOKEN is present) ---------------
    DAGSHUB_TOKEN: Optional[str] = None
    DAGSHUB_REPO: str = "1337Xcode/personabot"
    EVAL_ENABLED: bool = True

    # ---- Application ------------------------------------------------------
    ENVIRONMENT: Literal["local", "staging", "prod", "test"]
    LOG_LEVEL: str = "INFO"
    # On HF Spaces the persistent volume is mounted at /data; local dev
    # keeps the database at a relative path instead.
    DB_PATH: str = "sqlite.db"

    # ---- Gemini fast path (live query traffic only) -----------------------
    # GEMINI_CONTEXT_PATH is a hand-maintained context file; edit
    # backend/app/services/gemini_context.toon to change fast-path context.
    GEMINI_API_KEY: Optional[str] = None
    GEMINI_MODEL: str = "gemini-2.5-flash-lite"
    GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"

    # ---- Durable GitHub interaction log -----------------------------------
    # Persists across HF Space restarts. PERSONABOT_WRITE_TOKEN is a
    # fine-grained PAT with read+write Contents access on the PersonaBot
    # repo; when set, each interaction is appended to data/interactions.jsonl
    # in that repo so training signals survive. Leave unset in local dev
    # (interactions then live only in SQLite).
    PERSONABOT_WRITE_TOKEN: Optional[str] = None
    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"

    # ---- HuggingFace Space model servers ----------------------------------
    # Local env runs the embedder/reranker in-process and ignores these
    # URLs; in prod the API Space calls the embedder/reranker Spaces over
    # HTTP.
    EMBEDDER_URL: str = "http://localhost:7860"
    RERANKER_URL: str = "http://localhost:7861"
    TTS_SPACE_URL: str = "http://localhost:7862"

    # ---- Speech-to-text upload constraints --------------------------------
    TRANSCRIBE_MAX_UPLOAD_BYTES: int = 2 * 1024 * 1024
    TRANSCRIBE_TIMEOUT_SECONDS: float = 25.0

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")


@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton (built once, then cached)."""
    return Settings()