"""Application settings loaded from environment variables / a local .env file."""
from functools import lru_cache
from typing import Literal, Optional

from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed application configuration.

    Values are resolved by pydantic-settings from the process environment,
    falling back to a local ``.env`` file; unknown keys are ignored.
    Fields without a default (``LLM_PROVIDER``, ``QDRANT_URL``,
    ``ENVIRONMENT``) are required and fail fast at startup if unset.
    """

    # ------------------------------------------------------------------ LLM
    LLM_PROVIDER: Literal["groq", "ollama"]
    GROQ_API_KEY: Optional[str] = None
    OLLAMA_BASE_URL: Optional[str] = None
    OLLAMA_MODEL: Optional[str] = None
    GROQ_MODEL_DEFAULT: str = "llama-3.1-8b-instant"
    GROQ_MODEL_LARGE: str = "llama-3.3-70b-versatile"
    GROQ_TRANSCRIBE_MODEL: str = "whisper-large-v3-turbo"

    # --------------------------------------------------------------- Vector
    QDRANT_URL: str
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION: str = "knowledge_base"
    # Keepalive ping interval to touch Qdrant regularly and avoid idle expiry.
    # Default is 6 days (< 1 week) so the database is contacted at least weekly.
    QDRANT_KEEPALIVE_SECONDS: int = 518400

    # ----------------------------------------------- In-memory semantic cache
    # Replaces Redis. No external service required.
    SEMANTIC_CACHE_SIZE: int = 512
    SEMANTIC_CACHE_TTL_SECONDS: int = 3600
    SEMANTIC_CACHE_SIMILARITY_THRESHOLD: float = 0.92

    # ------------------------------------------------------------- Security
    ALLOWED_ORIGIN: str = "*"
    RATE_LIMIT_PER_MINUTE: int = 20
    JWT_SECRET: Optional[str] = None
    JWT_ALGORITHM: str = "HS256"
    # Separate token for admin operations (DB export for retraining workflow).
    # Set to any strong random string; share with ADMIN_TOKEN GitHub Actions secret.
    ADMIN_TOKEN: Optional[str] = None

    # ---------------- MLOps (optional — only active when DAGSHUB_TOKEN is set)
    DAGSHUB_TOKEN: Optional[str] = None
    DAGSHUB_REPO: str = "1337Xcode/personabot"
    EVAL_ENABLED: bool = True

    # ------------------------------------------------------------------ App
    ENVIRONMENT: Literal["local", "staging", "prod", "test"]
    LOG_LEVEL: str = "INFO"
    # HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
    DB_PATH: str = "sqlite.db"

    # ------------------------------------ Gemini fast-path (live queries only)
    # GEMINI_CONTEXT_PATH points to the manually maintained context file.
    # Edit backend/app/services/gemini_context.toon to update fast-path context.
    GEMINI_API_KEY: Optional[str] = None
    GEMINI_MODEL: str = "gemini-2.5-flash-lite"
    GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"

    # ------------- Durable GitHub interaction log (survives HF Space restarts)
    # PERSONABOT_WRITE_TOKEN: fine-grained PAT with read+write Contents access
    # on the PersonaBot repo. When set, every interaction is appended to
    # data/interactions.jsonl in the repo so training signals persist.
    # Leave unset in local dev (interactions stay in SQLite only).
    PERSONABOT_WRITE_TOKEN: Optional[str] = None
    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"

    # ------------------------------------------ HuggingFace Space model servers
    # In local env, embedder/reranker run in-process (these URLs are ignored).
    # In prod, the API Space calls the HF embedder/reranker Spaces via HTTP.
    EMBEDDER_URL: str = "http://localhost:7860"
    RERANKER_URL: str = "http://localhost:7861"
    TTS_SPACE_URL: str = "http://localhost:7862"

    # --------------------------------------- Speech-to-text upload constraints
    TRANSCRIBE_MAX_UPLOAD_BYTES: int = 2 * 1024 * 1024
    TRANSCRIBE_TIMEOUT_SECONDS: float = 25.0

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
@lru_cache
def get_settings() -> Settings:
    """Return the application :class:`Settings`, constructed once and cached.

    ``lru_cache`` was imported at the top of the file but never applied, so
    every call re-validated the environment and re-read ``.env``. Caching the
    singleton makes this cheap to use as a dependency (e.g. FastAPI
    ``Depends(get_settings)``) while keeping the call signature unchanged.

    Returns:
        Settings: the process-wide settings instance (same object each call).
    """
    return Settings()