from functools import lru_cache
from typing import Literal, Optional
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration loaded from environment variables / a `.env` file.

    Fields declared without a default (LLM_PROVIDER, QDRANT_URL, ENVIRONMENT)
    are required: pydantic-settings raises a validation error at construction
    time if they are missing, so misconfiguration fails fast at startup.
    Unknown environment variables are ignored (extra="ignore").
    """

    # --- LLM backend selection ---
    LLM_PROVIDER: Literal["groq", "ollama"]  # required; selects the chat backend
    GROQ_API_KEY: Optional[str] = None   # presumably required when LLM_PROVIDER == "groq" — confirm
    OLLAMA_BASE_URL: Optional[str] = None  # presumably used when LLM_PROVIDER == "ollama" — confirm
    OLLAMA_MODEL: Optional[str] = None
    GROQ_MODEL_DEFAULT: str = "llama-3.1-8b-instant"       # default (smaller) chat model
    GROQ_MODEL_LARGE: str = "llama-3.3-70b-versatile"      # larger model variant
    GROQ_TRANSCRIBE_MODEL: str = "whisper-large-v3-turbo"  # speech-to-text model

    # --- Vector store (Qdrant) ---
    QDRANT_URL: str  # required
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION: str = "knowledge_base"
    # Keepalive ping interval to touch Qdrant regularly and avoid idle expiry.
    # Default is 6 days (< 1 week) so the database is contacted at least weekly.
    QDRANT_KEEPALIVE_SECONDS: int = 518400  # 6 * 24 * 3600 seconds

    # --- In-memory semantic cache ---
    # Replaces Redis; no external service required.
    SEMANTIC_CACHE_SIZE: int = 512                     # max number of cached entries
    SEMANTIC_CACHE_TTL_SECONDS: int = 3600             # entry lifetime (1 hour)
    SEMANTIC_CACHE_SIMILARITY_THRESHOLD: float = 0.92  # min similarity for a cache hit — metric not visible here; confirm

    # --- Security ---
    ALLOWED_ORIGIN: str = "*"  # CORS origin; "*" permits any origin
    RATE_LIMIT_PER_MINUTE: int = 20
    JWT_SECRET: Optional[str] = None
    JWT_ALGORITHM: str = "HS256"
    # Separate token for admin operations (DB export for retraining workflow).
    # Set to any strong random string; share with ADMIN_TOKEN GitHub Actions secret.
    ADMIN_TOKEN: Optional[str] = None

    # --- MLOps (optional — only active when DAGSHUB_TOKEN is set) ---
    DAGSHUB_TOKEN: Optional[str] = None
    DAGSHUB_REPO: str = "1337Xcode/personabot"
    EVAL_ENABLED: bool = True

    # --- App ---
    ENVIRONMENT: Literal["local", "staging", "prod", "test"]  # required
    LOG_LEVEL: str = "INFO"
    # HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
    DB_PATH: str = "sqlite.db"

    # --- Gemini fast-path — live query traffic only ---
    # GEMINI_CONTEXT_PATH points to the manually maintained context file.
    # Edit backend/app/services/gemini_context.toon to update fast-path context.
    GEMINI_API_KEY: Optional[str] = None
    GEMINI_MODEL: str = "gemini-2.5-flash-lite"
    GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"

    # --- Durable GitHub interaction log — survives HF Space restarts ---
    # PERSONABOT_WRITE_TOKEN: fine-grained PAT with read+write Contents access
    # on the PersonaBot repo. When set, every interaction is appended to
    # data/interactions.jsonl in the repo so training signals persist.
    # Leave unset in local dev (interactions stay in SQLite only).
    PERSONABOT_WRITE_TOKEN: Optional[str] = None
    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"

    # --- HuggingFace Space model servers ---
    # In local env, embedder/reranker run in-process (these URLs are ignored).
    # In prod, the API Space calls the HF embedder/reranker Spaces via HTTP.
    EMBEDDER_URL: str = "http://localhost:7860"
    RERANKER_URL: str = "http://localhost:7861"
    TTS_SPACE_URL: str = "http://localhost:7862"

    # --- Speech-to-text upload constraints ---
    TRANSCRIBE_MAX_UPLOAD_BYTES: int = 2 * 1024 * 1024  # 2 MiB cap on uploaded audio
    TRANSCRIBE_TIMEOUT_SECONDS: float = 25.0

    # Load values from a local .env file; silently drop unknown keys.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
@lru_cache
def get_settings() -> Settings:
    """Construct the Settings object on first call; return the cached instance after.

    Caching makes this an effective process-wide singleton, so environment
    parsing and validation happen exactly once per process.
    """
    settings = Settings()
    return settings