File size: 3,247 Bytes
bbe01fe
 
 
 
 
 
 
 
 
 
 
 
 
 
815b978
bbe01fe
 
 
 
 
1d47e3c
 
 
bbe01fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fdc5ad
 
 
8c8aea8
65543f1
8c8aea8
 
3d134a6
 
 
 
 
 
 
 
bbe01fe
 
 
 
 
815b978
 
 
 
 
bbe01fe
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from functools import lru_cache
from typing import Literal, Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application configuration, loaded from environment variables and `.env`.

    Required (no default): LLM_PROVIDER, QDRANT_URL, ENVIRONMENT — pydantic
    raises a validation error at construction if any is missing. Everything
    else falls back to the defaults declared below. Obtain the instance via
    `get_settings()` so parsing happens once per process.
    """

    # LLM
    # Selects the chat backend; the matching *_API_KEY / *_BASE_URL below
    # must then be set (not cross-validated here — TODO confirm callers check).
    LLM_PROVIDER: Literal["groq", "ollama"]
    GROQ_API_KEY: Optional[str] = None
    OLLAMA_BASE_URL: Optional[str] = None
    OLLAMA_MODEL: Optional[str] = None
    GROQ_MODEL_DEFAULT: str = "llama-3.1-8b-instant"
    GROQ_MODEL_LARGE: str = "llama-3.3-70b-versatile"
    GROQ_TRANSCRIBE_MODEL: str = "whisper-large-v3-turbo"

    # Vector store (Qdrant)
    QDRANT_URL: str
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION: str = "knowledge_base"
    # Keepalive ping interval to touch Qdrant regularly and avoid idle expiry.
    # Default is 6 days (< 1 week) so the database is contacted at least weekly.
    QDRANT_KEEPALIVE_SECONDS: int = 518400

    # In-memory semantic cache
    # Replaces Redis. No external service required.
    SEMANTIC_CACHE_SIZE: int = 512
    SEMANTIC_CACHE_TTL_SECONDS: int = 3600
    # Cosine-similarity cutoff for treating a query as a cache hit.
    SEMANTIC_CACHE_SIMILARITY_THRESHOLD: float = 0.92

    # Security
    ALLOWED_ORIGIN: str = "*"  # CORS origin; "*" is permissive — tighten in prod
    RATE_LIMIT_PER_MINUTE: int = 20
    JWT_SECRET: Optional[str] = None
    JWT_ALGORITHM: str = "HS256"
    # Separate token for admin operations (DB export for retraining workflow).
    # Set to any strong random string; share with ADMIN_TOKEN GitHub Actions secret.
    ADMIN_TOKEN: Optional[str] = None

    # MLOps (optional — only active when DAGSHUB_TOKEN is set)
    DAGSHUB_TOKEN: Optional[str] = None
    DAGSHUB_REPO: str = "1337Xcode/personabot"
    EVAL_ENABLED: bool = True

    # App
    ENVIRONMENT: Literal["local", "staging", "prod", "test"]
    LOG_LEVEL: str = "INFO"
    # HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
    DB_PATH: str = "sqlite.db"

    # Gemini fast-path — live query traffic only.
    # GEMINI_CONTEXT_PATH points to the manually maintained context file.
    # Edit backend/app/services/gemini_context.toon to update fast-path context.
    GEMINI_API_KEY: Optional[str] = None
    GEMINI_MODEL: str = "gemini-2.5-flash-lite"
    GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"

    # Durable GitHub interaction log — survives HF Space restarts.
    # PERSONABOT_WRITE_TOKEN: fine-grained PAT with read+write Contents access
    # on the PersonaBot repo.  When set, every interaction is appended to
    # data/interactions.jsonl in the repo so training signals persist.
    # Leave unset in local dev (interactions stay in SQLite only).
    PERSONABOT_WRITE_TOKEN: Optional[str] = None
    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"

    # HuggingFace Space model servers.
    # In local env, embedder/reranker run in-process (these URLs are ignored).
    # In prod, the API Space calls the HF embedder/reranker Spaces via HTTP.
    EMBEDDER_URL: str = "http://localhost:7860"
    RERANKER_URL: str = "http://localhost:7861"
    TTS_SPACE_URL: str = "http://localhost:7862"

    # Speech-to-text upload constraints
    TRANSCRIBE_MAX_UPLOAD_BYTES: int = 2 * 1024 * 1024  # 2 MiB cap on audio uploads
    TRANSCRIBE_TIMEOUT_SECONDS: float = 25.0

    # Read overrides from a local .env file; ignore unknown variables so the
    # environment can carry unrelated keys without breaking startup.
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")


@lru_cache
def get_settings() -> Settings:
    """Return the process-wide `Settings` instance.

    `lru_cache` on a zero-argument factory acts as a lazy singleton:
    the environment is parsed on the first call only, and every later
    call returns the same cached object.
    """
    settings = Settings()
    return settings