"""Application settings loaded from environment variables / a local .env file."""
from functools import lru_cache
from typing import Literal, Optional

from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed application configuration.

    Values are resolved by pydantic-settings from the process environment,
    falling back to a local ``.env`` file; unknown keys are ignored.
    Fields without a default (``LLM_PROVIDER``, ``QDRANT_URL``,
    ``ENVIRONMENT``) are required and fail fast at startup if unset.
    """

    # ------------------------------------------------------------------ LLM
    LLM_PROVIDER: Literal["groq", "ollama"]
    GROQ_API_KEY: Optional[str] = None
    OLLAMA_BASE_URL: Optional[str] = None
    OLLAMA_MODEL: Optional[str] = None
    GROQ_MODEL_DEFAULT: str = "llama-3.1-8b-instant"
    GROQ_MODEL_LARGE: str = "llama-3.3-70b-versatile"
    GROQ_TRANSCRIBE_MODEL: str = "whisper-large-v3-turbo"

    # --------------------------------------------------------------- Vector
    QDRANT_URL: str
    QDRANT_API_KEY: Optional[str] = None
    QDRANT_COLLECTION: str = "knowledge_base"
    # Keepalive ping interval to touch Qdrant regularly and avoid idle expiry.
    # Default is 6 days (< 1 week) so the database is contacted at least weekly.
    QDRANT_KEEPALIVE_SECONDS: int = 518400

    # ----------------------------------------------- In-memory semantic cache
    # Replaces Redis. No external service required.
    SEMANTIC_CACHE_SIZE: int = 512
    SEMANTIC_CACHE_TTL_SECONDS: int = 3600
    SEMANTIC_CACHE_SIMILARITY_THRESHOLD: float = 0.92

    # ------------------------------------------------------------- Security
    ALLOWED_ORIGIN: str = "*"
    RATE_LIMIT_PER_MINUTE: int = 20
    JWT_SECRET: Optional[str] = None
    JWT_ALGORITHM: str = "HS256"
    # Separate token for admin operations (DB export for retraining workflow).
    # Set to any strong random string; share with ADMIN_TOKEN GitHub Actions secret.
    ADMIN_TOKEN: Optional[str] = None

    # ---------------- MLOps (optional — only active when DAGSHUB_TOKEN is set)
    DAGSHUB_TOKEN: Optional[str] = None
    DAGSHUB_REPO: str = "1337Xcode/personabot"
    EVAL_ENABLED: bool = True

    # ------------------------------------------------------------------ App
    ENVIRONMENT: Literal["local", "staging", "prod", "test"]
    LOG_LEVEL: str = "INFO"
    # HF Spaces persistent volume mounts at /data. Local dev uses a relative path.
    DB_PATH: str = "sqlite.db"

    # ------------------------------------ Gemini fast-path (live queries only)
    # GEMINI_CONTEXT_PATH points to the manually maintained context file.
    # Edit backend/app/services/gemini_context.toon to update fast-path context.
    GEMINI_API_KEY: Optional[str] = None
    GEMINI_MODEL: str = "gemini-2.5-flash-lite"
    GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"

    # ------------- Durable GitHub interaction log (survives HF Space restarts)
    # PERSONABOT_WRITE_TOKEN: fine-grained PAT with read+write Contents access
    # on the PersonaBot repo. When set, every interaction is appended to
    # data/interactions.jsonl in the repo so training signals persist.
    # Leave unset in local dev (interactions stay in SQLite only).
    PERSONABOT_WRITE_TOKEN: Optional[str] = None
    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"

    # ------------------------------------------ HuggingFace Space model servers
    # In local env, embedder/reranker run in-process (these URLs are ignored).
    # In prod, the API Space calls the HF embedder/reranker Spaces via HTTP.
    EMBEDDER_URL: str = "http://localhost:7860"
    RERANKER_URL: str = "http://localhost:7861"
    TTS_SPACE_URL: str = "http://localhost:7862"

    # --------------------------------------- Speech-to-text upload constraints
    TRANSCRIBE_MAX_UPLOAD_BYTES: int = 2 * 1024 * 1024
    TRANSCRIBE_TIMEOUT_SECONDS: float = 25.0

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
@lru_cache
def get_settings() -> Settings:
    """Return the application :class:`Settings`, constructed once and cached.

    ``lru_cache`` was imported at the top of the file but never applied, so
    every call re-validated the environment and re-read ``.env``. Caching the
    singleton makes this cheap to use as a dependency (e.g. FastAPI
    ``Depends(get_settings)``) while keeping the call signature unchanged.

    Returns:
        Settings: the process-wide settings instance (same object each call).
    """
    return Settings()