"""Application settings loaded from the environment and an optional ``.env`` file.

Importing this module has side effects: it loads ``.env`` into the process
environment (overriding existing variables), builds the module-level
``settings`` object, and logs the resulting configuration with sensitive
values masked.
"""

import json
from pathlib import Path
from typing import Optional

from pydantic import Field, ValidationError
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv

from src.core.logger import logger

# Three directory levels above this file; the .env file is looked up there.
BASE_DIR = Path(__file__).parent.parent.parent
ENV_FILE = BASE_DIR / ".env"

# override=True: values from the .env file replace variables that are already
# set in the process environment.
load_dotenv(ENV_FILE, override=True)
logger.info(f"Loaded environment from: {ENV_FILE}")

# Case-insensitive substrings that mark a setting name as sensitive.
SENSITIVE_KEY_MARKERS = ("key", "token", "secret", "password")


def _is_sensitive_key(key: str) -> bool:
    """Return True when *key* contains any sensitive marker, ignoring case.

    Matching is by substring, so names such as ``LLM_MAX_TOKENS`` (which
    contains "token") are also treated as sensitive.
    NOTE(review): over-redaction looks like the intended safe default - confirm.
    """
    key_lower = key.lower()
    return any(marker in key_lower for marker in SENSITIVE_KEY_MARKERS)


def _redact_sensitive_value(value: object) -> str:
    """Return a placeholder describing *value* without revealing it.

    The three states are reported separately so the log shows whether a
    secret is configured at all:

    * ``None``            -> ``"[unset]"``
    * empty string        -> ``"[empty]"``
    * anything else       -> ``"[redacted]"``
    """
    if value is None:
        return "[unset]"
    if isinstance(value, str) and not value:
        return "[empty]"
    return "[redacted]"


def _mask_nested(value: object) -> object:
    """Mask dicts found inside *value*, descending through lists and tuples."""
    if isinstance(value, dict):
        return mask_sensitive_data(value)
    if isinstance(value, list):
        return [_mask_nested(item) for item in value]
    if isinstance(value, tuple):
        return tuple(_mask_nested(item) for item in value)
    return value


def mask_sensitive_data(data: dict) -> dict:
    """Return a copy of *data* with the values of sensitive keys replaced.

    A key is sensitive when its name contains one of
    ``SENSITIVE_KEY_MARKERS``. The whole value under a sensitive key is
    replaced by a placeholder, even when it is a container. Values under
    other keys are copied through, with nested dicts (including dicts inside
    lists and tuples) masked recursively. The input is not modified.

    Args:
        data: Mapping of setting names to values.

    Returns:
        A new dict with the same keys and masked values.
    """
    masked = {}
    for key, value in data.items():
        # Only string keys can be matched by name; other keys pass through.
        if isinstance(key, str) and _is_sensitive_key(key):
            masked[key] = _redact_sensitive_value(value)
        else:
            masked[key] = _mask_nested(value)
    return masked


class CoreSettings(BaseSettings):
    """Base class carrying the configuration shared by every settings group."""

    model_config = SettingsConfigDict(
        # Only point pydantic-settings at the .env file when it exists.
        env_file=str(ENV_FILE) if ENV_FILE.exists() else None,
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="ignore",
        protected_namespaces=(),
    )


class VoiceSettings(CoreSettings):
    """TTS voice, audio input, VAD and turn-taking settings."""

    POCKET_TTS_VOICE: str = Field(
        default="alba",
        description="Default voice (alba, marius, javert, jean, fantine, cosette, eponine, azelma) or path to audio file",
    )
    POCKET_TTS_TEMPERATURE: float = Field(
        default=0.7,
        ge=0.0,
        le=2.0,
        description="Sampling temperature for generation",
    )
    POCKET_TTS_LSD_DECODE_STEPS: int = Field(
        default=1,
        ge=1,
        description="LSD decoding steps (higher = better quality, slower)",
    )

    # LiveKit Audio Input Settings
    LIVEKIT_SAMPLE_RATE: int = Field(
        default=24000,
        description="Audio input sample rate (Hz)",
    )
    LIVEKIT_NUM_CHANNELS: int = Field(
        default=1,
        description="Number of audio input channels (1=mono)",
    )
    LIVEKIT_FRAME_SIZE_MS: int = Field(
        default=20,
        ge=10,
        le=100,
        description="Audio frame size in milliseconds (smaller = faster VAD response)",
    )
    LIVEKIT_PRE_CONNECT_AUDIO: bool = Field(
        default=True,
        description="Pre-connect audio before room join",
    )
    LIVEKIT_PRE_CONNECT_TIMEOUT: float = Field(
        default=3.0,
        ge=1.0,
        le=10.0,
        description="Timeout for pre-connect audio (seconds)",
    )

    # Voice Activity Detection Settings
    VAD_MIN_SPEECH_DURATION: float = Field(
        default=0.18,
        ge=0.1,
        le=1.0,
        description="Minimum speech duration (seconds) before VAD activation",
    )
    VAD_MIN_SILENCE_DURATION: float = Field(
        default=0.30,
        ge=0.1,
        le=2.0,
        description="Minimum silence duration (seconds) before VAD deactivation",
    )
    VAD_THRESHOLD: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="VAD activation threshold (higher = less sensitive, 0.5 is Silero default)",
    )
    MIN_ENDPOINTING_DELAY: float = Field(
        default=0.15,
        ge=0.0,
        le=10.0,
        description="Minimum endpointing delay (seconds) before committing user turn",
    )
    MAX_ENDPOINTING_DELAY: float = Field(
        default=1.0,
        ge=0.1,
        le=10.0,
        description="Maximum endpointing delay (seconds) when turn detector expects continuation",
    )
    PREEMPTIVE_GENERATION: bool = Field(
        default=True,
        description="Enable speculative LLM/TTS generation before final turn commit",
    )


class STTSettings(CoreSettings):
    """Speech-to-text provider selection and per-provider options."""

    # Provider selection
    STT_PROVIDER: str = Field(
        default="moonshine",
        description="STT provider: 'nvidia' or 'moonshine'"
    )

    # Moonshine STT settings
    MOONSHINE_MODEL_ID: str = Field(
        default="usefulsensors/moonshine-streaming-medium",
        description="Moonshine model size: tiny, base, small, or medium"
    )
    MOONSHINE_LANGUAGE: str = Field(
        default="en",
        description="Language code for Moonshine STT"
    )

    # NVIDIA STT settings
    NVIDIA_STT_API_KEY: Optional[str] = Field(
        default=None,
        description="NVIDIA API key for STT (falls back to NVIDIA_API_KEY if not set)"
    )
    NVIDIA_STT_MODEL: str = Field(
        default="parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer",
        description="NVIDIA STT model ID"
    )
    NVIDIA_STT_LANGUAGE_CODE: str = Field(
        default="en-US",
        description="Language code for NVIDIA STT"
    )


class LLMSettings(CoreSettings):
    """LLM provider selection, generation parameters and connection limits."""

    # Provider selection
    LLM_PROVIDER: str = Field(
        default="huggingface",
        description="LLM provider: 'nvidia' or 'huggingface'"
    )

    # NVIDIA settings (existing)
    NVIDIA_API_KEY: Optional[str] = Field(default=None)
    NVIDIA_MODEL: str = Field(default="qwen/qwen2.5-7b-instruct")

    # HuggingFace settings (new)
    HUGGINGFACE_MODEL_ID: str = Field(
        default="Qwen/Qwen2.5-3B-Instruct",
        description="HuggingFace model repository ID"
    )
    HUGGINGFACE_DEVICE: Optional[str] = Field(
        default=None,
        description="Device for inference: 'cuda', 'cpu', or None for auto-detect"
    )

    # Common LLM parameters
    LLM_TEMPERATURE: float = Field(default=0.7, ge=0.0, le=2.0)
    LLM_MAX_TOKENS: int = Field(default=1024, gt=0)
    LLM_CONN_TIMEOUT_SEC: float = Field(
        default=12.0,
        gt=0.0,
        le=120.0,
        description="LLM API timeout in seconds for one request attempt",
    )
    LLM_CONN_MAX_RETRY: int = Field(
        default=1,
        ge=0,
        le=10,
        description="Maximum LLM retry attempts on transient failures",
    )
    LLM_CONN_RETRY_INTERVAL_SEC: float = Field(
        default=1.0,
        ge=0.0,
        le=30.0,
        description="Delay in seconds between LLM retries",
    )
    TURN_LLM_STALL_TIMEOUT_SEC: float = Field(
        default=8.0,
        gt=0.0,
        le=120.0,
        description="Warn when a finalized user turn does not reach LLM stage within this timeout",
    )


class LiveKitSettings(CoreSettings):
    """LiveKit connection credentials and agent worker options."""

    LIVEKIT_URL: Optional[str] = Field(default=None)
    LIVEKIT_API_KEY: Optional[str] = Field(default=None)
    LIVEKIT_API_SECRET: Optional[str] = Field(default=None)
    LIVEKIT_AGENT_NAME: str = Field(default="open-voice-agent")
    LIVEKIT_NUM_IDLE_PROCESSES: int = Field(default=1, ge=0)
    LIVEKIT_JOB_MEMORY_WARN_MB: float = Field(
        default=6144,
        gt=0,
        description="Per-job memory warning threshold in MB",
    )


class LangfuseSettings(CoreSettings):
    """Langfuse tracing credentials, endpoints and timeouts."""

    LANGFUSE_ENABLED: bool = Field(
        default=False,
        description="Enable Langfuse tracing via OTEL exporter",
    )
    LANGFUSE_PUBLIC_KEY: Optional[str] = Field(default=None)
    LANGFUSE_SECRET_KEY: Optional[str] = Field(default=None)
    LANGFUSE_ENVIRONMENT: str = Field(default="development")
    LANGFUSE_HOST: Optional[str] = Field(
        default=None,
        description="Langfuse host URL, e.g. https://cloud.langfuse.com",
    )
    LANGFUSE_BASE_URL: Optional[str] = Field(
        default=None,
        description="Alternative to LANGFUSE_HOST",
    )
    LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS: float = Field(
        default=8000.0,
        ge=0.0,
        le=10000.0,
        description="Timeout to wait for assistant text before force-finalizing trace",
    )
    LANGFUSE_MAX_PENDING_TRACE_TASKS: int = Field(
        default=200,
        ge=1,
        le=5000,
        description="Maximum queued background trace emission tasks",
    )
    LANGFUSE_TRACE_FLUSH_TIMEOUT_MS: float = Field(
        default=1000.0,
        ge=0.0,
        le=10000.0,
        description="Best-effort tracer flush timeout in milliseconds",
    )


class Settings(CoreSettings):
    """Aggregate of all settings groups, each built by its own default factory."""

    voice: VoiceSettings = Field(default_factory=VoiceSettings)
    stt: STTSettings = Field(default_factory=STTSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    livekit: LiveKitSettings = Field(default_factory=LiveKitSettings)
    langfuse: LangfuseSettings = Field(default_factory=LangfuseSettings)


# Build the module-level settings object at import time and log a masked copy.
try:
    settings = Settings()
    settings_dict = settings.model_dump()
    masked_settings = mask_sensitive_data(settings_dict)
    logger.info(f"Settings loaded: {json.dumps(masked_settings, indent=2)}")
except ValidationError as e:
    # Log only the error structure: input values are left out so that a
    # rejected secret does not end up in the log message.
    safe_errors = e.errors(
        include_url=False,
        include_context=False,
        include_input=False,
    )
    # NOTE(review): logger.exception also attaches the traceback, and the
    # ValidationError message itself may include input values - confirm this
    # does not defeat include_input=False above.
    logger.exception(
        "Error validating settings: %s",
        json.dumps(safe_errors),
    )
    raise