# Source: open-voice-agent / src/core/settings.py
# Commit 3a97ef8 (dvalle08):
#   Update deployment environment configuration: Replace hardcoded default environment in agent.py
import json
from pathlib import Path
from typing import Optional
from pydantic import Field, ValidationError
from pydantic_settings import BaseSettings, SettingsConfigDict
from dotenv import load_dotenv
from src.core.logger import logger
# Directory three levels above this file (the project root when this module
# lives at src/core/settings.py).
BASE_DIR = Path(__file__).parent.parent.parent
# Dotenv file expected directly under BASE_DIR.
ENV_FILE = BASE_DIR / ".env"
# Load the dotenv file into the process environment at import time.
# override=True asks python-dotenv to let values from the file replace
# variables that are already set in the environment.
load_dotenv(ENV_FILE, override=True)
# NOTE(review): this message is emitted unconditionally; the return value of
# load_dotenv() is not checked, so it is logged even if the file is absent.
logger.info(f"Loaded environment from: {ENV_FILE}")
SENSITIVE_KEY_MARKERS = ("key", "token", "secret", "password")
def _is_sensitive_key(key: str) -> bool:
key_lower = key.lower()
return any(marker in key_lower for marker in SENSITIVE_KEY_MARKERS)
def _redact_sensitive_value(value: object) -> str:
if value is None:
return "<not set>"
if isinstance(value, str) and not value:
return "<not set>"
return "<redacted>"
def mask_sensitive_data(data: dict) -> dict:
    """Return a copy of *data* with sensitive values replaced by placeholders.

    A key is sensitive when ``_is_sensitive_key`` accepts it; its value,
    whatever its type, is replaced by the placeholder produced by
    ``_redact_sensitive_value``. Values under non-sensitive keys are kept,
    except that nested dicts are masked recursively, including dicts that
    sit inside lists or tuples, so secrets in nested collections do not
    slip through. The input mapping is not modified.

    Args:
        data: Mapping of setting names to values, e.g. a ``model_dump()``
            result.

    Returns:
        A new dict with the same keys in the same order.
    """

    def _mask_value(value):
        # Descend into containers that can hold further key/value pairs;
        # anything else is returned untouched.
        if isinstance(value, dict):
            return mask_sensitive_data(value)
        if isinstance(value, list):
            return [_mask_value(item) for item in value]
        if isinstance(value, tuple):
            return tuple(_mask_value(item) for item in value)
        return value

    masked = {}
    for key, value in data.items():
        if _is_sensitive_key(key):
            # The whole value is replaced, even when it is itself a container.
            masked[key] = _redact_sensitive_value(value)
        else:
            masked[key] = _mask_value(value)
    return masked
class CoreSettings(BaseSettings):
    """Base class holding the settings-loader configuration shared by every settings group."""

    model_config = SettingsConfigDict(
        # Only hand the dotenv path to the loader when the file exists on disk.
        env_file=None if not ENV_FILE.exists() else str(ENV_FILE),
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="ignore",
        protected_namespaces=(),
    )
class VoiceSettings(CoreSettings):
    """Pocket TTS, LiveKit audio input, VAD and turn-endpointing settings."""

    # Pocket TTS settings
    POCKET_TTS_VOICE: str = Field(
        default="alba",
        description="Default voice (alba, marius, javert, jean, fantine, cosette, eponine, azelma) or path to audio file",
    )
    POCKET_TTS_TEMPERATURE: float = Field(
        default=0.7,
        ge=0.0,
        le=2.0,
        description="Sampling temperature for generation",
    )
    POCKET_TTS_LSD_DECODE_STEPS: int = Field(
        default=1,
        ge=1,
        description="LSD decoding steps (higher = better quality, slower)",
    )

    # LiveKit audio input settings
    LIVEKIT_SAMPLE_RATE: int = Field(
        default=24000,
        description="Audio input sample rate (Hz)",
    )
    LIVEKIT_NUM_CHANNELS: int = Field(
        default=1,
        description="Number of audio input channels (1=mono)",
    )
    LIVEKIT_FRAME_SIZE_MS: int = Field(
        default=20,
        ge=10,
        le=100,
        description="Audio frame size in milliseconds (smaller = faster VAD response)",
    )
    LIVEKIT_PRE_CONNECT_AUDIO: bool = Field(
        default=True,
        description="Pre-connect audio before room join",
    )
    LIVEKIT_PRE_CONNECT_TIMEOUT: float = Field(
        default=3.0,
        ge=1.0,
        le=10.0,
        description="Timeout for pre-connect audio (seconds)",
    )

    # Voice activity detection settings
    VAD_MIN_SPEECH_DURATION: float = Field(
        default=0.18,
        ge=0.1,
        le=1.0,
        description="Minimum speech duration (seconds) before VAD activation",
    )
    VAD_MIN_SILENCE_DURATION: float = Field(
        default=0.30,
        ge=0.1,
        le=2.0,
        description="Minimum silence duration (seconds) before VAD deactivation",
    )
    VAD_THRESHOLD: float = Field(
        default=0.6,
        ge=0.0,
        le=1.0,
        description="VAD activation threshold (higher = less sensitive, 0.5 is Silero default)",
    )

    # Turn endpointing settings
    # NOTE(review): each bound is range-checked on its own; nothing here
    # enforces MIN_ENDPOINTING_DELAY <= MAX_ENDPOINTING_DELAY.
    MIN_ENDPOINTING_DELAY: float = Field(
        default=0.15,
        ge=0.0,
        le=10.0,
        description="Minimum endpointing delay (seconds) before committing user turn",
    )
    MAX_ENDPOINTING_DELAY: float = Field(
        default=1.0,
        ge=0.1,
        le=10.0,
        description="Maximum endpointing delay (seconds) when turn detector expects continuation",
    )
    PREEMPTIVE_GENERATION: bool = Field(
        default=True,
        description="Enable speculative LLM/TTS generation before final turn commit",
    )
class STTSettings(CoreSettings):
    """Speech-to-text provider selection plus per-provider options."""

    # Provider selection
    STT_PROVIDER: str = Field(
        default="moonshine",
        description="STT provider: 'nvidia' or 'moonshine'",
    )

    # Moonshine STT settings
    # NOTE(review): the description below talks about a model *size*, while
    # the default is a full repository-style ID; confirm which form consumers expect.
    MOONSHINE_MODEL_ID: str = Field(
        default="usefulsensors/moonshine-streaming-medium",
        description="Moonshine model size: tiny, base, small, or medium",
    )
    MOONSHINE_LANGUAGE: str = Field(
        default="en",
        description="Language code for Moonshine STT",
    )

    # NVIDIA STT settings
    NVIDIA_STT_API_KEY: Optional[str] = Field(
        default=None,
        description="NVIDIA API key for STT (falls back to NVIDIA_API_KEY if not set)",
    )
    NVIDIA_STT_MODEL: str = Field(
        default="parakeet-1.1b-en-US-asr-streaming-silero-vad-sortformer",
        description="NVIDIA STT model ID",
    )
    NVIDIA_STT_LANGUAGE_CODE: str = Field(
        default="en-US",
        description="Language code for NVIDIA STT",
    )
class LLMSettings(CoreSettings):
    """LLM provider selection, per-provider model options and request/retry tuning."""

    # Provider selection
    LLM_PROVIDER: str = Field(
        default="huggingface",
        description="LLM provider: 'nvidia' or 'huggingface'",
    )

    # NVIDIA settings
    NVIDIA_API_KEY: Optional[str] = Field(default=None)
    NVIDIA_MODEL: str = Field(default="qwen/qwen2.5-7b-instruct")

    # HuggingFace settings
    HUGGINGFACE_MODEL_ID: str = Field(
        default="Qwen/Qwen2.5-3B-Instruct",
        description="HuggingFace model repository ID",
    )
    HUGGINGFACE_DEVICE: Optional[str] = Field(
        default=None,
        description="Device for inference: 'cuda', 'cpu', or None for auto-detect",
    )

    # Common LLM parameters
    LLM_TEMPERATURE: float = Field(default=0.7, ge=0.0, le=2.0)
    LLM_MAX_TOKENS: int = Field(default=1024, gt=0)

    # Connection / retry behaviour
    LLM_CONN_TIMEOUT_SEC: float = Field(
        default=12.0,
        gt=0.0,
        le=120.0,
        description="LLM API timeout in seconds for one request attempt",
    )
    LLM_CONN_MAX_RETRY: int = Field(
        default=1,
        ge=0,
        le=10,
        description="Maximum LLM retry attempts on transient failures",
    )
    LLM_CONN_RETRY_INTERVAL_SEC: float = Field(
        default=1.0,
        ge=0.0,
        le=30.0,
        description="Delay in seconds between LLM retries",
    )
    TURN_LLM_STALL_TIMEOUT_SEC: float = Field(
        default=8.0,
        gt=0.0,
        le=120.0,
        description="Warn when a finalized user turn does not reach LLM stage within this timeout",
    )
class LiveKitSettings(CoreSettings):
    """LiveKit URL and credentials, agent name, idle-process count and memory warning threshold."""

    # Connection details; all optional and unset by default.
    LIVEKIT_URL: Optional[str] = Field(default=None)
    LIVEKIT_API_KEY: Optional[str] = Field(default=None)
    LIVEKIT_API_SECRET: Optional[str] = Field(default=None)

    # Agent settings
    LIVEKIT_AGENT_NAME: str = Field(default="open-voice-agent")
    LIVEKIT_NUM_IDLE_PROCESSES: int = Field(default=1, ge=0)
    LIVEKIT_JOB_MEMORY_WARN_MB: float = Field(
        default=6144,
        gt=0,
        description="Per-job memory warning threshold in MB",
    )
class LangfuseSettings(CoreSettings):
    """Langfuse tracing toggle, credentials, endpoint and trace timing limits."""

    LANGFUSE_ENABLED: bool = Field(
        default=False,
        description="Enable Langfuse tracing via OTEL exporter",
    )

    # Credentials and environment label
    LANGFUSE_PUBLIC_KEY: Optional[str] = Field(default=None)
    LANGFUSE_SECRET_KEY: Optional[str] = Field(default=None)
    LANGFUSE_ENVIRONMENT: str = Field(default="development")

    # Endpoint: two alternative fields, both unset by default.
    LANGFUSE_HOST: Optional[str] = Field(
        default=None,
        description="Langfuse host URL, e.g. https://cloud.langfuse.com",
    )
    LANGFUSE_BASE_URL: Optional[str] = Field(
        default=None,
        description="Alternative to LANGFUSE_HOST",
    )

    # Trace emission limits
    LANGFUSE_TRACE_FINALIZE_TIMEOUT_MS: float = Field(
        default=8000.0,
        ge=0.0,
        le=10000.0,
        description="Timeout to wait for assistant text before force-finalizing trace",
    )
    LANGFUSE_MAX_PENDING_TRACE_TASKS: int = Field(
        default=200,
        ge=1,
        le=5000,
        description="Maximum queued background trace emission tasks",
    )
    LANGFUSE_TRACE_FLUSH_TIMEOUT_MS: float = Field(
        default=1000.0,
        ge=0.0,
        le=10000.0,
        description="Best-effort tracer flush timeout in milliseconds",
    )
class Settings(CoreSettings):
    """Aggregate of all settings groups, one attribute per group.

    Each attribute is produced by calling its settings class with no
    arguments (via ``default_factory``), so every group is constructed
    independently when ``Settings()`` is instantiated.
    """

    voice: VoiceSettings = Field(default_factory=VoiceSettings)
    stt: STTSettings = Field(default_factory=STTSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    livekit: LiveKitSettings = Field(default_factory=LiveKitSettings)
    langfuse: LangfuseSettings = Field(default_factory=LangfuseSettings)
# Build the module-level ``settings`` object at import time and log a snapshot
# of it with sensitive values masked.
try:
    settings = Settings()
    settings_dict = settings.model_dump()
    masked_settings = mask_sensitive_data(settings_dict)
    logger.info(f"Settings loaded: {json.dumps(masked_settings, indent=2)}")
except ValidationError as exc:
    # Ask for the error list without URLs, context or raw input so rejected
    # values are not part of the serialized message.
    safe_errors = exc.errors(
        include_url=False,
        include_context=False,
        include_input=False,
    )
    # NOTE(review): logger.exception may also attach the exception's own text
    # via the traceback; verify that this does not reintroduce input values.
    logger.exception("Error validating settings: %s", json.dumps(safe_errors))
    # Propagate the failure after logging.
    raise