"""
Application configuration using Pydantic Settings.
"""

import os
from pathlib import Path
from functools import lru_cache
from typing import Literal, Dict

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values are also read from a UTF-8 encoded ``.env`` file (see
    ``model_config``); unknown keys are ignored rather than rejected.

    Constructing an instance has a filesystem side effect: ``upload_dir``
    and ``processed_dir`` are created if they do not exist yet.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # HuggingFace
    hf_token: str = ""

    enable_noise_reduction: bool = True

    # Denoising (Speech Enhancement)
    enable_denoiser: bool = True

    # Display name -> model identifier.
    available_whisper_models: Dict[str, str] = {
        "PhoWhisper Large": "kiendt/PhoWhisper-large-ct2",
        "PhoWhisper Lora Finetuned": "vyluong/pho-whisper-vi-ct2",
    }

    # S2T model
    default_whisper_model: str = "vyluong/pho-whisper-vi-ct2"
    wav2vec_model: str = "vyluong/w2vbert_final"

    # voice emotion detection model
    default_dual_emotion_model: str = "vyluong/emo_dual_classi"

    # Diarization model
    # pyannote/speaker-diarization-3.1
    # pyannote/speaker-diarization-community-1
    pyannote_model: str = "pyannote/speaker-diarization-community-1"
    sortformer_model: str = "nvidia/diar_sortformer_4spk-v1"
    diarization_backend: str = "sortformer"

    # Device settings
    device: Literal["cuda", "cpu", "auto"] = "auto"
    # NOTE(review): `resolved_compute_type` below never reads this field, so
    # overriding it has no effect on that property -- confirm whether any
    # caller uses `compute_type` directly.
    compute_type: str = "float16"  # float16 for GPU, int8 for CPU

    # Upload settings
    max_upload_size_mb: int = 100
    allowed_extensions: list[str] = ["mp3", "wav", "m4a", "ogg", "flac", "webm"]

    # Audio processing settings
    sample_rate: int = 16000
    channels: int = 1  # Mono
    enable_loudnorm: bool = True

    # VAD parameters
    vad_threshold: float = 0.55
    vad_min_speech_duration_ms: int = 200
    vad_min_silence_duration_ms: int = 450
    vad_speech_pad_ms: int = 250

    # Post-processing
    merge_threshold_s: float = 0.35  # Merge segments from same speaker if gap < this
    min_segment_duration_s: float = 0.85  # Remove segments shorter than this

    # Server settings
    host: str = "0.0.0.0"
    port: int = 7860

    # Paths
    # The three derived defaults below are evaluated once, at class-definition
    # time, from the *default* parent path. `__init__` re-derives them when a
    # parent directory is overridden.
    base_dir: Path = Path(__file__).parent.parent.parent
    data_dir: Path = base_dir / "data"
    upload_dir: Path = data_dir / "uploads"
    processed_dir: Path = data_dir / "processed"

    def __init__(self, **kwargs):
        """Load the settings, reconcile derived paths, and create directories.

        Args:
            **kwargs: Explicit field values, forwarded unchanged to
                ``BaseSettings.__init__``.
        """
        super().__init__(**kwargs)

        # Snapshot of the fields that were explicitly supplied. Copied because
        # the assignments below would otherwise add names to the live set.
        explicit = set(self.model_fields_set)

        def follow(field: str, expected: Path) -> None:
            # Re-point a derived path at its (possibly overridden) parent,
            # unless the field itself was supplied explicitly. Skipping the
            # assignment when nothing changes keeps `model_fields_set`
            # untouched for a default configuration.
            if field not in explicit and getattr(self, field) != expected:
                setattr(self, field, expected)

        # Order matters: the children are derived from the final `data_dir`.
        follow("data_dir", self.base_dir / "data")
        follow("upload_dir", self.data_dir / "uploads")
        follow("processed_dir", self.data_dir / "processed")

        # Ensure directories exist
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.processed_dir.mkdir(parents=True, exist_ok=True)

    @property
    def max_upload_size_bytes(self) -> int:
        """Return the upload limit ``max_upload_size_mb`` in bytes (MiB-based)."""
        return self.max_upload_size_mb * 1024 * 1024

    @property
    def resolved_device(self) -> str:
        """Resolve 'auto' to actual device.

        Returns:
            ``self.device`` unchanged when it is not ``"auto"``. Otherwise
            ``"cuda"`` if torch is importable and reports CUDA as available,
            else ``"cpu"`` (including when torch is not installed).
        """
        if self.device == "auto":
            try:
                # Imported lazily so this module does not require torch.
                import torch

                return "cuda" if torch.cuda.is_available() else "cpu"
            except ImportError:
                return "cpu"
        return self.device

    @property
    def resolved_compute_type(self) -> str:
        """Get appropriate compute type for device.

        Returns:
            ``"float16"`` when ``resolved_device`` is ``"cuda"``, otherwise
            ``"int8"``. The ``compute_type`` field is not consulted.
        """
        if self.resolved_device == "cuda":
            return "float16"
        return "int8"


@lru_cache
def get_settings() -> Settings:
    """Get cached settings instance.

    The first call constructs ``Settings()``; later calls return that same
    object from the ``lru_cache``.
    """
    return Settings()