# PoC_ASR_v6_dev / app/core/config.py
# Last change: "Update app/core/config.py" by vyluong (commit 77f83aa, verified)
"""
Application configuration using Pydantic Settings.
"""
import os
from pathlib import Path
from functools import lru_cache
from typing import Literal, Dict
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and from a UTF-8 ``.env`` file;
    entries that do not correspond to a field are ignored (``extra="ignore"``).

    Instantiating the class has a filesystem side effect: ``upload_dir`` and
    ``processed_dir`` are created if they do not exist yet.
    """

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", extra="ignore"
    )

    # HuggingFace
    hf_token: str = ""

    # Denoising (Speech Enhancement)
    # NOTE(review): two similarly named switches; what each one controls is
    # decided by the consumers of this class — confirm both are still needed.
    enable_noise_reduction: bool = True
    enable_denoiser: bool = True

    # Selectable S2T models; presumably display label -> model id (verify
    # against the caller that renders the choices).
    available_whisper_models: Dict[str, str] = {
        "PhoWhisper Large": "kiendt/PhoWhisper-large-ct2",
        "PhoWhisper Lora Finetuned": "vyluong/pho-whisper-vi-ct2",
    }

    # S2T model
    default_whisper_model: str = "vyluong/pho-whisper-vi-ct2"
    wav2vec_model: str = "vyluong/w2vbert_final"

    # Voice emotion detection model
    default_dual_emotion_model: str = "vyluong/emo_dual_classi"

    # Diarization model
    # Known alternatives for pyannote_model:
    #   pyannote/speaker-diarization-3.1
    #   pyannote/speaker-diarization-community-1
    pyannote_model: str = "pyannote/speaker-diarization-community-1"
    sortformer_model: str = "nvidia/diar_sortformer_4spk-v1"
    diarization_backend: str = "sortformer"

    # Device settings
    device: Literal["cuda", "cpu", "auto"] = "auto"
    # NOTE(review): `resolved_compute_type` below does not read this field;
    # confirm whether consumers use it directly or whether it is dead config.
    compute_type: str = "float16"  # float16 for GPU, int8 for CPU

    # Upload settings
    max_upload_size_mb: int = 100
    allowed_extensions: list[str] = ["mp3", "wav", "m4a", "ogg", "flac", "webm"]

    # Audio processing settings
    sample_rate: int = 16000
    channels: int = 1  # Mono
    enable_loudnorm: bool = True

    # VAD parameters
    vad_threshold: float = 0.55
    vad_min_speech_duration_ms: int = 200
    vad_min_silence_duration_ms: int = 450
    vad_speech_pad_ms: int = 250

    # Post-processing
    merge_threshold_s: float = 0.35  # Merge segments from same speaker if gap < this
    min_segment_duration_s: float = 0.85  # Remove segments shorter than this

    # Server settings
    host: str = "0.0.0.0"
    port: int = 7860

    # Paths
    # These defaults are evaluated once, when the class body runs, so the
    # child paths below are built from the *default* parent. `__init__`
    # re-derives them when a parent is overridden.
    base_dir: Path = Path(__file__).parent.parent.parent
    data_dir: Path = base_dir / "data"
    upload_dir: Path = data_dir / "uploads"
    processed_dir: Path = data_dir / "processed"

    def __init__(self, **kwargs):
        """Load the settings and make sure the storage directories exist.

        Args:
            **kwargs: Explicit field values; forwarded unchanged to
                ``BaseSettings.__init__``.
        """
        super().__init__(**kwargs)
        # Keep data/upload/processed paths under an overridden parent.
        self._rederive_default_paths()
        # Ensure directories exist
        self.upload_dir.mkdir(parents=True, exist_ok=True)
        self.processed_dir.mkdir(parents=True, exist_ok=True)

    def _rederive_default_paths(self) -> None:
        """Rebuild path fields that were left at their default from their parent.

        A path supplied explicitly (keyword argument, environment variable or
        ``.env`` entry) is never touched. When nothing is overridden the
        derived values equal the class defaults and no assignment happens.
        """
        # Snapshot first: assigning a field below also marks it as "set".
        explicit = set(self.model_fields_set)
        # Parents must be processed before their children, hence the order.
        derivations = (
            ("data_dir", "base_dir", "data"),
            ("upload_dir", "data_dir", "uploads"),
            ("processed_dir", "data_dir", "processed"),
        )
        for field_name, parent_name, leaf in derivations:
            if field_name in explicit:
                continue
            derived = getattr(self, parent_name) / leaf
            if derived != getattr(self, field_name):
                setattr(self, field_name, derived)

    @property
    def max_upload_size_bytes(self) -> int:
        """Upload size limit in bytes (``max_upload_size_mb`` * 1024 * 1024)."""
        return self.max_upload_size_mb * 1024 * 1024

    @property
    def resolved_device(self) -> str:
        """Resolve 'auto' to actual device.

        Returns:
            ``device`` unchanged when it is "cuda" or "cpu". For "auto":
            "cuda" if torch is importable and reports CUDA as available,
            otherwise "cpu".
        """
        if self.device != "auto":
            return self.device
        try:
            # Imported lazily so this module does not require torch.
            import torch
        except ImportError:
            return "cpu"
        return "cuda" if torch.cuda.is_available() else "cpu"

    @property
    def resolved_compute_type(self) -> str:
        """Get appropriate compute type for device.

        Returns:
            "float16" when ``resolved_device`` is "cuda", otherwise "int8".
        """
        # NOTE(review): the `compute_type` field is not consulted here —
        # confirm that ignoring the configured value is intentional.
        if self.resolved_device == "cuda":
            return "float16"
        return "int8"
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` object, building it on the first call.

    The result is memoized by ``lru_cache``, so later calls return the same
    instance and do not construct ``Settings`` again.
    """
    shared_settings = Settings()
    return shared_settings