| from pathlib import Path | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def init_espeak():
    """Initialize the eSpeak NG environment variables used by phonemizer.

    Sets ``PHONEMIZER_ESPEAK_LIBRARY`` and ``PHONEMIZER_ESPEAK_PATH`` to the
    default Windows install locations of eSpeak NG, but only when they are
    not already defined, so a value provided by the shell or loaded from a
    ``.env`` file is kept. Must be called before any other imports.
    """
    # setdefault instead of plain assignment: never clobber a value the user
    # has already configured.
    os.environ.setdefault(
        "PHONEMIZER_ESPEAK_LIBRARY",
        r"C:\Program Files\eSpeak NG\libespeak-ng.dll",
    )
    os.environ.setdefault(
        "PHONEMIZER_ESPEAK_PATH",
        r"C:\Program Files\eSpeak NG\espeak-ng.exe",
    )


# Run at import time so the variables are in place before the imports below.
init_espeak()
from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application configuration read from environment variables and ``.env``.

    pydantic-settings resolves every field from the environment variable that
    carries the field's own name, so no per-field alias is declared. Fields
    without a default are required: creating ``Settings`` raises a validation
    error when neither the environment nor ``.env`` provides them.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Directory layout. The derived directories are computed from BASE_DIR
    # once, when the class body runs; overriding BASE_DIR afterwards does not
    # move them.
    BASE_DIR: Path = Path(__file__).parent.parent.parent
    MODELS_DIR: Path = BASE_DIR / "data" / "models"
    VOICES_DIR: Path = BASE_DIR / "data" / "voices"
    OUTPUT_DIR: Path = BASE_DIR / "output"
    RECORDINGS_DIR: Path = BASE_DIR / "recordings"

    # eSpeak NG locations (default Windows install paths).
    ESPEAK_LIBRARY_PATH: str = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
    ESPEAK_PATH: str = r"C:\Program Files\eSpeak NG\espeak-ng.exe"

    # Required: no default, must come from the environment or .env.
    TTS_MODEL: str
    VOICE_NAME: str
    SPEED: float = 1.0

    # Required: no default, must come from the environment or .env.
    HUGGINGFACE_TOKEN: str
    LM_STUDIO_URL: str
    OLLAMA_URL: str
    DEFAULT_SYSTEM_PROMPT: str
    LLM_MODEL: str

    NUM_THREADS: int = 2
    MAX_TOKENS: int = 512
    LLM_TEMPERATURE: float = 0.7
    LLM_STREAM: bool = False
    LLM_RETRY_DELAY: float = 0.5
    MAX_RETRIES: int = 3

    WHISPER_MODEL: str = "openai/whisper-tiny.en"
    VAD_MODEL: str = "pyannote/segmentation-3.0"
    VAD_MIN_DURATION_ON: float = 0.1
    VAD_MIN_DURATION_OFF: float = 0.1

    CHUNK: int = 1024
    # NOTE(review): stored as the dotted name in a string, not as a pyaudio
    # constant -- presumably resolved by whoever consumes it; confirm.
    FORMAT: str = "pyaudio.paFloat32"
    CHANNELS: int = 1
    RATE: int = 16000
    OUTPUT_SAMPLE_RATE: int = 24000
    RECORD_DURATION: int = 5
    SILENCE_THRESHOLD: float = 0.01
    INTERRUPTION_THRESHOLD: float = 0.02
    MAX_SILENCE_DURATION: int = 1
    SPEECH_CHECK_TIMEOUT: float = 0.1
    SPEECH_CHECK_THRESHOLD: float = 0.02
    ROLLING_BUFFER_TIME: float = 0.5
    TARGET_SIZE: int = 15
    FIRST_SENTENCE_SIZE: int = 3
    PLAYBACK_DELAY: float = 0.005

    def setup_directories(self) -> None:
        """Create the models, voices, output and recordings directories.

        Missing parent directories are created as well; directories that
        already exist are left untouched.
        """
        for directory in (
            self.MODELS_DIR,
            self.VOICES_DIR,
            self.OUTPUT_DIR,
            self.RECORDINGS_DIR,
        ):
            directory.mkdir(parents=True, exist_ok=True)


# Shared settings instance, built at import time; this raises a validation
# error if any required setting is missing.
settings = Settings()
def configure_logging():
    """Silence warnings and raise the root and noisy library loggers to ERROR."""
    import logging
    import warnings

    # Drop every warning raised through the warnings module.
    warnings.filterwarnings("ignore")

    quiet_level = logging.ERROR
    logging.getLogger().setLevel(quiet_level)  # root logger

    noisy_loggers = (
        "urllib3",
        "PIL",
        "matplotlib",
        "torch",
        "tensorflow",
        "whisper",
        "transformers",
        "pyannote",
        "sounddevice",
        "soundfile",
        "uvicorn",
        "fastapi",
    )
    for logger_name in noisy_loggers:
        logging.getLogger(logger_name).setLevel(quiet_level)


# Applied once at import time.
configure_logging()