from __future__ import annotations import shutil from pathlib import Path import torch from loguru import logger from api.src.core.config import settings from api.src.core.model_config import BUILTIN_VOICES from api.src.core.paths import ( BUILTIN_VOICES_DIR, CUSTOM_VOICES_DIR, ensure_voice_dirs, get_voice_codes, get_voice_text, get_voice_wav, is_custom_voice, voice_codes_path, ) class VoiceManager: _instance: VoiceManager | None = None def __init__(self) -> None: self._voices: dict[str, dict] = {} @classmethod def get_instance(cls) -> VoiceManager: if cls._instance is None: cls._instance = cls() return cls._instance def scan_voices(self) -> None: ensure_voice_dirs() self._voices.clear() # Built-in voices for name, info in BUILTIN_VOICES.items(): wav_exists = get_voice_wav(name) is not None txt_exists = get_voice_text(name) is not None self._voices[name] = { "name": name, "language": info["language"], "gender": info["gender"], "description": info["description"], "custom": False, "available": wav_exists and txt_exists, } # Custom voices: scan for .wav files for wav in CUSTOM_VOICES_DIR.glob("*.wav"): name = wav.stem if name not in self._voices: txt_exists = get_voice_text(name) is not None self._voices[name] = { "name": name, "language": "unknown", "gender": "unknown", "description": "Custom uploaded voice", "custom": True, "available": txt_exists, } available = sum(1 for v in self._voices.values() if v.get("available", True)) logger.info( f"Scanned {len(self._voices)} voices ({len(BUILTIN_VOICES)} builtin, {available} available)" ) @property def voices(self) -> dict[str, dict]: return self._voices def voice_exists(self, voice_name: str) -> bool: return voice_name in self._voices def get_ref_text(self, voice_name: str) -> str: txt_path = get_voice_text(voice_name) if txt_path is None: raise FileNotFoundError(f"No reference text found for voice '{voice_name}'") return txt_path.read_text(encoding="utf-8").strip() def get_ref_codes(self, voice_name: str, codec_id: str) -> torch.Tensor | None: codes_path = get_voice_codes(voice_name, codec_id) if codes_path is None: return None return torch.load(codes_path, map_location="cpu", weights_only=True) async def get_or_encode_ref_codes( self, voice_name: str, codec_id: str, model_manager: object, model_id: str, ) -> object: codes = self.get_ref_codes(voice_name, codec_id) if codes is not None: return codes wav_path = get_voice_wav(voice_name) if wav_path is None: raise FileNotFoundError(f"No WAV file found for voice '{voice_name}'") logger.info(f"Encoding reference for voice '{voice_name}' with codec '{codec_id}'") ref_codes = await model_manager.encode_reference(model_id, str(wav_path)) # Cache the encoded reference custom = is_custom_voice(voice_name) save_path = voice_codes_path(voice_name, codec_id, custom=custom) torch.save(ref_codes, save_path) logger.info(f"Cached reference codes at {save_path}") return ref_codes def upload_voice( self, voice_name: str, wav_data: bytes, ref_text: str, language: str = "unknown", gender: str = "unknown", ) -> Path: ensure_voice_dirs() wav_path = CUSTOM_VOICES_DIR / f"{voice_name}.wav" txt_path = CUSTOM_VOICES_DIR / f"{voice_name}.txt" wav_path.write_bytes(wav_data) txt_path.write_text(ref_text, encoding="utf-8") self._voices[voice_name] = { "name": voice_name, "language": language, "gender": gender, "description": "Custom uploaded voice", "custom": True, "available": True, } logger.info(f"Uploaded custom voice '{voice_name}' (lang={language}, gender={gender})") return wav_path def delete_voice(self, voice_name: str) -> None: if voice_name in BUILTIN_VOICES: raise ValueError(f"Cannot delete built-in voice '{voice_name}'") if voice_name not in self._voices: raise ValueError(f"Voice '{voice_name}' not found") # Remove all files for this voice for pattern in (f"{voice_name}.wav", f"{voice_name}.txt", f"{voice_name}_*.pt"): for f in CUSTOM_VOICES_DIR.glob(pattern): f.unlink() self._voices.pop(voice_name, None) logger.info(f"Deleted custom voice '{voice_name}'")