Spaces:

grimshaw
/

neuapi

Running

App Files Files Community

neuapi / api /src /inference /voice_manager.py

grimshaw

Upload folder using huggingface_hub

35bb6f4 verified about 1 month ago

Raw

History Blame Contribute Delete

5.12 kB

	from __future__ import annotations

	import shutil
	from pathlib import Path

	import torch
	from loguru import logger

	from api.src.core.config import settings
	from api.src.core.model_config import BUILTIN_VOICES
	from api.src.core.paths import (
	BUILTIN_VOICES_DIR,
	CUSTOM_VOICES_DIR,
	ensure_voice_dirs,
	get_voice_codes,
	get_voice_text,
	get_voice_wav,
	is_custom_voice,
	voice_codes_path,
	)


	class VoiceManager:
	    """In-memory registry of built-in and custom reference voices.

	    Each registry entry is a dict with the keys ``name``, ``language``,
	    ``gender``, ``description``, ``custom`` and ``available``. The registry
	    is rebuilt from ``BUILTIN_VOICES`` and the ``*.wav`` files found in
	    ``CUSTOM_VOICES_DIR`` by :meth:`scan_voices`.

	    Use :meth:`get_instance` to obtain the shared instance; it is created
	    lazily on first use and no locking is performed.
	    """

	    _instance: VoiceManager | None = None

	    def __init__(self) -> None:
	        # Maps voice name -> metadata dict (see class docstring for keys).
	        self._voices: dict[str, dict] = {}

	    @classmethod
	    def get_instance(cls) -> VoiceManager:
	        """Return the shared manager, creating it on first call."""
	        if cls._instance is None:
	            cls._instance = cls()
	        return cls._instance

	    @staticmethod
	    def _validate_voice_name(voice_name: str) -> None:
	        """Raise ``ValueError`` unless ``voice_name`` is usable as a bare file stem.

	        The name is interpolated into file names under ``CUSTOM_VOICES_DIR``,
	        so path separators and the special directory names are rejected to
	        keep every write inside that directory.
	        """
	        if not voice_name or not voice_name.strip():
	            raise ValueError("Voice name must not be empty")
	        has_separator = any(ch in voice_name for ch in ("/", "\\", "\x00"))
	        if has_separator or voice_name in {".", ".."}:
	            raise ValueError(f"Invalid voice name '{voice_name}'")

	    @staticmethod
	    def _cached_code_files(voice_name: str) -> list[Path]:
	        """List ``<voice_name>_*.pt`` files in ``CUSTOM_VOICES_DIR``.

	        File names are compared literally (prefix/suffix) instead of via a
	        glob pattern, so characters such as ``[`` or ``*`` inside a voice
	        name are never interpreted as wildcards.

	        NOTE(review): like the ``<name>_*.pt`` pattern this replaces, the
	        match also covers cache files of another voice whose name starts
	        with ``<voice_name>_``; those would simply be re-encoded on demand.
	        """
	        if not CUSTOM_VOICES_DIR.is_dir():
	            return []
	        prefix = f"{voice_name}_"
	        return [
	            path
	            for path in CUSTOM_VOICES_DIR.iterdir()
	            if path.name.startswith(prefix) and path.name.endswith(".pt")
	        ]

	    def scan_voices(self) -> None:
	        """Rebuild the registry from ``BUILTIN_VOICES`` and the custom voice directory.

	        Built-in voices are always listed and marked available only when both
	        their WAV and reference-text files resolve. Custom voices are found by
	        their ``*.wav`` file and are available when a reference text resolves.
	        A custom file whose stem equals a built-in name is ignored.

	        The registry is cleared first, so ``language``/``gender`` values that
	        were supplied to :meth:`upload_voice` are reset to ``"unknown"``.
	        """
	        ensure_voice_dirs()
	        self._voices.clear()

	        # Built-in voices
	        for name, info in BUILTIN_VOICES.items():
	            wav_exists = get_voice_wav(name) is not None
	            txt_exists = get_voice_text(name) is not None
	            self._voices[name] = {
	                "name": name,
	                "language": info["language"],
	                "gender": info["gender"],
	                "description": info["description"],
	                "custom": False,
	                "available": wav_exists and txt_exists,
	            }

	        # Custom voices: scan for .wav files
	        for wav in CUSTOM_VOICES_DIR.glob("*.wav"):
	            name = wav.stem
	            if name in self._voices:
	                # Built-in entries take precedence over same-named custom files.
	                continue
	            txt_exists = get_voice_text(name) is not None
	            self._voices[name] = {
	                "name": name,
	                "language": "unknown",
	                "gender": "unknown",
	                "description": "Custom uploaded voice",
	                "custom": True,
	                "available": txt_exists,
	            }

	        available = sum(1 for v in self._voices.values() if v.get("available", True))
	        logger.info(
	            f"Scanned {len(self._voices)} voices ({len(BUILTIN_VOICES)} builtin, {available} available)"
	        )

	    @property
	    def voices(self) -> dict[str, dict]:
	        """The live registry dict (not a copy), keyed by voice name."""
	        return self._voices

	    def voice_exists(self, voice_name: str) -> bool:
	        """Return whether ``voice_name`` is present in the registry."""
	        return voice_name in self._voices

	    def get_ref_text(self, voice_name: str) -> str:
	        """Return the stripped UTF-8 reference text for ``voice_name``.

	        Raises:
	            FileNotFoundError: If no reference text file resolves for the voice.
	        """
	        txt_path = get_voice_text(voice_name)
	        if txt_path is None:
	            raise FileNotFoundError(f"No reference text found for voice '{voice_name}'")
	        return txt_path.read_text(encoding="utf-8").strip()

	    def get_ref_codes(self, voice_name: str, codec_id: str) -> torch.Tensor | None:
	        """Load cached reference codes for a voice/codec pair onto the CPU.

	        Returns:
	            The loaded object, or ``None`` when no cached file resolves.
	        """
	        codes_path = get_voice_codes(voice_name, codec_id)
	        if codes_path is None:
	            return None
	        # weights_only=True restricts unpickling to tensor data.
	        return torch.load(codes_path, map_location="cpu", weights_only=True)

	    async def get_or_encode_ref_codes(
	        self,
	        voice_name: str,
	        codec_id: str,
	        model_manager: object,
	        model_id: str,
	    ) -> object:
	        """Return reference codes for a voice, encoding and caching them if needed.

	        Args:
	            voice_name: Voice whose reference audio should be used.
	            codec_id: Codec identifier used to locate/name the cache file.
	            model_manager: Object exposing an awaitable
	                ``encode_reference(model_id, wav_path)`` method.
	            model_id: Model identifier forwarded to ``encode_reference``.

	        Returns:
	            The cached codes when present, otherwise the freshly encoded ones.

	        Raises:
	            FileNotFoundError: If nothing is cached and no WAV file resolves.
	        """
	        codes = self.get_ref_codes(voice_name, codec_id)
	        if codes is not None:
	            return codes

	        wav_path = get_voice_wav(voice_name)
	        if wav_path is None:
	            raise FileNotFoundError(f"No WAV file found for voice '{voice_name}'")

	        logger.info(f"Encoding reference for voice '{voice_name}' with codec '{codec_id}'")
	        ref_codes = await model_manager.encode_reference(model_id, str(wav_path))

	        # Cache the encoded reference
	        custom = is_custom_voice(voice_name)
	        save_path = voice_codes_path(voice_name, codec_id, custom=custom)
	        torch.save(ref_codes, save_path)
	        logger.info(f"Cached reference codes at {save_path}")

	        return ref_codes

	    def upload_voice(
	        self,
	        voice_name: str,
	        wav_data: bytes,
	        ref_text: str,
	        language: str = "unknown",
	        gender: str = "unknown",
	    ) -> Path:
	        """Store a custom voice on disk and register it as available.

	        Args:
	            voice_name: Name of the voice; used as the file stem.
	            wav_data: Raw bytes written to ``<voice_name>.wav``.
	            ref_text: Reference transcript written to ``<voice_name>.txt``.
	            language: Language label kept in the in-memory registry only.
	            gender: Gender label kept in the in-memory registry only.

	        Returns:
	            Path of the written WAV file.

	        Raises:
	            ValueError: If ``voice_name`` is empty, contains a path separator,
	                or collides with a built-in voice (such an upload could not be
	                removed again by :meth:`delete_voice` and would be hidden by
	                the next :meth:`scan_voices`).
	        """
	        # Validate before touching the filesystem so a hostile name cannot
	        # escape CUSTOM_VOICES_DIR.
	        self._validate_voice_name(voice_name)
	        if voice_name in BUILTIN_VOICES:
	            raise ValueError(f"Cannot overwrite built-in voice '{voice_name}'")

	        ensure_voice_dirs()
	        wav_path = CUSTOM_VOICES_DIR / f"{voice_name}.wav"
	        txt_path = CUSTOM_VOICES_DIR / f"{voice_name}.txt"

	        wav_path.write_bytes(wav_data)
	        txt_path.write_text(ref_text, encoding="utf-8")

	        # Re-uploading replaces the audio, so codes cached from the previous
	        # WAV are stale and must not be served by get_ref_codes().
	        for stale in self._cached_code_files(voice_name):
	            stale.unlink(missing_ok=True)

	        self._voices[voice_name] = {
	            "name": voice_name,
	            "language": language,
	            "gender": gender,
	            "description": "Custom uploaded voice",
	            "custom": True,
	            "available": True,
	        }

	        logger.info(f"Uploaded custom voice '{voice_name}' (lang={language}, gender={gender})")
	        return wav_path

	    def delete_voice(self, voice_name: str) -> None:
	        """Remove a custom voice's files and its registry entry.

	        Deletes ``<voice_name>.wav``, ``<voice_name>.txt`` and every
	        ``<voice_name>_*.pt`` file in ``CUSTOM_VOICES_DIR``.

	        Raises:
	            ValueError: If the voice is built-in or not in the registry.
	        """
	        if voice_name in BUILTIN_VOICES:
	            raise ValueError(f"Cannot delete built-in voice '{voice_name}'")

	        if voice_name not in self._voices:
	            raise ValueError(f"Voice '{voice_name}' not found")

	        # Remove all files for this voice. Paths are built literally (no glob
	        # pattern) and missing files are tolerated, so an incomplete voice or
	        # a concurrent delete does not raise.
	        targets = [
	            CUSTOM_VOICES_DIR / f"{voice_name}.wav",
	            CUSTOM_VOICES_DIR / f"{voice_name}.txt",
	            *self._cached_code_files(voice_name),
	        ]
	        for path in targets:
	            path.unlink(missing_ok=True)

	        self._voices.pop(voice_name, None)
	        logger.info(f"Deleted custom voice '{voice_name}'")