import os

import numpy as np
import soundfile as sf
import torch
import torchaudio
from ovos_tts_plugin_manager.tts import TTSFactory


class VoiceConverter:
    """RVC voice converter integrated with OpenVoiceOS TTS.

    Holds a PyTorch model loaded from disk (used by :meth:`convert`) and a
    TTS engine obtained from ``TTSFactory`` (used by :meth:`synthesize`).
    """

    def __init__(self, model_path, voice_name="en_GB-amy", device=None):
        """Load the conversion model and the TTS voice.

        Args:
            model_path: Path to a file readable by ``torch.load``.
            voice_name: Voice identifier forwarded to ``TTSFactory.get_tts``.
            device: Torch device string. When falsy, ``"cuda"`` is chosen if
                available, otherwise ``"cpu"``.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            TypeError: If the loaded object cannot be called or has no
                ``eval()`` method, i.e. it is not a usable module.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found: {model_path}")

        # SECURITY: torch.load unpickles the file and can execute arbitrary
        # code while doing so; only load model files from trusted sources.
        # NOTE(review): newer PyTorch releases default torch.load to
        # weights_only=True, which refuses fully pickled modules. Confirm the
        # installed version can load this file.
        model = torch.load(model_path, map_location=self.device)
        if not (callable(model) and hasattr(model, "eval")):
            # Fail with a clear message instead of an AttributeError on
            # .eval() when the file holds something other than a module.
            raise TypeError(
                "Expected a callable model with an eval() method in "
                f"{model_path}, got {type(model).__name__}"
            )
        self.model = model
        self.model.eval()
        print(f"[INFO] Loaded RVC model: {model_path} on {self.device}")

        # NOTE(review): the signature of TTSFactory.get_tts is not visible in
        # this file; confirm it accepts a voice_name keyword.
        self.tts = TTSFactory.get_tts(voice_name=voice_name)
        print(f"[INFO] Loaded TTS voice: {voice_name}")

    @staticmethod
    def _converted_path(input_audio_path):
        """Return ``<input without extension>_converted.wav``."""
        # Only the final extension is replaced, so the result never equals
        # the input path and directory names are left untouched.
        stem, _ = os.path.splitext(os.fspath(input_audio_path))
        return f"{stem}_converted.wav"

    def convert(self, input_audio_path, output_path=None):
        """Run an audio file through the model and write the result.

        Args:
            input_audio_path: Audio file to load with ``torchaudio.load``.
            output_path: Destination file. Defaults to the input path with
                its extension replaced by ``_converted.wav``.

        Returns:
            The path the converted audio was written to.
        """
        waveform, sample_rate = torchaudio.load(input_audio_path)
        if waveform.shape[0] > 1:
            # Average the channels so the model always sees one channel.
            waveform = waveform.mean(dim=0, keepdim=True)

        with torch.no_grad():
            # The model was loaded onto self.device, so the input must be
            # placed there as well before the forward pass.
            batch = waveform.unsqueeze(0).to(self.device)
            converted_waveform = self.model(batch).squeeze().cpu()

        if output_path is None:
            output_path = self._converted_path(input_audio_path)

        # NOTE(review): the output is written at the input file's sample
        # rate; confirm the model does not resample.
        sf.write(output_path, converted_waveform.numpy(), sample_rate)
        return output_path

    def synthesize(self, text, output_path="tts_output.wav", sample_rate=22050):
        """Synthesize ``text`` with the TTS engine and write it to a file.

        Args:
            text: Text passed to ``self.tts.get_tts``.
            output_path: Destination audio file.
            sample_rate: Sample rate used when the audio is written through
                ``soundfile``.

        Returns:
            ``output_path``.
        """
        # NOTE(review): the return type of tts.get_tts is not visible in this
        # file; bytes-like and array-like results are both handled below.
        audio = self.tts.get_tts(text)

        if isinstance(audio, (bytes, bytearray, memoryview)):
            raw = bytes(audio)
            if raw[:4] == b"RIFF" and raw[8:12] == b"WAVE":
                # Already a complete WAV container: store it unchanged.
                with open(output_path, "wb") as wav_file:
                    wav_file.write(raw)
                return output_path
            # Assumes headerless bytes are 16-bit mono PCM at sample_rate.
            # TODO confirm against the TTS plugin in use.
            usable = len(raw) - (len(raw) % 2)
            audio = np.frombuffer(raw[:usable], dtype=np.int16)

        sf.write(output_path, audio, sample_rate)
        return output_path