# RVX / rvc_lib.py
# Ai128474's picture
# Update rvc_lib.py
# 5b56183 verified
import torch
import torchaudio
import os
import soundfile as sf
from ovos_tts_plugin_manager.tts import TTSFactory # correct new way
class VoiceConverter:
    """RVC voice converter integrated with OpenVoiceOS TTS.

    Wraps two independent pieces: a torch model loaded from ``model_path``
    that is applied to audio files (``convert``), and a TTS engine obtained
    from ``TTSFactory`` that turns text into audio (``synthesize``).
    """

    def __init__(self, model_path, voice_name="en_GB-amy", device=None):
        """Load the conversion model and the TTS voice.

        Args:
            model_path: Path to the serialized torch model.
            voice_name: Voice identifier forwarded to ``TTSFactory.get_tts``.
            device: Torch device to run on; when falsy, "cuda" is used if
                available, otherwise "cpu".

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found: {model_path}")

        # Load RVC model.
        # SECURITY: torch.load unpickles arbitrary Python objects, so only
        # load checkpoints from a trusted source.
        # NOTE(review): the .eval() call below assumes the checkpoint
        # unpickles to a module-like object rather than a plain state dict;
        # confirm against how the checkpoints are saved.
        self.model = torch.load(model_path, map_location=self.device)
        self.model.eval()
        print(f"[INFO] Loaded RVC model: {model_path} on {self.device}")

        # Load TTS via OpenVoiceOS
        self.tts = TTSFactory.get_tts(voice_name=voice_name)
        print(f"[INFO] Loaded TTS voice: {voice_name}")

    @staticmethod
    def _converted_output_path(input_audio_path):
        """Return ``<input without extension>_converted.wav``.

        Only the final extension is replaced, so the result never equals the
        input path (which would overwrite the source audio) and ".wav"
        occurring elsewhere in the path, e.g. in a directory name, is left
        untouched.
        """
        stem, _extension = os.path.splitext(input_audio_path)
        return f"{stem}_converted.wav"

    def convert(self, input_audio_path):
        """Run the loaded model over an audio file and write the result.

        Args:
            input_audio_path: Path of the audio file to convert.

        Returns:
            Path of the written file, ``<input stem>_converted.wav``.
        """
        waveform, sample_rate = torchaudio.load(input_audio_path)
        if waveform.shape[0] > 1:
            # Average the channels to get a single-channel (mono) signal.
            waveform = waveform.mean(dim=0, keepdim=True)

        # The model was mapped to self.device at load time; the input must
        # live on the same device or the forward pass fails on CUDA.
        waveform = waveform.to(self.device)

        with torch.no_grad():
            # unsqueeze(0) adds a leading batch dimension; the output is
            # squeezed and moved back to the CPU for writing.
            converted_waveform = self.model(waveform.unsqueeze(0)).squeeze().cpu()

        output_path = self._converted_output_path(input_audio_path)
        sf.write(output_path, converted_waveform.numpy(), sample_rate)
        return output_path

    def synthesize(self, text, output_path="tts_output.wav", sample_rate=22050):
        """Generate speech for ``text`` and write it to ``output_path``.

        Args:
            text: Text to synthesize.
            output_path: Destination audio file.
            sample_rate: Sample rate passed to the writer; defaults to
                22050, the default Piper sample rate.

        Returns:
            ``output_path``.
        """
        # NOTE(review): this assumes self.tts.get_tts(text) returns sample
        # data that soundfile can write directly. Confirm against the TTS
        # plugin API in use; some OVOS TTS plugins expect a wav_file
        # argument and return a (path, phonemes) tuple instead.
        audio_bytes = self.tts.get_tts(text)
        sf.write(output_path, audio_bytes, sample_rate)
        return output_path