import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch
|
|
| |
# Where the pretrained checkpoint is kept on disk, and where to fetch it from.
MODEL_PATH = "model/D_v13_20231020.pth"
MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"

# Create the checkpoint directory up front; a no-op when it already exists.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
|
|
def download_model():
    """Download the pretrained checkpoint to ``MODEL_PATH`` if it is missing.

    Does nothing when ``MODEL_PATH`` already exists. Otherwise the file at
    ``MODEL_URL`` is streamed to a temporary ``.part`` file and renamed into
    place only after the transfer has finished, so a failed or interrupted
    download never leaves a truncated (or HTML error-page) file that later
    runs would mistake for a valid checkpoint.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the file cannot be written or moved into place.
    """
    if os.path.exists(MODEL_PATH):
        return

    print("Downloading model...")
    # Make sure the target directory exists so the function works on its own.
    os.makedirs(os.path.dirname(MODEL_PATH) or ".", exist_ok=True)
    partial_path = MODEL_PATH + ".part"
    try:
        # stream=True avoids holding the whole checkpoint in memory;
        # the timeout is (connect, read) seconds so a dead server cannot hang us.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # Atomic rename: MODEL_PATH only ever appears fully written.
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful rename the partial file is gone; if it is still
        # here the download failed, so remove the leftovers.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Model downloaded.")
|
|
# Fetch the checkpoint at import time, before the model and UI are set up.
download_model()
|
|
| |
class DummyVoiceChanger(torch.nn.Module):
    """Placeholder voice changer that scales the signal by one learnable gain.

    The gain starts at 1.0, so a freshly constructed instance returns its
    input unchanged (converted to float32).
    """

    def __init__(self):
        super().__init__()
        # Scalar multiplier applied to every sample.
        self.gain = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, audio):
        """Multiply ``audio`` by ``self.gain`` and return a NumPy array.

        Args:
            audio: Samples as a NumPy array, a tensor, a list, or anything
                else ``torch.as_tensor`` accepts.

        Returns:
            A float32 ``numpy.ndarray`` with the same shape as ``audio``.
        """
        # as_tensor (unlike torch.tensor) accepts an existing tensor without
        # a copy-construct warning; placing it on the gain's device keeps the
        # multiplication valid after the module has been moved with .to().
        samples = torch.as_tensor(
            audio, dtype=torch.float32, device=self.gain.device
        )
        # .cpu() is a no-op on CPU and is required before .numpy() elsewhere.
        return (samples * self.gain).detach().cpu().numpy()
|
|
# Single shared instance; convert_voice calls it for every request.
model = DummyVoiceChanger()
| |
| |
|
|
| |
def convert_voice(audio_file):
    """Run an uploaded recording through ``model`` and return a WAV file path.

    The recording is loaded at 16 kHz, padded or truncated to exactly five
    seconds, passed through the module-level ``model``, peak-normalised, and
    written to a new temporary ``.wav`` file.

    Args:
        audio_file: Path of the uploaded audio file, as supplied by the
            Gradio ``Audio(type="filepath")`` input. ``None`` or empty when
            nothing was uploaded.

    Returns:
        Path of the WAV file containing the converted audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio_file:
        # Surface a readable message in the UI instead of an opaque loader error.
        raise gr.Error("Please upload an audio file first.")

    sample_rate = 16000
    clip_seconds = 5

    samples, _ = librosa.load(audio_file, sr=sample_rate)
    # Force a fixed-length clip: shorter input is padded, longer is cut.
    samples = librosa.util.fix_length(samples, size=sample_rate * clip_seconds)

    converted = model(samples)
    # Peak-normalise; the epsilon avoids division by zero on silent input.
    converted = converted / (np.max(np.abs(converted)) + 1e-6)

    # A unique file per call, so overlapping requests cannot overwrite each
    # other's result the way a shared "output.wav" would.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, converted, sample_rate)
    return output_path
|
|
| |
# Input and output widgets both exchange audio as file paths, which matches
# what convert_voice accepts and returns.
_upload_widget = gr.Audio(type="filepath", label="Upload Voice")
_result_widget = gr.Audio(type="filepath", label="Converted Voice")

# Wire the conversion function into a simple one-input / one-output web UI.
interface = gr.Interface(
    fn=convert_voice,
    inputs=_upload_widget,
    outputs=_result_widget,
    title="🗣️ AI Voice Changer (No RVC / No TTS)",
    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
)

# Start the Gradio server as soon as the script runs.
interface.launch()