Spaces:

Athagi
/

Gttggtt

Sleeping

App Files Files Community

Gttggtt / app.py

Athagi

Update app.py

29c9c4c verified 11 months ago

raw

history blame contribute delete

1.91 kB

	import os
	import gradio as gr
	import torch
	import librosa
	import numpy as np
	import soundfile as sf
	import requests

	# ========== MODEL SETUP ==========
	# Remote location of the pretrained checkpoint and the local path it is saved to.
	MODEL_URL = "https://huggingface.co/MMVC/prelearned-model/resolve/main/D_v13_20231020.pth"
	MODEL_PATH = "model/D_v13_20231020.pth"
	# Create the checkpoint's parent directory up front so later writes cannot
	# fail on a missing folder; a pre-existing directory is not an error.
	os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

	def download_model():
	    """Download the pretrained checkpoint to ``MODEL_PATH`` if it is missing.

	    Does nothing when ``MODEL_PATH`` already exists. Otherwise the file is
	    streamed from ``MODEL_URL`` into a temporary ``.part`` file and moved
	    into place only after the transfer finished, so a failed or interrupted
	    download never leaves a file at ``MODEL_PATH``.

	    Raises:
	        requests.RequestException: On connection errors, timeouts, or a
	            non-2xx HTTP status.
	        OSError: If the checkpoint cannot be written to disk.
	    """
	    if os.path.exists(MODEL_PATH):
	        return

	    print("Downloading model...")
	    partial_path = MODEL_PATH + ".part"
	    try:
	        # stream=True avoids holding the whole checkpoint in memory; the
	        # timeout is (connect, read) seconds so startup cannot hang forever.
	        with requests.get(MODEL_URL, stream=True, timeout=(10, 300)) as response:
	            # Without this check an HTTP error page would be saved as the model.
	            response.raise_for_status()
	            with open(partial_path, "wb") as f:
	                for chunk in response.iter_content(chunk_size=1 << 20):
	                    f.write(chunk)
	        # Atomic rename: MODEL_PATH only ever appears fully written.
	        os.replace(partial_path, MODEL_PATH)
	    except Exception:
	        # Drop the incomplete file so the next start retries the download.
	        if os.path.exists(partial_path):
	            os.remove(partial_path)
	        raise
	    print("Model downloaded.")

	# Fetch the checkpoint when the module is loaded; download_model() itself
	# skips the download if MODEL_PATH already exists.
	download_model()

	# ========== DUMMY VOICE CHANGER MODEL ==========
	class DummyVoiceChanger(torch.nn.Module):
	    """Placeholder "voice changer" that only scales audio by a learnable gain.

	    The single parameter ``gain`` is initialised to 1.0, so a freshly
	    constructed instance returns its input unchanged (as float32).
	    """

	    def __init__(self):
	        super().__init__()
	        unity = torch.tensor(1.0)
	        self.gain = torch.nn.Parameter(unity)

	    def forward(self, audio):
	        """Scale ``audio`` by ``gain`` and return the result as a NumPy array.

	        Args:
	            audio: Array-like of samples; it is copied into a float32 tensor.

	        Returns:
	            A float32 ``numpy.ndarray`` with the scaled samples.
	        """
	        waveform = torch.tensor(audio, dtype=torch.float32)
	        scaled = torch.mul(waveform, self.gain)
	        # detach() is required: the product tracks gradients through ``gain``
	        # and such tensors cannot be converted to NumPy directly.
	        return scaled.detach().numpy()

	# Single module-level instance, created once and reused by convert_voice().
	model = DummyVoiceChanger()
	# The checkpoint at MODEL_PATH is deliberately not loaded: this model is
	# only a placeholder. A real model would be restored here, e.g. with
	# torch.load(MODEL_PATH).

	# ========== INFERENCE FUNCTION ==========
	def convert_voice(audio_file):
	    """Run an uploaded recording through ``model`` and save the result as WAV.

	    The audio is resampled to 16 kHz, padded or truncated to exactly five
	    seconds, passed through the module-level ``model``, peak-normalised and
	    written to a new temporary ``.wav`` file.

	    Args:
	        audio_file: Path of the uploaded audio file, or ``None`` when the
	            user submitted without providing audio.

	    Returns:
	        Path of the WAV file holding the converted audio.

	    Raises:
	        gr.Error: If no audio file was provided.
	    """
	    # Local import keeps this function self-contained.
	    import tempfile

	    if audio_file is None:
	        # Show a readable message instead of an opaque loader failure.
	        raise gr.Error("Please upload an audio file first.")

	    sample_rate = 16000
	    clip_seconds = 5

	    audio_data, _ = librosa.load(audio_file, sr=sample_rate)
	    # Force a fixed length: shorter clips are zero-padded, longer ones cut.
	    audio_data = librosa.util.fix_length(audio_data, size=sample_rate * clip_seconds)

	    converted = model(audio_data)
	    # Peak-normalise; the epsilon avoids dividing by zero on silent input.
	    converted /= np.max(np.abs(converted)) + 1e-6

	    # A unique file per call: a shared "output.wav" would let concurrent
	    # requests overwrite each other's result. The file is not removed here
	    # because the caller still has to read it after this function returns.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name
	    sf.write(output_path, converted, sample_rate)
	    return output_path

	# ========== GRADIO INTERFACE ==========
	# Both widgets exchange file paths with convert_voice rather than raw samples.
	voice_input = gr.Audio(type="filepath", label="Upload Voice")
	voice_output = gr.Audio(type="filepath", label="Converted Voice")

	interface = gr.Interface(
	    fn=convert_voice,
	    inputs=voice_input,
	    outputs=voice_output,
	    title="🗣️ AI Voice Changer (No RVC / No TTS)",
	    description="Simple PyTorch voice changer using a dummy model and direct model download. Replace dummy model with real MMVC for production.",
	)

	# Launch the Gradio app as soon as the module is executed.
	interface.launch()