Spaces:

don0726
/

voice

Sleeping

App Files Files Community

voice / app.py

don0726

Update app.py

9a66fa5 verified about 2 months ago

raw

history blame contribute delete

2.41 kB

	import os
	import torch
	import gradio as gr
	import soundfile as sf
	import numpy as np
	import torchaudio
	from omnivoice import OmniVoice

	# 🔥 FIX torchaudio.load globally
	def safe_load(path, *args, **kwargs):
	    """Drop-in replacement for ``torchaudio.load`` backed by soundfile.

	    Args:
	        path: File to read; forwarded unchanged to ``sf.read``.
	        *args: Accepted so callers written against ``torchaudio.load``
	            keep working; ignored.
	        **kwargs: Accepted for the same reason; ignored.

	    Returns:
	        A ``(waveform, sample_rate)`` tuple. ``waveform`` is a float32
	        tensor with a leading dimension of size 1; input with more than
	        one dimension is averaged over axis 1 first.
	    """
	    data, sr = sf.read(path)
	    data = torch.as_tensor(data, dtype=torch.float32)

	    # Collapse multi-dimensional input to one track by averaging axis 1.
	    if data.ndim > 1:
	        data = data.mean(dim=1)

	    return data.unsqueeze(0), sr

	# Route every torchaudio.load call in this process through safe_load.
	torchaudio.load = safe_load

	# Point all Hugging Face cache locations at one directory under /tmp.
	# NOTE(review): these are assigned after the imports at the top of the
	# file; a library that reads its cache location at import time may not
	# see them - confirm, and move above the imports if so.
	os.environ.update(
	    dict.fromkeys(("HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE"), "/tmp/hf_cache")
	)

	# Use the GPU whenever torch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the model a single time at start-up: half precision on CUDA,
	# full precision on CPU.
	print("🚀 Loading model...")
	model = OmniVoice.from_pretrained(
	    "k2-fsa/OmniVoice",
	    torch_dtype={"cuda": torch.float16, "cpu": torch.float32}[device],
	    device_map=device,
	)
	print("✅ Model loaded")

	# 🎯 FUNCTION
	def clone_voice(audio_file, text, lang, ref_text):
	    """Synthesize ``text`` using an uploaded recording as the reference voice.

	    Args:
	        audio_file: Path of the reference recording. The UI passes a
	            filepath; a falsy value means nothing was uploaded.
	        text: Text to synthesize.
	        lang: Language code. It is prepended to the text as ``[lang]`` and
	            also passed to the model as ``language``.
	        ref_text: Optional transcript of the reference recording; an empty
	            value is passed to the model as ``None``.

	    Returns:
	        Path of a newly created WAV file written at 24 kHz.

	    Raises:
	        gr.Error: If no recording was uploaded or ``text`` is blank.
	    """
	    import tempfile

	    target_sr = 24000

	    # Fail with a readable UI message instead of a traceback from sf.read.
	    if not audio_file:
	        raise gr.Error("Please upload a voice sample.")
	    if not text or not text.strip():
	        raise gr.Error("Please enter the text to synthesize.")

	    # Load the reference and average multi-dimensional input over axis 1.
	    waveform, sr = sf.read(audio_file)
	    waveform = torch.tensor(waveform).float()
	    if waveform.ndim > 1:
	        waveform = waveform.mean(dim=1)
	    waveform = waveform.unsqueeze(0)

	    # Resample so the reference matches the rate it is written at below.
	    if sr != target_sr:
	        waveform = torchaudio.transforms.Resample(sr, target_sr)(waveform)

	    final_text = f"[{lang}] {text}"

	    # A unique temp file per call: a fixed "temp.wav" in the working
	    # directory would be overwritten by any request running concurrently.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_handle:
	        ref_path = ref_handle.name
	    try:
	        sf.write(ref_path, waveform.squeeze(0).cpu().numpy(), target_sr)
	        audio = model.generate(
	            text=final_text,
	            ref_audio=ref_path,
	            ref_text=ref_text if ref_text else None,
	            language=lang,
	        )
	    finally:
	        # Best-effort cleanup; a failure to delete must not mask the result
	        # or the original exception.
	        try:
	            os.remove(ref_path)
	        except OSError:
	            pass

	    # Normalise the model output to a single tensor.
	    if isinstance(audio, (list, tuple)):
	        audio = audio[0]
	    if not isinstance(audio, torch.Tensor):
	        audio = torch.as_tensor(audio)

	    # soundfile cannot write float16, which a half-precision model may
	    # return, and .numpy() refuses tensors attached to the autograd graph.
	    samples = audio.detach().squeeze().float().cpu().numpy()

	    # Unique output path for the same reason as the reference file.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_handle:
	        output_file = out_handle.name
	    sf.write(output_file, samples, target_sr)

	    return output_file


	# 🎨 UI
	# Build the web form: one audio upload followed by three text fields,
	# passed to clone_voice in that order; the result is played back as audio.
	demo = gr.Interface(
	    title="🎤 OmniVoice Cloner",
	    description="Upload voice → enter text → language → generate cloned speech",
	    fn=clone_voice,
	    inputs=[gr.Audio(type="filepath", label="Upload Voice Sample")]
	    + [
	        gr.Textbox(label=caption)
	        for caption in (
	            "Enter Text",
	            "Language Code (en, hi, etc)",
	            "Reference Text (optional)",
	        )
	    ],
	    outputs=gr.Audio(label="Generated Voice"),
	)

	# Start serving the interface.
	demo.launch()