# voicee / app.py
# don0726's picture
# Update app.py
# b18808a verified
import gradio as gr
from omnivoice import OmniVoice
import soundfile as sf
import torch
import os
import uuid
# -----------------------------
# Load model (CPU version)
# -----------------------------
# The model is created once at import time; clone_voice reads this global
# on every call instead of reloading the weights.
print("Loading OmniVoice model on CPU...")
_LOAD_OPTIONS = {
    "device_map": "cpu",  # IMPORTANT: CPU only
    "dtype": torch.float32,  # IMPORTANT: float32 for CPU
    # NOTE(review): presumably skips loading an ASR component - confirm
    # against the OmniVoice documentation.
    "load_asr": False,
}
model = OmniVoice.from_pretrained("k2-fsa/OmniVoice", **_LOAD_OPTIONS)
print("Model loaded successfully!")
# -----------------------------
# Inference function
# -----------------------------
def clone_voice(text, ref_audio, sample_rate=24000):
    """Synthesise ``text`` in the voice of ``ref_audio`` and save it as WAV.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected
            without calling the model.
        ref_audio: Reference recording to clone; the UI passes a file path.
            ``None`` means no sample was supplied.
        sample_rate: Sample rate in Hz written to the output file. Defaults
            to 24000, the value this function previously hard-coded
            (presumably the model's native rate - TODO confirm).

    Returns:
        Path of the generated WAV file, or ``None`` when an input is missing
        or generation/saving fails (the error is printed to stdout).
    """
    # Local import keeps this block self-contained; the file's import
    # section does not bring in tempfile.
    import tempfile

    # Guard clauses: nothing to do without a voice sample or any text.
    if ref_audio is None:
        return None
    if not text or not str(text).strip():
        return None

    try:
        # Generate audio
        audio = model.generate(
            text=text,
            ref_audio=ref_audio,
        )
        # Save into the system temp directory rather than the working
        # directory, so generated files do not pile up next to the app code.
        # The random suffix keeps concurrent requests from colliding.
        output_path = os.path.join(
            tempfile.gettempdir(), f"output_{uuid.uuid4().hex}.wav"
        )
        # Only the first element of the model output is written.
        sf.write(output_path, audio[0], sample_rate)
        return output_path
    except Exception as e:
        # Top-level UI boundary: report the failure and give the interface
        # an empty result instead of crashing the request.
        print("Error:", str(e))
        return None
# -----------------------------
# Gradio UI
# -----------------------------
# Input widgets, listed in the order clone_voice receives its arguments:
# the text first, then the reference recording (handed over as a file path).
_text_box = gr.Textbox(
    label="Text",
    value="Hello, this is a test of zero-shot voice cloning.",
)
_reference_clip = gr.Audio(type="filepath", label="Reference Audio")

# Output widget: plays the file whose path clone_voice returns.
_cloned_clip = gr.Audio(type="filepath", label="Cloned Audio")

iface = gr.Interface(
    fn=clone_voice,
    inputs=[_text_box, _reference_clip],
    outputs=_cloned_clip,
    title="OmniVoice Voice Cloning API",
    description="Upload a voice sample and generate cloned speech.",
)
# -----------------------------
# Launch (important for Spaces)
# -----------------------------
if __name__ == "__main__":
    # Start the web server only when run as a script: listen on every
    # network interface at port 7860.
    _launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    iface.launch(**_launch_options)