"""Gradio app that exposes OmniVoice zero-shot voice cloning, running on CPU."""

import contextlib
import os
import traceback
import uuid
from typing import Optional

import gradio as gr
import soundfile as sf
import torch
from omnivoice import OmniVoice

# -----------------------------
# Configuration
# -----------------------------
MODEL_ID = "k2-fsa/OmniVoice"

# Sample rate handed to soundfile when writing the generated waveform.
# NOTE(review): presumably the model's native output rate -- confirm
# against the OmniVoice documentation before changing.
OUTPUT_SAMPLE_RATE = 24000

# -----------------------------
# Load model (CPU version)
# -----------------------------
# The model is loaded once at import time so every request reuses it.
print("Loading OmniVoice model on CPU...")

model = OmniVoice.from_pretrained(
    MODEL_ID,
    device_map="cpu",  # IMPORTANT: CPU only
    dtype=torch.float32,  # IMPORTANT: float32 for CPU
    load_asr=False,
)

print("Model loaded successfully!")


# -----------------------------
# Inference function
# -----------------------------
def clone_voice(text: str, ref_audio: Optional[str]) -> Optional[str]:
    """Synthesize *text* using the voice from a reference recording.

    Args:
        text: The text to speak.
        ref_audio: Path to the reference audio file (the UI component is
            configured with ``type="filepath"``), or ``None`` when the
            user has not supplied one.

    Returns:
        Path of the written ``.wav`` file, or ``None`` when an input is
        missing/blank or generation fails. Failures are reported on the
        process output rather than raised, so the UI simply shows no audio.
    """
    if ref_audio is None:
        return None

    # Nothing to synthesize; treat like any other missing input.
    if not text or not text.strip():
        return None

    # A unique name per request keeps concurrent calls from clobbering
    # each other's output.
    output_path = f"output_{uuid.uuid4().hex}.wav"

    try:
        # Generate audio
        audio = model.generate(
            text=text,
            ref_audio=ref_audio,
        )

        # Save output file. Only the first element of the result is
        # written (presumably one waveform per input text -- confirm).
        sf.write(output_path, audio[0], OUTPUT_SAMPLE_RATE)
    except Exception as e:  # top-level request boundary: report, don't crash
        print("Error:", str(e))
        # Include the full traceback so failures can be diagnosed from logs.
        traceback.print_exc()
        # Don't leave a partially written file behind.
        with contextlib.suppress(OSError):
            os.remove(output_path)
        return None

    return output_path


# -----------------------------
# Gradio UI
# -----------------------------
iface = gr.Interface(
    fn=clone_voice,
    inputs=[
        gr.Textbox(
            label="Text",
            value="Hello, this is a test of zero-shot voice cloning.",
        ),
        gr.Audio(type="filepath", label="Reference Audio"),
    ],
    outputs=gr.Audio(type="filepath", label="Cloned Audio"),
    title="OmniVoice Voice Cloning API",
    description="Upload a voice sample and generate cloned speech.",
)

# -----------------------------
# Launch (important for Spaces)
# -----------------------------
if __name__ == "__main__":
    # Bind to all interfaces on the fixed port so the app is reachable
    # from outside the container.
    iface.launch(server_name="0.0.0.0", server_port=7860)