import logging
import os
import tempfile
import uuid
from typing import Optional

import gradio as gr
import soundfile as sf
import torch
from omnivoice import OmniVoice
|
|
| |
| |
| |
# The model is loaded once, at import time, and bound to the module-level
# name `model` so that every request handled by this process reuses it.
print("Loading OmniVoice model on CPU...")

# Loader options kept together so the CPU / float32 configuration is easy to
# spot and change in one place.
# NOTE(review): `load_asr=False` presumably skips loading a speech-recognition
# component — confirm against the OmniVoice documentation.
_model_options = {
    "device_map": "cpu",
    "dtype": torch.float32,
    "load_asr": False,
}
model = OmniVoice.from_pretrained("k2-fsa/OmniVoice", **_model_options)

print("Model loaded successfully!")
|
|
| |
| |
| |
# Sample rate (Hz) declared in the header of every generated WAV file.
# NOTE(review): 24000 is carried over from the original hard-coded value;
# it must match the rate the model actually produces — confirm.
_OUTPUT_SAMPLE_RATE = 24000


def clone_voice(text: Optional[str], ref_audio: Optional[str]) -> Optional[str]:
    """Synthesize *text* using *ref_audio* as the voice reference.

    Args:
        text: The text to speak. Empty, whitespace-only or missing text is
            rejected without calling the model.
        ref_audio: Reference recording, forwarded unchanged to
            ``model.generate``. ``None`` means no sample was supplied.

    Returns:
        The path of a newly written WAV file in the system temporary
        directory, or ``None`` when an input is missing or generation fails.
        Failures are logged with their traceback rather than raised, so the
        caller always receives a path or ``None``.
    """
    # Guard clauses: nothing to do without both a voice sample and some text.
    if ref_audio is None:
        return None
    if not text or not text.strip():
        return None

    try:
        audio = model.generate(text=text, ref_audio=ref_audio)

        # Write to the temp directory instead of the working directory so
        # generated files do not pile up next to the application code and the
        # app also works when the working directory is read-only. The uuid
        # keeps concurrent requests from overwriting each other's output.
        output_path = os.path.join(
            tempfile.gettempdir(), f"output_{uuid.uuid4().hex}.wav"
        )
        # Only the first element of the generate() result is saved.
        sf.write(output_path, audio[0], _OUTPUT_SAMPLE_RATE)
    except Exception:
        # Top-level request boundary: keep the "return None on failure"
        # contract, but record the full traceback instead of just str(e).
        logging.getLogger(__name__).exception("Voice cloning failed")
        return None

    return output_path
|
|
|
|
| |
| |
| |
# Input widgets, in the same order as clone_voice's parameters (text, ref_audio).
_text_input = gr.Textbox(
    label="Text",
    value="Hello, this is a test of zero-shot voice cloning.",
)
# type="filepath" makes Gradio hand the handler a path rather than raw samples.
_reference_input = gr.Audio(type="filepath", label="Reference Audio")

# Output widget; clone_voice returns a file path (or None), hence "filepath".
_cloned_output = gr.Audio(type="filepath", label="Cloned Audio")

# Web UI / API wrapper around clone_voice.
iface = gr.Interface(
    fn=clone_voice,
    inputs=[_text_input, _reference_input],
    outputs=_cloned_output,
    title="OmniVoice Voice Cloning API",
    description="Upload a voice sample and generate cloned speech.",
)
|
|
| |
| |
| |
if __name__ == "__main__":
    # Bind to every network interface on port 7860, so the app is reachable
    # from other machines and not only from localhost.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    iface.launch(**launch_options)