Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import requests | |
| import io | |
| import numpy as np | |
| from pydub import AudioSegment | |
| import tempfile | |
| import os | |
# Application-wide visual theme: Gradio's built-in "Soft" theme with a
# blue/indigo/slate palette, the Inter web font and large text/spacing,
# followed by overrides for the primary button colours and block titles.
custom_theme = gr.themes.Soft(
    # Colour palette.
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    # Sizing.
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
    # Typography.
    font=gr.themes.GoogleFont("Inter"),
).set(
    block_title_text_weight="600",
    # "*primary_NNN" refers to a shade of the primary hue chosen above.
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
)
def _decode_audio_payload(item):
    """Return raw audio bytes from one element of the API's ``data`` list.

    A JSON response cannot carry raw bytes, so the audio is expected as a
    base64 string, optionally with a ``data:<mime>;base64,`` prefix, either
    bare or under the ``"data"`` key of a dict.
    NOTE(review): the remote Space's response schema is not visible from this
    file -- confirm the shape against the actual API.

    Raises:
        gr.Error: if ``item`` holds no decodable audio payload.
    """
    import base64  # function-scope import keeps this block self-contained

    if isinstance(item, dict):
        item = item.get("data")
    if isinstance(item, (bytes, bytearray)):
        return bytes(item)
    if isinstance(item, str) and item:
        if item.startswith("data:") and "," in item:
            # Drop the "data:<mime>;base64," header of a data URI.
            item = item.split(",", 1)[1]
        return base64.b64decode(item)
    raise gr.Error("No audio data received from VibeVoice API")


def vibevoice_conversion(audio_file, speaker_id="default"):
    """Convert audio using the VibeVoice Realtime 0.5B model.

    The input is normalised to a temporary WAV file, sent to the remote
    VibeVoice Space, and the audio in the response is written to a new
    temporary WAV file.

    Args:
        audio_file: Either a path to an audio file, or a
            ``(sample_rate, numpy_array)`` tuple. The array is assumed to be
            integer PCM shaped ``(samples,)`` or ``(samples, channels)``.
        speaker_id: Name of the target speaker style forwarded to the API.

    Returns:
        Path of a temporary ``.wav`` file holding the converted audio. The
        file is not deleted here; the caller owns it.

    Raises:
        gr.Error: if no audio was supplied, the API call fails, or the
            response contains no usable audio.
    """
    import base64  # function-scope import keeps this block self-contained

    # Validate before the try block so the message reaches the user unwrapped.
    if audio_file is None:
        raise gr.Error("Please upload an audio file")

    temp_audio_path = None
    try:
        # Reserve a path for the normalised WAV; the handle is closed right
        # away so pydub can reopen the path for writing.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name

        if isinstance(audio_file, tuple):
            sample_rate, audio_data = audio_file
            # For 2-D input the channel count is the second axis; the first
            # axis is the number of samples.
            # NOTE(review): float arrays would be misread as integer PCM of
            # the same width -- confirm callers only pass integer samples.
            channels = 1 if len(audio_data.shape) == 1 else audio_data.shape[1]
            audio_segment = AudioSegment(
                audio_data.tobytes(),
                frame_rate=sample_rate,
                sample_width=audio_data.dtype.itemsize,
                channels=channels,
            )
        else:
            # Anything else is treated as a file path.
            audio_segment = AudioSegment.from_file(audio_file)
        audio_segment.export(temp_audio_path, format="wav")

        with open(temp_audio_path, "rb") as f:
            audio_bytes = f.read()

        # Host name follows the Space id "anycoderapps/VibeVoice-Realtime-0.5B"
        # (the previous URL spelled it "vibevice").
        api_url = "https://anycoderapps-vibevoice-realtime-0-5b.hf.space/run/predict"

        # Raw bytes are not JSON-serialisable, so send a base64 data URI.
        # NOTE(review): the {"name", "data"} wrapper assumes the legacy Gradio
        # file payload format -- confirm against the remote Space.
        encoded_audio = base64.b64encode(audio_bytes).decode("ascii")
        payload = {
            "data": [
                {
                    "name": "input.wav",
                    "data": f"data:audio/wav;base64,{encoded_audio}",
                },
                speaker_id,
            ]
        }

        # A timeout stops a stalled Space from hanging this worker forever.
        response = requests.post(api_url, json=payload, timeout=120)
        if response.status_code != 200:
            raise gr.Error(
                f"VibeVoice API request failed with status code: {response.status_code}"
            )

        result = response.json()
        if "data" not in result or not result["data"]:
            raise gr.Error("No audio data received from VibeVoice API")

        converted_audio_bytes = _decode_audio_payload(result["data"][0])

        # delete=False: the returned path must outlive this function so the
        # caller can read or serve it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_converted:
            temp_converted.write(converted_audio_bytes)
            return temp_converted.name
    except gr.Error:
        # Already user-facing; do not wrap it in a second message.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred during voice conversion: {str(e)}") from e
    finally:
        # Remove the intermediate input file on success and failure alike.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)
def process_audio(audio_file, speaker_id):
    """Process the audio file and return the converted audio.

    Thin entry point around ``vibevoice_conversion``.

    Args:
        audio_file: Audio input forwarded unchanged to ``vibevoice_conversion``.
        speaker_id: Target speaker style forwarded unchanged.

    Returns:
        Whatever ``vibevoice_conversion`` returns (the converted audio path).

    Raises:
        gr.Error: if the conversion fails for any reason.
    """
    try:
        return vibevoice_conversion(audio_file, speaker_id)
    except gr.Error:
        # Already a user-facing message; adding another prefix only makes the
        # text shown in the UI harder to read.
        raise
    except Exception as e:
        raise gr.Error(f"Error processing audio: {str(e)}") from e
def _convert_with_status(audio_file, speaker_id):
    """Run ``process_audio`` and pair its result with a status message.

    The convert button writes to two components (audio player and status
    box), so its handler must return two values; ``process_audio`` returns
    only the audio path.
    """
    converted_audio_path = process_audio(audio_file, speaker_id)
    return converted_audio_path, "Conversion complete!"


# Create the Gradio interface.
# The page title is set here because it is a gr.Blocks() option rather than a
# launch() option (per the Gradio docs; confirm against the installed version).
with gr.Blocks(title="VibeVoice Realtime 0.5B - Voice Conversion") as demo:
    gr.Markdown("# 🎤 VibeVoice Realtime 0.5B - Voice Conversion")
    gr.Markdown("""
    ### Convert your voice to different styles using the VibeVoice Realtime 0.5B model

    **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)**

    Upload an audio file and select a speaker style to convert your voice. The VibeVoice model can transform your voice while preserving the emotional content and prosody.
    """)

    with gr.Row():
        # Left column: input audio, style selection and the action button.
        with gr.Column():
            gr.Markdown("### Input Audio")
            input_audio = gr.Audio(
                label="Upload your audio file",
                type="filepath",
                sources=["upload", "microphone"],
                format="wav",
            )
            speaker_style = gr.Dropdown(
                choices=[
                    "default",
                    "female_1",
                    "male_1",
                    "child",
                    "elderly",
                    "emotional",
                ],
                value="default",
                label="Select Speaker Style",
            )
            convert_btn = gr.Button("🔄 Convert Voice", variant="primary", size="lg")

        # Right column: conversion result and a read-only status line.
        with gr.Column():
            gr.Markdown("### Converted Audio")
            output_audio = gr.Audio(
                label="Converted Audio",
                type="filepath",
                format="wav",
            )
            status_text = gr.Textbox(
                label="Status",
                value="Ready to convert your voice!",
                interactive=False,
            )

    # Add examples.
    # NOTE(review): these example.com URLs look like placeholders and are
    # unlikely to resolve to real audio -- replace with hosted samples.
    examples = gr.Examples(
        examples=[
            ["https://example.com/sample1.wav", "female_1"],
            ["https://example.com/sample2.wav", "male_1"],
            ["https://example.com/sample3.wav", "emotional"],
        ],
        inputs=[input_audio, speaker_style],
        label="Try these examples:",
    )

    # Set up the conversion event. The handler returns one value per output
    # component: (audio path, status text).
    convert_btn.click(
        fn=_convert_with_status,
        inputs=[input_audio, speaker_style],
        outputs=[output_audio, status_text],
        api_visibility="public",
        api_name="convert_voice",
    )

    gr.Markdown("""
    ### About VibeVoice Realtime 0.5B
    - **Model**: VibeVoice Realtime 0.5B
    - **Size**: 0.5 Billion parameters
    - **Features**: Real-time voice conversion with emotional preservation
    - **Capabilities**: Speaker style transfer, emotional content preservation, high-quality voice conversion

    ### Tips for Best Results
    - Use clear, high-quality audio recordings
    - Speak naturally and expressively
    - For best results, use audio samples of 5-15 seconds
    - The model preserves emotional content and prosody from the original voice
    """)

# Launch the application with the custom theme and footer links.
# NOTE(review): passing theme= and footer_links= to launch() assumes a Gradio
# release that accepts them there -- confirm against the pinned version.
# The former title=/description= arguments are not passed here: the title is
# set on gr.Blocks() above, and the description text already appears in the
# intro heading.
demo.launch(
    theme=custom_theme,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "VibeVoice Model", "url": "https://huggingface.co/spaces/anycoderapps/VibeVoice-Realtime-0.5B"},
        {"label": "Gradio", "url": "https://gradio.app"},
        {"label": "Hugging Face", "url": "https://huggingface.co"},
    ],
    show_error=True,
)