# "Spaces: Running" — Hugging Face Space status banner captured by the page
# scrape; commented out so the module remains valid Python.
| import gradio as gr | |
| import spaces | |
| import subprocess | |
| import os | |
| from PIL import Image | |
| import ffmpeg | |
| from pydub import AudioSegment | |
| from gtts import gTTS | |
| import numpy as np | |
| import soundfile as sf | |
def save_audio_mp3(audio_tuple, filename):
    """Write a (sampling_rate, samples) tuple to an MP3 file.

    Args:
        audio_tuple: Pair of (sampling_rate, audio_data) as produced by a
            gradio ``Audio`` component — an int sample rate and a numpy
            array of samples.
        filename: Destination path for the MP3 file.

    Returns:
        A confirmation message naming the file that was written.

    Raises:
        ValueError: If the sampling rate is not a positive int, or the
            audio data is not a numpy array.
    """
    sampling_rate, audio_data = audio_tuple
    # Validate inputs before touching pydub so failures are explicit.
    if not isinstance(sampling_rate, int) or sampling_rate <= 0:
        raise ValueError("La tasa de muestreo debe ser un entero positivo.")
    if not isinstance(audio_data, np.ndarray):
        raise ValueError("Los datos de audio deben ser un array de numpy.")
    # pydub expects raw 16-bit PCM bytes.
    audio_bytes = np.array(audio_data, dtype=np.int16).tobytes()
    audio_segment = AudioSegment(
        data=audio_bytes,
        sample_width=2,       # 2 bytes == 16-bit samples
        frame_rate=sampling_rate,
        channels=1,           # NOTE(review): assumes mono input — stereo gradio audio would be misread; confirm
    )
    audio_segment.export(filename, format="mp3")
    # Bug fix: the original returned the literal "(unknown)" instead of
    # interpolating the actual filename into the f-string.
    return f"Audio saved successfully as {filename}"
def audio_video():
    """Mux the generated lip-sync video with the uploaded audio track.

    Reads 'results/result_voice.mp4' and 'sample_data/uploaded_audio.mp3',
    combines them with ffmpeg, and writes 'results/final_output.mp4'.

    Returns:
        Path of the final output video file.
    """
    input_video = ffmpeg.input('results/result_voice.mp4')
    input_audio = ffmpeg.input('sample_data/uploaded_audio.mp3')
    output_path = 'results/final_output.mp4'
    # Remove a stale output portably instead of shelling out to `rm -rf`
    # (ffmpeg would otherwise fail rather than overwrite).
    if os.path.exists(output_path):
        os.remove(output_path)
    # concat(v=1, a=1) pairs one video stream with one audio stream.
    ffmpeg.concat(input_video, input_audio, v=1, a=1).output(output_path).run()
    return output_path
def run_infrence(input_image, input_audio=None, input_text=None):
    """Run Wav2Lip inference on an uploaded image plus audio (or TTS text).

    Args:
        input_image: numpy image array from the gradio ``Image`` component.
        input_audio: optional (sampling_rate, samples) tuple from gradio
            ``Audio``; used only when ``input_text`` is empty.
        input_text: optional text; when given it is converted to speech with
            gTTS and takes precedence over ``input_audio``.

    Returns:
        Path of the final muxed output video (see ``audio_video``).
    """
    pil_image = Image.fromarray(input_image.astype(np.uint8))
    save_dir = "sample_data"
    # Idempotent directory creation replaces the exists()/makedirs() pair.
    os.makedirs(save_dir, exist_ok=True)
    # Persist the inputs at the fixed paths inference.py / audio_video expect.
    image_path = os.path.join(save_dir, "uploaded_image.png")
    pil_image.save(image_path)
    if input_text:
        tts = gTTS(input_text, lang='en', tld='com.au')
        tts.save("sample_data/uploaded_audio.mp3")
    else:
        save_audio_mp3(input_audio, "sample_data/uploaded_audio.mp3")
    # Invoke inference without a shell: an argument list avoids the quoting
    # and injection hazards of `shell=True` with an f-string command.
    command = [
        'python3', 'inference.py',
        '--checkpoint_path', 'checkpoints/wav2lip_gan.pth',
        '--face', 'sample_data/uploaded_image.png',
        '--audio', 'sample_data/uploaded_audio.mp3',
    ]
    subprocess.run(command, stdout=subprocess.PIPE)
    return audio_video()
def run():
    """Build and launch the gradio demo UI.

    NOTE: gradio lays components out in the order they are created inside
    these context managers, so statement order below is significant — do
    not reorder.
    """
    with gr.Blocks(css=".gradio-container {background-color: lightblue} #radio_div {background-color: #FFD8B4; font-size: 40px;} h3,h1,h2,p {color: black;}") as demo:
        #gr.Markdown("<p style='text-align: left;font-size:18px'>"+ "It's like having a magic tool for making videos. You put in a picture and audio, and it creates a video, it's super easy – just upload your picture and audio, and click 'generate'! You've got a cool video" + "</p>")
        with gr.Group():
            with gr.Row():
                gr.Markdown("<h1>Fabibi magic video creator</h1> \n <p>")
            with gr.Row():
                # Image is required; audio and text are alternatives
                # (run_infrence prefers text when both are supplied).
                input_image = gr.Image(label="Input Image")
                input_audio = gr.Audio(label="Input Audio (Optional)")
                input_text = gr.Textbox(label="Input Text (Optional)", placeholder="Enter text to convert to audio")
                video_out = gr.Video(show_label=True, label="Output")
            with gr.Row():
                btn = gr.Button("Generate")
        # Wire the button to inference; output lands in the video widget.
        btn.click(run_infrence, inputs=[input_image, input_audio, input_text], outputs=[video_out])
    # Queue requests so long-running inference calls don't time out.
    demo.queue()
    # 0.0.0.0 binds all interfaces; 7860 is the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
# Entry point when executed as a script (HF Spaces runs app.py directly).
if __name__ == "__main__":
    run()