import os
import subprocess

import gradio as gr
import spaces
import numpy as np
import ffmpeg
from PIL import Image
from pydub import AudioSegment
from gtts import gTTS


def save_audio_mp3(audio_tuple, filename):
    """Save a Gradio ``(sampling_rate, samples)`` audio tuple to an MP3 file.

    Args:
        audio_tuple: tuple of ``(sampling_rate, audio_data)`` where
            ``sampling_rate`` is a positive int and ``audio_data`` is a
            numpy array of PCM samples.
        filename: destination path for the MP3 file.

    Returns:
        A short status message naming the file that was written.

    Raises:
        ValueError: if the sampling rate is not a positive int or the
            audio data is not a numpy array.
    """
    sampling_rate, audio_data = audio_tuple

    # Validate the sampling rate.
    if not isinstance(sampling_rate, int) or sampling_rate <= 0:
        raise ValueError("La tasa de muestreo debe ser un entero positivo.")

    # Validate the audio data.
    if not isinstance(audio_data, np.ndarray):
        raise ValueError("Los datos de audio deben ser un array de numpy.")

    # pydub expects raw 16-bit PCM bytes (sample_width=2), mono.
    audio_bytes = np.array(audio_data, dtype=np.int16).tobytes()
    audio_segment = AudioSegment(
        data=audio_bytes,
        sample_width=2,
        frame_rate=sampling_rate,
        channels=1,
    )
    audio_segment.export(filename, format="mp3")

    # BUG FIX: the original f-string had no placeholder here.
    return f"Audio saved successfully as {filename}"


def audio_video():
    """Mux the lip-synced video with the uploaded audio track.

    Reads ``results/result_voice.mp4`` (produced by inference) and
    ``sample_data/uploaded_audio.mp3``, writes the combined result to
    ``results/final_output.mp4``.

    Returns:
        Path of the final output video.
    """
    input_video = ffmpeg.input('results/result_voice.mp4')
    input_audio = ffmpeg.input('sample_data/uploaded_audio.mp3')
    output_path = 'results/final_output.mp4'
    # Remove any stale output directly instead of shelling out to "rm -rf".
    if os.path.exists(output_path):
        os.remove(output_path)
    ffmpeg.concat(input_video, input_audio, v=1, a=1).output(output_path).run()
    return output_path


@spaces.GPU
def run_infrence(input_image, input_audio=None, input_text=None):
    """Run Wav2Lip inference on an image plus audio (or synthesized speech).

    Args:
        input_image: numpy image array from the Gradio ``Image`` component.
        input_audio: optional ``(sampling_rate, samples)`` tuple; used only
            when no text is supplied.
        input_text: optional text; converted to speech via gTTS when given.

    Returns:
        Path to the final muxed output video (from :func:`audio_video`).
    """
    pil_image = Image.fromarray(input_image.astype(np.uint8))
    save_dir = "sample_data"
    # exist_ok avoids the check-then-create race of the original code.
    os.makedirs(save_dir, exist_ok=True)

    # Persist the uploaded image where inference.py expects it.
    image_path = os.path.join(save_dir, "uploaded_image.png")
    pil_image.save(image_path)

    # Text input takes precedence: synthesize speech; otherwise save the
    # uploaded audio tuple as MP3.
    audio_path = os.path.join(save_dir, "uploaded_audio.mp3")
    if input_text:
        tts = gTTS(input_text, lang='en', tld='com.au')
        tts.save(audio_path)
    else:
        save_audio_mp3(input_audio, audio_path)

    # Argument list with shell=False: no shell-injection or quoting issues.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", image_path,
        "--audio", audio_path,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    output, error = process.communicate()
    return audio_video()


def run():
    """Build and launch the Gradio UI for the video creator demo."""
    css = (
        ".gradio-container {background-color: lightblue} "
        "#radio_div {background-color: #FFD8B4; font-size: 40px;} "
        "h3,h1,h2,p {color: black;}"
    )
    with gr.Blocks(css=css) as demo:
        with gr.Group():
            with gr.Row():
                # NOTE(review): the original markdown strings were garbled
                # by extraction; reconstructed from the visible fragments.
                gr.Markdown(
                    "# Fabibi magic video creator\n\n"
                    "It's like having a magic tool for making videos. "
                    "You put in a picture and audio, and it creates a video, "
                    "it's super easy \u2013 just upload your picture and audio, "
                    "and click 'generate'! You've got a cool video"
                )
            with gr.Row():
                input_image = gr.Image(label="Input Image")
                input_audio = gr.Audio(label="Input Audio (Optional)")
                input_text = gr.Textbox(
                    label="Input Text (Optional)",
                    placeholder="Enter text to convert to audio",
                )
                video_out = gr.Video(show_label=True, label="Output")
            with gr.Row():
                btn = gr.Button("Generate")
        btn.click(
            run_infrence,
            inputs=[input_image, input_audio, input_text],
            outputs=[video_out],
        )
        demo.queue()
        demo.launch(server_name="0.0.0.0", server_port=7860, share=True)


if __name__ == "__main__":
    run()