# wav2lip_fab_GPU / app.py
# Author: fabiolamp — "Update app.py" (commit fd95803, verified)
import gradio as gr
import spaces
import subprocess
import os
from PIL import Image
import ffmpeg
from pydub import AudioSegment
from gtts import gTTS
import numpy as np
import soundfile as sf
def save_audio_mp3(audio_tuple, filename):
    """Write a ``(sampling_rate, samples)`` audio tuple to an MP3 file.

    Args:
        audio_tuple: ``(sampling_rate, audio_data)`` pair as delivered by a
            ``gr.Audio`` component — an int sample rate plus a numpy array
            of samples.
        filename: Destination path for the MP3 file.

    Returns:
        A status message that includes the output path.

    Raises:
        ValueError: If the sampling rate is not a positive int, or the
            samples are not a numpy array.
    """
    sampling_rate, audio_data = audio_tuple
    # Validate the sampling rate before handing it to pydub.
    if not isinstance(sampling_rate, int) or sampling_rate <= 0:
        raise ValueError("La tasa de muestreo debe ser un entero positivo.")
    # Validate the audio payload.
    if not isinstance(audio_data, np.ndarray):
        raise ValueError("Los datos de audio deben ser un array de numpy.")
    # pydub expects raw 16-bit PCM bytes.
    audio_bytes = np.asarray(audio_data, dtype=np.int16).tobytes()
    audio_segment = AudioSegment(
        data=audio_bytes,
        sample_width=2,  # 2 bytes per sample == int16
        frame_rate=sampling_rate,
        channels=1,  # NOTE(review): assumes mono input — stereo would need channels=2
    )
    audio_segment.export(filename, format="mp3")
    # Bug fix: the original returned the literal text "(unknown)" because the
    # f-string had no placeholder; interpolate the destination path instead.
    return f"Audio saved successfully as {filename}"
def audio_video():
    """Mux the lip-synced video with the uploaded audio track.

    Combines the Wav2Lip output video ('results/result_voice.mp4') with the
    previously saved MP3 ('sample_data/uploaded_audio.mp3') via ffmpeg.

    Returns:
        Path of the muxed output file ('results/final_output.mp4').
    """
    input_video = ffmpeg.input('results/result_voice.mp4')
    input_audio = ffmpeg.input('sample_data/uploaded_audio.mp3')
    output_path = 'results/final_output.mp4'
    # Bug fix: the original shelled out via os.system(f"rm -rf ...") — the
    # f-string interpolated nothing and the call is Unix-only. Delete the
    # stale output portably, without spawning a shell.
    if os.path.exists(output_path):
        os.remove(output_path)
    # v=1, a=1: take one video stream and one audio stream.
    ffmpeg.concat(input_video, input_audio, v=1, a=1).output(output_path).run()
    return output_path
@spaces.GPU
def run_infrence(input_image, input_audio=None, input_text=None):
    """Run Wav2Lip inference on a face image plus an audio/TTS-text track.

    Saves the inputs under sample_data/, synthesizes speech with gTTS when
    text is supplied (text takes precedence over uploaded audio), runs the
    Wav2Lip inference script in a subprocess, and muxes the result.

    Args:
        input_image: Face image as a numpy array (as delivered by gr.Image).
        input_audio: Optional (sampling_rate, samples) tuple from gr.Audio.
        input_text: Optional text to synthesize instead of uploaded audio.

    Returns:
        Path of the final muxed video file.
    """
    pil_image = Image.fromarray(input_image.astype(np.uint8))
    save_dir = "sample_data"
    os.makedirs(save_dir, exist_ok=True)  # idempotent; replaces check-then-create
    # Persist the uploaded face image where inference.py expects it.
    image_path = os.path.join(save_dir, "uploaded_image.png")
    pil_image.save(image_path)
    audio_path = os.path.join(save_dir, "uploaded_audio.mp3")
    if input_text:
        # Text input takes priority: synthesize Australian-English speech.
        tts = gTTS(input_text, lang='en', tld='com.au')
        tts.save(audio_path)
    else:
        save_audio_mp3(input_audio, audio_path)
    # Run inference as an argv list (shell=False) so paths are never
    # shell-interpreted; capture output so failures can be inspected.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", image_path,
        "--audio", audio_path,
    ]
    subprocess.run(command, capture_output=True)
    return audio_video()
def run():
    """Build and launch the Gradio web UI for the lip-sync video generator.

    NOTE(review): indentation was lost in this paste; nesting below is
    reconstructed from Gradio Blocks conventions — confirm against the
    deployed app. Component declaration order determines the rendered layout.
    """
    # Page-wide CSS: light-blue background, black headings/body text.
    with gr.Blocks(css=".gradio-container {background-color: lightblue} #radio_div {background-color: #FFD8B4; font-size: 40px;} h3,h1,h2,p {color: black;}") as demo:
        #gr.Markdown("<p style='text-align: left;font-size:18px'>"+ "It's like having a magic tool for making videos. You put in a picture and audio, and it creates a video, it's super easy – just upload your picture and audio, and click 'generate'! You've got a cool video" + "</p>")
        with gr.Group():
            with gr.Row():
                gr.Markdown("<h1>Fabibi magic video creator</h1> \n <p>")
            with gr.Row():
                # Inputs: a face image plus either recorded audio or TTS text.
                input_image = gr.Image(label="Input Image")
                input_audio = gr.Audio(label="Input Audio (Optional)")
                input_text = gr.Textbox(label="Input Text (Optional)", placeholder="Enter text to convert to audio")
                video_out = gr.Video(show_label=True, label="Output")
            with gr.Row():
                btn = gr.Button("Generate")
        # Wire the button to inference; text input wins over audio when both given.
        btn.click(run_infrence, inputs=[input_image, input_audio, input_text], outputs=[video_out])
    demo.queue()
    # share=True exposes a public Gradio link; port 7860 is the HF Spaces default.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
if __name__ == "__main__":
    # Entry point: build the UI and start the (blocking) Gradio server.
    run()