File size: 5,367 Bytes
80ebd45 4e7548f 3428530 1da425c 3428530 1da425c 80ebd45 1da425c 0b876e6 1da425c 662f4a1 1da425c 662f4a1 1da425c 662f4a1 1da425c 80ebd45 3428530 1da425c 3428530 0b64d45 f90dece 1da425c 4e7548f 1da425c 5fed0a8 1da425c 9f9e12c 1da425c 4e7548f 3428530 f7f1de5 1b4bfa7 1c61268 b1530ca 1c61268 6575803 5c912e7 0b64d45 51c3285 ee346dc 74e6b76 0b64d45 ee346dc d4dcf70 5c912e7 74e6b76 0b64d45 51c3285 0b64d45 a9850cc bc51d3c 74e6b76 f7dee31 63cddf1 74e6b76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import gradio as gr
import spaces
import subprocess
import os
from PIL import Image
import ffmpeg
from pydub import AudioSegment
import numpy as np
import soundfile as sf
from gtts import gTTS
def save_audio_mp3(audio_tuple, filename):
    """Encode a ``(sampling_rate, samples)`` pair as an MP3 file.

    Args:
        audio_tuple: Pair of the sampling rate in Hz and a numpy array of
            samples. The array is either 1-D (mono) or 2-D with shape
            ``(n_samples, n_channels)``.
        filename: Destination path of the MP3 file.

    Returns:
        A confirmation message naming the written file.

    Raises:
        ValueError: If no audio is given, the sampling rate is not a positive
            integer, or the samples are not a 1-D/2-D numpy array.
    """
    if audio_tuple is None:
        raise ValueError("No se proporcionó audio.")
    sampling_rate, audio_data = audio_tuple

    # Validate the sampling rate; numpy integer scalars are accepted as well
    # as plain ints.
    if not isinstance(sampling_rate, (int, np.integer)) or sampling_rate <= 0:
        raise ValueError("La tasa de muestreo debe ser un entero positivo.")

    # Validate the sample container.
    if not isinstance(audio_data, np.ndarray):
        raise ValueError("Los datos de audio deben ser un array de numpy.")

    # Derive the channel count from the array shape instead of assuming mono:
    # labelling interleaved multi-channel frames as one channel would stretch
    # the clip to n_channels times its real duration.
    if audio_data.ndim == 1:
        channels = 1
    elif audio_data.ndim == 2 and audio_data.shape[1] > 0:
        channels = audio_data.shape[1]
    else:
        raise ValueError(
            "Los datos de audio deben tener forma (muestras,) o (muestras, canales)."
        )

    # Float samples normalised to [-1, 1] would truncate to silence when cast
    # to int16, so scale them to the 16-bit range first. Floats already
    # outside that range are cast unchanged, as before.
    if np.issubdtype(audio_data.dtype, np.floating) and audio_data.size:
        if np.max(np.abs(audio_data)) <= 1.0:
            audio_data = audio_data * 32767.0

    # C-ordered bytes of an (n_samples, n_channels) array are interleaved
    # frames, which matches the channel count passed below.
    audio_bytes = np.array(audio_data, dtype=np.int16).tobytes()

    audio_segment = AudioSegment(
        data=audio_bytes,
        sample_width=2,  # bytes per sample (int16)
        frame_rate=int(sampling_rate),
        channels=channels,
    )
    audio_segment.export(filename, format="mp3")
    return f"Audio saved successfully as {filename}"
def audio_video():
    """Combine the generated video with the uploaded audio via ffmpeg.

    Reads ``results/result_voice.mp4`` and ``sample_data/uploaded_audio.mp3``,
    passes both through ffmpeg's ``concat`` filter (one video stream, one
    audio stream) and writes ``results/final_output.mp4``.

    Returns:
        The path of the written file, ``"results/final_output.mp4"``.
    """
    output_path = 'results/final_output.mp4'
    input_video = ffmpeg.input('results/result_voice.mp4')
    input_audio = ffmpeg.input('sample_data/uploaded_audio.mp3')

    # Drop any output left by a previous run so ffmpeg does not stop to ask
    # about overwriting. Done in-process rather than by shelling out to `rm`.
    try:
        os.remove(output_path)
    except FileNotFoundError:
        pass

    ffmpeg.concat(input_video, input_audio, v=1, a=1).output(output_path).run()
    return output_path
@spaces.GPU
def run_inference(input_image, input_audio=None, input_text=None):
    """Generate the output video for an image plus text or audio.

    The image is saved to ``sample_data/uploaded_image.png``. The audio track
    is synthesised from ``input_text`` with gTTS when text is given; otherwise
    ``input_audio`` is encoded with ``save_audio_mp3``. ``inference.py`` is
    then run on both files and the result is passed through ``audio_video``.

    Args:
        input_image: Image as a numpy array (cast to ``uint8`` before saving).
        input_audio: Optional ``(sampling_rate, samples)`` pair, used only
            when no text is supplied.
        input_text: Optional text to speak; takes precedence over the audio.

    Returns:
        The path returned by ``audio_video()``.

    Raises:
        gr.Error: If the image is missing, neither text nor audio is given,
            or the inference script exits with a non-zero status.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")
    # Whitespace-only text counts as no text, so it falls back to the audio.
    has_text = bool(input_text and input_text.strip())
    if not has_text and input_audio is None:
        raise gr.Error("Please provide some text or an audio clip.")

    save_dir = "sample_data"
    os.makedirs(save_dir, exist_ok=True)

    # Save input image
    pil_image = Image.fromarray(input_image.astype(np.uint8))
    pil_image.save(os.path.join(save_dir, "uploaded_image.png"))

    # Save input audio: synthesised speech if text was given, else the upload.
    if has_text:
        tts = gTTS(input_text, lang='en', tld='com.au')
        tts.save("sample_data/uploaded_audio.mp3")
    else:
        save_audio_mp3(input_audio, "sample_data/uploaded_audio.mp3")

    # Argument list instead of a shell string: no shell is needed here.
    command = [
        "python3", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
        "--face", "sample_data/uploaded_image.png",
        "--audio", "sample_data/uploaded_audio.mp3",
    ]
    completed = subprocess.run(command)
    # Stop on failure; otherwise audio_video() could pick up a stale
    # results/result_voice.mp4 left over from an earlier request.
    if completed.returncode != 0:
        raise gr.Error(
            f"Inference failed (exit code {completed.returncode}); see the server log."
        )
    return audio_video()
def run():
    """Assemble the Gradio Blocks interface and return it without launching.

    The layout has a title, an image input, an audio/text input row, a
    GENERATE button and a video output. Clicking the button calls
    ``run_inference`` with the image, audio and text values and shows the
    returned video.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    # NOTE(review): the nesting of the context managers below was
    # reconstructed; confirm it matches the intended layout.
    # NOTE(review): the `#container`, `.video-small` and `button.primary`
    # rules have no matching elem_id / elem_classes / variant in this
    # function - verify whether they are meant to be wired up.
    stylesheet = """
* {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}
body {
background-color: #ffffff !important;
}
.gradio-container {
background-color: #ffffff !important;
}
button.primary {
background: linear-gradient(90deg, #a8a8a8, #7d7d7d) !important;
color: #ffffff !important;
border: none !important;
}
button.primary:hover {
background: linear-gradient(90deg, #b3b3b3, #8a8a8a) !important;
}
#container {
background-color: #ffffff;
border-radius: 15px;
padding: 20px 30px;
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.08);
margin: 20px auto;
max-width: 800px;
}
div.svelte-iyf88w {
background: #ffffff !important;
}
h1 {
color: #2c3e50;
font-size: 2.4em;
font-weight: 700;
margin-bottom: 10px;
}
.subtitle {
color: #7f8c8d;
font-size: 1.2em;
}
.generate-btn {
font-size: 1.1em;
font-weight: 600;
padding: 14px 32px;
border-radius: 10px;
transition: all 0.25s ease;
margin-top: 15px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
.video-small video {
max-width: 400px !important; /* ancho máximo */
height: auto !important;
border-radius: 10px;
display: block;
margin: 0 auto;
}
"""
    header_markdown = """
# Talkie
<div class='subtitle'>Upload an image, add some audio or text, and watch the magic happen! ✨</div>
"""

    demo = gr.Blocks(css=stylesheet)
    with demo:
        with gr.Group():
            gr.Markdown(header_markdown)

            # Inputs: the image, then audio and text in one row.
            with gr.Row():
                image_in = gr.Image(label="📸 Your image")
            with gr.Row():
                audio_in = gr.Audio(label="🎵 Your audio (Optional)")
                text_in = gr.Textbox(
                    label="💭 Your text",
                    placeholder="Type your text here...",
                )

            # Trigger and result.
            with gr.Row():
                generate_button = gr.Button(
                    "GENERATE",
                    elem_classes=["generate-btn"],
                )
            with gr.Row():
                video_out = gr.Video(label="🎥 Your video", show_label=True)

        generate_button.click(
            run_inference,
            inputs=[image_in, audio_in, text_in],
            outputs=video_out,
        )
    return demo
if __name__ == "__main__":
    # Build the UI, enable Gradio's queue, then serve on 0.0.0.0:7860.
    demo = run()
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)