| import gradio as gr |
| import whisper |
| import os |
| import tempfile |
| from pydub import AudioSegment |
| import subprocess |
|
|
| |
# Load the Whisper "small" checkpoint once at import time and keep it in the
# module-level `model`, which the transcription handler reads on every call.
print("Cargando modelo Whisper...")
model = whisper.load_model(name="small")
print("Modelo cargado.")
|
|
|
|
def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Runs ``ffmpeg`` with the video stream disabled and writes 16-bit PCM
    audio resampled to 16 kHz, mono.

    Args:
        video_path: Path of the input video file.

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries ffmpeg's stderr output.
        OSError: If the ``ffmpeg`` executable cannot be started.
    """
    # mkstemp creates the file atomically. The deprecated tempfile.mktemp only
    # returns a name, leaving a window in which another process can claim it.
    fd, audio_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # ffmpeg reopens the path itself; '-y' permits the overwrite

    command = [
        'ffmpeg',
        '-i', video_path,
        '-vn',                   # drop the video stream
        '-acodec', 'pcm_s16le',  # 16-bit little-endian PCM
        '-ar', '16000',          # 16 kHz sample rate
        '-ac', '1',              # mono
        '-y',                    # overwrite the output without prompting
        audio_path,
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Error extrayendo audio: {result.stderr}")
    except BaseException:
        # Do not leave an orphaned temp file behind when extraction fails
        # (ffmpeg missing, bad input, interruption); then re-raise unchanged.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return audio_path
|
|
|
|
def convert_to_wav(input_path):
    """Convert an audio file to a temporary 16 kHz mono WAV file.

    The input is decoded with ``AudioSegment.from_file``, resampled to
    16 kHz, downmixed to one channel and exported as WAV.

    Args:
        input_path: Path of the input audio file.

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        Whatever ``AudioSegment.from_file`` or ``export`` raise when the
        input cannot be decoded or the output cannot be written.
    """
    # Decode first so that an unreadable input never creates a temp file.
    audio = AudioSegment.from_file(input_path)
    audio = audio.set_frame_rate(16000).set_channels(1)

    # mkstemp creates the file atomically. The deprecated tempfile.mktemp only
    # returns a name, leaving a window in which another process can claim it.
    fd, audio_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # export() reopens the path itself

    try:
        # export() hands back the open output file; close it explicitly so the
        # file can be read and deleted right away.
        audio.export(audio_path, format="wav").close()
    except BaseException:
        # Do not leave an orphaned temp file behind when the export fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return audio_path
|
|
|
|
# File extensions routed through ffmpeg audio extraction instead of pydub.
_VIDEO_EXTENSIONS = frozenset(
    {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.mpg', '.mpeg'}
)


def transcribir_archivo(archivo):
    """Transcribe an uploaded video or audio file, streaming progress updates.

    This is a generator: every yielded item is a ``(status, transcript)``
    tuple. The transcript is empty in every update except the final one of a
    successful run. Failures are reported as an ``"Error: ..."`` status
    instead of being raised.

    Args:
        archivo: Path of the uploaded file (``str``, ``bytes`` or
            ``os.PathLike``), an object exposing that path as ``.name``, or
            ``None`` when nothing was uploaded.

    Yields:
        tuple[str, str]: Status message and transcript text.
    """
    if archivo is None:
        yield "Por favor sube un archivo.", ""
        return

    archivos_temp = []

    try:
        # Accept both a plain path and an upload wrapper carrying the path in
        # `.name`; path-like values are used untouched.
        if isinstance(archivo, (str, bytes, os.PathLike)):
            ruta = archivo
        else:
            ruta = archivo.name

        extension = os.path.splitext(ruta)[1].lower()
        es_video = extension in _VIDEO_EXTENSIONS

        yield "Procesando archivo...", ""

        # Both branches produce a temporary 16 kHz mono WAV file.
        if es_video:
            yield "Extrayendo audio del video...", ""
            audio_path = extract_audio_from_video(ruta)
        else:
            yield "Convirtiendo audio a WAV...", ""
            audio_path = convert_to_wav(ruta)

        archivos_temp.append(audio_path)

        # Only the duration is needed; do not keep the decoded samples
        # referenced while the model runs.
        duracion_total = len(AudioSegment.from_wav(audio_path)) / 1000
        yield f"Audio listo. Duración: {duracion_total:.1f}s. Iniciando transcripción...", ""

        resultado = model.transcribe(
            audio_path,
            language="es",
            task="transcribe",
            fp16=False,
            temperature=0,
            best_of=1,
            beam_size=5,
            verbose=False,
        )

        texto_final = resultado["text"].strip()

        if not texto_final:
            yield "La transcripción quedó vacía. Verifica que el audio tenga voz clara.", ""
            return

        yield "¡Transcripción completada!", texto_final

    except Exception as e:
        # Top-level boundary: surface the failure as a status message rather
        # than letting it propagate out of the generator.
        yield f"Error: {str(e)}", ""

    finally:
        # Best-effort removal of the temporary WAV; a failure here must not
        # mask the outcome above, but it is reported instead of being hidden.
        for temp_file in archivos_temp:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass  # already gone, nothing to clean up
            except OSError as e:
                print(f"No se pudo eliminar el archivo temporal {temp_file}: {e}")
|
|
|
|
| |
# Gradio interface: upload widget and trigger button in the left column,
# status line and editable transcript in the right column.
with gr.Blocks(title="Transcriptor de Video/Audio") as demo:
    gr.Markdown("""
    # 🎙️ Transcriptor de Video y Audio
    Sube un video o archivo de audio y obtén la transcripción en español.

    **Formatos soportados:** MP4, AVI, MOV, MKV, MP3, WAV, M4A, OGG, WEBM
    """)

    with gr.Row():
        # Left column: input side.
        with gr.Column():
            archivo_input = gr.File(file_types=["video", "audio"], label="Sube tu video o audio")
            btn_transcribir = gr.Button("🚀 Transcribir", variant="primary")

        # Right column: output side.
        with gr.Column():
            estado = gr.Textbox(interactive=False, label="Estado")
            resultado = gr.Textbox(
                placeholder="La transcripción aparecerá aquí...",
                interactive=True,
                lines=15,
                label="Transcripción",
            )

    # Each (status, transcript) pair yielded by the handler fills the two
    # output boxes in order.
    # NOTE(review): the handler is a generator; some Gradio releases may need
    # demo.queue() for streamed updates — confirm against the installed version.
    btn_transcribir.click(
        outputs=[estado, resultado],
        inputs=archivo_input,
        fn=transcribir_archivo,
    )


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()