"""Gradio app: extract a video's audio, transcribe it with Whisper, and
grammar-correct the (Spanish) transcription with LanguageTool, returning
both the corrected text and a downloadable .docx file."""

import os
import subprocess

import gradio as gr
import language_tool_python
import numpy as np
from docx import Document
from pydub import AudioSegment
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Load the heavy Whisper objects once at startup instead of on every request.
_PROCESSOR = WhisperProcessor.from_pretrained("openai/whisper-base")
_MODEL = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")


def extract_audio(video_path: str, audio_path: str) -> str:
    """Extract mono 16 kHz PCM WAV audio from *video_path* into *audio_path*.

    Uses an argument list with the default ``shell=False`` so a
    user-supplied filename cannot inject shell commands (the original
    interpolated the path into a shell string).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits non-zero.
    """
    command = [
        "ffmpeg",
        "-i", video_path,
        "-ar", "16000",   # Whisper expects 16 kHz input
        "-ac", "1",       # mono
        "-c:a", "pcm_s16le",
        audio_path,
        "-y",             # overwrite an existing output file
    ]
    subprocess.run(command, check=True)
    return audio_path


def transcribe_audio(audio_path: str) -> str:
    """Transcribe a 16 kHz mono WAV file with Whisper and return the text.

    Bug fix: the processor must receive raw audio samples, not the file
    path string the original passed. The WAV is loaded with pydub and its
    int16 samples are normalized to float32 in [-1.0, 1.0].
    """
    audio = AudioSegment.from_wav(audio_path)
    # int16 PCM -> float32 in [-1, 1], the format Whisper's processor expects.
    samples = np.array(audio.get_array_of_samples()).astype(np.float32) / 32768.0
    inputs = _PROCESSOR(samples, return_tensors="pt", sampling_rate=16000)
    generated_ids = _MODEL.generate(**inputs)
    return _PROCESSOR.decode(generated_ids[0], skip_special_tokens=True)


def correct_text(text: str) -> str:
    """Run Spanish grammar/spelling correction on *text* via LanguageTool."""
    tool = language_tool_python.LanguageTool('es')
    try:
        matches = tool.check(text)
        return language_tool_python.utils.correct(text, matches)
    finally:
        # LanguageTool spawns a background Java server; shut it down.
        tool.close()


def process_video(video_file):
    """Gradio handler: video upload -> (corrected transcript, .docx path)."""
    # Newer Gradio versions pass a filepath string; older ones pass a
    # tempfile-like object with a .name attribute. Support both.
    video_path = video_file if isinstance(video_file, str) else video_file.name
    audio_path = os.path.splitext(video_path)[0] + '.wav'
    extract_audio(video_path, audio_path)
    transcribed_text = transcribe_audio(audio_path)
    corrected_text = correct_text(transcribed_text)

    doc = Document()
    doc.add_paragraph(corrected_text)
    doc_path = "transcription.docx"
    doc.save(doc_path)
    return corrected_text, doc_path


demo = gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Sube un archivo de video"),
    outputs=[
        gr.Textbox(label="Texto transcrito y corregido"),
        gr.File(label="Descargar transcripción Word"),
    ],
)

if __name__ == "__main__":
    demo.launch()