"""Gradio app that transcribes a video to a grammar-corrected Word document.

Pipeline: ffmpeg extracts a mono 16 kHz WAV track from the uploaded video,
Whisper transcribes it, LanguageTool (Spanish) corrects each segment, and
python-docx writes one paragraph per segment.
"""

import os
import subprocess
from functools import lru_cache
from pathlib import Path

import gradio as gr
import language_tool_python
import spacy
import whisper
from docx import Document


def extract_audio(video_path, audio_path):
    """Extract the audio track of *video_path* into a 16 kHz mono PCM WAV file.

    Args:
        video_path: Path of the input media file.
        audio_path: Path of the WAV file to create (overwritten if present).

    Returns:
        True if ffmpeg finished successfully, False otherwise. Failures are
        printed, not raised.
    """
    source = os.fspath(video_path)
    target = os.fspath(audio_path)

    # With "-y", identical paths would make ffmpeg clobber its own input.
    if os.path.abspath(source) == os.path.abspath(target):
        print("Error al extraer audio: la ruta de salida coincide con la de entrada")
        return False

    # Argument list + no shell: paths containing spaces or shell
    # metacharacters are passed verbatim and cannot inject commands.
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output instead of prompting
        "-i", source,
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        target,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable now that no shell is
        # involved to report it through the exit status.
        print(f"Error al extraer audio: {e}")
        return False
    return True


@lru_cache(maxsize=None)
def _load_whisper_model(name="base"):
    """Load a Whisper model once and reuse it across requests."""
    return whisper.load_model(name)


def transcribe_audio(audio_path):
    """Transcribe *audio_path* with the Whisper "base" model.

    Returns:
        The result dictionary produced by ``model.transcribe`` (requested
        with word-level timestamps); callers read its ``'segments'`` list.
    """
    model = _load_whisper_model("base")
    return model.transcribe(audio_path, word_timestamps=True)


@lru_cache(maxsize=None)
def _get_language_tool(language="es"):
    """Create the LanguageTool checker once; construction is expensive."""
    return language_tool_python.LanguageTool(language)


def correct_text(text):
    """Return *text* with LanguageTool's Spanish corrections applied.

    Blank input is returned unchanged without invoking the checker.
    """
    if not text or not text.strip():
        return text
    tool = _get_language_tool("es")
    matches = tool.check(text)
    return language_tool_python.utils.correct(text, matches)


def create_word_doc(segments, output_path):
    """Write one corrected paragraph per transcription segment to a .docx.

    Args:
        segments: Iterable of mappings, each with a ``'text'`` entry.
        output_path: Destination path of the Word document.

    Returns:
        *output_path*, for convenient chaining.
    """
    doc = Document()
    for segment in segments:
        # Segment text is trimmed so paragraphs do not start or end with
        # stray whitespace; blank segments are skipped entirely.
        raw_text = segment['text'].strip()
        if not raw_text:
            continue
        doc.add_paragraph(correct_text(raw_text))
    doc.save(output_path)
    return output_path


def _resolve_upload_path(video_file):
    """Return the filesystem path for a Gradio upload value.

    Accepts a plain path (str / PathLike) or a file-like wrapper exposing
    the path through its ``name`` attribute.
    """
    if isinstance(video_file, (str, os.PathLike)):
        return Path(os.fspath(video_file))
    return Path(video_file.name)


def process_video(video_file):
    """Transcribe an uploaded video and produce a corrected Word document.

    Args:
        video_file: The value Gradio passes for the upload: a path, a
            file-like object with a ``name`` attribute, or None when nothing
            was uploaded.

    Returns:
        A ``(status_message, docx_path)`` tuple; ``docx_path`` is None when
        processing failed.
    """
    if video_file is None:
        return "Error al procesar el archivo.", None

    video_path = _resolve_upload_path(video_file)

    # Derive outputs from the stem so any container extension works, not
    # only ".mp4". For ".mp4" inputs the names match the previous behavior.
    audio_path = video_path.with_suffix(".wav")
    if audio_path == video_path:
        # The upload is already a .wav: never write over the source file.
        audio_path = video_path.with_name(f"{video_path.stem}_audio.wav")
    word_output = video_path.with_name(f"{video_path.stem}_transcription.docx")

    if not extract_audio(str(video_path), str(audio_path)):
        return "Error al procesar el archivo.", None

    result = transcribe_audio(str(audio_path))
    doc_path = create_word_doc(result['segments'], str(word_output))
    return "Transcripción completada.", doc_path


demo = gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Sube un archivo de video"),
    outputs=["text", gr.File(label="Descargar transcripción")],
)

# Launch only when run as a script so importing this module (for tests or
# reuse of the helpers) does not start a web server.
if __name__ == "__main__":
    demo.launch()