Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import subprocess | |
| import os | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
| import language_tool_python | |
| from pydub import AudioSegment | |
| from docx import Document | |
def extract_audio(video_path, audio_path):
    """Extract a video's audio track into a 16 kHz, mono, 16-bit PCM WAV file.

    Args:
        video_path: Path of the input media file passed to ffmpeg via ``-i``.
        audio_path: Path of the WAV file to write. An existing file is
            overwritten (``-y``).

    Returns:
        ``audio_path``, unchanged, for convenient chaining.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Pass the arguments as a list (no shell): the paths come from user
    # uploads, so interpolating them into a shell string would allow command
    # injection and would break on names containing quotes.
    command = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-c:a", "pcm_s16le",
        audio_path,
    ]
    subprocess.run(command, check=True)
    return audio_path
def transcribe_audio(audio_path):
    """Transcribe a WAV file to text with the ``openai/whisper-base`` model.

    Args:
        audio_path: Path of a WAV file to transcribe.

    Returns:
        The decoded transcription string, with special tokens removed.
    """
    processor = WhisperProcessor.from_pretrained("openai/whisper-base")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

    # The processor expects waveform samples, not a file name, so decode the
    # WAV first. Normalising rate/channels/width here keeps this function
    # correct even if the file was not produced by extract_audio().
    segment = AudioSegment.from_wav(audio_path)
    segment = segment.set_frame_rate(16000).set_channels(1).set_sample_width(2)

    # Scale signed 16-bit integers to floats in [-1.0, 1.0).
    waveform = [sample / 32768.0 for sample in segment.get_array_of_samples()]

    # NOTE(review): Whisper's feature extractor presumably works on a fixed
    # 30-second window, so longer recordings may be truncated -- confirm and
    # add chunking if full-length transcription is required.
    audio_input = processor(waveform, return_tensors="pt", sampling_rate=16000)
    result = model.generate(**audio_input)
    transcription = processor.decode(result[0], skip_special_tokens=True)
    return transcription
def correct_text(text):
    """Apply LanguageTool's suggested corrections for Spanish to ``text``.

    Args:
        text: The text to check.

    Returns:
        The text with LanguageTool's corrections applied. Empty input is
        returned unchanged without starting LanguageTool.
    """
    if not text:
        # Nothing to check; avoid creating a LanguageTool instance at all.
        return text

    tool = language_tool_python.LanguageTool('es')
    try:
        matches = tool.check(text)
        return language_tool_python.utils.correct(text, matches)
    finally:
        # A new tool is created per call, so release it explicitly instead of
        # leaving its backend alive after every request.
        tool.close()
def process_video(video_file):
    """Transcribe an uploaded video and export the corrected text to Word.

    Pipeline: extract the audio track, transcribe it, run the grammar
    correction, then save the result as a ``.docx`` document.

    Args:
        video_file: The value delivered by the ``gr.File`` input. Either an
            object exposing the upload's path as ``.name`` or a plain path
            string is accepted.

    Returns:
        A ``(corrected_text, doc_path)`` tuple: the corrected transcription
        and the path of the generated Word document.

    Raises:
        gr.Error: If no file was uploaded.
    """
    import tempfile

    if video_file is None:
        raise gr.Error("Sube un archivo de video antes de continuar.")

    # NOTE(review): depending on the Gradio version the input may be a
    # temp-file wrapper or a path string -- support both.
    video_path = getattr(video_file, "name", video_file)

    # Write the intermediate WAV into a throw-away directory: it is removed
    # when done and can never collide with the upload itself (e.g. when the
    # uploaded file already has a ``.wav`` extension).
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "audio.wav")
        extract_audio(video_path, audio_path)
        transcribed_text = transcribe_audio(audio_path)

    corrected_text = correct_text(transcribed_text)

    doc = Document()
    doc.add_paragraph(corrected_text)

    # A per-request directory keeps concurrent users from overwriting each
    # other's document while preserving the download file name.
    doc_path = os.path.join(tempfile.mkdtemp(), "transcription.docx")
    doc.save(doc_path)
    return corrected_text, doc_path
# Gradio UI: one file-upload input wired to process_video, whose two return
# values feed a text box (the corrected transcription) and a file download
# (the Word document).
_video_input = gr.File(label="Sube un archivo de video")
_text_output = gr.Textbox(label="Texto transcrito y corregido")
_docx_output = gr.File(label="Descargar transcripción Word")

demo = gr.Interface(
    fn=process_video,
    inputs=_video_input,
    outputs=[_text_output, _docx_output],
)

# Start the web server for the interface.
demo.launch()