# Spaces: Sleeping
import os
import subprocess
import tempfile

import gradio as gr
import librosa
from docx import Document
from spellchecker import SpellChecker  # provided by the pyspellchecker package
from transformers import WhisperForConditionalGeneration, WhisperProcessor
def extract_audio(video_path, audio_path):
    """Extract the audio track of a video into a 16 kHz mono PCM WAV file.

    Args:
        video_path: Path of the input video file.
        audio_path: Path where the WAV file is written; an existing file is
            overwritten (``-y``).

    Returns:
        ``audio_path``, unchanged, for convenient chaining.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Pass the arguments as a list and skip the shell entirely: paths that
    # contain quotes, spaces or shell metacharacters are then handed to ffmpeg
    # verbatim instead of being interpreted (or exploited) by the shell.
    command = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-c:a", "pcm_s16le",
        audio_path,
    ]
    subprocess.run(command, check=True)
    return audio_path
# Loaded (processor, model) pairs keyed by model name, so the weights are
# read only once per process instead of once per request.
_WHISPER_CACHE = {}


def _load_whisper(model_name="openai/whisper-base"):
    """Return the cached ``(processor, model)`` pair for ``model_name``."""
    if model_name not in _WHISPER_CACHE:
        processor = WhisperProcessor.from_pretrained(model_name)
        model = WhisperForConditionalGeneration.from_pretrained(model_name)
        _WHISPER_CACHE[model_name] = (processor, model)
    return _WHISPER_CACHE[model_name]


def transcribe_audio(audio_path):
    """Transcribe an audio file with the ``openai/whisper-base`` model.

    The audio is loaded at 16 kHz and transcribed in consecutive 30-second
    chunks whose texts are joined with single spaces.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcription as a single string; empty if the file holds no
        samples.
    """
    sample_rate = 16000
    # The Whisper feature extractor pads/truncates every input to a 30-second
    # window, so feeding a longer recording in one call would silently drop
    # everything after the first 30 seconds.
    chunk_size = 30 * sample_rate

    processor, model = _load_whisper()
    audio_input, _ = librosa.load(audio_path, sr=sample_rate)

    pieces = []
    for start in range(0, len(audio_input), chunk_size):
        chunk = audio_input[start:start + chunk_size]
        inputs = processor(chunk, return_tensors="pt", sampling_rate=sample_rate)
        result = model.generate(**inputs)
        text = processor.decode(result[0], skip_special_tokens=True).strip()
        if text:
            pieces.append(text)
    return " ".join(pieces)
def _split_affixes(token):
    """Split ``token`` into (leading non-alphanumerics, core, trailing non-alphanumerics)."""
    start, end = 0, len(token)
    while start < end and not token[start].isalnum():
        start += 1
    while end > start and not token[end - 1].isalnum():
        end -= 1
    return token[:start], token[start:end], token[end:]


def _match_case(suggestion, original):
    """Give ``suggestion`` the capitalisation pattern of ``original``."""
    if len(original) > 1 and original.isupper():
        return suggestion.upper()
    if original[:1].isupper():
        return suggestion[:1].upper() + suggestion[1:]
    return suggestion


def correct_text(text):
    """Spell-check ``text`` word by word using the Spanish dictionary.

    Only the alphabetic core of each whitespace-separated token is sent to
    the spell checker; surrounding punctuation and the original
    capitalisation are restored afterwards. Tokens whose core is not purely
    alphabetic (numbers, hyphenated words, bare punctuation) are left
    untouched.

    Args:
        text: The text to correct.

    Returns:
        The corrected text with tokens joined by single spaces.
    """
    spell = SpellChecker(language='es')  # Spanish dictionary
    corrected_words = []
    for token in text.split():
        prefix, core, suffix = _split_affixes(token)
        if not core.isalpha():
            corrected_words.append(token)
            continue
        suggestion = spell.correction(core)
        # No candidate, or a suggestion that differs only in case: keep the
        # word exactly as it was transcribed.
        if suggestion is None or suggestion.lower() == core.lower():
            fixed = core
        else:
            fixed = _match_case(suggestion, core)
        corrected_words.append(prefix + fixed + suffix)
    return " ".join(corrected_words)
def process_video(video_file):
    """Run the full pipeline on an uploaded video.

    Extracts the audio, transcribes it, spell-checks the transcription and
    writes the corrected text to a Word document.

    Args:
        video_file: The uploaded file, either an object exposing the path as
            ``.name`` or the path itself as a string.

    Returns:
        A ``(corrected_text, doc_path)`` tuple: the corrected transcription
        and the path of the generated ``transcription.docx``.

    Raises:
        ValueError: If no file was provided.
    """
    if video_file is None:
        raise ValueError("No video file was provided.")

    # NOTE(review): depending on the installed Gradio version / gr.File
    # ``type``, the upload arrives as a file-like object with ``.name`` or as
    # a plain path string; accept both.
    video_path = getattr(video_file, "name", video_file)

    # A private directory per request keeps concurrent requests from
    # overwriting each other's output and avoids clobbering the input when
    # the upload itself already ends in ``.wav``.
    work_dir = tempfile.mkdtemp(prefix="transcription_")
    audio_path = os.path.join(work_dir, "audio.wav")
    try:
        extract_audio(video_path, audio_path)
        transcribed_text = transcribe_audio(audio_path)
    finally:
        # The intermediate WAV is only needed for transcription.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    corrected_text = correct_text(transcribed_text)

    # Write the corrected transcription to a Word document. The file must
    # outlive this call so it can be downloaded, hence no cleanup here.
    doc = Document()
    doc.add_paragraph(corrected_text)
    doc_path = os.path.join(work_dir, "transcription.docx")
    doc.save(doc_path)
    return corrected_text, doc_path
# Gradio interface: one file upload in, the corrected text plus a
# downloadable Word document out. Labels are user-facing Spanish strings.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Sube un archivo de video"),
    outputs=[
        gr.Textbox(label="Texto transcrito y corregido"),
        gr.File(label="Descargar transcripción Word"),
    ],
)
demo.launch()