Spaces:

Rajor78
/

Transcript2Word

Sleeping

File size: 1,654 Bytes

import os
import whisper
import spacy
import language_tool_python
import gradio as gr
import subprocess
from docx import Document

def extract_audio(video_path, audio_path):
    """Extract the audio track of a media file into a mono 16 kHz PCM WAV.

    ffmpeg is invoked with an argument list and without a shell, so paths
    that contain spaces or shell metacharacters (uploaded file names are
    untrusted) are passed through verbatim instead of being split or
    interpreted as commands.

    Args:
        video_path: Path of the input media file.
        audio_path: Path of the WAV file to write; an existing file at this
            path is overwritten.

    Returns:
        True if ffmpeg exited successfully, False otherwise. On failure the
        error is printed and not re-raised.
    """
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output instead of aborting on "file exists"
        "-i", str(video_path),
        "-vn",  # ignore the video stream
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        str(audio_path),
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.SubprocessError, OSError, ValueError) as e:
        # CalledProcessError: ffmpeg returned non-zero.
        # OSError: the ffmpeg executable is missing or not runnable.
        # ValueError: a path contains an embedded NUL byte.
        print(f"Error al extraer audio: {e}")
        return False
    return True

def transcribe_audio(audio_path):
    """Transcribe an audio file with Whisper's "base" model.

    The model is loaded on every call and ``transcribe`` is invoked with
    ``word_timestamps=True``.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        The object returned by ``model.transcribe``, unchanged.
        ``process_video`` in this file reads its ``'segments'`` entry.
    """
    whisper_model = whisper.load_model("base")
    return whisper_model.transcribe(audio_path, word_timestamps=True)

# Lazily created, process-wide LanguageTool instance for Spanish.
_SPANISH_TOOL = None


def _get_spanish_tool():
    """Return the shared Spanish LanguageTool instance, creating it once."""
    global _SPANISH_TOOL
    if _SPANISH_TOOL is None:
        _SPANISH_TOOL = language_tool_python.LanguageTool('es')
    return _SPANISH_TOOL


def correct_text(text):
    """Apply LanguageTool's suggested corrections for Spanish to *text*.

    A single LanguageTool instance is reused across calls. Constructing a
    new one per call (this function is called once per transcript segment)
    repeats the expensive tool start-up for every segment.

    Args:
        text: The text to correct.

    Returns:
        The corrected text. Empty or whitespace-only input (and ``None``)
        is returned unchanged without contacting LanguageTool.
    """
    if not text or not text.strip():
        return text
    tool = _get_spanish_tool()
    matches = tool.check(text)
    return language_tool_python.utils.correct(text, matches)

def create_word_doc(segments, output_path):
    """Write the corrected text of each segment as a paragraph of a .docx.

    Args:
        segments: Iterable of mappings; each one's ``'text'`` entry is run
            through ``correct_text`` and added as its own paragraph, in order.
        output_path: Destination path passed to ``Document.save``.

    Returns:
        ``output_path``, unchanged.
    """
    document = Document()
    raw_texts = (entry['text'] for entry in segments)
    # map() is lazy, so each segment is corrected right before it is added.
    for paragraph_text in map(correct_text, raw_texts):
        document.add_paragraph(paragraph_text)
    document.save(output_path)
    return output_path

def _derive_output_paths(video_path):
    """Return ``(audio_path, docx_path)`` built from *video_path*'s stem."""
    stem, extension = os.path.splitext(video_path)
    # A .wav upload must not use the input file itself as the ffmpeg output.
    audio_suffix = "_audio.wav" if extension.lower() == ".wav" else ".wav"
    return stem + audio_suffix, stem + "_transcription.docx"


def process_video(video_file):
    """Transcribe an uploaded video and produce a Word document.

    Pipeline: extract the audio with ``extract_audio``, transcribe it with
    ``transcribe_audio``, then write the segments with ``create_word_doc``.

    Output paths are derived from the file's extension with
    ``os.path.splitext`` rather than by replacing the substring ``".mp4"``,
    so uploads with another extension (or upper-case ``.MP4``) no longer
    map the audio and document paths onto the input file itself.

    Args:
        video_file: The uploaded file as delivered by the ``gr.File`` input:
            a path (``str`` / ``os.PathLike``), an object exposing the path
            as ``.name``, or ``None`` when nothing was uploaded.

    Returns:
        A ``(status_message, docx_path)`` tuple; ``docx_path`` is ``None``
        when no file was given or the audio could not be extracted.
    """
    if video_file is None:
        return "Error al procesar el archivo.", None

    if isinstance(video_file, (str, os.PathLike)):
        video_path = os.fspath(video_file)
    else:
        # File-like upload wrapper: the on-disk path is exposed as .name.
        video_path = video_file.name

    audio_path, word_output = _derive_output_paths(video_path)

    try:
        if not extract_audio(video_path, audio_path):
            return "Error al procesar el archivo.", None
        result = transcribe_audio(audio_path)
        doc_path = create_word_doc(result['segments'], word_output)
        return "Transcripción completada.", doc_path
    finally:
        # The WAV is only an intermediate artifact; remove it so repeated
        # runs do not accumulate audio files next to the uploads.
        try:
            os.remove(audio_path)
        except OSError:
            # Nothing to clean up (extraction failed) or not removable;
            # either way this must not mask the real outcome.
            pass

# Gradio UI: a single file upload goes to process_video, whose
# (status message, document path) result feeds a text box and a file download.
demo = gr.Interface(
    process_video,
    gr.File(label="Sube un archivo de video"),
    ["text", gr.File(label="Descargar transcripción")],
)

# Start the web server as soon as this module is executed.
demo.launch()