Rajor78 committed on
Commit
88bf030
verified
1 Parent(s): 97e782a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -48
app.py CHANGED
@@ -3,7 +3,7 @@ import subprocess
3
  import os
4
  import librosa
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
- import language_tool_python
7
  from pydub import AudioSegment
8
  from docx import Document
9
 
@@ -23,50 +23,4 @@ def transcribe_audio(audio_path):
23
  audio_input, _ = librosa.load(audio_path, sr=16000)
24
 
25
  # Preprocesar el audio para el modelo
26
- inputs = processor(audio_input, return_tensors="pt", sampling_rate=16000)
27
-
28
- # Realizar la transcripción
29
- result = model.generate(**inputs)
30
- transcription = processor.decode(result[0], skip_special_tokens=True)
31
-
32
- return transcription
33
-
34
- # Función para corregir el texto transcrito con LanguageTool
35
- def correct_text(text):
36
- tool = language_tool_python.LanguageTool('es')
37
- matches = tool.check(text)
38
- return language_tool_python.utils.correct(text, matches)
39
-
40
- # Función principal que procesa el video
41
- def process_video(video_file):
42
- video_path = video_file.name
43
- audio_path = os.path.splitext(video_path)[0] + '.wav'
44
-
45
- # Extraer el audio del video
46
- extract_audio(video_path, audio_path)
47
-
48
- # Transcribir el audio
49
- transcribed_text = transcribe_audio(audio_path)
50
-
51
- # Corregir la transcripción
52
- corrected_text = correct_text(transcribed_text)
53
-
54
- # Crear un documento Word con la transcripción corregida
55
- doc = Document()
56
- doc.add_paragraph(corrected_text)
57
- doc_path = "transcription.docx"
58
- doc.save(doc_path)
59
-
60
- return corrected_text, doc_path
61
-
62
- # Interfaz de Gradio
63
- demo = gr.Interface(
64
- fn=process_video,
65
- inputs=gr.File(label="Sube un archivo de video"),
66
- outputs=[
67
- gr.Textbox(label="Texto transcrito y corregido"),
68
- gr.File(label="Descargar transcripción Word")
69
- ]
70
- )
71
-
72
- demo.launch()
 
3
  import os
4
  import librosa
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ from gingerit.gingerit import GingerIt
7
  from pydub import AudioSegment
8
  from docx import Document
9
 
 
23
  audio_input, _ = librosa.load(audio_path, sr=16000)
24
 
25
  # Preprocesar el audio para el modelo
26
+ inputs =