David-Chew-HL committed on
Commit
e186284
·
verified ·
1 Parent(s): b7feb76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -6,13 +6,16 @@ import uuid
6
  from pydub import AudioSegment
7
  import os
8
 
9
- # Load the model
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  model = ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
12
  model.eval().to(device).to(torch.bfloat16)
13
 
14
  def transcribe_to_docx(audio_path):
15
- # Convert to mono + 16kHz wav
 
 
 
16
  audio = AudioSegment.from_file(audio_path)
17
  audio = audio.set_channels(1).set_frame_rate(16000)
18
  tmp_wav = f"/tmp/{uuid.uuid4()}.wav"
@@ -20,9 +23,9 @@ def transcribe_to_docx(audio_path):
20
 
21
  # Transcribe
22
  output = model.transcribe([tmp_wav])
23
- transcript = output[0]
24
 
25
- # Create Word doc
26
  doc = Document()
27
  doc.add_heading("Transcription", level=1)
28
  doc.add_paragraph(transcript)
@@ -34,16 +37,26 @@ def transcribe_to_docx(audio_path):
34
  # UI
35
  with gr.Blocks() as demo:
36
  gr.Markdown("## 🎙️ Upload Audio and Download Word Transcription")
 
37
  audio_input = gr.Audio(type="filepath", label="Upload Audio File")
38
- transcribe_button = gr.Button("Transcribe")
39
  transcript_output = gr.Textbox(label="Transcript")
40
  docx_file_output = gr.File(label="Download .docx")
 
 
 
 
41
 
42
  transcribe_button.click(
43
  fn=transcribe_to_docx,
44
  inputs=audio_input,
45
- outputs=[transcript_output, docx_file_output]
 
 
 
 
 
 
46
  )
47
 
48
  demo.launch()
49
-
 
6
  from pydub import AudioSegment
7
  import os
8
 
9
+ # Load model
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  model = ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
12
  model.eval().to(device).to(torch.bfloat16)
13
 
14
  def transcribe_to_docx(audio_path):
15
+ if not os.path.exists(audio_path):
16
+ raise FileNotFoundError(f"File not found: {audio_path}")
17
+
18
+ # Convert to mono 16kHz wav
19
  audio = AudioSegment.from_file(audio_path)
20
  audio = audio.set_channels(1).set_frame_rate(16000)
21
  tmp_wav = f"/tmp/{uuid.uuid4()}.wav"
 
23
 
24
  # Transcribe
25
  output = model.transcribe([tmp_wav])
26
+ transcript = output[0].text
27
 
28
+ # Save to Word
29
  doc = Document()
30
  doc.add_heading("Transcription", level=1)
31
  doc.add_paragraph(transcript)
 
37
  # UI
38
  with gr.Blocks() as demo:
39
  gr.Markdown("## 🎙️ Upload Audio and Download Word Transcription")
40
+
41
  audio_input = gr.Audio(type="filepath", label="Upload Audio File")
42
+ transcribe_button = gr.Button("Transcribe", variant="primary")
43
  transcript_output = gr.Textbox(label="Transcript")
44
  docx_file_output = gr.File(label="Download .docx")
45
+ download_button = gr.Button("Ready to Download", visible=False, variant="secondary")
46
+
47
+ def enable_download(transcript, file):
48
+ return gr.update(visible=True, variant="primary"), transcript, file
49
 
50
  transcribe_button.click(
51
  fn=transcribe_to_docx,
52
  inputs=audio_input,
53
+ outputs=[transcript_output, docx_file_output],
54
+ show_progress=True,
55
+ api_name="transcribe"
56
+ ).then(
57
+ fn=enable_download,
58
+ inputs=[transcript_output, docx_file_output],
59
+ outputs=[download_button, transcript_output, docx_file_output]
60
  )
61
 
62
  demo.launch()