Transcriber-2.0 / app.py
David-Chew-HL's picture
Update app.py
e186284 verified
import gradio as gr
from nemo.collections.asr.models import ASRModel
from docx import Document
import torch
import uuid
from pydub import AudioSegment
import os
# Load model
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the pretrained checkpoint, put it in evaluation mode, then move it to
# the selected device and cast it to bfloat16.
model = ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
model.eval()
model.to(device)
model.to(torch.bfloat16)
def transcribe_to_docx(audio_path):
    """Transcribe an audio file and save the transcript as a Word document.

    The input is converted to a mono, 16 kHz WAV file, and that file is what
    gets passed to the ASR model. The intermediate WAV is removed once
    transcription has finished (or failed); the generated .docx is left in
    /tmp so the caller can offer it for download.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A ``(transcript, docx_path)`` tuple: the recognised text and the path
        of the saved .docx file.

    Raises:
        FileNotFoundError: If no path is given, or the path does not exist.
    """
    # A missing path (None or "") is reported the same way as a missing file.
    # Without this guard, None would reach os.path.exists() and surface as an
    # unrelated TypeError. Presumably the UI passes None when nothing has been
    # uploaded; verify against the Gradio version in use.
    if not audio_path:
        raise FileNotFoundError("No audio file provided")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"File not found: {audio_path}")

    # Convert to mono 16kHz wav
    audio = AudioSegment.from_file(audio_path)
    audio = audio.set_channels(1).set_frame_rate(16000)
    tmp_wav = f"/tmp/{uuid.uuid4()}.wav"
    try:
        # export() returns the output file object; close it explicitly so the
        # handle is not left open while the model reads the file.
        audio.export(tmp_wav, format="wav").close()

        # Transcribe
        output = model.transcribe([tmp_wav])
        transcript = output[0].text
    finally:
        # The WAV is only an intermediate artefact: remove it whether or not
        # transcription succeeded so /tmp does not fill up across requests.
        try:
            os.remove(tmp_wav)
        except FileNotFoundError:
            # export() failed before the file was created; nothing to clean.
            pass

    # Save to Word
    doc = Document()
    doc.add_heading("Transcription", level=1)
    doc.add_paragraph(transcript)
    docx_path = f"/tmp/{uuid.uuid4()}.docx"
    doc.save(docx_path)
    return transcript, docx_path
# UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Upload Audio and Download Word Transcription")
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    transcribe_button = gr.Button("Transcribe", variant="primary")
    transcript_output = gr.Textbox(label="Transcript")
    docx_file_output = gr.File(label="Download .docx")
    # Starts hidden and is revealed after a transcription. No click handler is
    # attached to it here, so it works as a status indicator; the actual
    # download happens through the File component above.
    download_button = gr.Button("Ready to Download", visible=False, variant="secondary")

    def enable_download(transcript, file):
        """Reveal the download button; pass transcript and file through unchanged."""
        return gr.update(visible=True, variant="primary"), transcript, file

    transcribe_button.click(
        fn=transcribe_to_docx,
        inputs=audio_input,
        outputs=[transcript_output, docx_file_output],
        show_progress=True,
        api_name="transcribe"
    ).success(
        # .success() instead of .then(): the follow-up step should only run
        # when transcription completed without raising, otherwise the
        # "Ready to Download" button would appear after a failed run.
        fn=enable_download,
        inputs=[transcript_output, docx_file_output],
        outputs=[download_button, transcript_output, docx_file_output]
    )

demo.launch()