# Spaces: Sleeping
# File size: 1,974 Bytes
import gradio as gr
from nemo.collections.asr.models import ASRModel
from docx import Document
import torch
import uuid
from pydub import AudioSegment
import os
# Load model
# Run on the GPU when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v2")
# Switch to evaluation mode, move to the chosen device, then cast to bfloat16.
model.eval()
model.to(device)
model.to(torch.bfloat16)
def transcribe_to_docx(audio_path):
    """Transcribe an audio file and save the transcript as a Word document.

    The input is re-encoded as a mono 16 kHz WAV file, handed to the
    module-level ``model`` for transcription, and the recognized text is
    written under a "Transcription" heading into a new ``.docx`` file in the
    system temporary directory.

    Args:
        audio_path: Filesystem path of the audio file to transcribe.

    Returns:
        A ``(transcript, docx_path)`` tuple: the recognized text and the path
        of the generated ``.docx`` file.

    Raises:
        FileNotFoundError: If ``audio_path`` is empty/``None`` or does not
            point to an existing file.
    """
    import tempfile  # local import: only this function needs it

    # Guard against a missing value first: os.path.exists(None) raises
    # TypeError, which would surface as an opaque error to the caller.
    if not audio_path:
        raise FileNotFoundError("No audio file was provided")
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"File not found: {audio_path}")

    tmp_dir = tempfile.gettempdir()

    # Convert to mono 16kHz wav
    audio = AudioSegment.from_file(audio_path)
    audio = audio.set_channels(1).set_frame_rate(16000)
    tmp_wav = os.path.join(tmp_dir, f"{uuid.uuid4()}.wav")
    try:
        # export() hands back the open output file object; close it so the
        # handle is not leaked.
        audio.export(tmp_wav, format="wav").close()

        # Transcribe
        output = model.transcribe([tmp_wav])
    finally:
        # The intermediate WAV is only needed for transcription; remove it
        # even when export or transcription fails.
        try:
            os.remove(tmp_wav)
        except FileNotFoundError:
            pass
    transcript = output[0].text

    # Save to Word
    doc = Document()
    doc.add_heading("Transcription", level=1)
    doc.add_paragraph(transcript)
    # The .docx is returned to the caller, so it is intentionally kept.
    docx_path = os.path.join(tmp_dir, f"{uuid.uuid4()}.docx")
    doc.save(docx_path)

    return transcript, docx_path
# UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Upload Audio and Download Word Transcription")

    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    transcribe_button = gr.Button("Transcribe", variant="primary")
    transcript_output = gr.Textbox(label="Transcript")
    docx_file_output = gr.File(label="Download .docx")
    # Hidden until a transcription has finished.
    download_button = gr.Button("Ready to Download", visible=False, variant="secondary")

    def enable_download(transcript, file):
        """Reveal the download button and pass the transcript and file through unchanged."""
        return gr.update(visible=True, variant="primary"), transcript, file

    # Chain with .success() rather than .then(): the download button should
    # only be revealed when transcription finished without raising, not after
    # a failed run.
    transcribe_button.click(
        fn=transcribe_to_docx,
        inputs=audio_input,
        outputs=[transcript_output, docx_file_output],
        show_progress=True,
        api_name="transcribe"
    ).success(
        fn=enable_download,
        inputs=[transcript_output, docx_file_output],
        outputs=[download_button, transcript_output, docx_file_output]
    )

demo.launch()
|