Spaces:

David-Chew-HL
/

Transcriber

Sleeping

App Files Files Community

David-Chew-HL committed on Apr 19

Commit

9dd9bee

verified ·

1 Parent(s): 1348049

Create app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import os
+import tempfile
+from pathlib import Path
+import gradio as gr
+import torch
+from qwen_asr import Qwen3ASRModel
+MODEL_NAME = "Qwen/Qwen3-ASR-1.7B"
+LANG_MAP = {
+    "English": "English",
+    "Chinese": "Chinese",
+    "Bilingual": None,  # auto-detect mixed English + Mandarin
+}
+device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
+dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+model = Qwen3ASRModel.from_pretrained(
+    MODEL_NAME,
+    dtype=dtype,
+    device_map=device_map,
+    max_inference_batch_size=1,
+    max_new_tokens=1024,
+)
+def transcribe(audio_path: str, mode: str):
+    if not audio_path:
+        raise gr.Error("Please upload an audio file.")
+    if mode not in LANG_MAP:
+        raise gr.Error("Invalid mode selected.")
+    language = LANG_MAP[mode]
+    result = model.transcribe(
+        audio=audio_path,
+        language=language,
+    )[0]
+    text = result.text.strip()
+    if not text:
+        text = ""
+    out_dir = Path(tempfile.mkdtemp())
+    txt_path = out_dir / "transcript.txt"
+    txt_path.write_text(text, encoding="utf-8")
+    detected_language = getattr(result, "language", None)
+    meta = f"Mode: {mode}"
+    if detected_language:
+        meta += f"\nDetected language: {detected_language}"
+    return text, str(txt_path), meta
+with gr.Blocks(title="Qwen3 ASR Transcriber") as demo:
+    gr.Markdown("# Qwen3 ASR Transcriber")
+    gr.Markdown(
+        "Upload audio, choose a mode, transcribe it, and download the transcript as a text file."
+    )
+    with gr.Row():
+        audio = gr.Audio(
+            sources=["upload"],
+            type="filepath",
+            label="Upload audio file",
+        )
+        mode = gr.Dropdown(
+            choices=["English", "Chinese", "Bilingual"],
+            value="Bilingual",
+            label="Mode",
+            info="Bilingual means Qwen auto-detects mixed English + Mandarin audio.",
+        )
+    transcribe_btn = gr.Button("Transcribe")
+    transcript = gr.Textbox(
+        label="Transcript",
+        lines=14,
+        show_copy_button=True,
+    )
+    transcript_file = gr.File(
+        label="Download transcript",
+    )
+    metadata = gr.Textbox(
+        label="Info",
+        lines=2,
+        interactive=False,
+    )
+    transcribe_btn.click(
+        fn=transcribe,
+        inputs=[audio, mode],
+        outputs=[transcript, transcript_file, metadata],
+    )
+# Launch the Gradio app only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()