David-Chew-HL committed on
Commit
9dd9bee
·
verified ·
1 Parent(s): 1348049

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
+ import gradio as gr
6
+ import torch
7
+ from qwen_asr import Qwen3ASRModel
8
+
9
# Checkpoint identifier handed to Qwen3ASRModel.from_pretrained below.
MODEL_NAME = "Qwen/Qwen3-ASR-1.7B"

# UI mode name -> `language` argument given to the model.
# "Bilingual" maps to None: auto-detect mixed English + Mandarin.
LANG_MAP = dict(English="English", Chinese="Chinese", Bilingual=None)

# Use the first CUDA device with bfloat16 when available; otherwise fall back
# to CPU with float32.
if torch.cuda.is_available():
    device_map, dtype = "cuda:0", torch.bfloat16
else:
    device_map, dtype = "cpu", torch.float32

# Loaded once at import time and shared by every transcription request.
model = Qwen3ASRModel.from_pretrained(
    MODEL_NAME,
    device_map=device_map,
    dtype=dtype,
    max_new_tokens=1024,
    max_inference_batch_size=1,
)
27
+
28
def transcribe(audio_path: str, mode: str):
    """Transcribe an uploaded audio file and write the transcript to disk.

    Args:
        audio_path: Path of the uploaded audio file. A falsy value (nothing
            uploaded) is rejected.
        mode: A key of ``LANG_MAP``. Its mapped value is passed to the model
            as ``language``; ``None`` requests auto-detection.

    Returns:
        A ``(text, txt_path, meta)`` tuple: the stripped transcript text, the
        path (as ``str``) of a UTF-8 ``transcript.txt`` holding that text, and
        an info string with the mode and, when the result exposes one, the
        detected language.

    Raises:
        gr.Error: If no audio was provided, ``mode`` is not in ``LANG_MAP``,
            or the model returned no result.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file.")

    if mode not in LANG_MAP:
        raise gr.Error("Invalid mode selected.")

    results = model.transcribe(
        audio=audio_path,
        language=LANG_MAP[mode],
    )
    # Surface an empty result list as a user-facing error rather than a bare
    # IndexError from `results[0]`.
    if not results:
        raise gr.Error("The model returned no transcription result.")
    result = results[0]

    # A None text is treated as an empty transcript instead of crashing on
    # `.strip()`.
    text = (result.text or "").strip()

    # A fresh directory per call keeps the download name fixed at
    # "transcript.txt". The directory is not removed by this function.
    out_dir = Path(tempfile.mkdtemp())
    txt_path = out_dir / "transcript.txt"
    txt_path.write_text(text, encoding="utf-8")

    meta = f"Mode: {mode}"
    detected_language = getattr(result, "language", None)
    if detected_language:
        meta += f"\nDetected language: {detected_language}"

    return text, str(txt_path), meta
58
+
59
with gr.Blocks(title="Qwen3 ASR Transcriber") as demo:
    # Page heading followed by a one-line usage hint.
    gr.Markdown("# Qwen3 ASR Transcriber")
    gr.Markdown(
        "Upload audio, choose a mode, transcribe it, and download the transcript as a text file."
    )

    # Inputs share one row: the audio upload and the language mode selector.
    with gr.Row():
        audio = gr.Audio(
            label="Upload audio file",
            type="filepath",
            sources=["upload"],
        )
        mode = gr.Dropdown(
            label="Mode",
            info="Bilingual means Qwen auto-detects mixed English + Mandarin audio.",
            value="Bilingual",
            choices=["English", "Chinese", "Bilingual"],
        )

    transcribe_btn = gr.Button("Transcribe")

    # Outputs: transcript text, downloadable transcript file, read-only info.
    transcript = gr.Textbox(label="Transcript", show_copy_button=True, lines=14)
    transcript_file = gr.File(label="Download transcript")
    metadata = gr.Textbox(label="Info", interactive=False, lines=2)

    # The three values returned by `transcribe` fill the outputs in order.
    transcribe_btn.click(
        transcribe,
        [audio, mode],
        [transcript, transcript_file, metadata],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()