Spaces:

David-Chew-HL
/

Transcriber

Sleeping

App Files Files Community

David-Chew-HL committed on Apr 19

Commit

fa1c2c0

verified ·

1 Parent(s): c8884b6

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -20

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import os
 import tempfile
 from pathlib import Path
@@ -11,7 +13,7 @@ MODEL_NAME = "Qwen/Qwen3-ASR-1.7B"
 LANG_MAP = {
     "English": "English",
     "Chinese": "Chinese",
-    "Bilingual": None,  # auto-detect mixed English + Mandarin
 }
 device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -21,40 +23,94 @@ model = Qwen3ASRModel.from_pretrained(
     MODEL_NAME,
     dtype=dtype,
     device_map=device_map,
-    max_inference_batch_size=1,
-    max_new_tokens=1024,
 )
-def transcribe(audio_path: str, mode: str):
     if not audio_path:
         raise gr.Error("Please upload an audio file.")
     if mode not in LANG_MAP:
         raise gr.Error("Invalid mode selected.")
-    language = LANG_MAP[mode]
-    result = model.transcribe(
-        audio=audio_path,
-        language=language,
-    )[0]
-    text = result.text.strip()
-    if not text:
-        text = ""
-    out_dir = Path(tempfile.mkdtemp())
-    txt_path = out_dir / "transcript.txt"
-    txt_path.write_text(text, encoding="utf-8")
-    detected_language = getattr(result, "language", None)
-    meta = f"Mode: {mode}"
-    if detected_language:
-        meta += f"\nDetected language: {detected_language}"
-    return text, str(txt_path), meta
 with gr.Blocks(title="Qwen3 ASR Transcriber") as demo:
     gr.Markdown("# Qwen3 ASR Transcriber")

 import os
+import shutil
+import subprocess
 import tempfile
 from pathlib import Path
 LANG_MAP = {
     "English": "English",
     "Chinese": "Chinese",
+    "Bilingual": None,  # let Qwen auto-detect
 }
 device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
     MODEL_NAME,
     dtype=dtype,
     device_map=device_map,
+    max_inference_batch_size=1
 )
def normalize_audio(input_path: str, progress: gr.Progress | None = None) -> str:
    """Convert an uploaded audio file to a mono, 16 kHz WAV file.

    The audio is only re-encoded: no silence trimming and no noise
    reduction is applied.

    Args:
        input_path: Path of the uploaded audio file.
        progress: Optional progress tracker. When supplied it is advanced to
            0.15 with the description "Preparing audio...".

    Returns:
        Path of ``normalized.wav`` inside a freshly created temporary
        directory. The caller is responsible for deleting it.

    Raises:
        gr.Error: If ``ffmpeg`` is not found on ``PATH`` or if the
            conversion exits with a non-zero status.
    """
    # Test for presence explicitly: truthiness of a tracker object may be
    # routed through ``__len__`` rather than meaning "was one supplied".
    if progress is not None:
        progress(0.15, desc="Preparing audio...")

    if shutil.which("ffmpeg") is None:
        raise gr.Error("ffmpeg is not installed in this environment.")

    out_dir = Path(tempfile.mkdtemp())
    out_path = out_dir / "normalized.wav"

    cmd = [
        "ffmpeg",
        "-y",                # overwrite the output without prompting
        "-i", input_path,
        "-ac", "1",          # mono
        "-ar", "16000",      # 16 kHz
        "-vn",               # ignore any video stream
        str(out_path),
    ]

    try:
        subprocess.run(
            cmd,
            check=True,
            # Keep ffmpeg from reading the server's stdin for interactive
            # commands.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as err:
        # The caller never learns about out_dir on failure, so remove it
        # here instead of leaking a directory per failed upload.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise gr.Error("Failed to process the uploaded audio file.") from err

    return str(out_path)
def make_output_txt(text: str, original_audio_path: str) -> str:
    """Write a transcript to a ``.txt`` file named after the source audio.

    Args:
        text: Transcript to store; written as UTF-8.
        original_audio_path: Path of the uploaded audio. Only its stem is
            used, to name the output file. An empty or ``None`` path, or a
            stem made only of whitespace, falls back to ``transcript``.

    Returns:
        Path of the text file, created inside a fresh temporary directory.
    """
    # Normalise before falling back so a blank stem cannot produce a file
    # that shows up as " .txt" in the download.
    stem = Path(original_audio_path or "").stem.strip() or "transcript"

    txt_path = Path(tempfile.mkdtemp()) / f"{stem}.txt"
    txt_path.write_text(text, encoding="utf-8")
    return str(txt_path)
def transcribe(audio_path: str, mode: str, progress=gr.Progress()):
    """Transcribe an uploaded audio file and build a downloadable transcript.

    The upload is first converted with ``normalize_audio``, then passed to
    ``model.transcribe`` with the language selected by ``mode``.

    Args:
        audio_path: Path of the uploaded audio file.
        mode: Key of ``LANG_MAP`` selecting the transcription language.
        progress: Progress tracker used to report each stage.

    Returns:
        A ``(text, txt_path, info)`` tuple: the stripped transcript, the
        path of a text file containing it, and a short summary holding the
        mode and, when the result exposes one, the detected language.

    Raises:
        gr.Error: If no audio was supplied or ``mode`` is not a key of
            ``LANG_MAP``.
    """
    if not audio_path:
        raise gr.Error("Please upload an audio file.")
    if mode not in LANG_MAP:
        raise gr.Error("Invalid mode selected.")

    progress(0.05, desc="Starting...")

    normalized_path = None
    try:
        normalized_path = normalize_audio(audio_path, progress=progress)

        progress(0.45, desc="Running transcription...")
        result = model.transcribe(
            audio=normalized_path,
            language=LANG_MAP[mode],
        )[0]

        text = (result.text or "").strip()
        # Name the transcript after the original upload, not the temp WAV.
        txt_path = make_output_txt(text, audio_path)

        detected_language = getattr(result, "language", None)
        info = f"Mode: {mode}"
        if detected_language:
            info += f"\nDetected language: {detected_language}"

        progress(1.0, desc="Done")
        return text, txt_path, info
    finally:
        if normalized_path:
            # Remove the temp WAV and then its directory. os.rmdir only
            # deletes an empty directory, so nothing else can be lost.
            cleanup_steps = (
                (os.remove, normalized_path),
                (os.rmdir, os.path.dirname(normalized_path)),
            )
            for cleanup, target in cleanup_steps:
                try:
                    cleanup(target)
                except OSError:
                    # Best-effort cleanup must not mask the real outcome.
                    pass
 with gr.Blocks(title="Qwen3 ASR Transcriber") as demo:
     gr.Markdown("# Qwen3 ASR Transcriber")