Transcriber / app.py
David-Chew-HL's picture
Update app.py
a47e964 verified
import json
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
import gradio as gr
from huggingface_hub import snapshot_download
# Hugging Face Hub repository the model files are downloaded from.
REPO_ID: str = "Daumee/Qwen3-ASR-0.6B-ONNX-CPU"

# UI mode name -> value passed to the inference script's --language flag.
# None means the flag is omitted entirely.
LANGUAGE_MAP: dict[str, str | None] = {
    "English": "English",
    "Chinese": "Chinese",
    "Bilingual": None,  # auto-detect
}

# Downloaded once, when this module is imported; used below as the directory
# that contains the inference script and as the child process's working dir.
MODEL_DIR: str = snapshot_download(repo_id=REPO_ID)
def normalize_audio(input_path: str, progress: gr.Progress | None = None) -> str:
    """Convert uploaded audio to mono 16 kHz WAV. No trimming, no denoising.

    Args:
        input_path: Path of the audio file to convert.
        progress: Optional Gradio progress tracker; set to 15% when given.

    Returns:
        Path of ``normalized.wav`` inside a newly created temporary
        directory. The caller is responsible for deleting it.

    Raises:
        gr.Error: If ffmpeg is not on PATH, cannot be started, or exits
            with a non-zero status.
    """
    # Identity check rather than truthiness: a progress tracker is a sized
    # iterable, so `if progress:` would consult its length instead of simply
    # checking that one was supplied.
    if progress is not None:
        progress(0.15, desc="Preparing audio...")

    if shutil.which("ffmpeg") is None:
        raise gr.Error("ffmpeg is not installed.")

    out_dir = Path(tempfile.mkdtemp())
    out_path = out_dir / "normalized.wav"
    cmd = [
        "ffmpeg",
        "-nostdin",  # never wait for interactive input on a server
        "-y",
        "-i", input_path,
        "-ac", "1",
        "-ar", "16000",
        "-vn",
        str(out_path),
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # Nothing is returned on failure, so nobody else can clean this up.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise gr.Error("Failed to process the uploaded audio file.") from e
    return str(out_path)
def _extract_json_object(stdout: str) -> dict | None:
    """Return the JSON object found in *stdout*, or None if there is none."""
    # Try the whole output first (covers pretty-printed, multi-line JSON),
    # then individual lines from the end (covers log lines before the result).
    candidates = [stdout, *reversed(stdout.splitlines())]
    for candidate in candidates:
        candidate = candidate.strip()
        if not candidate:
            continue
        try:
            value = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # A bare number/string/list is valid JSON but not a result object;
        # keep looking instead of giving up on it.
        if isinstance(value, dict):
            return value
    return None


def run_onnx_asr(audio_path: str, mode: str, progress: gr.Progress | None = None) -> dict:
    """Run the model repo's ``onnx_inference.py`` on an audio file.

    The script is executed in a child process with ``--json`` (and
    ``--language`` unless the mode maps to None), using the downloaded model
    directory as working directory.

    Args:
        audio_path: Path of the audio file handed to the script.
        mode: A key of ``LANGUAGE_MAP``.
        progress: Optional Gradio progress tracker; set to 45% when given.

    Returns:
        The JSON object printed by the script. If no JSON object can be found
        in its output, ``{"text": <stripped stdout>, "language": None}``.

    Raises:
        gr.Error: If *mode* is unknown, the script is missing, the process
            cannot be started, or it exits with a non-zero status.
    """
    import sys  # function-scope import: only needed to locate the interpreter

    if mode not in LANGUAGE_MAP:
        raise gr.Error("Invalid mode selected.")
    language = LANGUAGE_MAP[mode]

    script_path = Path(MODEL_DIR) / "onnx_inference.py"
    if not script_path.exists():
        raise gr.Error("onnx_inference.py was not found in the downloaded model repo.")

    # Run the script with the interpreter that runs this app so it sees the
    # same installed packages; a bare "python" depends on PATH and may be a
    # different interpreter or not exist at all.
    cmd = [sys.executable or "python", str(script_path), audio_path, "--json"]
    if language is not None:
        cmd.extend(["--language", language])

    # Identity check rather than truthiness: a progress tracker is a sized
    # iterable, so `if progress:` would consult its length.
    if progress is not None:
        progress(0.45, desc="Running transcription...")

    try:
        proc = subprocess.run(
            cmd,
            cwd=MODEL_DIR,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or "").strip()
        stdout = (e.stdout or "").strip()
        detail = stderr or stdout or "Unknown ASR error."
        # Keep the tail: a traceback ends with the actual exception message.
        raise gr.Error(detail[-1500:]) from e
    except OSError as e:
        raise gr.Error(f"Could not start the ASR process: {e}") from e

    raw_output = (proc.stdout or "").strip()
    parsed = _extract_json_object(raw_output)
    if parsed is None:
        # The script printed no JSON object; treat everything as transcript.
        return {
            "text": raw_output,
            "language": None,
        }
    return parsed
def make_txt_file(text: str, original_audio_path: str) -> str:
    """Write *text* to a UTF-8 ``.txt`` file in a new temporary directory.

    The file is named after the stem of *original_audio_path*, falling back
    to ``transcript`` when that stem is empty.

    Returns:
        The path of the written file, as a string.
    """
    folder = tempfile.mkdtemp()

    base_name = Path(original_audio_path).stem
    if not base_name:
        base_name = "transcript"

    target = Path(folder, f"{base_name}.txt")
    target.write_text(text, encoding="utf-8")
    return str(target)
def transcribe(audio_file: str, mode: str, progress=gr.Progress()):
    """Gradio handler: normalize the upload, transcribe it, build the outputs.

    Args:
        audio_file: Path of the uploaded audio file; falsy if nothing was
            uploaded.
        mode: Selected mode, forwarded to ``run_onnx_asr``.
        progress: Gradio progress tracker, reported at start and at the end
            and forwarded to the helper steps.

    Returns:
        A ``(text, txt_file, info)`` tuple: the transcript, the path of a
        ``.txt`` file containing it, and an info string with the mode and,
        when the result reports one, the detected language.

    Raises:
        gr.Error: If no file was uploaded, or if a helper step fails.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file.")

    progress(0.05, desc="Starting...")
    normalized_path = None
    try:
        normalized_path = normalize_audio(audio_file, progress=progress)
        result = run_onnx_asr(normalized_path, mode=mode, progress=progress)

        # The result may carry its fields under either name.
        text = (result.get("text") or result.get("transcript") or "").strip()
        txt_file = make_txt_file(text, audio_file)

        detected_language = result.get("language") or result.get("detected_language")
        info = f"Mode: {mode}"
        if detected_language:
            info += f"\nDetected language: {detected_language}"

        progress(1.0, desc="Done")
        return text, txt_file, info
    finally:
        if normalized_path:
            # Best-effort cleanup of the normalized WAV and the temporary
            # directory created for it. os.rmdir only removes an empty
            # directory, so nothing else can be deleted by accident.
            cleanup_steps = (
                (os.remove, normalized_path),
                (os.rmdir, os.path.dirname(normalized_path)),
            )
            for cleanup, target in cleanup_steps:
                try:
                    cleanup(target)
                except OSError:
                    pass
# UI definition: upload + mode selection, a button, and three outputs.
# NOTE(review): layout nesting was reconstructed (audio and mode inside the
# Row, everything else directly under Blocks) — confirm against the deployed UI.
with gr.Blocks(title="Qwen3 ASR ONNX CPU") as demo:
    gr.Markdown("# Qwen3 ASR ONNX CPU")
    gr.Markdown(
        "Upload audio, choose a mode, transcribe with Qwen3-ASR ONNX on CPU, and download the transcript."
    )

    with gr.Row():
        audio = gr.Audio(
            sources=["upload"],
            type="filepath",
            label="Upload audio file",
        )
        mode = gr.Dropdown(
            # Derived from LANGUAGE_MAP so the offered modes can never drift
            # from the ones run_onnx_asr accepts.
            choices=list(LANGUAGE_MAP),
            value="Bilingual",
            label="Mode",
            info="Bilingual means auto-detect.",
        )

    transcribe_btn = gr.Button("Transcribe")
    transcript = gr.Textbox(label="Transcript", lines=14)
    download_file = gr.File(label="Download transcript")
    metadata = gr.Textbox(label="Info", lines=2, interactive=False)

    # Outputs are listed in the order of transcribe()'s return tuple.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio, mode],
        outputs=[transcript, download_file, metadata],
    )

if __name__ == "__main__":
    demo.launch()