# Page-capture header (not part of the program): Spaces: Sleeping; File size: 4,964 Bytes.
import json
import os
import shutil
import subprocess
import tempfile
from pathlib import Path
import gradio as gr
from huggingface_hub import snapshot_download
# Hub repository whose snapshot is downloaded below.
REPO_ID = "Daumee/Qwen3-ASR-0.6B-ONNX-CPU"

# Maps the mode chosen in the UI to the value passed as `--language` to the
# inference script. A value of None means the flag is omitted entirely.
LANGUAGE_MAP = {
    "English": "English",
    "Chinese": "Chinese",
    "Bilingual": None, # auto-detect
}

# Executed at import time. The result is used as a directory path: the
# inference script is looked up inside it and it is the subprocess's cwd.
MODEL_DIR = snapshot_download(repo_id=REPO_ID)
def normalize_audio(input_path: str, progress: gr.Progress | None = None) -> str:
    """Convert uploaded audio to mono 16 kHz WAV. No trimming, no denoising.

    Args:
        input_path: Path of the audio file to convert.
        progress: Optional progress callback; when given it is updated once
            before the conversion starts.

    Returns:
        Path of ``normalized.wav`` inside a freshly created temporary
        directory. The caller owns that directory and should remove it once
        the file is no longer needed.

    Raises:
        gr.Error: If ffmpeg is not on PATH or the conversion fails.
    """
    # Explicit None check: do not depend on the truthiness of the progress
    # object, which is not part of its documented contract.
    if progress is not None:
        progress(0.15, desc="Preparing audio...")

    if shutil.which("ffmpeg") is None:
        raise gr.Error("ffmpeg is not installed.")

    out_dir = Path(tempfile.mkdtemp())
    out_path = out_dir / "normalized.wav"

    cmd = [
        "ffmpeg",
        "-y",
        "-i", input_path,
        "-ac", "1",
        "-ar", "16000",
        "-vn",
        str(out_path),
    ]

    try:
        subprocess.run(
            cmd,
            check=True,
            # Keep ffmpeg from inheriting (and possibly reading) the server's
            # standard input.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as e:
        # Nothing usable was produced, so do not leave the temp dir behind.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise gr.Error("Failed to process the uploaded audio file.") from e

    return str(out_path)
def run_onnx_asr(audio_path: str, mode: str, progress: gr.Progress | None = None) -> dict:
    """Run the repo's ``onnx_inference.py`` on ``audio_path`` and parse its output.

    The script is started as a subprocess with ``--json`` and, unless the
    selected mode maps to ``None``, with ``--language <value>``.

    Args:
        audio_path: Path of the audio file handed to the inference script.
        mode: A key of ``LANGUAGE_MAP``.
        progress: Optional progress callback; when given it is updated once
            before the subprocess starts.

    Returns:
        The last line of the script's stdout that parses as a JSON object.
        If no such line exists, a dict with the whole stripped stdout under
        ``"text"`` and ``None`` under ``"language"``.

    Raises:
        gr.Error: If ``mode`` is unknown, the script is missing, or the
            subprocess exits with a non-zero status.
    """
    # Function-scope import keeps this block self-contained.
    import sys

    if mode not in LANGUAGE_MAP:
        raise gr.Error("Invalid mode selected.")
    language = LANGUAGE_MAP[mode]

    script_path = Path(MODEL_DIR) / "onnx_inference.py"
    if not script_path.exists():
        raise gr.Error("onnx_inference.py was not found in the downloaded model repo.")

    # Use the interpreter that is running this app so the subprocess sees the
    # same installed packages; a bare "python" may resolve to a different
    # interpreter or not exist at all. sys.executable can be empty, hence the
    # fallback to the previous behaviour.
    cmd = [sys.executable or "python", str(script_path), audio_path, "--json"]
    if language is not None:
        cmd.extend(["--language", language])

    # Explicit None check: do not depend on the truthiness of the progress
    # object.
    if progress is not None:
        progress(0.45, desc="Running transcription...")

    try:
        proc = subprocess.run(
            cmd,
            cwd=MODEL_DIR,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or "").strip()
        stdout = (e.stdout or "").strip()
        detail = stderr or stdout or "Unknown ASR error."
        # Cap the message so a long traceback does not flood the UI.
        raise gr.Error(detail[:1500]) from e

    stdout_text = (proc.stdout or "").strip()

    # Scan from the end: the result is expected after any log lines. Lines
    # that are valid JSON but not an object (e.g. a bare number) are skipped
    # instead of ending the search.
    for line in reversed(stdout_text.splitlines()):
        line = line.strip()
        if not line:
            continue
        try:
            candidate = json.loads(line)
        except json.JSONDecodeError:
            continue
        if isinstance(candidate, dict):
            return candidate

    # No JSON object found: treat the raw output as the transcript.
    return {
        "text": stdout_text,
        "language": None,
    }
def make_txt_file(text: str, original_audio_path: str) -> str:
    """Write ``text`` to a UTF-8 ``.txt`` file named after the audio file.

    The file is placed in a new temporary directory and takes the stem of
    ``original_audio_path`` as its name, or ``transcript`` when that stem is
    empty.

    Returns:
        The path of the written file as a string.
    """
    target_dir = tempfile.mkdtemp()

    base_name = Path(original_audio_path).stem
    if not base_name:
        base_name = "transcript"

    target = Path(target_dir, f"{base_name}.txt")
    target.write_text(text, encoding="utf-8")
    return str(target)
def transcribe(audio_file: str, mode: str, progress=gr.Progress()):
    """Transcribe an uploaded audio file and build the values shown in the UI.

    Normalizes the audio, runs the ASR subprocess, writes the transcript to a
    text file, and finally removes the intermediate normalized audio.

    Args:
        audio_file: Path of the uploaded audio file.
        mode: A key of ``LANGUAGE_MAP``.
        progress: Progress tracker, updated at the start and at the end.

    Returns:
        A ``(text, txt_file, info)`` tuple: the transcript, the path of the
        transcript file, and a short info string containing the mode and,
        when reported, the detected language.

    Raises:
        gr.Error: If no file was provided, or if normalization or
            transcription fails.
    """
    if not audio_file:
        raise gr.Error("Please upload an audio file.")

    progress(0.05, desc="Starting...")
    normalized_path = None

    try:
        normalized_path = normalize_audio(audio_file, progress=progress)
        result = run_onnx_asr(normalized_path, mode=mode, progress=progress)

        # Accept either key name for the transcript and for the language.
        text = (result.get("text") or result.get("transcript") or "").strip()
        txt_file = make_txt_file(text, audio_file)

        detected_language = result.get("language") or result.get("detected_language")
        info = f"Mode: {mode}"
        if detected_language:
            info += f"\nDetected language: {detected_language}"

        progress(1.0, desc="Done")
        return text, txt_file, info
    finally:
        # Best-effort cleanup of the intermediate audio; never mask the
        # result or the original exception.
        if normalized_path:
            try:
                os.remove(normalized_path)
            except OSError:
                pass
            # The normalized file lives in its own temp directory, which used
            # to be left behind on every request. os.rmdir only removes an
            # empty directory, so this cannot delete unrelated files.
            try:
                os.rmdir(os.path.dirname(normalized_path))
            except OSError:
                pass
# UI definition.
# NOTE(review): the original indentation was lost; this layout assumes only
# the two inputs share the row and everything else is stacked below it.
with gr.Blocks(title="Qwen3 ASR ONNX CPU") as demo:
    gr.Markdown("# Qwen3 ASR ONNX CPU")
    gr.Markdown(
        "Upload audio, choose a mode, transcribe with Qwen3-ASR ONNX on CPU, and download the transcript."
    )

    # Inputs: the audio upload and the language mode.
    with gr.Row():
        audio = gr.Audio(sources=["upload"], type="filepath", label="Upload audio file")
        mode = gr.Dropdown(
            choices=["English", "Chinese", "Bilingual"],
            value="Bilingual",
            label="Mode",
            info="Bilingual means auto-detect.",
        )

    transcribe_btn = gr.Button("Transcribe")

    # Outputs, in the order transcribe() returns them.
    transcript = gr.Textbox(label="Transcript", lines=14)
    download_file = gr.File(label="Download transcript")
    metadata = gr.Textbox(label="Info", lines=2, interactive=False)

    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio, mode],
        outputs=[transcript, download_file, metadata],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|