from pathlib import Path
import json

import gradio as gr

ROOT = Path(__file__).parent


def _read_asset(filename: str) -> str:
    """Return the UTF-8 decoded text of *filename* stored next to this module."""
    return (ROOT / filename).read_text(encoding="utf-8")


def main() -> gr.Blocks:
    """Assemble the VisualEars WebGPU ASR page and return it unlaunched.

    Three sibling files are read on every call: ``tokens.json`` and
    ``mel_filters_slaney_80x257.json`` are parsed as JSON, ``app.js`` is read
    as text. A missing file or malformed JSON therefore raises before any
    Gradio object is created.

    Returns:
        The ``gr.Blocks`` instance holding a single ``gr.HTML`` component.
    """
    # NOTE(review): the three values below are loaded but not referenced
    # anywhere else in this function, and `head` is an empty string --
    # presumably they were meant to be injected into the page head; confirm.
    tokens = json.loads(_read_asset("tokens.json"))
    mel_filters = json.loads(_read_asset("mel_filters_slaney_80x257.json"))
    app_js = _read_asset("app.js")
    head = ""

    # Page stylesheet; rules other than `.gradio-container` are scoped under `#visualears-root`.
    stylesheet = """ .gradio-container { max-width: 1180px !important; } #visualears-root { font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; color: #111827; } #visualears-root .topbar { display: flex; align-items: center; justify-content: space-between; gap: 16px; margin-bottom: 16px; } #visualears-root h1 { font-size: 28px; line-height: 1.15; margin: 0; letter-spacing: 0; } #visualears-root .subtle { color: #4b5563; margin: 6px 0 0; font-size: 14px; } #visualears-root .controls { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 10px; align-items: end; margin: 14px 0; } #visualears-root button, #visualears-root select, #visualears-root input { width: 100%; min-height: 40px; border: 1px solid #cbd5e1; border-radius: 7px; background: white; color: #111827; font: inherit; padding: 8px 10px; } #visualears-root button.primary { background: #111827; color: white; border-color: #111827; } #visualears-root button:disabled { opacity: 0.45; cursor: not-allowed; } #visualears-root label { display: block; font-size: 12px; color: #475569; margin-bottom: 5px; } #visualears-root .status { min-height: 42px; overflow-wrap: anywhere; border: 1px solid #d1d5db; background: #f8fafc; border-radius: 7px; padding: 10px 12px; margin: 10px 0 14px; } #visualears-root .transcript { min-height: 150px; direction: rtl; text-align: right; font-size: 26px; line-height: 1.65; border: 1px solid #cbd5e1; border-radius: 8px; padding: 18px; background: #ffffff; overflow-wrap: anywhere; white-space: pre-wrap; } #visualears-root .stats { 
display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 10px; margin-top: 14px; } #visualears-root .stat { border: 1px solid #d1d5db; border-radius: 8px; padding: 12px; background: #ffffff; min-height: 74px; } #visualears-root .stat b { display: block; font-size: 12px; color: #64748b; margin-bottom: 8px; font-weight: 600; } #visualears-root .stat span { font-size: 20px; color: #111827; overflow-wrap: anywhere; } #visualears-root .meter { height: 8px; background: #e5e7eb; overflow: hidden; border-radius: 999px; margin-top: 8px; } #visualears-root .meter > div { height: 100%; width: 0%; background: #10b981; transition: width 120ms linear; } @media (max-width: 820px) { #visualears-root .topbar { align-items: flex-start; flex-direction: column; } #visualears-root .controls, #visualears-root .stats { grid-template-columns: 1fr 1fr; } #visualears-root .transcript { font-size: 22px; } } @media (max-width: 520px) { #visualears-root .controls, #visualears-root .stats { grid-template-columns: 1fr; } } """

    # Body handed to gr.HTML; kept flush-left because leading whitespace
    # would become part of the emitted string.
    page_body = """

VisualEars PhaseB Persian ASR FP16

Browser-only WebGPU decode with the PhaseB FP16 ONNX export.

Idle. Load the PhaseB FP16 model first; the first load downloads about 232 MB and then uses browser cache.
Final: ... Partial: ...
Decode Time-
RTF-
Realtime Speed-
Audio Window-
Feature Frames-
Provider-
Heap Used-
GPU Adapter-
Last Decode-
"""

    with gr.Blocks(
        title="VisualEars WebGPU ASR",
        head=head,
        css=stylesheet,
    ) as demo:
        gr.HTML(page_body)

    return demo


if __name__ == "__main__":
    main().launch()