Spaces:

Luigi
/

Streaming-Zipformer

Sleeping

App Files Files Community

Luigi committed on Jun 6, 2025

Commit

231cd3a

1 Parent(s): 221a9c5

1. Add Simplified-to-Traditional Chinese conversion of the ASR output 2. Improve the UI layout

Browse files

Files changed (2) hide show

app/asr_worker.py +7 -2
app/static/index.html +74 -6

app/asr_worker.py CHANGED Viewed

@@ -2,6 +2,10 @@ import numpy as np
 import sherpa_onnx
 from pathlib import Path
 import scipy.signal
 def resample_audio(audio, orig_sr, target_sr):
     return scipy.signal.resample_poly(audio, target_sr, orig_sr)
@@ -33,7 +37,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
     if recognizer.is_ready(stream):
         recognizer.decode_streams([stream])
     result = recognizer.get_result(stream)
-    return result, rms
 def finalize_stream(stream, recognizer):
     tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
@@ -41,4 +45,5 @@ def finalize_stream(stream, recognizer):
     stream.input_finished()
     while recognizer.is_ready(stream):
         recognizer.decode_streams([stream])
-    return recognizer.get_result(stream)

 import sherpa_onnx
 from pathlib import Path
 import scipy.signal
+from opencc import OpenCC
+converter = OpenCC('s2t')
 def resample_audio(audio, orig_sr, target_sr):
     return scipy.signal.resample_poly(audio, target_sr, orig_sr)
     if recognizer.is_ready(stream):
         recognizer.decode_streams([stream])
     result = recognizer.get_result(stream)
+    return converter.convert(result), rms
 def finalize_stream(stream, recognizer):
     tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
     stream.input_finished()
     while recognizer.is_ready(stream):
         recognizer.decode_streams([stream])
+    result = recognizer.get_result(stream)
+    return converter.convert(result)

app/static/index.html CHANGED Viewed

@@ -1,13 +1,81 @@
 <!DOCTYPE html>
-<html>
 <head>
-  <title>FastAPI Real-Time ASR</title>
 </head>
 <body>
-  <h1>🎤 Speak into your mic...</h1>
-  <div>Volume: <progress id="vol" max="1" value="0"></progress></div>
-  <p>Partial: <span id="partial"></span></p>
-  <p>Final: <b id="final"></b></p>
   <script>
     const ws = new WebSocket("wss://" + location.host + "/ws");
     const vol = document.getElementById("vol");

 <!DOCTYPE html>
+<html lang="en">
 <head>
+  <meta charset="UTF-8" />
+  <title>🎤 Real-Time ASR Demo</title>
+  <style>
+    body {
+      font-family: "Segoe UI", sans-serif;
+      background-color: #f5f6fa;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      justify-content: center;
+      min-height: 100vh;
+      margin: 0;
+      padding: 2rem;
+      color: #2f3640;
+    }
+    h1 {
+      margin-bottom: 1rem;
+      font-size: 2rem;
+    }
+    #vol {
+      width: 300px;
+      height: 20px;
+      margin-bottom: 1rem;
+      appearance: none;
+    }
+    #vol::-webkit-progress-bar {
+      background-color: #dcdde1;
+      border-radius: 8px;
+    }
+    #vol::-webkit-progress-value {
+      background-color: #44bd32;
+      border-radius: 8px;
+      transition: width 0.2s;
+    }
+    .output {
+      width: 90%;
+      max-width: 800px;
+      text-align: left;
+      margin-top: 2rem;
+      background: white;
+      padding: 1rem 1.5rem;
+      border-radius: 10px;
+      box-shadow: 0 0 10px rgba(0,0,0,0.1);
+    }
+    .label {
+      font-weight: bold;
+      color: #718093;
+    }
+    #partial {
+      font-size: 1.25rem;
+      color: #353b48;
+    }
+    #final {
+      font-size: 1.4rem;
+      color: #e84118;
+    }
+  </style>
 </head>
 <body>
+  <h1>🎤 Speak into your microphone</h1>
+  <progress id="vol" max="1" value="0"></progress>
+  <div class="output">
+    <div><span class="label">Partial:</span> <span id="partial">...</span></div>
+    <div><span class="label">Final:</span> <b id="final">...</b></div>
+  </div>
   <script>
     const ws = new WebSocket("wss://" + location.host + "/ws");
     const vol = document.getElementById("vol");