Spaces:
Sleeping
Sleeping
1. Add Simplified-to-Traditional Chinese conversion 2. Improve UI layout
Browse files
- app/asr_worker.py +7 -2
- app/static/index.html +74 -6
app/asr_worker.py
CHANGED
|
@@ -2,6 +2,10 @@ import numpy as np
|
|
| 2 |
import sherpa_onnx
|
| 3 |
from pathlib import Path
|
| 4 |
import scipy.signal
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def resample_audio(audio, orig_sr, target_sr):
|
| 7 |
return scipy.signal.resample_poly(audio, target_sr, orig_sr)
|
|
@@ -33,7 +37,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
|
|
| 33 |
if recognizer.is_ready(stream):
|
| 34 |
recognizer.decode_streams([stream])
|
| 35 |
result = recognizer.get_result(stream)
|
| 36 |
-
return result, rms
|
| 37 |
|
| 38 |
def finalize_stream(stream, recognizer):
|
| 39 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
|
@@ -41,4 +45,5 @@ def finalize_stream(stream, recognizer):
|
|
| 41 |
stream.input_finished()
|
| 42 |
while recognizer.is_ready(stream):
|
| 43 |
recognizer.decode_streams([stream])
|
| 44 |
-
|
|
|
|
|
|
| 2 |
import sherpa_onnx
|
| 3 |
from pathlib import Path
|
| 4 |
import scipy.signal
|
| 5 |
+
from opencc import OpenCC
|
| 6 |
+
|
| 7 |
+
converter = OpenCC('s2t')
|
| 8 |
+
|
| 9 |
|
| 10 |
def resample_audio(audio, orig_sr, target_sr):
|
| 11 |
return scipy.signal.resample_poly(audio, target_sr, orig_sr)
|
|
|
|
| 37 |
if recognizer.is_ready(stream):
|
| 38 |
recognizer.decode_streams([stream])
|
| 39 |
result = recognizer.get_result(stream)
|
| 40 |
+
return converter.convert(result), rms
|
| 41 |
|
| 42 |
def finalize_stream(stream, recognizer):
|
| 43 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
|
|
|
| 45 |
stream.input_finished()
|
| 46 |
while recognizer.is_ready(stream):
|
| 47 |
recognizer.decode_streams([stream])
|
| 48 |
+
result = recognizer.get_result(stream)
|
| 49 |
+
return converter.convert(result)
|
app/static/index.html
CHANGED
|
@@ -1,13 +1,81 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
-
<html>
|
| 3 |
<head>
|
| 4 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
</head>
|
| 6 |
<body>
|
| 7 |
-
<h1>🎤 Speak into your
|
| 8 |
-
<
|
| 9 |
-
|
| 10 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
<script>
|
| 12 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
| 13 |
const vol = document.getElementById("vol");
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<title>🎤 Real-Time ASR Demo</title>
|
| 6 |
+
<style>
|
| 7 |
+
body {
|
| 8 |
+
font-family: "Segoe UI", sans-serif;
|
| 9 |
+
background-color: #f5f6fa;
|
| 10 |
+
display: flex;
|
| 11 |
+
flex-direction: column;
|
| 12 |
+
align-items: center;
|
| 13 |
+
justify-content: center;
|
| 14 |
+
min-height: 100vh;
|
| 15 |
+
margin: 0;
|
| 16 |
+
padding: 2rem;
|
| 17 |
+
color: #2f3640;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
h1 {
|
| 21 |
+
margin-bottom: 1rem;
|
| 22 |
+
font-size: 2rem;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
#vol {
|
| 26 |
+
width: 300px;
|
| 27 |
+
height: 20px;
|
| 28 |
+
margin-bottom: 1rem;
|
| 29 |
+
appearance: none;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
#vol::-webkit-progress-bar {
|
| 33 |
+
background-color: #dcdde1;
|
| 34 |
+
border-radius: 8px;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
#vol::-webkit-progress-value {
|
| 38 |
+
background-color: #44bd32;
|
| 39 |
+
border-radius: 8px;
|
| 40 |
+
transition: width 0.2s;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.output {
|
| 44 |
+
width: 90%;
|
| 45 |
+
max-width: 800px;
|
| 46 |
+
text-align: left;
|
| 47 |
+
margin-top: 2rem;
|
| 48 |
+
background: white;
|
| 49 |
+
padding: 1rem 1.5rem;
|
| 50 |
+
border-radius: 10px;
|
| 51 |
+
box-shadow: 0 0 10px rgba(0,0,0,0.1);
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
.label {
|
| 55 |
+
font-weight: bold;
|
| 56 |
+
color: #718093;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
#partial {
|
| 60 |
+
font-size: 1.25rem;
|
| 61 |
+
color: #353b48;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
#final {
|
| 65 |
+
font-size: 1.4rem;
|
| 66 |
+
color: #e84118;
|
| 67 |
+
}
|
| 68 |
+
</style>
|
| 69 |
</head>
|
| 70 |
<body>
|
| 71 |
+
<h1>🎤 Speak into your microphone</h1>
|
| 72 |
+
<progress id="vol" max="1" value="0"></progress>
|
| 73 |
+
|
| 74 |
+
<div class="output">
|
| 75 |
+
<div><span class="label">Partial:</span> <span id="partial">...</span></div>
|
| 76 |
+
<div><span class="label">Final:</span> <b id="final">...</b></div>
|
| 77 |
+
</div>
|
| 78 |
+
|
| 79 |
<script>
|
| 80 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
| 81 |
const vol = document.getElementById("vol");
|