api-stt / app.py
tudeplom's picture
Update app.py
b90ae45 verified
raw
history blame
1.99 kB
import os
import wave
import json
import tempfile
import ffmpeg
import eventlet
import eventlet.wsgi
from flask import Flask
from flask_socketio import SocketIO, emit
from vosk import Model, KaldiRecognizer
# Initialize the Flask app and attach a Socket.IO server to it.
app = Flask(__name__)
# cors_allowed_origins="*" lets Socket.IO clients connect from any origin.
socketio = SocketIO(app, cors_allowed_origins="*")
# Load the Vosk model once at import time; handle_audio_chunk reuses this
# single instance for every recognizer it creates.
# MODEL_PATH is a relative path, so it is resolved against the process's
# current working directory.
MODEL_PATH = "model/vosk-model"
# Startup notice (Vietnamese: "Loading Vosk model...").
print("\u2705 Đang tải model Vosk...")
model = Model(MODEL_PATH)
def _transcribe_wav(wav_path):
    """Run the shared Vosk model over a WAV file and return the recognized text.

    The file is fed to a fresh ``KaldiRecognizer`` in 4000-frame reads. Text is
    collected only from finalized utterances (``Result()``), and the recognizer
    is flushed with ``FinalResult()`` once the file is exhausted so the trailing
    utterance is included. Partial hypotheses are deliberately not collected:
    they are cumulative for the utterance in progress, so appending them on
    every read repeats the same words many times in the output.

    Args:
        wav_path: Path to a WAV file readable by the ``wave`` module.

    Returns:
        The recognized utterances joined by single spaces ("" if nothing was
        recognized).
    """
    segments = []
    with wave.open(wav_path, "rb") as wf:
        rec = KaldiRecognizer(model, wf.getframerate())
        while True:
            frames = wf.readframes(4000)
            if not frames:
                break
            if rec.AcceptWaveform(frames):
                segments.append(json.loads(rec.Result()).get("text", ""))
        # Flush whatever audio is still buffered in the recognizer.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))
    return " ".join(segment for segment in segments if segment)


# Handle one WebM audio chunk sent by the client.
@socketio.on("audio_chunk")
def handle_audio_chunk(data):
    """Transcribe one ``audio_chunk`` Socket.IO event and reply to the sender.

    The payload is written to a temporary ``.webm`` file, converted by ffmpeg
    to mono 16 kHz 16-bit PCM WAV, and transcribed with Vosk. On success a
    ``stt_result`` event carrying ``{"text": ...}`` is emitted; on any failure
    the error is printed and a ``stt_error`` event carrying ``{"error": ...}``
    is emitted instead. Both temporary files are removed in every case.

    Args:
        data: The event payload. Presumably the raw bytes of a self-contained
            WebM recording (it is written verbatim to a binary file) -- TODO
            confirm against the client.
    """
    webm_path, wav_path = None, None
    try:
        # Persist the incoming payload so ffmpeg can read it from disk.
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm:
            webm_path = temp_webm.name
            temp_webm.write(data)

        # Reserve the output path by actually creating the file. The deprecated
        # tempfile.mktemp only returns a name, which another process could
        # claim before ffmpeg writes to it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            wav_path = temp_wav.name

        # Convert to WAV (mono, 16 kHz, signed 16-bit PCM), overwriting the
        # empty placeholder created above.
        ffmpeg.input(webm_path).output(
            wav_path, acodec="pcm_s16le", ac=1, ar=16000, format="wav"
        ).run(overwrite_output=True, quiet=True)

        # Speech recognition, then send the result back to the client.
        emit("stt_result", {"text": _transcribe_wav(wav_path)})
    except Exception as e:
        # Event-handler boundary: report every failure to the client rather
        # than letting it escape into the Socket.IO server.
        print(f"Lỗi xử lý audio_chunk: {e}")
        emit("stt_error", {"error": str(e)})
    finally:
        for path in (webm_path, wav_path):
            if path:
                try:
                    os.remove(path)
                except FileNotFoundError:
                    # Already gone; nothing left to clean up.
                    pass
if __name__ == "__main__":
    # Script entry point: bind to every interface on port 7860 and hand the
    # listening socket to eventlet's WSGI server, which serves the Flask app.
    bind_address = ("0.0.0.0", 7860)
    listener = eventlet.listen(bind_address)
    eventlet.wsgi.server(listener, app)