api-stt / app.py
tudeplom's picture
Update app.py
c80867b verified
raw
history blame
1.92 kB
import os
import wave
import json
import tempfile
import ffmpeg
from flask import Flask
from flask_socketio import SocketIO, emit
from vosk import Model, KaldiRecognizer
# Initialize the Flask app and wrap it with Socket.IO.
# cors_allowed_origins="*" accepts Socket.IO connections from any origin.
app = Flask(__name__)
socketio = SocketIO(app, cors_allowed_origins="*")
# Load the Vosk model once, at import time; the audio_chunk handler below
# reuses this single instance for every recognizer it creates.
MODEL_PATH = "model/vosk-model"
print("\u2705 Đang tải model Vosk...")
model = Model(MODEL_PATH)
def _transcribe_wav(wav_path):
    """Run the Vosk recognizer over the WAV file at *wav_path*.

    Returns the recognized text as a single space-separated string (empty
    when nothing was recognized).
    """
    segments = []
    with wave.open(wav_path, "rb") as wf:
        rec = KaldiRecognizer(model, wf.getframerate())
        while True:
            frames = wf.readframes(4000)
            if not frames:
                break
            # Only finalized utterances are collected. Partial results are
            # cumulative previews of the utterance in progress, so appending
            # them would repeat the same words over and over.
            if rec.AcceptWaveform(frames):
                segments.append(json.loads(rec.Result()).get("text", ""))
        # Flush the audio still buffered in the recognizer; without this the
        # last utterance of the chunk would be dropped.
        segments.append(json.loads(rec.FinalResult()).get("text", ""))
    return " ".join(segment for segment in segments if segment)


# Handle one WebM audio chunk sent by the client
@socketio.on("audio_chunk")
def handle_audio_chunk(data):
    """Transcribe a WebM audio chunk and emit the result to the client.

    Args:
        data: Payload of the "audio_chunk" event; it is written verbatim to
            a binary file, so it is expected to be the bytes of a WebM file
            that ffmpeg can decode on its own.

    Emits:
        "stt_result" with {"text": <recognized text>} on success, or
        "stt_error" with {"error": <message>} if any step fails.
    """
    try:
        # Both intermediate files live in a private temporary directory that
        # is removed on exit from the `with` block, on success and on error
        # alike. This avoids the race-prone tempfile.mktemp() and the cleanup
        # code that could itself fail on not-yet-assigned paths.
        with tempfile.TemporaryDirectory() as work_dir:
            webm_path = os.path.join(work_dir, "chunk.webm")
            wav_path = os.path.join(work_dir, "chunk.wav")
            with open(webm_path, "wb") as webm_file:
                webm_file.write(data)
            # Convert to 16 kHz mono 16-bit PCM WAV for the recognizer
            ffmpeg.input(webm_path).output(
                wav_path, acodec="pcm_s16le", ac=1, ar=16000, format="wav"
            ).run(overwrite_output=True, quiet=True)
            result_text = _transcribe_wav(wav_path)
        # Send the result back to the client
        emit("stt_result", {"text": result_text})
    except Exception as e:
        # Handler boundary: report the failure to the client instead of
        # letting it propagate, and keep a server-side traceback.
        app.logger.exception("audio_chunk transcription failed")
        emit("stt_error", {"error": str(e)})
if __name__ == "__main__":
    # Start the Socket.IO server when this file is executed as a script.
    # NOTE(review): debug=True combined with host 0.0.0.0 may expose the
    # interactive debugger to the network - confirm this is intended.
    run_options = {"host": "0.0.0.0", "port": 7860, "debug": True}
    socketio.run(app, **run_options)