"""Socket.IO speech-to-text server backed by Vosk.

Clients send WebM audio over the ``audio_chunk`` event. Each payload is
converted to 16 kHz mono 16-bit PCM WAV with ffmpeg, transcribed with Vosk,
and the recognised text is sent back on ``stt_result`` (or the error message
on ``stt_error``).
"""
import json
import os
import tempfile
import wave

import eventlet
import eventlet.wsgi
import ffmpeg
from flask import Flask
from flask_socketio import SocketIO, emit
from vosk import Model, KaldiRecognizer

# Initialise the Flask app and attach Socket.IO to it.
app = Flask(__name__)
socketio = SocketIO(app, cors_allowed_origins="*")

# Load the Vosk model once at import time; it is shared by every request.
MODEL_PATH = "model/vosk-model"
print("\u2705 Đang tải model Vosk...")
model = Model(MODEL_PATH)

# Number of audio frames handed to the recogniser per read.
_FRAMES_PER_READ = 4000


def _convert_webm_to_wav(webm_path, wav_path):
    """Transcode the file at *webm_path* into a 16 kHz mono PCM WAV file."""
    ffmpeg.input(webm_path).output(
        wav_path, acodec="pcm_s16le", ac=1, ar=16000, format="wav"
    ).run(overwrite_output=True, quiet=True)


def _transcribe_wav(wav_path):
    """Return the text Vosk recognises in the WAV file at *wav_path*.

    Only finalised results are collected. Partial results are cumulative for
    the utterance in progress, so appending them on every read would repeat
    the same words; ``FinalResult()`` flushes whatever is still pending once
    the whole file has been fed in.
    """
    pieces = []
    with wave.open(wav_path, "rb") as wf:
        rec = KaldiRecognizer(model, wf.getframerate())
        while True:
            frames = wf.readframes(_FRAMES_PER_READ)
            if not frames:
                break
            if rec.AcceptWaveform(frames):
                pieces.append(json.loads(rec.Result()).get("text", ""))
        pieces.append(json.loads(rec.FinalResult()).get("text", ""))
    # Skip empty segments so the joined text has no doubled spaces.
    return " ".join(piece for piece in pieces if piece)


# Handle one piece of WebM audio sent by the client.
@socketio.on("audio_chunk")
def handle_audio_chunk(data):
    """Transcribe a WebM audio payload and emit the result to the client.

    Args:
        data: Raw WebM bytes received with the ``audio_chunk`` event.

    Emits:
        ``stt_result`` with ``{"text": <transcript>}`` on success, or
        ``stt_error`` with ``{"error": <message>}`` if any step fails.
    """
    # NOTE(review): the payload is decoded as a standalone WebM file; this
    # presumably requires every chunk to carry its own container header --
    # confirm against the client recorder.
    webm_path, wav_path = None, None
    try:
        # Save the WebM payload to a temporary file for ffmpeg to read.
        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm:
            webm_path = temp_webm.name
            temp_webm.write(data)

        # mkstemp creates the file atomically (unlike the deprecated mktemp,
        # which only returns a name that another process could claim first).
        wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(wav_fd)
        _convert_webm_to_wav(webm_path, wav_path)

        # Run speech recognition and send the transcript back to the client.
        result_text = _transcribe_wav(wav_path)
        emit("stt_result", {"text": result_text.strip()})
    except Exception as e:
        # Top-level boundary of the event handler: report the failure to the
        # client instead of letting it propagate.
        print(f"Lỗi xử lý audio_chunk: {e}")
        emit("stt_error", {"error": str(e)})
    finally:
        # Always remove the temporary files, whether or not processing worked.
        for path in (webm_path, wav_path):
            if path and os.path.exists(path):
                os.remove(path)


if __name__ == "__main__":
    eventlet.wsgi.server(eventlet.listen(("0.0.0.0", 7860)), app)