"""Socket.IO speech-to-text server.

Receives WebM audio chunks over the ``audio_chunk`` Socket.IO event, converts
each one to 16 kHz mono PCM WAV with ffmpeg, runs Vosk recognition on it and
emits the recognized text back to the sender as ``stt_result`` (or
``stt_error`` on failure).
"""

import json
import os
import tempfile
import wave

import ffmpeg
from flask import Flask
from flask_socketio import SocketIO, emit
from vosk import Model, KaldiRecognizer

# Initialize the Flask app and the Socket.IO layer on top of it.
app = Flask(__name__)
socketio = SocketIO(app, cors_allowed_origins="*")

# Load the Vosk model once at import time; it is shared by every request.
MODEL_PATH = "model/vosk-model"
print("\u2705 Đang tải model Vosk...")
model = Model(MODEL_PATH)

# Number of audio frames fed to the recognizer per read.
_FRAMES_PER_READ = 4000


def _convert_webm_to_wav(webm_path, wav_path):
    """Transcode the WebM file at *webm_path* to 16 kHz mono 16-bit PCM WAV."""
    ffmpeg.input(webm_path).output(
        wav_path, acodec="pcm_s16le", ac=1, ar=16000, format="wav"
    ).run(overwrite_output=True, quiet=True)


def _transcribe_wav(wav_path):
    """Run Vosk over the WAV file at *wav_path* and return the recognized text.

    Only finalized results are collected. Partial results are cumulative
    hypotheses for the utterance still in progress, so appending them on every
    read would repeat the same words many times. Whatever is still pending
    when the audio ends is flushed with ``FinalResult()``.
    """
    segments = []
    with wave.open(wav_path, "rb") as wf:
        rec = KaldiRecognizer(model, wf.getframerate())
        # readframes() returns b"" at end of file, which stops the iteration.
        for frames in iter(lambda: wf.readframes(_FRAMES_PER_READ), b""):
            if rec.AcceptWaveform(frames):
                text = json.loads(rec.Result()).get("text", "")
                if text:
                    segments.append(text)
        # Flush the utterance that was still open when the audio ran out.
        tail = json.loads(rec.FinalResult()).get("text", "")
        if tail:
            segments.append(tail)
    return " ".join(segments)


# Handle one WebM audio chunk at a time.
@socketio.on("audio_chunk")
def handle_audio_chunk(data):
    """Transcribe one WebM audio chunk and emit the result to the sender.

    Args:
        data: Payload of the ``audio_chunk`` event; it is written verbatim to
            a binary file, so it must be a bytes-like object.
            NOTE(review): assumes every chunk is a self-contained WebM file
            that ffmpeg can decode on its own - confirm against the client.

    Emits:
        ``stt_result`` with ``{"text": <recognized text>}`` on success, or
        ``stt_error`` with ``{"error": <message>}`` if any step fails.
    """
    try:
        # A private temporary directory holds both intermediate files and is
        # removed automatically, even when conversion or recognition fails.
        # This avoids the race-prone tempfile.mktemp() and a cleanup step that
        # would reference paths which were never assigned.
        with tempfile.TemporaryDirectory() as work_dir:
            webm_path = os.path.join(work_dir, "chunk.webm")
            wav_path = os.path.join(work_dir, "chunk.wav")

            with open(webm_path, "wb") as webm_file:
                webm_file.write(data)

            _convert_webm_to_wav(webm_path, wav_path)
            result_text = _transcribe_wav(wav_path)

        # Send the result back to the client.
        emit("stt_result", {"text": result_text})
    except Exception as e:
        # Event-handler boundary: report every failure to the client instead
        # of letting it propagate into the Socket.IO server loop.
        emit("stt_error", {"error": str(e)})


if __name__ == "__main__":
    # NOTE(review): debug=True together with host="0.0.0.0" exposes the
    # Werkzeug debugger on all interfaces - disable debug outside local
    # development.
    socketio.run(app, host="0.0.0.0", port=7860, debug=True)