Spaces:

tudeplom
/

api-stt

Sleeping

App Files Files Community

tudeplom committed on Mar 31, 2025

Commit

c80867b

verified ·

1 Parent(s): d457454

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -127

app.py CHANGED Viewed

@@ -3,145 +3,56 @@ import wave
 import json
 import tempfile
 import ffmpeg
-from flask import Flask, request, jsonify
-from flask_cors import CORS
 from vosk import Model, KaldiRecognizer
-from flasgger import Swagger
-# Thư mục chứa model
-MODEL_PATH = "model/vosk-model"
-print("\u2705 Đang tải model Vosk...")
-model = Model(MODEL_PATH)
 # Khởi tạo Flask app
 app = Flask(__name__)
-CORS(app)
-Swagger(app)
-@app.route("/")
-def home():
-    """API Home
-    ---
-    responses:
-      200:
-        description: API đang chạy
-    """
-    return "\u2705 Vosk STT API đang chạy!"
-@app.route("/stt", methods=["POST"])
-def stt():
-    """Chuyển đổi giọng nói thành văn bản (Speech-to-Text)
-    ---
-    consumes:
-      - multipart/form-data
-    parameters:
-      - in: formData
-        name: file
-        type: file
-        required: true
-        description: File âm thanh WebM (sẽ được chuyển đổi sang WAV mono PCM)
-    responses:
-      200:
-        description: Kết quả chuyển đổi văn bản
-        schema:
-          type: object
-          properties:
-            text:
-              type: string
-              example: "Xin chào thế giới"
-      400:
-        description: Lỗi nếu file âm thanh không hợp lệ hoặc không tìm thấy
-      500:
-        description: Lỗi server nội bộ
-    """
-    if "file" not in request.files:
-        return jsonify({"error": "Không tìm thấy file âm thanh! Vui lòng gửi trường 'file'."}), 400
-    audio_file = request.files["file"]
-    if audio_file.filename == "":
-        return jsonify({"error": "Không có file được chọn!"}), 400
-    # Lưu file WebM tạm thời
-    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm_file:
-        webm_path = temp_webm_file.name
-        audio_file.save(webm_path)
-    # Kiểm tra kích thước tệp
-    if os.path.getsize(webm_path) < 100:
-        os.remove(webm_path)
-        return jsonify({"error": "Tệp âm thanh quá nhỏ hoặc rỗng!"}), 400
-    # Đường dẫn file WAV tạm thời sau khi chuyển đổi
-    wav_path = None
-    wf = None
     try:
-        # Kiểm tra tệp WebM bằng ffprobe trước khi chuyển đổi
-        try:
-            probe = ffmpeg.probe(webm_path)
-            if 'streams' not in probe or not any(s['codec_type'] == 'audio' for s in probe['streams']):
-                raise ValueError("Tệp không chứa luồng âm thanh hợp lệ!")
-        except ffmpeg.Error as e:
-            error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
-            return jsonify({"error": f"Lỗi kiểm tra tệp WebM: {error_message}"}), 500
-        # Chuyển đổi WebM sang WAV mono PCM
         wav_path = tempfile.mktemp(suffix=".wav")
-        stream = ffmpeg.input(webm_path)
-        stream = ffmpeg.output(
-            stream,
-            wav_path,
-            acodec="pcm_s16le",  # PCM 16-bit signed little-endian
-            ac=1,               # Mono
-            ar=16000,           # Tần số mẫu 16kHz, phù hợp với Vosk
-            format="wav"
-        )
-        # Thêm cờ -vn để bỏ qua video nếu có và -y để ghi đè
-        ffmpeg.run(stream, overwrite_output=True, quiet=True, capture_stdout=True, capture_stderr=True)
-        # Mở file WAV đã chuyển đổi
-        wf = wave.open(wav_path, "rb")
-        # Kiểm tra định dạng WAV mono PCM
-        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
-            return jsonify({"error": "Định dạng WAV sau chuyển đổi không đúng!"}), 400
-        # Khởi tạo KaldiRecognizer
-        rec = KaldiRecognizer(model, wf.getframerate())
-        result_text = ""
-        # Đọc và xử lý dữ liệu âm thanh
-        while True:
-            data = wf.readframes(4000)
-            if len(data) == 0:
-                break
-            if rec.AcceptWaveform(data):
-                result = json.loads(rec.Result())
-                result_text += result.get("text", "") + " "
-            else:
-                partial_result = json.loads(rec.PartialResult())
-                if partial_result.get("partial", ""):
-                    result_text += partial_result["partial"] + " "
-        # Trả về kết quả đã xử lý
-        final_text = result_text.strip()
-        if not final_text:
-            final_text = "Không nhận diện được nội dung âm thanh."
-        return jsonify({"text": final_text})
-    except ffmpeg.Error as e:
-        error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
-        return jsonify({"error": f"Lỗi chuyển đổi âm thanh từ WebM sang WAV: {error_message}"}), 500
     except Exception as e:
-        return jsonify({"error": f"Lỗi xử lý âm thanh: {str(e)}"}), 500
     finally:
-        # Đóng file WAV nếu đã mở
-        if wf is not None:
-            wf.close()
-        # Xóa các file tạm
         for path in [webm_path, wav_path]:
             if path and os.path.exists(path):
                 os.remove(path)
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860, debug=True)

 import json
 import tempfile
 import ffmpeg
+from flask import Flask
+from flask_socketio import SocketIO, emit
 from vosk import Model, KaldiRecognizer
 # Initialize the Flask app
 app = Flask(__name__)
+socketio = SocketIO(app, cors_allowed_origins="*")  # "*" accepts Socket.IO connections from any origin
+# Load the Vosk model once at import time; the audio_chunk handler reuses it
+MODEL_PATH = "model/vosk-model"
+print("\u2705 Đang tải model Vosk...")
+model = Model(MODEL_PATH)
+# Xử lý âm thanh WebM từng đoạn
+@socketio.on("audio_chunk")
+def handle_audio_chunk(data):
     try:
+        # Lưu WebM tạm thời
+        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm:
+            webm_path = temp_webm.name
+            temp_webm.write(data)
+        # Chuyển đổi sang WAV
         wav_path = tempfile.mktemp(suffix=".wav")
+        ffmpeg.input(webm_path).output(
+            wav_path, acodec="pcm_s16le", ac=1, ar=16000, format="wav"
+        ).run(overwrite_output=True, quiet=True)
+        # Nhận diện giọng nói
+        with wave.open(wav_path, "rb") as wf:
+            rec = KaldiRecognizer(model, wf.getframerate())
+            result_text = ""
+            while True:
+                data = wf.readframes(4000)
+                if len(data) == 0:
+                    break
+                if rec.AcceptWaveform(data):
+                    result_text += json.loads(rec.Result()).get("text", "") + " "
+                else:
+                    result_text += json.loads(rec.PartialResult()).get("partial", "") + " "
+        # Gửi kết quả về client
+        emit("stt_result", {"text": result_text.strip()})
     except Exception as e:
+        emit("stt_error", {"error": str(e)})
     finally:
+        # Xóa file tạm
         for path in [webm_path, wav_path]:
             if path and os.path.exists(path):
                 os.remove(path)
 if __name__ == "__main__":  # only start the server when executed as a script
+    socketio.run(app, host="0.0.0.0", port=7860, debug=True)  # NOTE(review): debug=True while bound to all interfaces; confirm this is intended outside local development