Spaces:

tudeplom
/

api-stt

Sleeping

App Files Files Community

tudeplom committed on Mar 31

Commit

475d403

verified ·

1 Parent(s): 6357dfa

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -86

app.py CHANGED Viewed

@@ -1,18 +1,14 @@
 import json
 import tempfile
 import ffmpeg
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from vosk import Model, KaldiRecognizer
 from flasgger import Swagger
-# Thư mục chứa model
 MODEL_PATH = "model/vosk-model"
 print("\u2705 Đang tải model Vosk...")
 model = Model(MODEL_PATH)
@@ -24,40 +20,10 @@ Swagger(app)
 @app.route("/")
 def home():
-    """API Home
-    ---
-    responses:
-      200:
-        description: API đang chạy
-    """
     return "\u2705 Vosk STT API đang chạy!"
 @app.route("/stt", methods=["POST"])
 def stt():
-    """Chuyển đổi giọng nói thành văn bản (Speech-to-Text)
-    ---
-    consumes:
-      - multipart/form-data
-    parameters:
-      - in: formData
-        name: file
-        type: file
-        required: true
-        description: File âm thanh WebM (sẽ được chuyển đổi sang WAV mono PCM)
-    responses:
-      200:
-        description: Kết quả chuyển đổi văn bản
-        schema:
-          type: object
-          properties:
-            text:
-              type: string
-              example: "Xin chào thế giới"
-      400:
-        description: Lỗi nếu file âm thanh không hợp lệ hoặc không tìm thấy
-      500:
-        description: Lỗi server nội bộ
-    """
     if "file" not in request.files:
         return jsonify({"error": "Không tìm thấy file âm thanh! Vui lòng gửi trường 'file'."}), 400
@@ -70,50 +36,29 @@ def stt():
         webm_path = temp_webm_file.name
         audio_file.save(webm_path)
-    # Kiểm tra kích thước tệp
     if os.path.getsize(webm_path) < 100:
         os.remove(webm_path)
         return jsonify({"error": "Tệp âm thanh quá nhỏ hoặc rỗng!"}), 400
-    # Đường dẫn file WAV tạm thời sau khi chuyển đổi
-    wav_path = None
-    wf = None
     try:
-        # Kiểm tra tệp WebM bằng ffprobe trước khi chuyển đổi
-        try:
-            probe = ffmpeg.probe(webm_path)
-            if 'streams' not in probe or not any(s['codec_type'] == 'audio' for s in probe['streams']):
-                raise ValueError("Tệp không chứa luồng âm thanh hợp lệ!")
-        except ffmpeg.Error as e:
-            error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
-            return jsonify({"error": f"Lỗi kiểm tra tệp WebM: {error_message}"}), 500
-        # Chuyển đổi WebM sang WAV mono PCM
-        wav_path = tempfile.mktemp(suffix=".wav")
-        stream = ffmpeg.input(webm_path)
-        stream = ffmpeg.output(
-            stream,
-            wav_path,
-            acodec="pcm_s16le",  # PCM 16-bit signed little-endian
-            ac=1,               # Mono
-            ar=16000,           # Tần số mẫu 16kHz, phù hợp với Vosk
-            format="wav"
-        )
-        # Thêm cờ -vn để bỏ qua video nếu có và -y để ghi đè
-        ffmpeg.run(stream, overwrite_output=True, quiet=True, capture_stdout=True, capture_stderr=True)
-        # Mở file WAV đã chuyển đổi
         wf = wave.open(wav_path, "rb")
-        # Kiểm tra định dạng WAV mono PCM
         if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
-            return jsonify({"error": "Định dạng WAV sau chuyển đổi không đúng!"}), 400
-        # Khởi tạo KaldiRecognizer
         rec = KaldiRecognizer(model, wf.getframerate())
         result_text = ""
-        # Đọc và xử lý dữ liệu âm thanh
         while True:
             data = wf.readframes(4000)
             if len(data) == 0:
@@ -123,29 +68,19 @@ def stt():
                 result_text += result.get("text", "") + " "
             else:
                 partial_result = json.loads(rec.PartialResult())
-                if partial_result.get("partial", ""):
-                    result_text += partial_result["partial"] + " "
-        # Trả về kết quả đã xử lý
-        final_text = result_text.strip()
-        if not final_text:
-            final_text = "Không nh��n diện được nội dung âm thanh."
         return jsonify({"text": final_text})
-    except ffmpeg.Error as e:
-        error_message = e.stderr.decode('utf-8') if e.stderr else str(e)
-        return jsonify({"error": f"Lỗi chuyển đổi âm thanh từ WebM sang WAV: {error_message}"}), 500
     except Exception as e:
-        return jsonify({"error": f"Lỗi xử lý âm thanh: {str(e)}"}), 500
     finally:
-        # Đóng file WAV nếu đã mở
-        if wf is not None:
-            wf.close()
-        # Xóa các file tạm
-        for path in [webm_path, wav_path]:
-            if path and os.path.exists(path):
-                os.remove(path)
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=True)

 import json
 import tempfile
 import ffmpeg
+import wave  # ✅ Fix lỗi thiếu import wave
+import os
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from vosk import Model, KaldiRecognizer
 from flasgger import Swagger
+# Load model Vosk
 MODEL_PATH = "model/vosk-model"
 print("\u2705 Đang tải model Vosk...")
 model = Model(MODEL_PATH)
 @app.route("/")
 def home():
     return "\u2705 Vosk STT API đang chạy!"
 @app.route("/stt", methods=["POST"])
 def stt():
     if "file" not in request.files:
         return jsonify({"error": "Không tìm thấy file âm thanh! Vui lòng gửi trường 'file'."}), 400
         webm_path = temp_webm_file.name
         audio_file.save(webm_path)
+    # Kiểm tra kích thước file
     if os.path.getsize(webm_path) < 100:
         os.remove(webm_path)
         return jsonify({"error": "Tệp âm thanh quá nhỏ hoặc rỗng!"}), 400
+    # Xử lý tệp WebM -> WAV
+    wav_path = tempfile.mktemp(suffix=".wav")
     try:
+        ffmpeg.input(webm_path).output(
+            wav_path, acodec="pcm_s16le", ac=1, ar=16000
+        ).run(overwrite_output=True, quiet=True)
         wf = wave.open(wav_path, "rb")
+        # Kiểm tra WAV có đúng chuẩn không
         if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
+            wf.close()
+            return jsonify({"error": "Định dạng WAV không hợp lệ!"}), 400
+        # Chạy Vosk để nhận diện giọng nói
         rec = KaldiRecognizer(model, wf.getframerate())
         result_text = ""
         while True:
             data = wf.readframes(4000)
             if len(data) == 0:
                 result_text += result.get("text", "") + " "
             else:
                 partial_result = json.loads(rec.PartialResult())
+                result_text += partial_result.get("partial", "") + " "
+        wf.close()  # ✅ Fix lỗi quên đóng file
+        final_text = result_text.strip() or "Không nhận diện được nội dung âm thanh."  # ✅ Fix lỗi Unicode
         return jsonify({"text": final_text})
     except Exception as e:
+        return jsonify({"error": f"Lỗi xử lý: {str(e)}"}), 500
     finally:
+        os.remove(webm_path) if os.path.exists(webm_path) else None
+        os.remove(wav_path) if os.path.exists(wav_path) else None
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=True)