Spaces:

tudeplom
/

api-stt

Sleeping

App Files Files Community

api-stt / app.py

tudeplom

Update app.py

b7dec53 verified 9 months ago

raw

history blame contribute delete

4.64 kB

	import json
	import tempfile
	import ffmpeg
	import os
	import wave # ✅ Fix lỗi thiếu import wave
	from flask import Flask, request, jsonify
	from flask_cors import CORS
	from vosk import Model, KaldiRecognizer
	from flasgger import Swagger

	# Create the Flask application first: the CORS and Swagger extensions below
	# are both applied to this `app` object.
	app = Flask(__name__)
	CORS(app)
	Swagger(app)

	# Load the Vosk model from MODEL_PATH at import time. The resulting `model`
	# object is module-level and is what the /stt handler passes to KaldiRecognizer.
	MODEL_PATH = "model/vosk-model"
	print("\u2705 Đang tải model Vosk...")
	model = Model(MODEL_PATH)

	# Health-check endpoint for GET "/": returns a fixed plain-text banner.
	# NOTE: the docstring below doubles as the Flasgger (Swagger) spec for this
	# route, so its text is served at runtime and is intentionally left as-is.
	@app.route("/")
	def home():
	    """API Home
	    ---
	    responses:
	      200:
	        description: API đang chạy
	    """
	    status_banner = "\u2705 Vosk STT API đang chạy!"
	    return status_banner

	def _remove_if_exists(path):
	    # Best-effort temp-file cleanup: delete `path` when set, tolerate it being gone already.
	    if not path:
	        return
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        pass


	def _ffmpeg_error_text(err):
	    # Return ffmpeg's captured stderr as text (never failing on bad bytes), else str(err).
	    if err.stderr:
	        return err.stderr.decode("utf-8", errors="replace")
	    return str(err)


	def _has_audio_stream(probe):
	    # True when the ffprobe metadata dict lists at least one stream of type "audio".
	    return any(
	        stream.get("codec_type") == "audio"
	        for stream in probe.get("streams", [])
	    )


	def _transcribe_wav(wav_reader):
	    # Feed an open wave reader to Vosk chunk by chunk and return the recognized text.
	    recognizer = KaldiRecognizer(model, wav_reader.getframerate())
	    segments = []
	    for chunk in iter(lambda: wav_reader.readframes(4000), b""):
	        # Only finalized utterances are collected. Partial hypotheses are
	        # cumulative, so appending them on every chunk would duplicate words.
	        if recognizer.AcceptWaveform(chunk):
	            segments.append(json.loads(recognizer.Result()).get("text", ""))
	    # Flush whatever audio is still buffered after the last chunk.
	    segments.append(json.loads(recognizer.FinalResult()).get("text", ""))
	    return " ".join(segment for segment in segments if segment)


	# POST /stt: speech-to-text for an uploaded audio file.
	#
	# Expects a multipart/form-data request with the audio in the "file" field.
	# The upload is saved to a temporary .webm file, checked with ffprobe,
	# converted by ffmpeg to 16 kHz mono 16-bit PCM WAV and transcribed with Vosk.
	#
	# Returns a JSON response:
	#   200 {"text": ...}   recognized text, or a fixed fallback message when empty
	#   400 {"error": ...}  missing/empty upload, no audio stream, or unexpected WAV format
	#   500 {"error": ...}  ffprobe/ffmpeg failure or any other processing error
	# Both temporary files are removed before the handler returns.
	#
	# NOTE: the docstring below doubles as the Flasgger (Swagger) spec for this
	# route, so its text is served at runtime and is intentionally left as-is.
	@app.route("/stt", methods=["POST"])
	def stt():
	    """Chuyển đổi giọng nói thành văn bản (Speech-to-Text)
	    ---
	    consumes:
	      - multipart/form-data
	    parameters:
	      - in: formData
	        name: file
	        type: file
	        required: true
	        description: File âm thanh WebM (sẽ được chuyển đổi sang WAV mono PCM)
	    responses:
	      200:
	        description: Kết quả chuyển đổi văn bản
	        schema:
	          type: object
	          properties:
	            text:
	              type: string
	              example: "Xin chào thế giới"
	      400:
	        description: Lỗi nếu file âm thanh không hợp lệ hoặc không tìm thấy
	      500:
	        description: Lỗi server nội bộ
	    """
	    if "file" not in request.files:
	        return jsonify({"error": "Không tìm thấy file âm thanh! Vui lòng gửi trường 'file'."}), 400

	    audio_file = request.files["file"]
	    if audio_file.filename == "":
	        return jsonify({"error": "Không có file được chọn!"}), 400

	    webm_path = None
	    wav_path = None
	    try:
	        # Reserve a temp path, close the handle, then let the upload write to it.
	        with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as temp_webm_file:
	            webm_path = temp_webm_file.name
	        audio_file.save(webm_path)

	        # Reject empty or truncated uploads before invoking ffmpeg.
	        if os.path.getsize(webm_path) < 100:
	            return jsonify({"error": "Tệp âm thanh quá nhỏ hoặc rỗng!"}), 400

	        # Make sure the upload actually contains an audio stream.
	        try:
	            probe = ffmpeg.probe(webm_path)
	        except ffmpeg.Error as e:
	            return jsonify({"error": f"Lỗi kiểm tra tệp WebM: {_ffmpeg_error_text(e)}"}), 500
	        if not _has_audio_stream(probe):
	            # An invalid upload is a client error, matching the documented 400.
	            return jsonify({"error": "Tệp không chứa luồng âm thanh hợp lệ!"}), 400

	        # mkstemp creates the file atomically (mktemp is deprecated and racy);
	        # ffmpeg then overwrites it with the converted audio.
	        wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
	        os.close(wav_fd)
	        ffmpeg.input(webm_path).output(
	            wav_path, acodec="pcm_s16le", ac=1, ar=16000
	        ).run(overwrite_output=True, quiet=True)

	        # The context manager closes the reader even if recognition raises.
	        with wave.open(wav_path, "rb") as wf:
	            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
	                return jsonify({"error": "Định dạng WAV không hợp lệ!"}), 400
	            recognized_text = _transcribe_wav(wf)

	        final_text = recognized_text or "Không nhận diện được nội dung âm thanh."
	        return jsonify({"text": final_text})

	    except ffmpeg.Error as e:
	        return jsonify({"error": f"Lỗi chuyển đổi âm thanh từ WebM sang WAV: {_ffmpeg_error_text(e)}"}), 500
	    except Exception as e:
	        # Top-level boundary of the handler: report any other failure as JSON.
	        return jsonify({"error": f"Lỗi xử lý âm thanh: {str(e)}"}), 500
	    finally:
	        # Single cleanup point for every exit path above.
	        _remove_if_exists(webm_path)
	        _remove_if_exists(wav_path)

	if __name__ == "__main__":
	    # Script entry point: serve on all interfaces, port 7860.
	    # Debug mode enables the interactive Werkzeug debugger, which must not be
	    # reachable from other hosts, so it is opt-in via FLASK_DEBUG and off by default.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
	    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)