File size: 1,612 Bytes
23590ba 62ca4e5 fe652f1 23590ba 62ca4e5 23590ba 3b61113 981b713 62ca4e5 981b713 62ca4e5 981b713 3b61113 6432060 62ca4e5 fe652f1 62ca4e5 ceda7cb 62ca4e5 3b61113 62ca4e5 ceda7cb 9225a67 62ca4e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
import json
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from vosk import Model, KaldiRecognizer
import wave
# Location of the Vosk model directory. The model is loaded once at import
# time; start-up is aborted when the directory is missing.
MODEL_PATH = "model/vosk-model"
if os.path.exists(MODEL_PATH):
    print("✅ Đang tải model Vosk...")
    model = Model(MODEL_PATH)
else:
    raise Exception("❌ Model Vosk không tìm thấy!")
# Create the FastAPI application.
app = FastAPI()

# CORS configuration so the API can be called from other origins:
# every origin, HTTP method and request header is accepted.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive policy — confirm this is intended for this deployment.
_cors_settings = dict(
    allow_origins=["*"],  # allow every origin
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_settings)
@app.post("/stt")
async def speech_to_text(audio: UploadFile = File(...)):
    """Transcribe an uploaded WAV recording with the module-level Vosk model.

    Args:
        audio: Uploaded audio file. It must be a WAV file that is mono,
            16-bit and sampled at 16 kHz.

    Returns:
        A dict ``{"text": <transcript>}``. The transcript is an empty string
        when nothing was recognized.

    Raises:
        HTTPException: status 400 when the upload cannot be parsed as WAV or
            does not match the required format; status 500 for any other
            failure while processing the audio.
    """
    bad_format_detail = "❌ File âm thanh phải là WAV mono 16kHz 16-bit."
    try:
        with wave.open(audio.file, "rb") as wf:
            if (
                wf.getnchannels() != 1
                or wf.getsampwidth() != 2
                or wf.getframerate() != 16000
            ):
                raise HTTPException(status_code=400, detail=bad_format_detail)

            rec = KaldiRecognizer(model, 16000)
            segments = []
            while True:
                data = wf.readframes(4000)
                if not data:
                    break
                # AcceptWaveform() returns True when an utterance has ended;
                # its text has to be collected with Result() at that point,
                # otherwise only the last utterance would be reported.
                if rec.AcceptWaveform(data):
                    segments.append(json.loads(rec.Result()).get("text", ""))
            # FinalResult() flushes the audio still buffered in the recognizer
            # (the Vosk examples call it once after the read loop).
            segments.append(json.loads(rec.FinalResult()).get("text", ""))
        return {"text": " ".join(part for part in segments if part)}
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 400 above) intact instead of
        # letting the generic handler below turn them into a 500.
        raise
    except (wave.Error, EOFError) as e:
        # The upload is not a parseable WAV file: a client error, not a
        # server failure.
        raise HTTPException(status_code=400, detail=bad_format_detail) from e
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"❌ Lỗi xử lý âm thanh: {str(e)}"
        ) from e
if __name__ == "__main__":
    # Start the ASGI server only when this file is executed as a script.
    # It listens on all interfaces, port 7860.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)