File size: 1,612 Bytes
23590ba
 
62ca4e5
fe652f1
23590ba
62ca4e5
23590ba
3b61113
981b713
 
62ca4e5
981b713
62ca4e5
981b713
 
3b61113
6432060
 
62ca4e5
fe652f1
 
 
 
 
 
 
 
62ca4e5
 
 
ceda7cb
62ca4e5
 
 
 
 
 
 
 
 
 
 
 
 
3b61113
62ca4e5
ceda7cb
9225a67
62ca4e5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import json
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from vosk import Model, KaldiRecognizer
import wave

# Verify that the Vosk model directory exists before trying to load it.
MODEL_PATH = "model/vosk-model"
if not os.path.exists(MODEL_PATH):
    # FileNotFoundError is more specific than a bare Exception and is still
    # caught by any existing `except Exception` handler.
    raise FileNotFoundError("❌ Model Vosk không tìm thấy!")

# Load the model once at import time; the request handler reuses it.
print("✅ Đang tải model Vosk...")
model = Model(MODEL_PATH)

# Create the FastAPI application.
app = FastAPI()

# Configure CORS so the API can be called from any origin.
# Credentials are disabled because a wildcard origin must not be combined
# with allow_credentials=True; list explicit origins instead if
# cookies/authorization headers ever need to be sent cross-origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # allow every origin
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.post("/stt")
async def speech_to_text(audio: UploadFile = File(...)):
    """Transcribe an uploaded WAV file and return the recognized text.

    The upload must be a mono, 16-bit, 16 kHz WAV file.

    Args:
        audio: The uploaded audio file.

    Returns:
        A dict ``{"text": <transcript>}``; the transcript is an empty string
        when nothing was recognized.

    Raises:
        HTTPException: 400 when the audio is not mono / 16-bit / 16 kHz;
            500 when any other error occurs while reading or decoding it.
    """
    try:
        with wave.open(audio.file, "rb") as wf:
            if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() != 16000:
                raise HTTPException(status_code=400, detail="❌ File âm thanh phải là WAV mono 16kHz 16-bit.")

            rec = KaldiRecognizer(model, 16000)
            segments = []
            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                # AcceptWaveform returns True once an utterance is complete;
                # collect that segment so earlier speech is kept.
                if rec.AcceptWaveform(data):
                    segments.append(json.loads(rec.Result()).get("text", ""))

            # FinalResult flushes whatever audio is still buffered in the
            # recognizer, which Result alone does not do.
            segments.append(json.loads(rec.FinalResult()).get("text", ""))
            return {"text": " ".join(part for part in segments if part)}
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 400 above) through unchanged
        # instead of re-wrapping them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"❌ Lỗi xử lý âm thanh: {str(e)}") from e

if __name__ == "__main__":
    # Start the server only when this file is run as a script; uvicorn is
    # imported lazily so importing the module does not require it.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)