Truku-AI-Game / server.py
Lowking's picture
Update server.py
ee7b99a verified
Raw
History Blame Contribute Delete
5.24 kB
import os
import time
import shutil
import subprocess
import difflib # 文字比對用
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from gradio_client import Client, handle_file # ✨ 記得匯入 handle_file
# Application object; the API route and the static mount are attached below.
app = FastAPI()

# Allow cross-origin requests so a browser front end served from another
# origin can call the API.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm this is intended before exposing the server publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Scratch directory for uploaded recordings and their converted WAV files.
UPLOAD_DIR = "temp_uploads"
# exist_ok=True removes the check-then-create race of testing os.path.exists
# first (two processes starting together could both try to create it).
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Identifier handed to gradio_client.Client, in the form
# "account-name/project-name".
ASR_API_URL = "ithuan/sapolita-kaldi"
# --- Core 1: transcoding (WebM -> WAV) ---
def convert_webm_to_wav(webm_path):
    """Convert an audio file to 16 kHz mono WAV with ffmpeg.

    Args:
        webm_path: Path of the input recording (normally ``*.webm``).

    Returns:
        The path of the WAV file written next to the input, or ``None`` when
        ffmpeg cannot be started or exits with a non-zero status.
    """
    # Swap only the file extension. A plain str.replace(".webm", ".wav") would
    # also rewrite ".webm" occurring in a directory name, and would leave the
    # path unchanged (output == input) when the extension is something else.
    root, _ext = os.path.splitext(webm_path)
    wav_path = root + ".wav"
    if wav_path == webm_path:
        # Never let the output alias the input file.
        wav_path = root + "_converted.wav"

    # Prefer an ffmpeg.exe sitting in the working directory; otherwise rely on
    # whatever "ffmpeg" resolves to on PATH.
    if os.path.exists("ffmpeg.exe"):
        ffmpeg_cmd = os.path.abspath("ffmpeg.exe")
    else:
        ffmpeg_cmd = "ffmpeg"

    # The ASR service needs 16 kHz, single-channel audio.
    # An argument list (no shell) means the paths are never interpreted by a
    # shell, so quotes or metacharacters in a path cannot break the command.
    cmd = [
        ffmpeg_cmd,
        "-loglevel", "quiet",
        "-y",
        "-i", webm_path,
        "-ar", "16000",
        "-ac", "1",
        wav_path,
    ]
    try:
        subprocess.run(cmd, check=True)
        return wav_path
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: ffmpeg ran but failed; OSError: ffmpeg could not
        # be launched at all (e.g. not installed).
        print("❌ FFmpeg 轉檔失敗,請確認 ffmpeg.exe 是否存在")
        return None
# --- Core 2: call the external ASR API ---
def speech_to_text(audio_path):
    """Send an audio file to the remote ASR endpoint and return the transcript.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``None``
        when the call fails or the service returns no text.
    """
    print(f"📡 正在傳送音檔至 ASR: {ASR_API_URL} ...")
    try:
        client = Client(ASR_API_URL)
        # Language code for Truku.
        lang_code = "formosan_trv"
        # Positional argument order expected by the endpoint:
        # (language code, audio file). handle_file wraps the local path for
        # upload.
        result = client.predict(
            lang_code,
            handle_file(audio_path),
            api_name="/automatic_speech_recognition",
        )
    except Exception as e:
        print(f"❌ ASR API 呼叫失敗: {e}")
        return None

    # The reply is usually plain text but may be a tuple/list that contains
    # the text. Unwrap it instead of stringifying the container, which would
    # yield a Python repr such as "('text',)".
    # NOTE(review): assumes the first string element is the transcript --
    # confirm against the endpoint's actual return shape.
    if isinstance(result, (tuple, list)):
        result = next((item for item in result if isinstance(item, str)), None)

    # str(None) would produce the literal text "None"; report "no result".
    if result is None:
        return None

    text_result = str(result).strip()
    print(f"👂 ASR 聽到: {text_result}")
    return text_result
# --- Core 3: text similarity scoring ---
def calculate_text_score(user_text, target_text):
    """Score how closely the recognised text matches the expected text.

    Both strings are lower-cased, stripped of ``?``, ``!`` and ``.`` and
    trimmed before comparison.

    Args:
        user_text: Text recognised from the learner's recording. Falsy values
            and the literal string ``"None"`` score 0.
        target_text: The expected answer. ``None`` is treated as empty.

    Returns:
        An int in 0..100. 100 is returned only for an exact match after
        normalisation; any other non-zero similarity gets a 10-point
        encouragement bonus and is capped at 99.
    """
    if not user_text or user_text == "None":
        return 0

    # 1. Normalise: lower-case and drop sentence punctuation in a single pass.
    punctuation_table = str.maketrans("", "", "?!.")

    def normalize(text):
        return (text or "").lower().translate(punctuation_table).strip()

    u = normalize(user_text)
    t = normalize(target_text)
    print(f"🔍 比對文字:\n - 學生說: {u}\n - 正確答案: {t}")

    # Nothing left after normalisation means nothing was actually recognised.
    if not u:
        return 0

    # 2. Exact hit.
    if u == t:
        return 100

    # 3. Fuzzy comparison.
    similarity = difflib.SequenceMatcher(None, u, t).ratio()
    score = int(similarity * 100)

    # Encouragement: any recognised overlap earns a small bonus. Cap at 99 so
    # that 100 stays reserved for a true exact match (callers present 100 as
    # "completely correct").
    if score > 0:
        score = min(99, score + 10)
    return score
# --- API endpoint ---
def _remove_quietly(path):
    """Best-effort removal of a temporary file; missing files are ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the real response; leave the file behind.
        pass


@app.post("/api/score")
async def score_audio(file: UploadFile = File(...), target_word: str = Form(...)):
    """Score an uploaded recording against the expected word.

    The upload is saved to UPLOAD_DIR, converted to WAV, sent to the ASR
    service, and the transcript is compared with ``target_word``.

    Args:
        file: The uploaded recording (saved with a ``.webm`` extension).
        target_word: The text the learner was asked to say.

    Returns:
        A JSONResponse. On success: ``{"status": "success", "score": int,
        "comment": str}``. On failure: ``{"status": "error", "message": str}``.
    """
    # Local import keeps this block self-contained.
    import asyncio

    user_path = None
    wav_path = None
    try:
        # 1. Save the upload. The random suffix keeps two requests arriving in
        #    the same second from overwriting (and later deleting) each
        #    other's files.
        filename = f"user_{int(time.time())}_{os.urandom(8).hex()}.webm"
        user_path = os.path.join(UPLOAD_DIR, filename)
        with open(user_path, "wb") as f:
            f.write(await file.read())

        # The next two steps block (ffmpeg subprocess, remote HTTP call), so
        # run them in the default executor instead of stalling the event loop.
        loop = asyncio.get_running_loop()

        # 2. Transcode.
        wav_path = await loop.run_in_executor(None, convert_webm_to_wav, user_path)
        if not wav_path:
            return JSONResponse(content={"status": "error", "message": "轉檔失敗"})

        # 3. Speech recognition.
        asr_result = await loop.run_in_executor(None, speech_to_text, wav_path)

        # 4. Scoring.
        if asr_result:
            real_score = calculate_text_score(asr_result, target_word)
            # Pick a comment according to the score band.
            if real_score == 100:
                comment = f"太強了!完全正確!(辨識: {asr_result})"
            elif real_score >= 80:
                comment = f"發音很標準喔!(辨識: {asr_result})"
            elif real_score >= 60:
                comment = f"很接近了,加油!(辨識: {asr_result})"
            else:
                comment = f"AI 聽成:{asr_result},請再清楚一點!"
        else:
            real_score = 0
            comment = "AI 聽不太清楚,請再試一次!"
        return JSONResponse(content={"status": "success", "score": real_score, "comment": comment})
    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        print(f"Server Error: {str(e)}")
        return JSONResponse(content={"status": "error", "message": str(e)})
    finally:
        # Remove the temporary files whatever the outcome.
        _remove_quietly(user_path)
        _remove_quietly(wav_path)
# --- Static site and development entry point ---
# The static mount at "/" is registered after the API route declared above.
# NOTE(review): directory="." serves the whole working directory, which
# presumably includes this script and the upload folder -- confirm that is
# acceptable for the deployment.
app.mount("/", StaticFiles(directory=".", html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    banner = "🚀 ASR (Sapolita-Kaldi) 伺服器啟動中... (http://localhost:8000)"
    print(banner)
    # Listen on all interfaces, port 8000, with auto-reload enabled.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("server:app", **server_options)