"""Pronunciation-scoring server.

Receives a WebM recording and a target word, converts the recording to a
16 kHz mono WAV with ffmpeg, sends it to a hosted ASR model through
``gradio_client``, and scores the recognised text against the target word
with a fuzzy string comparison.
"""
import difflib  # text comparison
import os
import shutil
import subprocess
import time
import uuid
from typing import Optional

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from gradio_client import Client, handle_file  # handle_file wraps local files for upload

app = FastAPI()

# Allow cross-origin requests.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive (and a literal "*" is not valid for credentialed CORS requests);
# restrict allow_origins before exposing this server publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

UPLOAD_DIR = "temp_uploads"
# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Format: "account_name/project_name"
ASR_API_URL = "ithuan/sapolita-kaldi"

# Characters removed from both strings before comparison.
_PUNCTUATION_TABLE = str.maketrans("", "", "?!.")


def _remove_quietly(path: Optional[str]) -> None:
    """Delete *path* if it is set, ignoring filesystem errors (best effort)."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup of temp files must never mask the real response or error.
        pass


# --- Core 1: transcoding (WebM -> WAV) ---
def convert_webm_to_wav(webm_path: str) -> Optional[str]:
    """Convert a WebM recording to a 16 kHz mono WAV file using ffmpeg.

    Args:
        webm_path: Path of the input recording.

    Returns:
        Path of the WAV file written next to the input (same name, ``.wav``
        extension), or ``None`` if ffmpeg is missing or exits with an error.
    """
    # Swap only the extension; str.replace would rewrite every ".webm"
    # occurrence anywhere in the path.
    root, _ext = os.path.splitext(webm_path)
    wav_path = root + ".wav"

    # Prefer an ffmpeg.exe bundled in the working directory, else rely on PATH.
    if os.path.exists("ffmpeg.exe"):
        ffmpeg_cmd = os.path.abspath("ffmpeg.exe")
    else:
        ffmpeg_cmd = "ffmpeg"

    # The ASR model needs 16 kHz mono audio.
    # An argument list (no shell) means paths are never interpreted by a shell.
    cmd = [
        ffmpeg_cmd, "-y",
        "-i", webm_path,
        "-ar", "16000",
        "-ac", "1",
        wav_path,
        "-loglevel", "quiet",
    ]

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: ffmpeg ran but failed; OSError: ffmpeg not found
        # or not executable.
        print("❌ FFmpeg 轉檔失敗,請確認 ffmpeg.exe 是否存在")
        _remove_quietly(wav_path)  # do not leave a partial output behind
        return None
    return wav_path


def _extract_transcript(result) -> Optional[str]:
    """Turn a raw prediction result into a clean transcript string.

    Returns ``None`` when there is no usable text.
    """
    # A multi-output endpoint yields a tuple/list.
    # NOTE(review): assumes the transcript is the first output — confirm
    # against the Space's API description.
    if isinstance(result, (list, tuple)):
        result = result[0] if result else None
    if result is None:
        return None
    text = str(result).strip()
    return text or None


# --- Core 2: call the external ASR API ---
def speech_to_text(audio_path: str) -> Optional[str]:
    """Send an audio file to the hosted ASR model and return the transcript.

    Args:
        audio_path: Path of the WAV file to recognise.

    Returns:
        The recognised text, or ``None`` if the call fails or yields no text.
    """
    print(f"📡 正在傳送音檔至 ASR: {ASR_API_URL} ...")

    try:
        client = Client(ASR_API_URL)

        # Language code for Truku.
        lang_code = "formosan_trv"

        # Argument order is (language code, audio file); the audio path must
        # be wrapped with handle_file.
        result = client.predict(
            lang_code,
            handle_file(audio_path),
            api_name="/automatic_speech_recognition",
        )

        text_result = _extract_transcript(result)
        print(f"👂 ASR 聽到: {text_result}")
        return text_result

    except Exception as e:
        # Boundary with a remote service: report any failure as "no result".
        print(f"❌ ASR API 呼叫失敗: {e}")
        return None


# --- Core 3: text similarity scoring ---
def calculate_text_score(user_text, target_text) -> int:
    """Score how closely the recognised text matches the target text.

    Both strings are lower-cased, stripped of ``?``, ``!`` and ``.``, and
    trimmed before comparison.

    Args:
        user_text: Text recognised from the learner's recording.
        target_text: The expected text.

    Returns:
        An integer from 0 to 100: 100 for an exact match after normalisation,
        otherwise the ``difflib`` similarity ratio as a percentage plus a
        10-point bonus (capped at 100) whenever the ratio is non-zero.
        Returns 0 when either input is empty.
    """
    # "None" covers callers that stringified a missing ASR result.
    if not user_text or user_text == "None" or not target_text:
        return 0

    # 1. Normalise: lower-case and drop punctuation (Truku sentences often
    #    carry ? or !).
    u = user_text.lower().translate(_PUNCTUATION_TABLE).strip()
    t = target_text.lower().translate(_PUNCTUATION_TABLE).strip()

    print(f"🔍 比對文字:\n - 學生說: {u}\n - 正確答案: {t}")

    # 2. Exact hit.
    if u == t:
        return 100

    # 3. Fuzzy comparison.
    similarity = difflib.SequenceMatcher(None, u, t).ratio()
    score = int(similarity * 100)

    # Encouragement bonus: anything that was recognised at all gets extra
    # points.
    if score > 0:
        score = min(100, score + 10)

    return score


# --- API endpoint ---
@app.post("/api/score")
async def score_audio(file: UploadFile = File(...), target_word: str = Form(...)):
    """Score an uploaded recording against ``target_word``.

    Args:
        file: The uploaded recording (stored with a ``.webm`` extension).
        target_word: The text the learner was asked to say.

    Returns:
        A JSON response: ``{"status": "success", "score", "comment"}`` on
        success, or ``{"status": "error", "message"}`` when conversion fails
        or an unexpected error occurs.
    """
    user_path = None
    wav_path = None

    try:
        # 1. Save the recording. The uuid suffix keeps two uploads arriving
        #    within the same second from overwriting each other.
        filename = f"user_{int(time.time())}_{uuid.uuid4().hex}.webm"
        user_path = os.path.join(UPLOAD_DIR, filename)
        with open(user_path, "wb") as f:
            f.write(await file.read())

        # 2. Transcode. Run in a worker thread so the blocking ffmpeg call
        #    does not stall the event loop.
        wav_path = await run_in_threadpool(convert_webm_to_wav, user_path)
        if not wav_path:
            return JSONResponse(content={"status": "error", "message": "轉檔失敗"})

        # 3. Call the ASR service (blocking network call -> worker thread).
        asr_result = await run_in_threadpool(speech_to_text, wav_path)

        # 4. Score.
        if asr_result:
            real_score = calculate_text_score(asr_result, target_word)

            # Pick a comment according to the score.
            if real_score == 100:
                comment = f"太強了!完全正確!(辨識: {asr_result})"
            elif real_score >= 80:
                comment = f"發音很標準喔!(辨識: {asr_result})"
            elif real_score >= 60:
                comment = f"很接近了,加油!(辨識: {asr_result})"
            else:
                comment = f"AI 聽成:{asr_result},請再清楚一點!"
        else:
            real_score = 0
            comment = "AI 聽不太清楚,請再試一次!"

        return JSONResponse(content={"status": "success", "score": real_score, "comment": comment})

    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON.
        print(f"Server Error: {str(e)}")
        return JSONResponse(content={"status": "error", "message": str(e)})

    finally:
        # Remove temporary files.
        _remove_quietly(user_path)
        _remove_quietly(wav_path)


# --- Static web server ---
# NOTE(review): this serves the whole working directory; if the server is
# started from the project folder that includes this script and temp_uploads.
# Consider pointing it at a dedicated static directory.
app.mount("/", StaticFiles(directory=".", html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    print("🚀 ASR (Sapolita-Kaldi) 伺服器啟動中... (http://localhost:8000)")
    # NOTE(review): the "server:app" import string assumes this module is
    # saved as server.py — confirm.
    uvicorn.run("server:app", host="0.0.0.0", port=8000, reload=True)