Spaces:

Lowking
/

Truku-AI-Game

Sleeping

App Files Files Community

Truku-AI-Game / server.py

Lowking

Update server.py

ee7b99a verified 6 months ago

Raw

History Blame Contribute Delete

5.24 kB

	import os
	import time
	import shutil
	import subprocess
	import difflib # 文字比對用
	from fastapi import FastAPI, UploadFile, File, Form
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from fastapi.staticfiles import StaticFiles
	from gradio_client import Client, handle_file # ✨ 記得匯入 handle_file

	app = FastAPI()

	# Allow cross-origin requests so the front-end may be served from another origin.
	# The wildcard entries must be "*": an empty string matches no origin and no
	# method, which makes the middleware reject every cross-origin request.
	# NOTE(review): wildcard origins together with allow_credentials=True is very
	# permissive; list explicit origins if cookies or auth headers are ever used.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Scratch directory for uploaded recordings and their converted WAV files.
	UPLOAD_DIR = "temp_uploads"
	# exist_ok avoids the check-then-create race of a separate os.path.exists test.
	os.makedirs(UPLOAD_DIR, exist_ok=True)

	# Identifier handed to gradio_client.Client, in "account/project" form.
	ASR_API_URL = "ithuan/sapolita-kaldi"

	# --- 核心 1: 轉檔 (WebM -> WAV) ---
	def convert_webm_to_wav(webm_path):
	    """Convert a recording to a 16 kHz mono WAV file using ffmpeg.

	    Args:
	        webm_path: Path of the input audio file (normally ``*.webm``).

	    Returns:
	        Path of the WAV file (input path with its final extension replaced
	        by ``.wav``), or ``None`` when ffmpeg cannot be started or exits
	        with a non-zero status.
	    """
	    # Swap only the final extension. str.replace would also rewrite ".webm"
	    # inside a directory name and would leave an extension-less path unchanged,
	    # making the output path identical to the input.
	    wav_path = os.path.splitext(webm_path)[0] + ".wav"

	    # Prefer an ffmpeg.exe shipped in the working directory; otherwise use PATH.
	    if os.path.exists("ffmpeg.exe"):
	        ffmpeg_cmd = os.path.abspath("ffmpeg.exe")
	    else:
	        ffmpeg_cmd = "ffmpeg"

	    # The ASR service needs 16 kHz mono audio. Arguments are passed as a list
	    # (no shell) so quotes or metacharacters in a path cannot inject commands.
	    cmd = [
	        ffmpeg_cmd, "-y", "-i", webm_path,
	        "-ar", "16000", "-ac", "1", wav_path,
	        "-loglevel", "quiet",
	    ]

	    try:
	        subprocess.run(cmd, check=True)
	    except (subprocess.CalledProcessError, OSError):
	        # CalledProcessError: ffmpeg ran but failed; OSError: it could not start.
	        print("❌ FFmpeg 轉檔失敗，請確認 ffmpeg.exe 是否存在")
	        return None
	    return wav_path

	# --- 核心 2: 呼叫外部 ASR API (依照您提供的新邏輯) ---
	def speech_to_text(audio_path):
	    """Send an audio file to the remote ASR service and return the transcript.

	    Args:
	        audio_path: Path of the audio file to upload.

	    Returns:
	        The recognised text with surrounding whitespace stripped, or ``None``
	        when the API call raises or yields no result.
	    """
	    print(f"📡 正在傳送音檔至 ASR: {ASR_API_URL} ...")

	    try:
	        client = Client(ASR_API_URL)

	        # Language code for Truku.
	        lang_code = "formosan_trv"

	        # Positional order expected by the endpoint: (language code, audio).
	        # handle_file wraps the local path for upload.
	        result = client.predict(
	            lang_code,
	            handle_file(audio_path),
	            api_name="/automatic_speech_recognition",
	        )
	    except Exception as e:
	        print(f"❌ ASR API 呼叫失敗: {e}")
	        return None

	    # The endpoint may return plain text or a tuple/list containing the text.
	    # Unwrap the first element rather than stringifying the container, which
	    # would score the repr (e.g. "('text',)") instead of the transcript.
	    if isinstance(result, (tuple, list)):
	        result = result[0] if result else None
	    if result is None:
	        # Avoid turning a missing result into the literal string "None".
	        return None

	    text_result = str(result).strip()
	    print(f"👂 ASR 聽到: {text_result}")
	    return text_result

	# --- 核心 3: 文字相似度評分 ---
	def calculate_text_score(user_text, target_text):
	    """Score how closely the recognised text matches the expected answer.

	    Both strings are lower-cased, stripped of ``?``, ``!`` and ``.`` and
	    trimmed before comparison.

	    Args:
	        user_text: Text recognised from the learner's recording.
	        target_text: The expected word or phrase.

	    Returns:
	        An int from 0 to 100: 0 when ``user_text`` is empty or the string
	        ``"None"``; 100 on an exact match after normalisation; otherwise the
	        ``difflib`` similarity ratio as a percentage, with a 10-point
	        encouragement bonus (capped at 100) whenever it is above zero.
	    """
	    if not user_text or user_text == "None":
	        return 0

	    # Normalise case and drop sentence punctuation in one pass.
	    strip_marks = str.maketrans("", "", "?!.")
	    heard = user_text.lower().translate(strip_marks).strip()
	    expected = target_text.lower().translate(strip_marks).strip()

	    print(f"🔍 比對文字:\n - 學生說: {heard}\n - 正確答案: {expected}")

	    # Exact hit.
	    if heard == expected:
	        return 100

	    # Fuzzy comparison.
	    matcher = difflib.SequenceMatcher(None, heard, expected)
	    percent = int(matcher.ratio() * 100)

	    # Encouragement bonus: any non-zero similarity earns ten extra points.
	    return min(100, percent + 10) if percent > 0 else percent

	# --- API 接口 ---
	def _remove_quietly(path):
	    """Delete a temporary file as best-effort cleanup; a missing file is fine."""
	    if not path:
	        return
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        pass
	    except OSError as err:
	        # Cleanup must never fail the request, but do not hide the problem.
	        print(f"⚠️ 暫存檔刪除失敗: {path} ({err})")


	def _build_comment(score, asr_text):
	    """Return the feedback sentence for a score, quoting the recognised text."""
	    if score == 100:
	        return f"太強了！完全正確！(辨識: {asr_text})"
	    if score >= 80:
	        return f"發音很標準喔！(辨識: {asr_text})"
	    if score >= 60:
	        return f"很接近了，加油！(辨識: {asr_text})"
	    return f"AI 聽成：{asr_text}，請再清楚一點！"


	@app.post("/api/score")
	async def score_audio(file: UploadFile = File(...), target_word: str = Form(...)):
	    """Score an uploaded recording against the expected word.

	    The upload is saved to ``UPLOAD_DIR``, converted to WAV, transcribed by
	    the ASR service and compared with ``target_word``.

	    Args:
	        file: The uploaded recording.
	        target_word: The word or phrase the learner was asked to say.

	    Returns:
	        A JSONResponse with ``{"status": "success", "score", "comment"}``, or
	        ``{"status": "error", "message"}`` when conversion fails or an
	        unexpected exception occurs. Temporary files are removed either way.
	    """
	    import asyncio
	    import uuid

	    user_path = None
	    wav_path = None
	    try:
	        # 1. Save the upload. The random suffix keeps concurrent requests that
	        #    arrive within the same second from overwriting each other's files.
	        filename = f"user_{int(time.time())}_{uuid.uuid4().hex}.webm"
	        user_path = os.path.join(UPLOAD_DIR, filename)
	        with open(user_path, "wb") as f:
	            f.write(await file.read())

	        # ffmpeg and the ASR call are blocking; run them in the default
	        # executor so the event loop can keep serving other requests.
	        loop = asyncio.get_running_loop()

	        # 2. Convert to WAV.
	        wav_path = await loop.run_in_executor(None, convert_webm_to_wav, user_path)
	        if not wav_path:
	            return JSONResponse(content={"status": "error", "message": "轉檔失敗"})

	        # 3. Transcribe.
	        asr_result = await loop.run_in_executor(None, speech_to_text, wav_path)

	        # 4. Score and choose the feedback sentence.
	        if asr_result:
	            real_score = calculate_text_score(asr_result, target_word)
	            comment = _build_comment(real_score, asr_result)
	        else:
	            real_score = 0
	            comment = "AI 聽不太清楚，請再試一次！"

	        return JSONResponse(content={"status": "success", "score": real_score, "comment": comment})

	    except Exception as e:
	        # Top-level boundary: report the failure to the client as JSON.
	        print(f"Server Error: {str(e)}")
	        return JSONResponse(content={"status": "error", "message": str(e)})
	    finally:
	        # Always remove the temporary files.
	        _remove_quietly(user_path)
	        _remove_quietly(wav_path)

	# --- 啟動網頁伺服器 ---
	# Serve the current working directory as static files at "/"; html=True is
	# passed to StaticFiles.
	# NOTE(review): directory="." also makes server.py and temp_uploads reachable
	# over HTTP -- consider moving the front-end into a dedicated static folder.
	app.mount(
	    path="/",
	    app=StaticFiles(html=True, directory="."),
	    name="static",
	)

	if __name__ == "__main__":
	    import uvicorn

	    print("🚀 ASR (Sapolita-Kaldi) 伺服器啟動中... (http://localhost:8000)")
	    # Listen on all interfaces, port 8000, with auto-reload enabled.
	    uvicorn.run(app="server:app", port=8000, host="0.0.0.0", reload=True)