Spaces:

XiaoBai1221
/

Bloom_Ware

Sleeping

App Files Files Community

Bloom_Ware / routers /voice.py

XiaoBai1221

Good

91f3927 5 months ago

raw

history blame contribute delete

7.53 kB

	"""
	語音相關 API 路由
	包含語音登入、TTS、STT 等
	"""

	import logging
	from typing import Optional
	from fastapi import APIRouter, HTTPException, Depends
	from pydantic import BaseModel

	from core.auth import require_auth
	from core.database import set_user_speaker_label, get_user_by_speaker_label

	# Module logger, registered under the fixed name "routers.voice".
	logger = logging.getLogger("routers.voice")

	# Every route declared in this module is served under the /api/voice prefix
	# and carries the "語音" (voice) tag.
	router = APIRouter(prefix="/api/voice", tags=["語音"])


	class SpeakerLabelBindRequest(BaseModel):
	    """Request body for binding a speaker label to a user account."""
	    # Label that the bind-speaker endpoint stores for the authenticated user.
	    speaker_label: str


	class TTSRequest(BaseModel):
	    """Request body for the text-to-speech endpoint."""
	    text: str  # text handed to the TTS service for synthesis
	    voice: str = "coral"  # voice name forwarded to the TTS service
	    speed: float = 1.0  # speed value forwarded to the TTS service
	    emotion: Optional[str] = None  # emotion tag (neutral, happy, sad, angry, fear, surprise)
	    care_mode: bool = False  # whether care mode is enabled


	@router.post("/bind-speaker")
	async def bind_speaker_label(
	    request: SpeakerLabelBindRequest,
	    user: dict = Depends(require_auth)
	):
	    """
	    Bind a speaker label to the authenticated user's account.

	    Args:
	        request: Body carrying the ``speaker_label`` to bind.
	        user: Auth payload injected by ``require_auth``; its ``sub`` entry
	            is used as the user id.

	    Returns:
	        A dict with ``success`` set to True and a confirmation message.

	    Raises:
	        HTTPException: 401 when the auth payload has no ``sub``; 409 when
	            the label is already taken; 404 when the user does not exist;
	            500 for any other error reported by the database layer.
	    """
	    account_id = user.get("sub")
	    if not account_id:
	        raise HTTPException(status_code=401, detail="無效的用戶")

	    outcome = await set_user_speaker_label(account_id, request.speaker_label)

	    # Happy path first: everything below deals with failures only.
	    if outcome.get("success"):
	        return {"success": True, "message": "語音標籤綁定成功"}

	    failure = outcome.get("error")
	    if failure == "SPEAKER_LABEL_TAKEN":
	        raise HTTPException(status_code=409, detail="此語音標籤已被其他用戶綁定")
	    if failure == "USER_NOT_FOUND":
	        raise HTTPException(status_code=404, detail="用戶不存在")

	    # Unknown failure: surface whatever the database layer reported.
	    raise HTTPException(status_code=500, detail=failure)


	@router.get("/lookup-speaker/{speaker_label}")
	async def lookup_speaker(speaker_label: str):
	    """
	    Find the user bound to a speaker label (used by voice login).

	    Args:
	        speaker_label: Label taken from the URL path.

	    Returns:
	        A dict with ``success`` set to True and a ``user`` entry holding
	        only the user's ``id`` and ``name``.

	    Raises:
	        HTTPException: 404 when no user is bound to the label.
	    """
	    matched = await get_user_by_speaker_label(speaker_label)

	    if matched:
	        # Expose just the id and name, not the whole user record.
	        public_profile = {
	            "id": matched.get("id"),
	            "name": matched.get("name"),
	        }
	        return {"success": True, "user": public_profile}

	    raise HTTPException(status_code=404, detail="找不到對應的用戶")


	@router.post("/tts")
	async def text_to_speech(
	    request: TTSRequest,
	    user: dict = Depends(require_auth)
	):
	    """
	    Convert text to speech and return the audio as base64.

	    Args:
	        request: Text plus voice, speed, emotion and care-mode options,
	            all forwarded to ``tts_service.synthesize``.
	        user: Auth payload injected by ``require_auth``; only used to
	            enforce authentication.

	    Returns:
	        A dict with ``success`` set to True, ``audio`` (base64 text of the
	        synthesized ``audio_data`` bytes) and ``voice`` as reported by the
	        TTS service.

	    Raises:
	        HTTPException: 500 with the service's own error when synthesis
	            reports failure; 503 when an import fails (TTS service
	            unavailable); 500 with the exception text for anything else.
	    """
	    try:
	        # Imported lazily so a missing TTS backend turns into a 503 below
	        # instead of breaking the import of this router module.
	        from services.tts_service import tts_service

	        result = await tts_service.synthesize(
	            text=request.text,
	            voice=request.voice,
	            speed=request.speed,
	            emotion=request.emotion,
	            care_mode=request.care_mode,
	        )

	        if not result.get("success"):
	            raise HTTPException(status_code=500, detail=result.get("error"))

	        # Return the audio as base64 so it fits in a JSON response.
	        import base64
	        audio_base64 = base64.b64encode(result["audio_data"]).decode("utf-8")

	        return {
	            "success": True,
	            "audio": audio_base64,
	            "voice": result.get("voice"),
	        }

	    except HTTPException:
	        # HTTPException is itself an Exception subclass. Without this clause
	        # the deliberate 500 raised above would fall into the generic handler
	        # below, be logged as an unexpected crash, and have its detail
	        # replaced by str(e) (status code prepended to the message).
	        raise
	    except ImportError:
	        raise HTTPException(status_code=503, detail="TTS 服務不可用")
	    except Exception as e:
	        logger.exception(f"TTS 失敗: {e}")
	        raise HTTPException(status_code=500, detail=str(e))


	class VoiceLoginRequest(BaseModel):
	    """Request body for the voice login endpoint."""
	    audio_base64: str  # base64-encoded PCM16 audio
	    sample_rate: int = 16000  # passed to the voice auth session; presumably Hz — TODO confirm


	class VoiceLoginResponse(BaseModel):
	    """
	    Response body for the voice login endpoint.

	    Only ``success`` is always present. The remaining fields default to
	    None, so they are annotated ``Optional`` to keep the annotations and
	    the generated response schema in line with what is actually returned.
	    """
	    success: bool
	    access_token: Optional[str] = None  # JWT, set only on successful login
	    user: Optional[dict] = None  # id / name / email of the matched user
	    emotion: Optional[str] = None  # emotion label from the recognition result
	    error: Optional[str] = None  # user-facing message, set only on failure


	@router.post("/login", response_model=VoiceLoginResponse)
	async def voice_login(request: VoiceLoginRequest):
	    """
	    Log a user in from a single voice recording.

	    Flow:
	    1. Decode the base64 audio from the request.
	    2. Run voice recognition (the result carries a speaker label and an
	       emotion).
	    3. Look up the user bound to the recognised speaker label.
	    4. Issue an HS256 JWT that expires after 7 days.
	    5. Return the token, basic user info and the emotion label.

	    Args:
	        request: Base64 audio plus its sample rate.

	    Returns:
	        VoiceLoginResponse. Recognition failures, an unbound speaker label
	        and unexpected exceptions inside the ``try`` are all reported as
	        ``success=False`` with an ``error`` message rather than raised.
	    """
	    import base64
	    import uuid
	    import jwt
	    from datetime import datetime, timedelta, timezone
	    from core.config import settings

	    try:
	        # Fetch the shared VoiceAuthService instance from the application state.
	        # NOTE(review): importing the app object from ``main`` inside the
	        # handler looks like a circular-import workaround — confirm.
	        from main import app

	        voice_auth = getattr(app.state, "voice_auth", None)
	        if not voice_auth:
	            # No shared instance available: build one for this request.
	            from services.voice_login import VoiceAuthService, VoiceLoginConfig
	            voice_auth = VoiceAuthService(config=VoiceLoginConfig(
	                window_seconds=3,
	                required_windows=1,
	            ))

	        # Decode the audio.
	        audio_bytes = base64.b64decode(request.audio_base64)

	        # Use a random id for the temporary session. A timestamp-based id can
	        # collide when two requests arrive at the same instant, which would
	        # make them share one audio buffer.
	        session_id = f"voice_login_{uuid.uuid4().hex}"
	        voice_auth.start_session(session_id, request.sample_rate)
	        try:
	            # NOTE(review): writes the service's private buffer directly;
	            # a public "feed audio" method would be safer if one exists.
	            voice_auth._buffers[session_id] = bytearray(audio_bytes)

	            # Run recognition.
	            result = voice_auth.stop_and_authenticate(session_id)
	        finally:
	            # Always release the temporary session, even when recognition
	            # raises, so failed attempts do not leave buffers behind.
	            voice_auth.clear_session(session_id)

	        if not result.get("success"):
	            error_code = result.get("error", "UNKNOWN_ERROR")
	            error_messages = {
	                "NO_AUDIO": "沒有收到音訊資料",
	                "AUDIO_TOO_SHORT": "音訊太短，請錄製至少 3 秒",
	                "LOW_SNR": "環境太吵，請在安靜的地方重試",
	                "INCONSISTENT_WINDOWS": "無法確認身份，請重試",
	                "THRESHOLD_NOT_MET": "無法確認身份，請重試",
	                "MODEL_ERROR": "辨識系統錯誤，請稍後重試",
	            }
	            return VoiceLoginResponse(
	                success=False,
	                error=error_messages.get(error_code, f"辨識失敗：{error_code}")
	            )

	        # Extract the recognition result.
	        speaker_label = result.get("label")
	        emotion = result.get("emotion", {})
	        # Fall back to "neutral" when the emotion entry is not a dict.
	        emotion_label = emotion.get("label", "neutral") if isinstance(emotion, dict) else "neutral"

	        logger.info(f"🎙️ 語音辨識成功: speaker={speaker_label}, emotion={emotion_label}")

	        # Look up the user bound to this speaker label.
	        user = await get_user_by_speaker_label(speaker_label)

	        if not user:
	            return VoiceLoginResponse(
	                success=False,
	                error="找不到綁定的帳號。請先使用 Google 登入並綁定語音。"
	            )

	        # Build the JWT.
	        user_id = user.get("id")
	        user_name = user.get("name", "用戶")
	        user_email = user.get("email", "")

	        # One timezone-aware timestamp keeps iat and exp exactly 7 days apart
	        # and avoids the deprecated naive datetime.utcnow().
	        issued_at = datetime.now(timezone.utc)
	        payload = {
	            "sub": user_id,
	            "name": user_name,
	            "email": user_email,
	            "iat": issued_at,
	            "exp": issued_at + timedelta(days=7),
	            "login_method": "voice",
	            "emotion": emotion_label,
	        }

	        token = jwt.encode(payload, settings.JWT_SECRET, algorithm="HS256")

	        logger.info(f"✅ 語音登入成功: user={user_name}, emotion={emotion_label}")

	        return VoiceLoginResponse(
	            success=True,
	            access_token=token,
	            user={
	                "id": user_id,
	                "name": user_name,
	                "email": user_email,
	            },
	            emotion=emotion_label,
	        )

	    except Exception as e:
	        # Top-level boundary: log the traceback and answer with a failure
	        # payload instead of letting the request error out.
	        # NOTE(review): str(e) is returned to the client and may expose
	        # internal details — consider a generic message.
	        logger.exception(f"❌ 語音登入失敗: {e}")
	        return VoiceLoginResponse(
	            success=False,
	            error=f"系統錯誤：{str(e)}"
	        )