Spaces:

XiaoBai1221
/

Bloom_Ware

Running

App Files Files Community

XiaoBai1221 committed on Dec 4, 2025

Commit

3fe95cf

1 Parent(s): d751b82

Play

Browse files

Files changed (2) hide show

core/intent_detector.py +42 -14
services/realtime_stt_service.py +22 -28

core/intent_detector.py CHANGED Viewed

@@ -198,14 +198,19 @@ class IntentDetector:
 - 位置查詢：「我在哪」「where am I」使用 reverse_geocode
 - YouBike 查詢：YouBike/Ubike/微笑單車 使用 tdx_youbike
-【情緒判斷】
-根據用戶消息的語氣判斷情緒：
-- neutral: 平靜、中性
-- happy: 開心、興奮
-- sad: 難過、沮喪
-- angry: 生氣、煩躁
-- fear: 恐懼、擔心
-- surprise: 驚訝、意外"""
     def _parse_function_calling_response(
         self,
@@ -255,13 +260,36 @@ class IntentDetector:
         return False, {"emotion": emotion}
     def _extract_emotion_from_response(self, response: Dict[str, Any]) -> str:
-        """從回應中提取情緒"""
-        # 嘗試從 content 中提取
         content = response.get("content", "")
-        if content:
-            for emotion in self.EMOTIONS:
-                if emotion in content.lower():
-                    return emotion
         return "neutral"

 - 位置查詢：「我在哪」「where am I」使用 reverse_geocode
 - YouBike 查詢：YouBike/Ubike/微笑單車 使用 tdx_youbike
+【情緒判斷 - 重要】
+根據用戶消息的語氣判斷情緒，並在回應開頭以 [EMOTION:xxx] 格式輸出：
+- [EMOTION:neutral] - 平靜、中性、一般詢問
+- [EMOTION:happy] - 開心、興奮、正面情緒（如：我很快樂、太棒了、好開心）
+- [EMOTION:sad] - 難過、沮喪、失落
+- [EMOTION:angry] - 生氣、煩躁、憤怒
+- [EMOTION:fear] - 恐懼、擔心、焦慮
+- [EMOTION:surprise] - 驚訝、意外
+範例：
+- 用戶說「我很快樂」→ 回應開頭必須是 [EMOTION:happy]
+- 用戶說「今天天氣如何」→ 回應開頭必須是 [EMOTION:neutral]
+- 用戶說「我好難過」→ 回應開頭必須是 [EMOTION:sad]"""
     def _parse_function_calling_response(
         self,
         return False, {"emotion": emotion}
     def _extract_emotion_from_response(self, response: Dict[str, Any]) -> str:
+        """從回應中提取情緒
+        優先使用 [EMOTION:xxx] 格式提取，降級使用關鍵字匹配
+        """
+        import re
         content = response.get("content", "")
+        if not content:
+            return "neutral"
+        # 優先：使用正則表達式提取 [EMOTION:xxx] 格式
+        emotion_match = re.search(r'\[EMOTION:(\w+)\]', content, re.IGNORECASE)
+        if emotion_match:
+            extracted = emotion_match.group(1).lower()
+            if extracted in self.EMOTIONS:
+                logger.info(f"🎭 從格式化標籤提取情緒: {extracted}")
+                return extracted
+        # 降級：使用關鍵字匹配（但需要更精確的匹配）
+        content_lower = content.lower()
+        for emotion in self.EMOTIONS:
+            # 使用單詞邊界匹配，避免誤判（如 "not angry" 被判為 angry）
+            pattern = rf'\b{emotion}\b'
+            if re.search(pattern, content_lower):
+                # 檢查是否有否定詞在前面
+                negation_pattern = rf'(not|no|isn\'t|aren\'t|wasn\'t|weren\'t|don\'t|doesn\'t|didn\'t|never|neither)\s+{emotion}'
+                if re.search(negation_pattern, content_lower):
+                    continue  # 跳過被否定的情緒
+                logger.info(f"🎭 從關鍵字提取情緒: {emotion}")
+                return emotion
         return "neutral"

services/realtime_stt_service.py CHANGED Viewed

@@ -45,27 +45,22 @@ class RealtimeSTTService:
         self._receive_task: Optional[asyncio.Task] = None
         self.current_language: str = "zh"
-    def _build_language_prompt(self) -> str:
         """
-        建立語言提示，引導 Whisper 優先識別支援的 5 種語言
-        Whisper 的 prompt 參數可以包含：
-        - 多語言範例文字
-        - 引導模型識別特定語言
         Returns:
-            語言提示字串
         """
-        # 使用多語言範例引導 Whisper（每種語言的常見詞彙）
-        prompt_samples = [
-            "你好",  # 中文
-            "Hello",  # 英文
-            "Halo",  # 印尼文
-            "こんにちは",  # 日文
-            "Xin chào"  # 越南文
-        ]
-        return ", ".join(prompt_samples)
     def _validate_language(self, language: str) -> Optional[str]:
         """
@@ -146,19 +141,22 @@ class RealtimeSTTService:
             self.is_connected = True
             logger.info("✅ 已連接到 OpenAI Realtime API")
-            # 建立語言提示（引導 Whisper 優先識別支援的 5 種語言）
-            language_prompt = self._build_language_prompt()
             # 發送 session 配置（正確格式：需要 session 物件包裹）
             session_config = {
                 "type": "transcription_session.update",
                 "session": {
                     "input_audio_format": "pcm16",
-                    "input_audio_transcription": {
-                        "model": model,
-                        "prompt": language_prompt  # 使用語言提示引導識別
-                        # 不指定 language，讓 Whisper 自動檢測（但透過 prompt 引導）
-                    },
                     "turn_detection": {
                         "type": "server_vad",
                         "threshold": 0.5,
@@ -170,10 +168,6 @@ class RealtimeSTTService:
                     }
                 }
             }
-            # 如果指定了語言，則加入配置
-            if validated_language:
-                session_config["session"]["input_audio_transcription"]["language"] = validated_language
             await self.ws.send(json.dumps(session_config))
             logger.info("📤 已發送 session 配置（含語言引導提示）")

         self._receive_task: Optional[asyncio.Task] = None
         self.current_language: str = "zh"
+    def _build_language_prompt(self, language: Optional[str] = None) -> Optional[str]:
         """
+        建立語言提示
+        注意：不使用具體詞彙（如「你好」「Hello」），避免 Whisper 在靜音或
+        低音量時產生幻覺，將 prompt 中的文字當作轉錄結果輸出。
+        Args:
+            language: 語言代碼（zh/en/id/ja/vi）或 None（自動檢測）
         Returns:
+            語言提示字串，或 None（不使用 prompt）
         """
+        # 不使用 prompt，完全依賴 language 參數和音頻內容
+        # 這樣可以避免 Whisper 幻覺出 prompt 中的文字
+        return None
     def _validate_language(self, language: str) -> Optional[str]:
         """
             self.is_connected = True
             logger.info("✅ 已連接到 OpenAI Realtime API")
             # 發送 session 配置（正確格式：需要 session 物件包裹）
+            # 不使用 prompt 參數，避免 Whisper 幻覺
+            transcription_config = {
+                "model": model,
+            }
+            # 如果指定了語言，加入 language 參數
+            if validated_language:
+                transcription_config["language"] = validated_language
+                logger.info(f"🌐 Whisper 語言設定: {validated_language}")
             session_config = {
                 "type": "transcription_session.update",
                 "session": {
                     "input_audio_format": "pcm16",
+                    "input_audio_transcription": transcription_config,
                     "turn_detection": {
                         "type": "server_vad",
                         "threshold": 0.5,
                     }
                 }
             }
             await self.ws.send(json.dumps(session_config))
             logger.info("📤 已發送 session 配置（含語言引導提示）")