Spaces:

DawnC
/

PawMatchAI

Running on Zero

File size: 18,125 Bytes

1b3ab7b

# %%writefile smart_breed_filter.py
"""
Smart Breed Filter - 智慧品種過濾系統

設計原則：
1. 只對「真正危害用戶」的情況進行干預
2. 無傷大雅的偏好差異維持原有評分邏輯
3. 所有規則基於通用性設計，不針對特定品種硬編碼

危害類型：
- 安全風險：幼童 + 高風險行為特徵
- 生活品質嚴重影響：噪音零容忍 + 焦慮/警戒吠叫品種
"""

from typing import Dict, List, Tuple, Optional, Set
from dataclasses import dataclass
from breed_noise_info import breed_noise_info


@dataclass
class UserPriorityContext:
    """用戶優先級上下文"""
    noise_intolerance: bool = False      # 噪音零容忍
    has_young_children: bool = False     # 有幼童
    is_beginner: bool = False            # 新手
    is_senior: bool = False              # 老年人
    priority_dimensions: Dict[str, str] = None  # 各維度優先級

    def __post_init__(self):
        if self.priority_dimensions is None:
            self.priority_dimensions = {}


class PriorityParser:
    """
    優先級語意解析器

    識別用戶是否對某些維度有「絕對需求」vs「一般偏好」
    只在用戶明確強調時才觸發嚴格約束
    """

    # 絕對需求信號詞
    ABSOLUTE_SIGNALS = [
        'most importantly', 'absolutely need', 'must have', 'essential',
        'critical', 'cannot', "can't", 'no way', 'zero tolerance',
        'very noise sensitive', 'neighbors complain', 'thin walls'
    ]

    # 主要需求信號詞
    PRIMARY_SIGNALS = [
        'first', 'primarily', 'main priority', 'most important',
        'first priority', 'number one'
    ]

    # 維度關鍵詞
    DIMENSION_KEYWORDS = {
        'noise': ['quiet', 'noise', 'bark', 'silent', 'neighbors',
                  'thin walls', 'apartment noise', 'loud', 'vocal'],
        'children': ['kids', 'children', 'child', 'toddler', 'baby',
                    'infant', 'young kids', 'aged 1', 'aged 2', 'aged 3',
                    'aged 4', 'aged 5', 'preschool'],
        'exercise': ['active', 'exercise', 'running', 'hiking', 'energetic',
                    'athletic', 'jogging', 'outdoor activities'],
        'grooming': ['maintenance', 'grooming', 'shedding', 'brush', 'coat',
                    'low maintenance', 'easy care'],
    }

    def parse(self, user_input: str) -> UserPriorityContext:
        """解析用戶輸入，提取優先級上下文"""
        text = user_input.lower()
        context = UserPriorityContext()

        # 檢測噪音零容忍
        context.noise_intolerance = self._detect_noise_intolerance(text)

        # 檢測是否有幼童
        context.has_young_children = self._detect_young_children(text)

        # 檢測各維度優先級
        context.priority_dimensions = self._detect_dimension_priorities(text)

        return context

    def _detect_noise_intolerance(self, text: str) -> bool:
        """
        檢測噪音零容忍

        只有當用戶明確表達噪音是嚴重問題時才觸發
        例如：thin walls, neighbors complain, noise sensitive neighbors
        """
        # 強烈噪音敏感信號
        strong_signals = [
            'thin walls', 'noise sensitive', 'neighbors complain',
            'zero tolerance', 'cannot bark', "can't bark",
            'absolutely quiet', 'must be quiet', 'noise restriction'
        ]

        # 需要同時出現「噪音相關詞」+「強調詞」
        noise_words = ['quiet', 'noise', 'bark', 'silent', 'loud']
        emphasis_words = ['most importantly', 'absolutely', 'must', 'essential',
                         'critical', 'very', 'extremely', 'cannot', "can't"]

        # 檢查強烈信號
        if any(signal in text for signal in strong_signals):
            return True

        # 檢查組合：噪音詞 + 強調詞
        has_noise_word = any(w in text for w in noise_words)
        has_emphasis = any(w in text for w in emphasis_words)

        return has_noise_word and has_emphasis

    def _detect_young_children(self, text: str) -> bool:
        """
        檢測是否有幼童或一般兒童

        對於兒童安全，我們採取保守策略：
        - 明確提到 kids/children 就視為有兒童風險需要考慮
        - 因為牧羊本能的 nipping 對任何年齡兒童都有風險
        """
        # 任何提到兒童的情況都需要考慮安全
        child_signals = [
            'kids', 'children', 'child', 'toddler', 'baby', 'infant',
            'young kids', 'young children',
            'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5',
            '1 year', '2 year', '3 year', '4 year', '5 year',
            'preschool', 'newborn', 'family with'
        ]
        return any(signal in text for signal in child_signals)

    def _detect_dimension_priorities(self, text: str) -> Dict[str, str]:
        """檢測各維度的優先級"""
        priorities = {}

        for dimension, keywords in self.DIMENSION_KEYWORDS.items():
            if any(kw in text for kw in keywords):
                # 檢查是否有絕對需求信號
                if any(signal in text for signal in self.ABSOLUTE_SIGNALS):
                    # 檢查信號是否與該維度相關（在附近）
                    for signal in self.ABSOLUTE_SIGNALS:
                        if signal in text:
                            signal_pos = text.find(signal)
                            for kw in keywords:
                                if kw in text:
                                    kw_pos = text.find(kw)
                                    # 如果信號詞和維度關鍵詞距離在50字符內
                                    if abs(signal_pos - kw_pos) < 80:
                                        priorities[dimension] = 'ABSOLUTE'
                                        break
                            if dimension in priorities:
                                break

                # 檢查是否有主要需求信號
                if dimension not in priorities:
                    if any(signal in text for signal in self.PRIMARY_SIGNALS):
                        priorities[dimension] = 'PRIMARY'
                    else:
                        priorities[dimension] = 'PREFERENCE'

        return priorities


class BreedRiskAnalyzer:
    """
    品種風險分析器

    只分析「真正的危害風險」，不對一般偏好差異進行干預
    """

    # 焦慮相關觸發詞（會導致持續吠叫的真正問題）
    ANXIETY_TRIGGERS = ['anxiety', 'separation anxiety', 'loneliness']

    # 高警戒觸發詞（會導致頻繁吠叫）
    HIGH_ALERT_TRIGGERS = ['stranger alerts', 'strangers approaching',
                           'suspicious activity', 'territorial defense',
                           'protecting territory']

    # 牧羊/追逐本能（對幼童有 nipping 風險）
    HERDING_INDICATORS = ['herding instincts', 'herding', 'nipping']

    # 獵物驅動（可能追逐小孩）
    PREY_DRIVE_INDICATORS = ['prey drive', 'prey sighting', 'chase']

    def analyze_noise_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """
        分析品種的噪音風險

        只標記「真正會造成問題」的品種：
        - 有焦慮吠叫傾向（持續性問題）
        - 高度警戒吠叫（頻繁問題）

        不標記：
        - 偶爾興奮吠叫（正常狗行為）
        - 打招呼吠叫（短暫且可控）
        """
        noise_notes = noise_info.get('noise_notes', '').lower()
        noise_level = noise_info.get('noise_level', 'Moderate').lower()
        temperament = breed_info.get('Temperament', '').lower()

        risk_factors = []

        # 1. 焦慮觸發 - 這是真正的問題（持續性吠叫）
        has_anxiety = any(t in noise_notes for t in self.ANXIETY_TRIGGERS)
        if has_anxiety:
            risk_factors.append('anxiety_barking')

        # 2. 高度警戒 - 頻繁吠叫風險
        has_high_alert = any(t in noise_notes for t in self.HIGH_ALERT_TRIGGERS)
        if has_high_alert:
            risk_factors.append('high_alert_barking')

        # 3. 敏感性格 + 焦慮觸發的組合（更嚴重）
        is_sensitive = 'sensitive' in temperament
        if is_sensitive and has_anxiety:
            risk_factors.append('sensitive_anxiety_combo')

        # 4. 基礎噪音等級高
        if noise_level in ['high', 'moderate-high', 'moderate to high']:
            risk_factors.append('high_base_noise')

        # 計算風險等級
        # 只有真正問題的組合才是 HIGH
        if 'sensitive_anxiety_combo' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors and 'high_alert_barking' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors or len(risk_factors) >= 2:
            risk_level = 'MODERATE'
        elif len(risk_factors) >= 1:
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }

    def analyze_child_safety_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """
        分析品種對幼童的安全風險

        只標記「真正的安全風險」：
        - 牧羊本能（nipping 風險）
        - 高獵物驅動 + 大體型（追逐風險）
        - Good with Children = No 且有其他風險因素

        不標記：
        - 只是體型大但性格溫和
        - 活力高但無追逐/牧羊本能
        """
        temperament = breed_info.get('Temperament', '').lower()
        description = breed_info.get('Description', '').lower()
        noise_notes = noise_info.get('noise_notes', '').lower()
        size = breed_info.get('Size', '').lower()
        good_with_children = breed_info.get('Good with Children', 'Yes')
        exercise = breed_info.get('Exercise Needs', '').lower()

        risk_factors = []

        # 1. 牧羊本能 - 真正的 nipping 風險
        has_herding = any(ind in noise_notes or ind in description
                         for ind in self.HERDING_INDICATORS)
        if has_herding:
            risk_factors.append('herding_instinct')

        # 2. 獵物驅動 - 追逐風險
        has_prey_drive = any(ind in noise_notes or ind in description
                            for ind in self.PREY_DRIVE_INDICATORS)
        if has_prey_drive:
            risk_factors.append('prey_drive')

        # 3. Good with Children = No 是強烈信號
        if good_with_children == 'No':
            risk_factors.append('not_child_friendly')

        # 4. 大體型 + 高驅動 + 牧羊/獵物本能的組合才是風險
        is_large = size in ['large', 'giant']
        is_very_high_energy = 'very high' in exercise

        if is_large and (has_herding or has_prey_drive) and is_very_high_energy:
            risk_factors.append('large_high_drive_instinct')

        # 計算風險等級
        # 只有真正危險的組合才是 HIGH
        if 'not_child_friendly' in risk_factors and len(risk_factors) >= 2:
            risk_level = 'HIGH'
        elif 'large_high_drive_instinct' in risk_factors:
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors and is_very_high_energy:
            # 牧羊本能 + 高能量 = 對兒童的真正風險（nipping + 控制不住）
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors or 'prey_drive' in risk_factors:
            # 單獨的牧羊或獵物本能仍是中等風險
            risk_level = 'MODERATE'
        elif 'not_child_friendly' in risk_factors:
            risk_level = 'MODERATE'
        elif len(risk_factors) >= 1:
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }


class SmartBreedFilter:
    """
    智慧品種過濾器

    整合優先級解析和風險分析，只對真正危害用戶的情況進行干預
    """

    def __init__(self):
        self.priority_parser = PriorityParser()
        self.risk_analyzer = BreedRiskAnalyzer()

    def analyze_user_context(self, user_input: str) -> UserPriorityContext:
        """分析用戶輸入，提取優先級上下文"""
        return self.priority_parser.parse(user_input)

    def should_exclude_breed(self, breed_info: Dict, noise_info: Dict,
                            user_context: UserPriorityContext) -> Tuple[bool, str]:
        """
        判斷是否應該排除該品種

        返回: (是否排除, 排除原因)
        """
        # 1. 噪音零容忍 + 高噪音風險
        if user_context.noise_intolerance:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'HIGH':
                return True, f"High noise risk ({', '.join(noise_risk['risk_factors'])}) conflicts with noise intolerance"

        # 2. 有幼童 + 高兒童安全風險
        if user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'HIGH':
                return True, f"Child safety risk ({', '.join(child_risk['risk_factors'])}) with young children"

        return False, ""

    def calculate_risk_penalty(self, breed_info: Dict, noise_info: Dict,
                               user_context: UserPriorityContext) -> float:
        """
        計算風險懲罰分數

        只對中等風險進行輕微降權，不排除
        返回: 懲罰係數 (0.0 - 0.3)
        """
        penalty = 0.0

        # 噪音相關懲罰（只在用戶關注噪音時）
        if 'noise' in user_context.priority_dimensions:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif noise_risk['risk_level'] == 'HIGH' and not user_context.noise_intolerance:
                penalty += 0.15

        # 兒童安全相關懲罰（只在用戶有孩子時）
        if 'children' in user_context.priority_dimensions or user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif child_risk['risk_level'] == 'HIGH' and not user_context.has_young_children:
                penalty += 0.15

        return min(penalty, 0.3)  # 最大懲罰 30%

    def filter_and_adjust_recommendations(self, recommendations: List[Dict],
                                          user_input: str) -> List[Dict]:
        """
        過濾並調整推薦結果

        這是主要入口函數，整合所有過濾和調整邏輯
        """
        user_context = self.analyze_user_context(user_input)

        filtered_recommendations = []

        for rec in recommendations:
            breed = rec.get('breed', '')

            # 智能獲取品種資訊：優先從 info 欄位，否則從 rec 本身，最後從資料庫
            breed_info = rec.get('info')
            if not breed_info:
                # 嘗試從 rec 中構建標準化的 breed_info（處理大小寫差異）
                breed_info = {
                    'Temperament': rec.get('Temperament', rec.get('temperament', '')),
                    'Description': rec.get('Description', rec.get('description', '')),
                    'Size': rec.get('Size', rec.get('size', '')),
                    'Exercise Needs': rec.get('Exercise Needs', rec.get('exercise_needs', '')),
                    'Good with Children': rec.get('Good with Children', rec.get('good_with_children', 'Yes')),
                    'Care Level': rec.get('Care Level', rec.get('care_level', '')),
                }
                # 如果關鍵資訊缺失，從資料庫獲取
                if not breed_info['Temperament'] and not breed_info['Description']:
                    from dog_database import get_dog_description
                    db_info = get_dog_description(breed.replace(' ', '_'))
                    if db_info:
                        breed_info = db_info

            # 獲取噪音資訊（嘗試兩種品種名稱格式）
            noise_info = breed_noise_info.get(breed) or breed_noise_info.get(breed.replace(' ', '_'), {
                'noise_notes': '',
                'noise_level': 'Moderate'
            })

            # 檢查是否應該排除
            should_exclude, reason = self.should_exclude_breed(
                breed_info, noise_info, user_context
            )

            if should_exclude:
                print(f"  [SmartFilter] Excluded {breed}: {reason}")
                continue

            # 計算風險懲罰
            penalty = self.calculate_risk_penalty(breed_info, noise_info, user_context)

            if penalty > 0:
                original_score = rec.get('final_score', rec.get('overall_score', 0.8))
                adjusted_score = original_score * (1 - penalty)
                rec['final_score'] = adjusted_score
                rec['risk_penalty'] = penalty

            filtered_recommendations.append(rec)

        # 重新排序
        filtered_recommendations.sort(key=lambda x: -x.get('final_score', 0))

        # 更新排名
        for i, rec in enumerate(filtered_recommendations):
            rec['rank'] = i + 1

        return filtered_recommendations


# 模組級便捷函數
_smart_filter = None

def get_smart_filter() -> SmartBreedFilter:
    """獲取單例過濾器"""
    global _smart_filter
    if _smart_filter is None:
        _smart_filter = SmartBreedFilter()
    return _smart_filter

def apply_smart_filtering(recommendations: List[Dict], user_input: str) -> List[Dict]:
    """便捷函數：應用智慧過濾"""
    return get_smart_filter().filter_and_adjust_recommendations(recommendations, user_input)