File size: 18,125 Bytes
1b3ab7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# %%writefile smart_breed_filter.py
"""
Smart Breed Filter - 智慧品種過濾系統

設計原則:
1. 只對「真正危害用戶」的情況進行干預
2. 無傷大雅的偏好差異維持原有評分邏輯
3. 所有規則基於通用性設計,不針對特定品種硬編碼

危害類型:
- 安全風險:幼童 + 高風險行為特徵
- 生活品質嚴重影響:噪音零容忍 + 焦慮/警戒吠叫品種
"""

from typing import Dict, List, Tuple, Optional, Set
from dataclasses import dataclass
from breed_noise_info import breed_noise_info


@dataclass
class UserPriorityContext:
    """Priority signals extracted from a user's free-text request.

    Every field has a default, so a bare ``UserPriorityContext()`` represents
    "nothing detected".
    """
    noise_intolerance: bool = False      # zero tolerance for noise
    has_young_children: bool = False     # young children in the household
    is_beginner: bool = False            # novice owner
    is_senior: bool = False              # elderly owner
    # Priority label per dimension. ``None`` is accepted as "not provided"
    # and is replaced by a fresh dict in __post_init__.
    priority_dimensions: Optional[Dict[str, str]] = None

    def __post_init__(self):
        # The default is None rather than a dict literal so that instances
        # never share one mutable mapping; normalise it here so readers can
        # always treat the attribute as a dict.
        if self.priority_dimensions is None:
            self.priority_dimensions = {}


class PriorityParser:
    """
    Semantic parser for user priorities.

    Distinguishes an "absolute requirement" from a "general preference" for
    each dimension, so that strict constraints are only triggered when the
    user explicitly emphasises them. All matching is plain substring matching
    on the lower-cased input text.
    """

    # Phrases signalling an absolute requirement
    ABSOLUTE_SIGNALS = [
        'most importantly', 'absolutely need', 'must have', 'essential',
        'critical', 'cannot', "can't", 'no way', 'zero tolerance',
        'very noise sensitive', 'neighbors complain', 'thin walls'
    ]

    # Phrases signalling a primary requirement
    PRIMARY_SIGNALS = [
        'first', 'primarily', 'main priority', 'most important',
        'first priority', 'number one'
    ]

    # Keywords that mark a dimension as mentioned
    DIMENSION_KEYWORDS = {
        'noise': ['quiet', 'noise', 'bark', 'silent', 'neighbors',
                  'thin walls', 'apartment noise', 'loud', 'vocal'],
        'children': ['kids', 'children', 'child', 'toddler', 'baby',
                    'infant', 'young kids', 'aged 1', 'aged 2', 'aged 3',
                    'aged 4', 'aged 5', 'preschool'],
        'exercise': ['active', 'exercise', 'running', 'hiking', 'energetic',
                    'athletic', 'jogging', 'outdoor activities'],
        'grooming': ['maintenance', 'grooming', 'shedding', 'brush', 'coat',
                    'low maintenance', 'easy care'],
    }

    # An absolute signal applies to a dimension only when it starts fewer
    # than this many characters away from one of the dimension's keywords.
    PROXIMITY_WINDOW = 80

    def parse(self, user_input: str) -> "UserPriorityContext":
        """Parse the user's text into a UserPriorityContext.

        Args:
            user_input: Free-text request; ``None`` or an empty string is
                treated as "nothing mentioned".

        Returns:
            A context with ``noise_intolerance``, ``has_young_children`` and
            ``priority_dimensions`` filled in. ``is_beginner`` and
            ``is_senior`` are left at their defaults by this parser.
        """
        text = (user_input or '').lower()
        context = UserPriorityContext()

        # Zero tolerance for noise
        context.noise_intolerance = self._detect_noise_intolerance(text)

        # Children in the household
        context.has_young_children = self._detect_young_children(text)

        # Priority level of every mentioned dimension
        context.priority_dimensions = self._detect_dimension_priorities(text)

        return context

    def _detect_noise_intolerance(self, text: str) -> bool:
        """
        Detect zero tolerance for noise.

        Triggers when the (lower-cased) text contains a strong signal such as
        "thin walls" or "neighbors complain", or when it contains both a
        noise-related word and an emphasis word anywhere in the text.
        """
        # Phrases that on their own express strong noise sensitivity
        strong_signals = [
            'thin walls', 'noise sensitive', 'neighbors complain',
            'zero tolerance', 'cannot bark', "can't bark",
            'absolutely quiet', 'must be quiet', 'noise restriction'
        ]

        # Otherwise a noise word and an emphasis word must both be present
        noise_words = ['quiet', 'noise', 'bark', 'silent', 'loud']
        emphasis_words = ['most importantly', 'absolutely', 'must', 'essential',
                         'critical', 'very', 'extremely', 'cannot', "can't"]

        if any(signal in text for signal in strong_signals):
            return True

        has_noise_word = any(w in text for w in noise_words)
        has_emphasis = any(w in text for w in emphasis_words)

        return has_noise_word and has_emphasis

    def _detect_young_children(self, text: str) -> bool:
        """
        Detect whether young children (or children in general) are mentioned.

        This is deliberately conservative for child safety: any mention of
        kids/children counts, because herding-instinct nipping is a risk for
        children of any age.
        """
        child_signals = [
            'kids', 'children', 'child', 'toddler', 'baby', 'infant',
            'young kids', 'young children',
            'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5',
            '1 year', '2 year', '3 year', '4 year', '5 year',
            'preschool', 'newborn', 'family with'
        ]
        return any(signal in text for signal in child_signals)

    @staticmethod
    def _occurrences(text: str, needle: str) -> List[int]:
        """Return the start index of every occurrence of needle in text."""
        positions = []
        index = text.find(needle)
        while index != -1:
            positions.append(index)
            index = text.find(needle, index + 1)
        return positions

    def _detect_dimension_priorities(self, text: str) -> Dict[str, str]:
        """Classify every mentioned dimension by priority level.

        A dimension counts as mentioned when any of its keywords occurs in
        the text. It is then labelled:

        - ``'ABSOLUTE'`` when any occurrence of an absolute signal starts
          within ``PROXIMITY_WINDOW`` characters of any occurrence of one of
          its keywords;
        - ``'PRIMARY'`` when a primary signal occurs anywhere in the text;
        - ``'PREFERENCE'`` otherwise.

        Returns:
            Mapping of dimension name to label; dimensions that are not
            mentioned are absent.
        """
        priorities = {}

        # Every occurrence is collected, not just the first: a keyword that
        # is repeated next to an emphasis phrase later in the text must still
        # count as emphasised.
        signal_positions = [
            pos
            for signal in self.ABSOLUTE_SIGNALS
            for pos in self._occurrences(text, signal)
        ]
        has_primary_signal = any(signal in text for signal in self.PRIMARY_SIGNALS)

        for dimension, keywords in self.DIMENSION_KEYWORDS.items():
            keyword_positions = [
                pos for kw in keywords for pos in self._occurrences(text, kw)
            ]
            if not keyword_positions:
                continue  # dimension not mentioned at all

            is_absolute = any(
                abs(signal_pos - kw_pos) < self.PROXIMITY_WINDOW
                for signal_pos in signal_positions
                for kw_pos in keyword_positions
            )
            if is_absolute:
                priorities[dimension] = 'ABSOLUTE'
            elif has_primary_signal:
                priorities[dimension] = 'PRIMARY'
            else:
                priorities[dimension] = 'PREFERENCE'

        return priorities


class BreedRiskAnalyzer:
    """
    Breed risk analyzer.

    Only analyses genuine harm risks; ordinary preference mismatches are not
    penalised here. Both analyses work by substring matching on lower-cased
    breed text fields.
    """

    # Anxiety-related triggers (lead to persistent barking)
    ANXIETY_TRIGGERS = ['anxiety', 'separation anxiety', 'loneliness']

    # High-alert triggers (lead to frequent barking)
    HIGH_ALERT_TRIGGERS = ['stranger alerts', 'strangers approaching',
                           'suspicious activity', 'territorial defense',
                           'protecting territory']

    # Herding / chasing instinct (nipping risk for young children)
    HERDING_INDICATORS = ['herding instincts', 'herding', 'nipping']

    # Prey drive (may chase small children)
    PREY_DRIVE_INDICATORS = ['prey drive', 'prey sighting', 'chase']

    @staticmethod
    def _text(value) -> str:
        """Return value as stripped, lower-cased text; None becomes ''."""
        return '' if value is None else str(value).strip().lower()

    def analyze_noise_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """
        Analyse the noise risk of a breed.

        Flags only what causes real problems:
        - anxiety barking (persistent)
        - high-alert barking (frequent)

        Occasional excitement or greeting barks are not flagged.

        Args:
            breed_info: Breed record; only ``'Temperament'`` is read.
            noise_info: Noise record; ``'noise_notes'`` and ``'noise_level'``
                are read. Missing or ``None`` values (or a ``None`` dict) are
                treated as empty text / ``'Moderate'``.

        Returns:
            ``{'risk_level': 'HIGH'|'MODERATE'|'LOW'|'NONE',
            'risk_factors': [...]}``.
        """
        breed_info = breed_info or {}
        noise_info = noise_info or {}

        noise_notes = self._text(noise_info.get('noise_notes'))
        noise_level = self._text(noise_info.get('noise_level')) or 'moderate'
        temperament = self._text(breed_info.get('Temperament'))

        risk_factors = []

        # 1. Anxiety triggers: the real problem (persistent barking)
        has_anxiety = any(t in noise_notes for t in self.ANXIETY_TRIGGERS)
        if has_anxiety:
            risk_factors.append('anxiety_barking')

        # 2. High alertness: frequent barking
        has_high_alert = any(t in noise_notes for t in self.HIGH_ALERT_TRIGGERS)
        if has_high_alert:
            risk_factors.append('high_alert_barking')

        # 3. Sensitive temperament combined with anxiety is more severe
        if 'sensitive' in temperament and has_anxiety:
            risk_factors.append('sensitive_anxiety_combo')

        # 4. High baseline noise level
        if noise_level in ['high', 'moderate-high', 'moderate to high']:
            risk_factors.append('high_base_noise')

        # Only genuinely problematic combinations are rated HIGH
        if 'sensitive_anxiety_combo' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors and 'high_alert_barking' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors or len(risk_factors) >= 2:
            risk_level = 'MODERATE'
        elif risk_factors:
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }

    def analyze_child_safety_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """
        Analyse the safety risk a breed poses to young children.

        Flags only genuine safety risks:
        - herding instinct (nipping)
        - prey drive (chasing)
        - ``Good with Children`` equal to "No" (case-insensitive)
        - large/giant size + very high exercise needs + herding or prey
          instinct

        Size alone or energy alone is never flagged.

        Args:
            breed_info: Breed record; ``'Description'``, ``'Size'``,
                ``'Good with Children'`` and ``'Exercise Needs'`` are read.
            noise_info: Noise record; ``'noise_notes'`` is read.
            Missing or ``None`` values (or ``None`` dicts) are treated as
            empty text; a missing ``'Good with Children'`` counts as "Yes".

        Returns:
            ``{'risk_level': 'HIGH'|'MODERATE'|'LOW'|'NONE',
            'risk_factors': [...]}``.
        """
        breed_info = breed_info or {}
        noise_info = noise_info or {}

        description = self._text(breed_info.get('Description'))
        noise_notes = self._text(noise_info.get('noise_notes'))
        size = self._text(breed_info.get('Size'))
        good_with_children = self._text(breed_info.get('Good with Children', 'Yes'))
        exercise = self._text(breed_info.get('Exercise Needs'))

        risk_factors = []

        # 1. Herding instinct: genuine nipping risk
        has_herding = any(ind in noise_notes or ind in description
                          for ind in self.HERDING_INDICATORS)
        if has_herding:
            risk_factors.append('herding_instinct')

        # 2. Prey drive: chasing risk
        has_prey_drive = any(ind in noise_notes or ind in description
                             for ind in self.PREY_DRIVE_INDICATORS)
        if has_prey_drive:
            risk_factors.append('prey_drive')

        # 3. An explicit "No" for Good with Children is a strong signal
        if good_with_children == 'no':
            risk_factors.append('not_child_friendly')

        # 4. Size only matters in combination with drive and instinct
        is_large = size in ['large', 'giant']
        is_very_high_energy = 'very high' in exercise

        if is_large and (has_herding or has_prey_drive) and is_very_high_energy:
            risk_factors.append('large_high_drive_instinct')

        # Only genuinely dangerous combinations are rated HIGH
        if 'not_child_friendly' in risk_factors and len(risk_factors) >= 2:
            risk_level = 'HIGH'
        elif 'large_high_drive_instinct' in risk_factors:
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors and is_very_high_energy:
            # Herding instinct + very high energy: nipping that is hard to control
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors or 'prey_drive' in risk_factors:
            # A herding or prey instinct on its own is a moderate risk
            risk_level = 'MODERATE'
        elif 'not_child_friendly' in risk_factors:
            risk_level = 'MODERATE'
        else:
            # With the four factors above, any non-empty list is already
            # handled, so LOW is only a fall-through for future factors.
            risk_level = 'LOW' if risk_factors else 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors
        }


class SmartBreedFilter:
    """
    Smart breed filter.

    Combines priority parsing and risk analysis, and only intervenes
    (exclusion or score penalty) when a breed would genuinely harm the user.
    """

    # Score assumed for a recommendation carrying neither 'final_score' nor
    # 'overall_score'. Used for both penalising and sorting so that the two
    # always agree.
    DEFAULT_SCORE = 0.8

    def __init__(self):
        self.priority_parser = PriorityParser()
        self.risk_analyzer = BreedRiskAnalyzer()

    def analyze_user_context(self, user_input: str) -> UserPriorityContext:
        """Parse the user's text into a UserPriorityContext."""
        return self.priority_parser.parse(user_input)

    def should_exclude_breed(self, breed_info: Dict, noise_info: Dict,
                            user_context: UserPriorityContext) -> Tuple[bool, str]:
        """
        Decide whether a breed must be excluded outright.

        A breed is excluded when the user is noise-intolerant and the breed's
        noise risk is HIGH, or when the user has young children and the
        breed's child-safety risk is HIGH.

        Returns:
            ``(exclude, reason)``; ``reason`` is an empty string when the
            breed is kept.
        """
        # 1. Noise intolerance + HIGH noise risk
        if user_context.noise_intolerance:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'HIGH':
                return True, f"High noise risk ({', '.join(noise_risk['risk_factors'])}) conflicts with noise intolerance"

        # 2. Young children + HIGH child-safety risk
        if user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'HIGH':
                return True, f"Child safety risk ({', '.join(child_risk['risk_factors'])}) with young children"

        return False, ""

    def calculate_risk_penalty(self, breed_info: Dict, noise_info: Dict,
                               user_context: UserPriorityContext) -> float:
        """
        Compute the score penalty for a breed that is kept.

        MODERATE risks cost 0.1 each. HIGH risks cost 0.15 each, but only
        when the matching hard-exclusion flag is not set on the context.

        Returns:
            Penalty coefficient in the range 0.0 - 0.3.
        """
        penalty = 0.0

        # Noise penalty applies only when the user mentioned noise at all
        if 'noise' in user_context.priority_dimensions:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif noise_risk['risk_level'] == 'HIGH' and not user_context.noise_intolerance:
                penalty += 0.15

        # Child-safety penalty applies only when children are relevant
        if 'children' in user_context.priority_dimensions or user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif child_risk['risk_level'] == 'HIGH' and not user_context.has_young_children:
                penalty += 0.15

        return min(penalty, 0.3)  # cap the penalty at 30%

    def _current_score(self, rec: Dict) -> float:
        """Return a recommendation's score: final_score, else overall_score, else the default."""
        return rec.get('final_score', rec.get('overall_score', self.DEFAULT_SCORE))

    def _resolve_breed_info(self, rec: Dict, breed: str) -> Dict:
        """Return breed info from rec['info'], else from rec's own fields, else from the database."""
        breed_info = rec.get('info')
        if breed_info:
            return breed_info

        # Build a normalised record from rec itself, accepting both the
        # capitalised and the snake_case spelling of each field.
        breed_info = {
            'Temperament': rec.get('Temperament', rec.get('temperament', '')),
            'Description': rec.get('Description', rec.get('description', '')),
            'Size': rec.get('Size', rec.get('size', '')),
            'Exercise Needs': rec.get('Exercise Needs', rec.get('exercise_needs', '')),
            'Good with Children': rec.get('Good with Children', rec.get('good_with_children', 'Yes')),
            'Care Level': rec.get('Care Level', rec.get('care_level', '')),
        }

        # When the key text fields are missing, fall back to the database.
        if not breed_info['Temperament'] and not breed_info['Description']:
            from dog_database import get_dog_description
            db_info = get_dog_description(breed.replace(' ', '_'))
            if db_info:
                breed_info = db_info

        return breed_info

    def _resolve_noise_info(self, breed: str) -> Dict:
        """Return noise info for breed, trying the name as given and with underscores for spaces."""
        return (
            breed_noise_info.get(breed)
            or breed_noise_info.get(breed.replace(' ', '_'))
            or {'noise_notes': '', 'noise_level': 'Moderate'}
        )

    def filter_and_adjust_recommendations(self, recommendations: List[Dict],
                                          user_input: str) -> List[Dict]:
        """
        Filter and re-rank recommendations; the main entry point.

        Each recommendation dict is either dropped (hard exclusion, logged
        with ``print``) or kept. Kept entries with a non-zero penalty get
        ``'final_score'`` and ``'risk_penalty'`` written in place. The kept
        entries are then sorted by score, highest first, and ``'rank'`` is
        rewritten starting at 1.

        Args:
            recommendations: Recommendation dicts; ``'breed'`` plus optional
                ``'info'``, ``'final_score'`` and ``'overall_score'`` are read.
            user_input: The user's free-text request.

        Returns:
            A new list holding the kept (and mutated) recommendation dicts.
        """
        user_context = self.analyze_user_context(user_input)

        filtered_recommendations = []

        for rec in recommendations:
            breed = rec.get('breed') or ''  # tolerate an explicit None

            breed_info = self._resolve_breed_info(rec, breed)
            noise_info = self._resolve_noise_info(breed)

            should_exclude, reason = self.should_exclude_breed(
                breed_info, noise_info, user_context
            )
            if should_exclude:
                print(f"  [SmartFilter] Excluded {breed}: {reason}")
                continue

            penalty = self.calculate_risk_penalty(breed_info, noise_info, user_context)
            if penalty > 0:
                rec['final_score'] = self._current_score(rec) * (1 - penalty)
                rec['risk_penalty'] = penalty

            filtered_recommendations.append(rec)

        # Sort with the same score fallback that the penalty used. Reading
        # only 'final_score' here would rank every unpenalised entry that
        # carries just 'overall_score' as 0, i.e. below the penalised ones.
        # reverse=True keeps the original order of equal scores.
        filtered_recommendations.sort(key=self._current_score, reverse=True)

        # Rewrite ranks to match the new order
        for position, rec in enumerate(filtered_recommendations, start=1):
            rec['rank'] = position

        return filtered_recommendations


# Module-level convenience helpers
_smart_filter = None


def get_smart_filter() -> SmartBreedFilter:
    """Return the shared SmartBreedFilter, creating it on first call."""
    global _smart_filter
    if _smart_filter is not None:
        return _smart_filter
    # First call: build the instance and keep it in the module global.
    _smart_filter = SmartBreedFilter()
    return _smart_filter

def apply_smart_filtering(recommendations: List[Dict], user_input: str) -> List[Dict]:
    """Convenience wrapper: run the shared filter over recommendations for user_input."""
    shared_filter = get_smart_filter()
    return shared_filter.filter_and_adjust_recommendations(recommendations, user_input)