# NOTE: extracted from a hosted file viewer (file size: 18,125 bytes; revision 1b3ab7b).
# The viewer's status badges and line-number gutter were removed here.
# %%writefile smart_breed_filter.py
"""
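Smart Breed Filter - intelligent breed filtering system.

Design principles:
1. Intervene only in situations that would genuinely harm the user.
2. Harmless preference differences keep the original scoring logic.
3. All rules are designed generically; no specific breed is hard-coded.

Harm types:
- Safety risk: young children + high-risk behavioural traits.
- Severe quality-of-life impact: zero noise tolerance + anxiety/alert-barking breeds.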
"""
import re
from dataclasses import dataclass, field
from typing import Dict, List, Tuple, Optional, Set

from breed_noise_info import breed_noise_info
@dataclass
class UserPriorityContext:
    """Priority context extracted from a user's free-text request.

    Field order is part of the positional constructor and must not change.
    """

    # User cannot tolerate barking/noise at all.
    noise_intolerance: bool = False
    # Children were mentioned in the request.
    has_young_children: bool = False
    # First-time owner.
    # NOTE(review): not populated by PriorityParser.parse; presumably set by callers.
    is_beginner: bool = False
    # Senior owner.
    # NOTE(review): not populated by PriorityParser.parse; presumably set by callers.
    is_senior: bool = False
    # Maps a dimension name (e.g. 'noise') to its priority label.
    # default_factory gives every instance its own dict without a None sentinel.
    priority_dimensions: Dict[str, str] = field(default_factory=dict)

    def __post_init__(self):
        # Backward compatibility: callers that pass priority_dimensions=None
        # explicitly still end up with an empty dict.
        if self.priority_dimensions is None:
            self.priority_dimensions = {}
class PriorityParser:
    """Semantic parser for user priorities.

    Distinguishes an "absolute requirement" from an ordinary preference so
    that strict constraints are only triggered when the user clearly
    emphasises a dimension.

    Unless stated otherwise, matching is plain lower-case substring
    containment.
    """

    # Phrases signalling an absolute requirement.
    ABSOLUTE_SIGNALS = [
        'most importantly', 'absolutely need', 'must have', 'essential',
        'critical', 'cannot', "can't", 'no way', 'zero tolerance',
        'very noise sensitive', 'neighbors complain', 'thin walls',
    ]

    # Phrases signalling a primary (but not absolute) requirement.
    PRIMARY_SIGNALS = [
        'first', 'primarily', 'main priority', 'most important',
        'first priority', 'number one',
    ]

    # Keywords that indicate the user mentioned a given dimension.
    DIMENSION_KEYWORDS = {
        'noise': ['quiet', 'noise', 'bark', 'silent', 'neighbors',
                  'thin walls', 'apartment noise', 'loud', 'vocal'],
        'children': ['kids', 'children', 'child', 'toddler', 'baby',
                     'infant', 'young kids', 'aged 1', 'aged 2', 'aged 3',
                     'aged 4', 'aged 5', 'preschool'],
        'exercise': ['active', 'exercise', 'running', 'hiking', 'energetic',
                     'athletic', 'jogging', 'outdoor activities'],
        'grooming': ['maintenance', 'grooming', 'shedding', 'brush', 'coat',
                     'low maintenance', 'easy care'],
    }

    # Phrases that on their own indicate strong noise sensitivity.
    NOISE_STRONG_SIGNALS = [
        'thin walls', 'noise sensitive', 'neighbors complain',
        'zero tolerance', 'cannot bark', "can't bark",
        'absolutely quiet', 'must be quiet', 'noise restriction',
    ]

    # A noise word combined with an emphasis word also counts as intolerance.
    NOISE_WORDS = ['quiet', 'noise', 'bark', 'silent', 'loud']
    EMPHASIS_WORDS = ['most importantly', 'absolutely', 'must', 'essential',
                      'critical', 'very', 'extremely', 'cannot', "can't"]

    # Any mention of children is treated as a safety consideration.
    CHILD_SIGNALS = [
        'kids', 'children', 'child', 'toddler', 'baby', 'infant',
        'young kids', 'young children',
        'aged 1', 'aged 2', 'aged 3', 'aged 4', 'aged 5',
        '1 year', '2 year', '3 year', '4 year', '5 year',
        'preschool', 'newborn', 'family with',
    ]

    # An absolute signal applies to a dimension when the start offsets of the
    # signal and of a dimension keyword are fewer than this many characters apart.
    PROXIMITY_WINDOW = 80

    def parse(self, user_input: str) -> "UserPriorityContext":
        """Parse user input into a UserPriorityContext.

        Args:
            user_input: Free-text request; None is treated as empty text.

        Returns:
            A context with noise_intolerance, has_young_children and
            priority_dimensions populated.
        """
        text = (user_input or '').lower()
        context = UserPriorityContext()
        context.noise_intolerance = self._detect_noise_intolerance(text)
        context.has_young_children = self._detect_young_children(text)
        context.priority_dimensions = self._detect_dimension_priorities(text)
        return context

    @staticmethod
    def _find_all(text: str, needle: str) -> List[int]:
        """Return the start offset of every occurrence of needle in text."""
        positions = []
        start = text.find(needle)
        while start != -1:
            positions.append(start)
            start = text.find(needle, start + 1)
        return positions

    @staticmethod
    def _any_word_starts_with(text: str, terms: List[str]) -> bool:
        """Return True if any term occurs in text at the start of a word.

        Anchoring at a word start keeps inflections ('barking', 'louder')
        matching while rejecting accidental hits inside other words
        ('very' in 'every', 'loud' in 'cloud').
        """
        return any(re.search(r'\b' + re.escape(term), text) for term in terms)

    def _detect_noise_intolerance(self, text: str) -> bool:
        """Detect zero tolerance for noise.

        Triggers only when the user clearly states noise is a serious
        problem: either a strong phrase (e.g. 'thin walls',
        'neighbors complain') or a noise word together with an emphasis word.

        Args:
            text: Lower-cased user input.
        """
        if any(signal in text for signal in self.NOISE_STRONG_SIGNALS):
            return True
        # The combination is matched at word starts; a raw substring test
        # would let e.g. 'every' satisfy the emphasis word 'very'.
        return (self._any_word_starts_with(text, self.NOISE_WORDS)
                and self._any_word_starts_with(text, self.EMPHASIS_WORDS))

    def _detect_young_children(self, text: str) -> bool:
        """Detect whether children are mentioned.

        Deliberately conservative: any mention of kids/children counts,
        because herding-style nipping is a risk for children of any age.

        Args:
            text: Lower-cased user input.
        """
        return any(signal in text for signal in self.CHILD_SIGNALS)

    def _detect_dimension_priorities(self, text: str) -> Dict[str, str]:
        """Classify each mentioned dimension as ABSOLUTE, PRIMARY or PREFERENCE.

        A dimension is included only if one of its keywords occurs in text.
        It is ABSOLUTE when any occurrence of an absolute signal lies within
        PROXIMITY_WINDOW characters of any occurrence of one of its keywords,
        PRIMARY when the text contains a primary signal anywhere, and
        PREFERENCE otherwise.

        Args:
            text: Lower-cased user input.

        Returns:
            Mapping of dimension name to priority label.
        """
        # Every occurrence is collected. Comparing only the first occurrence
        # of each phrase would miss an emphasised later mention.
        signal_positions = [
            pos
            for signal in self.ABSOLUTE_SIGNALS
            for pos in self._find_all(text, signal)
        ]
        has_primary_signal = any(signal in text for signal in self.PRIMARY_SIGNALS)

        priorities = {}
        for dimension, keywords in self.DIMENSION_KEYWORDS.items():
            keyword_positions = [
                pos for kw in keywords for pos in self._find_all(text, kw)
            ]
            if not keyword_positions:
                continue
            is_absolute = any(
                abs(signal_pos - kw_pos) < self.PROXIMITY_WINDOW
                for signal_pos in signal_positions
                for kw_pos in keyword_positions
            )
            if is_absolute:
                priorities[dimension] = 'ABSOLUTE'
            elif has_primary_signal:
                priorities[dimension] = 'PRIMARY'
            else:
                priorities[dimension] = 'PREFERENCE'
        return priorities
class BreedRiskAnalyzer:
    """Breed risk analyser.

    Flags only genuine harm risks; ordinary preference mismatches are left
    to the regular scoring logic. All matching is lower-case substring
    containment on the breed's text fields.
    """

    # Anxiety-related triggers (lead to persistent barking).
    ANXIETY_TRIGGERS = ['anxiety', 'separation anxiety', 'loneliness']

    # High-alert triggers (lead to frequent barking).
    HIGH_ALERT_TRIGGERS = ['stranger alerts', 'strangers approaching',
                           'suspicious activity', 'territorial defense',
                           'protecting territory']

    # Herding/chasing instinct (nipping risk around children).
    HERDING_INDICATORS = ['herding instincts', 'herding', 'nipping']

    # Prey drive (may chase children).
    PREY_DRIVE_INDICATORS = ['prey drive', 'prey sighting', 'chase']

    @staticmethod
    def _lower_text(source: Optional[Dict], key: str, default: str = '') -> str:
        """Return source[key] lower-cased, falling back to default.

        The fallback is used when source is empty/None, the key is missing,
        or the stored value is not a string (e.g. None from a sparse record),
        so callers never hit AttributeError on .lower().
        """
        value = source.get(key, default) if source else default
        if not isinstance(value, str):
            value = default
        return value.lower()

    def analyze_noise_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """Assess a breed's noise risk.

        Flags anxiety barking (persistent), high-alert barking (frequent),
        a sensitive temperament combined with anxiety, and a high base noise
        level.

        Args:
            breed_info: Breed record; reads 'Temperament'.
            noise_info: Noise record; reads 'noise_notes' and 'noise_level'
                (defaults to 'Moderate' when absent).

        Returns:
            {'risk_level': 'HIGH' | 'MODERATE' | 'LOW' | 'NONE',
             'risk_factors': list of factor names in detection order}.
        """
        noise_notes = self._lower_text(noise_info, 'noise_notes')
        noise_level = self._lower_text(noise_info, 'noise_level', 'Moderate')
        temperament = self._lower_text(breed_info, 'Temperament')

        risk_factors = []

        # 1. Anxiety triggers: persistent barking.
        has_anxiety = any(t in noise_notes for t in self.ANXIETY_TRIGGERS)
        if has_anxiety:
            risk_factors.append('anxiety_barking')

        # 2. High alertness: frequent barking.
        if any(t in noise_notes for t in self.HIGH_ALERT_TRIGGERS):
            risk_factors.append('high_alert_barking')

        # 3. Sensitive temperament combined with anxiety.
        if has_anxiety and 'sensitive' in temperament:
            risk_factors.append('sensitive_anxiety_combo')

        # 4. High base noise level.
        if noise_level in ('high', 'moderate-high', 'moderate to high'):
            risk_factors.append('high_base_noise')

        # Only the problematic combinations are rated HIGH.
        if 'sensitive_anxiety_combo' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors and 'high_alert_barking' in risk_factors:
            risk_level = 'HIGH'
        elif 'anxiety_barking' in risk_factors or len(risk_factors) >= 2:
            risk_level = 'MODERATE'
        elif risk_factors:
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors,
        }

    def analyze_child_safety_risk(self, breed_info: Dict, noise_info: Dict) -> Dict:
        """Assess a breed's safety risk around children.

        Flags herding instinct (nipping), prey drive (chasing), an explicit
        'Good with Children' value of 'No', and the combination of large
        size, very high exercise needs and a herding/prey instinct.

        Args:
            breed_info: Breed record; reads 'Description', 'Size',
                'Good with Children' (defaults to 'Yes') and 'Exercise Needs'.
            noise_info: Noise record; reads 'noise_notes'.

        Returns:
            {'risk_level': 'HIGH' | 'MODERATE' | 'LOW' | 'NONE',
             'risk_factors': list of factor names in detection order}.
        """
        description = self._lower_text(breed_info, 'Description')
        noise_notes = self._lower_text(noise_info, 'noise_notes')
        size = self._lower_text(breed_info, 'Size')
        exercise = self._lower_text(breed_info, 'Exercise Needs')
        # Compared case-sensitively against 'No'.
        good_with_children = (breed_info or {}).get('Good with Children', 'Yes')

        risk_factors = []

        # 1. Herding instinct: nipping risk.
        has_herding = any(ind in noise_notes or ind in description
                          for ind in self.HERDING_INDICATORS)
        if has_herding:
            risk_factors.append('herding_instinct')

        # 2. Prey drive: chasing risk.
        has_prey_drive = any(ind in noise_notes or ind in description
                             for ind in self.PREY_DRIVE_INDICATORS)
        if has_prey_drive:
            risk_factors.append('prey_drive')

        # 3. An explicit "not good with children" is a strong signal.
        if good_with_children == 'No':
            risk_factors.append('not_child_friendly')

        # 4. Size alone is not a risk; it must combine with high drive and an instinct.
        is_large = size in ('large', 'giant')
        is_very_high_energy = 'very high' in exercise
        if is_large and (has_herding or has_prey_drive) and is_very_high_energy:
            risk_factors.append('large_high_drive_instinct')

        # Only the dangerous combinations are rated HIGH.
        if 'not_child_friendly' in risk_factors and len(risk_factors) >= 2:
            risk_level = 'HIGH'
        elif 'large_high_drive_instinct' in risk_factors:
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors and is_very_high_energy:
            # Herding instinct plus very high energy: nipping that is hard to control.
            risk_level = 'HIGH'
        elif 'herding_instinct' in risk_factors or 'prey_drive' in risk_factors:
            # A herding or prey instinct on its own is a moderate risk.
            risk_level = 'MODERATE'
        elif 'not_child_friendly' in risk_factors:
            risk_level = 'MODERATE'
        elif risk_factors:
            # Defensive fallback: every factor defined above is already
            # handled by an earlier branch, so this is not reached today.
            risk_level = 'LOW'
        else:
            risk_level = 'NONE'

        return {
            'risk_level': risk_level,
            'risk_factors': risk_factors,
        }
class SmartBreedFilter:
    """Smart breed filter.

    Combines priority parsing with risk analysis and intervenes only when a
    breed would genuinely harm the user: HIGH-risk matches are excluded,
    lower risks receive a mild score penalty.
    """

    def __init__(self):
        self.priority_parser = PriorityParser()
        self.risk_analyzer = BreedRiskAnalyzer()

    def analyze_user_context(self, user_input: str) -> UserPriorityContext:
        """Parse user input into a priority context."""
        return self.priority_parser.parse(user_input)

    def should_exclude_breed(self, breed_info: Dict, noise_info: Dict,
                             user_context: UserPriorityContext) -> Tuple[bool, str]:
        """Decide whether a breed must be excluded outright.

        Args:
            breed_info: Breed record passed to the risk analyser.
            noise_info: Noise record passed to the risk analyser.
            user_context: Parsed user priorities.

        Returns:
            (exclude, reason). reason is an empty string when not excluded.
        """
        # 1. Noise intolerance combined with a HIGH noise risk.
        if user_context.noise_intolerance:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'HIGH':
                return True, f"High noise risk ({', '.join(noise_risk['risk_factors'])}) conflicts with noise intolerance"

        # 2. Children present combined with a HIGH child-safety risk.
        if user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'HIGH':
                return True, f"Child safety risk ({', '.join(child_risk['risk_factors'])}) with young children"

        return False, ""

    def calculate_risk_penalty(self, breed_info: Dict, noise_info: Dict,
                               user_context: UserPriorityContext) -> float:
        """Compute a mild down-weighting factor for non-excluded breeds.

        MODERATE risk adds 0.1 per category. HIGH risk adds 0.15, but only
        when the matching exclusion flag on user_context is not set.

        Returns:
            Penalty coefficient in the range 0.0 - 0.3.
        """
        penalty = 0.0

        # Noise penalty applies only when the user mentioned noise.
        if 'noise' in user_context.priority_dimensions:
            noise_risk = self.risk_analyzer.analyze_noise_risk(breed_info, noise_info)
            if noise_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif noise_risk['risk_level'] == 'HIGH' and not user_context.noise_intolerance:
                penalty += 0.15

        # Child-safety penalty applies only when children are relevant.
        if 'children' in user_context.priority_dimensions or user_context.has_young_children:
            child_risk = self.risk_analyzer.analyze_child_safety_risk(breed_info, noise_info)
            if child_risk['risk_level'] == 'MODERATE':
                penalty += 0.1
            elif child_risk['risk_level'] == 'HIGH' and not user_context.has_young_children:
                penalty += 0.15

        return min(penalty, 0.3)  # capped at 30%

    @staticmethod
    def _resolve_breed_info(rec: Dict, breed: str) -> Dict:
        """Return breed details from rec['info'], rec itself, or the database."""
        breed_info = rec.get('info')
        if breed_info:
            return breed_info

        # Build a normalised record from rec, accepting both key spellings.
        breed_info = {
            'Temperament': rec.get('Temperament', rec.get('temperament', '')),
            'Description': rec.get('Description', rec.get('description', '')),
            'Size': rec.get('Size', rec.get('size', '')),
            'Exercise Needs': rec.get('Exercise Needs', rec.get('exercise_needs', '')),
            'Good with Children': rec.get('Good with Children', rec.get('good_with_children', 'Yes')),
            'Care Level': rec.get('Care Level', rec.get('care_level', '')),
        }
        # Without temperament or description the analysers have nothing to
        # inspect, so fall back to the database.
        if not breed_info['Temperament'] and not breed_info['Description']:
            from dog_database import get_dog_description
            db_info = get_dog_description(breed.replace(' ', '_'))
            if db_info:
                breed_info = db_info
        return breed_info

    @staticmethod
    def _resolve_noise_info(breed: str) -> Dict:
        """Look up noise info by breed name, then by its underscore form."""
        return breed_noise_info.get(breed) or breed_noise_info.get(breed.replace(' ', '_'), {
            'noise_notes': '',
            'noise_level': 'Moderate'
        })

    @staticmethod
    def _ranking_score(rec: Dict) -> float:
        """Return the score used for ranking.

        Falls back to 'overall_score' when 'final_score' is absent, matching
        the base score used by the penalty step. Otherwise an unpenalised
        record that carries only 'overall_score' would sort as 0.
        """
        return rec.get('final_score', rec.get('overall_score', 0))

    def filter_and_adjust_recommendations(self, recommendations: List[Dict],
                                          user_input: str) -> List[Dict]:
        """Filter and re-rank recommendations; this is the main entry point.

        Excludes HIGH-risk breeds, applies penalties to the rest, sorts by
        score descending and rewrites 'rank' starting at 1. The kept dicts
        are the caller's objects and are modified in place
        ('final_score', 'risk_penalty', 'rank').

        Args:
            recommendations: Records with at least a 'breed' key.
            user_input: The user's free-text request.

        Returns:
            The surviving records in ranked order.
        """
        user_context = self.analyze_user_context(user_input)
        filtered_recommendations = []

        for rec in recommendations:
            breed = rec.get('breed', '')
            breed_info = self._resolve_breed_info(rec, breed)
            noise_info = self._resolve_noise_info(breed)

            should_exclude, reason = self.should_exclude_breed(
                breed_info, noise_info, user_context
            )
            if should_exclude:
                print(f" [SmartFilter] Excluded {breed}: {reason}")
                continue

            penalty = self.calculate_risk_penalty(breed_info, noise_info, user_context)
            if penalty > 0:
                original_score = rec.get('final_score', rec.get('overall_score', 0.8))
                rec['final_score'] = original_score * (1 - penalty)
                rec['risk_penalty'] = penalty

            filtered_recommendations.append(rec)

        # Re-sort, highest score first (stable for equal scores).
        filtered_recommendations.sort(key=lambda x: -self._ranking_score(x))

        # Refresh ranks.
        for i, rec in enumerate(filtered_recommendations):
            rec['rank'] = i + 1

        return filtered_recommendations
# Module-level convenience helpers.
# Holds the shared SmartBreedFilter once get_smart_filter() has created it.
_smart_filter = None


def get_smart_filter() -> SmartBreedFilter:
    """Return the shared SmartBreedFilter, creating it on first call."""
    global _smart_filter
    if _smart_filter is not None:
        return _smart_filter
    _smart_filter = SmartBreedFilter()
    return _smart_filter
def apply_smart_filtering(recommendations: List[Dict], user_input: str) -> List[Dict]:
    """Convenience wrapper: run the shared smart filter over recommendations."""
    smart_filter = get_smart_filter()
    return smart_filter.filter_and_adjust_recommendations(recommendations, user_input)
# (end of module; stray file-viewer separator removed)