kofdai committed on
Commit
ea2fff6
·
verified ·
1 Parent(s): 5c2d62b

Upload web_search_autonomy.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. web_search_autonomy.py +127 -0
web_search_autonomy.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ import numpy as np
3
+ import re
4
+
5
class WebSearchAutonomySystem:
    """Decide autonomously whether a question warrants a web search.

    Four independent checks ("levels") each cast a vote with a confidence
    value, and the votes are combined into one weighted score:

    1. trigger keywords in the question,
    2. coarse classification of the question type,
    3. uncertainty phrases in a partially generated answer,
    4. regular-expression special cases.

    Each vote is a dict with ``should_search`` and ``confidence`` keys, plus a
    ``reason`` key when ``should_search`` is true.  Every call to
    :meth:`should_search` is recorded in ``decision_history``.

    NOTE(review): with the default weights only level 1 can reach the
    threshold on its own (0.4 * 0.95 = 0.38); levels 2, 3 and 4 alone give
    0.14, 0.27 and 0.075 respectively.  Confirm that this is intended.
    """

    # Weight of each level's confidence in the aggregate score.
    LEVEL_WEIGHTS = {"level1": 0.4, "level2": 0.2, "level3": 0.3, "level4": 0.1}
    # Minimum aggregate score at which a search is recommended.
    SEARCH_THRESHOLD = 0.3

    # Level 1: keywords whose mere presence in the question triggers a vote.
    _LEVEL1_TRIGGERS = {
        "temporal": ("2025年", "最新", "今年", "最近", "昨日", "今週", "現在", "今", "最新版", "新規", "新しい", "更新"),
        "current_events": ("ニュース", "報告", "速報", "発表", "公開", "リリース"),
        "regulatory": ("認可", "承認", "FDA", "EMA", "PMDA", "許可", "ガイドライン", "基準", "法的", "規制", "ルール"),
    }

    # Level 2: question types and how strongly each one calls for web data.
    _LEVEL2_QUESTION_TYPES = {
        "epidemiology": {"keywords": ("患者数", "発症率", "流行", "疫学"), "web_necessity": 0.9},
        "treatment": {"keywords": ("治療", "薬", "手術", "療法"), "web_necessity": 0.8},
        "prognosis": {"keywords": ("予後", "生存率", "予測"), "web_necessity": 0.7},
        "diagnosis": {"keywords": ("診断", "検査", "診断基準"), "web_necessity": 0.6},
        "mechanism": {"keywords": ("メカニズム", "機序", "なぜ", "仕組み"), "web_necessity": 0.4},
    }
    # A question type votes for a search only above this necessity.
    _LEVEL2_MIN_NECESSITY = 0.5

    # Level 3: phrases in a partial answer that signal uncertainty.  The
    # fourth entry used to carry a leading placeholder tilde, which made it
    # impossible to match in real text.
    _LEVEL3_UNCERTAINTY_PHRASES = (
        "不明である",
        "確定的ではない",
        "議論の余地がある",
        "かもしれない",
        "可能性がある",
        "詳しくは",
        "正確には",
        "詳細については",
        "最新情報では",
    )

    # Level 4: special-case patterns, compiled once instead of on every call.
    _LEVEL4_PATTERNS = {
        "drug": re.compile(r"(医薬品|薬|ドラッグ).*(名前|効果|副作用)"),
        "legal": re.compile(r"(法的|合法|違法|規制)"),
        "geographic": re.compile(r"(日本|アメリカ|EU).*(ガイドライン|基準)"),
        "conference": re.compile(r"(学会|カンファレンス).*(発表|報告)"),
    }

    def __init__(self):
        # One entry per should_search() call, in call order.
        self.decision_history = []

    @staticmethod
    def _no_search() -> dict:
        """Return a fresh "do not search" vote."""
        return {"should_search": False, "confidence": 0.0}

    def _check_level1_keywords(self, question: str) -> dict:
        """Level 1: up-front check for trigger keywords in the question."""
        detected = [
            keyword
            for keywords in self._LEVEL1_TRIGGERS.values()
            for keyword in keywords
            if keyword in question
        ]
        if not detected:
            return self._no_search()
        return {
            "should_search": True,
            "confidence": 0.95,
            "reason": f"L1: Trigger keyword(s) found: {', '.join(detected)}",
        }

    def _check_level2_semantics(self, question: str) -> dict:
        """Level 2: classify the question type by keyword.

        The matching type with the highest ``web_necessity`` wins; it votes
        for a search only when that necessity exceeds the minimum.
        """
        best_necessity = 0.0
        best_category = None
        for category, config in self._LEVEL2_QUESTION_TYPES.items():
            if config["web_necessity"] <= best_necessity:
                continue
            if any(keyword in question for keyword in config["keywords"]):
                best_necessity = config["web_necessity"]
                best_category = category
        if best_necessity > self._LEVEL2_MIN_NECESSITY:
            return {
                "should_search": True,
                "confidence": 0.70,
                "reason": f"L2: Question type is '{best_category}'",
            }
        return self._no_search()

    def _check_level3_inference_state(self, inference_state) -> dict:
        """Level 3: look for uncertainty phrases in a partial answer.

        ``inference_state`` may be ``None``; otherwise its
        ``partial_response`` attribute is scanned.  A missing or non-string
        ``partial_response`` yields a "no search" vote instead of raising.
        """
        if not inference_state:
            return self._no_search()
        partial_response = getattr(inference_state, "partial_response", None)
        if not isinstance(partial_response, str):
            return self._no_search()

        for phrase in self._LEVEL3_UNCERTAINTY_PHRASES:
            if phrase in partial_response:
                return {
                    "should_search": True,
                    "confidence": 0.9,
                    "reason": f"L3: Uncertainty phrase found: '{phrase}'",
                }
        return self._no_search()

    def _check_level4_special_cases(self, question: str) -> dict:
        """Level 4: match the question against the special-case patterns."""
        for trigger_type, pattern in self._LEVEL4_PATTERNS.items():
            if pattern.search(question):
                return {
                    "should_search": True,
                    "confidence": 0.75,
                    "reason": f"L4: Special case matched: '{trigger_type}'",
                }
        return self._no_search()

    def _aggregate_decisions(self, decisions: dict) -> dict:
        """Combine the per-level votes into the final decision.

        Returns a dict with:
          * ``should_search``: whether the weighted score reaches the threshold,
          * ``aggregate_score``: sum of weight * confidence over positive votes,
          * ``decision_details``: the ``decisions`` mapping that was passed in,
          * ``confidence``: mean confidence of the positive votes, or 0.0.

        A level missing from ``decisions`` counts as not voting.
        """
        score = sum(
            decisions.get(level, {}).get("confidence", 0) * weight
            for level, weight in self.LEVEL_WEIGHTS.items()
            if decisions.get(level, {}).get("should_search")
        )
        positive_confidences = [
            vote.get("confidence", 0)
            for vote in decisions.values()
            if vote.get("should_search")
        ]
        # float() keeps the result a plain Python float rather than np.float64.
        confidence = float(np.mean(positive_confidences)) if positive_confidences else 0.0

        return {
            "should_search": score >= self.SEARCH_THRESHOLD,
            "aggregate_score": score,
            "decision_details": decisions,
            "confidence": confidence,
        }

    def should_search(self, question: str, inference_state=None) -> dict:
        """Run all four levels and return the aggregated decision.

        The decision is also appended to ``decision_history`` together with
        the question, the per-level votes and an ISO-format timestamp.
        """
        decisions = {
            "level1": self._check_level1_keywords(question),
            "level2": self._check_level2_semantics(question),
            "level3": self._check_level3_inference_state(inference_state),
            "level4": self._check_level4_special_cases(question),
        }

        final_decision = self._aggregate_decisions(decisions)
        self.decision_history.append(
            {
                "timestamp": datetime.now().isoformat(),
                "question": question,
                "decisions": decisions,
                "final": final_decision,
            }
        )
        return final_decision
94
+
95
if __name__ == '__main__':
    # Requires numpy: pip install numpy
    search_system = WebSearchAutonomySystem()

    def _print_verdict(result):
        """Print the Yes/No verdict, the score, and the reason of each level that fired."""
        verdict = 'Yes' if result['should_search'] else 'No'
        print(f" -> 検索判定: {verdict} (スコア: {result['aggregate_score']:.3f})")
        for vote in result['decision_details'].values():
            if vote['should_search']:
                print(f" - {vote['reason']}")

    # --- Test cases: question-only (pre-inference) decisions ---
    # The expected outcomes below follow the weights 0.4/0.2/0.3/0.1 and the
    # 0.3 threshold used by _aggregate_decisions.
    test_questions = [
        # No level fires ("mechanism" necessity 0.4 is below the L2 cutoff) -> 0.000 -> No
        "心筋梗塞のメカニズムについて教えて",
        # L1 (0.95) and L2 (0.70) fire -> 0.4*0.95 + 0.2*0.70 = 0.520 -> Yes
        "2025年最新の心筋梗塞治療ガイドラインは?",
        # Only L2 (0.70) fires -> 0.2*0.70 = 0.140 -> No
        "糖尿病の疫学について知りたい",
        # L1 (0.95), L2 (0.70) and L4 (0.75) fire -> 0.38 + 0.14 + 0.075 = 0.595 -> Yes
        "その薬の法的な扱いはどうなっていますか?",
    ]

    print("--- 事前判定テスト ---")
    for question in test_questions:
        outcome = search_system.should_search(question)
        print(f"\n質問: '{question}'")
        _print_verdict(outcome)

    # --- Dynamic decision based on a partially generated answer ---
    print("\n--- 推論中動的判定テスト ---")

    class MockInferenceState:
        """Minimal stand-in exposing only the ``partial_response`` attribute."""

        def __init__(self, text):
            self.partial_response = text

    # Only L3 (0.9) fires here -> 0.3*0.9 = 0.270 -> No
    inference_state = MockInferenceState("この症状の原因は明確ではなく、いくつかの可能性があると言われています。")
    dynamic_decision = search_system.should_search("原因は?", inference_state=inference_state)
    print(f"質問: '原因は?' (推論中: '{inference_state.partial_response}')")
    _print_verdict(dynamic_decision)