kofdai committed on
Commit
6df88eb
·
verified ·
1 Parent(s): e7f519c

Upload hallucination_detector.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hallucination_detector.py +98 -0
hallucination_detector.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ def _classify_risk_level(score: float) -> str:
4
+ """リスクスコアをカテゴリに分類する。"""
5
+ if score < 0.1: return "very_low"
6
+ if score < 0.3: return "low"
7
+ if score < 0.6: return "moderate"
8
+ if score < 0.8: return "high"
9
+ return "very_high"
10
+
11
+ def calculate_hallucination_risk_score(alpha_response: dict, validation_result: dict) -> dict:
12
+ """
13
+ α-Lobeの回答とβ-Lobeの検証結果から、ハルシネーションのリスクを計算します。
14
+
15
+ Args:
16
+ alpha_response (dict): α-Lobeからの構造化レスポンス。
17
+ validation_result (dict): β-Lobeによる検証結果。
18
+
19
+ Returns:
20
+ dict: ハルシネーションリスクスコアと関連情報。
21
+ """
22
+ risk_score = 0.0
23
+
24
+ # --- 検証結果に基づくリスク加算 ---
25
+ anchor_passed = validation_result["checks"]["anchor_facts"]["passed"]
26
+ logic_passed = validation_result["checks"].get("logic", {"passed": True})["passed"]
27
+ context_passed = validation_result["checks"].get("context", {"passed": True})["passed"]
28
+
29
+ # Anchor事実との矛盾は最大のリスク
30
+ risk_score += (1 - (1 if anchor_passed else 0)) * 0.5
31
+ # 論理矛盾も高いリスク
32
+ risk_score += (1 - (1 if logic_passed else 0)) * 0.3
33
+ # 医学的文脈の矛盾
34
+ risk_score += (1 - (1 if context_passed else 0)) * 0.2
35
+
36
+ # --- 回答内容に基づくリスク加算 ---
37
+
38
+ # α-Lobe自体の自信度が低い場合
39
+ alpha_confidence = alpha_response.get("confidence", 0.7)
40
+ if alpha_confidence < 0.5:
41
+ risk_score += 0.1
42
+
43
+ # 不確実性に関する言及がない場合、過信しているリスク
44
+ uncertainties = alpha_response.get("uncertainties", [])
45
+ if not uncertainties:
46
+ risk_score += 0.05
47
+
48
+ # 引用元が全くない場合
49
+ sources_cited = alpha_response.get("sources_cited", [])
50
+ if not sources_cited:
51
+ risk_score += 0.1
52
+
53
+ final_risk_score = min(1.0, risk_score)
54
+
55
+ return {
56
+ "hallucination_risk_score": final_risk_score,
57
+ "risk_level": _classify_risk_level(final_risk_score),
58
+ "action_required": final_risk_score >= 0.3
59
+ }
60
+
61
+ if __name__ == '__main__':
62
+ # --- ダミーデータによる使用例 ---
63
+
64
+ # ケース1: 安全な回答
65
+ safe_alpha_res = {
66
+ "confidence": 0.9, "uncertainties": [], "sources_cited": ["JCS 2023 Guideline"]
67
+ }
68
+ safe_validation_res = {
69
+ "checks": {"anchor_facts": {"passed": True}, "logic": {"passed": True}, "context": {"passed": True}}
70
+ }
71
+ risk_1 = calculate_hallucination_risk_score(safe_alpha_res, safe_validation_res)
72
+ print(f"--- Case 1: Safe Response ---")
73
+ print(f" Risk Score: {risk_1['hallucination_risk_score']:.2f} ({risk_1['risk_level']})")
74
+ print(f" Action Required: {risk_1['action_required']}")
75
+
76
+ # ケース2: リスクのある回答 (事実誤認、引用なし)
77
+ risky_alpha_res = {
78
+ "confidence": 0.95, "uncertainties": [], "sources_cited": []
79
+ }
80
+ risky_validation_res = {
81
+ "checks": {"anchor_facts": {"passed": False}, "logic": {"passed": True}, "context": {"passed": True}}
82
+ }
83
+ risk_2 = calculate_hallucination_risk_score(risky_alpha_res, risky_validation_res)
84
+ print(f"\n--- Case 2: Risky Response ---")
85
+ print(f" Risk Score: {risk_2['hallucination_risk_score']:.2f} ({risk_2['risk_level']})")
86
+ print(f" Action Required: {risk_2['action_required']}")
87
+
88
+ # ケース3: リスク中程度の回答 (論理エラー、自信度低い)
89
+ medium_alpha_res = {
90
+ "confidence": 0.4, "uncertainties": ["かもしれない"], "sources_cited": ["Some Journal"]
91
+ }
92
+ medium_validation_res = {
93
+ "checks": {"anchor_facts": {"passed": True}, "logic": {"passed": False}, "context": {"passed": True}}
94
+ }
95
+ risk_3 = calculate_hallucination_risk_score(medium_alpha_res, medium_validation_res)
96
+ print(f"\n--- Case 3: Medium Risk Response ---")
97
+ print(f" Risk Score: {risk_3['hallucination_risk_score']:.2f} ({risk_3['risk_level']})")
98
+ print(f" Action Required: {risk_3['action_required']}")