Upload hallucination_detector.py with huggingface_hub
Browse files- hallucination_detector.py +98 -0
hallucination_detector.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
def _classify_risk_level(score: float) -> str:
|
| 4 |
+
"""リスクスコアをカテゴリに分類する。"""
|
| 5 |
+
if score < 0.1: return "very_low"
|
| 6 |
+
if score < 0.3: return "low"
|
| 7 |
+
if score < 0.6: return "moderate"
|
| 8 |
+
if score < 0.8: return "high"
|
| 9 |
+
return "very_high"
|
| 10 |
+
|
| 11 |
+
def calculate_hallucination_risk_score(alpha_response: dict, validation_result: dict) -> dict:
    """
    Compute the hallucination risk of an alpha-Lobe answer, given the
    beta-Lobe validation result.

    Args:
        alpha_response (dict): Structured response from the alpha-Lobe.
            Optional keys read: "confidence" (float, defaults to 0.7),
            "uncertainties" (list), "sources_cited" (list).
        validation_result (dict): Validation result from the beta-Lobe.
            Reads validation_result["checks"][name]["passed"] for the
            "anchor_facts", "logic" and "context" checks; a check missing
            from "checks" is treated as passed.

    Returns:
        dict: {"hallucination_risk_score": float clamped to [0, 1],
               "risk_level": categorical label,
               "action_required": True when the score is >= 0.3}.
    """
    # --- Risk contributions from failed validation checks ---
    # Contradicting an anchor fact is the largest risk, then a logical
    # contradiction, then a medical-context contradiction.
    check_weights = {"anchor_facts": 0.5, "logic": 0.3, "context": 0.2}

    checks = validation_result["checks"]
    risk_score = 0.0
    for check_name, weight in check_weights.items():
        # Treat an absent check as passed. (Fix: "anchor_facts" previously
        # raised KeyError when missing, unlike the other two checks.)
        if not checks.get(check_name, {"passed": True})["passed"]:
            risk_score += weight

    # --- Risk contributions from the answer itself ---

    # The alpha-Lobe's own self-reported confidence is low.
    if alpha_response.get("confidence", 0.7) < 0.5:
        risk_score += 0.1

    # No mention of uncertainties: possible overconfidence.
    if not alpha_response.get("uncertainties", []):
        risk_score += 0.05

    # No cited sources at all.
    if not alpha_response.get("sources_cited", []):
        risk_score += 0.1

    final_risk_score = min(1.0, risk_score)

    return {
        "hallucination_risk_score": final_risk_score,
        "risk_level": _classify_risk_level(final_risk_score),
        "action_required": final_risk_score >= 0.3,
    }
|
| 60 |
+
|
| 61 |
+
if __name__ == '__main__':
    # --- Usage examples with dummy data ---

    def _report(heading: str, assessment: dict) -> None:
        # Print one risk assessment in the demo's fixed layout.
        print(heading)
        print(f" Risk Score: {assessment['hallucination_risk_score']:.2f} ({assessment['risk_level']})")
        print(f" Action Required: {assessment['action_required']}")

    demo_cases = [
        # Case 1: a safe answer.
        (
            "--- Case 1: Safe Response ---",
            {"confidence": 0.9, "uncertainties": [], "sources_cited": ["JCS 2023 Guideline"]},
            {"checks": {"anchor_facts": {"passed": True}, "logic": {"passed": True}, "context": {"passed": True}}},
        ),
        # Case 2: a risky answer (factual error, no citations).
        (
            "\n--- Case 2: Risky Response ---",
            {"confidence": 0.95, "uncertainties": [], "sources_cited": []},
            {"checks": {"anchor_facts": {"passed": False}, "logic": {"passed": True}, "context": {"passed": True}}},
        ),
        # Case 3: a medium-risk answer (logic error, low confidence).
        (
            "\n--- Case 3: Medium Risk Response ---",
            {"confidence": 0.4, "uncertainties": ["かもしれない"], "sources_cited": ["Some Journal"]},
            {"checks": {"anchor_facts": {"passed": True}, "logic": {"passed": False}, "context": {"passed": True}}},
        ),
    ]

    for heading, alpha_res, validation_res in demo_cases:
        _report(heading, calculate_hallucination_risk_score(alpha_res, validation_res))
|