# Source: adaptive_rag/hallucination_detector.py
# Author: lanny xu
# Commit: 8821b53 ("add async")
"""
专业幻觉检测模块
支持多种检测方法:NLI模型、专门检测模型、轻量级模型、混合检测
"""
import re
from typing import List, Dict, Tuple
import torch
from transformers import (
AutoModelForSequenceClassification,
AutoTokenizer,
pipeline
)
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# 导入轻量级检测器
from lightweight_hallucination_detector import LightweightHallucinationDetector
class VectaraHallucinationDetector:
    """Hallucination detector backed by Vectara's HHEM
    (Hughes Hallucination Evaluation Model).

    If the model cannot be downloaded/loaded, the detector degrades to a
    disabled state (``self.model is None``) and reports "no hallucination".
    """

    def __init__(self):
        """Load the HHEM model and move it to GPU when available."""
        print("🔧 初始化 Vectara 幻觉检测模型...")
        try:
            self.model_name = "vectara/hallucination_evaluation_model"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
            self.model.eval()  # inference only — disable dropout etc.
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model.to(self.device)
            print(f"✅ Vectara 模型加载成功 (device: {self.device})")
        except Exception as e:
            # Fail soft: callers fall back to the NLI detector instead.
            print(f"⚠️ Vectara 模型加载失败: {e}")
            print("💡 尝试使用 NLI 模型作为备选...")
            self.model = None

    def detect(self, generation: str, documents: str) -> Dict:
        """Score *generation* for hallucination against *documents*.

        Args:
            generation: text produced by the LLM.
            documents: reference document text.

        Returns:
            ``{"has_hallucination": bool, "hallucination_score": float,
            "factuality_score": float}`` with scores in [0, 1].
        """
        # Disabled model (or any runtime failure below) fails open:
        # report "fully factual" rather than crashing the pipeline.
        fail_open = {"has_hallucination": False, "hallucination_score": 0.0, "factuality_score": 1.0}
        if self.model is None:
            return fail_open
        try:
            encoded = self.tokenizer(
                documents,
                generation,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True,
            ).to(self.device)
            with torch.no_grad():
                probabilities = torch.softmax(self.model(**encoded).logits, dim=-1)
            # assumes logit index 0 = factual, index 1 = hallucinated —
            # TODO confirm against the HHEM model card
            factual = probabilities[0][0].item()
            hallucinated = probabilities[0][1].item()
            return {
                "has_hallucination": hallucinated > 0.5,  # 0.5 decision threshold
                "hallucination_score": hallucinated,
                "factuality_score": factual,
            }
        except Exception as e:
            print(f"❌ Vectara 检测失败: {e}")
            return fail_open

    def grade(self, generation: str, documents) -> str:
        """Grading interface compatible with the original API.

        Args:
            generation: text produced by the LLM.
            documents: reference documents (a string, or a list of
                Document-like objects / strings).

        Returns:
            ``"yes"`` when no hallucination was found, ``"no"`` otherwise.
        """
        # Flatten a document list into one text blob.
        if isinstance(documents, list):
            doc_text = "\n\n".join(
                item.page_content if hasattr(item, 'page_content') else str(item)
                for item in documents
            )
        else:
            doc_text = str(documents)
        verdict = self.detect(generation, doc_text)
        if verdict['has_hallucination']:
            print(f"⚠️ Vectara 检测到幻觉 (得分: {verdict['hallucination_score']:.2f})")
        else:
            print(f"✅ Vectara 未检测到幻觉 (真实性得分: {verdict['factuality_score']:.2f})")
        return "no" if verdict['has_hallucination'] else "yes"
class NLIHallucinationDetector:
    """Hallucination detector based on NLI (Natural Language Inference).

    Each generated sentence is checked against each reference document
    chunk; a sentence counts as grounded as soon as any chunk entails it
    (max-entailment strategy). Uses small DeBERTa/MiniLM cross-encoders,
    with larger fallbacks.
    """

    def __init__(self):
        """Try candidate NLI models smallest-first; disable on total failure."""
        print("🔧 初始化 NLI 幻觉检测模型...")
        # Candidate models ordered small -> large.
        models_to_try = [
            "cross-encoder/nli-deberta-v3-xsmall",  # smallest, ~40MB
            "cross-encoder/nli-deberta-v3-small",   # small, ~150MB
            "cross-encoder/nli-MiniLM2-L6-H768",    # lightweight, ~90MB
            "facebook/bart-large-mnli",             # fallback
        ]
        self.nli_model = None
        for model_name in models_to_try:
            try:
                print(f" 尝试加载: {model_name}...")
                self.nli_model = pipeline(
                    "text-classification",
                    model=model_name,
                    device=0 if torch.cuda.is_available() else -1,
                    truncation=True,
                    max_length=512
                )
                print(f"✅ NLI 模型加载成功: {model_name}")
                self.model_name = model_name
                break  # first model that loads wins
            except Exception as e:
                print(f" ⚠️ {model_name} 加载失败: {str(e)[:80]}")
                continue
        if self.nli_model is None:
            print("❌ 所有 NLI 模型加载失败,将禁用 NLI 检测")

    def split_sentences(self, text: str) -> List[str]:
        """Split *text* into sentences on CJK/ASCII terminators.

        Simple regex split — a full NLP sentence splitter could replace it.
        """
        sentences = re.split(r'[。!?\.\!\?]\s*', text)
        return [s.strip() for s in sentences if s.strip()]

    def detect(self, generation: str, documents) -> Dict:
        """Detect hallucinations sentence-by-sentence (max-entailment).

        Args:
            generation: text produced by the LLM.
            documents: reference documents (str, or a list of
                Document-like objects / strings).

        Returns:
            {
                "has_hallucination": bool,
                "contradiction_count": int,
                "neutral_count": int,
                "entailment_count": int,
                "problematic_sentences": List[str]
            }
        """
        empty_result = {
            "has_hallucination": False,
            "contradiction_count": 0,
            "neutral_count": 0,
            "entailment_count": 0,
            "problematic_sentences": []
        }
        if self.nli_model is None:
            print("⚠️ NLI 模型未加载,跳过检测")
            return empty_result
        # 1. Normalize the documents into a list of plain-text chunks.
        docs_content = []
        if isinstance(documents, list):
            for doc in documents:
                if hasattr(doc, 'page_content'):
                    docs_content.append(doc.page_content)
                else:
                    docs_content.append(str(doc))
        else:
            docs_content = [str(documents)]
        # 2. Split the generation into sentences.
        sentences = self.split_sentences(generation)
        if not sentences:
            print("⚠️ 没有检测到有效句子")
            return empty_result
        contradiction_count = 0
        neutral_count = 0
        entailment_count = 0
        problematic_sentences = []
        # 3. Per-sentence check: a sentence passes if ANY chunk entails it.
        for sentence in sentences:
            if len(sentence) < 10:  # skip fragments too short to judge
                continue
            best_label = "neutral"  # default: no supporting evidence found
            for doc_content in docs_content:
                # Truncate each chunk individually (~800 chars usually fits
                # 512 tokens) — NOT the concatenation of all chunks, which
                # would discard every document after the first.
                premise = doc_content[:800]
                try:
                    if hasattr(self, 'model_name') and 'cross-encoder' in self.model_name:
                        result = self.nli_model(
                            f"{premise} [SEP] {sentence}",
                            truncation=True,
                            max_length=512
                        )
                    else:
                        # FIX: sentence pairs must go to the pipeline as a
                        # {"text", "text_pair"} dict. The old call passed two
                        # positional args, which raised a TypeError that the
                        # except below swallowed — so every sentence silently
                        # scored "neutral".
                        result = self.nli_model(
                            {"text": premise, "text_pair": sentence},
                            truncation=True,
                            max_length=512
                        )
                    if isinstance(result, list) and len(result) > 0:
                        current_label = result[0]['label'].lower()
                        # Priority: entailment > contradiction > neutral.
                        if 'entail' in current_label:
                            best_label = "entailment"
                            break  # supported — stop scanning documents
                        if 'contradict' in current_label and best_label == "neutral":
                            # Record the contradiction but keep scanning:
                            # another document may still entail the sentence.
                            best_label = "contradiction"
                except Exception as e:
                    print(f"⚠️ NLI 子任务失败: {str(e)[:50]}")
                    continue
            # Tally this sentence's final verdict.
            if best_label == "entailment":
                entailment_count += 1
            elif best_label == "contradiction":
                contradiction_count += 1
                problematic_sentences.append(sentence)
            else:  # neutral
                neutral_count += 1
        # 4. Aggregate: flag hallucination when contradictions exceed 30% or
        # when almost nothing (>80% neutral) is grounded in the documents.
        total_sentences = contradiction_count + neutral_count + entailment_count
        has_hallucination = False
        if total_sentences > 0:
            contradiction_ratio = contradiction_count / total_sentences
            neutral_ratio = neutral_count / total_sentences
            has_hallucination = (contradiction_ratio > 0.3) or (neutral_ratio > 0.8)
        print(f"📊 NLI 检测结果: Entail={entailment_count}, Contra={contradiction_count}, Neutral={neutral_count}")
        return {
            "has_hallucination": has_hallucination,
            "contradiction_count": contradiction_count,
            "neutral_count": neutral_count,
            "entailment_count": entailment_count,
            "problematic_sentences": problematic_sentences
        }

    def grade(self, generation: str, documents) -> str:
        """Grading interface compatible with the original API.

        Args:
            generation: text produced by the LLM.
            documents: reference documents (str or list).

        Returns:
            ``"yes"`` when no hallucination was found, ``"no"`` otherwise.
        """
        # FIX: pass the documents through unchanged — detect() already handles
        # lists and applies the per-document max-entailment strategy. The old
        # code joined the list into one string first, which truncated
        # everything past ~800 characters and defeated multi-document matching.
        result = self.detect(generation, documents)
        if result['has_hallucination']:
            print(f"⚠️ NLI 检测到幻觉")
            print(f" 矛盾句子: {result['contradiction_count']}")
            print(f" 中立句子: {result['neutral_count']}")
            print(f" 蕴含句子: {result['entailment_count']}")
            if result['problematic_sentences']:
                print(f" 问题句子: {result['problematic_sentences'][:2]}")
        else:
            print(f"✅ NLI 未检测到幻觉")
        return "no" if result['has_hallucination'] else "yes"
class HybridHallucinationDetector:
    """Hybrid hallucination detector.

    Combines the Vectara model with the NLI model: Vectara first (most
    accurate), NLI as a second opinion when Vectara is negative or absent.
    """

    def __init__(self, use_vectara: bool = True, use_nli: bool = True):
        """Construct the requested sub-detectors.

        Args:
            use_vectara: enable the Vectara detector.
            use_nli: enable the NLI detector.

        Raises:
            RuntimeError: when no sub-detector could be initialized.
        """
        self.detectors = {}
        planned = []
        if use_vectara:
            planned.append(("vectara", VectaraHallucinationDetector, "Vectara"))
        if use_nli:
            planned.append(("nli", NLIHallucinationDetector, "NLI"))
        for key, factory, label in planned:
            try:
                self.detectors[key] = factory()
            except Exception as e:
                print(f"⚠️ {label} 检测器初始化失败: {e}")
        if not self.detectors:
            raise RuntimeError("❌ 所有检测器初始化失败!")
        print(f"✅ 混合检测器就绪,已加载: {list(self.detectors.keys())}")

    def detect(self, generation: str, documents: str) -> Dict:
        """Run the combined hallucination check.

        Returns:
            {
                "has_hallucination": bool,
                "confidence": float,
                "vectara_result": Dict,   # only present if Vectara ran
                "nli_result": Dict,       # only present if NLI ran
                "method_used": str
            }
        """
        verdict = {
            "has_hallucination": False,
            "confidence": 0.0,
            "method_used": ""
        }
        # Stage 1: Vectara first. The 0.3 threshold is deliberately lower
        # than the model's own 0.5 cut-off, for higher sensitivity.
        vectara = self.detectors.get('vectara')
        if vectara is not None:
            vectara_result = vectara.detect(generation, documents)
            verdict['vectara_result'] = vectara_result
            if vectara_result['hallucination_score'] > 0.3:
                verdict['has_hallucination'] = True
                verdict['confidence'] = vectara_result['hallucination_score']
                verdict['method_used'] = 'vectara'
                return verdict
            verdict['method_used'] = 'vectara'
        # Stage 2: NLI second opinion when Vectara was negative or absent.
        nli = self.detectors.get('nli')
        if nli is not None:
            nli_result = nli.detect(generation, documents)
            verdict['nli_result'] = nli_result
            if nli_result['has_hallucination']:
                verdict['has_hallucination'] = True
                # Confidence = (contradictions + half-weighted neutrals) / total.
                total = (nli_result['contradiction_count']
                         + nli_result['neutral_count']
                         + nli_result['entailment_count'])
                if total > 0:
                    verdict['confidence'] = (
                        nli_result['contradiction_count']
                        + nli_result['neutral_count'] * 0.5
                    ) / total
                verdict['method_used'] = 'nli'
            elif not verdict['method_used']:
                verdict['method_used'] = 'nli'
        # Stage 3: agreement vote when both models produced a result.
        # NOTE(review): this branch looks unreachable — a Vectara positive
        # (score > 0.5) always triggers the > 0.3 early return above — but it
        # is kept to preserve the original behavior exactly.
        if 'vectara_result' in verdict and 'nli_result' in verdict:
            if verdict['vectara_result']['has_hallucination'] and verdict['nli_result']['has_hallucination']:
                verdict['has_hallucination'] = True
                verdict['confidence'] = min(
                    verdict.get('vectara_result', {}).get('hallucination_score', 0.5),
                    verdict.get('confidence', 0.5)
                )
                verdict['method_used'] = 'vectara+nli'
        return verdict

    def grade(self, generation: str, documents) -> str:
        """Grading interface compatible with the original API.

        Args:
            generation: text produced by the LLM.
            documents: reference documents (str or list).

        Returns:
            ``"yes"`` when no hallucination was found, ``"no"`` otherwise.
        """
        # Flatten a document list into one text blob.
        if isinstance(documents, list):
            doc_text = "\n\n".join(
                item.page_content if hasattr(item, 'page_content') else str(item)
                for item in documents
            )
        else:
            doc_text = str(documents)
        outcome = self.detect(generation, doc_text)
        if outcome['has_hallucination']:
            print(f"⚠️ 检测到幻觉 (置信度: {outcome['confidence']:.2f}, 方法: {outcome['method_used']})")
            nli_result = outcome.get('nli_result')
            if nli_result is not None:
                print(f" 矛盾句子: {nli_result['contradiction_count']}")
                if nli_result['problematic_sentences']:
                    print(f" 问题句子: {nli_result['problematic_sentences'][:2]}")
        else:
            print(f"✅ 未检测到幻觉 (方法: {outcome['method_used']})")
        return "no" if outcome['has_hallucination'] else "yes"
def initialize_hallucination_detector(method: str = "nli") -> object:
    """Factory for a hallucination detector instance.

    Args:
        method: one of ``'vectara'``, ``'nli'``, or ``'hybrid'``.

    Returns:
        The corresponding detector instance.

    Raises:
        ValueError: for any unrecognized *method*.
    """
    if method == "hybrid":
        # Vectara is deliberately disabled here; "hybrid" currently wraps NLI only.
        return HybridHallucinationDetector(use_vectara=False, use_nli=True)
    if method == "vectara":
        return VectaraHallucinationDetector()
    if method == "nli":
        return NLIHallucinationDetector()
    raise ValueError(f"未知的检测方法: {method}")