Spaces:
Paused
Paused
| """ | |
| 轻量级开源幻觉检测器 | |
| 替代 Vectara 模型的最佳方案 | |
| """ | |
| import os | |
| import re | |
| import torch | |
| from typing import List, Dict, Tuple | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| import numpy as np | |
class LightweightHallucinationDetector:
    """Lightweight hallucination detector built on an open-source NLI model.

    A generated text is compared against reference documents with a natural
    language inference (NLI) classifier. Contradiction and neutral
    probabilities raise the hallucination score, entailment lowers it.
    """

    # Fallback checkpoints tried in order (lightest first) when the requested
    # model cannot be loaded.
    _BACKUP_MODELS = (
        "cross-encoder/nli-deberta-v3-xsmall",
        "cross-encoder/nli-roberta-base",
        "facebook/bart-large-mnli",
    )

    # Sentence terminators (CJK full stop, ASCII and full-width marks)
    # followed by optional whitespace.
    _SENTENCE_BOUNDARY = re.compile(r'[。!?.!?!?]\s*')

    # Fragments of this many characters or fewer are ignored.
    _MIN_SENTENCE_CHARS = 10

    # Contribution of each NLI class probability to the hallucination score.
    _LABEL_WEIGHTS = (
        ("CONTRADICTION", 0.9),
        ("NEUTRAL", 0.5),
        ("ENTAILMENT", 0.1),
    )

    # Distribution reported when inference is impossible. It has the same
    # shape as a real result (label -> probability) so downstream code needs
    # no special case; it maps to a mid-range score of 0.5.
    _NEUTRAL_FALLBACK = (
        ("CONTRADICTION", 0.0),
        ("NEUTRAL", 1.0),
        ("ENTAILMENT", 0.0),
    )

    # Decision thresholds.
    _PROBLEM_SENTENCE_THRESHOLD = 0.6
    _SENTENCE_LEVEL_THRESHOLD = 0.7
    _DOCUMENT_LEVEL_THRESHOLD = 0.5

    def __init__(self, model_name="cross-encoder/nli-MiniLM2-L6-H768"):
        """Load the NLI pipeline, falling back to backup models on failure.

        If neither the requested model nor any backup can be loaded,
        ``self.nli_model`` stays ``None`` and ``detect`` returns a default
        "no hallucination" result.

        Args:
            model_name: Hugging Face model id of an NLI sequence classifier,
                e.g. "cross-encoder/nli-MiniLM2-L6-H768" (default),
                "cross-encoder/nli-deberta-v3-xsmall" or
                "cross-encoder/nli-roberta-base".
        """
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.nli_model = None
        print("🔧 初始化轻量级幻觉检测器...")
        print(f" 模型: {model_name}")
        print(f" 设备: {self.device}")
        try:
            self.nli_model = self._load_pipeline(model_name)
            print("✅ 模型加载成功!")
        except Exception as e:  # best effort: any load error triggers the fallback
            print(f"❌ 模型加载失败: {e}")
            print("💡 尝试使用备用模型...")
            for backup_model in self._BACKUP_MODELS:
                try:
                    print(f" 尝试备用模型: {backup_model}")
                    self.nli_model = self._load_pipeline(backup_model)
                except Exception as backup_e:
                    print(f" ❌ 备用模型失败: {backup_e}")
                    continue
                print(f"✅ 备用模型加载成功: {backup_model}")
                self.model_name = backup_model
                break

    def _load_pipeline(self, model_name):
        """Build the text-classification pipeline for ``model_name``."""
        return pipeline(
            "text-classification",
            model=model_name,
            device=self.device,
            truncation=True,
            max_length=512,
            return_all_scores=True,
        )

    def _split_text_into_sentences(self, text: str) -> List[str]:
        """Split ``text`` into sentences on common terminators.

        Terminators are removed from the returned sentences. Fragments of
        10 characters or fewer (after stripping) are dropped.
        """
        stripped = (part.strip() for part in self._SENTENCE_BOUNDARY.split(text))
        return [s for s in stripped if len(s) > self._MIN_SENTENCE_CHARS]

    def _nli_score(self, premise: str, hypothesis: str) -> Dict:
        """Run NLI on a (premise, hypothesis) pair.

        Returns:
            Dict mapping upper-cased label names ("CONTRADICTION", "NEUTRAL",
            "ENTAILMENT") to probabilities. When no model is loaded or
            inference raises, a fully neutral distribution is returned.
        """
        if self.nli_model is None:
            return dict(self._NEUTRAL_FALLBACK)
        try:
            # Pass the pair as text/text_pair so the tokenizer inserts its own
            # separator tokens; a literal "[SEP]" is only meaningful for some
            # tokenizers.
            results = self.nli_model({"text": premise, "text_pair": hypothesis})
            # Depending on library version and input type the scores may be
            # wrapped in an extra list, or returned as a single dict.
            while (
                isinstance(results, (list, tuple))
                and results
                and isinstance(results[0], (list, tuple))
            ):
                results = results[0]
            if isinstance(results, dict):
                results = [results]
            # Label casing differs between checkpoints; normalise to upper case.
            return {
                str(item["label"]).upper(): float(item["score"])
                for item in results
            }
        except Exception as e:
            print(f"❌ NLI 推理失败: {e}")
            return dict(self._NEUTRAL_FALLBACK)

    def _calculate_hallucination_score(self, nli_results: Dict) -> float:
        """Convert NLI class probabilities into a hallucination score.

        Contradiction weighs 0.9, neutral 0.5 and entailment 0.1. Labels are
        matched case-insensitively; missing labels count as 0.0.

        Args:
            nli_results: Mapping of NLI label to probability.

        Returns:
            float: Hallucination score clamped to the range [0, 1].
        """
        by_label = {str(label).upper(): value for label, value in nli_results.items()}
        weighted = sum(
            by_label.get(label, 0.0) * weight
            for label, weight in self._LABEL_WEIGHTS
        )
        return max(0.0, min(1.0, weighted))

    def detect(self, generation: str, documents: str, method="sentence_level") -> Dict:
        """Detect hallucination in ``generation`` given ``documents``.

        Args:
            generation: Text produced by the LLM.
            documents: Reference text used as the NLI premise.
            method: "sentence_level" scores each sentence separately; any
                other value selects document-level scoring.

        Returns:
            Dict with keys "has_hallucination", "hallucination_score",
            "factuality_score", "method" and "details". "details" is a dict
            when scoring ran and a message string otherwise.
        """
        if self.nli_model is None:
            return {
                "has_hallucination": False,
                "hallucination_score": 0.0,
                "factuality_score": 1.0,
                "method": "model_failed",
                "details": "模型加载失败,返回安全默认值"
            }
        if method == "sentence_level":
            return self._detect_sentence_level(generation, documents)
        return self._detect_document_level(generation, documents)

    def _detect_sentence_level(self, generation: str, documents: str) -> Dict:
        """Score every sentence of ``generation`` against ``documents``.

        The overall hallucination score is the maximum sentence score; the
        factuality score is one minus the mean sentence score.
        """
        sentences = self._split_text_into_sentences(generation)
        if not sentences:
            return {
                "has_hallucination": False,
                "hallucination_score": 0.0,
                "factuality_score": 1.0,
                "method": "sentence_level",
                "details": "没有可分析的句子"
            }
        sentence_scores = []
        problematic_sentences = []
        for sentence in sentences:
            nli_result = self._nli_score(documents, sentence)
            hallucination_score = self._calculate_hallucination_score(nli_result)
            sentence_scores.append(hallucination_score)
            if hallucination_score > self._PROBLEM_SENTENCE_THRESHOLD:
                problematic_sentences.append({
                    "sentence": sentence,
                    "score": hallucination_score,
                    "nli_result": nli_result
                })
        avg_hallucination_score = float(np.mean(sentence_scores))
        max_hallucination_score = float(np.max(sentence_scores))
        return {
            # Plain bool (not numpy.bool_) so the result stays JSON-serialisable.
            "has_hallucination": bool(
                max_hallucination_score > self._SENTENCE_LEVEL_THRESHOLD
            ),
            "hallucination_score": max_hallucination_score,
            "factuality_score": 1.0 - avg_hallucination_score,
            "method": "sentence_level",
            "details": {
                "sentence_count": len(sentences),
                "avg_score": avg_hallucination_score,
                "max_score": max_hallucination_score,
                # Only the first three flagged sentences are reported.
                "problematic_sentences": problematic_sentences[:3]
            }
        }

    def _detect_document_level(self, generation: str, documents: str) -> Dict:
        """Score the whole ``generation`` against ``documents`` in one NLI call."""
        nli_result = self._nli_score(documents, generation)
        hallucination_score = self._calculate_hallucination_score(nli_result)
        return {
            "has_hallucination": bool(
                hallucination_score > self._DOCUMENT_LEVEL_THRESHOLD
            ),
            "hallucination_score": float(hallucination_score),
            "factuality_score": float(1.0 - hallucination_score),
            "method": "document_level",
            "details": {
                "nli_result": nli_result,
                # Label with the highest probability (None if no scores).
                "primary_label": max(nli_result, key=nli_result.get, default=None)
            }
        }

    def batch_detect(self, generations: List[str], documents: str, method="sentence_level") -> List[Dict]:
        """Run ``detect`` on several generations against the same documents.

        Args:
            generations: Generated texts to check.
            documents: Reference text shared by all generations.
            method: Detection method forwarded to ``detect``.

        Returns:
            List[Dict]: One detection result per generation, in input order.
        """
        return [self.detect(generation, documents, method) for generation in generations]
| # ========================================== | |
| # 使用示例 | |
| # ========================================== | |
if __name__ == "__main__":
    # Demo: run the detector on a few hand-written cases.
    detector = LightweightHallucinationDetector()

    # Reference text used as the NLI premise.
    documents = "The capital of France is Paris. It is a beautiful city with many historical landmarks."
    test_cases = [
        "The capital of France is Berlin.",  # clearly wrong
        "Paris is the capital of France.",  # correct
        "Paris is the capital of Germany and has many beautiful landmarks.",  # partially wrong
        "The French capital has several famous museums and historical sites."  # correct, phrased differently
    ]

    print("\n" + "="*60)
    print("🧪 轻量级幻觉检测器测试")
    print("="*60)
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n{i}. 测试案例:")
        print(f" 前提: {documents[:50]}...")
        print(f" 假设: {test_case}")
        result = detector.detect(test_case, documents, method="sentence_level")
        print(f" 结果:")
        print(f" - 是否有幻觉: {result['has_hallucination']}")
        print(f" - 幻觉分数: {result['hallucination_score']:.3f}")
        print(f" - 事实性分数: {result['factuality_score']:.3f}")
        print(f" - 检测方法: {result['method']}")
        # "details" is a plain message string when the model failed to load
        # or no sentence could be analysed; only a dict carries the
        # per-sentence breakdown.
        details = result['details']
        if isinstance(details, dict) and details.get('problematic_sentences'):
            print(f" - 问题句子: {len(details['problematic_sentences'])} 个")
    print("\n" + "="*60)
    print("✅ 测试完成!")
    print("="*60)