Spaces:

ahaahaaha
/

adaptive_rag

Paused

File size: 19,736 Bytes

"""
专业幻觉检测模块
支持多种检测方法：NLI模型、专门检测模型、轻量级模型、混合检测
"""

import re
from typing import List, Dict, Tuple
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline
)
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 导入轻量级检测器
from lightweight_hallucination_detector import LightweightHallucinationDetector


class VectaraHallucinationDetector:
    """
    Vectara 专门的幻觉检测模型
    使用 HHEM (Hughes Hallucination Evaluation Model)
    """
    
    def __init__(self):
        """初始化 Vectara 幻觉检测模型"""
        print("🔧 初始化 Vectara 幻觉检测模型...")
        
        try:
            self.model_name = "vectara/hallucination_evaluation_model"
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
            self.model.eval()  # 设置为评估模式
            
            # 移动到GPU（如果可用）
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.model.to(self.device)
            
            print(f"✅ Vectara 模型加载成功 (device: {self.device})")
        except Exception as e:
            print(f"⚠️ Vectara 模型加载失败: {e}")
            print("💡 尝试使用 NLI 模型作为备选...")
            self.model = None
    
    def detect(self, generation: str, documents: str) -> Dict:
        """
        检测幻觉
        
        Args:
            generation: LLM 生成的内容
            documents: 参考文档
            
        Returns:
            {
                "has_hallucination": bool,
                "hallucination_score": float (0-1),
                "factuality_score": float (0-1)
            }
        """
        if self.model is None:
            return {"has_hallucination": False, "hallucination_score": 0.0, "factuality_score": 1.0}
        
        try:
            # 准备输入
            inputs = self.tokenizer(
                documents,
                generation,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            ).to(self.device)
            
            # 推理
            with torch.no_grad():
                outputs = self.model(**inputs)
                logits = outputs.logits
                probs = torch.softmax(logits, dim=-1)
            
            # Vectara 模型输出：[0] = factual, [1] = hallucinated
            factuality_score = probs[0][0].item()
            hallucination_score = probs[0][1].item()
            
            # 判断是否有幻觉（阈值 0.5）
            has_hallucination = hallucination_score > 0.5
            
            return {
                "has_hallucination": has_hallucination,
                "hallucination_score": hallucination_score,
                "factuality_score": factuality_score
            }
        
        except Exception as e:
            print(f"❌ Vectara 检测失败: {e}")
            return {"has_hallucination": False, "hallucination_score": 0.0, "factuality_score": 1.0}
    
    def grade(self, generation: str, documents) -> str:
        """
        兼容原有接口的检测方法
        
        Args:
            generation: LLM 生成的内容
            documents: 参考文档（可以是字符串或列表）
            
        Returns:
            "yes" 表示无幻觉，"no" 表示有幻觉
        """
        # 处理文档格式
        if isinstance(documents, list):
            doc_text = "\n\n".join([
                doc.page_content if hasattr(doc, 'page_content') else str(doc)
                for doc in documents
            ])
        else:
            doc_text = str(documents)
        
        # 检测幻觉
        result = self.detect(generation, doc_text)
        
        # 打印详细信息
        if result['has_hallucination']:
            print(f"⚠️ Vectara 检测到幻觉 (得分: {result['hallucination_score']:.2f})")
        else:
            print(f"✅ Vectara 未检测到幻觉 (真实性得分: {result['factuality_score']:.2f})")
        
        # 返回兼容格式
        return "no" if result['has_hallucination'] else "yes"


class NLIHallucinationDetector:
    """
    基于 NLI (Natural Language Inference) 的幻觉检测
    使用 DeBERTa 模型
    """
    
    def __init__(self):
        """初始化 NLI 模型"""
        print("🔧 初始化 NLI 幻觉检测模型...")
        
        # 尝试多个模型，按照从小到大的顺序
        models_to_try = [
            "cross-encoder/nli-deberta-v3-xsmall",  # 最小 40MB
            "cross-encoder/nli-deberta-v3-small",   # 小 150MB
            "cross-encoder/nli-MiniLM2-L6-H768",    # 轻量 90MB
            "facebook/bart-large-mnli",              # 备用
        ]
        
        self.nli_model = None
        
        for model_name in models_to_try:
            try:
                print(f"   尝试加载: {model_name}...")
                self.nli_model = pipeline(
                    "text-classification",
                    model=model_name,
                    device=0 if torch.cuda.is_available() else -1,
                    truncation=True,
                    max_length=512
                )
                print(f"✅ NLI 模型加载成功: {model_name}")
                self.model_name = model_name
                break  # 成功加载，退出循环
            except Exception as e:
                print(f"   ⚠️ {model_name} 加载失败: {str(e)[:80]}")
                continue
        
        if self.nli_model is None:
            print("❌ 所有 NLI 模型加载失败，将禁用 NLI 检测")
    
    def split_sentences(self, text: str) -> List[str]:
        """分割句子"""
        # 简单的句子分割（可以用更复杂的 NLP 工具）
        sentences = re.split(r'[。！？\.\!\?]\s*', text)
        return [s.strip() for s in sentences if s.strip()]
    
    def detect(self, generation: str, documents) -> Dict:
        """
        检测幻觉（支持多文档最大匹配策略）
        
        Args:
            generation: LLM 生成的内容
            documents: 参考文档 (str 或 List[Document/str])
            
        Returns:
            {
                "has_hallucination": bool,
                "contradiction_count": int,
                "neutral_count": int,
                "entailment_count": int,
                "problematic_sentences": List[str]
            }
        """
        if self.nli_model is None:
            print("⚠️ NLI 模型未加载，跳过检测")
            return {
                "has_hallucination": False,
                "contradiction_count": 0,
                "neutral_count": 0,
                "entailment_count": 0,
                "problematic_sentences": []
            }
        
        # 1. 预处理文档列表
        docs_content = []
        if isinstance(documents, list):
            for doc in documents:
                if hasattr(doc, 'page_content'):
                    docs_content.append(doc.page_content)
                else:
                    docs_content.append(str(doc))
        else:
            # 如果是单个字符串，尝试按换行符分割，或者作为单文档处理
            docs_content = [str(documents)]

        # 2. 分割生成内容为句子
        sentences = self.split_sentences(generation)
        
        if not sentences:
            print("⚠️ 没有检测到有效句子")
            return {
                "has_hallucination": False,
                "contradiction_count": 0,
                "neutral_count": 0,
                "entailment_count": 0,
                "problematic_sentences": []
            }
        
        contradiction_count = 0
        neutral_count = 0
        entailment_count = 0
        problematic_sentences = []
        
        # 3. 逐句检测 (Max-Entailment Strategy)
        for sentence in sentences:
            if len(sentence) < 10:  # 跳过太短的句子
                continue
            
            # 默认为 Neutral (找不到支持)
            best_label = "neutral"
            best_score = 0.0
            
            # 遍历所有文档块，寻找最佳匹配
            # 只要有一个文档能 Entail (支持) 这个句子，就算通过
            sentence_supported = False
            
            for doc_content in docs_content:
                # 截断单个文档块以适应模型 (保留前 800 字符，通常足够覆盖 512 tokens)
                # 注意：这里是对单个文档块截断，而不是对所有文档拼接后截断
                premise = doc_content[:800]
                
                try:
                    # NLI 推理
                    if hasattr(self, 'model_name') and 'cross-encoder' in self.model_name:
                        result = self.nli_model(
                            f"{premise} [SEP] {sentence}",
                            truncation=True,
                            max_length=512
                        )
                    else:
                        result = self.nli_model(
                            sentence,
                            premise,
                            truncation=True,
                            max_length=512
                        )
                    
                    # 解析结果
                    if isinstance(result, list) and len(result) > 0:
                        current_label = result[0]['label'].lower()
                        current_score = result[0]['score']
                        
                        # 优先级逻辑：Entailment > Contradiction > Neutral
                        # 如果找到 Entailment，立即停止查找（已验证）
                        if 'entailment' in current_label or 'entail' in current_label:
                            best_label = "entailment"
                            sentence_supported = True
                            break
                        
                        # 如果是 Contradiction，记录下来，但继续找（也许其他文档能解释）
                        if 'contradiction' in current_label or 'contradict' in current_label:
                            # 只有当目前是 Neutral 时才更新为 Contradiction
                            # 这样防止 Contradiction 覆盖了潜在的 Entailment (虽然上面break了，但这逻辑保持严谨)
                            if best_label == "neutral":
                                best_label = "contradiction"
                                best_score = current_score
                                
                    else:
                        continue
                        
                except Exception as e:
                    print(f"⚠️ NLI 子任务失败: {str(e)[:50]}")
                    continue
            
            # 统计该句子的最终判定
            if best_label == "entailment":
                entailment_count += 1
            elif best_label == "contradiction":
                contradiction_count += 1
                problematic_sentences.append(sentence)
            else: # neutral
                neutral_count += 1
                
        # 4. 综合评分
        total_sentences = contradiction_count + neutral_count + entailment_count
        
        has_hallucination = False
        if total_sentences > 0:
            contradiction_ratio = contradiction_count / total_sentences
            neutral_ratio = neutral_count / total_sentences
            # 阈值判断
            has_hallucination = (contradiction_ratio > 0.3) or (neutral_ratio > 0.8)
            
            # Debug 信息
            print(f"📊 NLI 检测结果: Entail={entailment_count}, Contra={contradiction_count}, Neutral={neutral_count}")
        
        return {
            "has_hallucination": has_hallucination,
            "contradiction_count": contradiction_count,
            "neutral_count": neutral_count,
            "entailment_count": entailment_count,
            "problematic_sentences": problematic_sentences
        }
    
    def grade(self, generation: str, documents) -> str:
        """
        兼容原有接口的检测方法
        
        Args:
            generation: LLM 生成的内容
            documents: 参考文档（可以是字符串或列表）
            
        Returns:
            "yes" 表示无幻觉，"no" 表示有幻觉
        """
        # 处理文档格式
        if isinstance(documents, list):
            doc_text = "\n\n".join([
                doc.page_content if hasattr(doc, 'page_content') else str(doc)
                for doc in documents
            ])
        else:
            doc_text = str(documents)
        
        # 检测幻觉
        result = self.detect(generation, doc_text)
        
        # 打印详细信息
        if result['has_hallucination']:
            print(f"⚠️ NLI 检测到幻觉")
            print(f"   矛盾句子: {result['contradiction_count']}")
            print(f"   中立句子: {result['neutral_count']}")
            print(f"   蕴含句子: {result['entailment_count']}")
            if result['problematic_sentences']:
                print(f"   问题句子: {result['problematic_sentences'][:2]}")
        else:
            print(f"✅ NLI 未检测到幻觉")
        
        # 返回兼容格式
        return "no" if result['has_hallucination'] else "yes"


class HybridHallucinationDetector:
    """
    混合幻觉检测器
    结合 Vectara 模型和 NLI 模型，提供最佳检测效果
    """
    
    def __init__(self, use_vectara: bool = True, use_nli: bool = True):
        """
        初始化混合检测器
        
        Args:
            use_vectara: 是否使用 Vectara 模型
            use_nli: 是否使用 NLI 模型
        """
        self.detectors = {}
        
        if use_vectara:
            try:
                self.detectors['vectara'] = VectaraHallucinationDetector()
            except Exception as e:
                print(f"⚠️ Vectara 检测器初始化失败: {e}")
        
        if use_nli:
            try:
                self.detectors['nli'] = NLIHallucinationDetector()
            except Exception as e:
                print(f"⚠️ NLI 检测器初始化失败: {e}")
        
        if not self.detectors:
            raise RuntimeError("❌ 所有检测器初始化失败！")
        
        print(f"✅ 混合检测器就绪，已加载: {list(self.detectors.keys())}")
    
    def detect(self, generation: str, documents: str) -> Dict:
        """
        综合检测幻觉
        
        Returns:
            {
                "has_hallucination": bool,
                "confidence": float,
                "vectara_result": Dict,
                "nli_result": Dict,
                "method_used": str
            }
        """
        results = {
            "has_hallucination": False,
            "confidence": 0.0,
            "method_used": ""
        }
        
        # 1. 优先使用 Vectara（最准确）
        if 'vectara' in self.detectors:
            vectara_result = self.detectors['vectara'].detect(generation, documents)
            results['vectara_result'] = vectara_result
            
            if vectara_result['hallucination_score'] > 0.3:  # 降低阈值以提高灵敏度
                results['has_hallucination'] = True
                results['confidence'] = vectara_result['hallucination_score']
                results['method_used'] = 'vectara'
                return results
            else:
                # Vectara 未检测到幻觉，设置 method_used
                results['method_used'] = 'vectara'
        
        # 2. 如果 Vectara 不确定或不可用，使用 NLI 二次确认
        if 'nli' in self.detectors:
            nli_result = self.detectors['nli'].detect(generation, documents)
            results['nli_result'] = nli_result
            
            if nli_result['has_hallucination']:
                results['has_hallucination'] = True
                # 计算置信度
                total_sentences = (nli_result['contradiction_count'] + 
                                 nli_result['neutral_count'] + 
                                 nli_result['entailment_count'])
                if total_sentences > 0:
                    results['confidence'] = (nli_result['contradiction_count'] + 
                                           nli_result['neutral_count'] * 0.5) / total_sentences
                results['method_used'] = 'nli'
            else:
                # 未检测到幻觉，也要设置 method_used
                if not results['method_used']:  # 只有当前面没有设置时
                    results['method_used'] = 'nli'
        
        # 如果两个模型都有结果，投票决定
        if 'vectara_result' in results and 'nli_result' in results:
            vectara_vote = results['vectara_result']['has_hallucination']
            nli_vote = results['nli_result']['has_hallucination']
            
            if vectara_vote and nli_vote:
                results['has_hallucination'] = True
                results['confidence'] = min(
                    results.get('vectara_result', {}).get('hallucination_score', 0.5),
                    results.get('confidence', 0.5)
                )
                results['method_used'] = 'vectara+nli'
        
        return results
    
    def grade(self, generation: str, documents) -> str:
        """
        兼容原有接口的检测方法
        
        Args:
            generation: LLM 生成的内容
            documents: 参考文档（可以是字符串或列表）
            
        Returns:
            "yes" 表示无幻觉，"no" 表示有幻觉
        """
        # 处理文档格式
        if isinstance(documents, list):
            doc_text = "\n\n".join([
                doc.page_content if hasattr(doc, 'page_content') else str(doc)
                for doc in documents
            ])
        else:
            doc_text = str(documents)
        
        # 检测幻觉
        result = self.detect(generation, doc_text)
        
        # 打印详细信息
        if result['has_hallucination']:
            print(f"⚠️ 检测到幻觉 (置信度: {result['confidence']:.2f}, 方法: {result['method_used']})")
            if 'nli_result' in result:
                print(f"   矛盾句子: {result['nli_result']['contradiction_count']}")
                if result['nli_result']['problematic_sentences']:
                    print(f"   问题句子: {result['nli_result']['problematic_sentences'][:2]}")
        else:
            print(f"✅ 未检测到幻觉 (方法: {result['method_used']})")
        
        # 返回兼容格式
        return "no" if result['has_hallucination'] else "yes"


def initialize_hallucination_detector(method: str = "nli") -> object:
    """
    初始化幻觉检测器
    
    Args:
        method: 'vectara', 'nli', 或 'hybrid' (推荐)
        
    Returns:
        幻觉检测器实例
    """
    if method == "vectara":
        return VectaraHallucinationDetector()
    elif method == "nli":
        return NLIHallucinationDetector()
    elif method == "hybrid":
        return HybridHallucinationDetector(use_vectara=False, use_nli=True)  # 禁用Vectara，使用NLI
    else:
        raise ValueError(f"未知的检测方法: {method}")