adi-123 committed on
Commit
a3075d5
·
verified ·
1 Parent(s): 45547cc

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +885 -0
utils.py ADDED
@@ -0,0 +1,885 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import List, Dict, Any
4
+ from dotenv import load_dotenv
5
+ from langchain.schema import Document as LangchainDocument
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_together.chat_models import ChatTogether
8
+ from langchain_together.embeddings import TogetherEmbeddings
9
+ import spacy
10
+ import pandas as pd
11
+ import json
12
+ import re
13
+
14
# Configure logging
# Logs are written both to 'fact_checker.log' and to the console stream so
# interactive runs and post-mortem debugging see the same record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('fact_checker.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger shared by every class and function in this file.
logger = logging.getLogger(__name__)

# Populate os.environ from a local .env file (e.g. TOGETHER_API_KEY) before
# any component asks for the API key.
load_dotenv()
logger.info("Environment variables loaded")
27
+
28
# ---------- API Key Helper -------------------------------------------------
def get_together_api_key() -> str:
    """Return the Together AI API key from the environment.

    Reads the ``TOGETHER_API_KEY`` environment variable, which may be set by
    the shell or by the ``load_dotenv()`` call at module import time.

    Returns:
        The API key string.

    Raises:
        EnvironmentError: If TOGETHER_API_KEY is not set.
    """
    key = os.getenv("TOGETHER_API_KEY")
    if key:
        logger.info("Together AI API key found")
        return key

    # No blanket try/except here: the previous version caught the
    # EnvironmentError it had just raised and logged it a second time via
    # logger.exception before re-raising, producing duplicate log noise.
    error_msg = (
        "TOGETHER_API_KEY not found. Please set it in one of these ways:\n"
        "1. Create a .env file with: TOGETHER_API_KEY=your_key_here\n"
        "2. Set environment variable: export TOGETHER_API_KEY=your_key_here"
    )
    logger.error(error_msg)
    raise EnvironmentError(error_msg)
48
+
49
+
50
+ # ========================================================================
51
+ # FACT-CHECKING SYSTEM COMPONENTS (OOP Architecture)
52
+ # ========================================================================
53
+
54
class ClaimExtractor:
    """Extracts claims (sentences) and named entities from text via spaCy.

    The spaCy pipeline is loaded lazily on first use, so constructing an
    extractor is cheap until claims are actually requested.
    """

    # Entity labels we keep; anything else spaCy finds is discarded.
    ENTITY_TYPES = ['ORG', 'GPE', 'PERSON', 'DATE', 'EVENT', 'MONEY',
                    'PERCENT', 'LAW', 'PRODUCT']

    def __init__(self, model_name: str = "en_core_web_sm"):
        """Record the model name; the pipeline itself is loaded on demand.

        Args:
            model_name: Name of the spaCy model to use
        """
        self.model_name = model_name
        self._nlp = None

    @property
    def nlp(self):
        """The spaCy pipeline, loaded on first access."""
        if self._nlp is not None:
            return self._nlp
        try:
            logger.info(f"Loading spaCy model: {self.model_name}")
            self._nlp = spacy.load(self.model_name)
            logger.info(f"Successfully loaded spaCy model: {self.model_name}")
        except OSError:
            # Model package is missing: tell the user how to install it.
            logger.error(f"spaCy model '{self.model_name}' not found")
            raise RuntimeError(
                f"spaCy model '{self.model_name}' not found. "
                f"Please install it with: python -m spacy download {self.model_name}"
            )
        except Exception:
            logger.exception(f"Unexpected error loading spaCy model: {self.model_name}")
            raise
        return self._nlp

    def extract_entities(self, doc) -> List[Dict[str, Any]]:
        """Collect named entities of the supported types from a spaCy doc.

        Args:
            doc: spaCy document object

        Returns:
            List of dicts with 'text', 'type', 'start' and 'end' keys;
            empty list on any extraction error.
        """
        try:
            found = [
                {
                    'text': ent.text,
                    'type': ent.label_,
                    'start': ent.start_char,
                    'end': ent.end_char
                }
                for ent in doc.ents
                if ent.label_ in self.ENTITY_TYPES
            ]
            logger.debug(f"Extracted {len(found)} entities")
            return found
        except Exception:
            logger.exception("Error extracting entities")
            return []

    def extract_claims(self, text: str, min_length: int = 10) -> List[Dict[str, Any]]:
        """Split input text into claims (sentences) with their entities.

        Args:
            text: Input text (e.g., news post, social media statement)
            min_length: Minimum length for a sentence to be considered a claim

        Returns:
            List of claim dicts with 'text', 'type', and 'entities'; on any
            failure a single fallback claim wrapping the raw text is returned.
        """
        try:
            logger.info(f"Extracting claims from text ({len(text)} chars)")
            doc = self.nlp(text)
            all_entities = self.extract_entities(doc)

            claims = []
            for sentence in doc.sents:
                stripped = sentence.text.strip()
                if len(stripped) < min_length:
                    continue
                # Keep only entities whose character span falls inside
                # this sentence.
                in_sentence = [
                    ent for ent in all_entities
                    if ent['start'] >= sentence.start_char
                    and ent['end'] <= sentence.end_char
                ]
                claims.append({
                    'text': stripped,
                    'type': 'statement',
                    'entities': in_sentence
                })

            if not claims:
                # Nothing passed the length filter: treat the whole input
                # as a single claim carrying every entity found.
                logger.debug("No sentences found, using entire text as claim")
                claims.append({
                    'text': text.strip(),
                    'type': 'statement',
                    'entities': all_entities
                })

            logger.info(f"Extracted {len(claims)} claim(s)")
            return claims
        except Exception:
            logger.exception("Error extracting claims")
            # Degrade gracefully: the raw text becomes the single claim.
            return [{
                'text': text.strip(),
                'type': 'statement',
                'entities': []
            }]
171
+
172
+
173
class FactsDatabase:
    """
    Manages the verified facts database and vector store.
    Handles loading, embedding, and persistence.

    Facts are read from a CSV, enriched with spaCy-extracted entities, and
    embedded into a FAISS index that is persisted to disk.
    """

    # Default on-disk locations; overridable per call in load_from_csv().
    DEFAULT_CSV_PATH = "verified_facts_db.csv"
    DEFAULT_INDEX_PATH = "faiss_index_facts"
    EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"

    def __init__(self, api_key: str = None):
        """
        Initialize the FactsDatabase.

        Args:
            api_key: Together AI API key (optional, can use get_together_api_key)

        Raises:
            EnvironmentError: If no key is given and TOGETHER_API_KEY is unset.
        """
        logger.info("Initializing FactsDatabase")
        self.api_key = api_key or get_together_api_key()

        try:
            self.embeddings = TogetherEmbeddings(
                model=self.EMBEDDING_MODEL,
                api_key=self.api_key
            )
            logger.info(f"Embeddings initialized with model: {self.EMBEDDING_MODEL}")

            # Initialize ClaimExtractor for entity extraction from facts
            self.claim_extractor = ClaimExtractor()
            logger.info("ClaimExtractor initialized for database entity extraction")

        except Exception as e:
            logger.exception("Error initializing embeddings")
            raise

    def load_from_csv(
        self,
        csv_path: str = None,
        index_path: str = None
    ) -> str:
        """
        Load verified facts from CSV and create FAISS vector store.

        Args:
            csv_path: Path to verified facts CSV file
            index_path: Path to save FAISS index

        Returns:
            Status message with count of loaded facts

        Raises:
            FileNotFoundError: If the CSV does not exist.
            RuntimeError: For any other failure during loading/indexing.
        """
        csv_path = csv_path or self.DEFAULT_CSV_PATH
        index_path = index_path or self.DEFAULT_INDEX_PATH

        try:
            logger.info(f"Loading facts from CSV: {csv_path}")
            # Read verified facts
            df = pd.read_csv(csv_path)
            logger.info(f"Loaded {len(df)} rows from CSV")

            # Handle different CSV formats
            if 'fact_text' in df.columns:
                fact_column = 'fact_text'
                logger.debug("Using 'fact_text' column")
            elif 'fact' in df.columns:
                fact_column = 'fact'
                logger.debug("Using 'fact' column")
            else:
                # NOTE(review): this ValueError is swallowed by the
                # `except Exception` below and resurfaces as RuntimeError —
                # confirm callers don't expect to catch ValueError.
                error_msg = "CSV must contain a 'fact' or 'fact_text' column"
                logger.error(error_msg)
                raise ValueError(error_msg)

            # Create documents with metadata
            logger.info("Creating documents with metadata")
            documents = self._create_documents(df, fact_column)
            logger.info(f"Created {len(documents)} documents")

            # Create FAISS index
            logger.info("Creating FAISS vector index...")
            vector_store = FAISS.from_documents(documents, self.embeddings)
            logger.info("FAISS index created successfully")

            # Save to disk
            logger.info(f"Saving FAISS index to: {index_path}")
            vector_store.save_local(index_path)
            logger.info("FAISS index saved successfully")

            return f"✅ Successfully loaded {len(documents)} verified facts into vector store"

        except FileNotFoundError:
            raise FileNotFoundError(f"Verified facts CSV not found at: {csv_path}")
        except Exception as e:
            raise RuntimeError(f"Error loading verified facts: {str(e)}")

    def _create_documents(
        self,
        df: pd.DataFrame,
        fact_column: str
    ) -> List[LangchainDocument]:
        """
        Create LangChain documents from DataFrame with entity extraction.

        Also performs lightweight data-quality checks (multi-sentence facts,
        unresolved pronouns) that only emit warnings — rows are never dropped.

        Args:
            df: Pandas DataFrame with facts
            fact_column: Name of the column containing fact text

        Returns:
            List of LangChain documents with metadata including extracted entities
        """
        try:
            documents = []
            multi_sentence_count = 0
            pronoun_count = 0

            for idx, row in df.iterrows():
                fact_text = row[fact_column]

                # Extract fact_id if available; otherwise synthesize one
                # from the row index (F000, F001, ...).
                if 'fact_id' in df.columns:
                    fact_id = row['fact_id']
                else:
                    fact_id = f"F{idx:03d}"

                # DATA VALIDATION: Check for multi-sentence facts
                sentences = fact_text.split('.')
                if len([s for s in sentences if s.strip()]) > 1:
                    multi_sentence_count += 1
                    logger.warning(
                        f"Fact {fact_id} contains multiple sentences ({len(sentences)} sentences). "
                        f"Consider splitting for better retrieval: {fact_text[:80]}..."
                    )

                # DATA VALIDATION: Check for unresolved pronouns
                # NOTE(review): plain substring match — "it " also matches
                # inside words like "profit in"; acceptable for a warning-only
                # heuristic, but expect false positives.
                pronouns = ['he ', 'she ', 'it ', 'they ', 'them ', 'his ', 'her ', 'their ']
                if any(pronoun in fact_text.lower() for pronoun in pronouns):
                    pronoun_count += 1
                    logger.warning(
                        f"Fact {fact_id} contains pronouns - may cause coreference issues: {fact_text[:80]}..."
                    )

                # ENTITY EXTRACTION: Extract entities from fact text
                entities = []
                entities_dict = {}
                try:
                    claims = self.claim_extractor.extract_claims(fact_text)
                    if claims and len(claims) > 0:
                        # Only the first claim's entities are used here;
                        # multi-sentence facts were already flagged above.
                        entities = claims[0].get('entities', [])
                        # Convert entities list to dict for easier access
                        entities_dict = {
                            'organizations': [e['text'] for e in entities if e['type'] in ['ORG', 'ORGANIZATION']],
                            'locations': [e['text'] for e in entities if e['type'] in ['GPE', 'LOC', 'LOCATION']],
                            'persons': [e['text'] for e in entities if e['type'] in ['PERSON', 'PER']],
                            'dates': [e['text'] for e in entities if e['type'] == 'DATE'],
                            'percentages': [e['text'] for e in entities if e['type'] in ['PERCENT', 'PERCENTAGE']],
                            'money': [e['text'] for e in entities if e['type'] in ['MONEY', 'CURRENCY']],
                            'all_entities': [e['text'] for e in entities]
                        }
                        logger.debug(f"Fact {fact_id}: Extracted {len(entities)} entities")
                except Exception as e:
                    # Best-effort: a fact without entities is still indexed.
                    logger.warning(f"Failed to extract entities from fact {fact_id}: {str(e)}")

                # Create metadata with entities
                metadata = {
                    'source': row.get('source', 'Verified Database'),
                    'date': row.get('date', 'N/A'),
                    'category': row.get('category', 'General'),
                    'fact_id': fact_id,
                    'entities': entities,  # Full entity list with types
                    'entities_dict': entities_dict  # Organized by type for easy filtering
                }

                # Create LangChain document with metadata
                doc = LangchainDocument(
                    page_content=fact_text,
                    metadata=metadata
                )
                documents.append(doc)

            # Summary logging
            logger.info(f"Created {len(documents)} documents from DataFrame")
            if multi_sentence_count > 0:
                logger.warning(
                    f"⚠️ {multi_sentence_count}/{len(documents)} facts contain multiple sentences. "
                    f"Consider atomic splitting for better granularity."
                )
            if pronoun_count > 0:
                logger.warning(
                    f"⚠️ {pronoun_count}/{len(documents)} facts contain pronouns. "
                    f"Consider coreference resolution."
                )

            # Log entity extraction statistics
            total_entities = sum(len(doc.metadata.get('entities', [])) for doc in documents)
            avg_entities = total_entities / len(documents) if documents else 0
            logger.info(
                f"Entity extraction complete: {total_entities} total entities "
                f"({avg_entities:.1f} avg per fact)"
            )

            return documents
        except Exception as e:
            logger.exception("Error creating documents from DataFrame")
            raise
375
+
376
+
377
class FactRetriever:
    """Semantic search over the verified-facts FAISS index.

    Loads the index lazily on first query and converts FAISS distances into
    similarity scores in the (0, 1] range.
    """

    DEFAULT_INDEX_PATH = "faiss_index_facts"
    EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"

    def __init__(self, api_key: str = None, index_path: str = None):
        """Set up the embeddings client; the FAISS index loads lazily.

        Args:
            api_key: Together AI API key
            index_path: Path to FAISS index
        """
        self.api_key = api_key or get_together_api_key()
        self.index_path = index_path or self.DEFAULT_INDEX_PATH
        logger.info(f"Initializing FactRetriever with index path: {self.index_path}")

        try:
            self.embeddings = TogetherEmbeddings(
                model=self.EMBEDDING_MODEL,
                api_key=self.api_key
            )
            logger.info(f"Embeddings model initialized: {self.EMBEDDING_MODEL}")
        except Exception:
            logger.exception("Error initializing embeddings model")
            raise

        self._vector_store = None

    @property
    def vector_store(self):
        """The FAISS store, read from disk on first access."""
        if self._vector_store is not None:
            return self._vector_store
        try:
            logger.info(f"Loading FAISS index from: {self.index_path}")
            store = FAISS.load_local(
                self.index_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            )
        except FileNotFoundError:
            error_msg = f"FAISS index not found at: {self.index_path}. Please initialize the database first."
            logger.error(error_msg)
            raise FileNotFoundError(error_msg)
        except Exception as e:
            logger.exception("Error loading FAISS index")
            raise RuntimeError(f"Error loading FAISS index: {str(e)}")
        logger.info("FAISS index loaded successfully")
        self._vector_store = store
        return self._vector_store

    def retrieve(
        self,
        claim: str,
        top_k: int = 3,
        similarity_threshold: float = 0.0
    ) -> List[Dict[str, Any]]:
        """Return the most similar verified facts for a claim.

        Args:
            claim: The claim text to verify
            top_k: Number of similar facts to retrieve
            similarity_threshold: Minimum similarity score (0-1)

        Returns:
            List of dictionaries with 'fact', 'metadata', and 'similarity'

        Raises:
            RuntimeError: On any search or index-loading failure.
        """
        try:
            logger.info(f"Retrieving top-{top_k} facts for claim: {claim[:100]}...")

            scored_docs = self.vector_store.similarity_search_with_score(claim, k=top_k)
            logger.debug(f"Retrieved {len(scored_docs)} documents from FAISS")

            matches = []
            for document, distance in scored_docs:
                # FAISS reports a distance (lower = closer); convert it to
                # a similarity before thresholding.
                similarity = self._normalize_similarity(distance)
                if similarity < similarity_threshold:
                    continue
                matches.append({
                    'fact': document.page_content,
                    'metadata': document.metadata,
                    'similarity': round(similarity, 3)
                })
                logger.debug(f"Fact similarity: {similarity:.3f} - {document.page_content[:50]}...")

            logger.info(f"Filtered to {len(matches)} facts above threshold {similarity_threshold}")
            return matches

        except Exception as e:
            logger.exception("Error retrieving similar facts")
            raise RuntimeError(f"Error retrieving similar facts: {str(e)}")

    @staticmethod
    def _normalize_similarity(distance: float) -> float:
        """Map a FAISS distance (lower = closer) onto (0, 1].

        Args:
            distance: FAISS distance score (lower = more similar)

        Returns:
            Similarity score: 1 at distance 0, decaying toward 0.
        """
        return 1 / (distance + 1)
490
+
491
+
492
class ClaimClassifier:
    """
    Uses LLM to classify claims as True/False/Unverifiable.
    Handles prompt engineering and response parsing.

    The LLM is instructed to answer in strict JSON; parsing failures fall
    back to an 'Unverifiable' verdict rather than raising to the caller.
    """

    LLM_MODEL = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
    # Low temperature keeps the JSON-formatted verdict output stable.
    TEMPERATURE = 0.3

    # Verdict constants
    VERDICT_TRUE = "Likely True"
    VERDICT_FALSE = "Likely False"
    VERDICT_UNVERIFIABLE = "Unverifiable"

    def __init__(self, api_key: str = None):
        """
        Initialize the ClaimClassifier.

        Args:
            api_key: Together AI API key
        """
        self.api_key = api_key or get_together_api_key()
        logger.info(f"Initializing ClaimClassifier with model: {self.LLM_MODEL}")

        try:
            self.llm = ChatTogether(
                model=self.LLM_MODEL,
                temperature=self.TEMPERATURE,
                api_key=self.api_key
            )
            logger.info(f"LLM initialized successfully (temperature={self.TEMPERATURE})")
        except Exception as e:
            logger.exception("Error initializing LLM")
            raise

    def classify(
        self,
        claim: str,
        retrieved_facts: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Classify a claim against retrieved facts using LLM.

        Never raises: any failure is converted into a fallback response
        with verdict 'Unverifiable'.

        Args:
            claim: The original claim to verify
            retrieved_facts: List of similar facts with metadata

        Returns:
            Dictionary with 'verdict', 'confidence', 'reasoning', 'evidence_used'
            plus 'evidence_details' echoing the retrieved facts.
        """
        logger.info(f"Classifying claim with {len(retrieved_facts)} retrieved facts")

        # Build prompt with evidence
        prompt = self._build_prompt(claim, retrieved_facts)
        logger.debug(f"Built prompt with {len(prompt)} characters")

        try:
            # Get LLM response
            logger.info("Invoking LLM for claim classification")
            response = self.llm.invoke([{"role": "user", "content": prompt}])
            response_text = response.content.strip()
            logger.debug(f"LLM response received ({len(response_text)} chars)")

            # Parse JSON response
            result = self._parse_response(response_text)
            logger.info(f"Classification result: {result['verdict']} (confidence: {result['confidence']})")

            # Add retrieved facts as evidence details
            result['evidence_details'] = retrieved_facts

            return result

        except json.JSONDecodeError as e:
            logger.error(f"JSON parsing failed: {str(e)}")
            return self._fallback_response(retrieved_facts, "JSON parsing failed")
        except Exception as e:
            logger.exception("Error during claim classification")
            return self._fallback_response(retrieved_facts, str(e))

    def _build_prompt(
        self,
        claim: str,
        retrieved_facts: List[Dict[str, Any]]
    ) -> str:
        """
        Build the classification prompt for the LLM.

        The verdict strings are interpolated from the class constants so the
        prompt and the parsing side always agree on the allowed labels.

        Args:
            claim: The claim to verify
            retrieved_facts: Retrieved evidence

        Returns:
            Formatted prompt string
        """
        # Format evidence
        evidence_text = self._format_evidence(retrieved_facts)

        # Construct prompt
        prompt = f"""You are a fact-checking assistant. Your task is to verify the following claim against verified evidence.

CLAIM TO VERIFY:
"{claim}"

VERIFIED EVIDENCE FROM DATABASE:
{evidence_text}

INSTRUCTIONS:
1. Compare the claim against the verified evidence carefully
2. Classify the claim as one of:
- "{self.VERDICT_TRUE}" - if evidence strongly supports the claim
- "{self.VERDICT_FALSE}" - if evidence contradicts the claim
- "{self.VERDICT_UNVERIFIABLE}" - if insufficient or conflicting evidence

3. Provide your analysis in EXACTLY this JSON format (no additional text):
{{
"verdict": "{self.VERDICT_TRUE}" | "{self.VERDICT_FALSE}" | "{self.VERDICT_UNVERIFIABLE}",
"confidence": "high" | "medium" | "low",
"reasoning": "Explain your decision in 2-3 sentences",
"evidence_used": ["fact 1", "fact 2"]
}}

IMPORTANT:
- Be objective and base your verdict only on the evidence provided
- If the evidence is vague or irrelevant, mark as "{self.VERDICT_UNVERIFIABLE}"
- Consider dates, entities, and specific details when comparing
- Return ONLY the JSON object, no other text

YOUR RESPONSE:"""

        return prompt

    def _format_evidence(self, retrieved_facts: List[Dict[str, Any]]) -> str:
        """
        Format retrieved facts for the prompt.

        Args:
            retrieved_facts: List of facts with metadata

        Returns:
            Formatted evidence string (one numbered block per fact)
        """
        if not retrieved_facts:
            return "No similar verified facts found in the database."

        evidence_lines = []
        for i, fact in enumerate(retrieved_facts, 1):
            lines = [
                f"Evidence {i}:",
                f"{fact['fact']}",
                f"Source: {fact['metadata'].get('source', 'Unknown')}",
                f"Date: {fact['metadata'].get('date', 'Unknown')}",
                f"Similarity: {fact['similarity']:.2f}"
            ]
            evidence_lines.append("\n".join(lines))

        return "\n\n".join(evidence_lines)

    def _parse_response(self, response_text: str) -> Dict[str, Any]:
        """
        Parse LLM JSON response.

        Tolerates extra prose around the JSON object and back-fills any
        missing required fields with placeholder values.

        Args:
            response_text: Raw LLM response

        Returns:
            Parsed result dictionary

        Raises:
            Exception: Re-raises any parsing error (handled by classify()).
        """
        try:
            # Try to extract JSON if LLM added extra text
            # (greedy match grabs from the first '{' to the last '}')
            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                response_text = json_match.group(0)
                logger.debug("Extracted JSON from LLM response")

            result = json.loads(response_text)
            logger.debug("Successfully parsed JSON response")

            # Validate required fields
            required_fields = ['verdict', 'confidence', 'reasoning', 'evidence_used']
            missing_fields = [field for field in required_fields if field not in result]

            if missing_fields:
                logger.warning(f"Missing fields in LLM response: {missing_fields}")
                for field in missing_fields:
                    result[field] = "Unknown" if field != 'evidence_used' else []

            return result
        except Exception as e:
            logger.exception("Error parsing LLM response")
            raise

    def _fallback_response(
        self,
        retrieved_facts: List[Dict[str, Any]],
        error_msg: str
    ) -> Dict[str, Any]:
        """
        Create fallback response on error.

        Args:
            retrieved_facts: Retrieved evidence
            error_msg: Error message

        Returns:
            Fallback response dictionary (verdict 'Unverifiable', low
            confidence, original error preserved under 'error')
        """
        logger.warning(f"Creating fallback response due to: {error_msg}")
        return {
            'verdict': self.VERDICT_UNVERIFIABLE,
            'confidence': 'low',
            'reasoning': f'Error during fact-checking: {error_msg}',
            'evidence_used': [],
            'evidence_details': retrieved_facts,
            'error': error_msg
        }
707
+
708
+
709
class FactChecker:
    """Facade over the full fact-checking pipeline.

    Wires ClaimExtractor, FactRetriever and ClaimClassifier together behind
    a single check_claim() entry point. Pipeline failures are converted into
    an 'Unverifiable' result rather than raised.
    """

    def __init__(self, api_key: str = None):
        """Build every pipeline component up front.

        Args:
            api_key: Together AI API key
        """
        logger.info("Initializing FactChecker pipeline")
        self.api_key = api_key or get_together_api_key()

        try:
            # Components are injected as attributes so tests can swap them.
            logger.debug("Initializing ClaimExtractor")
            self.claim_extractor = ClaimExtractor()
            logger.debug("Initializing FactRetriever")
            self.fact_retriever = FactRetriever(api_key=self.api_key)
            logger.debug("Initializing ClaimClassifier")
            self.claim_classifier = ClaimClassifier(api_key=self.api_key)
            logger.info("FactChecker initialization complete")
        except Exception:
            logger.exception("Error initializing FactChecker")
            raise

    def check_claim(self, user_claim: str, top_k: int = 3) -> Dict[str, Any]:
        """Run extract -> retrieve -> classify for one input statement.

        Args:
            user_claim: User's input claim/statement to verify
            top_k: Number of similar facts to retrieve

        Returns:
            Complete fact-check result with verdict, evidence, and reasoning;
            on failure, an error result with verdict 'Unverifiable'.
        """
        banner = "=" * 60
        logger.info(banner)
        logger.info(f"Starting fact-check pipeline for claim: {user_claim[:100]}...")
        logger.info(banner)

        try:
            logger.info("Step 1: Extracting claims from input")
            claims = self.claim_extractor.extract_claims(user_claim)

            # Only the first (main) claim is fact-checked.
            main_claim = claims[0]['text'] if claims else user_claim
            logger.info(f"Main claim identified: {main_claim[:100]}...")

            logger.info(f"Step 2: Retrieving top-{top_k} similar facts")
            evidence = self.fact_retriever.retrieve(main_claim, top_k=top_k)
            logger.info(f"Retrieved {len(evidence)} similar facts")

            logger.info("Step 3: Classifying claim using LLM")
            result = self.claim_classifier.classify(main_claim, evidence)

            logger.info("Step 4: Adding metadata to result")
            result['original_input'] = user_claim
            result['extracted_claim'] = main_claim
            result['entities_found'] = claims[0].get('entities', []) if claims else []
            result['total_claims_extracted'] = len(claims)

            logger.info(f"Fact-check complete: {result['verdict']}")
            logger.info(banner)
            return result

        except Exception as e:
            logger.exception("Error in fact-checking pipeline")
            logger.info(banner)
            return self._error_response(user_claim, str(e))

    def _error_response(self, user_claim: str, error_msg: str) -> Dict[str, Any]:
        """Build the result returned when the pipeline fails.

        Args:
            user_claim: Original user claim
            error_msg: Error message

        Returns:
            Error response dictionary with verdict 'Unverifiable' and the
            failure preserved under 'error'.
        """
        logger.error(f"Creating error response for claim: {error_msg}")
        return {
            'verdict': 'Unverifiable',
            'confidence': 'low',
            'reasoning': f'Error during fact-checking pipeline: {error_msg}',
            'evidence_used': [],
            'evidence_details': [],
            'original_input': user_claim,
            'extracted_claim': user_claim,
            'entities_found': [],
            'error': error_msg
        }
814
+
815
+
816
+ # ========================================================================
817
+ # LEGACY FUNCTION WRAPPERS (for backward compatibility)
818
+ # ========================================================================
819
+
820
def load_verified_facts(csv_path: str = "verified_facts_db.csv") -> str:
    """Legacy wrapper kept for backward compatibility.

    Delegates to FactsDatabase.load_from_csv.

    Args:
        csv_path: Path to verified facts CSV file

    Returns:
        Status message
    """
    return FactsDatabase().load_from_csv(csv_path)
833
+
834
+
835
def retrieve_similar_facts(
    claim: str,
    top_k: int = 3,
    similarity_threshold: float = 0.0
) -> List[Dict[str, Any]]:
    """Legacy wrapper kept for backward compatibility.

    Delegates to FactRetriever.retrieve.

    Args:
        claim: The claim text to verify
        top_k: Number of similar facts to retrieve
        similarity_threshold: Minimum similarity score (0-1)

    Returns:
        List of dictionaries with 'fact', 'metadata', and 'similarity'
    """
    return FactRetriever().retrieve(claim, top_k, similarity_threshold)
854
+
855
+
856
def classify_claim(claim: str, retrieved_facts: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Legacy wrapper kept for backward compatibility.

    Delegates to ClaimClassifier.classify.

    Args:
        claim: The original claim to verify
        retrieved_facts: List of similar facts with metadata

    Returns:
        Dictionary with 'verdict', 'confidence', 'reasoning', 'evidence_used'
    """
    return ClaimClassifier().classify(claim, retrieved_facts)
870
+
871
+
872
def fact_check_claim(user_claim: str, top_k: int = 3) -> Dict[str, Any]:
    """Legacy wrapper kept for backward compatibility.

    Delegates to FactChecker.check_claim.

    Args:
        user_claim: User's input claim/statement to verify
        top_k: Number of similar facts to retrieve

    Returns:
        Complete fact-check result with verdict, evidence, and reasoning
    """
    return FactChecker().check_claim(user_claim, top_k)