""" G0 Hallucination Detector - Hugging Face Space Detects when LLMs make things up using 3-criterion grounding analysis. """ import gradio as gr import numpy as np from sentence_transformers import SentenceTransformer from typing import Optional import time # Load model once at startup print("Loading embedding model...") model = SentenceTransformer('all-MiniLM-L6-v2') print("Model loaded.") def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: """Compute cosine similarity between two vectors.""" return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8)) def compute_tracking(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float: """ TRACKING: Does the claim follow from the sources? High similarity = claim tracks the source content. """ if not source_embs: return 0.0 similarities = [cosine_similarity(claim_emb, src) for src in source_embs] return float(max(similarities)) def compute_intervention(claim: str, sources: list[str]) -> float: """ INTERVENTION: Would changing sources change the claim? Approximated by checking keyword overlap. """ claim_words = set(claim.lower().split()) source_words = set() for src in sources: source_words.update(src.lower().split()) if not claim_words: return 0.0 overlap = len(claim_words & source_words) / len(claim_words) return overlap def compute_counterfactual(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float: """ COUNTERFACTUAL: In worlds without this source, would the claim still hold? Approximated by checking how unique the grounding is. """ if len(source_embs) < 2: return compute_tracking(claim_emb, source_embs) similarities = [cosine_similarity(claim_emb, src) for src in source_embs] max_sim = max(similarities) second_max = sorted(similarities)[-2] if len(similarities) > 1 else 0 # If only one source grounds it well, counterfactual dependence is high return max_sim * (1 - second_max + 0.1) def detect_hallucination(claim: str, sources: str) -> dict: """ Main detection function. G0 = (TRACKING × INTERVENTION × COUNTERFACTUAL)^(1/3) Returns grounding score where: - 1.0 = fully grounded (not a hallucination) - 0.0 = completely ungrounded (hallucination) """ start = time.time() # Parse sources (one per line) source_list = [s.strip() for s in sources.strip().split('\n') if s.strip()] if not source_list: return { "g0_score": 0.0, "verdict": "HALLUCINATION (no sources provided)", "tracking": 0.0, "intervention": 0.0, "counterfactual": 0.0, "latency_ms": round((time.time() - start) * 1000, 1) } # Compute embeddings claim_emb = model.encode(claim, convert_to_numpy=True) source_embs = [model.encode(src, convert_to_numpy=True) for src in source_list] # Compute three criteria tracking = compute_tracking(claim_emb, source_embs) intervention = compute_intervention(claim, source_list) counterfactual = compute_counterfactual(claim_emb, source_embs) # G0 = geometric mean of three criteria g0 = (tracking * intervention * counterfactual) ** (1/3) # Determine verdict if g0 >= 0.7: verdict = "GROUNDED - Claim is well-supported by sources" elif g0 >= 0.4: verdict = "PARTIAL - Claim has some support but may contain unsupported elements" else: verdict = "HALLUCINATION - Claim is not supported by provided sources" latency = round((time.time() - start) * 1000, 1) return { "g0_score": round(g0, 3), "verdict": verdict, "tracking": round(tracking, 3), "intervention": round(intervention, 3), "counterfactual": round(counterfactual, 3), "latency_ms": latency } def format_output(result: dict) -> str: """Format result for display.""" return f"""## Result **G0 Score:** {result['g0_score']} (0 = hallucination, 1 = grounded) **Verdict:** {result['verdict']} ### Component Scores - **Tracking:** {result['tracking']} - Does the claim follow from sources? - **Intervention:** {result['intervention']} - Would changing sources change the claim? - **Counterfactual:** {result['counterfactual']} - Is the claim uniquely grounded? *Latency: {result['latency_ms']}ms* """ def run_detection(claim: str, sources: str) -> str: """Gradio wrapper.""" if not claim.strip(): return "Please enter a claim to check." if not sources.strip(): return "Please enter at least one source (one per line)." result = detect_hallucination(claim, sources) return format_output(result) # Example inputs examples = [ [ "The Eiffel Tower was built in 1889 and is located in Paris, France.", "The Eiffel Tower is a wrought-iron lattice tower in Paris, France.\nIt was constructed from 1887 to 1889 as the entrance arch for the 1889 World's Fair." ], [ "The Great Wall of China is visible from space with the naked eye.", "The Great Wall of China is a series of fortifications built along the historical northern borders of China.\nContrary to popular belief, it is not visible from space with the naked eye under normal conditions." ], [ "Python was created by Guido van Rossum in 1991.", "Python is a high-level programming language.\nIt was created by Guido van Rossum and first released in 1991." ], [ "Einstein invented the lightbulb.", "Albert Einstein was a theoretical physicist who developed the theory of relativity.\nThomas Edison is credited with inventing the practical incandescent lightbulb in 1879." ] ] # Build Gradio interface with gr.Blocks(title="G0 Hallucination Detector", theme=gr.themes.Soft()) as demo: gr.Markdown(""" # G0 Hallucination Detector Detect when LLMs make things up. Enter a claim and the sources it should be grounded in. **G0 Score:** Geometric mean of three criteria: - **Tracking:** Does the claim follow from the sources? - **Intervention:** Would changing sources change the claim? - **Counterfactual:** In worlds without these sources, would the claim still hold? Score ranges: 0.0 (hallucination) → 1.0 (fully grounded) """) with gr.Row(): with gr.Column(): claim_input = gr.Textbox( label="Claim to verify", placeholder="Enter the claim you want to check...", lines=2 ) sources_input = gr.Textbox( label="Sources (one per line)", placeholder="Enter source texts, one per line...", lines=5 ) submit_btn = gr.Button("Detect Hallucination", variant="primary") with gr.Column(): output = gr.Markdown(label="Result") gr.Examples( examples=examples, inputs=[claim_input, sources_input], label="Try these examples" ) submit_btn.click( fn=run_detection, inputs=[claim_input, sources_input], outputs=output ) gr.Markdown(""" --- **Free to use.** If this helps you, consider supporting: **[Cash App $ryancreating](https://cash.app/$ryancreating)** --- **How it works:** Uses sentence embeddings to measure semantic similarity between claims and sources, then computes a 3-criterion grounding metric. Built by Crystalline Labs """) if __name__ == "__main__": demo.launch()