"""
G0 Hallucination Detector - Hugging Face Space
Detects when LLMs make things up using 3-criterion grounding analysis.
"""
import re
import time
from typing import Optional

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
# Load the embedding model once at module import so every request pays
# only the encoding cost, not the model-load cost.
print("Loading embedding model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded.")
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    A tiny epsilon (1e-8) is added to the denominator so that a zero
    vector does not cause a division by zero.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return float(np.dot(a, b) / denom)
def compute_tracking(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float:
    """TRACKING criterion: does the claim follow from the sources?

    Scores the claim by its best cosine similarity against any single
    source embedding; an empty source list scores 0.0.
    """
    if not source_embs:
        return 0.0
    best = -float("inf")
    for emb in source_embs:
        # Same epsilon-guarded cosine as cosine_similarity(), inlined.
        denom = np.linalg.norm(claim_emb) * np.linalg.norm(emb) + 1e-8
        sim = float(np.dot(claim_emb, emb) / denom)
        if sim > best:
            best = sim
    return best
def compute_intervention(claim: str, sources: list[str]) -> float:
    """INTERVENTION criterion: would changing the sources change the claim?

    Approximated by lexical overlap: the fraction of the claim's word
    tokens that also appear somewhere in the sources.

    Fix over the previous bare ``str.split()``: tokens are now extracted
    as lowercase alphanumeric runs, so trailing punctuation no longer
    blocks matches (e.g. "France." in the claim now matches "france" in
    a source).

    Returns a score in [0, 1]; 0.0 when the claim yields no tokens.
    """
    token_re = re.compile(r"[a-z0-9]+")
    claim_words = set(token_re.findall(claim.lower()))
    if not claim_words:
        return 0.0
    source_words: set[str] = set()
    for src in sources:
        source_words.update(token_re.findall(src.lower()))
    return len(claim_words & source_words) / len(claim_words)
def compute_counterfactual(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float:
    """COUNTERFACTUAL criterion: without this source, would the claim still hold?

    Approximated by how *uniquely* the claim is grounded: a strong best
    match combined with a weak second-best match means the claim depends
    on a single source, so counterfactual dependence is high.

    Fix: the raw product ``max_sim * (1 - second_max + 0.1)`` can exceed
    1.0 (e.g. one near-perfect match plus one orthogonal source gives
    ~1.1), which breaks the documented 0-1 scale used by the verdict
    thresholds; the result is now clamped into [0, 1].
    """
    def _sim(a: np.ndarray, b: np.ndarray) -> float:
        # Epsilon-guarded cosine similarity (same formula as cosine_similarity).
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))

    similarities = sorted(_sim(claim_emb, src) for src in source_embs)
    if len(similarities) < 2:
        # Zero or one source: fall back to the tracking-style best match.
        score = similarities[-1] if similarities else 0.0
    else:
        max_sim, second_max = similarities[-1], similarities[-2]
        # If only one source grounds it well, counterfactual dependence is high.
        score = max_sim * (1 - second_max + 0.1)
    return min(1.0, max(0.0, score))
def detect_hallucination(claim: str, sources: str) -> dict:
    """Run the full G0 grounding analysis for one claim.

    G0 = (TRACKING x INTERVENTION x COUNTERFACTUAL)^(1/3)

    Args:
        claim: The statement to verify.
        sources: Newline-separated source texts.

    Returns a dict with ``g0_score`` (0.0 = hallucination, 1.0 = fully
    grounded), a human-readable ``verdict``, the three component scores,
    and ``latency_ms``.
    """
    start = time.time()
    # Parse sources (one per line), dropping blank lines.
    source_list = [s.strip() for s in sources.strip().split('\n') if s.strip()]
    if not source_list:
        return {
            "g0_score": 0.0,
            "verdict": "HALLUCINATION (no sources provided)",
            "tracking": 0.0,
            "intervention": 0.0,
            "counterfactual": 0.0,
            "latency_ms": round((time.time() - start) * 1000, 1)
        }
    # Compute embeddings for the claim and each source.
    claim_emb = model.encode(claim, convert_to_numpy=True)
    source_embs = [model.encode(src, convert_to_numpy=True) for src in source_list]
    # Compute the three criteria, clamped into [0, 1].
    # Fix: cosine similarity can be negative, and a negative factor under
    # the cube root makes Python return a *complex* number, which then
    # crashes the round() calls below with a TypeError.
    tracking = min(1.0, max(0.0, compute_tracking(claim_emb, source_embs)))
    intervention = min(1.0, max(0.0, compute_intervention(claim, source_list)))
    counterfactual = min(1.0, max(0.0, compute_counterfactual(claim_emb, source_embs)))
    # G0 = geometric mean of the three criteria.
    g0 = (tracking * intervention * counterfactual) ** (1 / 3)
    # Map the score onto a human-readable verdict.
    if g0 >= 0.7:
        verdict = "GROUNDED - Claim is well-supported by sources"
    elif g0 >= 0.4:
        verdict = "PARTIAL - Claim has some support but may contain unsupported elements"
    else:
        verdict = "HALLUCINATION - Claim is not supported by provided sources"
    latency = round((time.time() - start) * 1000, 1)
    return {
        "g0_score": round(g0, 3),
        "verdict": verdict,
        "tracking": round(tracking, 3),
        "intervention": round(intervention, 3),
        "counterfactual": round(counterfactual, 3),
        "latency_ms": latency
    }
def format_output(result: dict) -> str:
    """Render a detection-result dict as a Markdown report string."""
    lines = [
        "## Result",
        f"**G0 Score:** {result['g0_score']} (0 = hallucination, 1 = grounded)",
        f"**Verdict:** {result['verdict']}",
        "### Component Scores",
        f"- **Tracking:** {result['tracking']} - Does the claim follow from sources?",
        f"- **Intervention:** {result['intervention']} - Would changing sources change the claim?",
        f"- **Counterfactual:** {result['counterfactual']} - Is the claim uniquely grounded?",
        f"*Latency: {result['latency_ms']}ms*",
        "",  # trailing newline, matching the original triple-quoted literal
    ]
    return "\n".join(lines)
def run_detection(claim: str, sources: str) -> str:
    """Gradio callback: validate inputs, run detection, render Markdown."""
    # Guard clauses for empty inputs.
    if not claim.strip():
        return "Please enter a claim to check."
    if not sources.strip():
        return "Please enter at least one source (one per line)."
    return format_output(detect_hallucination(claim, sources))
# Example (claim, sources) pairs shown as clickable examples in the UI.
examples = [
    # Grounded: date and location match the sources.
    [
        "The Eiffel Tower was built in 1889 and is located in Paris, France.",
        "The Eiffel Tower is a wrought-iron lattice tower in Paris, France.\n"
        "It was constructed from 1887 to 1889 as the entrance arch for the 1889 World's Fair.",
    ],
    # Contradicted: the source explicitly denies the claim.
    [
        "The Great Wall of China is visible from space with the naked eye.",
        "The Great Wall of China is a series of fortifications built along the historical northern borders of China.\n"
        "Contrary to popular belief, it is not visible from space with the naked eye under normal conditions.",
    ],
    # Grounded: author and year match the sources.
    [
        "Python was created by Guido van Rossum in 1991.",
        "Python is a high-level programming language.\n"
        "It was created by Guido van Rossum and first released in 1991.",
    ],
    # Hallucination: the sources attribute the lightbulb to Edison.
    [
        "Einstein invented the lightbulb.",
        "Albert Einstein was a theoretical physicist who developed the theory of relativity.\n"
        "Thomas Edison is credited with inventing the practical incandescent lightbulb in 1879.",
    ],
]
# Build the Gradio interface: a two-column layout with the claim/source
# inputs on the left and the rendered Markdown report on the right.
with gr.Blocks(title="G0 Hallucination Detector", theme=gr.themes.Soft()) as demo:
    # Header explaining the three grounding criteria.
    gr.Markdown("""
# G0 Hallucination Detector
Detect when LLMs make things up. Enter a claim and the sources it should be grounded in.
**G0 Score:** Geometric mean of three criteria:
- **Tracking:** Does the claim follow from the sources?
- **Intervention:** Would changing sources change the claim?
- **Counterfactual:** In worlds without these sources, would the claim still hold?
Score ranges: 0.0 (hallucination) → 1.0 (fully grounded)
""")
    with gr.Row():
        with gr.Column():
            # Left column: claim text, sources text, and the submit button.
            claim_input = gr.Textbox(
                label="Claim to verify",
                placeholder="Enter the claim you want to check...",
                lines=2
            )
            sources_input = gr.Textbox(
                label="Sources (one per line)",
                placeholder="Enter source texts, one per line...",
                lines=5
            )
            submit_btn = gr.Button("Detect Hallucination", variant="primary")
        with gr.Column():
            # Right column: the detection report, rendered as Markdown.
            output = gr.Markdown(label="Result")
    # Clickable example pairs (defined at module level as `examples`).
    gr.Examples(
        examples=examples,
        inputs=[claim_input, sources_input],
        label="Try these examples"
    )
    # Wire the button to the validation + detection wrapper.
    submit_btn.click(
        fn=run_detection,
        inputs=[claim_input, sources_input],
        outputs=output
    )
    # Footer: support link and a short method description.
    gr.Markdown("""
---
**Free to use.** If this helps you, consider supporting: **[Cash App $ryancreating](https://cash.app/$ryancreating)**
---
**How it works:** Uses sentence embeddings to measure semantic similarity between claims and sources,
then computes a 3-criterion grounding metric.
Built by Crystalline Labs
""")
# Launch the Gradio app when executed as a script (the Space entry point).
if __name__ == "__main__":
    demo.launch()