aphoticshaman commited on
Commit
36571e1
·
verified ·
1 Parent(s): c02ae9b

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +51 -12
  2. app.py +225 -0
  3. deploy.sh +15 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,51 @@
1
- ---
2
- title: G0 Detector
3
- emoji: 👀
4
- colorFrom: blue
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: G0 Hallucination Detector
3
+ emoji: 🔍
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Detect when LLMs hallucinate using 3-criterion grounding
12
+ ---
13
+
14
+ # G0 Hallucination Detector
15
+
16
+ Detect when LLMs make things up using a 3-criterion grounding metric.
17
+
18
+ ## How It Works
19
+
20
+ **G0 = (Tracking × Intervention × Counterfactual)^(1/3)**
21
+
22
+ - **Tracking:** Does the claim semantically follow from the sources?
23
+ - **Intervention:** Would changing the sources change the claim?
24
+ - **Counterfactual:** Is the claim uniquely dependent on these sources?
25
+
26
+ ## Scores
27
+
28
+ - **0.7 or higher:** Grounded - claim is well-supported
29
+ - **0.4 up to (but not including) 0.7:** Partial - some support, may contain unsupported elements
30
+ - **below 0.4:** Hallucination - claim not supported by sources
31
+
32
+ ## Use Cases
33
+
34
+ - Verify LLM outputs before production
35
+ - Audit RAG pipeline responses
36
+ - Research on hallucination detection
37
+
38
+ ## API
39
+
40
+ ```python
41
+ import gradio_client
42
+
43
+ client = gradio_client.Client("crystalline-labs/g0-detector")
44
+ result = client.predict(
45
+ claim="The Eiffel Tower was built in 1889",
46
+ sources="The Eiffel Tower was constructed from 1887 to 1889.",
47
+ api_name="/predict"
48
+ )
49
+ ```
50
+
51
+ Built by Crystalline Labs
app.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
G0 Hallucination Detector - Hugging Face Space
Detects when LLMs make things up using 3-criterion grounding analysis.
"""

import gradio as gr
import numpy as np
from sentence_transformers import SentenceTransformer
from typing import Optional
import time

# Load the SentenceTransformer embedding model once at import time so the
# (slow) download/initialization happens at Space startup, not per request.
print("Loading embedding model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded.")
17
+
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity between vectors *a* and *b*.

    A small epsilon is added to the denominator so a zero-magnitude
    vector yields 0.0 instead of a division-by-zero error.
    """
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return float(numerator / denominator)
22
+
def compute_tracking(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float:
    """
    TRACKING criterion: does the claim follow from the sources?

    Scores the claim by its best cosine similarity against any single
    source embedding; a high value means the claim tracks the source
    content. Returns 0.0 when no source embeddings are given.
    """
    if not source_embs:
        return 0.0
    return float(max(cosine_similarity(claim_emb, emb) for emb in source_embs))
33
+
def _tokenize(text: str) -> set[str]:
    """Lowercase *text*, split on whitespace, and strip surrounding punctuation.

    Stripping punctuation is the fix over naive ``split()``: it lets a
    source token like ``"1889."`` match a claim token ``"1889"``.
    """
    punct = '.,;:!?"\'()[]{}'
    words = set()
    for raw in text.lower().split():
        word = raw.strip(punct)
        if word:
            words.add(word)
    return words


def compute_intervention(claim: str, sources: list[str]) -> float:
    """
    INTERVENTION criterion: would changing the sources change the claim?

    Approximated by keyword overlap: the fraction of the claim's words
    that also appear in at least one source. Tokens are lowercased and
    stripped of surrounding punctuation so trailing periods/commas in
    sources no longer block matches (bug in the original version).

    Returns a value in [0, 1]; 0.0 for a claim with no usable words.
    """
    claim_words = _tokenize(claim)
    if not claim_words:
        return 0.0

    source_words = set()
    for src in sources:
        source_words.update(_tokenize(src))

    return len(claim_words & source_words) / len(claim_words)
50
+
def compute_counterfactual(claim_emb: np.ndarray, source_embs: list[np.ndarray]) -> float:
    """
    COUNTERFACTUAL criterion: in worlds without this source, would the
    claim still hold? Approximated by how *uniquely* the claim is grounded.

    With fewer than two sources uniqueness is undefined, so this falls
    back to the tracking score. Otherwise the best similarity is kept
    high when the runner-up source grounds the claim poorly (unique
    dependence) and damped when several sources ground it equally well.

    Returns a value clamped to [0, 1]. The clamp fixes a defect in the
    raw formula ``max_sim * (1 - second_max + 0.1)``, which could exceed
    1.0 (e.g. max_sim near 1 with second_max near 0), leaking outside
    the documented score range.
    """
    if len(source_embs) < 2:
        return compute_tracking(claim_emb, source_embs)

    similarities = [cosine_similarity(claim_emb, src) for src in source_embs]
    ranked = sorted(similarities, reverse=True)
    max_sim, second_max = ranked[0], ranked[1]

    # If only one source grounds it well, counterfactual dependence is high;
    # the +0.1 bonus can push the product above 1.0, hence the clamp.
    return min(1.0, max_sim * (1 - second_max + 0.1))
66
+
def detect_hallucination(claim: str, sources: str) -> dict:
    """
    Main detection function.

    G0 = (TRACKING × INTERVENTION × COUNTERFACTUAL)^(1/3)

    Args:
        claim: The statement to verify.
        sources: Newline-separated source passages the claim should be
            grounded in; blank lines are ignored.

    Returns:
        A dict with ``g0_score`` (1.0 = fully grounded, 0.0 = completely
        ungrounded / hallucination), the three component scores, a
        human-readable ``verdict``, and ``latency_ms``.
    """
    start = time.time()

    # Parse sources (one per line), dropping empty lines.
    source_list = [s.strip() for s in sources.strip().split('\n') if s.strip()]

    if not source_list:
        # Nothing to ground against: definitive hallucination verdict.
        return {
            "g0_score": 0.0,
            "verdict": "HALLUCINATION (no sources provided)",
            "tracking": 0.0,
            "intervention": 0.0,
            "counterfactual": 0.0,
            "latency_ms": round((time.time() - start) * 1000, 1)
        }

    # Compute embeddings. All sources are encoded in ONE batched call,
    # which is faster than the original per-source encode() loop.
    claim_emb = model.encode(claim, convert_to_numpy=True)
    source_embs = list(model.encode(source_list, convert_to_numpy=True))

    # Compute the three grounding criteria.
    tracking = compute_tracking(claim_emb, source_embs)
    intervention = compute_intervention(claim, source_list)
    counterfactual = compute_counterfactual(claim_emb, source_embs)

    # G0 = geometric mean of the three criteria. Clamp the product at 0:
    # cosine similarities can be negative, and a fractional power of a
    # negative float yields a complex number (crashing round() below).
    product = tracking * intervention * counterfactual
    g0 = max(product, 0.0) ** (1 / 3)

    # Map the continuous score onto a three-way verdict.
    if g0 >= 0.7:
        verdict = "GROUNDED - Claim is well-supported by sources"
    elif g0 >= 0.4:
        verdict = "PARTIAL - Claim has some support but may contain unsupported elements"
    else:
        verdict = "HALLUCINATION - Claim is not supported by provided sources"

    latency = round((time.time() - start) * 1000, 1)

    return {
        "g0_score": round(g0, 3),
        "verdict": verdict,
        "tracking": round(tracking, 3),
        "intervention": round(intervention, 3),
        "counterfactual": round(counterfactual, 3),
        "latency_ms": latency
    }
123
+
def format_output(result: dict) -> str:
    """Render a detection-result dict as a Markdown report string."""
    report_lines = [
        "## Result",
        "",
        f"**G0 Score:** {result['g0_score']} (0 = hallucination, 1 = grounded)",
        "",
        f"**Verdict:** {result['verdict']}",
        "",
        "### Component Scores",
        f"- **Tracking:** {result['tracking']} - Does the claim follow from sources?",
        f"- **Intervention:** {result['intervention']} - Would changing sources change the claim?",
        f"- **Counterfactual:** {result['counterfactual']} - Is the claim uniquely grounded?",
        "",
        f"*Latency: {result['latency_ms']}ms*",
        "",  # trailing newline, matching the original triple-quoted template
    ]
    return "\n".join(report_lines)
140
+
def run_detection(claim: str, sources: str) -> str:
    """Gradio wrapper: validate the inputs, run detection, return Markdown."""
    # Guard clauses for missing input keep the UI responsive with a hint
    # instead of running the model on empty text.
    if not claim.strip():
        return "Please enter a claim to check."
    if not sources.strip():
        return "Please enter at least one source (one per line)."

    return format_output(detect_hallucination(claim, sources))
151
+
# Example inputs: [claim, sources] pairs wired into gr.Examples below.
# Sources are a single string with one passage per line, matching the
# newline-splitting done in detect_hallucination.
examples = [
    [
        "The Eiffel Tower was built in 1889 and is located in Paris, France.",
        "The Eiffel Tower is a wrought-iron lattice tower in Paris, France.\nIt was constructed from 1887 to 1889 as the entrance arch for the 1889 World's Fair."
    ],
    [
        "The Great Wall of China is visible from space with the naked eye.",
        "The Great Wall of China is a series of fortifications built along the historical northern borders of China.\nContrary to popular belief, it is not visible from space with the naked eye under normal conditions."
    ],
    [
        "Python was created by Guido van Rossum in 1991.",
        "Python is a high-level programming language.\nIt was created by Guido van Rossum and first released in 1991."
    ],
    [
        "Einstein invented the lightbulb.",
        "Albert Einstein was a theoretical physicist who developed the theory of relativity.\nThomas Edison is credited with inventing the practical incandescent lightbulb in 1879."
    ]
]

# Build Gradio interface: two-column layout with inputs on the left and
# the Markdown result on the right.
with gr.Blocks(title="G0 Hallucination Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # G0 Hallucination Detector

    Detect when LLMs make things up. Enter a claim and the sources it should be grounded in.

    **G0 Score:** Geometric mean of three criteria:
    - **Tracking:** Does the claim follow from the sources?
    - **Intervention:** Would changing sources change the claim?
    - **Counterfactual:** In worlds without these sources, would the claim still hold?

    Score ranges: 0.0 (hallucination) → 1.0 (fully grounded)
    """)

    with gr.Row():
        with gr.Column():
            # Input column: claim, sources, and the trigger button.
            claim_input = gr.Textbox(
                label="Claim to verify",
                placeholder="Enter the claim you want to check...",
                lines=2
            )
            sources_input = gr.Textbox(
                label="Sources (one per line)",
                placeholder="Enter source texts, one per line...",
                lines=5
            )
            submit_btn = gr.Button("Detect Hallucination", variant="primary")

        with gr.Column():
            # Output column: run_detection returns a Markdown string.
            output = gr.Markdown(label="Result")

    # Clickable canned examples that fill both inputs.
    gr.Examples(
        examples=examples,
        inputs=[claim_input, sources_input],
        label="Try these examples"
    )

    # Wire the button to the detection wrapper.
    submit_btn.click(
        fn=run_detection,
        inputs=[claim_input, sources_input],
        outputs=output
    )

    gr.Markdown("""
    ---
    **How it works:** Uses sentence embeddings to measure semantic similarity between claims and sources,
    then computes a 3-criterion grounding metric. [Source code on GitHub](https://github.com/crystalline-labs/g0-detector)

    Built by Crystalline Labs
    """)

# Launch the app when run directly (Spaces imports/executes this module).
if __name__ == "__main__":
    demo.launch()
deploy.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Deploy G0 Hallucination Detector to Hugging Face Spaces.
#
# Requires: huggingface-cli (pip install huggingface_hub), git, and the
# HF_USERNAME environment variable set to your Hugging Face username.

# Fail on the first error, on unset variables, and on pipeline failures,
# so a failed step never silently cascades into a broken push.
set -euo pipefail

# Abort early with a clear message if HF_USERNAME is unset or empty;
# otherwise the remote URL below would be malformed.
: "${HF_USERNAME:?Set HF_USERNAME to your Hugging Face username}"

# Login (opens browser)
huggingface-cli login

# Create and push space
huggingface-cli repo create g0-detector --type space --space_sdk gradio -y
git init
# Ensure the local branch is named "main" regardless of git's configured
# default (older gits default to "master", which would break the push).
git branch -M main
git remote add origin "https://huggingface.co/spaces/$HF_USERNAME/g0-detector"
git add .
git commit -m "Initial deploy: G0 Hallucination Detector"
git push -u origin main

echo "Done! Your space will be live at: https://huggingface.co/spaces/$HF_USERNAME/g0-detector"
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ sentence-transformers>=2.2.0
3
+ numpy>=1.21.0
4
+ torch>=2.0.0