adi-123 committed on
Commit
45547cc
·
verified ·
1 Parent(s): 13c0456

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +471 -0
app.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import logging
4
+ import pandas as pd
5
+ from typing import Dict, Any, Tuple, Optional
6
+
7
+ # Import fact-checking utilities
8
+ from utils import (
9
+ load_verified_facts,
10
+ fact_check_claim,
11
+ )
12
+
13
# Module-level logger. This statement only obtains the logger for this
# module; it does not attach a handler or set a level.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------#
# Gradio Fact-Checking Application
# ---------------------------------------------------------------------------#

# Process-wide flag recording whether the fact index is considered available.
# initialize_database() and check_fact() set it to True once the
# `faiss_index_facts` path exists on disk or load_verified_facts() has
# returned; check_fact() refuses to run the pipeline while it is False and the
# path is missing.
DATABASE_INITIALIZED = False
22
+
23
+
24
def initialize_database() -> str:
    """Make sure the verified-facts index is available and report the outcome.

    If the ``faiss_index_facts`` path already exists the index is treated as
    ready. Otherwise ``load_verified_facts()`` is called and its return value
    is embedded in the success message. Both success paths set the module-level
    ``DATABASE_INITIALIZED`` flag. Any exception raised while loading is logged
    and converted into a troubleshooting message; it is not re-raised.

    Returns:
        Markdown text describing the result (ready / complete / failed).
    """
    global DATABASE_INITIALIZED

    logger.info("Database initialization requested")

    if not os.path.exists("faiss_index_facts"):
        # Nothing on disk yet: run the loader and report what happened.
        try:
            logger.info("Starting database initialization process")
            load_report = load_verified_facts()
            DATABASE_INITIALIZED = True
            logger.info("Database initialized successfully")
            return f"""
    > ## βœ… Initialization Complete
    >
    > {load_report}

    <br>

    **Status:** 🟒 Ready to verify claims

    **🎯 Next Step:** Switch to the **Fact Check** tab to start using the system!
    """
        except Exception as exc:
            logger.exception("Database initialization failed")
            return f"""
    > ## ❌ Initialization Failed
    >
    > **Error Message:**
    > ```
    > {exc}
    > ```

    <br>

    **πŸ”§ Troubleshooting Steps:**

    1. βœ“ Ensure `verified_facts_db.csv` exists in your project directory
    2. βœ“ Verify your `TOGETHER_API_KEY` is set in the `.env` file
    3. βœ“ Check your internet connection (required for embedding API)
    4. βœ“ Make sure all dependencies are installed: `pip install -r requirements.txt`

    <br>

    πŸ’‘ **Need Help?** Check the README.md for detailed setup instructions.
    """

    # The index path is already present, so only the flag needs updating.
    DATABASE_INITIALIZED = True
    logger.info("Database already exists at faiss_index_facts/")
    return """
    > ## βœ… Database Ready
    >
    > The fact database is **already initialized** and loaded successfully.
    >
    > πŸ“ **Location:** `faiss_index_facts/`

    <br>

    **🎯 Next Step:** Go to the **Fact Check** tab to start verifying claims!
    """
84
+
85
+
86
def load_sample_claims() -> list:
    """Return up to ten sample claims for the sample dropdown.

    Claims are read from ``social_media_feed.csv`` using the ``claim`` column,
    or ``text`` when ``claim`` is absent. Missing and blank cells are skipped
    before the ten-item cut, and every value is converted to ``str`` so the
    result only ever contains displayable strings. When the file is missing,
    cannot be read, has neither column, or yields no usable rows, three
    built-in claims are returned instead.

    Returns:
        A non-empty list of claim strings.
    """
    logger.info("Loading sample claims")
    sample_claims = []

    if os.path.exists("social_media_feed.csv"):
        try:
            logger.debug("Reading sample claims from CSV")
            df = pd.read_csv("social_media_feed.csv")
            # Handle different column names: prefer 'claim', fall back to 'text'.
            column = next((c for c in ("claim", "text") if c in df.columns), None)
            if column is not None:
                # Drop NaN cells first so they neither show up as "nan"
                # options nor consume one of the ten slots.
                values = df[column].dropna().astype(str)
                values = values[values.str.strip() != ""]
                sample_claims = values.head(10).tolist()
                logger.info(
                    "Loaded %d sample claims from '%s' column",
                    len(sample_claims),
                    column,
                )
        except Exception as e:
            # Best effort: any read/parse problem falls through to the defaults.
            logger.warning("Failed to load sample claims from CSV: %s", e)

    # Default samples if CSV not available (or it produced nothing usable)
    if not sample_claims:
        logger.info("Using default sample claims")
        sample_claims = [
            "The Indian government has announced free electricity to all farmers starting July 2025.",
            "India's GDP growth rate reached 8.2% in Q1 2024.",
            "The Reserve Bank of India has reduced interest rates by 2% this month.",
        ]

    return sample_claims
115
+
116
+
117
def format_result(result: Dict[str, Any]) -> str:
    """Render a fact-check result dictionary as Markdown.

    Keys read from ``result`` (all optional): ``verdict``, ``confidence``,
    ``extracted_claim``, ``original_input``, ``entities_found``, ``reasoning``,
    ``evidence_details``, ``evidence_used`` and ``error``. Missing keys,
    ``None`` values and partially filled entity/evidence records are rendered
    with placeholder text instead of raising.

    Args:
        result: Output of the fact-checking pipeline.

    Returns:
        The Markdown report as a single string.
    """
    # Verdict with emoji
    verdict = result.get('verdict', 'Unknown')
    verdict_emoji = {
        'Likely True': 'βœ…',
        'Likely False': '❌',
        'Unverifiable': 'πŸ€·β€β™‚οΈ',
        'Unknown': '❓'
    }.get(verdict, '❓')

    # Confidence indicator. The value is coerced to str because a missing or
    # None confidence would otherwise break .capitalize(); the emoji lookup is
    # case-insensitive so "High" and "high" get the same marker.
    confidence = str(result.get('confidence') or 'unknown')
    confidence_emoji = {
        'high': '🟒',
        'medium': '🟑',
        'low': 'πŸ”΄'
    }.get(confidence.strip().lower(), 'βšͺ')

    # Collect fragments and join once at the end.
    parts = [
        "# πŸ“Š Fact-Check Results\n\n",
        f"> ## {verdict_emoji} Verdict: **{verdict}**\n",
        ">\n",
        f"> **Confidence Level:** {confidence_emoji} {confidence.capitalize()}\n\n",
    ]

    # Extracted claim (only shown if different from the input)
    extracted_claim = result.get('extracted_claim', '')
    if extracted_claim and extracted_claim != result.get('original_input', ''):
        parts.append("### 🎯 Extracted Claim\n")
        parts.append(f"> {extracted_claim}\n\n")

    # Entities found (at most five are listed)
    entities = result.get('entities_found') or []
    if entities:
        parts.append("### 🏷️ Key Entities Detected\n")
        entity_list = [
            f"**{e.get('text', '?')}** ({e.get('type', 'Unknown')})"
            for e in entities[:5]
        ]
        parts.append(", ".join(entity_list) + "\n\n")

    # Reasoning
    parts.append("### πŸ’­ Reasoning\n")
    parts.append(f"> {result.get('reasoning', 'No reasoning provided')}\n\n")

    # Evidence from verified sources
    parts.append("### πŸ“š Evidence from Verified Sources\n\n")
    evidence_details = result.get('evidence_details') or []
    if evidence_details:
        for i, evidence in enumerate(evidence_details, 1):
            parts.append(_format_evidence_entry(i, evidence))
    else:
        parts.append("> ⚠️ No relevant evidence found in the database\n\n")

    # Evidence used by LLM ([''] is treated the same as "nothing cited")
    evidence_used = result.get('evidence_used') or []
    if evidence_used and evidence_used != ['']:
        parts.append("<br>\n\n")
        parts.append("### 🎯 Evidence Cited by AI\n\n")
        parts.extend(f"{i}. {ev}\n" for i, ev in enumerate(evidence_used, 1))
        parts.append("\n")

    # Error information (if any)
    if 'error' in result:
        parts.append(f"\n⚠️ **Note:** {result['error']}\n")

    return "".join(parts)


def _format_evidence_entry(index: int, evidence: Dict[str, Any]) -> str:
    """Render one evidence record (score, fact text, metadata bullets)."""
    similarity = evidence.get('similarity')
    # A record without a score is still shown; only the percentage is replaced.
    score = "N/A" if similarity is None else f"{float(similarity) * 100:.1f}%"
    metadata = evidence.get('metadata') or {}

    lines = [
        f"#### Evidence #{index}\n",
        f"**Similarity Score:** {score}\n\n",
        f"> {evidence.get('fact', '')}\n\n",
        f"- πŸ“° **Source:** {metadata.get('source', 'Unknown')}\n",
        f"- πŸ“… **Date:** {metadata.get('date', 'Unknown')}\n",
        f"- 🏷️ **Category:** {metadata.get('category', 'General')}\n",
        _format_entity_summary(metadata.get('entities_dict') or {}),
        "\n",
    ]
    return "".join(lines)


def _format_entity_summary(entities_dict: Dict[str, Any]) -> str:
    """Render the "Entities Found" bullet, or "" when every group is empty."""
    if not entities_dict or not any(entities_dict.values()):
        return ""

    # (label shown, key in entities_dict, maximum number of items listed)
    groups = (
        ("Orgs", "organizations", 3),
        ("Locations", "locations", 3),
        ("Dates", "dates", 3),
        ("Percentages", "percentages", 2),
    )
    entity_parts = [
        f"{label}: {', '.join(str(v) for v in entities_dict[key][:limit])}"
        for label, key, limit in groups
        if entities_dict.get(key)
    ]
    summary = " | ".join(entity_parts) if entity_parts else "None"
    return f"- 🏷️ **Entities Found:** {summary}\n"
216
+
217
+
218
def check_fact(claim: Optional[str]) -> str:
    """Verify a claim and return the result as Markdown.

    The checks run in this order: the fact index must be available (the
    module-level ``DATABASE_INITIALIZED`` flag, or the ``faiss_index_facts``
    path existing on disk), then the claim must be non-blank. Only then is
    ``fact_check_claim`` called and its result passed to ``format_result``.
    Exceptions from the pipeline are logged and returned as an error message
    rather than propagated.

    Args:
        claim: Text entered by the user; ``None`` and whitespace-only values
            are treated as "no claim".

    Returns:
        Markdown with either the formatted result or an error/warning message.
    """
    global DATABASE_INITIALIZED

    # The emptiness check further down allows for a None claim, so the preview
    # for the log line must not slice the raw value directly.
    preview = (claim or "")[:100]
    logger.info(f"Fact-check requested for claim: {preview}...")

    # Check if database is initialized
    if not DATABASE_INITIALIZED:
        if os.path.exists("faiss_index_facts"):
            # Index was created outside this process' flag; accept it.
            DATABASE_INITIALIZED = True
            logger.info("Database auto-detected and marked as initialized")
        else:
            logger.warning("Database not initialized - prompting user")
            return "❌ **Error:** Please initialize the fact database first using the button in the Configuration tab."

    # Check if claim is provided
    if not claim or not claim.strip():
        logger.warning("Empty claim provided")
        return "⚠️ **Warning:** Please enter a claim to verify."

    try:
        logger.info("Running fact-checking pipeline")
        # Run fact-checking pipeline
        result = fact_check_claim(claim)

        logger.info(f"Fact-check completed with verdict: {result.get('verdict', 'Unknown')}")
        # Format and return results
        return format_result(result)

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        logger.exception("Error during fact-checking in app layer")
        return f"❌ **Error during fact-checking:** {str(e)}"
250
+
251
+
252
def use_sample_claim(sample_dropdown: str) -> str:
    """Map the dropdown selection to the text placed in the claim box.

    An empty selection or the placeholder entry yields ``""``; any other value
    is passed through unchanged.
    """
    placeholder = "-- Select a sample claim --"
    if not sample_dropdown or sample_dropdown == placeholder:
        return ""
    return sample_dropdown
257
+
258
+
259
+ # ---------------------------------------------------------------------------#
260
+ # Gradio Interface
261
+ # ---------------------------------------------------------------------------#
262
+
263
def create_interface():
    """Build the Gradio Blocks UI and return it without launching.

    The layout has three tabs: "Fact Check" (claim box, sample dropdown,
    result panel), "Configuration" (database initialization button plus a
    quick guide) and "About" (static description). Components are created in
    display order inside nested context managers, so statement order here
    determines the page layout.

    Returns:
        The ``gr.Blocks`` instance (``demo``); the caller is responsible for
        calling ``launch()`` on it.
    """

    # Load sample claims; the placeholder is prepended as the first option and
    # is the value use_sample_claim() treats as "nothing selected".
    sample_claims = load_sample_claims()
    sample_options = ["-- Select a sample claim --"] + sample_claims

    # Create Gradio Blocks interface
    with gr.Blocks(title="Fact-Checker") as demo:

        gr.Markdown("# πŸ” Fact-Checker")
        gr.Markdown("*Verify claims against trusted sources using AI-powered analysis*")

        with gr.Tabs():

            # Main Fact-Checking Tab
            with gr.Tab("πŸ“ Fact Check"):

                with gr.Row():
                    # Wide column: free-text claim entry.
                    with gr.Column(scale=3):
                        claim_input = gr.Textbox(
                            label="Enter claim to verify",
                            placeholder="Example: India's GDP growth rate reached 8.2% in Q1 2024.",
                            lines=4,
                            max_lines=10
                        )

                    # Narrow column: sample picker and the button that copies
                    # the selection into the claim box.
                    with gr.Column(scale=1):
                        sample_dropdown = gr.Dropdown(
                            choices=sample_options,
                            label="πŸ“‹ Sample Claims",
                            value="-- Select a sample claim --"
                        )
                        use_sample_btn = gr.Button("πŸ“ Use This Sample", size="sm")

                check_btn = gr.Button("πŸ” Check Fact", variant="primary", size="lg")

                # Result panel; starts with a welcome message that is replaced
                # by check_fact()'s return value on the first check.
                result_output = gr.Markdown(
                    value="""
                    > ## πŸ‘‹ Welcome to Fact-Checker!
                    >
                    > Get started by entering a claim or selecting a sample from the dropdown.
                    >
                    > ### πŸš€ How to Use:
                    >
                    > 1. **Enter a claim** in the text box above (or select a sample)
                    > 2. **Click "Check Fact"** to analyze the claim
                    > 3. **Review results** with AI-generated verdict, confidence, and evidence

                    <br>

                    πŸ’‘ **Tip:** Make sure the database is initialized in the Configuration tab before checking facts!

                    Results will appear here after you check your first claim.
                    """
                )

                # Button actions
                # claim text -> check_fact -> result panel
                check_btn.click(
                    fn=check_fact,
                    inputs=[claim_input],
                    outputs=[result_output]
                )

                # dropdown selection -> use_sample_claim -> claim text box
                use_sample_btn.click(
                    fn=use_sample_claim,
                    inputs=[sample_dropdown],
                    outputs=[claim_input]
                )

            # Configuration Tab
            with gr.Tab("βš™οΈ Configuration"):
                gr.Markdown("""
                ## πŸ—„οΈ Database Initialization

                Initialize the verified facts database to enable fact-checking capabilities.
                """)

                init_btn = gr.Button(
                    "πŸ”„ Initialize Fact Database",
                    variant="primary",
                    size="lg"
                )

                # Status panel; replaced by initialize_database()'s message.
                init_output = gr.Markdown(
                    value="""
                    > ## πŸ“‹ Ready to Initialize
                    >
                    > Click the **Initialize Fact Database** button above to:
                    >
                    > - πŸ“₯ Load verified facts from `verified_facts_db.csv`
                    > - πŸ”„ Generate embeddings using Together AI
                    > - πŸ’Ύ Create FAISS vector index for semantic search
                    > - βœ… Enable fact-checking capabilities

                    <br>

                    πŸ’‘ **Note:** This is a one-time setup process (unless you delete the `faiss_index_facts` folder).
                    """
                )

                # No inputs: the handler only reports status into init_output.
                init_btn.click(
                    fn=initialize_database,
                    inputs=[],
                    outputs=[init_output]
                )

                # Vertical spacer before the static guide.
                gr.Markdown("<br><br>")

                gr.Markdown("""
                ## ℹ️ Quick Guide

                > ### πŸ“ Steps to Verify a Claim:
                >
                > 1. **Initialize** the database using the button above (one-time setup)
                > 2. **Navigate** to the "Fact Check" tab
                > 3. **Enter** a claim or select a sample from the dropdown
                > 4. **Click** "Check Fact" to run the verification
                > 5. **Review** the AI-generated verdict with evidence and reasoning

                <br>

                > ### 🎯 Understanding Results:
                >
                > **Verdict Types:**
                > - βœ… **Likely True** β€” Evidence supports the claim
                > - ❌ **Likely False** β€” Evidence contradicts the claim
                > - πŸ€·β€β™‚οΈ **Unverifiable** β€” Insufficient or conflicting evidence
                >
                > **Confidence Indicators:**
                > - 🟒 **High** β€” Strong evidence match with verified facts
                > - 🟑 **Medium** β€” Moderate evidence alignment
                > - πŸ”΄ **Low** β€” Weak or minimal evidence found
                """)

            # About Tab (static text only, no event handlers)
            with gr.Tab("ℹ️ About"):
                gr.Markdown("""
                # About Fact-Checker

                This AI-powered fact-checking system verifies claims from news and social media against a database of verified facts using Retrieval-Augmented Generation (RAG).

                ## How It Works

                1. **Claim Extraction**: Uses spaCy NLP to extract key claims and entities
                2. **Fact Retrieval**: Searches for similar verified facts using FAISS vector database
                3. **LLM Classification**: Uses Meta-Llama-3.1-8B to classify claims with reasoning
                4. **Structured Output**: Provides verdict, confidence, evidence, and reasoning

                ## Technology Stack

                - **NLP**: spaCy (en_core_web_sm)
                - **Embeddings**: BAAI/bge-base-en-v1.5
                - **Vector DB**: FAISS
                - **LLM**: Meta-Llama-3.1-8B-Instruct-Turbo (via Together AI)
                - **UI**: Gradio

                ## Source Code

                This project demonstrates:
                - Retrieval-Augmented Generation (RAG)
                - Object-Oriented Programming (SOLID principles)
                - Prompt Engineering
                - Production ML System Design
                """)

        # Footer shown below the tabs on every page.
        gr.Markdown("---")
        gr.Markdown("*Built with ❀️ using Gradio, LangChain, and Together AI*")

    return demo
433
+
434
+
435
+ # ---------------------------------------------------------------------------#
436
+ # Main Entry Point
437
+ # ---------------------------------------------------------------------------#
438
+
439
if __name__ == "__main__":
    # Script entry point: configure logging, make sure the fact index is
    # available (building it if needed), then build and launch the UI.

    # No handler is attached anywhere else in this file, so without this the
    # INFO records below are dropped. basicConfig() does nothing if the root
    # logger has already been configured (e.g. by an imported module).
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )

    logger.info("=" * 60)
    logger.info("Starting Fact-Checker application")
    logger.info("=" * 60)

    # Auto-initialize database on startup (for HuggingFace Spaces deployment)
    if not os.path.exists("faiss_index_facts"):
        print("πŸ”„ Auto-initializing fact database...")
        logger.info("Auto-initializing fact database on startup")
        try:
            initialize_database()
        except Exception:
            # Defensive only: initialize_database() handles its own errors and
            # reports them through its return value.
            logger.exception("Auto-initialization raised unexpectedly")

        # initialize_database() does not raise on failure, so success has to
        # be read from the flag it sets rather than from the absence of an
        # exception.
        if DATABASE_INITIALIZED:
            print("βœ… Database initialized successfully")
            logger.info("Auto-initialization completed successfully")
        else:
            print("⚠️ Database initialization failed (see log for details)")
            logger.error("Auto-initialization failed")
            print("πŸ’‘ You can manually initialize from the Configuration tab")
    else:
        print("βœ… Database already initialized")
        logger.info("Database detected at startup")
        DATABASE_INITIALIZED = True

    logger.info("Creating Gradio interface")
    demo = create_interface()

    logger.info("Launching Gradio app on port 7860")
    # 0.0.0.0 so the server is reachable from outside the container/host.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
    logger.info("Application shutdown")