DevNumb committed on
Commit
fff1053
·
verified ·
1 Parent(s): 56ba6d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -121
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from sentence_transformers import SentenceTransformer, util
5
  import torch
 
6
  import re
7
  import logging
8
- from typing import Dict, List
 
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO)
@@ -13,23 +16,33 @@ logger = logging.getLogger(__name__)
13
 
14
  class FakeNewsDetector:
15
  def __init__(self):
16
- logger.info("Loading Sentence Transformer model...")
17
- self.model = SentenceTransformer('all-MiniLM-L6-v2')
18
- logger.info("Model loaded successfully!")
19
 
20
- # Enhanced fake news patterns
 
 
 
 
 
 
 
 
 
 
21
  self.fake_news_patterns = [
22
  "conspiracy theory", "false claim", "misinformation", "debunked",
23
- "hoax", "unverified", "clickbait", "deep state", "fake news",
24
  "mainstream media lies", "cover up", "they don't want you to know",
25
- "secret truth", "hidden facts", "wake up people"
 
26
  ]
27
 
28
  # Credible sources
29
  self.credible_sources = [
30
  'reuters.com', 'apnews.com', 'bbc.com', 'nytimes.com',
31
  'theguardian.com', 'washingtonpost.com', 'npr.org',
32
- 'wsj.com', 'ft.com', 'bloomberg.com'
 
33
  ]
34
 
35
  # Fake news indicators
@@ -37,23 +50,62 @@ class FakeNewsDetector:
37
  "exclusive reveal", "shocking truth", "they don't want you to know",
38
  "mainstream media won't report this", "breaking secret news",
39
  "you won't believe", "this will shock you", "do your own research",
40
- "the truth they're hiding", "wake up sheeple"
41
  ]
42
 
43
  # Sensational words
44
  self.sensational_words = [
45
  'shocking', 'amazing', 'unbelievable', 'incredible', 'astounding',
46
- 'mind-blowing', 'explosive', 'bombshell', 'earth-shattering'
 
47
  ]
48
 
49
- def extract_content(self, url: str) -> Dict:
50
- """Extract text content from a webpage"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
  headers = {
53
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
54
  }
55
 
56
- # Validate URL
57
  if not url.startswith(('http://', 'https://')):
58
  url = 'https://' + url
59
 
@@ -63,14 +115,14 @@ class FakeNewsDetector:
63
  soup = BeautifulSoup(response.content, 'html.parser')
64
 
65
  # Remove unwanted elements
66
- for element in soup(["script", "style", "nav", "footer", "header"]):
67
  element.decompose()
68
 
69
  # Extract title
70
  title = soup.find('title')
71
  title_text = title.get_text().strip() if title else "No title found"
72
 
73
- # Try to find main content
74
  content_text = ""
75
  content_selectors = [
76
  'article',
@@ -81,7 +133,8 @@ class FakeNewsDetector:
81
  'main',
82
  '[role="main"]',
83
  '.news-content',
84
- '.story-body'
 
85
  ]
86
 
87
  for selector in content_selectors:
@@ -90,13 +143,13 @@ class FakeNewsDetector:
90
  content_parts = []
91
  for elem in elements:
92
  text = elem.get_text().strip()
93
- if len(text) > 100: # Only take substantial content
94
  content_parts.append(text)
95
  if content_parts:
96
  content_text = ' '.join(content_parts)
97
  break
98
 
99
- # Fallback to body if no specific content found
100
  if not content_text or len(content_text) < 200:
101
  body = soup.find('body')
102
  if body:
@@ -112,12 +165,11 @@ class FakeNewsDetector:
112
  'url': url
113
  }
114
 
115
- except requests.exceptions.RequestException as e:
116
- return {'success': False, 'error': f"Network error: {str(e)}"}
117
  except Exception as e:
118
- return {'success': False, 'error': f"Extraction error: {str(e)}"}
 
119
 
120
- def clean_text(self, text: str) -> str:
121
  """Clean and normalize text"""
122
  # Remove extra whitespace
123
  text = re.sub(r'\s+', ' ', text)
@@ -125,55 +177,70 @@ class FakeNewsDetector:
125
  text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
126
  return text.strip()
127
 
128
- def analyze_content(self, text: str) -> Dict:
129
  """Analyze text content for fake news indicators"""
130
  text_lower = text.lower()
131
 
132
- # Sentiment and style analysis
133
  sensational_score = sum(1 for word in self.sensational_words if word in text_lower)
134
  fake_indicator_count = sum(1 for indicator in self.fake_indicators if indicator in text_lower)
135
 
136
  # Punctuation analysis
137
  exclamation_count = text.count('!')
138
  question_count = text.count('?')
 
 
139
  capital_words = len(re.findall(r'\b[A-Z]{3,}\b', text))
140
 
 
 
 
 
141
  return {
142
  'sensational_score': sensational_score,
143
  'fake_indicator_count': fake_indicator_count,
144
  'exclamation_count': exclamation_count,
145
  'question_count': question_count,
146
  'capital_words': capital_words,
 
147
  'text_length': len(text)
148
  }
149
 
150
- def check_source_credibility(self, url: str) -> float:
151
  """Check if the source is known to be credible"""
152
- domain_credibility = 0.0
153
 
 
154
  for credible_source in self.credible_sources:
155
- if credible_source in url.lower():
156
- domain_credibility = 0.8
157
- break
158
-
159
  # Penalize known unreliable domains
160
- unreliable_domains = ['.blogspot.', '.wordpress.', 'medium.com']
161
  for domain in unreliable_domains:
162
- if domain in url.lower():
163
- domain_credibility = max(0.0, domain_credibility - 0.3)
164
 
165
- return domain_credibility
166
 
167
- def semantic_analysis(self, text: str) -> float:
168
  """Analyze semantic similarity with known fake news patterns"""
169
  try:
170
  if not text or len(text) < 50:
171
  return 0.0
172
 
173
- text_embedding = self.model.encode([text], convert_to_tensor=True)
174
- pattern_embeddings = self.model.encode(self.fake_news_patterns, convert_to_tensor=True)
 
 
 
 
 
 
 
175
 
176
- similarities = util.pytorch_cos_sim(text_embedding, pattern_embeddings)
 
177
  max_similarity = float(torch.max(similarities).item())
178
 
179
  return max_similarity
@@ -182,7 +249,7 @@ class FakeNewsDetector:
182
  logger.error(f"Semantic analysis error: {e}")
183
  return 0.0
184
 
185
- def detect_fake_news(self, url: str) -> Dict:
186
  """Main fake news detection function"""
187
  logger.info(f"Analyzing URL: {url}")
188
 
@@ -190,12 +257,11 @@ class FakeNewsDetector:
190
  content_data = self.extract_content(url)
191
  if not content_data['success']:
192
  return {
193
- 'is_fake': 'Error',
194
  'confidence': 0.0,
195
- 'error': content_data.get('error', 'Unknown error'),
196
- 'details': {},
197
  'title': 'Error',
198
- 'content_preview': ''
199
  }
200
 
201
  title = content_data['title']
@@ -204,18 +270,17 @@ class FakeNewsDetector:
204
 
205
  if len(content.strip()) < 100:
206
  return {
207
- 'is_fake': 'Insufficient Content',
208
  'confidence': 0.0,
209
- 'error': 'Not enough text content found to analyze',
210
- 'details': {},
211
  'title': title,
212
- 'content_preview': content[:200] + '...' if len(content) > 200 else content
213
  }
214
 
215
  # Perform analyses
216
  source_credibility = self.check_source_credibility(url)
217
  content_analysis = self.analyze_content(full_text)
218
- semantic_similarity = self.semantic_analysis(full_text)
219
 
220
  # Calculate fake news score
221
  fake_score = 0.0
@@ -224,7 +289,7 @@ class FakeNewsDetector:
224
  source_factor = (1 - source_credibility) * 0.25
225
  fake_score += source_factor
226
 
227
- # Semantic similarity
228
  semantic_factor = semantic_similarity * 0.35
229
  fake_score += semantic_factor
230
 
@@ -233,7 +298,8 @@ class FakeNewsDetector:
233
  content_analysis['sensational_score'] * 0.05 +
234
  content_analysis['fake_indicator_count'] * 0.15 +
235
  min(content_analysis['exclamation_count'] * 0.02, 0.1) +
236
- min(content_analysis['capital_words'] * 0.01, 0.05)
 
237
  ) * 0.4
238
  fake_score += content_factor
239
 
@@ -241,30 +307,45 @@ class FakeNewsDetector:
241
 
242
  # Determine result
243
  if fake_score > 0.7:
244
- result = "🚨 Likely Fake News"
245
  color = "red"
246
  elif fake_score > 0.5:
247
- result = "⚠️ Suspicious Content"
248
  color = "orange"
249
  elif fake_score > 0.3:
250
- result = "πŸ€” Potentially Misleading"
251
  color = "yellow"
252
  else:
253
- result = "βœ… Likely Credible"
254
  color = "green"
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  return {
257
- 'is_fake': result,
258
  'confidence': fake_score,
259
- 'color': color,
260
- 'details': {
261
- 'title': title,
262
- 'content_preview': content[:300] + '...' if len(content) > 300 else content,
263
- 'source_credibility': source_credibility,
264
- 'semantic_similarity': semantic_similarity,
265
- 'content_analysis': content_analysis,
266
- 'url_analyzed': url
267
- }
268
  }
269
 
270
  # Initialize detector
@@ -272,95 +353,114 @@ detector = FakeNewsDetector()
272
 
273
  def analyze_url(url):
274
  """Gradio interface function"""
275
- if not url:
276
- return "Please enter a URL", "", "", "", "white"
277
 
278
  try:
279
  result = detector.detect_fake_news(url)
280
 
281
- if 'error' in result and result['is_fake'] in ['Error', 'Insufficient Content']:
282
- return result['is_fake'], f"Error: {result.get('error', 'Unknown error')}", "", "", "white"
283
-
284
- # Format output
285
  confidence_percent = f"{result['confidence'] * 100:.1f}%"
286
 
287
- details_text = f"""
288
- **πŸ“Š Detailed Analysis Results:**
289
-
290
- **Source Analysis:**
291
- - Source Credibility Score: {result['details']['source_credibility']:.2f}/1.0
292
-
293
- **Content Analysis:**
294
- - Semantic Similarity to Fake Patterns: {result['details']['semantic_similarity']:.2f}/1.0
295
- - Sensational Language Score: {result['details']['content_analysis']['sensational_score']}
296
- - Fake News Indicators Found: {result['details']['content_analysis']['fake_indicator_count']}
297
- - Exclamation Marks: {result['details']['content_analysis']['exclamation_count']}
298
- - ALL-CAPS Words: {result['details']['content_analysis']['capital_words']}
299
-
300
- **Content Preview:**
301
- {result['details']['content_preview']}
302
- """
303
-
304
- return result['is_fake'], confidence_percent, details_text, result['details']['title'], result['color']
305
 
306
  except Exception as e:
307
  logger.error(f"Analysis error: {e}")
308
- return "Error", f"An error occurred during analysis: {str(e)}", "", "", "white"
309
 
310
  # Create Gradio interface
311
- with gr.Blocks(theme=gr.themes.Soft(), title="Fake News Detector") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  gr.Markdown("""
313
  # πŸ•΅οΈ Fake News Detector
314
- **Enter a URL to analyze news articles for potential fake news indicators**
315
 
316
- This tool uses AI to analyze content patterns, source credibility, and semantic similarities to detect potential fake news.
317
  """)
318
 
319
  with gr.Row():
320
- with gr.Column():
321
  url_input = gr.Textbox(
322
- label="Enter News Article URL",
323
  placeholder="https://example.com/news-article",
324
- lines=1
 
325
  )
326
- analyze_btn = gr.Button("Analyze Article", variant="primary")
327
 
328
- with gr.Column():
329
- result_status = gr.Textbox(label="Analysis Result", interactive=False)
330
- confidence_score = gr.Textbox(label="Confidence Score", interactive=False)
331
- article_title = gr.Textbox(label="Article Title", interactive=False)
 
 
 
 
 
 
 
 
 
 
332
 
333
- with gr.Row():
334
- details_output = gr.Markdown(label="Detailed Analysis")
335
-
336
- # Set up event handling
337
- analyze_btn.click(
338
- fn=analyze_url,
339
- inputs=url_input,
340
- outputs=[result_status, confidence_score, details_output, article_title]
341
  )
342
 
343
- # Example for quick testing
344
  gr.Examples(
 
345
  examples=[
346
- ["https://www.reuters.com/world/"],
347
- ["https://apnews.com/"],
348
- ["https://www.bbc.com/news"]
349
  ],
350
  inputs=url_input
351
  )
352
 
353
  gr.Markdown("""
354
  ---
355
- **How it works:**
356
- 1. πŸ” Extracts content from the provided URL
357
- 2. πŸ€– Analyzes text using Sentence Transformers
358
- 3. πŸ“Š Checks for fake news patterns and indicators
359
- 4. βš–οΈ Evaluates source credibility
360
- 5. πŸ“ˆ Provides confidence score and detailed analysis
361
 
362
- **Note:** This is an AI-powered tool and should be used as a supplementary resource for fact-checking.
 
 
 
 
 
 
 
 
 
363
  """)
 
 
 
 
 
 
 
364
 
365
  if __name__ == "__main__":
366
- demo.launch(share=True)
 
 
 
 
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ from transformers import AutoTokenizer, AutoModel
5
  import torch
6
+ import torch.nn.functional as F
7
  import re
8
  import logging
9
+ import os
10
+ import numpy as np
11
+ from sklearn.metrics.pairwise import cosine_similarity
12
 
13
  # Set up logging
14
  logging.basicConfig(level=logging.INFO)
 
16
 
17
  class FakeNewsDetector:
18
  def __init__(self):
19
+ logger.info("Loading sentence transformer model directly...")
 
 
20
 
21
+ try:
22
+ # Load model and tokenizer directly
23
+ self.model_name = "sentence-transformers/all-MiniLM-L6-v2"
24
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
25
+ self.model = AutoModel.from_pretrained(self.model_name)
26
+ logger.info("Model loaded successfully!")
27
+ except Exception as e:
28
+ logger.error(f"Error loading model: {e}")
29
+ raise
30
+
31
+ # Fake news patterns
32
  self.fake_news_patterns = [
33
  "conspiracy theory", "false claim", "misinformation", "debunked",
34
+ "hoax", "unverified", "clickbait", "fake news", "deep state",
35
  "mainstream media lies", "cover up", "they don't want you to know",
36
+ "secret truth", "hidden facts", "wake up people", "government lying",
37
+ "media conspiracy", "false flag", "planned pandemic"
38
  ]
39
 
40
  # Credible sources
41
  self.credible_sources = [
42
  'reuters.com', 'apnews.com', 'bbc.com', 'nytimes.com',
43
  'theguardian.com', 'washingtonpost.com', 'npr.org',
44
+ 'wsj.com', 'ft.com', 'bloomberg.com', 'abcnews.go.com',
45
+ 'cbsnews.com', 'nbcnews.com', 'cnn.com'
46
  ]
47
 
48
  # Fake news indicators
 
50
  "exclusive reveal", "shocking truth", "they don't want you to know",
51
  "mainstream media won't report this", "breaking secret news",
52
  "you won't believe", "this will shock you", "do your own research",
53
+ "the truth they're hiding", "wake up sheeple", "open your eyes"
54
  ]
55
 
56
  # Sensational words
57
  self.sensational_words = [
58
  'shocking', 'amazing', 'unbelievable', 'incredible', 'astounding',
59
+ 'mind-blowing', 'explosive', 'bombshell', 'earth-shattering',
60
+ 'revolutionary', 'game-changing', 'miracle'
61
  ]
62
 
63
+ def mean_pooling(self, model_output, attention_mask):
64
+ """Apply mean pooling to get sentence embeddings"""
65
+ token_embeddings = model_output[0] # First element contains all token embeddings
66
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
67
+ sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
68
+ sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
69
+ return sum_embeddings / sum_mask
70
+
71
def get_sentence_embedding(self, text):
    """Embed a single string or a batch of strings with the loaded model.

    Args:
        text: One string, or a list/tuple of strings. A batch yields one
            embedding row per element, in the same order.

    Returns:
        The L2-normalized output of ``self.mean_pooling`` (one row per input
        string), or ``None`` when the input is empty/blank or when
        tokenization or the forward pass raises.
    """
    try:
        if not text:
            return None

        if isinstance(text, str):
            if not text.strip():
                return None
        else:
            # Batch input. The previous guard called ``text.strip()`` on every
            # input, which raised AttributeError for a list; the broad except
            # below then turned that into ``None``, so batch requests (such as
            # the fake-news pattern list) never produced embeddings.
            text = list(text)

        # Tokenize sentences
        encoded_input = self.tokenizer(
            text,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors='pt'
        )

        # Compute token embeddings without tracking gradients (inference only)
        with torch.no_grad():
            model_output = self.model(**encoded_input)

        # Pool token embeddings into one vector per sentence
        sentence_embeddings = self.mean_pooling(model_output, encoded_input['attention_mask'])

        # Normalize each row to unit L2 length
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

        return sentence_embeddings

    except Exception as e:
        # Best-effort: callers treat None as "no embedding available".
        logger.error(f"Error getting sentence embedding: {e}")
        return None
101
+
102
+ def extract_content(self, url: str):
103
+ """Extract content from URL"""
104
  try:
105
  headers = {
106
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
107
  }
108
 
 
109
  if not url.startswith(('http://', 'https://')):
110
  url = 'https://' + url
111
 
 
115
  soup = BeautifulSoup(response.content, 'html.parser')
116
 
117
  # Remove unwanted elements
118
+ for element in soup(["script", "style", "nav", "footer", "header", "aside"]):
119
  element.decompose()
120
 
121
  # Extract title
122
  title = soup.find('title')
123
  title_text = title.get_text().strip() if title else "No title found"
124
 
125
+ # Try multiple content selectors
126
  content_text = ""
127
  content_selectors = [
128
  'article',
 
133
  'main',
134
  '[role="main"]',
135
  '.news-content',
136
+ '.story-body',
137
+ '.content'
138
  ]
139
 
140
  for selector in content_selectors:
 
143
  content_parts = []
144
  for elem in elements:
145
  text = elem.get_text().strip()
146
+ if len(text) > 100: # Only substantial content
147
  content_parts.append(text)
148
  if content_parts:
149
  content_text = ' '.join(content_parts)
150
  break
151
 
152
+ # Fallback to body
153
  if not content_text or len(content_text) < 200:
154
  body = soup.find('body')
155
  if body:
 
165
  'url': url
166
  }
167
 
 
 
168
  except Exception as e:
169
+ logger.error(f"Content extraction error: {e}")
170
+ return {'success': False, 'error': str(e)}
171
 
172
+ def clean_text(self, text: str):
173
  """Clean and normalize text"""
174
  # Remove extra whitespace
175
  text = re.sub(r'\s+', ' ', text)
 
177
  text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
178
  return text.strip()
179
 
180
+ def analyze_content(self, text: str):
181
  """Analyze text content for fake news indicators"""
182
  text_lower = text.lower()
183
 
184
+ # Count various indicators
185
  sensational_score = sum(1 for word in self.sensational_words if word in text_lower)
186
  fake_indicator_count = sum(1 for indicator in self.fake_indicators if indicator in text_lower)
187
 
188
  # Punctuation analysis
189
  exclamation_count = text.count('!')
190
  question_count = text.count('?')
191
+
192
+ # Check for all-caps words
193
  capital_words = len(re.findall(r'\b[A-Z]{3,}\b', text))
194
 
195
+ # Check for emotional language
196
+ emotional_words = ['outrageous', 'disgusting', 'horrible', 'terrible', 'awful']
197
+ emotional_count = sum(1 for word in emotional_words if word in text_lower)
198
+
199
  return {
200
  'sensational_score': sensational_score,
201
  'fake_indicator_count': fake_indicator_count,
202
  'exclamation_count': exclamation_count,
203
  'question_count': question_count,
204
  'capital_words': capital_words,
205
+ 'emotional_count': emotional_count,
206
  'text_length': len(text)
207
  }
208
 
209
def check_source_credibility(self, url: str):
    """Score the credibility of a URL's host against known domain lists.

    Matching is done on the parsed hostname rather than on the whole URL, so
    a credible domain that merely appears in the path, query string, or as a
    prefix of an unrelated host (``fakebbc.com.evil.net``) does not count.

    Args:
        url: The article URL, with or without a scheme.

    Returns:
        0.8 when the host is (a subdomain of) an entry in
        ``self.credible_sources``, 0.2 when it is on a known low-credibility
        hosting platform, otherwise 0.5.
    """
    from urllib.parse import urlparse

    url_lower = url.lower().strip()
    # urlparse only recognizes the host when a scheme or a leading "//" is
    # present; callers may pass bare inputs such as "bbc.com/news".
    if '://' not in url_lower:
        url_lower = '//' + url_lower
    try:
        host = urlparse(url_lower).hostname or ''
    except ValueError:
        # Malformed authority (e.g. a broken IPv6 literal): treat as unknown.
        host = ''

    def _is_domain(domain):
        """Return True when host equals domain or is a subdomain of it."""
        return host == domain or host.endswith('.' + domain)

    # Check credible sources
    for credible_source in self.credible_sources:
        if _is_domain(credible_source.lower()):
            return 0.8  # High credibility

    # Penalize known unreliable domains
    unreliable_domains = ['.blogspot.', '.wordpress.', '.tumblr.', 'medium.com']
    for domain in unreliable_domains:
        if domain.startswith('.'):
            # Dotted fragments identify user subdomains on a hosting platform
            # under any TLD, e.g. "name.blogspot.co.uk".
            if domain in host:
                return 0.2  # Low credibility
        elif _is_domain(domain):
            return 0.2  # Low credibility

    return 0.5  # Neutral credibility
225
 
226
+ def semantic_similarity_analysis(self, text: str):
227
  """Analyze semantic similarity with known fake news patterns"""
228
  try:
229
  if not text or len(text) < 50:
230
  return 0.0
231
 
232
+ # Get embedding for input text
233
+ text_embedding = self.get_sentence_embedding(text)
234
+ if text_embedding is None:
235
+ return 0.0
236
+
237
+ # Get embeddings for fake news patterns
238
+ pattern_embeddings = self.get_sentence_embedding(self.fake_news_patterns)
239
+ if pattern_embeddings is None:
240
+ return 0.0
241
 
242
+ # Calculate cosine similarity
243
+ similarities = F.cosine_similarity(text_embedding, pattern_embeddings)
244
  max_similarity = float(torch.max(similarities).item())
245
 
246
  return max_similarity
 
249
  logger.error(f"Semantic analysis error: {e}")
250
  return 0.0
251
 
252
+ def detect_fake_news(self, url: str):
253
  """Main fake news detection function"""
254
  logger.info(f"Analyzing URL: {url}")
255
 
 
257
  content_data = self.extract_content(url)
258
  if not content_data['success']:
259
  return {
260
+ 'status': '❌ Extraction Failed',
261
  'confidence': 0.0,
262
+ 'message': f"Could not extract content: {content_data.get('error', 'Unknown error')}",
 
263
  'title': 'Error',
264
+ 'color': 'red'
265
  }
266
 
267
  title = content_data['title']
 
270
 
271
  if len(content.strip()) < 100:
272
  return {
273
+ 'status': '⚠️ Insufficient Content',
274
  'confidence': 0.0,
275
+ 'message': 'Not enough text content found to analyze. The article may be behind a paywall or require JavaScript.',
 
276
  'title': title,
277
+ 'color': 'orange'
278
  }
279
 
280
  # Perform analyses
281
  source_credibility = self.check_source_credibility(url)
282
  content_analysis = self.analyze_content(full_text)
283
+ semantic_similarity = self.semantic_similarity_analysis(full_text)
284
 
285
  # Calculate fake news score
286
  fake_score = 0.0
 
289
  source_factor = (1 - source_credibility) * 0.25
290
  fake_score += source_factor
291
 
292
+ # Semantic similarity with fake patterns
293
  semantic_factor = semantic_similarity * 0.35
294
  fake_score += semantic_factor
295
 
 
298
  content_analysis['sensational_score'] * 0.05 +
299
  content_analysis['fake_indicator_count'] * 0.15 +
300
  min(content_analysis['exclamation_count'] * 0.02, 0.1) +
301
+ min(content_analysis['capital_words'] * 0.01, 0.05) +
302
+ min(content_analysis['emotional_count'] * 0.03, 0.05)
303
  ) * 0.4
304
  fake_score += content_factor
305
 
 
307
 
308
  # Determine result
309
  if fake_score > 0.7:
310
+ status = "🚨 Likely Fake News"
311
  color = "red"
312
  elif fake_score > 0.5:
313
+ status = "⚠️ Suspicious Content"
314
  color = "orange"
315
  elif fake_score > 0.3:
316
+ status = "πŸ€” Potentially Misleading"
317
  color = "yellow"
318
  else:
319
+ status = "βœ… Likely Credible"
320
  color = "green"
321
 
322
+ # Create detailed analysis message
323
+ message = f"""
324
+ **πŸ“Š Detailed Analysis Results:**
325
+
326
+ **Source Analysis:**
327
+ - Source Credibility Score: {source_credibility:.2f}/1.0
328
+
329
+ **Semantic Analysis:**
330
+ - Similarity to Fake News Patterns: {semantic_similarity:.2f}/1.0
331
+
332
+ **Content Analysis:**
333
+ - Sensational Language Score: {content_analysis['sensational_score']}
334
+ - Fake News Indicators Found: {content_analysis['fake_indicator_count']}
335
+ - Exclamation Marks: {content_analysis['exclamation_count']}
336
+ - ALL-CAPS Words: {content_analysis['capital_words']}
337
+ - Emotional Language: {content_analysis['emotional_count']}
338
+
339
+ **Content Preview:**
340
+ {content[:400]}...
341
+ """.strip()
342
+
343
  return {
344
+ 'status': status,
345
  'confidence': fake_score,
346
+ 'message': message,
347
+ 'title': title,
348
+ 'color': color
 
 
 
 
 
 
349
  }
350
 
351
  # Initialize detector
 
353
 
354
def analyze_url(url):
    """Gradio interface function: analyze one URL and format the result.

    Args:
        url: Text from the URL input box. May be ``None`` or blank.

    Returns:
        A 5-tuple ``(status, confidence_percent, message, title, color)``.
        A placeholder tuple is returned for missing input, and an error tuple
        when the detector raises.

    NOTE(review): the click handler in this file lists four output components
    while this function returns five values (``color`` has no component);
    confirm how the installed Gradio version treats the extra value.
    """
    # Validate before the try block: calling .strip() on None (or any
    # non-string) used to raise here instead of returning the prompt tuple.
    if not isinstance(url, str) or not url.strip():
        return "Please enter a URL", "0%", "No analysis performed.", "No title", "gray"

    # Surrounding whitespace would otherwise defeat the scheme check made
    # during content extraction.
    url = url.strip()

    try:
        result = detector.detect_fake_news(url)

        confidence_percent = f"{result['confidence'] * 100:.1f}%"

        return (
            result['status'],
            confidence_percent,
            result['message'],
            result['title'],
            result['color']
        )

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the app.
        logger.error(f"Analysis error: {e}")
        return "❌ Analysis Error", "0%", f"An error occurred: {str(e)}", "Error", "red"
375
 
376
  # Create Gradio interface
377
+ with gr.Blocks(
378
+ theme=gr.themes.Soft(),
379
+ title="Fake News Detector",
380
+ css="""
381
+ .gradio-container {
382
+ max-width: 900px !important;
383
+ }
384
+ .result-box {
385
+ padding: 10px;
386
+ border-radius: 5px;
387
+ margin: 5px 0;
388
+ }
389
+ """
390
+ ) as demo:
391
+
392
  gr.Markdown("""
393
  # πŸ•΅οΈ Fake News Detector
394
+ **Analyze news articles for potential fake news using AI and Semantic Analysis**
395
 
396
+ *This tool uses sentence transformers to analyze content patterns and detect potential misinformation*
397
  """)
398
 
399
  with gr.Row():
400
+ with gr.Column(scale=2):
401
  url_input = gr.Textbox(
402
+ label="πŸ“° Enter News Article URL",
403
  placeholder="https://example.com/news-article",
404
+ lines=1,
405
+ max_lines=1
406
  )
407
+ analyze_btn = gr.Button("πŸ” Analyze Article", variant="primary", size="lg")
408
 
409
+ with gr.Column(scale=1):
410
+ with gr.Group():
411
+ result_status = gr.Textbox(
412
+ label="🎯 Analysis Result",
413
+ interactive=False
414
+ )
415
+ confidence_score = gr.Textbox(
416
+ label="πŸ“ˆ Confidence Score",
417
+ interactive=False
418
+ )
419
+ article_title = gr.Textbox(
420
+ label="πŸ“ Article Title",
421
+ interactive=False
422
+ )
423
 
424
+ details_output = gr.Markdown(
425
+ label="πŸ“Š Detailed Analysis"
 
 
 
 
 
 
426
  )
427
 
428
+ # Examples
429
  gr.Examples(
430
+ label="πŸ’‘ Try these examples:",
431
  examples=[
432
+ ["https://www.bbc.com/news"],
433
+ ["https://www.reuters.com/"],
434
+ ["https://apnews.com/"]
435
  ],
436
  inputs=url_input
437
  )
438
 
439
  gr.Markdown("""
440
  ---
 
 
 
 
 
 
441
 
442
+ **πŸ” How it works:**
443
+ 1. **Content Extraction**: Extracts text from the provided URL
444
+ 2. **Semantic Analysis**: Uses sentence transformers to analyze similarity with known fake news patterns
445
+ 3. **Source Verification**: Checks the domain against known credible sources
446
+ 4. **Pattern Detection**: Identifies sensational language and fake news indicators
447
+ 5. **Confidence Scoring**: Provides a comprehensive confidence score
448
+
449
+ **⚠️ Disclaimer**: This is an AI-powered educational tool. Always verify information through multiple credible sources and fact-checking organizations.
450
+
451
+ *Built with ❀️ using Transformers from Hugging Face*
452
  """)
453
+
454
+ # Set up the analysis button
455
+ analyze_btn.click(
456
+ fn=analyze_url,
457
+ inputs=url_input,
458
+ outputs=[result_status, confidence_score, details_output, article_title]
459
+ )
460
 
461
  if __name__ == "__main__":
462
+ demo.launch(
463
+ server_name="0.0.0.0",
464
+ server_port=7860,
465
+ share=False
466
+ )