DevNumb committed on
Commit
dd2c1f8
·
verified ·
1 Parent(s): 244a241

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -20
app.py CHANGED
@@ -22,7 +22,9 @@ class FakeNewsDetector:
22
  "text-classification",
23
  model=MODEL,
24
  tokenizer=MODEL,
25
- device=-1 # CPU mode for free tier
 
 
26
  )
27
 
28
  logger.info("✅ Model loaded successfully!")
@@ -115,6 +117,15 @@ class FakeNewsDetector:
115
  text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
116
  return text.strip()
117
 
 
 
 
 
 
 
 
 
 
118
  def analyze_content(self, text: str):
119
  """Analyze text for fake news indicators"""
120
  text_lower = text.lower()
@@ -160,7 +171,6 @@ class FakeNewsDetector:
160
 
161
  title = content_data['title']
162
  content = content_data['content']
163
- full_text = f"{title}. {content}"
164
 
165
  if len(content.strip()) < 100:
166
  return {
@@ -170,9 +180,16 @@ class FakeNewsDetector:
170
  'title': title
171
  }
172
 
173
- # Use RoBERTa model (auto-truncates to 512 tokens)
 
 
 
 
 
 
 
174
  try:
175
- result = self.classifier(full_text)[0]
176
 
177
  label = result['label']
178
  score = result['score']
@@ -184,29 +201,29 @@ class FakeNewsDetector:
184
 
185
  except Exception as e:
186
  logger.error(f"Model error: {e}")
187
- return {
188
- 'status': '❌ Analysis Error',
189
- 'confidence': 0.0,
190
- 'message': f"Model error: {str(e)}",
191
- 'title': title
192
- }
193
 
194
  # Additional analysis
195
  source_credibility = self.check_source_credibility(url)
196
  content_analysis = self.analyze_content(full_text)
197
 
198
- # Combined score (80% model, 20% source)
199
  model_weight = score if is_fake else (1 - score)
200
- source_weight = (1 - source_credibility) * 0.2
201
- combined_score = (model_weight * 0.8) + source_weight
 
 
202
 
203
  # Determine status
204
- if is_fake and score > 0.75:
205
  status = "🚨 Likely Fake News"
206
- elif is_fake and score > 0.55:
207
  status = "⚠️ Suspicious Content"
208
- elif not is_fake and score > 0.75:
209
  status = "✅ Likely Credible"
 
 
210
  else:
211
  status = "🤔 Uncertain - Verify Manually"
212
 
@@ -231,7 +248,7 @@ class FakeNewsDetector:
231
  **Combined Score: {combined_score * 100:.1f}%**
232
 
233
  **Preview:**
234
- {content[:350]}...
235
 
236
  ---
237
  **Note:** This is an AI prediction. Always verify from multiple sources.
@@ -244,6 +261,53 @@ class FakeNewsDetector:
244
  'title': title
245
  }
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  # Initialize detector
248
  logger.info("Initializing Fake News Detector...")
249
  detector = FakeNewsDetector()
@@ -328,9 +392,10 @@ with gr.Blocks(
328
  **How it works:**
329
 
330
  1. **Extracts** article text from URL
331
- 2. **Analyzes** using RoBERTa transformer (40k+ articles trained)
332
- 3. **Checks** source credibility
333
- 4. **Provides** confidence score
 
334
 
335
  **Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)
336
 
 
22
  "text-classification",
23
  model=MODEL,
24
  tokenizer=MODEL,
25
+ device=-1, # CPU mode for free tier
26
+ max_length=512, # Explicitly set max length
27
+ truncation=True # Enable truncation
28
  )
29
 
30
  logger.info("✅ Model loaded successfully!")
 
117
  text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
118
  return text.strip()
119
 
120
+ def truncate_text(self, text: str, max_words: int = 400):
121
+ """Truncate text to maximum words for the model"""
122
+ words = text.split()
123
+ if len(words) > max_words:
124
+ truncated = ' '.join(words[:max_words])
125
+ logger.info(f"Text truncated from {len(words)} to {max_words} words")
126
+ return truncated
127
+ return text
128
+
129
  def analyze_content(self, text: str):
130
  """Analyze text for fake news indicators"""
131
  text_lower = text.lower()
 
171
 
172
  title = content_data['title']
173
  content = content_data['content']
 
174
 
175
  if len(content.strip()) < 100:
176
  return {
 
180
  'title': title
181
  }
182
 
183
+ # Prepare text for model (title + truncated content)
184
+ full_text = f"{title}. {content}"
185
+
186
+ # Truncate text to safe length for the model
187
+ truncated_text = self.truncate_text(full_text, max_words=350)
188
+ logger.info(f"Text length: {len(truncated_text)} characters")
189
+
190
+ # Use RoBERTa model with error handling
191
  try:
192
+ result = self.classifier(truncated_text)[0]
193
 
194
  label = result['label']
195
  score = result['score']
 
201
 
202
  except Exception as e:
203
  logger.error(f"Model error: {e}")
204
+ # Fallback to content analysis only
205
+ return self.fallback_analysis(title, content, url, str(e))
 
 
 
 
206
 
207
  # Additional analysis
208
  source_credibility = self.check_source_credibility(url)
209
  content_analysis = self.analyze_content(full_text)
210
 
211
+ # Combined score (80% model, 20% source and content analysis)
212
  model_weight = score if is_fake else (1 - score)
213
+ source_weight = (1 - source_credibility) * 0.15
214
+ content_weight = min(content_analysis['fake_indicator_count'] * 0.05, 0.05)
215
+
216
+ combined_score = (model_weight * 0.8) + source_weight + content_weight
217
 
218
  # Determine status
219
+ if is_fake and combined_score > 0.7:
220
  status = "🚨 Likely Fake News"
221
+ elif is_fake and combined_score > 0.5:
222
  status = "⚠️ Suspicious Content"
223
+ elif not is_fake and combined_score > 0.7:
224
  status = "✅ Likely Credible"
225
+ elif not is_fake and combined_score > 0.5:
226
+ status = "📰 Probably Real News"
227
  else:
228
  status = "🤔 Uncertain - Verify Manually"
229
 
 
248
  **Combined Score: {combined_score * 100:.1f}%**
249
 
250
  **Preview:**
251
+ {content[:300]}...
252
 
253
  ---
254
  **Note:** This is an AI prediction. Always verify from multiple sources.
 
261
  'title': title
262
  }
263
 
264
+ def fallback_analysis(self, title: str, content: str, url: str, error: str):
265
+ """Fallback analysis when model fails"""
266
+ source_credibility = self.check_source_credibility(url)
267
+ content_analysis = self.analyze_content(f"{title}. {content}")
268
+
269
+ # Simple heuristic based on source and content
270
+ fake_score = (
271
+ (1 - source_credibility) * 0.6 +
272
+ min(content_analysis['fake_indicator_count'] * 0.2, 0.4)
273
+ )
274
+
275
+ if fake_score > 0.6:
276
+ status = "⚠️ Suspicious (Fallback Analysis)"
277
+ elif fake_score > 0.3:
278
+ status = "🤔 Uncertain (Fallback Analysis)"
279
+ else:
280
+ status = "📰 Probably Real (Fallback Analysis)"
281
+
282
+ message = f"""
283
+ **📊 Fallback Analysis (Model Error):**
284
+
285
+ **Model Error:** {error}
286
+
287
+ **Source Analysis:**
288
+ - Source Credibility: {source_credibility * 10:.1f}/10
289
+
290
+ **Content Indicators:**
291
+ - Fake News Keywords: {content_analysis['fake_indicator_count']}
292
+ - Exclamation Marks: {content_analysis['exclamation_count']}
293
+ - ALL-CAPS Words: {content_analysis['capital_words']}
294
+
295
+ **Fallback Score: {fake_score * 100:.1f}%**
296
+
297
+ **Preview:**
298
+ {content[:300]}...
299
+
300
+ ---
301
+ *Using fallback analysis due to model error*
302
+ """.strip()
303
+
304
+ return {
305
+ 'status': status,
306
+ 'confidence': fake_score,
307
+ 'message': message,
308
+ 'title': title
309
+ }
310
+
311
  # Initialize detector
312
  logger.info("Initializing Fake News Detector...")
313
  detector = FakeNewsDetector()
 
392
  **How it works:**
393
 
394
  1. **Extracts** article text from URL
395
+ 2. **Truncates** to model-safe length (350 words)
396
+ 3. **Analyzes** using RoBERTa transformer
397
+ 4. **Checks** source credibility and content patterns
398
+ 5. **Provides** confidence score
399
 
400
  **Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)
401