Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,7 +22,9 @@ class FakeNewsDetector:
|
|
| 22 |
"text-classification",
|
| 23 |
model=MODEL,
|
| 24 |
tokenizer=MODEL,
|
| 25 |
-
device=-1 # CPU mode for free tier
|
|
|
|
|
|
|
| 26 |
)
|
| 27 |
|
| 28 |
logger.info("✅ Model loaded successfully!")
|
|
@@ -115,6 +117,15 @@ class FakeNewsDetector:
|
|
| 115 |
text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
|
| 116 |
return text.strip()
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def analyze_content(self, text: str):
|
| 119 |
"""Analyze text for fake news indicators"""
|
| 120 |
text_lower = text.lower()
|
|
@@ -160,7 +171,6 @@ class FakeNewsDetector:
|
|
| 160 |
|
| 161 |
title = content_data['title']
|
| 162 |
content = content_data['content']
|
| 163 |
-
full_text = f"{title}. {content}"
|
| 164 |
|
| 165 |
if len(content.strip()) < 100:
|
| 166 |
return {
|
|
@@ -170,9 +180,16 @@ class FakeNewsDetector:
|
|
| 170 |
'title': title
|
| 171 |
}
|
| 172 |
|
| 173 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
try:
|
| 175 |
-
result = self.classifier(
|
| 176 |
|
| 177 |
label = result['label']
|
| 178 |
score = result['score']
|
|
@@ -184,29 +201,29 @@ class FakeNewsDetector:
|
|
| 184 |
|
| 185 |
except Exception as e:
|
| 186 |
logger.error(f"Model error: {e}")
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
'confidence': 0.0,
|
| 190 |
-
'message': f"Model error: {str(e)}",
|
| 191 |
-
'title': title
|
| 192 |
-
}
|
| 193 |
|
| 194 |
# Additional analysis
|
| 195 |
source_credibility = self.check_source_credibility(url)
|
| 196 |
content_analysis = self.analyze_content(full_text)
|
| 197 |
|
| 198 |
-
# Combined score (80% model, 20% source)
|
| 199 |
model_weight = score if is_fake else (1 - score)
|
| 200 |
-
source_weight = (1 - source_credibility) * 0.
|
| 201 |
-
|
|
|
|
|
|
|
| 202 |
|
| 203 |
# Determine status
|
| 204 |
-
if is_fake and
|
| 205 |
status = "π¨ Likely Fake News"
|
| 206 |
-
elif is_fake and
|
| 207 |
status = "β οΈ Suspicious Content"
|
| 208 |
-
elif not is_fake and
|
| 209 |
status = "✅ Likely Credible"
|
|
|
|
|
|
|
| 210 |
else:
|
| 211 |
status = "π€ Uncertain - Verify Manually"
|
| 212 |
|
|
@@ -231,7 +248,7 @@ class FakeNewsDetector:
|
|
| 231 |
**Combined Score: {combined_score * 100:.1f}%**
|
| 232 |
|
| 233 |
**Preview:**
|
| 234 |
-
{content[:
|
| 235 |
|
| 236 |
---
|
| 237 |
**Note:** This is an AI prediction. Always verify from multiple sources.
|
|
@@ -244,6 +261,53 @@ class FakeNewsDetector:
|
|
| 244 |
'title': title
|
| 245 |
}
|
| 246 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
# Initialize detector
|
| 248 |
logger.info("Initializing Fake News Detector...")
|
| 249 |
detector = FakeNewsDetector()
|
|
@@ -328,9 +392,10 @@ with gr.Blocks(
|
|
| 328 |
**How it works:**
|
| 329 |
|
| 330 |
1. **Extracts** article text from URL
|
| 331 |
-
2. **
|
| 332 |
-
3. **
|
| 333 |
-
4. **
|
|
|
|
| 334 |
|
| 335 |
**Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)
|
| 336 |
|
|
|
|
| 22 |
"text-classification",
|
| 23 |
model=MODEL,
|
| 24 |
tokenizer=MODEL,
|
| 25 |
+
device=-1, # CPU mode for free tier
|
| 26 |
+
max_length=512, # Explicitly set max length
|
| 27 |
+
truncation=True # Enable truncation
|
| 28 |
)
|
| 29 |
|
| 30 |
logger.info("✅ Model loaded successfully!")
|
|
|
|
| 117 |
text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
|
| 118 |
return text.strip()
|
| 119 |
|
| 120 |
+
def truncate_text(self, text: str, max_words: int = 400) -> str:
    """Cap *text* at ``max_words`` whitespace-separated words.

    Args:
        text: The text to shorten.
        max_words: Maximum number of words to keep. Values below zero are
            treated as zero.

    Returns:
        ``text`` unchanged when it already fits within the limit.
        Otherwise the first ``max_words`` words joined by single spaces,
        so runs of whitespace inside a truncated result are collapsed.
    """
    # A negative limit would turn the slice below into "drop the last N
    # words" instead of a length cap, so clamp it to zero.
    limit = max(max_words, 0)

    words = text.split()
    if len(words) <= limit:
        return text

    # `logger` is the module-level logger used throughout this file.
    logger.info("Text truncated from %d to %d words", len(words), limit)
    return ' '.join(words[:limit])
|
| 128 |
+
|
| 129 |
def analyze_content(self, text: str):
|
| 130 |
"""Analyze text for fake news indicators"""
|
| 131 |
text_lower = text.lower()
|
|
|
|
| 171 |
|
| 172 |
title = content_data['title']
|
| 173 |
content = content_data['content']
|
|
|
|
| 174 |
|
| 175 |
if len(content.strip()) < 100:
|
| 176 |
return {
|
|
|
|
| 180 |
'title': title
|
| 181 |
}
|
| 182 |
|
| 183 |
+
# Prepare text for model (title + truncated content)
|
| 184 |
+
full_text = f"{title}. {content}"
|
| 185 |
+
|
| 186 |
+
# Truncate text to safe length for the model
|
| 187 |
+
truncated_text = self.truncate_text(full_text, max_words=350)
|
| 188 |
+
logger.info(f"Text length: {len(truncated_text)} characters")
|
| 189 |
+
|
| 190 |
+
# Use RoBERTa model with error handling
|
| 191 |
try:
|
| 192 |
+
result = self.classifier(truncated_text)[0]
|
| 193 |
|
| 194 |
label = result['label']
|
| 195 |
score = result['score']
|
|
|
|
| 201 |
|
| 202 |
except Exception as e:
|
| 203 |
logger.error(f"Model error: {e}")
|
| 204 |
+
# Fallback to content analysis only
|
| 205 |
+
return self.fallback_analysis(title, content, url, str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
# Additional analysis
|
| 208 |
source_credibility = self.check_source_credibility(url)
|
| 209 |
content_analysis = self.analyze_content(full_text)
|
| 210 |
|
| 211 |
+
# Combined score (80% model, 20% source and content analysis)
|
| 212 |
model_weight = score if is_fake else (1 - score)
|
| 213 |
+
source_weight = (1 - source_credibility) * 0.15
|
| 214 |
+
content_weight = min(content_analysis['fake_indicator_count'] * 0.05, 0.05)
|
| 215 |
+
|
| 216 |
+
combined_score = (model_weight * 0.8) + source_weight + content_weight
|
| 217 |
|
| 218 |
# Determine status
|
| 219 |
+
if is_fake and combined_score > 0.7:
|
| 220 |
status = "π¨ Likely Fake News"
|
| 221 |
+
elif is_fake and combined_score > 0.5:
|
| 222 |
status = "β οΈ Suspicious Content"
|
| 223 |
+
elif not is_fake and combined_score > 0.7:
|
| 224 |
status = "✅ Likely Credible"
|
| 225 |
+
elif not is_fake and combined_score > 0.5:
|
| 226 |
+
status = "π° Probably Real News"
|
| 227 |
else:
|
| 228 |
status = "π€ Uncertain - Verify Manually"
|
| 229 |
|
|
|
|
| 248 |
**Combined Score: {combined_score * 100:.1f}%**
|
| 249 |
|
| 250 |
**Preview:**
|
| 251 |
+
{content[:300]}...
|
| 252 |
|
| 253 |
---
|
| 254 |
**Note:** This is an AI prediction. Always verify from multiple sources.
|
|
|
|
| 261 |
'title': title
|
| 262 |
}
|
| 263 |
|
| 264 |
+
def fallback_analysis(self, title: str, content: str, url: str, error: str) -> dict:
    """Build a heuristic verdict for an article when the model call failed.

    The score combines the source's credibility (60% weight) with the
    number of fake-news keywords found in the text (0.2 per keyword,
    capped at 0.4).

    Args:
        title: Article title; echoed back in the result.
        content: Article body; its first 300 characters are shown as a preview.
        url: Article URL, passed to ``self.check_source_credibility``.
        error: Description of the model failure, shown in the message.

    Returns:
        A dict with the keys ``'status'``, ``'confidence'`` (the heuristic
        fake score), ``'message'`` (a Markdown report) and ``'title'``.
    """
    # NOTE(review): the "/10" display below suggests credibility is a 0-1
    # value; confirm against check_source_credibility.
    source_credibility = self.check_source_credibility(url)
    content_analysis = self.analyze_content(f"{title}. {content}")

    keyword_hits = content_analysis['fake_indicator_count']
    fake_score = (1 - source_credibility) * 0.6 + min(keyword_hits * 0.2, 0.4)

    if fake_score > 0.6:
        status = "⚠️ Suspicious (Fallback Analysis)"
    elif fake_score > 0.3:
        status = "🤔 Uncertain (Fallback Analysis)"
    else:
        status = "📰 Probably Real (Fallback Analysis)"

    # Only add an ellipsis when the preview really is cut short; otherwise
    # a short article would look truncated.
    preview = content[:300]
    if len(content) > 300:
        preview += "..."

    # NOTE(review): the emoji at the start of the header line was garbled
    # beyond recovery in the reviewed text; confirm the intended glyph.
    message = "\n".join([
        "**🔄 Fallback Analysis (Model Error):**",
        "",
        f"**Model Error:** {error}",
        "",
        "**Source Analysis:**",
        f"- Source Credibility: {source_credibility * 10:.1f}/10",
        "",
        "**Content Indicators:**",
        f"- Fake News Keywords: {keyword_hits}",
        f"- Exclamation Marks: {content_analysis['exclamation_count']}",
        f"- ALL-CAPS Words: {content_analysis['capital_words']}",
        "",
        f"**Fallback Score: {fake_score * 100:.1f}%**",
        "",
        "**Preview:**",
        preview,
        "",
        "---",
        "*Using fallback analysis due to model error*",
    ])

    return {
        'status': status,
        'confidence': fake_score,
        'message': message,
        'title': title,
    }
|
| 310 |
+
|
| 311 |
# Initialize detector
|
| 312 |
logger.info("Initializing Fake News Detector...")
|
| 313 |
detector = FakeNewsDetector()
|
|
|
|
| 392 |
**How it works:**
|
| 393 |
|
| 394 |
1. **Extracts** article text from URL
|
| 395 |
+
2. **Truncates** to model-safe length (350 words)
|
| 396 |
+
3. **Analyzes** using RoBERTa transformer
|
| 397 |
+
4. **Checks** source credibility and content patterns
|
| 398 |
+
5. **Provides** confidence score
|
| 399 |
|
| 400 |
**Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)
|
| 401 |
|