Spaces:

DevNumb
/

fakeNewsDetector

Running

App Files Files Community

DevNumb committed on Nov 19, 2025

Commit

dd2c1f8

verified ·

1 Parent(s): 244a241

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -20

app.py CHANGED Viewed

@@ -22,7 +22,9 @@ class FakeNewsDetector:
                 "text-classification",
                 model=MODEL,
                 tokenizer=MODEL,
-                device=-1  # CPU mode for free tier
             )
             logger.info("✅ Model loaded successfully!")
@@ -115,6 +117,15 @@ class FakeNewsDetector:
         text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
         return text.strip()
     def analyze_content(self, text: str):
         """Analyze text for fake news indicators"""
         text_lower = text.lower()
@@ -160,7 +171,6 @@ class FakeNewsDetector:
         title = content_data['title']
         content = content_data['content']
-        full_text = f"{title}. {content}"
         if len(content.strip()) < 100:
             return {
@@ -170,9 +180,16 @@ class FakeNewsDetector:
                 'title': title
             }
-        # Use RoBERTa model (auto-truncates to 512 tokens)
         try:
-            result = self.classifier(full_text)[0]
             label = result['label']
             score = result['score']
@@ -184,29 +201,29 @@ class FakeNewsDetector:
         except Exception as e:
             logger.error(f"Model error: {e}")
-            return {
-                'status': '❌ Analysis Error',
-                'confidence': 0.0,
-                'message': f"Model error: {str(e)}",
-                'title': title
-            }
         # Additional analysis
         source_credibility = self.check_source_credibility(url)
         content_analysis = self.analyze_content(full_text)
-        # Combined score (80% model, 20% source)
         model_weight = score if is_fake else (1 - score)
-        source_weight = (1 - source_credibility) * 0.2
-        combined_score = (model_weight * 0.8) + source_weight
         # Determine status
-        if is_fake and score > 0.75:
             status = "🚨 Likely Fake News"
-        elif is_fake and score > 0.55:
             status = "⚠️ Suspicious Content"
-        elif not is_fake and score > 0.75:
             status = "✅ Likely Credible"
         else:
             status = "🤔 Uncertain - Verify Manually"
@@ -231,7 +248,7 @@ class FakeNewsDetector:
 **Combined Score: {combined_score * 100:.1f}%**
 **Preview:**
-{content[:350]}...
 ---
 **Note:** This is an AI prediction. Always verify from multiple sources.
@@ -244,6 +261,53 @@ class FakeNewsDetector:
             'title': title
         }
 # Initialize detector
 logger.info("Initializing Fake News Detector...")
 detector = FakeNewsDetector()
@@ -328,9 +392,10 @@ with gr.Blocks(
     **How it works:**
     1. **Extracts** article text from URL
-    2. **Analyzes** using RoBERTa transformer (40k+ articles trained)
-    3. **Checks** source credibility
-    4. **Provides** confidence score
     **Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)

                 "text-classification",
                 model=MODEL,
                 tokenizer=MODEL,
+                device=-1,  # CPU mode for free tier
+                max_length=512,  # Explicitly set max length
+                truncation=True  # Enable truncation
             )
             logger.info("✅ Model loaded successfully!")
         text = re.sub(r'[^\w\s.,!?;:()-]', '', text)
         return text.strip()
+    def truncate_text(self, text: str, max_words: int = 400):
+        """Truncate text to maximum words for the model"""
+        words = text.split()
+        if len(words) > max_words:
+            truncated = ' '.join(words[:max_words])
+            logger.info(f"Text truncated from {len(words)} to {max_words} words")
+            return truncated
+        return text
     def analyze_content(self, text: str):
         """Analyze text for fake news indicators"""
         text_lower = text.lower()
         title = content_data['title']
         content = content_data['content']
         if len(content.strip()) < 100:
             return {
                 'title': title
             }
+        # Prepare text for model (title + truncated content)
+        full_text = f"{title}. {content}"
+        # Truncate text to safe length for the model
+        truncated_text = self.truncate_text(full_text, max_words=350)
+        logger.info(f"Text length: {len(truncated_text)} characters")
+        # Use RoBERTa model with error handling
         try:
+            result = self.classifier(truncated_text)[0]
             label = result['label']
             score = result['score']
         except Exception as e:
             logger.error(f"Model error: {e}")
+            # Fallback to content analysis only
+            return self.fallback_analysis(title, content, url, str(e))
         # Additional analysis
         source_credibility = self.check_source_credibility(url)
         content_analysis = self.analyze_content(full_text)
+        # Combined score (80% model, 20% source and content analysis)
         model_weight = score if is_fake else (1 - score)
+        source_weight = (1 - source_credibility) * 0.15
+        content_weight = min(content_analysis['fake_indicator_count'] * 0.05, 0.05)
+        combined_score = (model_weight * 0.8) + source_weight + content_weight
         # Determine status
+        if is_fake and combined_score > 0.7:
             status = "🚨 Likely Fake News"
+        elif is_fake and combined_score > 0.5:
             status = "⚠️ Suspicious Content"
+        elif not is_fake and combined_score > 0.7:
             status = "✅ Likely Credible"
+        elif not is_fake and combined_score > 0.5:
+            status = "📰 Probably Real News"
         else:
             status = "🤔 Uncertain - Verify Manually"
 **Combined Score: {combined_score * 100:.1f}%**
 **Preview:**
+{content[:300]}...
 ---
 **Note:** This is an AI prediction. Always verify from multiple sources.
             'title': title
         }
+    def fallback_analysis(self, title: str, content: str, url: str, error: str):
+        """Fallback analysis when model fails"""
+        source_credibility = self.check_source_credibility(url)
+        content_analysis = self.analyze_content(f"{title}. {content}")
+        # Simple heuristic based on source and content
+        fake_score = (
+            (1 - source_credibility) * 0.6 +
+            min(content_analysis['fake_indicator_count'] * 0.2, 0.4)
+        )
+        if fake_score > 0.6:
+            status = "⚠️ Suspicious (Fallback Analysis)"
+        elif fake_score > 0.3:
+            status = "🤔 Uncertain (Fallback Analysis)"
+        else:
+            status = "📰 Probably Real (Fallback Analysis)"
+        message = f"""
+**📊 Fallback Analysis (Model Error):**
+**Model Error:** {error}
+**Source Analysis:**
+- Source Credibility: {source_credibility * 10:.1f}/10
+**Content Indicators:**
+- Fake News Keywords: {content_analysis['fake_indicator_count']}
+- Exclamation Marks: {content_analysis['exclamation_count']}
+- ALL-CAPS Words: {content_analysis['capital_words']}
+**Fallback Score: {fake_score * 100:.1f}%**
+**Preview:**
+{content[:300]}...
+---
+*Using fallback analysis due to model error*
+        """.strip()
+        return {
+            'status': status,
+            'confidence': fake_score,
+            'message': message,
+            'title': title
+        }
 # Initialize detector
 logger.info("Initializing Fake News Detector...")
 detector = FakeNewsDetector()
     **How it works:**
     1. **Extracts** article text from URL
+    2. **Truncates** to model-safe length (350 words)
+    3. **Analyzes** using RoBERTa transformer
+    4. **Checks** source credibility and content patterns
+    5. **Provides** confidence score
     **Model:** `jy46604790/Fake-News-Bert-Detect` (RoBERTa-based)