Spaces:

chirag1121
/

Fake_News_Classifier_Model

Sleeping

App Files Files

chirag1121 committed on Apr 12

Commit

c4e7a27

verified ·

1 Parent(s): 164d559

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -79

app.py CHANGED Viewed

@@ -24,7 +24,6 @@ MODEL_NAME = "jy46604790/Fake-News-Bert-Detect"
 # Source credibility database
 SOURCE_CREDIBILITY = {
-    # High credibility (score: 0.9–1.0)
     "bbc.com": 0.97, "bbc.co.uk": 0.97,
     "reuters.com": 0.96, "apnews.com": 0.95,
     "theguardian.com": 0.93, "nytimes.com": 0.92,
@@ -35,12 +34,10 @@ SOURCE_CREDIBILITY = {
     "cdc.gov": 0.97, "gov.uk": 0.94,
     "thehindu.com": 0.88, "ndtv.com": 0.82,
     "hindustantimes.com": 0.80, "timesofindia.com": 0.79,
-    # Medium credibility (0.5–0.8)
     "cnn.com": 0.78, "foxnews.com": 0.65,
     "huffpost.com": 0.70, "buzzfeed.com": 0.62,
     "vice.com": 0.68, "vox.com": 0.74,
     "medium.com": 0.52, "substack.com": 0.50,
-    # Low credibility (< 0.5) — examples of known misinformation sites
     "infowars.com": 0.05, "naturalnews.com": 0.08,
     "beforeitsnews.com": 0.06, "worldnewsdailyreport.com": 0.04,
     "empirenews.net": 0.04, "theonion.com": 0.10,
@@ -80,7 +77,6 @@ def classify_text(text, tokenizer, model):
         outputs = model(**inputs)
     probs = torch.softmax(outputs.logits, dim=1).squeeze().numpy()
-    # Model labels: 0 = FAKE, 1 = REAL (adjust if needed for your model)
     labels = model.config.id2label
     fake_idx = next((i for i, l in labels.items() if "fake" in l.lower() or "0" == str(i)), 0)
     real_idx = 1 - fake_idx
@@ -103,8 +99,7 @@ def get_source_credibility(url_or_domain):
     if domain in SOURCE_CREDIBILITY:
         score = SOURCE_CREDIBILITY[domain]
     else:
-        # Heuristics for unknown sources
-        score = 0.45  # default unknown
         if domain.endswith(".gov") or domain.endswith(".edu"):
             score = 0.90
         elif domain.endswith(".org"):
@@ -249,7 +244,6 @@ html, body, [class*="css"] {
 }
 .stApp { background: #050a14; }
-/* Hero banner */
 .hero {
     background: linear-gradient(135deg, #0f172a 0%, #1a0a2e 50%, #0f172a 100%);
     border: 1px solid #1e293b;
@@ -275,7 +269,6 @@ html, body, [class*="css"] {
 }
 .hero p { color: #94a3b8; font-size: 1.05rem; margin-top: 0.5rem; margin-bottom: 0; }
-/* Cards */
 .card {
     background: #0f172a;
     border: 1px solid #1e293b;
@@ -303,7 +296,6 @@ html, body, [class*="css"] {
 .fake-label { color: #ef4444; }
 .real-label { color: #22c55e; }
-/* Indicator pills */
 .indicator-pill {
     display: inline-block;
     background: #1e1030;
@@ -316,7 +308,6 @@ html, body, [class*="css"] {
     font-family: 'JetBrains Mono', monospace;
 }
-/* News cards */
 .news-card {
     background: #0f172a;
     border: 1px solid #1e293b;
@@ -329,13 +320,11 @@ html, body, [class*="css"] {
 .news-card h4 { color: #e2e8f0; font-size: 0.95rem; margin: 0 0 0.4rem 0; }
 .news-card p { color: #64748b; font-size: 0.82rem; margin: 0; }
-/* Sidebar */
 section[data-testid="stSidebar"] {
     background: #080d1a;
     border-right: 1px solid #1e293b;
 }
-/* Inputs */
 .stTextArea textarea, .stTextInput input {
     background: #0f172a !important;
     border: 1px solid #334155 !important;
@@ -357,13 +346,11 @@ section[data-testid="stSidebar"] {
 }
 .stButton > button:hover { opacity: 0.85 !important; }
-/* Section headers */
 .section-title {
     font-size: 0.75rem; font-weight: 700; letter-spacing: 3px;
     color: #6366f1; text-transform: uppercase; margin-bottom: 0.75rem;
 }
-/* Metric boxes */
 .metric-box {
     background: #0f172a;
     border: 1px solid #1e293b;
@@ -378,7 +365,7 @@ div[data-testid="stMetricValue"] { color: #818cf8 !important; font-family: 'Syne
 </style>
 """, unsafe_allow_html=True)
-# ── Sidebar ─────────────────────────────────
 with st.sidebar:
     st.markdown("## 🔍 FakeScope")
     st.markdown("---")
@@ -386,7 +373,7 @@ with st.sidebar:
     st.markdown("---")
     st.markdown("**About the Model**")
     st.caption(f"`{MODEL_NAME}`")
-    st.caption("Fine-tuned RoBERTa — no local training required.")
     st.markdown("---")
     st.markdown("**Credibility DB**")
     st.caption(f"{len(SOURCE_CREDIBILITY)} known sources indexed.")
@@ -441,27 +428,34 @@ if mode == "📝 Paste Article / Text":
             vcol1, vcol2, vcol3 = st.columns([1, 2, 1])
             with vcol2:
                 if prediction == "FAKE":
-    low_conf = confidence < 0.75
-    warning = "<div style='color:#fbbf24;font-size:0.85rem;margin-top:0.5rem'>⚠ Low confidence — verify manually before concluding</div>" if low_conf else ""
-    st.markdown(
-        f"""
-        <div class="verdict-fake">
-            <div class="verdict-label fake-label">⚠ FAKE NEWS</div>
-            <div style="color:#94a3b8;margin-top:0.4rem;font-size:0.95rem;">
-                Confidence: <b style="color:#f8fafc">{confidence*100:.1f}%</b>
-            </div>
-            {warning}
-        </div>""",
-        unsafe_allow_html=True,
-    )
                 else:
-                    st.markdown(f"""
-                    <div class="verdict-real">
-                        <div class="verdict-label real-label">✅ LIKELY REAL</div>
-                        <div style="color:#94a3b8;margin-top:0.4rem;font-size:0.95rem;">
-                            Confidence: <b style="color:#f8fafc">{confidence*100:.1f}%</b>
-                        </div>
-                    </div>""", unsafe_allow_html=True)
             st.markdown("<br>", unsafe_allow_html=True)
@@ -478,11 +472,16 @@ if mode == "📝 Paste Article / Text":
             # ── Source Credibility ───────────────────
             st.markdown('<div class="section-title">Source Credibility Score</div>', unsafe_allow_html=True)
-            st.markdown(f"""
-            <div class="card">
-              <span style="font-size:1.1rem">{cred_label}</span>
-              <span style="color:#64748b;font-family:monospace;font-size:0.85rem;margin-left:1rem">{domain or 'Unknown domain'}</span>
-            </div>""", unsafe_allow_html=True)
             st.plotly_chart(credibility_bar_chart(domain or "Unknown", cred_score),
                             use_container_width=True, config={"displayModeBar": False})
@@ -503,9 +502,11 @@ if mode == "📝 Paste Article / Text":
                     if fake_prob > 0.85:
                         reasons.append("Very high BERT fake-probability score (>85%)")
                     if cred_score < 0.5:
-                        reasons.append(f"Source '{domain}' has very low credibility ({cred_score*100:.0f}/100)")
                     if indicators:
-                        reasons.append(f"{len(indicators)} sensational/clickbait linguistic patterns found")
                     if reasons:
                         st.markdown("**Key reasons for FAKE classification:**")
                         for r in reasons:
@@ -515,17 +516,25 @@ if mode == "📝 Paste Article / Text":
             st.markdown('<div class="section-title">Analytics Summary</div>', unsafe_allow_html=True)
             m1, m2, m3, m4 = st.columns(4)
             with m1:
-                st.markdown(f'<div class="metric-box"><div class="val">{fake_prob*100:.0f}%</div><div class="lbl">FAKE PROB</div></div>',
-                            unsafe_allow_html=True)
             with m2:
-                st.markdown(f'<div class="metric-box"><div class="val">{real_prob*100:.0f}%</div><div class="lbl">REAL PROB</div></div>',
-                            unsafe_allow_html=True)
             with m3:
-                st.markdown(f'<div class="metric-box"><div class="val">{cred_score*100:.0f}</div><div class="lbl">SOURCE SCORE</div></div>',
-                            unsafe_allow_html=True)
             with m4:
-                st.markdown(f'<div class="metric-box"><div class="val">{len(indicators)}</div><div class="lbl">RED FLAGS</div></div>',
-                            unsafe_allow_html=True)
 # ════════════════════════════════════════════
 #  MODE 2 — Live News Feed
@@ -578,7 +587,6 @@ else:
                     progress.progress((i + 1) / len(articles))
                 progress.empty()
-                # Summary metrics
                 fake_count = sum(1 for r in results if r["prediction"] == "FAKE")
                 real_count = len(results) - fake_count
                 avg_conf = np.mean([r["confidence"] for r in results]) * 100
@@ -588,19 +596,26 @@ else:
                             unsafe_allow_html=True)
                 sm1, sm2, sm3, sm4 = st.columns(4)
                 with sm1:
-                    st.markdown(f'<div class="metric-box"><div class="val">{len(results)}</div><div class="lbl">ARTICLES</div></div>',
-                                unsafe_allow_html=True)
                 with sm2:
-                    st.markdown(f'<div class="metric-box"><div class="val" style="color:#ef4444">{fake_count}</div><div class="lbl">FLAGGED FAKE</div></div>',
-                                unsafe_allow_html=True)
                 with sm3:
-                    st.markdown(f'<div class="metric-box"><div class="val" style="color:#22c55e">{real_count}</div><div class="lbl">LIKELY REAL</div></div>',
-                                unsafe_allow_html=True)
                 with sm4:
-                    st.markdown(f'<div class="metric-box"><div class="val">{avg_conf:.0f}%</div><div class="lbl">AVG CONFIDENCE</div></div>',
-                                unsafe_allow_html=True)
-                # Batch chart
                 st.markdown("<br>", unsafe_allow_html=True)
                 titles_short = [r["title"][:45] + "…" if len(r["title"]) > 45 else r["title"]
                                 for r in results]
@@ -627,7 +642,6 @@ else:
                 st.plotly_chart(fig_batch, use_container_width=True,
                                 config={"displayModeBar": False})
-                # Individual cards
                 st.markdown('<div class="section-title">Individual Article Results</div>',
                             unsafe_allow_html=True)
                 for r in results:
@@ -637,23 +651,26 @@ else:
                         f'<span class="indicator-pill">{ind}</span>'
                         for ind in r["indicators"][:2]
                     ) if r["indicators"] else ""
-                    st.markdown(f"""
-                    <div class="news-card">
-                      <div style="display:flex;justify-content:space-between;align-items:flex-start">
-                        <h4>{r['title']}</h4>
-                        <span style="background:{badge_color}22;color:{badge_color};
-                          border:1px solid {badge_color};border-radius:99px;
-                          padding:0.2rem 0.8rem;font-size:0.8rem;font-weight:700;
-                          white-space:nowrap;margin-left:1rem">{badge_text}</span>
-                      </div>
-                      <p>📰 {r['source']} &nbsp;·&nbsp; Confidence: {r['confidence']*100:.1f}%
-                         &nbsp;·&nbsp; Source credibility: {r['cred_label']}</p>
-                      {ind_html}
-                      <p style="margin-top:0.5rem"><a href="{r['url']}" target="_blank"
-                         style="color:#6366f1;font-size:0.8rem">Read original →</a></p>
-                    </div>""", unsafe_allow_html=True)
-# ── Footer ──────────────────────────────────
 st.markdown("---")
 st.markdown(
     '<p style="text-align:center;color:#334155;font-size:0.8rem">'

 # Source credibility database
 SOURCE_CREDIBILITY = {
     "bbc.com": 0.97, "bbc.co.uk": 0.97,
     "reuters.com": 0.96, "apnews.com": 0.95,
     "theguardian.com": 0.93, "nytimes.com": 0.92,
     "cdc.gov": 0.97, "gov.uk": 0.94,
     "thehindu.com": 0.88, "ndtv.com": 0.82,
     "hindustantimes.com": 0.80, "timesofindia.com": 0.79,
     "cnn.com": 0.78, "foxnews.com": 0.65,
     "huffpost.com": 0.70, "buzzfeed.com": 0.62,
     "vice.com": 0.68, "vox.com": 0.74,
     "medium.com": 0.52, "substack.com": 0.50,
     "infowars.com": 0.05, "naturalnews.com": 0.08,
     "beforeitsnews.com": 0.06, "worldnewsdailyreport.com": 0.04,
     "empirenews.net": 0.04, "theonion.com": 0.10,
         outputs = model(**inputs)
     probs = torch.softmax(outputs.logits, dim=1).squeeze().numpy()
     labels = model.config.id2label
     fake_idx = next((i for i, l in labels.items() if "fake" in l.lower() or "0" == str(i)), 0)
     real_idx = 1 - fake_idx
     if domain in SOURCE_CREDIBILITY:
         score = SOURCE_CREDIBILITY[domain]
     else:
+        score = 0.45
         if domain.endswith(".gov") or domain.endswith(".edu"):
             score = 0.90
         elif domain.endswith(".org"):
 }
 .stApp { background: #050a14; }
 .hero {
     background: linear-gradient(135deg, #0f172a 0%, #1a0a2e 50%, #0f172a 100%);
     border: 1px solid #1e293b;
 }
 .hero p { color: #94a3b8; font-size: 1.05rem; margin-top: 0.5rem; margin-bottom: 0; }
 .card {
     background: #0f172a;
     border: 1px solid #1e293b;
 .fake-label { color: #ef4444; }
 .real-label { color: #22c55e; }
 .indicator-pill {
     display: inline-block;
     background: #1e1030;
     font-family: 'JetBrains Mono', monospace;
 }
 .news-card {
     background: #0f172a;
     border: 1px solid #1e293b;
 .news-card h4 { color: #e2e8f0; font-size: 0.95rem; margin: 0 0 0.4rem 0; }
 .news-card p { color: #64748b; font-size: 0.82rem; margin: 0; }
 section[data-testid="stSidebar"] {
     background: #080d1a;
     border-right: 1px solid #1e293b;
 }
 .stTextArea textarea, .stTextInput input {
     background: #0f172a !important;
     border: 1px solid #334155 !important;
 }
 .stButton > button:hover { opacity: 0.85 !important; }
 .section-title {
     font-size: 0.75rem; font-weight: 700; letter-spacing: 3px;
     color: #6366f1; text-transform: uppercase; margin-bottom: 0.75rem;
 }
 .metric-box {
     background: #0f172a;
     border: 1px solid #1e293b;
 </style>
 """, unsafe_allow_html=True)
+# ── Sidebar ──────────────────────────────────
 with st.sidebar:
     st.markdown("## 🔍 FakeScope")
     st.markdown("---")
     st.markdown("---")
     st.markdown("**About the Model**")
     st.caption(f"`{MODEL_NAME}`")
+    st.caption("Fine-tuned BERT — no local training required.")
     st.markdown("---")
     st.markdown("**Credibility DB**")
     st.caption(f"{len(SOURCE_CREDIBILITY)} known sources indexed.")
             vcol1, vcol2, vcol3 = st.columns([1, 2, 1])
             with vcol2:
                 if prediction == "FAKE":
+                    low_conf = confidence < 0.75
+                    warning = (
+                        "<div style='color:#fbbf24;font-size:0.85rem;margin-top:0.5rem'>"
+                        "⚠ Low confidence — verify manually before concluding</div>"
+                        if low_conf else ""
+                    )
+                    st.markdown(
+                        f"""
+                        <div class="verdict-fake">
+                            <div class="verdict-label fake-label">⚠ FAKE NEWS</div>
+                            <div style="color:#94a3b8;margin-top:0.4rem;font-size:0.95rem;">
+                                Confidence: <b style="color:#f8fafc">{confidence*100:.1f}%</b>
+                            </div>
+                            {warning}
+                        </div>""",
+                        unsafe_allow_html=True,
+                    )
                 else:
+                    st.markdown(
+                        f"""
+                        <div class="verdict-real">
+                            <div class="verdict-label real-label">✅ LIKELY REAL</div>
+                            <div style="color:#94a3b8;margin-top:0.4rem;font-size:0.95rem;">
+                                Confidence: <b style="color:#f8fafc">{confidence*100:.1f}%</b>
+                            </div>
+                        </div>""",
+                        unsafe_allow_html=True,
+                    )
             st.markdown("<br>", unsafe_allow_html=True)
             # ── Source Credibility ───────────────────
             st.markdown('<div class="section-title">Source Credibility Score</div>', unsafe_allow_html=True)
+            st.markdown(
+                f"""
+                <div class="card">
+                  <span style="font-size:1.1rem">{cred_label}</span>
+                  <span style="color:#64748b;font-family:monospace;font-size:0.85rem;margin-left:1rem">
+                    {domain or 'Unknown domain'}
+                  </span>
+                </div>""",
+                unsafe_allow_html=True,
+            )
             st.plotly_chart(credibility_bar_chart(domain or "Unknown", cred_score),
                             use_container_width=True, config={"displayModeBar": False})
                     if fake_prob > 0.85:
                         reasons.append("Very high BERT fake-probability score (>85%)")
                     if cred_score < 0.5:
+                        reasons.append(
+                            f"Source '{domain}' has very low credibility ({cred_score*100:.0f}/100)")
                     if indicators:
+                        reasons.append(
+                            f"{len(indicators)} sensational/clickbait linguistic patterns found")
                     if reasons:
                         st.markdown("**Key reasons for FAKE classification:**")
                         for r in reasons:
             st.markdown('<div class="section-title">Analytics Summary</div>', unsafe_allow_html=True)
             m1, m2, m3, m4 = st.columns(4)
             with m1:
+                st.markdown(
+                    f'<div class="metric-box"><div class="val">{fake_prob*100:.0f}%</div>'
+                    f'<div class="lbl">FAKE PROB</div></div>',
+                    unsafe_allow_html=True)
             with m2:
+                st.markdown(
+                    f'<div class="metric-box"><div class="val">{real_prob*100:.0f}%</div>'
+                    f'<div class="lbl">REAL PROB</div></div>',
+                    unsafe_allow_html=True)
             with m3:
+                st.markdown(
+                    f'<div class="metric-box"><div class="val">{cred_score*100:.0f}</div>'
+                    f'<div class="lbl">SOURCE SCORE</div></div>',
+                    unsafe_allow_html=True)
             with m4:
+                st.markdown(
+                    f'<div class="metric-box"><div class="val">{len(indicators)}</div>'
+                    f'<div class="lbl">RED FLAGS</div></div>',
+                    unsafe_allow_html=True)
 # ════════════════════════════════════════════
 #  MODE 2 — Live News Feed
                     progress.progress((i + 1) / len(articles))
                 progress.empty()
                 fake_count = sum(1 for r in results if r["prediction"] == "FAKE")
                 real_count = len(results) - fake_count
                 avg_conf = np.mean([r["confidence"] for r in results]) * 100
                             unsafe_allow_html=True)
                 sm1, sm2, sm3, sm4 = st.columns(4)
                 with sm1:
+                    st.markdown(
+                        f'<div class="metric-box"><div class="val">{len(results)}</div>'
+                        f'<div class="lbl">ARTICLES</div></div>',
+                        unsafe_allow_html=True)
                 with sm2:
+                    st.markdown(
+                        f'<div class="metric-box"><div class="val" style="color:#ef4444">{fake_count}</div>'
+                        f'<div class="lbl">FLAGGED FAKE</div></div>',
+                        unsafe_allow_html=True)
                 with sm3:
+                    st.markdown(
+                        f'<div class="metric-box"><div class="val" style="color:#22c55e">{real_count}</div>'
+                        f'<div class="lbl">LIKELY REAL</div></div>',
+                        unsafe_allow_html=True)
                 with sm4:
+                    st.markdown(
+                        f'<div class="metric-box"><div class="val">{avg_conf:.0f}%</div>'
+                        f'<div class="lbl">AVG CONFIDENCE</div></div>',
+                        unsafe_allow_html=True)
                 st.markdown("<br>", unsafe_allow_html=True)
                 titles_short = [r["title"][:45] + "…" if len(r["title"]) > 45 else r["title"]
                                 for r in results]
                 st.plotly_chart(fig_batch, use_container_width=True,
                                 config={"displayModeBar": False})
                 st.markdown('<div class="section-title">Individual Article Results</div>',
                             unsafe_allow_html=True)
                 for r in results:
                         f'<span class="indicator-pill">{ind}</span>'
                         for ind in r["indicators"][:2]
                     ) if r["indicators"] else ""
+                    st.markdown(
+                        f"""
+                        <div class="news-card">
+                          <div style="display:flex;justify-content:space-between;align-items:flex-start">
+                            <h4>{r['title']}</h4>
+                            <span style="background:{badge_color}22;color:{badge_color};
+                              border:1px solid {badge_color};border-radius:99px;
+                              padding:0.2rem 0.8rem;font-size:0.8rem;font-weight:700;
+                              white-space:nowrap;margin-left:1rem">{badge_text}</span>
+                          </div>
+                          <p>📰 {r['source']} &nbsp;·&nbsp; Confidence: {r['confidence']*100:.1f}%
+                             &nbsp;·&nbsp; Source credibility: {r['cred_label']}</p>
+                          {ind_html}
+                          <p style="margin-top:0.5rem"><a href="{r['url']}" target="_blank"
+                             style="color:#6366f1;font-size:0.8rem">Read original →</a></p>
+                        </div>""",
+                        unsafe_allow_html=True,
+                    )
+# ── Footer ───────────────────────────────────
 st.markdown("---")
 st.markdown(
     '<p style="text-align:center;color:#334155;font-size:0.8rem">'