"""DeepTrace AI — Streamlit app for fake-news text and AI-image detection.

The script renders three tabs: a text classifier with a heuristic
manipulation score, an image classifier, and a static "About" page.
"""

import re
import time

import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests
import streamlit as st
import tensorflow as tf
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

# ─────────────────────────────────────────────
# PAGE CONFIG
# ─────────────────────────────────────────────
st.set_page_config(
    page_title="DeepTrace AI",
    page_icon="🔬",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# ─────────────────────────────────────────────
# GLOBAL CSS (dark cyber-forensics aesthetic)
# ─────────────────────────────────────────────
# NOTE(review): the stylesheet payload is empty here — only a newline is emitted.
st.markdown("\n", unsafe_allow_html=True)


# ─────────────────────────────────────────────
# MODEL LOADERS
# ─────────────────────────────────────────────
@st.cache_resource(show_spinner=False)
def load_text_model():
    """Load the fake-news tokenizer and classifier, cached across reruns.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is switched to eval mode.
    """
    model_name = "hamzab/roberta-fake-news-classification"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()
    return tokenizer, model


@st.cache_resource(show_spinner=False)
def load_image_model():
    """Rebuild the image classifier from its JSON config and weights file.

    Both files are downloaded from the ``syeda-Rija20/image-detector`` hub
    repository; the result is cached across reruns.

    Returns:
        The Keras model with weights loaded.
    """
    import keras

    config_path = hf_hub_download(
        repo_id="syeda-Rija20/image-detector",
        filename="model_config.json",
    )
    weights_path = hf_hub_download(
        repo_id="syeda-Rija20/image-detector",
        filename="image_detector_clean.weights.h5",
    )
    # Explicit encoding so the JSON reads the same on every platform.
    with open(config_path, encoding="utf-8") as f:
        config = f.read()
    model = keras.models.model_from_json(config)
    model.load_weights(weights_path)
    return model


# ─────────────────────────────────────────────
# PREDICTION FUNCTIONS
# ─────────────────────────────────────────────
CLICKBAIT_WORDS = [
    "SHOCKING", "BREAKING", "EXPOSED", "YOU WON'T BELIEVE",
    "UNBELIEVABLE", "MUST SEE", "URGENT", "SECRET", "LEAKED",
    "BANNED", "CENSORED", "THEY DON'T WANT", "EXCLUSIVE",
]
FEAR_WORDS = [
    "danger", "crisis", "collapse", "attack", "war", "threat",
    "catastrophe", "disaster", "chaos", "doom", "apocalypse",
    "deadly", "terror", "panic", "emergency",
]


def _compile_term(term):
    """Return a case-insensitive whole-word pattern for *term* (simple plural allowed)."""
    return re.compile(rf"(?<!\w){re.escape(term)}(?:e?s)?(?!\w)", re.IGNORECASE)


# Compiled once at import so each scoring call is a plain scan per term.
_CLICKBAIT_PATTERNS = [(term, _compile_term(term)) for term in CLICKBAIT_WORDS]
_FEAR_PATTERNS = [(term, _compile_term(term)) for term in FEAR_WORDS]


def predict_news(text, tokenizer, model):
    """Classify *text* with the sequence-classification model.

    Args:
        text: Article or headline to classify.
        tokenizer: Tokenizer matching *model*.
        model: Classifier exposing ``config.id2label``.

    Returns:
        ``(label, confidence, probs)`` where *confidence* is the top class
        probability as a percentage and *probs* is the per-class probability
        list for the single input.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # A single text is passed in, so row 0 holds the only distribution.
    row = probs[0]
    pred = torch.argmax(row).item()
    confidence = row[pred].item() * 100
    label = model.config.id2label[pred]
    return label, confidence, row.tolist()


def manipulation_score(text):
    """Score *text* from 0 to 100 for clickbait and fear-based wording.

    Terms are matched as whole words (case-insensitive, optional plural), so
    "war" no longer fires on "software" or "award".

    Args:
        text: Text to analyse.

    Returns:
        ``(score, clickbait_hits, fear_hits)``; the hit lists contain the
        matched entries of ``CLICKBAIT_WORDS`` and ``FEAR_WORDS``.
    """
    cb_hits = [term for term, pattern in _CLICKBAIT_PATTERNS if pattern.search(text)]
    fear_hits = [term for term, pattern in _FEAR_PATTERNS if pattern.search(text)]
    exclamations = text.count("!")
    # max(..., 1) keeps the ratio defined for empty input.
    caps_ratio = sum(1 for c in text if c.isupper()) / max(len(text), 1)
    score = (
        len(cb_hits) * 20
        + len(fear_hits) * 10
        + min(exclamations * 5, 20)
        + min(caps_ratio * 100, 20)
    )
    return min(int(score), 100), cb_hits, fear_hits


def predict_image(img, model):
    """Classify a PIL image as AI-generated or real.

    Args:
        img: PIL image; it is resized to 224×224 and converted to RGB.
        model: Model whose ``predict`` output is read at ``[0][0]``.

    Returns:
        ``(verdict, confidence)`` with the confidence as a percentage for the
        returned verdict.
    """
    img_r = img.resize((224, 224)).convert("RGB")
    # NOTE(review): assumes the model was trained on inputs scaled to [0, 1] — confirm.
    arr = np.array(img_r, dtype=np.float32) / 255.0
    arr = np.expand_dims(arr, axis=0)
    pred = model.predict(arr, verbose=0)
    conf = float(pred[0][0]) * 100
    # 0 = AI, 1 = Real (based on training)
    if conf < 50:
        return "AI GENERATED", 100 - conf
    return "REAL IMAGE", conf


# ─────────────────────────────────────────────
# CHART HELPERS
# ─────────────────────────────────────────────
def gauge_chart(value, title, color):
    """Build a 0–100 percentage gauge figure.

    Args:
        value: Percentage shown by the gauge and its threshold marker.
        title: Caption rendered above the gauge.
        color: Colour of the bar and the threshold line.

    Returns:
        A Plotly figure with a transparent background.
    """
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=value,
        title={"text": title,
               "font": {"family": "Space Mono", "size": 11, "color": "#64748b"}},
        number={"suffix": "%",
                "font": {"family": "Syne", "size": 28, "color": "#e2e8f0"}},
        gauge={
            "axis": {"range": [0, 100], "tickcolor": "#1e2d45",
                     "tickfont": {"color": "#64748b", "size": 9}},
            "bar": {"color": color, "thickness": 0.3},
            "bgcolor": "#0b1120",
            "bordercolor": "#1e2d45",
            "steps": [
                {"range": [0, 33], "color": "rgba(16,185,129,0.08)"},
                {"range": [33, 66], "color": "rgba(245,158,11,0.08)"},
                {"range": [66, 100], "color": "rgba(239,68,68,0.08)"},
            ],
            "threshold": {"line": {"color": color, "width": 2}, "value": value},
        },
    ))
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin=dict(t=30, b=10, l=20, r=20),
        height=200,
    )
    return fig


def bar_chart(labels, values, colors):
    """Build a horizontal bar figure with percentage labels outside the bars.

    Args:
        labels: Bar names (y axis).
        values: Bar lengths, rendered as ``"<value>%"`` with one decimal.
        colors: One colour per bar.

    Returns:
        A Plotly figure with a transparent background.
    """
    fig = go.Figure(go.Bar(
        x=values,
        y=labels,
        orientation="h",
        marker_color=colors,
        marker_line_width=0,
        text=[f"{v:.1f}%" for v in values],
        textfont={"family": "Space Mono", "size": 10, "color": "#e2e8f0"},
        textposition="outside",
    ))
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        # The x range extends past 100 so the outside labels are not clipped.
        xaxis=dict(range=[0, 120], visible=False),
        yaxis=dict(tickfont={"family": "Space Mono", "size": 10, "color": "#64748b"}),
        margin=dict(t=10, b=10, l=10, r=60),
        height=120,
        showlegend=False,
    )
    return fig


# ─────────────────────────────────────────────
# HERO
# ─────────────────────────────────────────────
st.markdown(
    "\n🔬\n"
    "DeepTrace AI\n"
    "Multi-Modal Misinformation Detection System · v2.0\n"
    "\n"
    "Advanced forensic AI for detecting fake news, AI-generated images, "
    "and emotional manipulation in digital media.\n"
    "\n"
    "STATUS\n"
    "● ONLINE\n",
    unsafe_allow_html=True,
)

# ─────────────────────────────────────────────
# TABS
# ─────────────────────────────────────────────
tab1, tab2, tab3 = st.tabs([
    "📰 Fake News Detector",
    "🖼️ AI Image Detector",
    "⚙️ About & Models",
])

# ══════════════════════════════════════════════
# TAB 1 — FAKE NEWS
# ══════════════════════════════════════════════
with tab1:
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n📰 Fake News & Manipulation Detector\n"
        "Paste a news article or headline. The AI will analyse authenticity, "
        "emotional manipulation, and clickbait signals.\n",
        unsafe_allow_html=True,
    )

    col_in, col_out = st.columns([1, 1], gap="large")

    with col_in:
        # Init session state
        if "news_text" not in st.session_state:
            st.session_state["news_text"] = ""

        # Sample buttons BEFORE text area so state is set on rerun
        st.markdown("\n🧪 Try Samples\n", unsafe_allow_html=True)
        s1, s2 = st.columns(2)
        with s1:
            if st.button("✅ Real News", key="real_sample"):
                st.session_state["news_text"] = (
                    "NASA's James Webb Space Telescope has captured the deepest infrared "
                    "image of the universe ever taken, revealing thousands of galaxies that "
                    "existed over 13 billion years ago. The image was released in July 2022 "
                    "and marks a major milestone in space exploration and astrophysics."
                )
                st.rerun()
        with s2:
            if st.button("⚠️ Fake News", key="fake_sample"):
                st.session_state["news_text"] = (
                    "SHOCKING!! Scientists EXPOSED: drinking hot lemon water CURES cancer in "
                    "30 days! Big Pharma has been HIDING this SECRET for decades to protect "
                    "their profits. SHARE before it gets DELETED!! You WON'T BELIEVE what "
                    "they don't want you to know!!"
                )
                st.rerun()

        st.markdown("\n📝 Input Text\n", unsafe_allow_html=True)
        user_text = st.text_area(
            "Article / Headline",
            value=st.session_state["news_text"],
            height=220,
            placeholder="Paste your news article or headline here…",
            label_visibility="collapsed",
            key="news_textarea",
        )
        st.session_state["news_text"] = user_text
        st.markdown("\n", unsafe_allow_html=True)

        analyze_clicked = st.button("🔍 ANALYZE TEXT", key="analyze_btn")

    with col_out:
        if analyze_clicked and user_text.strip():
            with st.spinner("Loading NLP model…"):
                tokenizer, text_model = load_text_model()

            with st.spinner("Analysing content…"):
                time.sleep(0.3)
                label, conf, probs_list = predict_news(user_text, tokenizer, text_model)
                manip, cb_hits, fear_hits = manipulation_score(user_text)

            word_count = len(user_text.split())
            excl_count = user_text.count("!")

            # ── Verdict ──
            is_fake = label.upper() == "FAKE"
            # NOTE(review): badge_class is not referenced by the markup visible here.
            badge_class = "verdict-fake" if is_fake else "verdict-real"
            verdict_icon = "⚠️ FAKE NEWS" if is_fake else "✅ REAL NEWS"
            st.markdown(
                f"\n{verdict_icon}\n"
                f"Confidence: {conf:.1f}%\n",
                unsafe_allow_html=True,
            )

            # ── Gauge row ──
            # st.plotly_chart stretches via use_container_width; it has no
            # use_column_width parameter.
            g1, g2, g3 = st.columns(3)
            with g1:
                fake_prob = probs_list[0] * 100 if len(probs_list) > 0 else conf
                st.plotly_chart(
                    gauge_chart(fake_prob, "FAKE PROB", "#ef4444"),
                    use_container_width=True,
                )
            with g2:
                real_prob = probs_list[1] * 100 if len(probs_list) > 1 else (100 - conf)
                st.plotly_chart(
                    gauge_chart(real_prob, "REAL PROB", "#10b981"),
                    use_container_width=True,
                )
            with g3:
                st.plotly_chart(
                    gauge_chart(manip, "MANIPULATION", "#f59e0b"),
                    use_container_width=True,
                )

            # ── Metrics ──
            st.markdown(
                f"\n{word_count} Words\n"
                f"{excl_count} Exclamations\n"
                f"{len(cb_hits)} Clickbait Hits\n"
                f"{len(fear_hits)} Fear Words\n",
                unsafe_allow_html=True,
            )

            # ── Tags ──
            if cb_hits or fear_hits:
                st.markdown("\n🚨 Detected Signals\n", unsafe_allow_html=True)
                # Only the first six fear words are shown as tags.
                tags_html = "".join(
                    [f"🔴 {w}" for w in cb_hits] + [f"🟡 {w}" for w in fear_hits[:6]]
                )
                st.markdown(f"\n{tags_html}\n", unsafe_allow_html=True)

            # ── Analysis summary ──
            manip_level = "HIGH" if manip > 60 else "MEDIUM" if manip > 30 else "LOW"
            # NOTE(review): manip_color is not referenced by the markup visible here.
            manip_color = "#ef4444" if manip > 60 else "#f59e0b" if manip > 30 else "#10b981"
            verdict_word = "FAKE" if is_fake else "REAL"
            manip_note = (
                "\n⚠️ High emotional manipulation detected — verify from multiple sources."
                if manip > 50
                else ""
            )
            st.markdown(
                "\n🧠 Analysis Summary\n"
                "\n"
                f"The model classified this content as {verdict_word} "
                f"with {conf:.1f}% confidence.\n"
                f"Manipulation score is {manip_level} ({manip}%) — detected "
                f"{len(cb_hits)} clickbait keyword(s) and "
                f"{len(fear_hits)} fear-based word(s). {manip_note}\n"
                "\n",
                unsafe_allow_html=True,
            )

        elif analyze_clicked:
            st.warning("Please enter some text to analyse.")
        else:
            st.markdown(
                "\n📰\n"
                "AWAITING INPUT\n"
                "Paste an article and click Analyze\n",
                unsafe_allow_html=True,
            )

    st.markdown("\n", unsafe_allow_html=True)

# ══════════════════════════════════════════════
# TAB 2 — IMAGE DETECTOR
# ══════════════════════════════════════════════
with tab2:
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n🖼️ AI Image Forensic Analyser\n"
        "Upload any image. The model will determine whether it was taken by a "
        "camera or generated by an AI system.\n",
        unsafe_allow_html=True,
    )

    col_up, col_res = st.columns([1, 1], gap="large")

    with col_up:
        st.markdown("\n📂 Upload Image\n", unsafe_allow_html=True)
        uploaded = st.file_uploader(
            "Upload image",
            type=["jpg", "jpeg", "png", "webp"],
            label_visibility="collapsed",
        )
        st.markdown("\n", unsafe_allow_html=True)

        if uploaded:
            img = Image.open(uploaded)
            # use_column_width is kept for older Streamlit releases; newer
            # ones prefer use_container_width for st.image as well.
            st.image(img, caption="Uploaded Image", use_column_width=True)

            # Image metadata
            w, h = img.size
            file_ext = uploaded.name.split(".")[-1].upper()
            st.markdown(
                "\n📐 Image Metadata\n"
                f"{w}×{h} Resolution\n"
                f"{img.mode} Color Mode\n"
                f"{file_ext} Format\n",
                unsafe_allow_html=True,
            )

        scan_clicked = st.button("🔬 SCAN IMAGE", key="scan_btn", disabled=uploaded is None)

    with col_res:
        if scan_clicked and uploaded:
            with st.spinner("Loading vision model…"):
                img_model = load_image_model()

            with st.spinner("Running forensic scan…"):
                time.sleep(0.5)
                # img was bound in the upload column, guarded by the same `uploaded`.
                verdict, score = predict_image(img, img_model)

            is_ai = verdict == "AI GENERATED"
            # NOTE(review): badge_class is not referenced by the markup visible here.
            badge_class = "verdict-fake" if is_ai else "verdict-real"
            icon = "🤖 AI GENERATED" if is_ai else "📷 REAL IMAGE"

            st.markdown(
                f"\n{icon}\n"
                f"Detection confidence: {score:.1f}%\n",
                unsafe_allow_html=True,
            )

            # Gauges
            ai_score = score if is_ai else 100 - score
            real_score = 100 - ai_score
            g1, g2 = st.columns(2)
            with g1:
                st.plotly_chart(
                    gauge_chart(ai_score, "AI PROBABILITY", "#ef4444"),
                    use_container_width=True,
                )
            with g2:
                st.plotly_chart(
                    gauge_chart(real_score, "REAL PROBABILITY", "#10b981"),
                    use_container_width=True,
                )

            # Confidence bar — the losing side is greyed out.
            fig_bar = bar_chart(
                ["AI Generated", "Real / Authentic"],
                [ai_score, real_score],
                [
                    "#ef4444" if ai_score > real_score else "#475569",
                    "#10b981" if real_score >= ai_score else "#475569",
                ],
            )
            st.markdown("\n📊 Score Breakdown\n", unsafe_allow_html=True)
            st.plotly_chart(fig_bar, use_container_width=True)
            st.markdown("\n", unsafe_allow_html=True)

            # Summary
            risk = "HIGH" if ai_score > 75 else "MEDIUM" if ai_score > 45 else "LOW"
            # NOTE(review): risk_col is not referenced by the markup visible here.
            risk_col = "#ef4444" if ai_score > 75 else "#f59e0b" if ai_score > 45 else "#10b981"
            origin_phrase = "likely AI-generated" if is_ai else "likely authentic"
            advice = (
                "\n⚠️ Do not use this image as evidence without further verification."
                if is_ai
                else "\n✅ Image appears to be from a real camera source."
            )
            st.markdown(
                "\n🧠 Forensic Summary\n"
                "\n"
                f"Forensic analysis indicates this image is {origin_phrase} "
                f"with {score:.1f}% confidence.\n"
                f"AI generation risk level: {risk}. {advice}\n"
                "\n",
                unsafe_allow_html=True,
            )

        elif not uploaded:
            st.markdown(
                "\n🖼️\n"
                "NO IMAGE UPLOADED\n"
                "Upload an image and click Scan\n",
                unsafe_allow_html=True,
            )

    st.markdown("\n", unsafe_allow_html=True)

# ══════════════════════════════════════════════
# TAB 3 — ABOUT
# ══════════════════════════════════════════════
with tab3:
    st.markdown("\n", unsafe_allow_html=True)

    # NOTE(review): the Vision Engine entry names "Muniba930/image-detector",
    # while load_image_model downloads from "syeda-Rija20/image-detector" — confirm
    # which repository should be displayed.
    st.markdown(
        "\n⚙️ System Architecture & Models\n"
        "Technical details about the AI models and pipeline powering DeepTrace AI.\n"
        "🧠\n"
        "NLP Engine\n"
        "RoBERTa-based transformer fine-tuned on 72,000+ news articles. "
        "Classifies content as FAKE or REAL with confidence scoring.\n"
        "\n"
        "hamzab/roberta-fake-news-classification\n"
        "👁️\n"
        "Vision Engine\n"
        "EfficientNetB3 transfer learning model fine-tuned on AI-generated vs real "
        "image datasets. Detects synthetic textures and generation artifacts.\n"
        "\n"
        "Muniba930/image-detector\n"
        "🔍\n"
        "Manipulation Analyser\n"
        "Rule-based linguistic analyser detecting clickbait keywords, fear-based "
        "language, excessive punctuation, and emotional manipulation patterns.\n"
        "\n"
        "Custom lexical engine\n",
        unsafe_allow_html=True,
    )

    st.markdown("\n", unsafe_allow_html=True)

    # Tech stack
    st.markdown(
        "\n🛠️ Tech Stack\n"
        "Streamlit Hugging Face PyTorch TensorFlow Transformers EfficientNetB3 "
        "RoBERTa Plotly Pillow\n"
        "⚠️ Limitations & Disclaimer\n"
        "\n"
        "• This tool is for research and educational purposes — "
        "not a definitive fact-checker.\n"
        "• Image model performance is limited by training data diversity; "
        "very recent AI generators may not be detected.\n"
        "• Always cross-reference with trusted news sources before drawing conclusions.\n"
        "• The manipulation score is heuristic-based and may produce false positives "
        "on legitimate breaking news.\n"
        "\n",
        unsafe_allow_html=True,
    )

    st.markdown(
        "\nDEEPTRACE AI · MULTI-MODAL MISINFORMATION DETECTION · "
        "BUILT WITH ❤️ USING OPEN-SOURCE AI\n",
        unsafe_allow_html=True,
    )

    st.markdown("\n", unsafe_allow_html=True)