Upload 8 files
- .gitattributes +1 -0
- Fake_news_detector.ipynb +0 -0
- News_classifier.pt +3 -0
- README.md +35 -0
- app.py +62 -0
- requirements.txt +10 -0
- tokenizer_distilbert/special_tokens_map.json +7 -0
- tokenizer_distilbert/tokenizer_config.json +58 -0
- tokenizer_distilbert/vocab.txt +0 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
News_classifier.pt filter=lfs diff=lfs merge=lfs -text
Fake_news_detector.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
News_classifier.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2881ab6ee32b411cf5cbefe1d9c9f3fa98b71e4027f7102bfa45ccfcac8cd9ae
size 266459281
README.md
ADDED
@@ -0,0 +1,35 @@
# Fake News Detector

This project is a fake news detection system built with DistilBERT and PyTorch, with a Streamlit web app for user interaction.

## Features
- Data preprocessing and visualization (Jupyter Notebook)
- Model training using DistilBERT embeddings
- Streamlit app for real-time news classification

## Files
- `Fake_news_detector.ipynb`: Data analysis, preprocessing, and model training
- `app.py`: Streamlit web app for fake news detection
- `News_classifier.pt`: Trained PyTorch model weights
- `tokenizer_distilbert/`: Saved tokenizer files

## Usage
1. **Install dependencies**
   ```powershell
   pip install -r requirements.txt
   ```
2. **Run the Streamlit app**
   ```powershell
   streamlit run app.py
   ```
3. **Interact**
   - Paste news text in the app to check whether it is Fake or True.

## Training
- See the notebook for data loading, preprocessing, model training, and evaluation steps; a hypothetical sketch follows below.
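The notebook's training code is not part of this commit (its diff is too large to render), but a minimal sketch consistent with the `NewsClassifier` head in `app.py` could look like the following. The `train_loader`, the optimizer choice, and the learning rate are illustrative assumptions, not the notebook's actual code.

```python
# Hypothetical training-loop sketch. Assumptions: a DataLoader named
# train_loader yielding (input_ids, attention_mask, labels) batches,
# and Adam with lr=1e-3. The real notebook may differ.
import torch
import torch.nn as nn

model = NewsClassifier()  # the class defined in app.py
criterion = nn.CrossEntropyLoss()
# The DistilBERT encoder is frozen, so only the head's parameters train.
optimizer = torch.optim.Adam(model.classifier.parameters(), lr=1e-3)

model.train()
for input_ids, attention_mask, labels in train_loader:
    optimizer.zero_grad()
    logits = model(input_ids, attention_mask)  # (batch, 2) logits
    loss = criterion(logits, labels)
    loss.backward()
    optimizer.step()
```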
## Model & Tokenizer
- The model and tokenizer are saved after training and loaded in the app for inference; a sketch of the save step is shown below.
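As a rough illustration of that save step (again hypothetical, since the notebook code is not shown in this commit), the artifact names match what `app.py` loads:

```python
# Hypothetical sketch: persist the trained weights and tokenizer so
# app.py can reload them. Assumes `model` is the trained NewsClassifier
# and `tokenizer` is the DistilBertTokenizer used during training.
import torch

torch.save(model.state_dict(), "News_classifier.pt")  # weights only
tokenizer.save_pretrained("tokenizer_distilbert")      # vocab + config files
```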
## Requirements
See `requirements.txt` for all required Python packages.
app.py
ADDED
@@ -0,0 +1,62 @@
import streamlit as st
import torch
import torch.nn as nn
from transformers import DistilBertTokenizer, DistilBertModel

class NewsClassifier(nn.Module):
    """Frozen DistilBERT encoder with a small MLP classification head."""

    def __init__(self):
        super().__init__()
        self.bert = DistilBertModel.from_pretrained("distilbert-base-uncased")
        # Freeze the encoder; only the classifier head carries trained weights.
        for param in self.bert.parameters():
            param.requires_grad = False
        self.classifier = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2),
        )

    def forward(self, input_ids, attention_mask):
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use the [CLS] token embedding as the sentence representation.
        sentence_embeddings = bert_output.last_hidden_state[:, 0, :]
        return self.classifier(sentence_embeddings)

@st.cache_resource
def load_model_and_tokenizer():
    tokenizer = DistilBertTokenizer.from_pretrained("tokenizer_distilbert")
    model = NewsClassifier()
    model.load_state_dict(torch.load("News_classifier.pt", map_location="cpu"))
    model.eval()
    return tokenizer, model

tokenizer, model = load_model_and_tokenizer()
class_names = ["True", "Fake"]

st.title("Fake News Detection App")
st.write("Paste a news article/text below to check if it is **Fake** or **True**.")

news_text = st.text_area("Enter News Text", height=200)

if st.button("Predict"):
    if news_text.strip():
        encoding = tokenizer(news_text, padding="max_length", max_length=200, truncation=True, return_tensors="pt")
        input_ids = encoding["input_ids"]
        attention_mask = encoding["attention_mask"]

        with torch.no_grad():
            outputs = model(input_ids, attention_mask)
        prediction = torch.argmax(outputs, dim=1).item()
        result = class_names[prediction]
        st.success(f"This news is **{result}**.")
    else:
        st.warning("Please enter some news text!")
requirements.txt
ADDED
@@ -0,0 +1,10 @@
streamlit
transformers
torch
pandas
seaborn
matplotlib
wordcloud
scikit-learn
tqdm
nltk
tokenizer_distilbert/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
tokenizer_distilbert/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
tokenizer_distilbert/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.