Spaces:

YAMITEK
/

News_classification

Sleeping

App Files Files Community

YAMITEK committed on Apr 25, 2025

Commit

18e75ed

verified ·

1 Parent(s): c4fd2ad

Upload 14 files

Browse files

Files changed (15) hide show

.gitattributes +3 -0
Fake.csv +3 -0
True.csv +3 -0
app.py +78 -0
app_onnx.py +87 -0
lstm_news_classifier.onnx +3 -0
news_classfication.pth +3 -0
news_classification_documentation.docx +3 -0
news_classification_full_model.pth +3 -0
news_classification_notebook.ipynb +0 -0
news_classification_notebook_with_onnx.ipynb +0 -0
news_classification_traced.pt +3 -0
requirements.txt +10 -0
tokenizer.pkl +3 -0
tokenizer_new.pkl +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Fake.csv filter=lfs diff=lfs merge=lfs -text
+news_classification_documentation.docx filter=lfs diff=lfs merge=lfs -text
+True.csv filter=lfs diff=lfs merge=lfs -text

Fake.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bebf8bcfe95678bf2c732bf413a2ce5f621af0102c82bf08083b2e5d3c693d0c
+size 62789876

True.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba0844414a65dc6ae7402b8eee5306da24b6b56488d6767135af466c7dcb2775
+size 53582940

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import streamlit as st
+import torch
+import re
+import torch.nn as nn
+import joblib
+import torch.nn.functional as F
+# Page heading shown at the top of the Streamlit app.
+st.title("News Classification")
+## Model hyperparameters. They are passed to LSTMModel below and therefore
+## have to match the tensor shapes stored in the checkpoint loaded into it.
+vocab_size = 37852
+embedding_dim = 45
+hidden_units = 25
+num_classes = 2
+# NOTE(review): max_len is not referenced anywhere else in this script;
+# preprocess() pads/truncates to its own local max_length instead.
+max_len = 55
+class LSTMModel(nn.Module):
+    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
+        super(LSTMModel, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim)
+        self.lstm = nn.LSTM(embedding_dim, hidden_units, batch_first=True, dropout=0.2,bidirectional=True)
+        self.fc = nn.Linear(hidden_units* 2, num_classes)
+    def forward(self, x):
+        x = self.embedding(x)
+        output, _ = self.lstm(x)
+        x = output[:, -1, :]
+        x = self.fc(x)
+        return F.softmax(x, dim=1)
+model = LSTMModel(vocab_size, embedding_dim, hidden_units, num_classes)
+## load the weights
+model.load_state_dict(torch.load( "news_classfication.pth", map_location=torch.device("cpu")))
+model.eval()
+tokenizer=joblib.load("tokenizer.pkl")
+def preprocess(words):
+    normalized = []
+    for i in words:
+        i = i.lower()
+        # get rid of urlss
+        i = re.sub('https?://\S+|www\.\S+', '', i)
+        # get rid of non words and extra spaces
+        i = re.sub('\\W', ' ', i)
+        i = re.sub('\n', '', i)
+        i = re.sub(' +', ' ', i)
+        i = re.sub('^ ', '', i)
+        i = re.sub(' $', '', i)
+        normalized.append(i)
+        text=[tokenizer.encode(text.lower()).ids for text in normalized]
+        max_length = 20
+        flattened_text = [token for sublist in text for token in sublist]
+        if len(flattened_text) > max_length:
+            flattened_text = flattened_text[:max_length]
+        else:
+            flattened_text += [0] * (max_length - len(flattened_text))
+        text_tensor = torch.tensor(flattened_text, dtype=torch.long)
+        text_tensor = text_tensor.unsqueeze(0)
+    return text_tensor
+text=st.text_input("Enter the news  Tittle ",value="Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People 'In The Eye'")
+if st.button("submit"):
+    words=text.split()
+    v=preprocess(words)
+    output=model(v)
+    if output.argmax()==0:
+        st.write("Its a Fake news")
+    else:
+        st.write("Its not a Fake news")

app_onnx.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import streamlit as st
+import onnxruntime as ort
+import torch
+import numpy as np
+import pickle
+@st.cache_resource
+def load_model():
+    return ort.InferenceSession(r"C:\Users\ADMIN\Desktop\lstm_news_classifier (1).onnx")
+@st.cache_data
+def load_tokenizer():
+    with open(r"C:\Users\ADMIN\Desktop\tokenizer.pkl", "rb") as f:
+        tokenizer = pickle.load(f)
+    return tokenizer
+@st.cache_data
+def load_vocab():
+    try:
+        with open("vocab.pkl", "rb") as f:
+            vocab = pickle.load(f)
+        return vocab
+    except FileNotFoundError:
+        return None
+# 🔁 Extract max_length from ONNX input shape
+def get_input_length(session):
+    input_shape = session.get_inputs()[0].shape
+    return input_shape[1] if isinstance(input_shape[1], int) else 55  # fallback
+def predict(text, session, tokenizer, vocab=None):
+    max_length = get_input_length(session)
+    if vocab:
+        tokens = tokenizer(text)
+        indices = [vocab.get(token, vocab.get('<unk>', 0)) for token in tokens]
+    else:
+        encoding = tokenizer.encode(text)
+        indices = encoding.ids if hasattr(encoding, "ids") else encoding["input_ids"]
+    padded = indices[:max_length] + [0] * (max_length - len(indices))
+    input_array = np.array([padded], dtype=np.int64)
+    inputs = {session.get_inputs()[0].name: input_array}
+    output = session.run(None, inputs)[0]
+    probs = torch.softmax(torch.tensor(output), dim=1)
+    pred = torch.argmax(probs, dim=1).item()
+    confidence = probs[0][pred].item()
+    return pred, confidence
+# 🖼 Streamlit UI
+# Page metadata (browser tab title and icon), followed by the visible heading.
+st.set_page_config(page_title="Fake News Detector", page_icon="📰")
+st.title("📰 Fake News Detector")
+# Banner image loaded from a remote URL.
+url = "https://tse1.mm.bing.net/th?id=OIP.P_-960Qckr5FUEU3KvjCMwHaEc&pid=Api&rs=1&c=1&qlt=95&w=208&h=124"
+st.image(url, width=400)
+# Inject custom CSS; braces are doubled because the literal is an f-string.
+# NOTE(review): only a background colour is set (no image), and `no` / `full`
+# do not look like valid values for background-repeat / background-position.
+st.markdown(f"""
+    <style>
+        /* Set the background image for the entire app */
+        .stApp {{
+            background-color:#add8e6;
+            background-size: 100px;
+            background-repeat:no;
+            background-attachment: auto;
+            background-position:full;
+        }}
+    </style>
+    """, unsafe_allow_html=True)
+user_input = st.text_area("Enter News Text:", height=100)
+if st.button("Detect"):
+    # Reject blank / whitespace-only input before running the model.
+    if user_input.strip() == "":
+        st.warning("Please enter some text.")
+    else:
+        with st.spinner("Analyzing..."):
+            # The loaders are wrapped in Streamlit cache decorators, so
+            # repeated clicks reuse the already loaded objects.
+            session = load_model()
+            tokenizer = load_tokenizer()
+            vocab = load_vocab()
+            label, confidence = predict(user_input, session, tokenizer, vocab)
+            # NOTE(review): class index 1 is shown as "Fake" here, whereas
+            # app.py reports class index 0 as fake - confirm which mapping
+            # matches the training labels.
+            label_name = "Fake" if label == 1 else "Real"
+            color = "🔴" if label == 1 else "🟢"
+            st.markdown(f"### Prediction: {color} **{label_name} News**")
+            st.markdown(f"**Confidence:** {confidence:.2%}")

lstm_news_classifier.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17d2730135209f99ef9c31fa9455509173bd961434cb1ec9e38cb9d269172fac
+size 6873675

news_classfication.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05a6a9c12929a5d98deb0e7af30d2c3f39fc2c82a65c32771404cb28e9028daa
+size 6874152

news_classification_documentation.docx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:867e520e6a166b99dcfa59a68fa792dc003140f55f6ae64cd569b18b26e3208f
+size 362107

news_classification_full_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c30b6353d636173e3858d9e2d1f546679e442c1096b0f3ba9a17edc7a5cbbd1
+size 6876168

news_classification_notebook.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

news_classification_notebook_with_onnx.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

news_classification_traced.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4a10b9112fcf3618973679ce969f246214885a5b9fc088470d6857f265e5fa
+size 6882946

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+streamlit==1.30.0
+pandas==2.1.4
+torch==2.2.0
+torchvision==0.17.0
+numpy==1.26.3
+scikit-learn==1.3.2
+tokenizers==0.15.1
+joblib==1.2.0
+onnx==1.17.0
+onnxruntime==1.20.1

tokenizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd836e1eeb44d11836cbe52a4d10d4b2d5579c6265611dfd16941dfdfb1f9ed2
+size 645410

tokenizer_new.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd836e1eeb44d11836cbe52a4d10d4b2d5579c6265611dfd16941dfdfb1f9ed2
+size 645410