YAMITEK committed on
Commit
18e75ed
·
verified ·
1 Parent(s): c4fd2ad

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Fake.csv filter=lfs diff=lfs merge=lfs -text
37
+ news_classification_documentation.docx filter=lfs diff=lfs merge=lfs -text
38
+ True.csv filter=lfs diff=lfs merge=lfs -text
Fake.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bebf8bcfe95678bf2c732bf413a2ce5f621af0102c82bf08083b2e5d3c693d0c
3
+ size 62789876
True.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0844414a65dc6ae7402b8eee5306da24b6b56488d6767135af466c7dcb2775
3
+ size 53582940
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import re
4
+ import torch.nn as nn
5
+ import joblib
6
+ import torch.nn.functional as F
7
+
8
st.title("News Classification")

# --- Model hyperparameters ---
# vocab_size, embedding_dim, hidden_units and num_classes fix the layer
# shapes of LSTMModel, so they have to agree with the checkpoint that is
# loaded into it further down.
vocab_size = 37852    # rows of the embedding table
embedding_dim = 45    # width of each token embedding
hidden_units = 25     # LSTM hidden size per direction
num_classes = 2       # size of the classifier output
# NOTE(review): max_len is not referenced by the code visible in this file
# (preprocess pads to its own length) -- confirm which value was used in training.
max_len = 55
17
+
18
class LSTMModel(nn.Module):
    """Single-layer bidirectional LSTM classifier over token-id sequences.

    Pipeline: embedding -> bidirectional LSTM -> linear layer applied to the
    LSTM output at the last time step -> softmax over classes.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_units: LSTM hidden size per direction.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No `dropout=` argument: nn.LSTM only applies dropout *between*
        # stacked layers, so with a single layer it had no effect and merely
        # raised a UserWarning at construction. Parameter names and shapes
        # are unchanged, so existing checkpoints still load.
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_units,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence 2 * hidden_units inputs.
        self.fc = nn.Linear(hidden_units * 2, num_classes)

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes).

        Args:
            x: Integer tensor of token ids, batch first (the LSTM is built
                with ``batch_first=True``).
        """
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        # Use the LSTM output at the final time step as the sequence summary.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
31
+
32
@st.cache_resource
def _load_model():
    """Build the LSTM classifier, load its trained weights and set eval mode.

    Cached with ``st.cache_resource`` so the checkpoint is read once per
    server process instead of on every Streamlit rerun.
    """
    net = LSTMModel(vocab_size, embedding_dim, hidden_units, num_classes)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs, and avoids executing arbitrary pickled
    # code from the checkpoint file.
    state_dict = torch.load(
        "news_classfication.pth",
        map_location=torch.device("cpu"),
        weights_only=True,
    )
    net.load_state_dict(state_dict)
    net.eval()
    return net


@st.cache_resource
def _load_tokenizer():
    """Load the fitted tokenizer from ``tokenizer.pkl`` (cached across reruns)."""
    # NOTE: joblib.load unpickles the file; only ship tokenizer.pkl from a
    # trusted source.
    return joblib.load("tokenizer.pkl")


model = _load_model()
tokenizer = _load_tokenizer()
38
+
39
# Compiled once at import time; raw strings avoid invalid-escape warnings.
_URL_RE = re.compile(r"https?://\S+|www\.\S+")
_NON_WORD_RE = re.compile(r"\W")
_MULTI_SPACE_RE = re.compile(r" +")


def _normalize_word(word):
    """Lower-case *word*, drop URLs, and reduce non-word characters to single spaces."""
    cleaned = _URL_RE.sub("", word.lower())
    # Every non-word character (punctuation, newlines, ...) becomes a space.
    cleaned = _NON_WORD_RE.sub(" ", cleaned)
    # Collapse runs of spaces, then trim the (at most one) space left at each end.
    return _MULTI_SPACE_RE.sub(" ", cleaned).strip(" ")


def preprocess(words, max_length=20, encoder=None):
    """Turn a list of raw words into a padded ``(1, max_length)`` id tensor.

    Each word is normalized and encoded separately; the resulting ids are
    concatenated, truncated to ``max_length`` and right-padded with 0.

    Args:
        words: Iterable of strings (e.g. ``title.split()``).
        max_length: Number of token ids in the returned row. Defaults to 20,
            the value this function has always used.
            NOTE(review): the module also defines ``max_len = 55`` which is
            not used here -- confirm which length the model was trained with.
        encoder: Object whose ``encode(text)`` returns something with an
            ``ids`` list. Defaults to the module-level ``tokenizer``.

    Returns:
        ``torch.long`` tensor of shape ``(1, max_length)``.
    """
    enc = tokenizer if encoder is None else encoder
    token_ids = [
        token_id
        for word in words
        for token_id in enc.encode(_normalize_word(word)).ids
    ]
    token_ids = token_ids[:max_length]
    token_ids += [0] * (max_length - len(token_ids))
    # Add the batch dimension expected by the model.
    return torch.tensor(token_ids, dtype=torch.long).unsqueeze(0)
63
+
64
+
65
+
66
+
67
text = st.text_input(
    "Enter the news Tittle ",
    value="Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People 'In The Eye'",
)

if st.button("submit"):
    features = preprocess(text.split())
    # Inference only: no_grad skips autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(features)
    # NOTE(review): class 0 is reported as fake here, whereas app_onnx.py
    # treats label 1 as fake -- confirm the label encoding used in training.
    if output.argmax() == 0:
        st.write("Its a Fake news")
    else:
        st.write("Its not a Fake news")
77
+
78
+
app_onnx.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import onnxruntime as ort
3
+ import torch
4
+ import numpy as np
5
+ import pickle
6
+
7
@st.cache_resource
def load_model():
    """Create the ONNX Runtime session for the news classifier (cached).

    Prefers ``lstm_news_classifier.onnx`` located next to this script, so the
    app works wherever the repository is checked out. The original
    developer-machine path is kept only as a fallback for when the bundled
    file is missing.
    """
    from pathlib import Path

    bundled = Path(__file__).resolve().parent / "lstm_news_classifier.onnx"
    legacy = Path(r"C:\Users\ADMIN\Desktop\lstm_news_classifier (1).onnx")
    # If neither file exists, pass the bundled path so the resulting error
    # names the file the repository is expected to contain.
    model_path = legacy if (not bundled.exists() and legacy.exists()) else bundled
    return ort.InferenceSession(str(model_path))
10
+
11
@st.cache_data
def load_tokenizer():
    """Unpickle and return the tokenizer (cached by Streamlit).

    Prefers ``tokenizer.pkl`` located next to this script; the original
    developer-machine path is used only when the bundled file is missing.
    """
    from pathlib import Path

    bundled = Path(__file__).resolve().parent / "tokenizer.pkl"
    legacy = Path(r"C:\Users\ADMIN\Desktop\tokenizer.pkl")
    tokenizer_path = legacy if (not bundled.exists() and legacy.exists()) else bundled
    # NOTE: pickle.load can execute arbitrary code; only load a tokenizer
    # file that comes from a trusted source.
    with open(tokenizer_path, "rb") as f:
        tokenizer = pickle.load(f)
    return tokenizer
16
+
17
@st.cache_data
def load_vocab():
    """Return the object pickled in ``vocab.pkl``, or ``None`` if the file is absent."""
    try:
        with open("vocab.pkl", "rb") as handle:
            loaded = pickle.load(handle)
    except FileNotFoundError:
        # A missing vocabulary is expected; callers handle the None case.
        loaded = None
    return loaded
25
+
26
+ # 🔁 Extract max_length from ONNX input shape
27
def get_input_length(session, default=55):
    """Return the fixed sequence length of the session's first input.

    Args:
        session: Object exposing ``get_inputs()``, whose first element has a
            ``shape`` attribute (as an ONNX Runtime session does).
        default: Value returned when the second dimension is missing or is
            not an int (e.g. a symbolic/dynamic axis). Defaults to 55.

    Returns:
        ``shape[1]`` when it is an int, otherwise ``default``.
    """
    input_shape = session.get_inputs()[0].shape
    # Guard against inputs with fewer than two dimensions instead of raising
    # IndexError.
    if input_shape is None or len(input_shape) < 2:
        return default
    seq_len = input_shape[1]
    return seq_len if isinstance(seq_len, int) else default
30
+
31
def predict(text, session, tokenizer, vocab=None):
    """Classify *text* with the ONNX session and return ``(label, confidence)``.

    When a non-empty ``vocab`` is given, ``tokenizer`` is called on the text
    and each token is looked up in ``vocab`` (unknown tokens map to the
    ``'<unk>'`` entry, or 0). Otherwise ``tokenizer.encode(text)`` is used and
    ids are read from ``.ids`` or ``["input_ids"]``. Ids are truncated or
    right-padded with 0 to the session's input length.

    Returns:
        Tuple of the arg-max class index (int) and its softmax score (float).
    """
    seq_len = get_input_length(session)

    if vocab:
        token_ids = [
            vocab.get(tok, vocab.get('<unk>', 0)) for tok in tokenizer(text)
        ]
    else:
        encoded = tokenizer.encode(text)
        if hasattr(encoded, "ids"):
            token_ids = encoded.ids
        else:
            token_ids = encoded["input_ids"]

    # A negative pad count multiplies to an empty list, so over-long inputs
    # are simply truncated.
    pad = [0] * (seq_len - len(token_ids))
    batch = np.array([token_ids[:seq_len] + pad], dtype=np.int64)

    input_name = session.get_inputs()[0].name
    raw_output = session.run(None, {input_name: batch})[0]

    # NOTE(review): if the exported graph already ends in a softmax (as
    # LSTMModel.forward in app.py does), this normalizes a second time; the
    # arg-max is unaffected but the confidence value would be -- confirm.
    # NOTE(review): app.py lower-cases and strips URLs/punctuation before
    # encoding; no such cleaning happens here -- confirm which is intended.
    probs = torch.softmax(torch.tensor(raw_output), dim=1)
    pred = torch.argmax(probs, dim=1).item()
    return pred, probs[0][pred].item()
51
+
52
+ # 🖼 Streamlit UI
53
st.set_page_config(page_title="Fake News Detector", page_icon="📰")
st.title("📰 Fake News Detector")
url = "https://tse1.mm.bing.net/th?id=OIP.P_-960Qckr5FUEU3KvjCMwHaEc&pid=Api&rs=1&c=1&qlt=95&w=208&h=124"
st.image(url, width=400)

# Page-wide styling. A plain string is enough here: nothing is interpolated,
# so the braces do not need f-string escaping.
page_css = """
<style>
/* Set the background image for the entire app */
.stApp {
background-color:#add8e6;
background-size: 100px;
background-repeat:no;
background-attachment: auto;
background-position:full;
}
</style>
"""
st.markdown(page_css, unsafe_allow_html=True)

user_input = st.text_area("Enter News Text:", height=100)

if st.button("Detect"):
    if not user_input.strip():
        st.warning("Please enter some text.")
    else:
        with st.spinner("Analyzing..."):
            session = load_model()
            tokenizer = load_tokenizer()
            vocab = load_vocab()

            label, confidence = predict(user_input, session, tokenizer, vocab)
            # Label 1 is presented as fake, anything else as real.
            is_fake = label == 1
            label_name = "Fake" if is_fake else "Real"
            color = "🔴" if is_fake else "🟢"

            st.markdown(f"### Prediction: {color} **{label_name} News**")
            st.markdown(f"**Confidence:** {confidence:.2%}")
lstm_news_classifier.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d2730135209f99ef9c31fa9455509173bd961434cb1ec9e38cb9d269172fac
3
+ size 6873675
news_classfication.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a6a9c12929a5d98deb0e7af30d2c3f39fc2c82a65c32771404cb28e9028daa
3
+ size 6874152
news_classification_documentation.docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867e520e6a166b99dcfa59a68fa792dc003140f55f6ae64cd569b18b26e3208f
3
+ size 362107
news_classification_full_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c30b6353d636173e3858d9e2d1f546679e442c1096b0f3ba9a17edc7a5cbbd1
3
+ size 6876168
news_classification_notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
news_classification_notebook_with_onnx.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
news_classification_traced.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb4a10b9112fcf3618973679ce969f246214885a5b9fc088470d6857f265e5fa
3
+ size 6882946
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.30.0
2
+ pandas==2.1.4
3
+ torch==2.2.0
4
+ torchvision==0.17.0
5
+ numpy==1.26.3
6
+ scikit-learn==1.3.2
7
+ tokenizers==0.15.1
8
+ joblib==1.2.0
9
+ onnx==1.17.0
10
+ onnxruntime==1.20.1
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd836e1eeb44d11836cbe52a4d10d4b2d5579c6265611dfd16941dfdfb1f9ed2
3
+ size 645410
tokenizer_new.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd836e1eeb44d11836cbe52a4d10d4b2d5579c6265611dfd16941dfdfb1f9ed2
3
+ size 645410