YAMITEK committed on
Commit
4f78b24
·
verified ·
1 Parent(s): 7b8ada3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -78
app.py CHANGED
@@ -1,78 +1,97 @@
1
- import streamlit as st
2
- import torch
3
- import re
4
- import torch.nn as nn
5
- import joblib
6
- import torch.nn.functional as F
7
-
8
- st.title("News Classification")
9
-
10
- ## mopdel
11
-
12
- vocab_size = 37852
13
- embedding_dim = 45
14
- hidden_units = 25
15
- num_classes = 2
16
- max_len = 55
17
-
18
- class LSTMModel(nn.Module):
19
- def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
20
- super(LSTMModel, self).__init__()
21
- self.embedding = nn.Embedding(vocab_size, embedding_dim)
22
- self.lstm = nn.LSTM(embedding_dim, hidden_units, batch_first=True, dropout=0.2,bidirectional=True)
23
- self.fc = nn.Linear(hidden_units* 2, num_classes)
24
-
25
- def forward(self, x):
26
- x = self.embedding(x)
27
- output, _ = self.lstm(x)
28
- x = output[:, -1, :]
29
- x = self.fc(x)
30
- return F.softmax(x, dim=1)
31
-
32
- model = LSTMModel(vocab_size, embedding_dim, hidden_units, num_classes)
33
- ## load the weights
34
- model.load_state_dict(torch.load( "news_classfication.pth", map_location=torch.device("cpu")))
35
- model.eval()
36
-
37
- tokenizer=joblib.load("tokenizer.pkl")
38
-
39
- def preprocess(words):
40
- normalized = []
41
- for i in words:
42
- i = i.lower()
43
- # get rid of urlss
44
- i = re.sub('https?://\S+|www\.\S+', '', i)
45
- # get rid of non words and extra spaces
46
- i = re.sub('\\W', ' ', i)
47
- i = re.sub('\n', '', i)
48
- i = re.sub(' +', ' ', i)
49
- i = re.sub('^ ', '', i)
50
- i = re.sub(' $', '', i)
51
-
52
- normalized.append(i)
53
- text=[tokenizer.encode(text.lower()).ids for text in normalized]
54
- max_length = 20
55
- flattened_text = [token for sublist in text for token in sublist]
56
- if len(flattened_text) > max_length:
57
- flattened_text = flattened_text[:max_length]
58
- else:
59
- flattened_text += [0] * (max_length - len(flattened_text))
60
- text_tensor = torch.tensor(flattened_text, dtype=torch.long)
61
- text_tensor = text_tensor.unsqueeze(0)
62
- return text_tensor
63
-
64
-
65
-
66
-
67
- text=st.text_input("Enter the news Tittle ",value="Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People 'In The Eye'")
68
-
69
- if st.button("submit"):
70
- words=text.split()
71
- v=preprocess(words)
72
- output=model(v)
73
- if output.argmax()==0:
74
- st.write("Its a Fake news")
75
- else:
76
- st.write("Its not a Fake news")
77
-
78
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import re
4
+ import torch.nn as nn
5
+ import joblib
6
+ import torch.nn.functional as F
7
+
8
+
9
+
10
+ ## model
11
+
12
# Model hyper-parameters. They determine the parameter shapes of LSTMModel and
# therefore have to agree with the shapes stored in the checkpoint that is
# loaded with load_state_dict().
vocab_size = 37852    # number of rows in the embedding table
embedding_dim = 45    # width of each token embedding
hidden_units = 25     # LSTM hidden size per direction
num_classes = 2       # size of the classifier output
# NOTE(review): max_len is not referenced by any code visible in this file;
# preprocess() pads/truncates to its own length of 20 -- confirm which value
# is the intended sequence length.
max_len = 55
17
+
18
class LSTMModel(nn.Module):
    """Single-layer bidirectional LSTM classifier over token-id sequences.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_units: LSTM hidden size per direction.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No `dropout` argument: nn.LSTM applies dropout only between stacked
        # layers, so on this single-layer LSTM it had no effect and merely
        # triggered a UserWarning. Parameter names and shapes are unchanged,
        # so existing checkpoints still load.
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_units,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(hidden_units * 2, num_classes)

    def forward(self, x):
        """Return class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, laid out as (batch, seq_len)
                because the LSTM is created with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, num_classes); each row is a softmax
            distribution.
        """
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        # Use the features at the final time step of the sequence.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
31
+
32
# Build the network and restore the trained weights on the CPU.
# NOTE(review): Streamlit re-executes this script on every interaction, so
# these loads run again on each rerun; consider caching them.
model = LSTMModel(vocab_size, embedding_dim, hidden_units, num_classes)
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load("news_classfication.pth", map_location=torch.device("cpu"))
)
# Inference mode: disables training-only behaviour such as dropout.
model.eval()

# NOTE(review): joblib.load is pickle-based as well; the same trust caveat
# applies to tokenizer.pkl.
tokenizer = joblib.load("tokenizer.pkl")
38
+
39
def preprocess(words, max_length=20):
    """Convert words into a fixed-length batch of token ids.

    Every word is lower-cased, stripped of URLs and non-word characters, and
    encoded on its own with the module-level ``tokenizer``. The ids of all
    words are concatenated, then truncated or right-padded with ``0`` so the
    sequence is exactly ``max_length`` long.

    Args:
        words: Iterable of strings, e.g. a whitespace-split headline. A single
            string is accepted too and is split on whitespace first.
        max_length: Length of the returned id sequence. Defaults to 20, the
            value that used to be hard-coded.

    Returns:
        A ``torch.long`` tensor of shape ``(1, max_length)``.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if isinstance(words, str):
        # Iterating a bare string would tokenize character by character.
        words = words.split()

    token_ids = []
    for word in words:
        cleaned = word.lower()
        # Drop URLs.
        cleaned = re.sub(r'https?://\S+|www\.\S+', '', cleaned)
        # Replace every non-word character (newlines included) with a space,
        # then collapse runs of spaces and trim the ends.
        cleaned = re.sub(r'\W', ' ', cleaned)
        cleaned = re.sub(r' +', ' ', cleaned).strip(' ')
        # Words that end up empty are still encoded, so any ids the tokenizer
        # emits for an empty string are kept exactly as before.
        token_ids.extend(tokenizer.encode(cleaned).ids)

    # Truncate, then pad with id 0; a non-positive multiplier adds nothing.
    token_ids = token_ids[:max_length]
    token_ids += [0] * (max_length - len(token_ids))

    # Add the batch dimension expected by the model.
    return torch.tensor(token_ids, dtype=torch.long).unsqueeze(0)
63
+
64
# Streamlit UI
# set_page_config has to stay the first Streamlit command of the script.
st.set_page_config(page_title="Fake News Detector", page_icon="📰")
st.title("📰 Fake News Detector")
url = "https://tse1.mm.bing.net/th?id=OIP.P_-960Qckr5FUEU3KvjCMwHaEc&pid=Api&rs=1&c=1&qlt=95&w=208&h=124"
st.image(url, width=400)

# Page background colour. The former background-size/-repeat/-attachment/
# -position declarations were dropped: no background image is set, and
# `no` / `full` are not valid CSS values, so browsers ignored them anyway.
st.markdown(
    """
<style>
.stApp {
    background-color: #add8e6;
}
</style>
""",
    unsafe_allow_html=True,
)

user_input = st.text_area(
    "Enter News Text:",
    value="Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People 'In The Eye'",
    height=100,
)

if st.button("submit"):
    if not user_input.strip():
        # Nothing to classify; an all-padding input would be meaningless.
        st.warning("Please enter some text to classify.")
    else:
        # Read the text area value (`user_input`); the old name `text` no
        # longer exists and raised NameError on every click.
        words = user_input.split()
        v = preprocess(words)
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            output = model(v)
        # Class index 0 is reported as fake news.
        if output.argmax().item() == 0:
            st.write("🚨 This is *Fake News*.")
        else:
            st.write("✅ This is *Not Fake News*.")
94
+
95
+
96
+
97
+