Spaces:
Sleeping
Sleeping
File size: 2,773 Bytes
6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 4f78b24 6035a10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | import streamlit as st
import torch
import re
import torch.nn as nn
import torch.nn.functional as F
import joblib
# Model parameters
# Output side of the network: two classes (the UI below treats index 0 as fake).
num_classes = 2

# Encoder sizes handed to LSTMModel when the checkpoint is restored.
hidden_units = 25      # LSTM hidden size per direction
embedding_dim = 45     # width of each token embedding
vocab_size = 37852     # number of rows in the embedding table

# NOTE(review): not referenced by the code visible in this file; preprocess()
# pads/truncates to a length of 20 -- confirm which length is intended.
max_len = 55
# Define the LSTM model
class LSTMModel(nn.Module):
    """Single-layer bidirectional LSTM classifier over token-id sequences.

    Token ids are embedded, passed through a bidirectional LSTM, and the
    LSTM output at the last time step is projected to ``num_classes``
    scores, which are normalised with softmax.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Size of each token embedding vector.
        hidden_units: LSTM hidden size per direction.
        num_classes: Number of output classes.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_units, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # No ``dropout`` argument here: nn.LSTM applies dropout only between
        # stacked layers, so with the default single layer a non-zero value
        # has no effect and merely emits a UserWarning at construction.
        # Parameter names and shapes are unchanged, so existing checkpoints
        # still load.
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_units,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence 2 * hidden_units features.
        self.fc = nn.Linear(hidden_units * 2, num_classes)

    def forward(self, x):
        """Return per-class probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids laid out as (batch, sequence),
                matching ``batch_first=True`` on the LSTM.

        Returns:
            Tensor of shape (batch, num_classes); each row sums to 1.
        """
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        # Use the features at the final time step of every sequence.
        last_step = output[:, -1, :]
        logits = self.fc(last_step)
        return F.softmax(logits, dim=1)
# Load model and tokenizer
# NOTE(review): torch.load and joblib.load both unpickle their input; only
# point them at files that ship with this app.
_cpu = torch.device("cpu")
_weights = torch.load("news_classfication.pth", map_location=_cpu)

# Rebuild the architecture, restore the trained weights, and switch to
# evaluation mode for inference.
model = LSTMModel(vocab_size, embedding_dim, hidden_units, num_classes)
model.load_state_dict(_weights)
model.eval()

# Tokenizer object used by preprocess() to turn text into token ids.
tokenizer = joblib.load("tokenizer.pkl")
# Preprocessing function
def preprocess(text, max_length=20):
    """Clean *text*, tokenize it, and return a fixed-length id tensor.

    Cleaning lower-cases the text, removes URLs, replaces every non-word
    character with a space, collapses runs of spaces, and trims the ends.
    The cleaned text is encoded with the module-level ``tokenizer`` and the
    resulting ids are truncated or right-padded with 0 to ``max_length``.

    Args:
        text: Raw input string.
        max_length: Number of token ids in the output. Defaults to 20, the
            value this function has always used.
            NOTE(review): the module-level ``max_len`` is 55 -- confirm
            which length the model was trained with.

    Returns:
        ``torch.long`` tensor of shape (1, max_length).
    """
    text = text.lower()
    # Raw strings keep ``\S``, ``\.`` and ``\W`` as regex escapes instead of
    # invalid Python string escapes (a warning on recent Python versions).
    text = re.sub(r"https?://\S+|www\.\S+", "", text)
    # Newlines are non-word characters too, so they become spaces here and
    # no separate newline removal is needed.
    text = re.sub(r"\W", " ", text)
    # After collapsing, at most one space can remain at either end.
    text = re.sub(r" +", " ", text).strip(" ")

    # Tokenization, then truncate / right-pad to the fixed length.
    # Assumes id 0 is the padding id the model expects -- TODO confirm.
    token_ids = list(tokenizer.encode(text).ids)[:max_length]
    token_ids += [0] * (max_length - len(token_ids))

    # Add a leading batch dimension of size 1.
    return torch.tensor(token_ids, dtype=torch.long).unsqueeze(0)
# Streamlit UI
# Page chrome: browser tab title/icon and the on-page heading.
st.set_page_config(page_title="Fake News Detector", page_icon="📰")
st.title("📰 Fake News Detector")

# Illustration shown under the title (remote image, rendered 400 px wide).
url = "https://tse1.mm.bing.net/th?id=OIP.P_-960Qckr5FUEU3KvjCMwHaEc&pid=Api&rs=1&c=1&qlt=95&w=208&h=124"
st.image(url, width=400)

# Light-blue page background, injected as raw CSS.
_page_css = """
<style>
.stApp {
background-color: #add8e6;
}
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)

# Text box for the news text, pre-filled with a sample headline.
_sample_headline = "Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People 'In The Eye'"
user_input = st.text_area("Enter News Text:", value=_sample_headline, height=100)
# Predict button
# Classify the entered text when the user presses "Submit".
if st.button("Submit"):
    if not user_input.strip():
        # Nothing but whitespace was entered.
        st.warning("Please enter some text to classify.")
    else:
        input_tensor = preprocess(user_input)
        # Inference only: disable autograd so no graph is built for the
        # forward pass.
        with torch.no_grad():
            output = model(input_tensor)
        # Index of the highest-probability class for the single input.
        prediction = output.argmax().item()
        # Class index 0 is reported as fake; any other index as not fake.
        if prediction == 0:
            st.error("🚨 This is *Fake News*.")
        else:
            st.success("✅ This is *Not Fake News*.")
|