# News_Classifier / app.py
# (Header recovered from Hugging Face Spaces raw-file page metadata:
#  author Mpavan45, commit 0f50349, 7.63 kB. Converted to comments so the
#  file is valid Python.)
import streamlit as st
import numpy as np
import re
import emoji
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
import keras
from keras.utils import pad_sequences
import pickle
import os
# Streamlit UI: page metadata (tab title + shield icon), then the app banner.
st.set_page_config(page_title="PressGuard", page_icon="🛡️")
# Radium color effect for the title: inject CSS classes and render the
# title + tagline divs in a single markdown call (HTML allowed).
st.markdown("""
<style>
.radium {
font-size: 60px;
font-weight: bold;
color: #f4ff81; /* Radium-like light greenish-yellow color */
text-shadow: 0 0 5px #f4ff81, 0 0 10px #f4ff81, 0 0 20px #f4ff81, 0 0 30px #ccff66;
text-align: center;
}
.tagline {
font-size: 20px;
color: #ffffff;
text-align: center;
margin-bottom: 30px;
}
</style>
<div class='radium'>🛡️ PressGuard</div>
<div class='tagline'>Classify and Filter Trustworthy News</div>
""", unsafe_allow_html=True)
# NOTE(review): an st.markdown call that injected a body-background CSS rule
# was removed here — the styling block just below injects the identical
# `body { background-image: ...ut9wBSlRR2CCpw95V5ej8.jpeg ... }` rule, so
# this call was an exact duplicate with no visual effect of its own.
# Background Image and Enhanced Styling: one CSS payload defining the page
# background plus the .title, .prompt-box, .analyze-button and .result-box
# classes used by the markdown snippets further down.
# NOTE(review): Streamlit renders content inside its own app container; a
# bare `body` selector may be ignored by some Streamlit versions — confirm
# the background actually shows in the deployed version.
st.markdown(
"""
<style>
body {
background-image: url('https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/ut9wBSlRR2CCpw95V5ej8.jpeg');
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
}
.centered-container {
text-align: center;
}
.title {
font-size: 60px;
font-weight: bold;
color: white;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
padding: 20px;
border-radius: 20px;
box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
0 4px 15px rgba(74, 35, 90, 0.6),
inset 0 2px 10px rgba(49, 27, 146, 0.4);
display: inline-block;
margin-bottom: 20px;
animation: elegantFadeSlide 1.5s ease-out forwards;
}
.prompt-box {
font-size: 22px;
font-weight: bold;
color: white;
text-align: center;
background: linear-gradient(135deg, #33ccff, #ff99cc, #33ff99, #ffcc00);
background-size: 400% 400%;
animation: gradientAnimation 8s ease infinite;
padding: 15px;
border-radius: 15px;
box-shadow: 0 0 15px rgba(255, 255, 255, 0.7),
0 0 25px rgba(136, 14, 79, 0.7),
0 0 35px rgba(49, 27, 146, 0.7);
transition: all 0.4s ease-in-out;
}
.prompt-box:hover {
transform: scale(1.05) rotate(1deg);
box-shadow: 0 0 25px rgba(255, 255, 255, 0.9),
0 0 35px rgba(136, 14, 79, 0.9),
0 0 45px rgba(49, 27, 146, 0.9);
}
@keyframes gradientAnimation {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.analyze-button {
width: 180px;
height: 60px;
border-radius: 50px;
background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
font-size: 20px;
font-weight: bold;
color: white;
border: none;
box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
0 4px 15px rgba(74, 35, 90, 0.6),
0 2px 10px rgba(49, 27, 146, 0.7),
inset 0 1px 5px rgba(0, 0, 0, 0.4);
cursor: pointer;
transition: all 0.4s ease-in-out;
}
.analyze-button:hover {
transform: scale(1.1);
background: linear-gradient(225deg, #880E4F, #4A235A, #311B92, #000000);
box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8),
0 8px 25px rgba(74, 35, 90, 0.7),
0 4px 15px rgba(136, 14, 79, 0.6);
}
.result-box {
text-align: center;
font-size: 28px;
font-weight: bold;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
color: white;
padding: 30px;
border-radius: 20px;
display: inline-block;
margin-top: 30px;
box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5),
0 3px 15px rgba(136, 14, 79, 0.4),
inset 0 2px 10px rgba(49, 27, 146, 0.3);
}
</style>
""",
unsafe_allow_html=True
)
# Title and Prompt: animated instruction banner (styled by .prompt-box CSS above).
st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with PressGuard🛡️</div>", unsafe_allow_html=True)
# --- NLTK resource setup -------------------------------------------------
# Download each resource only if nltk.data.find() cannot locate it, so the
# app does not hit the network on every Streamlit rerun.
# (Fixed: a duplicate `import nltk` plus three unconditional
#  nltk.download(...) calls followed this section and defeated these
#  existence checks — they have been removed.)
nltk_data_path = os.path.expanduser('~/nltk_data')  # default NLTK search dir
if not os.path.exists(nltk_data_path):
    os.makedirs(nltk_data_path)
for resource, locator in (
    ('punkt', 'tokenizers/punkt'),      # sentence/word tokenizer models
    ('stopwords', 'corpora/stopwords'), # English stopword list
    ('wordnet', 'corpora/wordnet'),     # lemmatizer dictionary
):
    try:
        nltk.data.find(locator)
    except LookupError:
        nltk.download(resource, download_dir=nltk_data_path)
# Initialize stopwords and lemmatizer.
# "pm" is added to the stopword set (common time-of-day token in news text).
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()
# Preprocessing Function
def pre_process(x):
    """Normalize raw article text for the classifier.

    Pipeline: lowercase -> strip HTML tags, URLs, @mentions/#hashtags,
    underscore runs, and a leading "Source - " style byline -> demojize
    emoji and drop the resulting :shortcodes: -> drop remaining
    punctuation -> remove stopwords -> lemmatize. Returns one
    space-joined string of lemmas.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)              # HTML tags
    x = re.sub(r"http[s]?://.+?\S+", "", x)  # URLs
    x = re.sub(r"[@#].+?\S", "", x)          # @mentions and #hashtags
    # Fixed: the original pattern was r"\\_+", which only matched a literal
    # backslash followed by underscores — plain underscore runs were never
    # collapsed to a space as intended.
    x = re.sub(r"_+", " ", x)
    x = re.sub(r"^[A-Za-z.].*\s-\s", "", x)  # leading "Agency - " byline
    x = emoji.demojize(x)
    x = re.sub(r":.*?:", "", x)              # demojized :emoji_names:
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)    # remaining punctuation
    words = word_tokenize(x)
    words = [word for word in words if word not in stop_words]
    # Lemmatize what survives the stopword filter and rejoin into one string.
    x = " ".join(lemmatizer.lemmatize(word) for word in words)
    return x
# Load Model
@st.cache_resource
def load_model():
    """Load the classifier, text vectorizer, and label encoder from disk.

    Cached by Streamlit so the artifacts are read once per process, not on
    every rerun. Returns (model, vectorizer, label_encoder).
    """
    clf = keras.models.load_model("model_m3_new.keras")
    text_vec = keras.models.load_model("vec_text_m3_new.keras")
    with open("label_encoder_m5.pkl", 'rb') as fh:
        encoder = pickle.load(fh)
    return clf, text_vec, encoder

model, vectorizer, label_encoder = load_model()
# Prediction Function
def predict_category(text):
    """Return the predicted category label for one raw article string."""
    cleaned = [pre_process(text)]
    # Vectorize, then left-pad/truncate to the model's fixed input length.
    token_ids = vectorizer(cleaned).numpy().tolist()
    padded = pad_sequences(token_ids, padding='pre', maxlen=128)
    probs = model.predict(padded)
    best_idx = np.argmax(probs, axis=1)[0]
    # Map the class index back to its human-readable label.
    return label_encoder.inverse_transform([best_idx])[0]
# User Input: article text box plus the Analyze trigger.
input_text = st.text_area("Enter News Article:", height=200)
if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):
    # Guard clause: empty text gets a warning instead of a prediction.
    if not input_text:
        st.warning("Please enter some text to analyze.")
    else:
        category = predict_category(input_text)
        st.markdown(f"<div class='result-box'>Predicted Category: {category}</div>", unsafe_allow_html=True)