import streamlit as st
import numpy as np
import re
import emoji
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
import keras
from keras.utils import pad_sequences
import pickle
import os
# ---- Streamlit UI setup ----
# Page metadata shown in the browser tab.
st.set_page_config(page_title="PressGuard", page_icon="🛡️")

# App title (rendered as markdown; unsafe_allow_html lets raw HTML through).
st.markdown("""
🛡️ PressGuard
""", unsafe_allow_html=True)

# Background image URL referenced by the custom CSS below.
background_image_url = "https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/yiXBcm5bq8gcMoaMRSYEv.webp"

# Apply custom CSS for the background image and overlay.
# NOTE(review): the CSS body is currently empty — it looks like the style rules
# were stripped at some point; confirm against the original deployment.
st.markdown(
f"""
""",
unsafe_allow_html=True
)

# Background image and enhanced styling (body also currently empty).
st.markdown(
"""
""",
unsafe_allow_html=True
)

# Prompt shown under the title.
# FIX: the original split this string literal across two physical lines without
# triple quotes, which is a SyntaxError in Python. A triple-quoted string
# preserves the intended trailing newline.
st.markdown("""Paste the article content below to analyze its category with PressGuard🛡️
""", unsafe_allow_html=True)
# Fetch the NLTK corpora/models required for tokenization and lemmatization.
for resource in ("punkt_tab", "stopwords", "wordnet"):
    nltk.download(resource)

# Stop-word filter: the standard English list plus the domain-specific token
# "pm" (frequent in timestamps), and the lemmatizer used during preprocessing.
stop_words = set(stopwords.words("english")) | {"pm"}
lemmatizer = WordNetLemmatizer()
# ✅ Preprocessing Function
def pre_process(x: str) -> str:
    """Normalize raw article text for the classifier.

    Steps: lowercase; strip HTML tags, URLs and @/# mention tokens; collapse
    runs of underscores to a space; drop a leading "Source - " style byline;
    demojize emoji and remove the resulting ``:name:`` tokens; drop remaining
    non-alphanumeric characters; finally tokenize, remove stop words, and
    lemmatize. Returns the cleaned text as a single space-joined string.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)              # HTML tags
    x = re.sub(r"http[s]?://.+?\S+", "", x)  # URLs
    x = re.sub(r"[@#].+?\S", "", x)          # @mentions / #hashtags
    # FIX: the original used r"\\_+" which, as a raw string, is the regex
    # "backslash followed by underscores" and never matched plain underscores;
    # the intent (collapse underscore runs to a space) needs r"_+".
    x = re.sub(r"_+", " ", x)
    x = re.sub(r"^[A-Za-z.].*\s-\s", "", x)  # leading "Agency - " byline
    x = emoji.demojize(x)
    x = re.sub(r":.*?:", "", x)              # remove demojized :emoji_name: tokens
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)    # keep alphanumerics/whitespace/underscore
    words = word_tokenize(x)
    words = [word for word in words if word not in stop_words]
    x = " ".join([lemmatizer.lemmatize(word) for word in words])
    return x
# ✅ Load Model and Vectorizer
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer, and the label encoder.

    Decorated with ``st.cache_resource`` so the expensive deserialization
    happens only once per Streamlit server process.

    Returns:
        tuple: ``(model, vectorizer, label_encoder)``.
    """
    classifier = tf.keras.models.load_model("model_m3_new.keras")
    text_vectorizer = keras.models.load_model("vec_text_m3_new.keras")
    # The label encoder was pickled separately from the Keras artifacts.
    with open("label_encoder_m5.pkl", "rb") as fh:
        encoder = pickle.load(fh)
    return classifier, text_vectorizer, encoder
# Load the model, vectorizer and label encoder once at startup (cached by
# st.cache_resource, so reruns of the script reuse the same objects).
model, vectorizer, label_encoder = load_model()
# ✅ Prediction Function
def predict_category(text):
    """Return the predicted category label for a raw article string.

    Pipeline: clean the text, vectorize it, pad/truncate to 128 tokens
    (pre-padding), run the model, and map the argmax index back to the
    original label via the fitted label encoder.
    """
    cleaned = pre_process(text)
    token_ids = vectorizer([cleaned]).numpy().tolist()
    padded = pad_sequences(token_ids, padding='pre', maxlen=128)
    probabilities = model.predict(padded)
    best_idx = np.argmax(probabilities, axis=1)[0]
    return label_encoder.inverse_transform([best_idx])[0]
# ✅ Streamlit UI — headline, input area, and the analyze action.
st.markdown("""
AI-Powered News Categorization
""", unsafe_allow_html=True)

input_text = st.text_area("Enter News Article:", height=200)

if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):
    # Guard against empty AND whitespace-only input so we never feed a blank
    # string into the model.
    if input_text.strip():
        category = predict_category(input_text)
        # FIX: the original split this f-string literal across two physical
        # lines without triple quotes, which is a SyntaxError. Triple quotes
        # keep the intended trailing newline.
        st.markdown(f"""Predicted Category: {category}
""", unsafe_allow_html=True)
    else:
        st.warning("Please enter some text to analyze.")