"""PressGuard — Streamlit app that classifies a news article's category.

Pipeline: normalize the raw article text (regex cleanup, stopword removal,
lemmatization), vectorize it with a saved Keras TextVectorization model,
pad to a fixed length, and classify with a saved Keras model.
"""

import os
import pickle
import re

import emoji
import nltk
import numpy as np
import streamlit as st
import tensorflow as tf
import keras
from keras.utils import pad_sequences
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# ---------------------------------------------------------------------------
# Page configuration and static styling
# ---------------------------------------------------------------------------
st.set_page_config(page_title="PressGuard", page_icon="🛡️")

# Title banner. NOTE(review): the original HTML markup was garbled in
# transit — this reconstructs an equivalent styled header; confirm against
# the deployed app's look.
st.markdown(
    """
    <h1 style="text-align:center;">🛡️ PressGuard</h1>
    """,
    unsafe_allow_html=True,
)

# Full-page background image.
BACKGROUND_IMAGE_URL = (
    "https://cdn-uploads.huggingface.co/production/uploads/"
    "675fab3a2d0851e23d23cad3/yiXBcm5bq8gcMoaMRSYEv.webp"
)
st.markdown(
    f"""
    <style>
    .stApp {{
        background-image: url("{BACKGROUND_IMAGE_URL}");
        background-size: cover;
        background-position: center;
    }}
    </style>
    """,
    unsafe_allow_html=True,
)

# Prompt shown above the input area.
st.markdown(
    """
    <div style="text-align:center;">
        Paste the article content below to analyze its category with PressGuard🛡️
    </div>
    """,
    unsafe_allow_html=True,
)

# ---------------------------------------------------------------------------
# NLP resources
# ---------------------------------------------------------------------------
nltk.download("punkt_tab")
nltk.download("stopwords")
nltk.download("wordnet")

# "pm" is added because news timestamps ("5 pm") carry no category signal.
STOP_WORDS = set(stopwords.words("english")).union({"pm"})
LEMMATIZER = WordNetLemmatizer()

# Cleanup patterns, compiled once at import time as raw strings instead of
# being re-built (with fragile double escaping) on every call.
_HTML_TAG_RE = re.compile(r"<.*?>")
_URL_RE = re.compile(r"http[s]?://.+?\S+")
_MENTION_HASHTAG_RE = re.compile(r"[@#].+?\S")
_UNDERSCORE_RE = re.compile(r"_+")
_BYLINE_RE = re.compile(r"^[A-Za-z.].*\s-\s")  # leading "City - " datelines
_EMOJI_NAME_RE = re.compile(r":.*?:")  # demojized names like :smile:
_NON_ALNUM_RE = re.compile(r"[^a-zA-Z0-9\s_]")


def pre_process(x):
    """Normalize raw article text for the vectorizer.

    Lowercases, strips HTML tags, URLs, @mentions/#hashtags, underscores,
    leading datelines and emoji, drops punctuation and stopwords, then
    lemmatizes each remaining token.

    Args:
        x: Raw article text.

    Returns:
        Space-joined string of cleaned, lemmatized tokens.
    """
    x = x.lower()
    x = _HTML_TAG_RE.sub("", x)
    x = _URL_RE.sub("", x)
    x = _MENTION_HASHTAG_RE.sub("", x)
    x = _UNDERSCORE_RE.sub(" ", x)
    x = _BYLINE_RE.sub("", x)
    x = emoji.demojize(x)
    x = _EMOJI_NAME_RE.sub("", x)
    x = _NON_ALNUM_RE.sub("", x)
    words = word_tokenize(x)
    words = [word for word in words if word not in STOP_WORDS]
    return " ".join(LEMMATIZER.lemmatize(word) for word in words)


@st.cache_resource
def load_model():
    """Load and cache the classifier, text vectorizer and label encoder.

    Cached with ``st.cache_resource`` so the artifacts are loaded once per
    server process, not on every Streamlit rerun.

    Returns:
        Tuple of (classifier model, text vectorizer model, label encoder).
    """
    # Use the same keras accessor for both models for consistency.
    model = keras.models.load_model("model_m3_new.keras")
    vectorizer = keras.models.load_model("vec_text_m3_new.keras")
    # NOTE(review): unpickling is acceptable only because this file ships
    # with the app — never load pickles from untrusted sources.
    with open("label_encoder_m5.pkl", "rb") as file:
        label_encoder = pickle.load(file)
    return model, vectorizer, label_encoder


model, vectorizer, label_encoder = load_model()


def predict_category(text):
    """Return the predicted category label for a raw article string.

    Args:
        text: Raw article text as entered by the user.

    Returns:
        The human-readable category name decoded via the label encoder.
    """
    processed_text = [pre_process(text)]
    # Vectorize, then left-pad/truncate to the fixed length the model
    # was trained with.
    text_vectorized = pad_sequences(
        vectorizer(processed_text).numpy().tolist(), padding="pre", maxlen=128
    )
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]


# ---------------------------------------------------------------------------
# Interactive UI
# ---------------------------------------------------------------------------
st.markdown(
    """
    <h3 style="text-align:center;">AI-Powered News Categorization</h3>
    """,
    unsafe_allow_html=True,
)

input_text = st.text_area("Enter News Article:", height=200)

if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):
    if input_text:
        category = predict_category(input_text)
        st.markdown(
            f"<div style='text-align:center;'>Predicted Category: {category}</div>",
            unsafe_allow_html=True,
        )
    else:
        st.warning("Please enter some text to analyze.")