"""Streamlit app: classify a news headline/snippet into a category.

Pipeline: clean the text (pre_process) -> vectorize with a saved Keras
TextVectorization model -> pad to the training length -> predict with the
saved classifier -> map the argmax index back through a pickled LabelEncoder.
"""

import base64
import pickle
import re

import emoji
import nltk
import numpy as np
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

import tensorflow  # noqa: F401  (backend for keras)
import keras
from keras.utils import pad_sequences

# Download required NLTK corpora. quiet=True keeps the app log clean; the
# calls are effectively no-ops once the resources are already present.
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)

# ---------------------------------------------------------------------------
# Page setup
# ---------------------------------------------------------------------------
st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")


def set_background(image_path):
    """Inject CSS that uses *image_path* (base64-embedded) as the page background.

    NOTE(review): the CSS payload was empty in the original source, which made
    this function a no-op. The style block below is a reconstruction of the
    usual Streamlit background-image idiom — confirm against the intended look.
    """
    with open(image_path, "rb") as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode()
    bg_image_style = f"""
    <style>
    .stApp {{
        background-image: url("data:image/png;base64,{encoded_img}");
        background-size: cover;
        background-position: center;
    }}
    </style>
    """
    st.markdown(bg_image_style, unsafe_allow_html=True)


# Path is relative to the app's working directory; the file must exist.
set_background("Images/News image 1.png")

# Tokens dropped before lemmatization; "pm" is a domain-specific extra stopword.
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()


def pre_process(x):
    """Normalize raw news text for the vectorizer.

    Lowercases, strips markup/URLs/handles/emoji and punctuation, removes
    stopwords, and lemmatizes. Returns a single space-joined string.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)                # HTML tags
    x = re.sub(r"http[s]?://.+?\S+", "", x)    # URLs
    x = re.sub(r"[@#].+?\S", "", x)            # @mentions / #hashtags
    x = re.sub(r"_+", " ", x)                  # runs of underscores
    x = re.sub(r"^[A-Za-z.].*\s-\s", "", x)    # leading "Source - " bylines
    x = emoji.demojize(x)
    x = re.sub(r":.*?:", "", x)                # demojized :emoji_name: tokens
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)      # any remaining punctuation
    words = word_tokenize(x)
    words = [word for word in words if word not in stop_words]
    return " ".join(lemmatizer.lemmatize(word) for word in words)


@st.cache_resource
def load_model():
    """Load and cache the classifier, the text vectorizer, and the label encoder.

    Cached once per server process via st.cache_resource so the heavy model
    files are not re-read on every Streamlit rerun.
    """
    model = keras.models.load_model("news_model.keras")
    vectorizer = keras.models.load_model("news_tv_model.keras")
    # SECURITY NOTE: pickle.load executes arbitrary code when unpickling —
    # only ever load a label encoder file you trust.
    with open("label_encoder.pkl", "rb") as file:
        label_encoder = pickle.load(file)
    return model, vectorizer, label_encoder


model, vectorizer, label_encoder = load_model()


def predict_category(text):
    """Return the predicted category label (string) for *text*."""
    processed_text = [pre_process(text)]
    # maxlen=82 must match the sequence length the model was trained with.
    text_vectorized = pad_sequences(
        vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=82
    )
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# NOTE(review): the HTML markup inside these st.markdown calls was stripped in
# the original source, leaving bare text fragments; the tags below reconstruct
# a plausible layout around the surviving visible text — confirm styling.
st.markdown(
    "<h1 style='text-align: center;'>📰 News Classifier</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<p style='text-align: center;'>Enter a news headline or article snippet "
    "to analyze its category.</p>",
    unsafe_allow_html=True,
)

user_input = st.text_area(
    "Enter text here:", height=150, placeholder="Type your news text here..."
)

if st.button("Analyze 🍿"):
    if user_input.strip():
        category = predict_category(user_input)
        st.markdown(
            f"<h3>🗂️ Predicted Category: {category}</h3>",
            unsafe_allow_html=True,
        )
    else:
        st.warning("⚠️ Please enter some text to analyze.")