"""Streamlit app that classifies news text with a pre-trained Keras model.

The script configures the page, applies a background image, normalizes the
input text with NLTK, runs it through a saved vectorizer and classifier, and
maps the highest-scoring class index back to a label with a pickled encoder.
"""

import base64
import os
import pickle
import re

import emoji
import nltk
import numpy as np
import streamlit as st
import tensorflow as tf
import keras
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# NLTK data packages required by word_tokenize, stopwords and the lemmatizer.
NLTK_RESOURCES = ("punkt_tab", "stopwords", "wordnet")

# Sequence length handed to pad_sequences.
# NOTE(review): presumably the length the classifier was trained with - confirm.
MAX_SEQUENCE_LENGTH = 82

# Patterns used by pre_process, compiled once instead of on every call.
_HTML_TAG_RE = re.compile(r"<.*?>")
_URL_RE = re.compile(r"http[s]?://\S+")
_MENTION_HASHTAG_RE = re.compile(r"[@#]\S+")
# The previous pattern (a raw string holding two backslashes before "_+")
# only matched a literal backslash followed by underscores, so plain
# underscores were never replaced.
_UNDERSCORE_RUN_RE = re.compile(r"_+")
# demojize() emits aliases such as ":red_heart:" that contain no whitespace.
# The previous ":.*?:" pattern also deleted ordinary text that happened to sit
# between two colons.
_EMOJI_ALIAS_RE = re.compile(r":[^\s:]+:")
_DISALLOWED_CHAR_RE = re.compile(r"[^a-zA-Z0-9\s_]")

# ✅ Enable full-width mode for Hugging Face
st.set_page_config(page_title="Intelligent News Classifier", page_icon="🧠", layout="wide")


@st.cache_resource(show_spinner=False)
def _ensure_nltk_resources() -> None:
    """Download the NLTK data this app needs, once per server process.

    Streamlit re-executes the script on every interaction; caching this call
    keeps the download check from running on each rerun.
    """
    for resource in NLTK_RESOURCES:
        nltk.download(resource, quiet=True)


# Download necessary resources
_ensure_nltk_resources()


# ✅ Function to set background image
def set_background(image_path):
    """Inject the page background style built from ``image_path``.

    Shows a Streamlit error and returns early when ``image_path`` is not an
    existing file.

    Args:
        image_path: Path of the image file to read and base64-encode.
    """
    if not os.path.isfile(image_path):
        st.error(f"❌ Background image not found: {image_path}")
        return

    with open(image_path, "rb") as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode()

    # NOTE(review): the style template below is whitespace only in the visible
    # source, so `encoded_img` is never used and no background is applied.
    # The CSS markup appears to be missing - restore it from the original app.
    bg_image_style = f""" """
    st.markdown(bg_image_style, unsafe_allow_html=True)


# ✅ Set background image
set_background("Images/image.jpg")

# ✅ Initialize stopwords and lemmatizer
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()


# ✅ Text Preprocessing Function
def pre_process(text: str) -> str:
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase; strip HTML tags, URLs, mentions and hashtags;
    turn runs of underscores into a space; convert emojis to their textual
    aliases and drop those aliases; remove remaining characters that are not
    alphanumeric, whitespace or underscore; tokenize; drop stop words;
    lemmatize.

    NOTE(review): the underscore and emoji-alias patterns were corrected in
    this version - confirm the training-time preprocessing agrees.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text with tokens separated by single spaces.
    """
    text = text.lower()
    text = _HTML_TAG_RE.sub("", text)  # Remove HTML tags
    text = _URL_RE.sub("", text)  # Remove URLs
    text = _MENTION_HASHTAG_RE.sub("", text)  # Remove mentions and hashtags
    text = _UNDERSCORE_RUN_RE.sub(" ", text)  # Replace underscores with spaces
    text = emoji.demojize(text)  # Convert emojis to text
    text = _EMOJI_ALIAS_RE.sub("", text)  # Remove emoji text
    text = _DISALLOWED_CHAR_RE.sub("", text)  # Remove special characters

    tokens = word_tokenize(text)
    return " ".join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )


# ✅ Cache Model Loading for Performance
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    The result is cached by Streamlit so the files are read only once per
    server process.

    Returns:
        A ``(model, vectorizer, label_encoder)`` tuple.
    """
    model_path = "news_model.keras"
    vectorizer_path = "news_tv_model.keras"
    label_encoder_path = "label_encoder.pkl"

    model = keras.models.load_model(model_path)
    vectorizer = keras.models.load_model(vectorizer_path)

    # NOTE(security): pickle.load can execute arbitrary code; this file must
    # only ever come from a trusted source.
    with open(label_encoder_path, 'rb') as file:
        label_encoder = pickle.load(file)

    return model, vectorizer, label_encoder


# ✅ Load the models
model, vectorizer, label_encoder = load_model()


# ✅ Prediction Function
def predict_category(text):
    """Return the predicted category label for ``text``.

    Args:
        text: Raw text to classify.

    Returns:
        The label produced by ``label_encoder.inverse_transform`` for the
        class index with the highest model output.
    """
    processed_text = [pre_process(text)]
    token_ids = vectorizer(processed_text).numpy().tolist()
    text_vectorized = pad_sequences(
        token_ids, padding='pre', maxlen=MAX_SEQUENCE_LENGTH
    )
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]


# ✅ Streamlit UI Design
# NOTE(review): this markdown payload is whitespace only in the visible
# source; the styling markup appears to be missing.
st.markdown(""" """, unsafe_allow_html=True)

# ✅ Page Title
# NOTE(review): the visible source breaks off here in the middle of a
# `st.markdown("` call whose string argument is cut off. It is not completed
# here to avoid guessing at the markup - restore the title call from the
# original app.