"""Streamlit app that classifies user-entered news text into a category.

Pipeline: clean the raw text (pre_process) -> vectorize with a saved
TextVectorization Keras model -> classify with a saved Keras model ->
map the argmax index back to a label via a pickled LabelEncoder.
"""

import pickle
import re

import emoji
import nltk
import numpy as np
import streamlit as st
import tensorflow as tf
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Compiled once at import instead of per call; raw strings avoid
# double-backslash escaping. Application order below mirrors the
# original cleaning sequence exactly.
_HTML_TAG_RE = re.compile(r"<.*?>")
_URL_RE = re.compile(r"http[s]?://\S+")
_MENTION_HASHTAG_RE = re.compile(r"[@#]\S+")
_UNDERSCORES_RE = re.compile(r"_+")
_DATELINE_RE = re.compile(r"^[A-Za-z.].*\s-\s")  # e.g. "WASHINGTON - " lead-ins
_EMOJI_NAME_RE = re.compile(r":.*?:")            # names produced by emoji.demojize
_NON_ALNUM_RE = re.compile(r"[^a-zA-Z0-9\s_]")


@st.cache_resource
def _ensure_nltk_data():
    """Download required NLTK corpora once per server process.

    Streamlit reruns the script on every interaction; caching prevents
    repeated download checks. ``punkt_tab`` is required by word_tokenize
    on newer NLTK releases (harmless no-op on older ones).
    """
    nltk.download("punkt", quiet=True)
    nltk.download("punkt_tab", quiet=True)
    nltk.download("wordnet", quiet=True)
    nltk.download('stopwords', quiet=True)
    return True


_ensure_nltk_data()

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))


def pre_process(text):
    """Normalize raw news text for the vectorizer.

    Lowercases, strips HTML tags, URLs, @mentions/#hashtags, underscore
    runs, a leading dateline, emoji (demojized then removed), and any
    remaining non-alphanumeric characters; then tokenizes, drops English
    stopwords, and lemmatizes.

    Args:
        text: Raw user-entered news string.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    text = text.lower()
    text = _HTML_TAG_RE.sub("", text)
    text = _URL_RE.sub("", text)
    text = _MENTION_HASHTAG_RE.sub("", text)
    text = _UNDERSCORES_RE.sub(" ", text)
    text = _DATELINE_RE.sub("", text)
    text = emoji.demojize(text)
    text = _EMOJI_NAME_RE.sub("", text)
    text = _NON_ALNUM_RE.sub("", text)
    words = word_tokenize(text)
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return " ".join(words)


@st.cache_resource
def _load_artifacts():
    """Load the label encoder and both Keras models exactly once.

    Without caching, Streamlit would reload all three artifacts from
    disk on every rerun (every button click).
    """
    # NOTE(review): pickle.load is only safe because the file ships with
    # the app; never unpickle untrusted data.
    with open("label_encoder.pkl", "rb") as f:
        encoder = pickle.load(f)
    vectorizer = tf.keras.models.load_model("news_tv_model.keras")
    classifier = tf.keras.models.load_model("news_model.keras")
    return encoder, vectorizer, classifier


# Keep the original module-level names for any external callers.
label_encoder, text_vectorizer, news_model = _load_artifacts()


def predict_category(text):
    """Return the predicted category label for one news text.

    Args:
        text: Raw news string.

    Returns:
        The decoded category label (str) for the highest-probability class.
    """
    processed_text = [pre_process(text)]
    vectorized_text = text_vectorizer(processed_text)
    prediction = news_model.predict(vectorized_text)
    predicted_label_index = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([predicted_label_index])[0]


# Streamlit UI
st.title("News Classification App")

# User input
user_text = st.text_area("Enter your news content for classification.")

if st.button("Predict Category"):
    if user_text.strip():
        category = predict_category(user_text)
        st.success(f"Predicted Category: {category}")
    else:
        st.warning("Please enter some text to classify.")