"""Newsense AI: a Streamlit page that classifies a news article into a category.

The script cleans the text typed by the user, turns it into a padded token-id
sequence with a saved vectorizer model, feeds it to a saved Keras classifier
and displays the predicted label together with the cleaned text.
"""
import html
import pickle
import re

import streamlit as st
import numpy as np
import emoji
from textblob import TextBlob
import spacy
import nltk
from nltk.corpus import stopwords
import tensorflow as tf
import keras
from keras.utils import pad_sequences

# Page Config (must run before any other Streamlit call)
st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")

# NOTE(review): the previously disabled code also built an NLTK stop-word set
#     stop_words = set(stopwords.words('english')).union({"pm"})
# (with nltk.download('stopwords') commented out a second time), but
# pre_process never used it. It stays disabled here; restore both lines only
# if the model was trained on stop-word-filtered text.


@st.cache_resource
def _load_nlp():
    """Load the SpaCy English pipeline once and reuse it across reruns."""
    return spacy.load("en_core_web_sm")


@st.cache_resource
def load_model():
    """Load the classifier and its label encoder from the working directory.

    Returns:
        A ``(model, label_encoder)`` tuple.
    """
    model = keras.models.load_model("model_m3_new.keras")
    # SECURITY: pickle executes arbitrary code on load; this file must come
    # from a trusted source (it is shipped next to the app, not uploaded).
    with open("label_encoder_m5.pkl", 'rb') as file:
        label_encoder = pickle.load(file)
    return model, label_encoder


@st.cache_resource
def _load_vectorizer():
    """Load the saved text-vectorizer model once instead of on every click."""
    return keras.models.load_model("vec_text_m3_new.keras")


nlp = _load_nlp()
model, label_encoder = load_model()
vectorizer = _load_vectorizer()


def pre_process(x):
    """Clean raw article text before vectorization.

    Steps, in order: lowercase, strip HTML tags, URLs, @/# mentions, emojis,
    selected punctuation, surrounding quotes and parenthesised content,
    collapse whitespace, spell-correct with TextBlob, lemmatize with SpaCy.

    Args:
        x: The raw text.

    Returns:
        The cleaned, lemmatized text as a single space-separated string.
    """
    # Convert to lowercase
    x = x.lower()
    # Remove HTML tags
    x = re.sub(r"<.*?>", "", x)
    # Remove URLs
    x = re.sub(r"http[s]?://\S+", "", x)
    # Remove mentions (@, #)
    x = re.sub(r"[@#]\S+", "", x)
    # Remove emojis
    x = emoji.replace_emoji(x, replace="")
    # Remove special characters (-, ., :, \, ,)
    x = re.sub(r"[-.:,\\]", " ", x)
    # Remove single and double quotes, keeping the quoted content
    x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
    # Remove content inside parentheses
    x = re.sub(r"\(.*?\)", "", x)
    # Remove extra spaces
    x = re.sub(r"\s+", " ", x).strip()
    # Spell checking
    x = str(TextBlob(x).correct())
    # Lemmatization using SpaCy
    x = " ".join(token.lemma_ for token in nlp(x))
    # The earlier version returned " ".join(x), which re-joins the characters
    # of the already-joined string and puts a space between every letter.
    # NOTE(review): confirm the training pipeline did not rely on that output.
    return x


def predict_category(text):
    """Classify one article.

    Args:
        text: The raw article text entered by the user.

    Returns:
        A ``(label, cleaned_text)`` tuple: the decoded label for the
        highest-scoring class and the pre-processed text that was classified.
    """
    cleaned_text = pre_process(text)
    # Model.predict already returns a NumPy array, so no .numpy() call.
    token_ids = vectorizer.predict(np.array([cleaned_text]))
    # Vectorizing the pre-processed text
    text_vectorized = pad_sequences(token_ids, padding='pre', maxlen=128)
    # Model prediction
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0], cleaned_text


# NOTE(review): the markup literals below contain no HTML/CSS in this copy of
# the file (presumably stripped); their content is kept exactly as found.

# Custom CSS
st.markdown(""" """, unsafe_allow_html=True)

# Streamlit UI layout
st.markdown('\n\n📰 Newsense AI - News Classification\n\n', unsafe_allow_html=True)

# Input and button section
st.markdown('\n\n', unsafe_allow_html=True)
user_input = st.text_area("Enter your news article:", height=200)

# Predict button
if st.button("Classify", key="analyze-button"):
    # Whitespace-only input is treated the same as no input.
    if user_input.strip():
        category, cleaned_text = predict_category(user_input)
        # Display the prediction and cleaned text. Both values are escaped
        # because they are rendered with unsafe_allow_html=True and the
        # cleaned text is derived from user input.
        st.markdown(f'\n\nPrediction: {html.escape(str(category))}\n\n', unsafe_allow_html=True)
        st.markdown(f'\n\nCleaned Text: {html.escape(cleaned_text)}\n\n', unsafe_allow_html=True)
    else:
        st.warning("Please enter some text to classify!")

st.markdown('\n\n', unsafe_allow_html=True)