# News_Classifier / app.py
# Source: Mpavan45's Hugging Face Space, commit 6ea85ab (verified)
import streamlit as st
import numpy as np
import re
import emoji
from textblob import TextBlob
import spacy
import nltk
from nltk.corpus import stopwords
import tensorflow as tf
import keras
from keras.utils import pad_sequences
import pickle
# Page Config
st.set_page_config(page_title="Newsense AI", page_icon="πŸ“°", layout="wide")
# --- NLP resources -----------------------------------------------------------
# Ensure the NLTK stopword corpus is present (no-op after the first run).
nltk.download('stopwords', quiet=True)

# SpaCy English pipeline, used only for lemmatization in pre_process.
nlp = spacy.load("en_core_web_sm")

# English stopwords plus domain-specific noise tokens.
# NOTE(review): currently unused by pre_process — kept for parity with the
# training pipeline; confirm whether stopword removal should be applied here.
stop_words = set(stopwords.words('english')).union({"pm"})


def pre_process(x):
    """Normalize raw article text to match the model's training pipeline.

    Steps: lowercase; strip HTML tags, URLs, @mentions/#hashtags, emojis,
    separator punctuation and parenthesized content; unwrap quoted spans;
    collapse whitespace; spell-correct with TextBlob; lemmatize with SpaCy.

    Parameters
    ----------
    x : str
        Raw article text.

    Returns
    -------
    str
        The cleaned, lemmatized text.
    """
    x = x.lower()
    # Remove HTML tags
    x = re.sub(r"<.*?>", "", x)
    # Remove URLs
    x = re.sub(r"http[s]?://\S+", "", x)
    # Remove mentions and hashtags
    x = re.sub(r"[@#]\S+", "", x)
    # Remove emojis
    x = emoji.replace_emoji(x, replace="")
    # Replace separator punctuation (-, ., :, \, ,) with spaces
    x = re.sub(r"[-.:,\\]", " ", x)
    # Unwrap single/double-quoted spans, keeping the inner text
    x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
    # Remove content inside parentheses
    x = re.sub(r"\(.*?\)", "", x)
    # Collapse runs of whitespace
    x = re.sub(r"\s+", " ", x).strip()
    # Spell checking (TextBlob) — slow on long inputs
    x = str(TextBlob(x).correct())
    # Lemmatization using SpaCy
    x = " ".join(token.lemma_ for token in nlp(x))
    # BUG FIX: the original `return " ".join(x)` iterated the *characters*
    # of the string, inserting a space between every character.
    return x


@st.cache_resource
def load_model():
    """Load and cache the classifier, label encoder and text vectorizer.

    Cached with st.cache_resource so the heavy Keras/pickle loads happen
    once per process, not on every rerun.  The vectorizer is loaded here
    too (the original reloaded it from disk inside every prediction).
    """
    model = keras.models.load_model("model_m3_new.keras")
    vectorizer = keras.models.load_model("vec_text_m3_new.keras")
    with open("label_encoder_m5.pkl", 'rb') as file:
        label_encoder = pickle.load(file)
    return model, label_encoder, vectorizer


model, label_encoder, vectorizer = load_model()


def predict_category(text):
    """Classify a raw news article.

    Parameters
    ----------
    text : str
        Raw article text as entered by the user.

    Returns
    -------
    tuple[str, str]
        (predicted category label, cleaned/pre-processed text).
    """
    cleaned_text = pre_process(text)
    # Vectorize, then pad/truncate to the fixed length the classifier expects.
    text_vectorized = pad_sequences(
        vectorizer.predict(np.array([cleaned_text])).numpy(),
        padding='pre',
        maxlen=128,
    )
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0], cleaned_text
# Custom CSS
st.markdown(
"""
<style>
body {
background-image: url('https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/WwOWG8MBGYxHnIeM2Dowo.webp');
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
}
.title {
font-size: 60px;
font-weight: bold;
color: white;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
padding: 20px;
border-radius: 20px;
box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
0 4px 15px rgba(74, 35, 90, 0.6);
display: inline-block;
margin-bottom: 20px;
text-align: center;
animation: elegantFadeSlide 1.5s ease-out forwards;
}
.input-box {
display: flex;
flex-direction: column;
align-items: center;
gap: 20px;
margin: 0 auto;
width: 80%;
}
.input-prompt {
font-size: 22px;
font-weight: bold;
color: #ffffff;
text-align: center;
opacity: 0.8;
}
div.stTextArea textarea {
width: 100%;
height: 200px;
padding: 20px;
border-radius: 15px;
background-color: rgba(0, 0, 0, 0.7);
color: white;
font-size: 18px;
outline: none;
box-shadow: 0 6px 20px rgba(136, 14, 79, 0.3);
transition: all 0.5s ease;
}
div.stTextArea textarea:hover {
transform: scale(1.05);
box-shadow: 0 10px 30px rgba(136, 14, 79, 0.5);
}
.analyze-button {
width: 200px;
height: 60px;
border-radius: 30px;
background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
font-size: 20px;
font-weight: bold;
color: white;
border: none;
cursor: pointer;
transition: all 0.4s ease;
}
.analyze-button:hover {
transform: scale(1.1);
box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8);
}
.result-box {
text-align: center;
font-size: 28px;
font-weight: bold;
color: white;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
padding: 30px;
border-radius: 20px;
box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5);
margin-top: 30px;
}
</style>
""",
unsafe_allow_html=True
)
# Streamlit UI layout: title banner, input area, and the classify handler.
import html  # stdlib; used to escape user-derived text before HTML rendering

st.markdown('<div class="title">📰 Newsense AI - News Classification</div>', unsafe_allow_html=True)

# Input and button section
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area("Enter your news article:", height=200)

# Predict button.  FIX: guard on stripped text so whitespace-only input shows
# the warning instead of being sent to the model.
if st.button("Classify", key="analyze-button"):
    if user_input.strip():
        category, cleaned_text = predict_category(user_input)
        # SECURITY: these boxes are rendered with unsafe_allow_html=True, so
        # escape the user-derived strings to prevent HTML/script injection.
        st.markdown(
            f'<div class="result-box">Prediction: {html.escape(str(category))}</div>',
            unsafe_allow_html=True,
        )
        st.markdown(
            f'<div class="result-box">Cleaned Text: {html.escape(cleaned_text)}</div>',
            unsafe_allow_html=True,
        )
    else:
        st.warning("Please enter some text to classify!")
st.markdown('</div>', unsafe_allow_html=True)