Adityaganesh's picture
Update app.py
c161086 verified
raw
history blame
5.48 kB
import base64
import streamlit as st
import numpy as np
import re
import emoji
import os
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary resources at startup (each call is a no-op if the data is already present)
nltk.download('punkt_tab')  # tokenizer tables used by word_tokenize
nltk.download('stopwords')  # stopword lists (English list is used below)
nltk.download('wordnet')  # lexical database backing WordNetLemmatizer
import tensorflow
import keras
from keras.utils import pad_sequences
import pickle
# Set Streamlit page configuration (must be the first Streamlit call in the script)
st.set_page_config(page_title="News Category Classifier", page_icon="๐Ÿ“ฐ", layout="centered")
# Function to set background image
def set_background(image_path):
    """Inject a full-page CSS background from a local image file.

    The image is read from disk, base64-encoded, and embedded as a data URI
    in a <style> block appended to the page. Shows a Streamlit error and
    returns early if the file does not exist.
    """
    if not os.path.exists(image_path):
        st.error(f"โŒ Background image not found: {image_path}")
        return
    with open(image_path, "rb") as fh:
        b64 = base64.b64encode(fh.read()).decode()
    css = f"""
    <style>
    body {{
    background-image: url("data:image/jpg;base64,{b64}");
    background-size: cover;
    background-repeat: no-repeat;
    background-position: center;
    background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
# Set background image
set_background("Images/News image.jpg")
# Initialize stopwords and lemmatizer (shared by pre_process below)
# NOTE(review): "pm" is added as an extra stopword — presumably the time
# suffix ("3 pm") common in news text; confirm against the training pipeline.
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()
# Preprocessing function
def pre_process(x):
    """Normalize raw news text for the classifier.

    Steps: lowercase; strip HTML tags, URLs, @mentions/#hashtags; collapse
    underscores to spaces; demojize emojis and then drop the resulting
    ":name:" tokens; remove remaining special characters; tokenize, drop
    stopwords, and lemmatize. Returns the cleaned, space-joined text.

    All regex patterns are raw strings — the original non-raw "\\S"/"\\s"
    escapes emit SyntaxWarning on Python 3.12+.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)            # remove HTML tags
    x = re.sub(r"http[s]?://\S+", "", x)   # remove URLs
    x = re.sub(r"[@#]\S+", "", x)          # remove mentions and hashtags
    x = re.sub(r"\_+", " ", x)             # replace underscores with space
    x = emoji.demojize(x)                  # convert emojis to ":name:" text
    x = re.sub(r":.*?:", "", x)            # remove the demojized tokens
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)  # remove special characters
    words = word_tokenize(x)
    words = [word for word in words if word not in stop_words]
    return " ".join(lemmatizer.lemmatize(word) for word in words)
# Cache model loading to improve performance
@st.cache_resource
def load_model():
    """Load and cache the classifier, text vectorizer, and label encoder.

    Returns a (model, vectorizer, label_encoder) tuple. Cached with
    st.cache_resource so the artifacts are loaded once per server process.
    """
    model = keras.models.load_model("news_model.keras")
    vectorizer = keras.models.load_model("news_tv_model.keras")
    with open("label_encoder.pkl", 'rb') as fh:
        label_encoder = pickle.load(fh)
    return model, vectorizer, label_encoder
# Load the models (cached — subsequent reruns of the script reuse the same objects)
model, vectorizer, label_encoder = load_model()
# Prediction function
def predict_category(text, maxlen=82):
    """Return the predicted category label for a piece of news text.

    The text is cleaned with pre_process, vectorized, pre-padded to
    ``maxlen`` tokens (default 82 — the sequence length the model was
    trained with; exposed as a parameter instead of a hard-coded constant),
    and the argmax class index is mapped back through the label encoder.
    """
    processed_text = [pre_process(text)]
    token_ids = vectorizer(processed_text).numpy().tolist()
    text_vectorized = pad_sequences(token_ids, padding='pre', maxlen=maxlen)
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]
# Streamlit UI — custom CSS for the title, subtitle, button, and result card.
_PAGE_CSS = """
<style>
.title {
color: #ffffff;
font-size: 2.4em;
text-align: center;
font-weight: 700;
text-transform: uppercase;
text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
padding: 10px;
}
.subtitle {
color: #ffff;
font-size: 1.3em;
text-align: center;
font-weight: 600;
text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
padding: 5px;
}
.classify-button {
background-color: #3498db;
color: white;
font-size: 1.2em;
padding: 12px 24px;
border: none;
border-radius: 8px;
cursor: pointer;
display: block;
margin: 20px auto;
transition: 0.3s;
}
.classify-button:hover {
background-color: #2980b9;
}
.result-box {
background: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
padding: 20px;
border-radius: 10px;
text-align: center;
margin-top: 30px;
position: relative;
overflow: hidden;
border: 2px solid transparent;
background-clip: padding-box, border-box;
border-image: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
border-image-slice: 0;
transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
}
.result-box:hover {
transform: scale(1.05);
box-shadow: 0px 10px 30px rgba(98, 132, 255, 0.8),
0px 10px 30px rgba(255, 0, 0, 0.8);
}
.result-text {
font-size: 1.8em;
color: #ffffff;
font-weight: 900;
text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
animation: fadeIn 0.8s ease-in-out;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Page title and subtitle (styled by the .title/.subtitle CSS classes injected above)
st.markdown("<div class='title'>๐Ÿ“ฐ News Classifier</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
# User input text area for the news text to classify
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
# Analyze button: classify the entered text and render the styled result card,
# or warn when the input is empty/whitespace-only.
if st.button("Analyze ๐Ÿฟ", key="analyze_button"):
    if user_input.strip():
        category = predict_category(user_input)
        st.markdown(
            f"<div class='result-box'><span class='result-text'>๐Ÿ—‚๏ธ Predicted Category: <strong>{category}</strong></span></div>",
            unsafe_allow_html=True,
        )
    else:
        st.warning("โš ๏ธ Please enter some text to analyze.")