Adityaganesh's picture
Update app.py
77fcbcc verified
raw
history blame
4.97 kB
import base64
import streamlit as st
import numpy as np
import re
import emoji
import os
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary resources
# These calls sit at module level, so they execute every time this script runs.
# NOTE(review): presumably 'punkt_tab' backs word_tokenize, 'stopwords' backs
# the stop-word list and 'wordnet' backs WordNetLemmatizer — confirm against
# the NLTK documentation.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
import tensorflow as tf
import keras
from keras.preprocessing.sequence import pad_sequences
import pickle
# ✅ Enable full-width mode for Hugging Face
# Sets the page title and icon and selects the "wide" layout.
st.set_page_config(page_title="Intelligent News Classifier", page_icon="🧠", layout="wide")
# ✅ Function to set background image
def set_background(image_path):
    """Use a local image file as the full-page background of the app.

    The file is read, base64-encoded and embedded as a ``data:`` URL inside a
    ``<style>`` block that targets ``.stApp``.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        None. If the file cannot be read, an error is shown with ``st.error``
        and no style is injected.
    """
    # Function-scope import so the block does not depend on a file-level import.
    import mimetypes

    # Try the read directly instead of checking os.path.exists() first: the
    # check passes for directories/unreadable files (which then crash in
    # open()) and can race with the file being removed.
    try:
        with open(image_path, "rb") as img_file:
            encoded_img = base64.b64encode(img_file.read()).decode()
    except FileNotFoundError:
        st.error(f"❌ Background image not found: {image_path}")
        return
    except OSError as exc:
        st.error(f"❌ Could not read background image: {image_path} ({exc})")
        return

    # Derive the MIME type from the file extension so non-PNG images are
    # labelled correctly; fall back to the previous hard-coded PNG type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    # Doubled braces are literal CSS braces inside the f-string.
    bg_image_style = f"""
<style>
.stApp {{
background-image: url("data:{mime_type};base64,{encoded_img}");
background-size: cover;
background-repeat: no-repeat;
background-position: center;
background-attachment: fixed;
}}
</style>
"""
    st.markdown(bg_image_style, unsafe_allow_html=True)
# ✅ Set background image
set_background("Images/picture.png")

# ✅ Initialize stopwords and lemmatizer
# NLTK's English stop-word list extended with the extra token "pm".
stop_words = {*stopwords.words('english'), "pm"}
lemmatizer = WordNetLemmatizer()
# ✅ Text Preprocessing Function
def pre_process(text):
    """Normalize raw text into a cleaned, lemmatized, space-joined string.

    Steps, in order: lowercase; strip HTML tags, URLs, @mentions and
    #hashtags; replace runs of underscores with a space; convert emoji to
    ``:name:`` form and drop those spans; drop every character that is not a
    letter, digit, whitespace or underscore; tokenize; remove stop words;
    lemmatize each remaining token.

    Args:
        text: The input string.

    Returns:
        The processed tokens joined by single spaces.
    """
    text = text.lower()
    text = re.sub(r"<.*?>", "", text)  # Remove HTML tags
    text = re.sub(r"http[s]?://\S+", "", text)  # Remove URLs
    text = re.sub(r"[@#]\S+", "", text)  # Remove mentions and hashtags
    # Replace underscores with spaces. The previous pattern r"\\_+" matched a
    # literal backslash followed by underscores, so this step never fired.
    # NOTE(review): if the model was trained on text produced by the old
    # (no-op) pattern, confirm this does not skew inputs containing "_".
    text = re.sub(r"_+", " ", text)
    # Must run after the underscore step: the emoji names produced here
    # contain underscores and are removed as whole ":name:" spans next.
    text = emoji.demojize(text)  # Convert emojis to text
    text = re.sub(r":.*?:", "", text)  # Remove emoji text
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)  # Remove special characters
    tokens = word_tokenize(text)
    return " ".join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )
# ✅ Cache Model Loading for Performance
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    The two Keras artifacts are loaded with ``keras.models.load_model`` and
    the label encoder is unpickled from a local file.

    Returns:
        A ``(model, vectorizer, label_encoder)`` tuple.
    """
    classifier = keras.models.load_model("news_model.keras")
    text_vectorizer = keras.models.load_model("news_tv_model.keras")
    with open("label_encoder.pkl", "rb") as encoder_file:
        encoder = pickle.load(encoder_file)
    return classifier, text_vectorizer, encoder
# ✅ Load the models
# Module-level handles; predict_category reads all three.
model, vectorizer, label_encoder = load_model()
# ✅ Prediction Function
def predict_category(text):
    """Return the predicted category label for one piece of text.

    The text is cleaned with ``pre_process``, vectorized, left-padded to a
    length of 82 and passed to the model; the index of the highest score is
    mapped back to a label through the label encoder.

    Args:
        text: Raw input text.

    Returns:
        The label produced by ``label_encoder.inverse_transform`` for the
        highest-scoring class.
    """
    cleaned = pre_process(text)
    token_ids = vectorizer([cleaned]).numpy().tolist()
    padded = pad_sequences(token_ids, padding='pre', maxlen=82)
    scores = model.predict(padded)
    best_idx = np.argmax(scores, axis=1)[0]
    labels = label_encoder.inverse_transform([best_idx])
    return labels[0]
# ✅ Category Color Mapping
# Hex color applied to the result text for each category; the "Default"
# entry is the fallback for any category without its own color.
category_colors = dict(
    Sports="#27ae60",         # Green
    Politics="#2980b9",       # Blue
    Entertainment="#8e44ad",  # Purple
    Technology="#e67e22",     # Orange
    Business="#c0392b",       # Red
    Default="#ffffff",        # White
)
# ✅ Streamlit UI Design
# Injects the CSS classes (.title, .subtitle, .result-box) that the HTML
# snippets rendered further down refer to.
st.markdown("""
<style>
.title {
color: #ffffff;
font-size: 2.8em;
text-align: center;
font-weight: 700;
text-transform: uppercase;
text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
padding: 15px;
}
.subtitle {
color: #ffffff;
font-size: 1.5em;
text-align: center;
font-weight: 600;
text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
padding: 10px;
}
.result-box {
padding: 25px;
border-radius: 12px;
text-align: center;
margin-top: 30px;
font-size: 2em;
font-weight: 900;
text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
}
</style>
""", unsafe_allow_html=True)
# ✅ Page Title
# Heading and tagline are emitted as raw HTML so they use the .title and
# .subtitle CSS classes.
st.markdown("<div class='title'>🧠 Intelligent News Classifier</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'>Find out what type of news you're reading!.</div>", unsafe_allow_html=True)
# ✅ User Input
# Free-text box whose contents are classified when the Analyze button is pressed.
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
# ✅ Analyze Button
# On click: classify the entered text and show the label in the category's
# color, or warn when the input is empty or whitespace-only.
if st.button("Analyze 🧐"):
    if user_input.strip():
        category = predict_category(user_input)
        # Categories without a dedicated color fall back to the "Default" entry.
        color = category_colors.get(category, category_colors["Default"])
        # The leading icon was mis-encoded ("πŸ—‚οΈ") and rendered as garbage;
        # restored to the intended 🗂️ emoji.
        st.markdown(
            f"""
<div class='result-box' style='color: {color};'>
🗂️ Predicted Category: <strong>{category}</strong>
</div>
""",
            unsafe_allow_html=True
        )
    else:
        st.warning("⚠️ Please enter some text to analyze.")