|
|
import base64 |
|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import re |
|
|
import emoji |
|
|
|
|
|
import nltk |
|
|
from nltk.tokenize import word_tokenize |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.stem import WordNetLemmatizer |
|
|
|
|
|
# Fetch the NLTK resources required by pre_process() (tokenizer models,
# stopword list, WordNet for lemmatization).
# quiet=True suppresses the "already up-to-date" console spam: this
# top-level code re-executes on every Streamlit rerun (each user
# interaction), so a noisy download call would log constantly.
for _nltk_resource in ("punkt_tab", "stopwords", "wordnet"):
    nltk.download(_nltk_resource, quiet=True)
|
|
|
|
|
import tensorflow |
|
|
import keras |
|
|
from keras.utils import pad_sequences |
|
|
|
|
|
import pickle |
|
|
|
|
|
|
|
|
# Must be the first Streamlit call in the script.
# NOTE(review): the page_icon literal "๐ฐ" looks like a mojibake'd emoji
# (UTF-8 decoded with the wrong codepage, likely 📰) — confirm file encoding.
st.set_page_config(page_title="News Category Classifier", page_icon="๐ฐ", layout="centered")
|
|
|
|
|
def set_background(image_path):
    """Use the image at *image_path* as the app-wide page background.

    Reads the file from disk, embeds it as a base64 data URI, and injects
    CSS targeting Streamlit's root ``.stApp`` container via st.markdown.
    """
    with open(image_path, "rb") as img_file:
        raw = img_file.read()
    encoded_img = base64.b64encode(raw).decode()

    # Background fills the viewport and stays fixed while content scrolls.
    css = f"""
    <style>
    .stApp {{
        background-image: url("data:image/png;base64,{encoded_img}");
        background-size: 100% 100%;
        background-repeat: no-repeat;
        background-attachment: fixed;
        background-position: center;
    }}
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Apply the background image once at script start (re-runs on each rerun).
set_background("Images/News image 1.png")
|
|
|
|
|
|
|
|
# Shared NLP helpers used by pre_process(): a WordNet lemmatizer and the
# English stopword list extended with the extra token "pm".
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english')) | {"pm"}
|
|
|
|
|
def pre_process(x):
    """Normalise raw news text before vectorisation.

    Lowercases; strips HTML tags, URLs, @/# handles, a leading
    "Source - "-style prefix; converts emoji to :shortcode: form and drops
    them; removes remaining punctuation; then drops stopwords and
    lemmatizes the surviving tokens.
    """
    x = x.lower()

    # Regex passes applied before emoji handling. Patterns and their order
    # are kept byte-identical to the original/training pipeline — do not
    # "fix" them without re-training.
    # NOTE(review): r"\\_+" matches a literal backslash followed by
    # underscores; plain "_+" may have been intended — confirm.
    _pre_emoji = (
        ("<.*?>", ""),
        ("http[s]?://.+?\\S+", ""),
        ("[@#].+?\\S", ""),
        (r"\\_+", " "),
        ("^[A-Za-z.].*\\s-\\s", ""),
    )
    for pattern, replacement in _pre_emoji:
        x = re.sub(pattern, replacement, x)

    # Emoji -> ":shortcode:" text, then strip the shortcodes entirely.
    x = emoji.demojize(x)
    x = re.sub(":.*?:", "", x)

    # Drop anything that is not alphanumeric, whitespace, or underscore.
    x = re.sub("[^a-zA-Z0-9\\s_]", "", x)

    # Tokenize, filter stopwords, lemmatize, and re-join into one string.
    kept = [w for w in word_tokenize(x) if w not in stop_words]
    return " ".join(lemmatizer.lemmatize(w) for w in kept)
|
|
|
|
|
@st.cache_resource
def load_model():
    """Load the classifier, text vectorizer, and label encoder from disk.

    Decorated with st.cache_resource so the artifacts are loaded once per
    process instead of on every Streamlit rerun.

    Returns:
        Tuple of (model, vectorizer, label_encoder).
    """
    artifact_paths = {
        "model": "news_model.keras",
        "vectorizer": "news_tv_model.keras",
        "labels": "label_encoder.pkl",
    }

    model = keras.models.load_model(artifact_paths["model"])
    vectorizer = keras.models.load_model(artifact_paths["vectorizer"])

    # NOTE(review): pickle.load is only safe on trusted local artifacts —
    # never point this at files from untrusted sources.
    with open(artifact_paths["labels"], 'rb') as file:
        label_encoder = pickle.load(file)

    return model, vectorizer, label_encoder
|
|
|
|
|
# Load (cached) artifacts at import time; used by predict_category() below.
model, vectorizer, label_encoder = load_model()
|
|
|
|
|
def predict_category(text, maxlen=82):
    """Predict the news category label for a raw text string.

    Args:
        text: Raw news headline / article snippet.
        maxlen: Sequence length expected by the classifier; token ids are
            left-padded ('pre') or truncated to this length. The default
            of 82 was previously hard-coded — it should match the length
            the saved model was trained with (TODO confirm against the
            training config).

    Returns:
        The decoded category label (as produced by the fitted label
        encoder) for the most probable class.
    """
    processed_text = [pre_process(text)]
    # The vectorizer returns an integer tensor; convert to nested lists so
    # pad_sequences can normalise each sequence to `maxlen`.
    text_vectorized = pad_sequences(
        vectorizer(processed_text).numpy().tolist(),
        padding='pre',
        maxlen=maxlen,
    )
    prediction = model.predict(text_vectorized)
    # argmax over the class axis -> index of the most probable category.
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]
|
|
|
|
|
|
|
|
# Inject the app stylesheet. The .title/.subtitle/.result-* classes are
# referenced by the HTML snippets rendered further down via st.markdown.
st.markdown(
    """
    <style>
    .title {
        color: #ffffff;
        font-size: 2.4em;
        text-align: center;
        font-weight: 700;
        text-transform: uppercase;
        text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
        padding: 10px;
    }
    .subtitle {
        color: #ffff;
        font-size: 1.3em;
        text-align: center;
        font-weight: 600;
        text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
        padding: 5px;
    }
    .classify-button {
        background-color: #3498db;
        color: white;
        font-size: 1.2em;
        padding: 12px 24px;
        border: none;
        border-radius: 8px;
        cursor: pointer;
        display: block;
        margin: 20px auto;
        transition: 0.3s;
    }
    .classify-button:hover {
        background-color: #2980b9;
    }
    .result-box {
        background: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
        padding: 20px;
        border-radius: 10px;
        text-align: center;
        margin-top: 30px;
        position: relative;
        overflow: hidden;
        border: 2px solid transparent;
        background-clip: padding-box, border-box;
        border-image: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
        border-image-slice: 0;
        transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
    }
    .result-box:hover {
        transform: scale(1.05);
        box-shadow: 0px 10px 30px rgba(98, 132, 255, 0.8),
                    0px 10px 30px rgba(255, 0, 0, 0.8);
    }
    .result-text {
        font-size: 1.8em;
        color: #ffffff;
        font-weight: 900;
        text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
        animation: fadeIn 0.8s ease-in-out;
    }
    /* BUG FIX: .result-text referenced `fadeIn` but no @keyframes rule was
       defined anywhere, so the animation silently never ran. */
    @keyframes fadeIn {
        from { opacity: 0; }
        to   { opacity: 1; }
    }
    </style>
    """,
    unsafe_allow_html=True
)
|
|
|
|
|
# --- UI ---
# NOTE(review): the emoji-looking literals below ("๐ฐ", "๐ฟ", "๐๏ธ",
# "โ ๏ธ") appear to be mojibake (UTF-8 emoji decoded with the wrong
# codepage) — confirm the file's encoding. They are kept byte-identical
# here because they are runtime output.
st.markdown("<div class='title'>๐ฐ News Classifier</div>", unsafe_allow_html=True)

st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)

# Free-text input holding the article/headline to classify.
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")

if st.button("Analyze ๐ฟ"):
    if user_input.strip():
        # Non-empty input: run the classifier and render the styled result.
        category = predict_category(user_input)
        st.markdown(f"<div class='result-box'><span class='result-text'>๐๏ธ Predicted Category: <strong>{category}</strong></span></div>", unsafe_allow_html=True)
    else:
        # Empty / whitespace-only input: prompt the user instead.
        st.warning("โ ๏ธ Please enter some text to analyze.")
|
|
|