|
|
import base64 |
|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import re |
|
|
import emoji |
|
|
import os |
|
|
import nltk |
|
|
from nltk.tokenize import word_tokenize |
|
|
from nltk.corpus import stopwords |
|
|
from nltk.stem import WordNetLemmatizer |
|
|
|
|
|
|
|
|
# Fetch the NLTK data packages needed at runtime: the Punkt tokenizer
# models (word_tokenize), the English stopword list, and WordNet (backing
# the lemmatizer). nltk.download is a no-op when the data already exists.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
|
|
|
|
|
import tensorflow as tf |
|
|
import keras |
|
|
from keras.preprocessing.sequence import pad_sequences |
|
|
import pickle |
|
|
|
|
|
|
|
|
st.set_page_config(page_title="Intelligent News Classifier", page_icon="π§ ", layout="wide") |
|
|
|
|
|
|
|
|
def set_background(image_path):
    """Set *image_path* as the full-page Streamlit background.

    The image is inlined into a CSS ``data:`` URI as base64 so no static
    file hosting is required. If the file does not exist, an error is
    shown in the UI and the function returns without injecting CSS.

    Args:
        image_path: Path to a local image file (jpg/jpeg/png/gif/webp).
    """
    if not os.path.exists(image_path):
        st.error(f"β Background image not found: {image_path}")
        return

    with open(image_path, "rb") as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode()

    # BUG FIX: the original hard-coded "image/jpg", which is not a
    # registered MIME type ("image/jpeg" is the correct one) and was
    # wrong for non-JPEG files. Derive the subtype from the extension,
    # falling back to jpeg for unknown extensions.
    ext = os.path.splitext(image_path)[1].lstrip(".").lower()
    subtype = {"jpg": "jpeg", "jpeg": "jpeg", "png": "png",
               "gif": "gif", "webp": "webp"}.get(ext, "jpeg")

    bg_image_style = f"""
    <style>
    .stApp {{
    background-image: url("data:image/{subtype};base64,{encoded_img}");
    background-size: cover;
    background-repeat: no-repeat;
    background-position: center;
    background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(bg_image_style, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
set_background("Images/image.jpg") |
|
|
|
|
|
|
|
|
# English stopword set extended with "pm" (a domain-specific noise token,
# e.g. from timestamps); a single shared lemmatizer instance is reused
# for every token.
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()
|
|
|
|
|
|
|
|
def pre_process(text):
    """Normalize raw news text for the classifier.

    Lowercases, strips HTML tags, URLs, @mentions/#hashtags, emoji and
    punctuation, removes English stopwords (plus "pm"), and lemmatizes
    the surviving tokens.

    Args:
        text: Raw input string.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    text = text.lower()
    text = re.sub(r"<.*?>", "", text)           # HTML tags
    text = re.sub(r"https?://\S+", "", text)    # URLs
    text = re.sub(r"[@#]\S+", "", text)         # mentions / hashtags
    # BUG FIX: the original pattern r"\\_+" matched a literal backslash
    # followed by underscores, so bare underscores ("breaking_news") were
    # never split into separate words. Match runs of underscores directly.
    text = re.sub(r"_+", " ", text)
    # Convert emoji to ":name:" placeholders, then drop the placeholders
    # (and anything else between two colons) entirely.
    text = emoji.demojize(text)
    text = re.sub(r":.*?:", "", text)
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)  # remaining punctuation
    words = word_tokenize(text)
    words = [word for word in words if word not in stop_words]
    return " ".join(lemmatizer.lemmatize(word) for word in words)
|
|
|
|
|
|
|
|
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    Decorated with ``st.cache_resource`` so the artifacts are deserialized
    only once per server process rather than on every Streamlit rerun.

    Returns:
        Tuple of (keras model, keras vectorizer model, label encoder).
    """
    artifact_paths = {
        "model": "news_model.keras",
        "vectorizer": "news_tv_model.keras",
        "labels": "label_encoder.pkl",
    }

    classifier = keras.models.load_model(artifact_paths["model"])
    text_vectorizer = keras.models.load_model(artifact_paths["vectorizer"])

    # NOTE(review): pickle.load is safe here only because the file is a
    # local project artifact — never load pickles from untrusted sources.
    with open(artifact_paths["labels"], 'rb') as fh:
        encoder = pickle.load(fh)

    return classifier, text_vectorizer, encoder
|
|
|
|
|
|
|
|
model, vectorizer, label_encoder = load_model() |
|
|
|
|
|
|
|
|
def predict_category(text):
    """Run the full inference pipeline and return the predicted label.

    Args:
        text: Raw user-entered news text.

    Returns:
        The human-readable category name decoded by the label encoder.
    """
    cleaned_batch = [pre_process(text)]
    # The vectorizer yields variable-length token-id sequences; left-pad
    # them to the fixed length (82) the network was trained with.
    token_ids = vectorizer(cleaned_batch).numpy().tolist()
    padded = pad_sequences(token_ids, padding='pre', maxlen=82)
    probabilities = model.predict(padded)
    best_idx = np.argmax(probabilities, axis=1)[0]
    return label_encoder.inverse_transform([best_idx])[0]
|
|
|
|
|
|
|
|
# Inject page-level CSS for the custom classes used below: .title and
# .subtitle (white, shadowed headings) and .result-box/.result-text (the
# black prediction panel with green text). unsafe_allow_html is required
# for raw <style> markup to pass through Streamlit's sanitizer.
st.markdown("""
<style>
.title {
color: #ffffff;
font-size: 2.8em;
text-align: center;
font-weight: 700;
text-transform: uppercase;
text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
padding: 15px;
}
.subtitle {
color: #ffffff;
font-size: 1.5em;
text-align: center;
font-weight: 600;
text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
padding: 10px;
}
.result-box {
background-color: #000000; /* Black background */
padding: 25px;
border-radius: 12px;
text-align: center;
margin-top: 30px;
font-size: 2em;
font-weight: 900;
text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
}
.result-text {
color: #27ae60; /* Green text */
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Page header: title and subtitle rendered as HTML so they pick up the
# custom .title/.subtitle CSS classes defined in this script.
st.markdown("<div class='title'>π§ Intelligent News Classifier</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'>Find out what type of news you're reading!</div>", unsafe_allow_html=True)

# Free-form input for the article text to classify.
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
|
|
|
|
|
|
|
|
# Run inference only on button press and only for non-blank input;
# otherwise prompt the user instead of calling the model.
if st.button("Analyze π§"):
    if user_input.strip():
        category = predict_category(user_input)
        # Show the prediction inside the styled .result-box panel.
        st.markdown(
            f"""
            <div class='result-box'>
            <span class='result-text'>ποΈ Predicted Category: <strong>{category}</strong></span>
            </div>
            """,
            unsafe_allow_html=True
        )
    else:
        st.warning("β οΈ Please enter some text to analyze.")
|
|
|