| import base64 |
| import streamlit as st |
| import numpy as np |
| import re |
| import emoji |
|
|
| import nltk |
| from nltk.tokenize import word_tokenize |
| from nltk.corpus import stopwords |
| from nltk.stem import WordNetLemmatizer |
|
|
| |
# Fetch the NLTK resources this script relies on: tokenizer data,
# the stop-word corpus and WordNet (used by the lemmatizer).
for _nltk_resource in ('punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)
|
|
| import tensorflow |
| import keras |
| from keras.utils import pad_sequences |
| import pickle |
| import base64 |
|
|
| |
# Browser-tab title, icon and layout for the app.
# The icon literal was mis-encoded (UTF-8 bytes read as a single-byte
# codepage); it is restored to the newspaper emoji.
st.set_page_config(
    page_title="News Category Classifier",
    page_icon="📰",
    layout="centered",
)
|
|
def set_background(image_path):
    """Use the image at ``image_path`` as the full-page app background.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URL inside a CSS rule for ``.stApp``; the rule is injected
    into the page with ``st.markdown(..., unsafe_allow_html=True)``.

    Args:
        image_path: Path of the image file to embed.

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    import mimetypes  # local import: only this function needs it

    with open(image_path, "rb") as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode()

    # Declare the image's real media type (e.g. image/jpeg for a .jpg file)
    # instead of always claiming PNG; keep PNG as the fallback when the
    # extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    bg_image_style = f"""
    <style>
    .stApp {{
        background-image: url("data:{mime_type};base64,{encoded_img}");
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(bg_image_style, unsafe_allow_html=True)
|
|
| |
# Install the page background image.
set_background("page/Images/bkg4.jpg")

# Words dropped during pre-processing: NLTK's English stop words plus "pm".
stop_words = set(stopwords.words('english')) | {"pm"}

# Shared WordNet lemmatizer used by pre_process.
lemmatizer = WordNetLemmatizer()
|
|
def pre_process(x):
    """Clean a raw text string and return it as space-joined lemmas.

    Steps, in order: lower-case the text; apply a series of regex
    substitutions (tags, URLs, @/# tokens, a leading "... - " prefix);
    convert emoji to ``:name:`` form and delete every ``:...:`` span;
    strip all characters other than letters, digits, whitespace and
    underscores; tokenize; drop stop words; lemmatize each remaining token.

    Args:
        x: The text to clean.

    Returns:
        The cleaned text, tokens separated by single spaces.
    """
    x = x.lower()

    # (pattern, replacement) pairs applied before emoji handling.
    # Order matters: each substitution sees the output of the previous one.
    before_emoji = (
        ("<.*?>", ""),                 # HTML-like tags
        ("http[s]?://.+?\\S+", ""),    # http/https URLs
        ("[@#].+?\\S", ""),            # '@' or '#' plus the characters that follow
        # NOTE(review): as written this matches a backslash followed by
        # underscores, not bare underscores -- confirm that is intended.
        (r"\\_+", " "),
        ("^[A-Za-z.].*\\s-\\s", ""),   # leading text up to the last " - "
    )
    for pattern, replacement in before_emoji:
        x = re.sub(pattern, replacement, x)

    # Emoji become ':name:' tokens, which the next substitution removes
    # together with any other text enclosed between two colons.
    x = emoji.demojize(x)
    after_emoji = (
        (":.*?:", ""),
        ("[^a-zA-Z0-9\\s_]", ""),
    )
    for pattern, replacement in after_emoji:
        x = re.sub(pattern, replacement, x)

    return " ".join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(x)
        if token not in stop_words
    )
|
|
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    All three artifacts are read from fixed paths relative to the working
    directory. The function is wrapped in ``st.cache_resource``.

    Returns:
        A ``(model, vectorizer, label_encoder)`` tuple.
    """
    classifier = keras.models.load_model("news_model.keras")
    text_vectorizer = keras.models.load_model("news_tv_model.keras")

    # NOTE: unpickling runs arbitrary code from the file; label_encoder.pkl
    # must come from a trusted source.
    with open("label_encoder.pkl", 'rb') as handle:
        encoder = pickle.load(handle)

    return classifier, text_vectorizer, encoder
|
|
# Module-level handles to the loaded artifacts, used by predict_category.
(model, vectorizer, label_encoder) = load_model()
|
|
def predict_category(text):
    """Return the predicted category label for ``text``.

    The text is cleaned with ``pre_process``, run through ``vectorizer``,
    left-padded to length 82 and passed to ``model.predict``; the index of
    the highest score is mapped back to a label with ``label_encoder``.

    Args:
        text: Raw news text entered by the user.

    Returns:
        The label produced by ``label_encoder.inverse_transform`` for the
        highest-scoring class.
    """
    cleaned_batch = [pre_process(text)]  # the vectorizer is called on a one-item batch
    token_ids = vectorizer(cleaned_batch).numpy().tolist()
    padded_ids = pad_sequences(token_ids, padding='pre', maxlen=82)
    scores = model.predict(padded_ids)
    best_index = np.argmax(scores, axis=1)[0]
    labels = label_encoder.inverse_transform([best_index])
    return labels[0]
|
|
| |
# Global CSS for the title, subtitle, button and result box.
# `.result-text` refers to a `fadeIn` animation, so the matching
# @keyframes rule is defined here; without it the animation is a no-op.
st.markdown(
    """
<style>
.title {
    color: #ffffff;
    font-size: 2.4em;
    text-align: center;
    font-weight: 700;
    text-transform: uppercase;
    text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
    padding: 10px;
}
.subtitle {
    color: #ffff;
    font-size: 1.3em;
    text-align: center;
    font-weight: 600;
    text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
    padding: 5px;
}
.classify-button {
    background-color: #3498db;
    color: white;
    font-size: 1.2em;
    padding: 12px 24px;
    border: none;
    border-radius: 8px;
    cursor: pointer;
    display: block;
    margin: 20px auto;
    transition: 0.3s;
}
.classify-button:hover {
    background-color: #2980b9;
}
.result-box {
    background: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
    padding: 20px;
    border-radius: 10px;
    text-align: center;
    margin-top: 30px;
    position: relative;
    overflow: hidden;
    border: 2px solid transparent;
    background-clip: padding-box, border-box;
    border-image: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
    border-image-slice: 0;
    transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
}
.result-box:hover {
    transform: scale(1.05);
    box-shadow: 0px 10px 30px rgba(98, 132, 255, 0.8),
                0px 10px 30px rgba(255, 0, 0, 0.8);
}
.result-text {
    font-size: 1.8em;
    color: #ffffff;
    font-weight: 900;
    text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
    animation: fadeIn 0.8s ease-in-out;
}
@keyframes fadeIn {
    from { opacity: 0; }
    to { opacity: 1; }
}
</style>
""",
    unsafe_allow_html=True
)
|
|
# Page heading and instructions, styled through the `title` / `subtitle`
# CSS classes. The heading emoji was mis-encoded and is restored here.
st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
st.markdown(
    "<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>",
    unsafe_allow_html=True,
)


# Free-text input whose value is classified when the button is pressed.
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
|
|
# Classify the entered text when the button is pressed; whitespace-only
# input produces a warning instead of a prediction.
# The emoji literals in this block were mis-encoded and are restored.
if st.button("Analyze 🏷️"):
    if user_input.strip():
        category = predict_category(user_input)
        # NOTE(review): the original leading glyph could not be recovered
        # with certainty; the newspaper emoji is assumed -- confirm.
        st.markdown(
            "<div class='result-box'><span class='result-text'>"
            f"🗞️ Predicted Category: <strong>{category}</strong>"
            "</span></div>",
            unsafe_allow_html=True,
        )
    else:
        st.warning("⚠️ Please enter some text to analyze.")