File size: 4,761 Bytes
e4fbc3e 4bb4749 c161086 4bb4749 4b40003 4bb4749 c161086 4b40003 bbb6e4c 4f354bb 4bb4749 14ec450 4b40003 14ec450 4b40003 de79f8d 77fcbcc 4b40003 de79f8d 4b40003 c161086 e4fbc3e 4b40003 14ec450 89216a0 7d8a652 de79f8d 4b40003 77fcbcc 89216a0 c161086 de79f8d 4b40003 4bb4749 4b40003 de79f8d 4b40003 de79f8d 89a4ed9 a65ee9f 4b40003 de79f8d 4b40003 a65ee9f c161086 a65ee9f 4bb4749 de79f8d a65ee9f 4bb4749 de79f8d 4bb4749 1df2517 4b40003 a65ee9f 4bb4749 de79f8d 77fcbcc 4b40003 de79f8d 4b40003 de79f8d 4b40003 de79f8d 4b40003 de79f8d 4b40003 1118b28 de79f8d 4b40003 de79f8d 4b40003 1118b28 4b40003 77fcbcc 4bb4749 de79f8d 8c835da 1118b28 f52a7f0 de79f8d 4b40003 4bb4749 de79f8d 77fcbcc 4b40003 77fcbcc 1118b28 77fcbcc 4bb4749 d76686a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import base64
import streamlit as st
import numpy as np
import re
import emoji
import os
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary resources: the NLTK data packages needed by the
# tokenizer, stop-word list and lemmatizer imported above.
for _resource in ('punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)
import tensorflow as tf
import keras
from keras.preprocessing.sequence import pad_sequences
import pickle
# ✅ Enable full-width mode for Hugging Face
# page_icon is given as a single emoji character; layout="wide" makes the app
# use the full browser width.
st.set_page_config(
    page_title="Intelligent News Classifier",
    page_icon="🧠",
    layout="wide",
)
# ✅ Function to set background image
def set_background(image_path):
    """Use a local image file as the full-page background of the app.

    The file is read, base64-encoded and embedded as a ``data:`` URL inside a
    ``<style>`` block that targets the ``.stApp`` container.

    Args:
        image_path: Path of the image file on disk.

    Returns:
        None. When the file does not exist, an error is displayed with
        ``st.error`` and no style is injected.
    """
    import mimetypes  # local import: only this function needs it

    if not os.path.exists(image_path):
        st.error(f"❌ Background image not found: {image_path}")
        return

    # Label the data URL with the MIME type matching the file extension
    # (e.g. image/png for .png); fall back to JPEG when it cannot be guessed.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    with open(image_path, "rb") as img_file:
        encoded_img = base64.b64encode(img_file.read()).decode()

    bg_image_style = f"""
    <style>
    .stApp {{
        background-image: url("data:{mime_type};base64,{encoded_img}");
        background-size: cover;
        background-repeat: no-repeat;
        background-position: center;
        background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(bg_image_style, unsafe_allow_html=True)
# ✅ Set background image
set_background("Images/image.jpg")

# ✅ Initialize stopwords and lemmatizer
lemmatizer = WordNetLemmatizer()
# NLTK's English stop words plus "pm", which is filtered out as well.
stop_words = set(stopwords.words('english')) | {"pm"}
# ✅ Text Preprocessing Function
def pre_process(text):
    """Clean raw text and return it as a space-joined string of lemmas.

    Steps, in order: lowercase; remove HTML tags, URLs, @mentions and
    #hashtags; turn underscores into spaces; convert emojis to their
    ``:name:`` form and delete them; drop every character that is not a
    letter, digit, whitespace or underscore; tokenize; remove stop words;
    lemmatize.

    Args:
        text: Raw input string.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token survives the filtering).
    """
    text = text.lower()
    text = re.sub(r"<.*?>", "", text)  # Remove HTML tags
    text = re.sub(r"http[s]?://\S+", "", text)  # Remove URLs
    text = re.sub(r"[@#]\S+", "", text)  # Remove mentions and hashtags
    # Replace each run of underscores with one space. The pattern must be a
    # plain "_+": a doubled backslash inside a raw string would demand a
    # literal backslash in front of the underscores and never match them.
    # NOTE(review): if the model was trained on text where underscores were
    # left in place, confirm this does not skew inference inputs.
    text = re.sub(r"_+", " ", text)
    text = emoji.demojize(text)  # Convert emojis to ":name:" text
    # Remove the ":name:" emoji text. NOTE(review): this also deletes any
    # ordinary text sitting between two colons on the same line.
    text = re.sub(r":.*?:", "", text)
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)  # Remove special characters
    tokens = word_tokenize(text)
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    ]
    return " ".join(lemmas)
# ✅ Cache Model Loading for Performance
@st.cache_resource
def load_model():
    """Load the classifier, the text vectorizer and the label encoder.

    Returns:
        A ``(model, vectorizer, label_encoder)`` tuple. The first two are
        read with ``keras.models.load_model``; the third is unpickled from
        ``label_encoder.pkl``.
    """
    classifier = keras.models.load_model("news_model.keras")
    text_vectorizer = keras.models.load_model("news_tv_model.keras")

    # pickle executes code while loading: only ship a trusted encoder file.
    with open("label_encoder.pkl", 'rb') as encoder_file:
        encoder = pickle.load(encoder_file)

    return classifier, text_vectorizer, encoder
# ✅ Load the models
# load_model() is wrapped in st.cache_resource, so this call is intended to
# reuse the already-loaded objects instead of reading the files again.
(model, vectorizer, label_encoder) = load_model()
# ✅ Prediction Function
def predict_category(text):
    """Predict the category label for a piece of raw text.

    Args:
        text: Raw input text; it is cleaned with ``pre_process`` first.

    Returns:
        The label produced by passing the arg-max class index of the model
        output through ``label_encoder.inverse_transform``.
    """
    cleaned_batch = [pre_process(text)]
    token_ids = vectorizer(cleaned_batch).numpy().tolist()
    # Pad on the left to a fixed length of 82.
    # NOTE(review): 82 presumably matches the sequence length used when the
    # model was trained — confirm before changing.
    padded = pad_sequences(token_ids, padding='pre', maxlen=82)
    scores = model.predict(padded)
    best_idx = np.argmax(scores, axis=1)[0]
    decoded = label_encoder.inverse_transform([best_idx])
    return decoded[0]
# ✅ Streamlit UI Design
# CSS classes (.title, .subtitle, .result-box, .result-text) referenced by the
# HTML snippets this script renders.
_CUSTOM_CSS = """
<style>
.title {
color: #ffffff;
font-size: 2.8em;
text-align: center;
font-weight: 700;
text-transform: uppercase;
text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
padding: 15px;
}
.subtitle {
color: #ffffff;
font-size: 1.5em;
text-align: center;
font-weight: 600;
text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
padding: 10px;
}
.result-box {
background-color: #000000; /* Black background */
padding: 25px;
border-radius: 12px;
text-align: center;
margin-top: 30px;
font-size: 2em;
font-weight: 900;
text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
}
.result-text {
color: #27ae60; /* Green text */
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ✅ Page Title
# Title and subtitle are raw HTML so they pick up the .title / .subtitle
# CSS classes; the title emoji is written as a real character.
st.markdown(
    "<div class='title'>🧠 Intelligent News Classifier</div>",
    unsafe_allow_html=True,
)
st.markdown(
    "<div class='subtitle'>Find out what type of news you're reading!</div>",
    unsafe_allow_html=True,
)
# ✅ User Input
# Multi-line text box holding the news text to classify.
user_input = st.text_area(
    "Enter text here:",
    height=150,
    placeholder="Type your news text here...",
)
# ✅ Analyze Button
# On click: classify non-blank input and show the label in the styled result
# box; warn when the input is empty or whitespace only.
if st.button("Analyze 🧠"):
    if user_input.strip():
        category = predict_category(user_input)
        st.markdown(
            f"""
            <div class='result-box'>
            <span class='result-text'>🗞️ Predicted Category: <strong>{category}</strong></span>
            </div>
            """,
            unsafe_allow_html=True,
        )
    else:
        st.warning("⚠️ Please enter some text to analyze.")
|