Adityaganesh's picture
Update app.py
c161086 verified
raw
history blame
5.48 kB
import base64
import streamlit as st
import numpy as np
import re
import emoji
import os
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download necessary resources at startup (each call is a no-op if the data is already present)
nltk.download('punkt_tab')  # tokenizer tables used by word_tokenize
nltk.download('stopwords')  # stopword lists (English list is used below)
nltk.download('wordnet')  # lexical database backing WordNetLemmatizer
import tensorflow
import keras
from keras.utils import pad_sequences
import pickle
# Set Streamlit page configuration (must be the first Streamlit call in the script)
st.set_page_config(page_title="News Category Classifier", page_icon="๐Ÿ“ฐ", layout="centered")
# Function to set background image
def set_background(image_path):
    """Inject a full-page CSS background from a local image file.

    The image is read from disk, base64-encoded, and embedded as a data URI
    in a <style> block appended to the page. Shows a Streamlit error and
    returns early if the file does not exist.
    """
    if not os.path.exists(image_path):
        st.error(f"โŒ Background image not found: {image_path}")
        return
    with open(image_path, "rb") as fh:
        b64 = base64.b64encode(fh.read()).decode()
    css = f"""
    <style>
    body {{
    background-image: url("data:image/jpg;base64,{b64}");
    background-size: cover;
    background-repeat: no-repeat;
    background-position: center;
    background-attachment: fixed;
    }}
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
# Set background image
set_background("Images/News image.jpg")
# Initialize stopwords and lemmatizer (shared by pre_process below)
# NOTE(review): "pm" is added as an extra stopword — presumably the time
# suffix ("3 pm") common in news text; confirm against the training pipeline.
stop_words = set(stopwords.words('english')).union({"pm"})
lemmatizer = WordNetLemmatizer()
# Preprocessing function
def pre_process(x):
    """Normalize raw news text for the classifier.

    Steps: lowercase; strip HTML tags, URLs, @mentions/#hashtags; collapse
    underscores to spaces; demojize emojis and then drop the resulting
    ":name:" tokens; remove remaining special characters; tokenize, drop
    stopwords, and lemmatize. Returns the cleaned, space-joined text.

    All regex patterns are raw strings — the original non-raw "\\S"/"\\s"
    escapes emit SyntaxWarning on Python 3.12+.
    """
    x = x.lower()
    x = re.sub(r"<.*?>", "", x)            # remove HTML tags
    x = re.sub(r"http[s]?://\S+", "", x)   # remove URLs
    x = re.sub(r"[@#]\S+", "", x)          # remove mentions and hashtags
    x = re.sub(r"\_+", " ", x)             # replace underscores with space
    x = emoji.demojize(x)                  # convert emojis to ":name:" text
    x = re.sub(r":.*?:", "", x)            # remove the demojized tokens
    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)  # remove special characters
    words = word_tokenize(x)
    words = [word for word in words if word not in stop_words]
    return " ".join(lemmatizer.lemmatize(word) for word in words)
# Cache model loading to improve performance
@st.cache_resource
def load_model():
    """Load and cache the classifier, text vectorizer, and label encoder.

    Returns a (model, vectorizer, label_encoder) tuple. Cached with
    st.cache_resource so the artifacts are loaded once per server process.
    """
    model = keras.models.load_model("news_model.keras")
    vectorizer = keras.models.load_model("news_tv_model.keras")
    with open("label_encoder.pkl", 'rb') as fh:
        label_encoder = pickle.load(fh)
    return model, vectorizer, label_encoder
# Load the models (cached — subsequent reruns of the script reuse the same objects)
model, vectorizer, label_encoder = load_model()
# Prediction function
def predict_category(text, maxlen=82):
    """Return the predicted category label for a piece of news text.

    The text is cleaned with pre_process, vectorized, pre-padded to
    ``maxlen`` tokens (default 82 — the sequence length the model was
    trained with; exposed as a parameter instead of a hard-coded constant),
    and the argmax class index is mapped back through the label encoder.
    """
    processed_text = [pre_process(text)]
    token_ids = vectorizer(processed_text).numpy().tolist()
    text_vectorized = pad_sequences(token_ids, padding='pre', maxlen=maxlen)
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0]
# Streamlit UI — custom CSS for the title, subtitle, button, and result card.
_PAGE_CSS = """
<style>
.title {
color: #ffffff;
font-size: 2.4em;
text-align: center;
font-weight: 700;
text-transform: uppercase;
text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
padding: 10px;
}
.subtitle {
color: #ffff;
font-size: 1.3em;
text-align: center;
font-weight: 600;
text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
padding: 5px;
}
.classify-button {
background-color: #3498db;
color: white;
font-size: 1.2em;
padding: 12px 24px;
border: none;
border-radius: 8px;
cursor: pointer;
display: block;
margin: 20px auto;
transition: 0.3s;
}
.classify-button:hover {
background-color: #2980b9;
}
.result-box {
background: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
padding: 20px;
border-radius: 10px;
text-align: center;
margin-top: 30px;
position: relative;
overflow: hidden;
border: 2px solid transparent;
background-clip: padding-box, border-box;
border-image: linear-gradient(135deg, #6284FF 30%, #FF0000 70%);
border-image-slice: 0;
transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
}
.result-box:hover {
transform: scale(1.05);
box-shadow: 0px 10px 30px rgba(98, 132, 255, 0.8),
0px 10px 30px rgba(255, 0, 0, 0.8);
}
.result-text {
font-size: 1.8em;
color: #ffffff;
font-weight: 900;
text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
animation: fadeIn 0.8s ease-in-out;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Page title and subtitle (styled by the .title/.subtitle CSS classes injected above)
st.markdown("<div class='title'>๐Ÿ“ฐ News Classifier</div>", unsafe_allow_html=True)
st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
# User input text area for the news text to classify
user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
# Analyze button: classify the entered text and render the styled result card,
# or warn when the input is empty/whitespace-only.
if st.button("Analyze ๐Ÿฟ", key="analyze_button"):
    if user_input.strip():
        category = predict_category(user_input)
        st.markdown(
            f"<div class='result-box'><span class='result-text'>๐Ÿ—‚๏ธ Predicted Category: <strong>{category}</strong></span></div>",
            unsafe_allow_html=True,
        )
    else:
        st.warning("โš ๏ธ Please enter some text to analyze.")