Spaces:

Adityaganesh
/

News_Classification_prediction

Sleeping

App Files Files Community

News_Classification_prediction / app.py

Adityaganesh

Update app.py

77fcbcc verified 9 months ago

raw

history blame

4.97 kB

	import base64
	import streamlit as st
	import numpy as np
	import re
	import emoji
	import os
	import nltk
	from nltk.tokenize import word_tokenize
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Fetch the NLTK data sets (tokenizer data, stop-word list, WordNet) before use
	for _resource in ('punkt_tab', 'stopwords', 'wordnet'):
	    nltk.download(_resource)

	import tensorflow as tf
	import keras
	from keras.preprocessing.sequence import pad_sequences
	import pickle

	# ✅ Page metadata; the "wide" layout lets the app use the full browser width
	st.set_page_config(
	    page_title="Intelligent News Classifier",
	    page_icon="🧠",
	    layout="wide",
	)

	# ✅ Function to set background image
	def set_background(image_path):
	    """Use the image at ``image_path`` as the full-page app background.

	    The file is read, base64-encoded and embedded as a CSS data URL on the
	    ``.stApp`` selector via ``st.markdown``.

	    Args:
	        image_path: Path to the image file.

	    Returns:
	        None. If ``image_path`` is not an existing file, an error is shown
	        with ``st.error`` and no style is injected.
	    """
	    # Function-scope import keeps this block self-contained.
	    import mimetypes

	    # isfile (rather than exists) also rejects directories, which would
	    # otherwise make open() below raise.
	    if not os.path.isfile(image_path):
	        st.error(f"❌ Background image not found: {image_path}")
	        return

	    with open(image_path, "rb") as img_file:
	        encoded_img = base64.b64encode(img_file.read()).decode()

	    # Derive the MIME type from the file extension so non-PNG images are
	    # labelled correctly; fall back to the previous hard-coded PNG type.
	    mime_type = mimetypes.guess_type(image_path)[0]
	    if not mime_type or not mime_type.startswith("image/"):
	        mime_type = "image/png"

	    bg_image_style = f"""
	<style>
	.stApp {{
	background-image: url("data:{mime_type};base64,{encoded_img}");
	background-size: cover;
	background-repeat: no-repeat;
	background-position: center;
	background-attachment: fixed;
	}}
	</style>
	"""
	    st.markdown(bg_image_style, unsafe_allow_html=True)

	# ✅ Apply the page background
	set_background("Images/picture.png")

	# ✅ Shared lemmatizer, and the English stop words extended with "pm"
	lemmatizer = WordNetLemmatizer()
	stop_words = set(stopwords.words('english')) | {"pm"}

	# ✅ Text Preprocessing Function
	def pre_process(text):
	    """Clean raw text into a space-joined string of lemmatized tokens.

	    Steps, in order: lowercase; remove HTML tags, URLs, @mentions and
	    #hashtags; replace underscores with spaces; convert emojis to their
	    ``:name:`` form and remove them; strip remaining special characters;
	    tokenize; drop stop words; lemmatize.

	    Args:
	        text: Raw input string.

	    Returns:
	        The cleaned text, with the remaining lemmatized tokens joined by
	        single spaces.
	    """
	    text = text.lower()
	    text = re.sub(r"<.*?>", "", text)  # Remove HTML tags
	    text = re.sub(r"http[s]?://\S+", "", text)  # Remove URLs
	    text = re.sub(r"[@#]\S+", "", text)  # Remove mentions and hashtags
	    # Replace underscores with spaces. The earlier pattern r"\\_+" only
	    # matched a literal backslash followed by underscores, so plain
	    # underscores were never replaced.
	    # NOTE(review): confirm the training pipeline cleaned text the same way.
	    text = re.sub(r"_+", " ", text)
	    text = emoji.demojize(text)  # Convert emojis to text
	    # Remove the ":name:" emoji text produced above.
	    # NOTE(review): this also removes ordinary text that sits between two
	    # colons on the same line - confirm that is acceptable.
	    text = re.sub(r":.*?:", "", text)
	    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)  # Remove special characters
	    words = word_tokenize(text)
	    return " ".join(
	        lemmatizer.lemmatize(word) for word in words if word not in stop_words
	    )

	# ✅ Cache Model Loading for Performance
	@st.cache_resource
	def load_model():
	    """Load the three artifacts used for prediction.

	    Returns:
	        A ``(model, vectorizer, label_encoder)`` tuple: the objects returned
	        by ``keras.models.load_model`` for ``news_model.keras`` and
	        ``news_tv_model.keras``, followed by the object unpickled from
	        ``label_encoder.pkl``.
	    """
	    classifier = keras.models.load_model("news_model.keras")
	    text_vectorizer = keras.models.load_model("news_tv_model.keras")

	    with open("label_encoder.pkl", 'rb') as encoder_file:
	        encoder = pickle.load(encoder_file)

	    return classifier, text_vectorizer, encoder

	# ✅ Load the classifier, the text vectorizer and the label encoder as module-level
	# objects; predict_category() below reads all three.
	model, vectorizer, label_encoder = load_model()

	# ✅ Prediction Function
	def predict_category(text):
	    """Predict the news category label for ``text``.

	    The text is cleaned with ``pre_process``, passed through the
	    module-level ``vectorizer``, pre-padded to length 82 and fed to
	    ``model.predict``. The arg-max index of the first prediction row is
	    mapped back through ``label_encoder.inverse_transform``.

	    Args:
	        text: Raw input string.

	    Returns:
	        The first element returned by ``label_encoder.inverse_transform``
	        for the winning index.
	    """
	    cleaned = pre_process(text)
	    token_ids = vectorizer([cleaned]).numpy().tolist()
	    padded = pad_sequences(token_ids, padding='pre', maxlen=82)
	    scores = model.predict(padded)
	    best_idx = np.argmax(scores, axis=1)[0]
	    decoded = label_encoder.inverse_transform([best_idx])
	    return decoded[0]

	# ✅ Text color used for each predicted category; "Default" is the fallback
	category_colors = dict(
	    Sports="#27ae60",         # Green
	    Politics="#2980b9",       # Blue
	    Entertainment="#8e44ad",  # Purple
	    Technology="#e67e22",     # Orange
	    Business="#c0392b",       # Red
	    Default="#ffffff",        # White
	)

	# ✅ Global CSS for the title, the subtitle and the prediction result box
	page_css = """
	<style>
	.title {
	color: #ffffff;
	font-size: 2.8em;
	text-align: center;
	font-weight: 700;
	text-transform: uppercase;
	text-shadow: 2px 2px 8px rgba(0, 0, 0, 1.0);
	padding: 15px;
	}
	.subtitle {
	color: #ffffff;
	font-size: 1.5em;
	text-align: center;
	font-weight: 600;
	text-shadow: 1px 1px 6px rgba(0, 0, 0, 1.0);
	padding: 10px;
	}
	.result-box {
	padding: 25px;
	border-radius: 12px;
	text-align: center;
	margin-top: 30px;
	font-size: 2em;
	font-weight: 900;
	text-shadow: 3px 3px 10px rgba(0, 0, 0, 0.5);
	}
	</style>
	"""
	st.markdown(page_css, unsafe_allow_html=True)

	# ✅ Page heading: title first, then subtitle
	for heading_html in (
	    "<div class='title'>🧠 Intelligent News Classifier</div>",
	    "<div class='subtitle'>Find out what type of news you're reading!.</div>",
	):
	    st.markdown(heading_html, unsafe_allow_html=True)

	# ✅ Text area holding the text to classify
	user_input = st.text_area(
	    "Enter text here:",
	    height=150,
	    placeholder="Type your news text here...",
	)

	# ✅ On click: warn when the input is blank, otherwise classify and show the result
	if st.button("Analyze 🧐"):
	    if not user_input.strip():
	        st.warning("⚠️ Please enter some text to analyze.")
	    else:
	        category = predict_category(user_input)
	        # Categories without a mapped color fall back to the "Default" entry.
	        color = category_colors.get(category, category_colors["Default"])
	        result_html = f"""
	<div class='result-box' style='color: {color};'>
	🗂️ Predicted Category: <strong>{category}</strong>
	</div>
	"""
	        st.markdown(result_html, unsafe_allow_html=True)