# News_Classifier / app.py
# Source: Mpavan45's Hugging Face Space, commit 6ea85ab (verified)
import streamlit as st
import numpy as np
import re
import emoji
from textblob import TextBlob
import spacy
import nltk
from nltk.corpus import stopwords
import tensorflow as tf
import keras
from keras.utils import pad_sequences
import pickle
# Page Config
st.set_page_config(page_title="Newsense AI", page_icon="πŸ“°", layout="wide")
# --- NLP resources -----------------------------------------------------------
# Ensure the NLTK stopword corpus is present (no-op after the first run).
nltk.download('stopwords', quiet=True)

# SpaCy English pipeline, used only for lemmatization in pre_process.
nlp = spacy.load("en_core_web_sm")

# English stopwords plus domain-specific noise tokens.
# NOTE(review): currently unused by pre_process — kept for parity with the
# training pipeline; confirm whether stopword removal should be applied here.
stop_words = set(stopwords.words('english')).union({"pm"})


def pre_process(x):
    """Normalize raw article text to match the model's training pipeline.

    Steps: lowercase; strip HTML tags, URLs, @mentions/#hashtags, emojis,
    separator punctuation and parenthesized content; unwrap quoted spans;
    collapse whitespace; spell-correct with TextBlob; lemmatize with SpaCy.

    Parameters
    ----------
    x : str
        Raw article text.

    Returns
    -------
    str
        The cleaned, lemmatized text.
    """
    x = x.lower()
    # Remove HTML tags
    x = re.sub(r"<.*?>", "", x)
    # Remove URLs
    x = re.sub(r"http[s]?://\S+", "", x)
    # Remove mentions and hashtags
    x = re.sub(r"[@#]\S+", "", x)
    # Remove emojis
    x = emoji.replace_emoji(x, replace="")
    # Replace separator punctuation (-, ., :, \, ,) with spaces
    x = re.sub(r"[-.:,\\]", " ", x)
    # Unwrap single/double-quoted spans, keeping the inner text
    x = re.sub(r"['\"](.*?)['\"]", r'\1', x)
    # Remove content inside parentheses
    x = re.sub(r"\(.*?\)", "", x)
    # Collapse runs of whitespace
    x = re.sub(r"\s+", " ", x).strip()
    # Spell checking (TextBlob) — slow on long inputs
    x = str(TextBlob(x).correct())
    # Lemmatization using SpaCy
    x = " ".join(token.lemma_ for token in nlp(x))
    # BUG FIX: the original `return " ".join(x)` iterated the *characters*
    # of the string, inserting a space between every character.
    return x


@st.cache_resource
def load_model():
    """Load and cache the classifier, label encoder and text vectorizer.

    Cached with st.cache_resource so the heavy Keras/pickle loads happen
    once per process, not on every rerun.  The vectorizer is loaded here
    too (the original reloaded it from disk inside every prediction).
    """
    model = keras.models.load_model("model_m3_new.keras")
    vectorizer = keras.models.load_model("vec_text_m3_new.keras")
    with open("label_encoder_m5.pkl", 'rb') as file:
        label_encoder = pickle.load(file)
    return model, label_encoder, vectorizer


model, label_encoder, vectorizer = load_model()


def predict_category(text):
    """Classify a raw news article.

    Parameters
    ----------
    text : str
        Raw article text as entered by the user.

    Returns
    -------
    tuple[str, str]
        (predicted category label, cleaned/pre-processed text).
    """
    cleaned_text = pre_process(text)
    # Vectorize, then pad/truncate to the fixed length the classifier expects.
    text_vectorized = pad_sequences(
        vectorizer.predict(np.array([cleaned_text])).numpy(),
        padding='pre',
        maxlen=128,
    )
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0], cleaned_text
# Custom CSS
st.markdown(
"""
<style>
body {
background-image: url('https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/WwOWG8MBGYxHnIeM2Dowo.webp');
background-size: cover;
background-repeat: no-repeat;
background-attachment: fixed;
}
.title {
font-size: 60px;
font-weight: bold;
color: white;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
padding: 20px;
border-radius: 20px;
box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
0 4px 15px rgba(74, 35, 90, 0.6);
display: inline-block;
margin-bottom: 20px;
text-align: center;
animation: elegantFadeSlide 1.5s ease-out forwards;
}
.input-box {
display: flex;
flex-direction: column;
align-items: center;
gap: 20px;
margin: 0 auto;
width: 80%;
}
.input-prompt {
font-size: 22px;
font-weight: bold;
color: #ffffff;
text-align: center;
opacity: 0.8;
}
div.stTextArea textarea {
width: 100%;
height: 200px;
padding: 20px;
border-radius: 15px;
background-color: rgba(0, 0, 0, 0.7);
color: white;
font-size: 18px;
outline: none;
box-shadow: 0 6px 20px rgba(136, 14, 79, 0.3);
transition: all 0.5s ease;
}
div.stTextArea textarea:hover {
transform: scale(1.05);
box-shadow: 0 10px 30px rgba(136, 14, 79, 0.5);
}
.analyze-button {
width: 200px;
height: 60px;
border-radius: 30px;
background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
font-size: 20px;
font-weight: bold;
color: white;
border: none;
cursor: pointer;
transition: all 0.4s ease;
}
.analyze-button:hover {
transform: scale(1.1);
box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8);
}
.result-box {
text-align: center;
font-size: 28px;
font-weight: bold;
color: white;
background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
padding: 30px;
border-radius: 20px;
box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5);
margin-top: 30px;
}
</style>
""",
unsafe_allow_html=True
)
# Streamlit UI layout: title banner, input area, and the classify handler.
import html  # stdlib; used to escape user-derived text before HTML rendering

st.markdown('<div class="title">📰 Newsense AI - News Classification</div>', unsafe_allow_html=True)

# Input and button section
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area("Enter your news article:", height=200)

# Predict button.  FIX: guard on stripped text so whitespace-only input shows
# the warning instead of being sent to the model.
if st.button("Classify", key="analyze-button"):
    if user_input.strip():
        category, cleaned_text = predict_category(user_input)
        # SECURITY: these boxes are rendered with unsafe_allow_html=True, so
        # escape the user-derived strings to prevent HTML/script injection.
        st.markdown(
            f'<div class="result-box">Prediction: {html.escape(str(category))}</div>',
            unsafe_allow_html=True,
        )
        st.markdown(
            f'<div class="result-box">Cleaned Text: {html.escape(cleaned_text)}</div>',
            unsafe_allow_html=True,
        )
    else:
        st.warning("Please enter some text to classify!")
st.markdown('</div>', unsafe_allow_html=True)