# Hugging Face Spaces app: Newsense AI (news classification demo)
# --- Standard library ---
import pickle
import re

# --- Third-party ---
import emoji
import keras
import nltk
import numpy as np
import spacy
import streamlit as st
import tensorflow as tf
from keras.utils import pad_sequences
from nltk.corpus import stopwords
from textblob import TextBlob

# Page config must be the first Streamlit call in the script.
st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")
# Download necessary resources (no-op if already present).
nltk.download("stopwords", quiet=True)

# Load the SpaCy pipeline once at module import — it is expensive to build.
nlp = spacy.load("en_core_web_sm")

# English stopwords plus domain-specific noise tokens.
stop_words = set(stopwords.words("english")).union({"pm"})


def pre_process(x):
    """Normalize raw article text for the classifier.

    Lowercases, strips HTML tags / URLs / mentions / emojis / punctuation
    noise, collapses whitespace, spell-corrects, and lemmatizes.

    Parameters:
        x: raw article text (str).

    Returns:
        The cleaned, lemmatized text as a single string.
    """
    x = x.lower()
    # Remove HTML tags
    x = re.sub(r"<.*?>", "", x)
    # Remove URLs
    x = re.sub(r"http[s]?://\S+", "", x)
    # Remove mentions and hashtags (@, #)
    x = re.sub(r"[@#]\S+", "", x)
    # Remove emojis
    x = emoji.replace_emoji(x, replace="")
    # Replace -, ., :, comma and backslash with a space
    x = re.sub(r"[-.:,\\]", " ", x)
    # Drop surrounding single/double quotes but keep the quoted text
    x = re.sub(r"['\"](.*?)['\"]", r"\1", x)
    # Remove content inside parentheses
    x = re.sub(r"\(.*?\)", "", x)
    # Collapse runs of whitespace
    x = re.sub(r"\s+", " ", x).strip()
    # Spell checking (slow on long inputs; TextBlob corrects word by word)
    x = str(TextBlob(x).correct())
    # Lemmatization via SpaCy
    x = " ".join(token.lemma_ for token in nlp(x))
    # BUG FIX: the original returned `" ".join(x)`, which joins the string's
    # individual *characters* with spaces; return the cleaned string as-is.
    return x


@st.cache_resource
def load_model():
    """Load the classifier and its label encoder once per session.

    Returns:
        (model, label_encoder): the Keras model and the fitted encoder.
    """
    model = keras.models.load_model("model_m3_new.keras")
    with open("label_encoder_m5.pkl", "rb") as file:
        label_encoder = pickle.load(file)
    return model, label_encoder


model, label_encoder = load_model()

# Load the text-vectorizer model once at module level; the original reloaded
# it from disk inside predict_category on every call.
vectorizer = keras.models.load_model("vec_text_m3_new.keras")


def predict_category(text):
    """Classify a news article.

    Parameters:
        text: raw article text (str).

    Returns:
        (category, cleaned_text): the predicted label and the pre-processed
        text that was fed to the model.
    """
    cleaned_text = pre_process(text)
    # Vectorize the cleaned text, then pad to the model's fixed input length.
    # NOTE(review): `.predict()` normally returns an ndarray (no `.numpy()`);
    # kept as originally written — confirm against the saved vectorizer model.
    text_vectorized = pad_sequences(
        vectorizer.predict(np.array([cleaned_text])).numpy(),
        padding="pre",
        maxlen=128,
    )
    # Model prediction: take the argmax class and map it back to its label.
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]
    return label_encoder.inverse_transform([category_idx])[0], cleaned_text
# Custom CSS theme: fixed background image, gradient title card, styled
# text area with hover zoom, gradient button, and gradient result cards.
_CUSTOM_CSS = """
    <style>
    body {
        background-image: url('https://cdn-uploads.huggingface.co/production/uploads/675fab3a2d0851e23d23cad3/WwOWG8MBGYxHnIeM2Dowo.webp');
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }
    .title {
        font-size: 60px;
        font-weight: bold;
        color: white;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
                    0 4px 15px rgba(74, 35, 90, 0.6);
        display: inline-block;
        margin-bottom: 20px;
        text-align: center;
        animation: elegantFadeSlide 1.5s ease-out forwards;
    }
    .input-box {
        display: flex;
        flex-direction: column;
        align-items: center;
        gap: 20px;
        margin: 0 auto;
        width: 80%;
    }
    .input-prompt {
        font-size: 22px;
        font-weight: bold;
        color: #ffffff;
        text-align: center;
        opacity: 0.8;
    }
    div.stTextArea textarea {
        width: 100%;
        height: 200px;
        padding: 20px;
        border-radius: 15px;
        background-color: rgba(0, 0, 0, 0.7);
        color: white;
        font-size: 18px;
        outline: none;
        box-shadow: 0 6px 20px rgba(136, 14, 79, 0.3);
        transition: all 0.5s ease;
    }
    div.stTextArea textarea:hover {
        transform: scale(1.05);
        box-shadow: 0 10px 30px rgba(136, 14, 79, 0.5);
    }
    .analyze-button {
        width: 200px;
        height: 60px;
        border-radius: 30px;
        background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
        font-size: 20px;
        font-weight: bold;
        color: white;
        border: none;
        cursor: pointer;
        transition: all 0.4s ease;
    }
    .analyze-button:hover {
        transform: scale(1.1);
        box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8);
    }
    .result-box {
        text-align: center;
        font-size: 28px;
        font-weight: bold;
        color: white;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        padding: 30px;
        border-radius: 20px;
        box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5);
        margin-top: 30px;
    }
    </style>
    """

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# --- Streamlit UI layout ---
# Title card (fixes mojibake: "π°" restored to the 📰 newspaper emoji).
st.markdown('<div class="title">📰 Newsense AI - News Classification</div>', unsafe_allow_html=True)

# Input and button section.
# NOTE(review): Streamlit widgets render outside raw-HTML spans, so this
# opening div does not actually contain the text area — kept as a CSS hook.
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area("Enter your news article:", height=200)

# Classify on click. `key` names the widget for Streamlit state; it is not
# applied as a CSS class, so the .analyze-button styles may not attach here.
if st.button("Classify", key="analyze-button"):
    if user_input:
        category, cleaned_text = predict_category(user_input)
        # Display the predicted category and the pre-processed text.
        st.markdown(f'<div class="result-box">Prediction: {category}</div>', unsafe_allow_html=True)
        st.markdown(f'<div class="result-box">Cleaned Text: {cleaned_text}</div>', unsafe_allow_html=True)
    else:
        st.warning("Please enter some text to classify!")

st.markdown('</div>', unsafe_allow_html=True)