Mpavan45 committed on
Commit
db9518c
·
verified ·
1 Parent(s): ada421e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -30
app.py CHANGED
@@ -1,42 +1,191 @@
1
  import streamlit as st
2
- import tensorflow as tf
3
  import numpy as np
4
- import dill
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Load the trained model with custom layers
7
- from tensorflow.keras.layers import TextVectorization
8
- model = tf.keras.models.load_model("news_classification_rnn1.h5",
9
- custom_objects={"TextVectorization": TextVectorization})
10
 
11
- # Load Preprocessing Function
12
- with open("preprocessing1.pkl", "rb") as f:
13
- clean_text = dill.load(f)
14
 
15
- # Load Text Vectorization Layer
16
- with open("vector.pkl", "rb") as f:
17
- vectorizer = dill.load(f)
18
 
19
- # Define News Categories
20
- news_categories = ["Business", "Sci/Tech", "Sports", "World"]
 
21
 
22
- # Streamlit UI
23
- st.title("📰 News Classification with Simple RNN")
24
- st.write("Enter a news headline to predict its category.")
25
 
26
- user_input = st.text_area("Enter News Text:", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- if st.button("Classify"):
29
- if user_input.strip():
30
- # Preprocess input
31
- processed_text = clean_text(user_input)
32
 
33
- # Vectorize input and convert to numpy array
34
- text_sequence = np.array(vectorizer([processed_text]))
 
35
 
36
- # Predict Category
37
- prediction = model.predict(text_sequence)
38
- category = np.argmax(prediction)
 
 
 
 
 
 
 
39
 
40
- st.success(f"Predicted Category: **{news_categories[category]}**")
41
- else:
42
- st.warning("⚠ Please enter a news headline.")
 
1
  import streamlit as st
 
2
  import numpy as np
3
+ import re
4
+ import emoji
5
+ from textblob import TextBlob
6
+ import spacy
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ import tensorflow as tf
10
+ import keras
11
+ from keras.utils import pad_sequences
12
+ import pickle
13
+
14
# Page Config — must be the first Streamlit call in the script
st.set_page_config(page_title="Newsense AI", page_icon="📰", layout="wide")

# Download necessary resources (no-op if the corpus is already present)
nltk.download('stopwords')

# Load SpaCy model (en_core_web_sm must be installed in the environment)
nlp = spacy.load("en_core_web_sm")

# English stopwords plus the domain-specific token "pm"
# NOTE(review): stop_words is defined but never used in this file as shown —
# confirm whether stopword removal was meant to be part of pre_process.
stop_words = set(stopwords.words('english')).union({"pm"})
25
+
26
+ # Pre-processing function (without parentheses extraction)
27
def pre_process(x):
    """Normalize raw news text for the classifier.

    Lowercases, strips HTML tags/URLs/mentions/emojis and selected
    punctuation, spell-corrects with TextBlob, and lemmatizes with SpaCy.

    Args:
        x: Raw input string.

    Returns:
        The cleaned, lemmatized text as a single space-separated string.
    """
    # Convert to lowercase
    x = x.lower()

    # Remove HTML tags
    x = re.sub(r"<.*?>", "", x)

    # Remove URLs
    x = re.sub(r"http[s]?://\S+", "", x)

    # Remove mentions and hashtags (@..., #...)
    x = re.sub(r"[@#]\S+", "", x)

    # Remove emojis
    x = emoji.replace_emoji(x, replace="")

    # Replace selected punctuation (-, ., :, \, ,) with spaces
    x = re.sub(r"[-.:,\\]", " ", x)

    # Unwrap single- and double-quoted spans (keep the inner text)
    x = re.sub(r"['\"](.*?)['\"]", r'\1', x)

    # Remove content inside parentheses
    x = re.sub(r"\(.*?\)", "", x)

    # Collapse whitespace
    x = re.sub(r"\s+", " ", x).strip()

    # Spell checking (TextBlob; can be slow on long inputs)
    x = str(TextBlob(x).correct())

    # Lemmatization using SpaCy
    x = " ".join(token.lemma_ for token in nlp(x))

    # BUG FIX: the previous `return " ".join(x)` iterated over the
    # *characters* of the string, turning e.g. "hello" into "h e l l o".
    # x is already a space-joined string at this point.
    return x
62
+
63
@st.cache_resource
def load_model():
    """Load the trained classifier and its label encoder.

    Cached with st.cache_resource so the disk reads happen once per
    server session rather than on every script rerun.

    Returns:
        Tuple of (keras classification model, fitted label encoder).
    """
    classifier = keras.models.load_model("model_m3_new.keras")
    with open("label_encoder_m5.pkl", 'rb') as fh:
        encoder = pickle.load(fh)
    return classifier, encoder


model, label_encoder = load_model()
71
 
72
@st.cache_resource
def _load_vectorizer():
    """Load the saved text-vectorization model once and reuse it.

    PERF FIX: it was previously re-loaded from disk on every single
    prediction call.
    """
    return keras.models.load_model("vec_text_m3_new.keras")


def predict_category(text):
    """Classify a news article into one of the trained categories.

    Args:
        text: Raw article text entered by the user.

    Returns:
        Tuple of (predicted category label, cleaned/pre-processed text).
    """
    cleaned_text = pre_process(text)

    vectorizer = _load_vectorizer()

    # Vectorize the pre-processed text. np.asarray accepts both an
    # ndarray and an eager tensor; the old `.numpy()` call would raise
    # AttributeError on the plain ndarray that Model.predict returns.
    sequences = np.asarray(vectorizer.predict(np.array([cleaned_text])))
    text_vectorized = pad_sequences(sequences, padding='pre', maxlen=128)

    # Model prediction
    prediction = model.predict(text_vectorized)
    category_idx = np.argmax(prediction, axis=1)[0]

    return label_encoder.inverse_transform([category_idx])[0], cleaned_text
 
 
85
 
86
# Custom CSS injected into the page (unsafe_allow_html required for raw HTML).
# NOTE(review): .title references animation "elegantFadeSlide" but no
# @keyframes block is defined here — confirm it exists elsewhere or the
# animation is a silent no-op.
st.markdown(
    """
    <style>
    body {
        background-image: url('https://cdn-uploads.huggingface.co/production/uploads/67441c51a784a9d15cb12871/4FFTjgkYjYUq6w-0gR15v.jpeg');
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }
    .title {
        font-size: 60px;
        font-weight: bold;
        color: white;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        padding: 20px;
        border-radius: 20px;
        box-shadow: 0 8px 25px rgba(136, 14, 79, 0.5),
        0 4px 15px rgba(74, 35, 90, 0.6);
        display: inline-block;
        margin-bottom: 20px;
        text-align: center;
        animation: elegantFadeSlide 1.5s ease-out forwards;
    }
    .input-box {
        display: flex;
        flex-direction: column;
        align-items: center;
        gap: 20px;
        margin: 0 auto;
        width: 80%;
    }
    .input-prompt {
        font-size: 22px;
        font-weight: bold;
        color: #ffffff;
        text-align: center;
        opacity: 0.8;
    }
    div.stTextArea textarea {
        width: 100%;
        height: 200px;
        padding: 20px;
        border-radius: 15px;
        background-color: rgba(0, 0, 0, 0.7);
        color: white;
        font-size: 18px;
        outline: none;
        box-shadow: 0 6px 20px rgba(136, 14, 79, 0.3);
        transition: all 0.5s ease;
    }
    div.stTextArea textarea:hover {
        transform: scale(1.05);
        box-shadow: 0 10px 30px rgba(136, 14, 79, 0.5);
    }
    .analyze-button {
        width: 200px;
        height: 60px;
        border-radius: 30px;
        background: linear-gradient(45deg, #880E4F, #4A235A, #311B92, #000000);
        font-size: 20px;
        font-weight: bold;
        color: white;
        border: none;
        cursor: pointer;
        transition: all 0.4s ease;
    }
    .analyze-button:hover {
        transform: scale(1.1);
        box-shadow: 0 12px 35px rgba(49, 27, 146, 0.8);
    }
    .result-box {
        text-align: center;
        font-size: 28px;
        font-weight: bold;
        color: white;
        background: linear-gradient(60deg, #880E4F, #4A235A, #311B92, #000000);
        padding: 30px;
        border-radius: 20px;
        box-shadow: 0 6px 20px rgba(74, 35, 90, 0.5);
        margin-top: 30px;
    }
    </style>
    """,
    unsafe_allow_html=True
)
172
 
173
# Streamlit UI layout
st.markdown('<div class="title">📰 Newsense AI - News Classification</div>', unsafe_allow_html=True)

# Input and button section
# NOTE(review): st.markdown emits this opening div as its own element — it
# does not actually wrap the widgets rendered below; confirm the intent.
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area("Enter your news article:", height=200)

# Predict button
if st.button("Classify", key="analyze-button"):
    # BUG FIX: whitespace-only input previously passed the bare truthiness
    # check and was sent to the model; strip before testing (matches the
    # guard the earlier version of this app used).
    if user_input.strip():
        category, cleaned_text = predict_category(user_input)

        # Display the prediction and the cleaned text
        st.markdown(f'<div class="result-box">Prediction: {category}</div>', unsafe_allow_html=True)
        st.markdown(f'<div class="result-box">Cleaned Text: {cleaned_text}</div>', unsafe_allow_html=True)
    else:
        st.warning("Please enter some text to classify!")

st.markdown('</div>', unsafe_allow_html=True)