Spaces:

Adityaganesh
/

News_Classification_prediction

Sleeping

App Files Files Community

Adityaganesh committed on Apr 2, 2025

Commit

c161086

verified ·

1 Parent(s): a4c3d0e

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -21

app.py CHANGED Viewed

@@ -3,11 +3,13 @@ import streamlit as st
 import numpy as np
 import re
 import emoji
 import nltk
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 # Download necessary resources
 nltk.download('punkt_tab')
 nltk.download('stopwords')
@@ -19,54 +21,58 @@ from keras.utils import pad_sequences
 import pickle
-# Streamlit UI
 st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")
 def set_background(image_path):
     with open(image_path, "rb") as img_file:
         encoded_img = base64.b64encode(img_file.read()).decode()
     bg_image_style = f"""
     <style>
-        .stApp::before {{
-            content: "";
-            position: fixed;
-            top: 0;
-            left: 0;
-            width: 100%;
-            height: 100%;
             background-image: url("data:image/jpg;base64,{encoded_img}");
             background-size: cover;
             background-repeat: no-repeat;
             background-position: center;
-            z-index: -1;
         }}
     </style>
     """
     st.markdown(bg_image_style, unsafe_allow_html=True)
-# Update the image path
-set_background("Images/News image.jpg")  # Ensure the image is in the correct folder
 # Initialize stopwords and lemmatizer
 stop_words = set(stopwords.words('english')).union({"pm"})
 lemmatizer = WordNetLemmatizer()
 def pre_process(x):
     x = x.lower()
-    x = re.sub("<.*?>", "", x)
-    x = re.sub("http[s]?://.+?\\S+", "", x)
-    x = re.sub("[@#].+?\\S", "", x)
-    x = re.sub(r"\\_+", " ", x)
-    x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
-    x = emoji.demojize(x)
-    x = re.sub(":.*?:", "", x)
-    x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
     words = word_tokenize(x)
     words = [word for word in words if word not in stop_words]
     x = " ".join([lemmatizer.lemmatize(word) for word in words])
     return x
 @st.cache_resource
 def load_model():
     model_path = "news_model.keras"
@@ -75,12 +81,18 @@ def load_model():
     model = keras.models.load_model(model_path)
     vectorizer = keras.models.load_model(vectorizer_path)
     with open(label_encoder_path, 'rb') as file:
         label_encoder = pickle.load(file)
     return model, vectorizer, label_encoder
 model, vectorizer, label_encoder = load_model()
 def predict_category(text):
     processed_text = [pre_process(text)]
     text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=82)
@@ -88,7 +100,8 @@ def predict_category(text):
     category_idx = np.argmax(prediction, axis=1)[0]
     return label_encoder.inverse_transform([category_idx])[0]
-# UI
 st.markdown(
     """
     <style>
@@ -155,12 +168,15 @@ st.markdown(
     unsafe_allow_html=True
 )
 st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
 st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
 user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
-if st.button("Analyze 🍿"):
     if user_input.strip():
         category = predict_category(user_input)
         st.markdown(f"<div class='result-box'><span class='result-text'>🗂️ Predicted Category: <strong>{category}</strong></span></div>", unsafe_allow_html=True)

 import numpy as np
 import re
 import emoji
+import os
 import nltk
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 # Download necessary resources
 nltk.download('punkt_tab')
 nltk.download('stopwords')
 import pickle
+# Set Streamlit page configuration
 st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")
+# Function to set background image
 def set_background(image_path):
     """Use the image file at ``image_path`` as the page background.

     When the path does not exist, an error is shown with ``st.error`` and
     nothing else happens. Otherwise the file is read in binary mode,
     base64-encoded, and embedded as a data URL in a ``<style>`` rule for
     ``body`` that is injected through ``st.markdown``.

     Args:
         image_path: Path of the image file to embed.
     """
     # Guard clause: report a missing file in the UI instead of raising.
     if not os.path.exists(image_path):
         st.error(f"❌ Background image not found: {image_path}")
         return

     with open(image_path, "rb") as handle:
         raw_bytes = handle.read()
     encoded_img = base64.b64encode(raw_bytes).decode()

     # Doubled braces are literal CSS braces inside the f-string.
     css_block = f"""
     <style>
         body {{
             background-image: url("data:image/jpg;base64,{encoded_img}");
             background-size: cover;
             background-repeat: no-repeat;
             background-position: center;
             background-attachment: fixed;
         }}
     </style>
     """
     st.markdown(css_block, unsafe_allow_html=True)
+# Set background image
+set_background("Images/News image.jpg")
 # Initialize stopwords and lemmatizer
 stop_words = set(stopwords.words('english')).union({"pm"})
 lemmatizer = WordNetLemmatizer()
+# Preprocessing function
 def pre_process(x):
     """Clean raw text and return it as a space-joined string of lemmas.

     Steps, in order: lowercase the text; remove HTML tags, URLs, and
     @mentions/#hashtags; replace runs of underscores with a space; pass the
     text through ``emoji.demojize`` and drop colon-delimited spans; remove
     every character that is not alphanumeric, whitespace, or underscore;
     tokenize with ``word_tokenize``; discard tokens found in ``stop_words``;
     lemmatize each remaining token.

     Args:
         x: Input text.

     Returns:
         The processed tokens joined with single spaces.
     """
     x = x.lower()
     # All patterns are raw strings so that regex escapes such as \S and \s
     # reach the regex engine untouched; in a normal string literal they are
     # invalid escape sequences that Python warns about.
     x = re.sub(r"<.*?>", "", x)  # Remove HTML tags
     x = re.sub(r"http[s]?://\S+", "", x)  # Remove URLs
     x = re.sub(r"[@#]\S+", "", x)  # Remove mentions and hashtags
     x = re.sub(r"\_+", " ", x)  # Replace underscores with space
     x = emoji.demojize(x)  # Convert emojis to text
     # NOTE(review): this pattern removes any span between two colons, so
     # ordinary text such as "a: b: c" loses " b" as well — confirm this
     # matches the preprocessing used when the model was trained.
     x = re.sub(r":.*?:", "", x)  # Remove emoji text
     x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)  # Remove special characters
     words = word_tokenize(x)
     words = [word for word in words if word not in stop_words]
     x = " ".join([lemmatizer.lemmatize(word) for word in words])
     return x
+# Cache model loading to improve performance
 @st.cache_resource
 def load_model():
     model_path = "news_model.keras"
     model = keras.models.load_model(model_path)
     vectorizer = keras.models.load_model(vectorizer_path)
     with open(label_encoder_path, 'rb') as file:
         label_encoder = pickle.load(file)
     return model, vectorizer, label_encoder
+# Load the models
 model, vectorizer, label_encoder = load_model()
+# Prediction function
 def predict_category(text):
     processed_text = [pre_process(text)]
     text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=82)
     category_idx = np.argmax(prediction, axis=1)[0]
     return label_encoder.inverse_transform([category_idx])[0]
+# Streamlit UI
 st.markdown(
     """
     <style>
     unsafe_allow_html=True
 )
+# Page title
 st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
 st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
+# User input
 user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
+# Button to analyze
+if st.button("Analyze 🍿", key="analyze_button"):
     if user_input.strip():
         category = predict_category(user_input)
         st.markdown(f"<div class='result-box'><span class='result-text'>🗂️ Predicted Category: <strong>{category}</strong></span></div>", unsafe_allow_html=True)