Update app.py
Browse files
app.py
CHANGED
|
@@ -11,21 +11,20 @@ from nltk.corpus import stopwords
|
|
| 11 |
from nltk.stem import WordNetLemmatizer
|
| 12 |
|
| 13 |
# Download necessary resources
|
| 14 |
-
nltk.download('
|
| 15 |
nltk.download('stopwords')
|
| 16 |
nltk.download('wordnet')
|
| 17 |
|
| 18 |
-
import tensorflow
|
| 19 |
import keras
|
| 20 |
-
from keras.
|
| 21 |
|
| 22 |
import pickle
|
| 23 |
|
| 24 |
# Set Streamlit page configuration
|
| 25 |
st.set_page_config(page_title="News Category Classifier", page_icon="π°", layout="centered")
|
| 26 |
|
| 27 |
-
|
| 28 |
-
# Function to set background image
|
| 29 |
def set_background(image_path):
|
| 30 |
if not os.path.exists(image_path):
|
| 31 |
st.error(f"β Background image not found: {image_path}")
|
|
@@ -36,12 +35,9 @@ def set_background(image_path):
|
|
| 36 |
|
| 37 |
bg_image_style = f"""
|
| 38 |
<style>
|
| 39 |
-
|
| 40 |
-
background
|
| 41 |
background-size: cover;
|
| 42 |
-
background-repeat: no-repeat;
|
| 43 |
-
background-position: center;
|
| 44 |
-
background-attachment: fixed;
|
| 45 |
}}
|
| 46 |
</style>
|
| 47 |
"""
|
|
@@ -55,23 +51,21 @@ set_background("Images/News image.jpg")
|
|
| 55 |
stop_words = set(stopwords.words('english')).union({"pm"})
|
| 56 |
lemmatizer = WordNetLemmatizer()
|
| 57 |
|
| 58 |
-
|
| 59 |
# Preprocessing function
|
| 60 |
def pre_process(x):
|
| 61 |
x = x.lower()
|
| 62 |
x = re.sub("<.*?>", "", x) # Remove HTML tags
|
| 63 |
-
x = re.sub("http[s]?://\S+", "", x) # Remove URLs
|
| 64 |
-
x = re.sub("[@#]\S+", "", x) # Remove mentions and hashtags
|
| 65 |
x = re.sub(r"\_+", " ", x) # Replace underscores with space
|
| 66 |
x = emoji.demojize(x) # Convert emojis to text
|
| 67 |
-
x = re.sub(":.*?:", "", x) # Remove emoji text
|
| 68 |
-
x = re.sub("[^a-zA-Z0-9\s_]", "", x) # Remove special characters
|
| 69 |
words = word_tokenize(x)
|
| 70 |
words = [word for word in words if word not in stop_words]
|
| 71 |
x = " ".join([lemmatizer.lemmatize(word) for word in words])
|
| 72 |
return x
|
| 73 |
|
| 74 |
-
|
| 75 |
# Cache model loading to improve performance
|
| 76 |
@st.cache_resource
|
| 77 |
def load_model():
|
|
@@ -87,11 +81,9 @@ def load_model():
|
|
| 87 |
|
| 88 |
return model, vectorizer, label_encoder
|
| 89 |
|
| 90 |
-
|
| 91 |
# Load the models
|
| 92 |
model, vectorizer, label_encoder = load_model()
|
| 93 |
|
| 94 |
-
|
| 95 |
# Prediction function
|
| 96 |
def predict_category(text):
|
| 97 |
processed_text = [pre_process(text)]
|
|
@@ -100,7 +92,6 @@ def predict_category(text):
|
|
| 100 |
category_idx = np.argmax(prediction, axis=1)[0]
|
| 101 |
return label_encoder.inverse_transform([category_idx])[0]
|
| 102 |
|
| 103 |
-
|
| 104 |
# Streamlit UI
|
| 105 |
st.markdown(
|
| 106 |
"""
|
|
|
|
| 11 |
from nltk.stem import WordNetLemmatizer
|
| 12 |
|
| 13 |
# Download necessary resources
|
| 14 |
+
nltk.download('punkt')
|
| 15 |
nltk.download('stopwords')
|
| 16 |
nltk.download('wordnet')
|
| 17 |
|
| 18 |
+
import tensorflow as tf
|
| 19 |
import keras
|
| 20 |
+
from keras.preprocessing.sequence import pad_sequences
|
| 21 |
|
| 22 |
import pickle
|
| 23 |
|
| 24 |
# Set Streamlit page configuration
|
| 25 |
st.set_page_config(page_title="News Category Classifier", page_icon="π°", layout="centered")
|
| 26 |
|
| 27 |
+
# Function to set background image correctly
|
|
|
|
| 28 |
def set_background(image_path):
|
| 29 |
if not os.path.exists(image_path):
|
| 30 |
st.error(f"β Background image not found: {image_path}")
|
|
|
|
| 35 |
|
| 36 |
bg_image_style = f"""
|
| 37 |
<style>
|
| 38 |
+
.stApp {{
|
| 39 |
+
background: url("data:image/jpg;base64,{encoded_img}") no-repeat center center fixed;
|
| 40 |
background-size: cover;
|
|
|
|
|
|
|
|
|
|
| 41 |
}}
|
| 42 |
</style>
|
| 43 |
"""
|
|
|
|
| 51 |
stop_words = set(stopwords.words('english')).union({"pm"})
|
| 52 |
lemmatizer = WordNetLemmatizer()
|
| 53 |
|
|
|
|
| 54 |
# Preprocessing function
|
| 55 |
def pre_process(x):
    """Normalize one piece of raw article text for classification.

    Pipeline: lowercase -> strip HTML/URLs/mentions -> demojize and drop
    emoji placeholders -> remove special characters -> tokenize, drop
    stop words, lemmatize -> rejoin into a single space-separated string.
    """
    text = x.lower()
    # Strip markup and web artifacts first, before emoji handling.
    for pattern in ("<.*?>", r"http[s]?://\S+", r"[@#]\S+"):
        text = re.sub(pattern, "", text)
    text = re.sub(r"\_+", " ", text)              # underscores -> spaces
    text = emoji.demojize(text)                   # emoji -> ":name:" placeholders
    text = re.sub(r":.*?:", "", text)             # drop those placeholders
    text = re.sub(r"[^a-zA-Z0-9\s_]", "", text)   # keep only alphanumerics/space/_
    tokens = word_tokenize(text)
    kept = (tok for tok in tokens if tok not in stop_words)
    return " ".join(lemmatizer.lemmatize(tok) for tok in kept)
|
| 68 |
|
|
|
|
| 69 |
# Cache model loading to improve performance
|
| 70 |
@st.cache_resource
|
| 71 |
def load_model():
|
|
|
|
| 81 |
|
| 82 |
return model, vectorizer, label_encoder
|
| 83 |
|
|
|
|
| 84 |
# Load the models
|
| 85 |
model, vectorizer, label_encoder = load_model()
|
| 86 |
|
|
|
|
| 87 |
# Prediction function
|
| 88 |
def predict_category(text):
|
| 89 |
processed_text = [pre_process(text)]
|
|
|
|
| 92 |
category_idx = np.argmax(prediction, axis=1)[0]
|
| 93 |
return label_encoder.inverse_transform([category_idx])[0]
|
| 94 |
|
|
|
|
| 95 |
# Streamlit UI
|
| 96 |
st.markdown(
|
| 97 |
"""
|