Adityaganesh committed on
Commit
c161086
·
verified ·
1 Parent(s): a4c3d0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -21
app.py CHANGED
@@ -3,11 +3,13 @@ import streamlit as st
3
  import numpy as np
4
  import re
5
  import emoji
 
6
 
7
  import nltk
8
  from nltk.tokenize import word_tokenize
9
  from nltk.corpus import stopwords
10
  from nltk.stem import WordNetLemmatizer
 
11
  # Download necessary resources
12
  nltk.download('punkt_tab')
13
  nltk.download('stopwords')
@@ -19,54 +21,58 @@ from keras.utils import pad_sequences
19
 
20
  import pickle
21
 
22
- # Streamlit UI
23
  st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")
24
 
 
 
25
  def set_background(image_path):
 
 
 
 
26
  with open(image_path, "rb") as img_file:
27
  encoded_img = base64.b64encode(img_file.read()).decode()
28
 
29
  bg_image_style = f"""
30
  <style>
31
- .stApp::before {{
32
- content: "";
33
- position: fixed;
34
- top: 0;
35
- left: 0;
36
- width: 100%;
37
- height: 100%;
38
  background-image: url("data:image/jpg;base64,{encoded_img}");
39
  background-size: cover;
40
  background-repeat: no-repeat;
41
  background-position: center;
42
- z-index: -1;
43
  }}
44
  </style>
45
  """
46
  st.markdown(bg_image_style, unsafe_allow_html=True)
47
 
48
- # Update the image path
49
- set_background("Images/News image.jpg") # Ensure the image is in the correct folder
 
50
 
51
  # Initialize stopwords and lemmatizer
52
  stop_words = set(stopwords.words('english')).union({"pm"})
53
  lemmatizer = WordNetLemmatizer()
54
 
 
 
55
  def pre_process(x):
56
  x = x.lower()
57
- x = re.sub("<.*?>", "", x)
58
- x = re.sub("http[s]?://.+?\\S+", "", x)
59
- x = re.sub("[@#].+?\\S", "", x)
60
- x = re.sub(r"\\_+", " ", x)
61
- x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
62
- x = emoji.demojize(x)
63
- x = re.sub(":.*?:", "", x)
64
- x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
65
  words = word_tokenize(x)
66
  words = [word for word in words if word not in stop_words]
67
  x = " ".join([lemmatizer.lemmatize(word) for word in words])
68
  return x
69
 
 
 
70
  @st.cache_resource
71
  def load_model():
72
  model_path = "news_model.keras"
@@ -75,12 +81,18 @@ def load_model():
75
 
76
  model = keras.models.load_model(model_path)
77
  vectorizer = keras.models.load_model(vectorizer_path)
 
78
  with open(label_encoder_path, 'rb') as file:
79
  label_encoder = pickle.load(file)
 
80
  return model, vectorizer, label_encoder
81
 
 
 
82
  model, vectorizer, label_encoder = load_model()
83
 
 
 
84
  def predict_category(text):
85
  processed_text = [pre_process(text)]
86
  text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=82)
@@ -88,7 +100,8 @@ def predict_category(text):
88
  category_idx = np.argmax(prediction, axis=1)[0]
89
  return label_encoder.inverse_transform([category_idx])[0]
90
 
91
- # UI
 
92
  st.markdown(
93
  """
94
  <style>
@@ -155,12 +168,15 @@ st.markdown(
155
  unsafe_allow_html=True
156
  )
157
 
 
158
  st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
159
  st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
160
 
 
161
  user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
162
 
163
- if st.button("Analyze 🍿"):
 
164
  if user_input.strip():
165
  category = predict_category(user_input)
166
  st.markdown(f"<div class='result-box'><span class='result-text'>🗂️ Predicted Category: <strong>{category}</strong></span></div>", unsafe_allow_html=True)
 
3
  import numpy as np
4
  import re
5
  import emoji
6
+ import os
7
 
8
  import nltk
9
  from nltk.tokenize import word_tokenize
10
  from nltk.corpus import stopwords
11
  from nltk.stem import WordNetLemmatizer
12
+
13
  # Download necessary resources
14
  nltk.download('punkt_tab')
15
  nltk.download('stopwords')
 
21
 
22
  import pickle
23
 
24
+ # Set Streamlit page configuration
25
  st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")
26
 
27
+
28
+ # Function to set background image
29
  def set_background(image_path):
30
+ if not os.path.exists(image_path):
31
+ st.error(f"❌ Background image not found: {image_path}")
32
+ return
33
+
34
  with open(image_path, "rb") as img_file:
35
  encoded_img = base64.b64encode(img_file.read()).decode()
36
 
37
  bg_image_style = f"""
38
  <style>
39
+ body {{
 
 
 
 
 
 
40
  background-image: url("data:image/jpg;base64,{encoded_img}");
41
  background-size: cover;
42
  background-repeat: no-repeat;
43
  background-position: center;
44
+ background-attachment: fixed;
45
  }}
46
  </style>
47
  """
48
  st.markdown(bg_image_style, unsafe_allow_html=True)
49
 
50
+ # Set background image
51
+ set_background("Images/News image.jpg")
52
+
53
 
54
  # Initialize stopwords and lemmatizer
55
  stop_words = set(stopwords.words('english')).union({"pm"})
56
  lemmatizer = WordNetLemmatizer()
57
 
58
+
59
+ # Preprocessing function
60
  def pre_process(x):
61
  x = x.lower()
62
+ x = re.sub("<.*?>", "", x) # Remove HTML tags
63
+ x = re.sub("http[s]?://\S+", "", x) # Remove URLs
64
+ x = re.sub("[@#]\S+", "", x) # Remove mentions and hashtags
65
+ x = re.sub(r"\_+", " ", x) # Replace underscores with space
66
+ x = emoji.demojize(x) # Convert emojis to text
67
+ x = re.sub(":.*?:", "", x) # Remove emoji text
68
+ x = re.sub("[^a-zA-Z0-9\s_]", "", x) # Remove special characters
 
69
  words = word_tokenize(x)
70
  words = [word for word in words if word not in stop_words]
71
  x = " ".join([lemmatizer.lemmatize(word) for word in words])
72
  return x
73
 
74
+
75
+ # Cache model loading to improve performance
76
  @st.cache_resource
77
  def load_model():
78
  model_path = "news_model.keras"
 
81
 
82
  model = keras.models.load_model(model_path)
83
  vectorizer = keras.models.load_model(vectorizer_path)
84
+
85
  with open(label_encoder_path, 'rb') as file:
86
  label_encoder = pickle.load(file)
87
+
88
  return model, vectorizer, label_encoder
89
 
90
+
91
+ # Load the models
92
  model, vectorizer, label_encoder = load_model()
93
 
94
+
95
+ # Prediction function
96
  def predict_category(text):
97
  processed_text = [pre_process(text)]
98
  text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=82)
 
100
  category_idx = np.argmax(prediction, axis=1)[0]
101
  return label_encoder.inverse_transform([category_idx])[0]
102
 
103
+
104
+ # Streamlit UI
105
  st.markdown(
106
  """
107
  <style>
 
168
  unsafe_allow_html=True
169
  )
170
 
171
+ # Page title
172
  st.markdown("<div class='title'>📰 News Classifier</div>", unsafe_allow_html=True)
173
  st.markdown("<div class='subtitle'>Enter a news headline or article snippet to analyze its category.</div>", unsafe_allow_html=True)
174
 
175
+ # User input
176
  user_input = st.text_area("Enter text here:", height=150, placeholder="Type your news text here...")
177
 
178
+ # Button to analyze
179
+ if st.button("Analyze 🍿", key="analyze_button"):
180
  if user_input.strip():
181
  category = predict_category(user_input)
182
  st.markdown(f"<div class='result-box'><span class='result-text'>πŸ—‚οΈ Predicted Category: <strong>{category}</strong></span></div>", unsafe_allow_html=True)