Adityaganesh committed
Commit 14ec450 · verified · 1 Parent(s): c161086

Update app.py

Files changed (1):
  1. app.py +10 -19
app.py CHANGED
@@ -11,21 +11,20 @@ from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
 
 # Download necessary resources
-nltk.download('punkt_tab')
+nltk.download('punkt')
 nltk.download('stopwords')
 nltk.download('wordnet')
 
-import tensorflow
+import tensorflow as tf
 import keras
-from keras.utils import pad_sequences
+from keras.preprocessing.sequence import pad_sequences
 
 import pickle
 
 # Set Streamlit page configuration
 st.set_page_config(page_title="News Category Classifier", page_icon="📰", layout="centered")
 
-
-# Function to set background image
+# Function to set background image correctly
 def set_background(image_path):
     if not os.path.exists(image_path):
         st.error(f"❌ Background image not found: {image_path}")
@@ -36,12 +35,9 @@ def set_background(image_path):
 
     bg_image_style = f"""
     <style>
-    body {{
-        background-image: url("data:image/jpg;base64,{encoded_img}");
+    .stApp {{
+        background: url("data:image/jpg;base64,{encoded_img}") no-repeat center center fixed;
         background-size: cover;
-        background-repeat: no-repeat;
-        background-position: center;
-        background-attachment: fixed;
     }}
     </style>
     """
@@ -55,23 +51,21 @@ set_background("Images/News image.jpg")
 stop_words = set(stopwords.words('english')).union({"pm"})
 lemmatizer = WordNetLemmatizer()
 
-
 # Preprocessing function
 def pre_process(x):
     x = x.lower()
     x = re.sub("<.*?>", "", x)  # Remove HTML tags
-    x = re.sub("http[s]?://\S+", "", x)  # Remove URLs
-    x = re.sub("[@#]\S+", "", x)  # Remove mentions and hashtags
+    x = re.sub(r"http[s]?://\S+", "", x)  # Remove URLs
+    x = re.sub(r"[@#]\S+", "", x)  # Remove mentions and hashtags
     x = re.sub(r"\_+", " ", x)  # Replace underscores with space
     x = emoji.demojize(x)  # Convert emojis to text
-    x = re.sub(":.*?:", "", x)  # Remove emoji text
-    x = re.sub("[^a-zA-Z0-9\s_]", "", x)  # Remove special characters
+    x = re.sub(r":.*?:", "", x)  # Remove emoji text
+    x = re.sub(r"[^a-zA-Z0-9\s_]", "", x)  # Remove special characters
     words = word_tokenize(x)
     words = [word for word in words if word not in stop_words]
     x = " ".join([lemmatizer.lemmatize(word) for word in words])
     return x
 
-
 # Cache model loading to improve performance
 @st.cache_resource
 def load_model():
@@ -87,11 +81,9 @@ def load_model():
 
     return model, vectorizer, label_encoder
 
-
 # Load the models
 model, vectorizer, label_encoder = load_model()
 
-
 # Prediction function
 def predict_category(text):
     processed_text = [pre_process(text)]
@@ -100,7 +92,6 @@ def predict_category(text):
     category_idx = np.argmax(prediction, axis=1)[0]
     return label_encoder.inverse_transform([category_idx])[0]
 
-
 # Streamlit UI
 st.markdown(
     """
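Note on the CSS hunk: Streamlit draws its own `.stApp` container over the page body, so a background set on `body` often never becomes visible, which is presumably why the patch retargets the selector. A minimal sketch of how the patched helper plausibly reads once the hunks apply; the base64 read and the `st.markdown(..., unsafe_allow_html=True)` call sit outside the visible diff context, so they are assumptions:

# Sketch only: the base64 encoding and the st.markdown injection are assumed,
# not shown in the diff; the image path is taken from the hunk header above.
import base64
import os

import streamlit as st

def set_background(image_path):
    if not os.path.exists(image_path):
        st.error(f"❌ Background image not found: {image_path}")
        return
    with open(image_path, "rb") as f:
        encoded_img = base64.b64encode(f.read()).decode()
    bg_image_style = f"""
    <style>
    .stApp {{
        background: url("data:image/jpg;base64,{encoded_img}") no-repeat center center fixed;
        background-size: cover;
    }}
    </style>
    """
    # unsafe_allow_html=True is required for the <style> block to take effect
    st.markdown(bg_image_style, unsafe_allow_html=True)

set_background("Images/News image.jpg")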