Mpavan45 committed on
Commit
d15065c
·
verified ·
1 Parent(s): e18387b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -66
app.py CHANGED
@@ -10,73 +10,12 @@ import tensorflow as tf
10
  import keras
11
  from keras.utils import pad_sequences
12
  import pickle
 
13
 
14
  # Streamlit UI
15
  st.set_page_config(page_title="PressGuard", page_icon="🛡️")
16
 
17
- # Radium color effect for the title
18
- st.markdown("""
19
- <style>
20
- .radium {
21
- font-size: 60px;
22
- font-weight: bold;
23
- color: #f4ff81; /* Radium-like light greenish-yellow color */
24
- text-shadow: 0 0 5px #f4ff81, 0 0 10px #f4ff81, 0 0 20px #f4ff81, 0 0 30px #f4ff81;
25
- text-align: center;
26
- }
27
- .tagline {
28
- font-size: 20px;
29
- color: #ffffff;
30
- text-align: center;
31
- margin-bottom: 30px;
32
- }
33
- </style>
34
- <div class='radium'>🛡️ PressGuard</div>
35
- <div class='tagline'>Classify and Filter Trustworthy News</div>
36
- """, unsafe_allow_html=True)
37
-
38
- # Download necessary resources
39
- nltk.download('punkt')
40
- nltk.download('stopwords')
41
- nltk.download('wordnet')
42
-
43
- # Initialize stopwords and lemmatizer
44
- stop_words = set(stopwords.words('english')).union({"pm"})
45
- lemmatizer = WordNetLemmatizer()
46
-
47
- def pre_process(x):
48
- x = x.lower()
49
- x = re.sub("<.*?>", "", x)
50
- x = re.sub("http[s]?://.+?\\S+", "", x)
51
- x = re.sub("[@#].+?\\S", "", x)
52
- x = re.sub(r"\\_+", " ", x)
53
- x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
54
- x = emoji.demojize(x)
55
- x = re.sub(":.*?:", "", x)
56
- x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
57
- words = word_tokenize(x)
58
- words = [word for word in words if word not in stop_words]
59
- x = " ".join([lemmatizer.lemmatize(word) for word in words])
60
- return x
61
-
62
- @st.cache_resource
63
- def load_model():
64
- model = keras.models.load_model("model_m3_new.keras")
65
- vectorizer = keras.models.load_model("vec_text_m3_new.keras")
66
- with open("label_encoder_m5.pkl", 'rb') as file:
67
- label_encoder = pickle.load(file)
68
- return model, vectorizer, label_encoder
69
-
70
- model, vectorizer, label_encoder = load_model()
71
-
72
- def predict_category(text):
73
- processed_text = [pre_process(text)]
74
- text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=128)
75
- prediction = model.predict(text_vectorized)
76
- category_idx = np.argmax(prediction, axis=1)[0]
77
- return label_encoder.inverse_transform([category_idx])[0]
78
-
79
- # Custom CSS with Radium Color Effect for the Prompt
80
  st.markdown(
81
  """
82
  <style>
@@ -86,9 +25,11 @@ st.markdown(
86
  background-repeat: no-repeat;
87
  background-attachment: fixed;
88
  }
 
89
  .centered-container {
90
  text-align: center;
91
  }
 
92
  .title {
93
  font-size: 60px;
94
  font-weight: bold;
@@ -104,7 +45,6 @@ st.markdown(
104
  animation: elegantFadeSlide 1.5s ease-out forwards;
105
  }
106
 
107
- /* Radium Effect for the Prompt */
108
  .prompt-box {
109
  font-size: 22px;
110
  font-weight: bold;
@@ -177,11 +117,71 @@ st.markdown(
177
  """,
178
  unsafe_allow_html=True
179
  )
180
- # Option 3: Using single string (Best for simplicity)
 
181
  st.markdown("<div class='centered-container'><h1 class='title'>PressGuard</h1></div>", unsafe_allow_html=True)
182
  st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with Newsense AI</div>", unsafe_allow_html=True)
183
 
184
- # User input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  input_text = st.text_area("Enter News Article:", height=200)
186
 
187
  if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):
 
10
  import keras
11
  from keras.utils import pad_sequences
12
  import pickle
13
+ import os
14
 
15
  # Streamlit UI
16
  st.set_page_config(page_title="PressGuard", page_icon="🛡️")
17
 
18
+ # Background Image and Enhanced Styling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  st.markdown(
20
  """
21
  <style>
 
25
  background-repeat: no-repeat;
26
  background-attachment: fixed;
27
  }
28
+
29
  .centered-container {
30
  text-align: center;
31
  }
32
+
33
  .title {
34
  font-size: 60px;
35
  font-weight: bold;
 
45
  animation: elegantFadeSlide 1.5s ease-out forwards;
46
  }
47
 
 
48
  .prompt-box {
49
  font-size: 22px;
50
  font-weight: bold;
 
117
  """,
118
  unsafe_allow_html=True
119
  )
120
+
121
+ # Title and Prompt
122
  st.markdown("<div class='centered-container'><h1 class='title'>PressGuard</h1></div>", unsafe_allow_html=True)
123
  st.markdown("<div class='prompt-box'>Paste the article content below to analyze its category with Newsense AI</div>", unsafe_allow_html=True)
124
 
125
+ # Check if NLTK resources are already downloaded
126
+ nltk_data_path = os.path.expanduser('~/nltk_data')
127
+ if not os.path.exists(nltk_data_path):
128
+ os.makedirs(nltk_data_path)
129
+
130
+ try:
131
+ nltk.data.find('tokenizers/punkt')
132
+ except LookupError:
133
+ nltk.download('punkt', download_dir=nltk_data_path)
134
+
135
+ try:
136
+ nltk.data.find('corpora/stopwords')
137
+ except LookupError:
138
+ nltk.download('stopwords', download_dir=nltk_data_path)
139
+
140
+ try:
141
+ nltk.data.find('corpora/wordnet')
142
+ except LookupError:
143
+ nltk.download('wordnet', download_dir=nltk_data_path)
144
+
145
+ # Initialize stopwords and lemmatizer
146
+ stop_words = set(stopwords.words('english')).union({"pm"})
147
+ lemmatizer = WordNetLemmatizer()
148
+
149
+ # Preprocessing Function
150
+ def pre_process(x):
151
+ x = x.lower()
152
+ x = re.sub("<.*?>", "", x)
153
+ x = re.sub("http[s]?://.+?\\S+", "", x)
154
+ x = re.sub("[@#].+?\\S", "", x)
155
+ x = re.sub(r"\\_+", " ", x)
156
+ x = re.sub("^[A-Za-z.].*\\s-\\s", "", x)
157
+ x = emoji.demojize(x)
158
+ x = re.sub(":.*?:", "", x)
159
+ x = re.sub("[^a-zA-Z0-9\\s_]", "", x)
160
+ words = word_tokenize(x)
161
+ words = [word for word in words if word not in stop_words]
162
+ x = " ".join([lemmatizer.lemmatize(word) for word in words])
163
+ return x
164
+
165
+ # Load Model
166
+ @st.cache_resource
167
+ def load_model():
168
+ model = keras.models.load_model("model_m3_new.keras")
169
+ vectorizer = keras.models.load_model("vec_text_m3_new.keras")
170
+ with open("label_encoder_m5.pkl", 'rb') as file:
171
+ label_encoder = pickle.load(file)
172
+ return model, vectorizer, label_encoder
173
+
174
+ model, vectorizer, label_encoder = load_model()
175
+
176
+ # Prediction Function
177
+ def predict_category(text):
178
+ processed_text = [pre_process(text)]
179
+ text_vectorized = pad_sequences(vectorizer(processed_text).numpy().tolist(), padding='pre', maxlen=128)
180
+ prediction = model.predict(text_vectorized)
181
+ category_idx = np.argmax(prediction, axis=1)[0]
182
+ return label_encoder.inverse_transform([category_idx])[0]
183
+
184
+ # User Input
185
  input_text = st.text_area("Enter News Article:", height=200)
186
 
187
  if st.button("Analyze", key="analyze-btn", help="Click to classify the news article"):