ABHI010 committed on
Commit
03df076
·
verified ·
1 Parent(s): 53ceef1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -19
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.feature_extraction.text import CountVectorizer
@@ -52,7 +51,7 @@ spam_keywords = [
52
  def highlight_keywords(text):
53
  highlighted = text
54
  for keyword in spam_keywords:
55
- pattern = re.compile(rf"{keyword}", re.IGNORECASE)
56
  highlighted = pattern.sub(f"<span class='highlight'>{keyword}</span>", highlighted)
57
  return highlighted
58
 
@@ -105,6 +104,32 @@ def generate_performance_metrics():
105
  "confusion_matrix_plot": img_base64,
106
  }
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  # Updated CSS
109
  custom_css = """
110
  body {
@@ -115,20 +140,17 @@ body {
115
  background-attachment: fixed;
116
  color: #333;
117
  }
118
-
119
  h1, h2, h3 {
120
  text-align: center;
121
  color: #ffffff;
122
  text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.7);
123
  }
124
-
125
  .gradio-container {
126
  background-color: rgba(255, 255, 255, 0.8);
127
  border-radius: 10px;
128
  padding: 20px;
129
  box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.3);
130
  }
131
-
132
  button {
133
  background-color: #1e90ff;
134
  color: white;
@@ -139,19 +161,16 @@ button {
139
  font-size: 1.2em;
140
  transition: transform 0.2s, background-color 0.3s;
141
  }
142
-
143
  button:hover {
144
  background-color: #1c86ee;
145
  transform: scale(1.05);
146
  }
147
-
148
  .highlight {
149
  background-color: #ffeb3b;
150
  font-weight: bold;
151
  padding: 0 3px;
152
  border-radius: 3px;
153
  }
154
-
155
  .metric {
156
  font-size: 1.2em;
157
  text-align: center;
@@ -204,13 +223,7 @@ def create_interface():
204
  analyze_button.click(
205
  fn=email_analysis_pipeline,
206
  inputs=email_input,
207
- outputs=[
208
- result_output,
209
- confidence_output,
210
- highlighted_output,
211
- keywords_output,
212
- advice_output
213
- ]
214
  )
215
 
216
  gr.Markdown("## 📊 Model Performance Analytics")
@@ -224,6 +237,22 @@ def create_interface():
224
  gr.Markdown("### Confusion Matrix")
225
  gr.HTML(f"<img src='data:image/png;base64,{performance_metrics['confusion_matrix_plot']}' style='max-width: 100%; height: auto;' />")
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  gr.Markdown("## 📘 Glossary and Explanation of Labels")
228
  gr.Markdown(
229
  """
@@ -231,14 +260,19 @@ def create_interface():
231
  - **Spam:** Unwanted or harmful emails flagged by the system.
232
  - **Ham:** Legitimate, safe emails.
233
 
 
 
 
 
 
 
 
 
234
  ### Metrics:
235
- - **Accuracy:** Percentage of correct classifications.
236
  - **Precision:** Out of predicted Spam, how many are actually Spam.
237
  - **Recall:** Out of all actual Spam emails, how many are predicted as Spam.
238
  - **F1 Score:** Harmonic mean of Precision and Recall.
239
-
240
- ### Confusion Matrix:
241
- Shows the distribution of true vs predicted labels.
242
  """
243
  )
244
 
@@ -247,3 +281,4 @@ def create_interface():
247
  # Launch the interface
248
  interface = create_interface()
249
  interface.launch(share=True)
 
 
 
1
  import pandas as pd
2
  from sklearn.model_selection import train_test_split
3
  from sklearn.feature_extraction.text import CountVectorizer
 
def highlight_keywords(text):
    """Wrap every spam keyword found in *text* in a highlight ``<span>``.

    Matching is case-insensitive and only hits whole keywords: a keyword
    embedded in a longer word (``free`` in ``freedom``) is ignored. The
    matched text keeps the casing it had in the input.

    Args:
        text: Email body to annotate.

    Returns:
        The text with each keyword occurrence wrapped in
        ``<span class='highlight'>...</span>``, or an empty string when
        *text* is empty or ``None``.
    """
    if not text:
        return ""

    # Longest keywords first, so a multi-word phrase wins over a shorter
    # keyword it contains; the secondary sort key keeps the order stable.
    keywords = sorted(
        {kw for kw in spam_keywords if kw},
        key=lambda kw: (-len(kw), kw),
    )
    if not keywords:
        return text

    # Escape each keyword so regex metacharacters ("$", "%", "+", ...) are
    # matched literally. The lookarounds behave like \b for ordinary words
    # but also work for keywords that start or end with punctuation.
    alternation = "|".join(re.escape(kw) for kw in keywords)
    pattern = re.compile(rf"(?<!\w)(?:{alternation})(?!\w)", re.IGNORECASE)

    # A single pass over the text means the inserted <span> markup is never
    # re-scanned, so keywords such as "span" or "class" cannot corrupt it.
    # NOTE(review): the surrounding text is emitted unescaped; if the result
    # is rendered as HTML, confirm whether the caller should escape it first.
    return pattern.sub(
        lambda match: f"<span class='highlight'>{match.group(0)}</span>",
        text,
    )
57
 
 
104
  "confusion_matrix_plot": img_base64,
105
  }
106
 
# Function to add new email data and retrain the model
def save_and_retrain(email_text, label):
    """Append one labelled email to the dataset and retrain the model.

    The module-level ``dataset``, ``X``, ``y``, ``model`` and ``vectorizer``
    are updated: the vectorizer is refitted on every text in the enlarged
    dataset, the model is refitted on the result, and both are written to
    ``spam_model.pkl`` / ``spam_vectorizer.pkl`` with ``joblib.dump``.

    Args:
        email_text: Content of the email to add.
        label: ``"Spam"`` or ``"Ham"``.

    Returns:
        A status message: a success notice, or a string starting with
        ``"Error while retraining: "`` when the input is invalid or any
        step raises.
    """
    global dataset, X, y, model, vectorizer

    # Reject bad input before touching shared state. Without this check an
    # unselected label (None) would silently be stored as Ham.
    if not isinstance(email_text, str) or not email_text.strip():
        return "Error while retraining: email content is empty."
    if label not in ("Spam", "Ham"):
        return "Error while retraining: label must be 'Spam' or 'Ham'."

    try:
        # Convert label to numeric value (0 for Ham, 1 for Spam)
        label_numeric = 1 if label == "Spam" else 0

        # Add the new data to the dataset
        new_data = pd.DataFrame({"text": [email_text], "spam": [label_numeric]})
        dataset = pd.concat([dataset, new_data], ignore_index=True)

        # Vectorize the updated text data
        X = vectorizer.fit_transform(dataset["text"])
        y = dataset["spam"]

        # Retrain the model
        model.fit(X, y)

        # Save the updated model and vectorizer
        joblib.dump(model, "spam_model.pkl")
        joblib.dump(vectorizer, "spam_vectorizer.pkl")

        return "Model retrained successfully with new data!"
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error while retraining: {e}"
132
+
133
  # Updated CSS
134
  custom_css = """
135
  body {
 
140
  background-attachment: fixed;
141
  color: #333;
142
  }
 
143
  h1, h2, h3 {
144
  text-align: center;
145
  color: #ffffff;
146
  text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.7);
147
  }
 
148
  .gradio-container {
149
  background-color: rgba(255, 255, 255, 0.8);
150
  border-radius: 10px;
151
  padding: 20px;
152
  box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.3);
153
  }
 
154
  button {
155
  background-color: #1e90ff;
156
  color: white;
 
161
  font-size: 1.2em;
162
  transition: transform 0.2s, background-color 0.3s;
163
  }
 
164
  button:hover {
165
  background-color: #1c86ee;
166
  transform: scale(1.05);
167
  }
 
168
  .highlight {
169
  background-color: #ffeb3b;
170
  font-weight: bold;
171
  padding: 0 3px;
172
  border-radius: 3px;
173
  }
 
174
  .metric {
175
  font-size: 1.2em;
176
  text-align: center;
 
223
  analyze_button.click(
224
  fn=email_analysis_pipeline,
225
  inputs=email_input,
226
+ outputs=[result_output, confidence_output, highlighted_output, keywords_output, advice_output]
 
 
 
 
 
 
227
  )
228
 
229
  gr.Markdown("## 📊 Model Performance Analytics")
 
237
  gr.Markdown("### Confusion Matrix")
238
  gr.HTML(f"<img src='data:image/png;base64,{performance_metrics['confusion_matrix_plot']}' style='max-width: 100%; height: auto;' />")
239
 
240
+ gr.Markdown("## 🛠️ Save and Retrain the Model")
241
+ with gr.Row():
242
+ email_for_retraining = gr.Textbox(
243
+ lines=8, placeholder="Enter the email content to label as Spam or Ham and retrain", label="Email Content"
244
+ )
245
+ label_input = gr.Radio(["Spam", "Ham"], label="Label", type="value")
246
+
247
+ retrain_button = gr.Button("Save & Retrain Model")
248
+ retrain_result = gr.Textbox(label="Retrain Result", interactive=False)
249
+
250
+ retrain_button.click(
251
+ fn=save_and_retrain,
252
+ inputs=[email_for_retraining, label_input],
253
+ outputs=retrain_result
254
+ )
255
+
256
  gr.Markdown("## 📘 Glossary and Explanation of Labels")
257
  gr.Markdown(
258
  """
 
260
  - **Spam:** Unwanted or harmful emails flagged by the system.
261
  - **Ham:** Legitimate, safe emails.
262
 
263
+ ### Confusion Matrix:
264
+ The confusion matrix shows the performance of the model by comparing the true labels with the predicted ones.
265
+ It consists of:
266
+ - **True Positives (TP):** Correctly predicted spam emails.
267
+ - **True Negatives (TN):** Correctly predicted ham emails.
268
+ - **False Positives (FP):** Ham emails incorrectly predicted as spam.
269
+ - **False Negatives (FN):** Spam emails incorrectly predicted as ham.
270
+
271
  ### Metrics:
272
+ - **Accuracy:** The percentage of correct classifications.
273
  - **Precision:** Out of predicted Spam, how many are actually Spam.
274
  - **Recall:** Out of all actual Spam emails, how many are predicted as Spam.
275
  - **F1 Score:** Harmonic mean of Precision and Recall.
 
 
 
276
  """
277
  )
278
 
 
281
  # Launch the interface
282
  interface = create_interface()
283
  interface.launch(share=True)
284
+