leygit committed on
Commit
88630ff
·
verified ·
1 Parent(s): dd35c69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -14
app.py CHANGED
@@ -165,20 +165,12 @@ def generate_performance_metrics():
165
  def create_interface():
166
  performance_metrics = generate_performance_metrics()
167
  with gr.Blocks() as interface:
168
- with gr.Tab("Demo"):
169
- gr.Markdown("Spam and Phishing Email Detection")
170
  gr.Markdown(
171
  """
172
  Welcome to the Spam and Phishing Email Detection Demo! This tool leverages DistilBERT, a lightweight yet powerful transformer model, to classify emails as ham (legitimate), spam, or phishing based on their content.
173
-
174
- To provide a comprehensive overview of the system, the demo is divided into three key sections:
175
-
176
- Detection Demo – Input an email and see real-time classification results.
177
-
178
- Metrics Analysis – Gain insights into the performance of the model, including accuracy, precision, recall, and F1-score.
179
-
180
- Credits – Acknowledging the datasets, tools, and frameworks that made this project possible.
181
- This project aims to enhance email security by identifying malicious messages with high accuracy, reducing the risk of scams and fraud. Feel free to explore the demo and see how AI is improving cybersecurity!
182
  """)
183
 
184
 
@@ -206,7 +198,7 @@ def create_interface():
206
  outputs=[result_output, confidence_output]
207
  )
208
 
209
- with gr.Tab("Analysis"):
210
  gr.Markdown("## Dataset Overview")
211
  gr.Markdown("### Dataet Headers")
212
  gr.DataFrame(df)
@@ -228,9 +220,91 @@ def create_interface():
228
  gr.Textbox(value=performance_metrics["recall"], label="Recall", interactive=False)
229
  gr.Textbox(value=performance_metrics["f1_score"], label="F1 Score", interactive=False)
230
 
231
- with gr.Tab("Glossary"):
232
- gr.Markdown(" ## Credits and Reference ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
 
 
 
 
 
 
 
 
 
234
  return interface
235
 
236
  # Launch the interface
 
165
  def create_interface():
166
  performance_metrics = generate_performance_metrics()
167
  with gr.Blocks() as interface:
168
+ with gr.Tab(" 📨 Demo"):
169
+ gr.Markdown("📧🔍 Spam and Phishing Email Detection")
170
  gr.Markdown(
171
  """
172
  Welcome to the Spam and Phishing Email Detection Demo! This tool leverages DistilBERT, a lightweight yet powerful transformer model, to classify emails as ham (legitimate), spam, or phishing based on their content.
173
+ This project aims to enhance email security by identifying malicious messages with high accuracy, reducing the risk of scams and fraud. Feel free to explore the demo and see how AI can provide a safer environment for everyone.
 
 
 
 
 
 
 
 
174
  """)
175
 
176
 
 
198
  outputs=[result_output, confidence_output]
199
  )
200
 
201
+ with gr.Tab(" 📈 Analysis"):
202
  gr.Markdown("## Dataset Overview")
203
  gr.Markdown("### Dataet Headers")
204
  gr.DataFrame(df)
 
220
  gr.Textbox(value=performance_metrics["recall"], label="Recall", interactive=False)
221
  gr.Textbox(value=performance_metrics["f1_score"], label="F1 Score", interactive=False)
222
 
223
+ with gr.Tab("📜 Glossary"):
224
+ with gr.Column():
225
+ with gr.Row():
226
+ gr.Markdown(
227
+ """
228
+ ### Label Definitions
229
+ - Spam: Unwanted or potentially harmful emails detected by the system.
230
+ - Ham: Legitimate and safe emails.
231
+
232
+ ### Evaluation Metrics
233
+ - Accuracy: Measures the percentage of correctly classified emails.
234
+ - Precision: Out of all emails classified as spam, how many were actually spam?
235
+ - Recall: Out of all actual spam emails, how many were identified correctly?
236
+ - F1 Score: A balance between precision and recall for overall performance assessment.
237
+
238
+ """
239
+ )
240
+ gr.Column():
241
+ gr.Markdown(" ## 🔍 Libraries Used and Their Objectives")
242
+ gr.Markdown(
243
+ """
244
+ ### 1. Pandas (import pandas as pd)
245
+
246
+ Objective: Data manipulation and preprocessing.
247
+ Justification: Used for loading, cleaning, and structuring the email dataset for analysis and model training.
248
+
249
+ ### 2. NumPy (import numpy as np)
250
+
251
+ Objective: Efficient numerical operations.
252
+ Justification: Facilitates handling large datasets and computations, such as text vectorization and matrix operations.
253
+
254
+ ### 3. Torch & Torch-related Libraries
255
+
256
+ import torch – Core deep learning framework for model training.
257
+ import torch.nn as nn – Defines deep learning model architecture.
258
+ import torch.optim as optim – Implements optimization algorithms.
259
+ import torch.nn.functional as F – Provides additional functions like activation and loss functions.
260
+ from torch.utils.data import Dataset, DataLoader – Handles data batching and loading for model training.
261
+ Justification: Essential for training and fine-tuning DistilBERT on email classification.
262
+
263
+ ### 4. Transformers (from transformers import DistilBertTokenizer, DistilBertForSequenceClassification)
264
+
265
+ Objective: Tokenization and model training using DistilBERT.
266
+ Justification: DistilBERT offers a lighter yet powerful alternative to BERT, improving efficiency while maintaining accuracy.
267
+
268
+ ### 5. Scikit-learn (sklearn)
269
+
270
+ Feature Extraction:
271
+ CountVectorizer: Converts text into a matrix of token counts.
272
+ TfidfVectorizer: Converts text into TF-IDF features, which measure the importance of words in documents.
273
+ Model Training & Evaluation:
274
+ MultinomialNB: Implements the Naïve Bayes classifier for a baseline model.
275
+ train_test_split: Splits the dataset for training and testing.
276
+ classification_report, accuracy_score, precision_score, recall_score, f1_score: Computes evaluation metrics.
277
+ Justification: Used for feature extraction, baseline modeling, and performance evaluation of different models.
278
+
279
+ ### 6. Matplotlib & Seaborn (import matplotlib.pyplot as plt, import seaborn as sns)
280
+
281
+ Objective: Data visualization.
282
+ Justification: Used to visualize word distributions, spam vs. ham comparisons, and model performance metrics.
283
+
284
+ ### 7. Gradio (import gradio as gr)
285
+
286
+ Objective: Building an interactive web-based demo.
287
+ Justification: Allows users to test the spam detection system by inputting emails and viewing real-time predictions.
288
+ """)
289
+ gr.Column():
290
+ gr.Markdown("## 🎉 Thanks & Acknowledgments 🎉")
291
+ gr.Markdown("""
292
+ ### 🙌 Special Thanks to Our Contributors
293
+
294
+ **🔹 Remus**
295
+ - Led **Data Collection & Preprocessing**, ensuring a clean dataset for training.
296
+ - Developed the **Baseline Model**, which served as the foundation for further improvements.
297
+ - Fine-tuned **BERT**, optimizing hyperparameters to enhance accuracy.
298
 
299
+ **🔹 Ashley**
300
+ - Played a key role in **Data Collection & Preprocessing**, improving dataset quality.
301
+ - Successfully handled the **Deployment on Hugging Face**, making the model accessible to users.
302
+ - Implemented and optimized **DistilBERT**, achieving a balance between speed and performance.
303
+
304
+ This project was a collaborative effort, and we appreciate the hard work put into making it a success! 🚀
305
+ """)
306
+
307
+
308
  return interface
309
 
310
  # Launch the interface