mimi111222 committed on
Commit
b3b7658
·
verified ·
1 Parent(s): 2df702f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -159
app.py CHANGED
@@ -24,7 +24,7 @@ st.set_page_config(
24
  initial_sidebar_state="collapsed"
25
  )
26
 
27
- # Premium Black & Gold CSS Styling
28
  st.markdown("""
29
  <style>
30
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&display=swap');
@@ -51,10 +51,6 @@ section[data-testid="stSidebar"] {
51
  display: none;
52
  }
53
 
54
- .element-container {
55
- background: transparent !important;
56
- }
57
-
58
  /* Hero Section */
59
  .hero-container {
60
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -124,74 +120,16 @@ section[data-testid="stSidebar"] {
124
  box-shadow: 0 8px 25px rgba(255,215,0,0.4);
125
  position: relative;
126
  z-index: 1;
127
- transition: all 0.3s ease;
128
- }
129
-
130
- .hero-badge:hover {
131
- transform: translateY(-3px);
132
- box-shadow: 0 12px 35px rgba(255,215,0,0.6);
133
  }
134
 
135
- /* Glass Cards */
136
- .glass-card {
137
- background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
138
- backdrop-filter: blur(20px);
139
- border-radius: 24px;
140
- padding: 2.5rem;
141
- margin-bottom: 2rem;
142
- box-shadow: 0 15px 45px rgba(0,0,0,0.5), 0 5px 15px rgba(255,215,0,0.1);
143
- border: 2px solid rgba(218,165,32,0.2);
144
- transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
145
- position: relative;
146
- }
147
-
148
- .glass-card::before {
149
- content: '';
150
- position: absolute;
151
- top: 0;
152
- left: 0;
153
- right: 0;
154
- height: 4px;
155
- background: linear-gradient(90deg, #FFD700 0%, #FFA500 100%);
156
- border-radius: 24px 24px 0 0;
157
- opacity: 0;
158
- transition: opacity 0.3s ease;
159
- }
160
-
161
- .glass-card:hover {
162
- transform: translateY(-8px);
163
- box-shadow: 0 20px 60px rgba(0,0,0,0.6), 0 8px 20px rgba(255,215,0,0.2);
164
- border-color: rgba(218,165,32,0.4);
165
- }
166
-
167
- .glass-card:hover::before {
168
- opacity: 1;
169
- }
170
-
171
- /* Section Headers */
172
- .section-header {
173
- font-size: 1.8rem;
174
  font-weight: 700;
175
- color: #f5f5f5;
176
- margin-bottom: 1.5rem;
177
- display: flex;
178
- align-items: center;
179
- gap: 0.75rem;
180
- position: relative;
181
- z-index: 2;
182
- }
183
-
184
- .section-icon {
185
- width: 40px;
186
- height: 40px;
187
- background: linear-gradient(135deg, #FFD700 0%, #FFA500 100%);
188
- border-radius: 12px;
189
- display: flex;
190
- align-items: center;
191
- justify-content: center;
192
- font-size: 1.5rem;
193
- box-shadow: 0 4px 15px rgba(255,215,0,0.3);
194
- flex-shrink: 0;
195
  }
196
 
197
  /* Stats Grid */
@@ -230,18 +168,12 @@ section[data-testid="stSidebar"] {
230
  box-shadow: 0 15px 40px rgba(255,215,0,0.5);
231
  }
232
 
233
- .stat-card:hover::before {
234
- top: -30%;
235
- right: -30%;
236
- }
237
-
238
  .stat-value {
239
  font-size: 3rem;
240
  font-weight: 900;
241
  margin-bottom: 0.5rem;
242
  position: relative;
243
  z-index: 1;
244
- text-shadow: 0 2px 10px rgba(0,0,0,0.2);
245
  color: #0f0f0f;
246
  }
247
 
@@ -359,14 +291,6 @@ section[data-testid="stSidebar"] {
359
  font-weight: 700;
360
  }
361
 
362
- /* Expanders */
363
- .streamlit-expanderHeader {
364
- background: rgba(218,165,32,0.15);
365
- border-radius: 12px;
366
- font-weight: 600;
367
- color: #f5f5f5;
368
- }
369
-
370
  /* Footer */
371
  .footer {
372
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -400,10 +324,6 @@ section[data-testid="stSidebar"] {
400
  background: rgba(218,165,32,0.1);
401
  }
402
 
403
- .stFileUploader label {
404
- color: #e5e7eb !important;
405
- }
406
-
407
  /* Metric Cards */
408
  .metric-container {
409
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -413,24 +333,12 @@ section[data-testid="stSidebar"] {
413
  box-shadow: 0 2px 8px rgba(0,0,0,0.3);
414
  }
415
 
416
- .metric-container div {
417
- color: #e5e7eb;
418
- }
419
-
420
- /* Dataframe Styling */
421
- .dataframe {
422
  border-radius: 12px;
423
- overflow: hidden;
424
- }
425
-
426
- /* Animations */
427
- @keyframes fadeIn {
428
- from { opacity: 0; transform: translateY(20px); }
429
- to { opacity: 1; transform: translateY(0); }
430
- }
431
-
432
- .glass-card {
433
- animation: fadeIn 0.6s ease forwards;
434
  }
435
 
436
  /* Hide Streamlit Branding */
@@ -448,7 +356,6 @@ def safe_read_csv(path):
448
  try:
449
  return pd.read_csv(path)
450
  except Exception as e:
451
- st.warning(f"Could not read {path}: {e}")
452
  return pd.DataFrame()
453
 
454
  def preprocess_text(text):
@@ -478,40 +385,35 @@ st.markdown("""
478
  main_csv_path = "Phishing_Email.csv"
479
  sample_csv_path = "Phishing_Email_Sample.csv"
480
 
481
- with st.container():
482
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
483
- st.markdown('<div class="section-header"><div class="section-icon">📂</div>Dataset Configuration</div>', unsafe_allow_html=True)
484
-
485
- uploaded_file = st.file_uploader("Upload your phishing dataset (optional)", type=["csv"], help="Upload Phishing_Email.csv for full training")
486
-
487
- if uploaded_file is not None:
488
- df = load_csv_from_bytes(uploaded_file.read())
489
- elif os.path.exists(main_csv_path):
490
- df = safe_read_csv(main_csv_path)
491
- elif os.path.exists(sample_csv_path):
492
- st.info("📊 Using sample dataset for demonstration")
493
- df = safe_read_csv(sample_csv_path)
494
- else:
495
- st.info("📊 Using built-in demo dataset")
496
- # FIXED: Enhanced demo dataset with more samples for proper training
497
- df = pd.DataFrame({
498
- "Email Text": [
499
- "Urgent! Your account has been suspended. Click http://fakebank.com to verify.",
500
- "WINNER! Claim your $1000 prize now at http://scam.com before it expires!",
501
- "Hi team, attached is the agenda for tomorrow's meeting. Regards.",
502
- "Hello Umaima, congrats on your results. Let's celebrate this week!",
503
- "Action required: Update your bank password at http://phishingsite.com immediately.",
504
- "Reminder: Project deadline is next Monday. Please submit your updates.",
505
- "Your PayPal account needs verification. Click here: http://fake-paypal.com",
506
- "Thanks for your email. I'll review the document and get back to you tomorrow."
507
- ],
508
- "Email Type": [
509
- "Phishing Email", "Phishing Email", "Safe Email", "Safe Email",
510
- "Phishing Email", "Safe Email", "Phishing Email", "Safe Email"
511
- ]
512
- })
513
-
514
- st.markdown('</div>', unsafe_allow_html=True)
515
 
516
  # Clean & Prepare Dataset
517
  if "Unnamed: 0" in df.columns:
@@ -538,8 +440,7 @@ phishing_count = (df['label'] == 1).sum()
538
  safe_count = (df['label'] == 0).sum()
539
  total_count = len(df)
540
 
541
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
542
- st.markdown('<div class="section-header"><div class="section-icon">📊</div>Dataset Statistics</div>', unsafe_allow_html=True)
543
 
544
  st.markdown(f"""
545
  <div class="stats-grid">
@@ -565,35 +466,27 @@ st.markdown(f"""
565
  with st.expander("🔍 View Dataset Preview", expanded=False):
566
  st.dataframe(df[[text_col, label_col]].head(10), use_container_width=True)
567
 
568
- st.markdown('</div>', unsafe_allow_html=True)
569
-
570
- # Model Training - ULTIMATE FIX
571
  @st.cache_resource
572
  def train_model(processed_texts, labels, test_size=0.2, random_state=42):
573
- # Check if we have enough samples for stratified split
574
  unique_labels, counts = np.unique(labels, return_counts=True)
575
  min_samples = counts.min()
576
 
577
- # Determine if stratification is safe
578
- # Need at least 2 samples per class AND test_size must allow at least 1 sample per class in split
579
  min_test_samples = int(np.ceil(min_samples * test_size))
580
  min_train_samples = min_samples - min_test_samples
581
 
582
  use_stratify = (min_samples >= 2 and min_train_samples >= 1 and min_test_samples >= 1 and len(unique_labels) > 1)
583
 
584
  if not use_stratify:
585
- # Use simple split without stratification
586
  X_train, X_test, y_train, y_test = train_test_split(
587
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
588
  )
589
  else:
590
- # Try stratified split with fallback
591
  try:
592
  X_train, X_test, y_train, y_test = train_test_split(
593
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=labels
594
  )
595
  except ValueError:
596
- # Fallback to simple split
597
  X_train, X_test, y_train, y_test = train_test_split(
598
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
599
  )
@@ -622,8 +515,7 @@ model_info = train_model(df['processed_text'].tolist(), df['label'].values)
622
  vectorizer, model, accuracy = model_info["vectorizer"], model_info["model"], model_info["accuracy"]
623
 
624
  # Model Performance
625
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
626
- st.markdown('<div class="section-header"><div class="section-icon">🎯</div>Model Performance</div>', unsafe_allow_html=True)
627
 
628
  col1, col2, col3 = st.columns(3)
629
 
@@ -680,11 +572,8 @@ with st.expander("📈 Detailed Metrics & Confusion Matrix"):
680
  report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
681
  st.dataframe(report_df, use_container_width=True, height=200)
682
 
683
- st.markdown('</div>', unsafe_allow_html=True)
684
-
685
  # Inference UI
686
- st.markdown('<div class="glass-card">', unsafe_allow_html=True)
687
- st.markdown('<div class="section-header"><div class="section-icon">✉️</div>Email Threat Scanner</div>', unsafe_allow_html=True)
688
 
689
  col_input, col_hints = st.columns([2, 1])
690
 
@@ -807,8 +696,6 @@ with col_hints:
807
  </div>
808
  """, unsafe_allow_html=True)
809
 
810
- st.markdown('</div>', unsafe_allow_html=True)
811
-
812
  # Footer
813
  st.markdown("""
814
  <div class="footer">
 
24
  initial_sidebar_state="collapsed"
25
  )
26
 
27
+ # Premium Black & Gold CSS Styling - CLEAN VERSION (NO BOXES)
28
  st.markdown("""
29
  <style>
30
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&display=swap');
 
51
  display: none;
52
  }
53
 
 
 
 
 
54
  /* Hero Section */
55
  .hero-container {
56
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
 
120
  box-shadow: 0 8px 25px rgba(255,215,0,0.4);
121
  position: relative;
122
  z-index: 1;
 
 
 
 
 
 
123
  }
124
 
125
+ /* Section Headers - CLEAN TEXT ONLY */
126
+ .section-title {
127
+ font-size: 2rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  font-weight: 700;
129
+ color: #FFD700;
130
+ margin: 3rem 0 1.5rem 0;
131
+ text-align: center;
132
+ letter-spacing: 0.5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  }
134
 
135
  /* Stats Grid */
 
168
  box-shadow: 0 15px 40px rgba(255,215,0,0.5);
169
  }
170
 
 
 
 
 
 
171
  .stat-value {
172
  font-size: 3rem;
173
  font-weight: 900;
174
  margin-bottom: 0.5rem;
175
  position: relative;
176
  z-index: 1;
 
177
  color: #0f0f0f;
178
  }
179
 
 
291
  font-weight: 700;
292
  }
293
 
 
 
 
 
 
 
 
 
294
  /* Footer */
295
  .footer {
296
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
 
324
  background: rgba(218,165,32,0.1);
325
  }
326
 
 
 
 
 
327
  /* Metric Cards */
328
  .metric-container {
329
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
 
333
  box-shadow: 0 2px 8px rgba(0,0,0,0.3);
334
  }
335
 
336
+ /* Expanders */
337
+ .streamlit-expanderHeader {
338
+ background: rgba(218,165,32,0.15);
 
 
 
339
  border-radius: 12px;
340
+ font-weight: 600;
341
+ color: #f5f5f5;
 
 
 
 
 
 
 
 
 
342
  }
343
 
344
  /* Hide Streamlit Branding */
 
356
  try:
357
  return pd.read_csv(path)
358
  except Exception as e:
 
359
  return pd.DataFrame()
360
 
361
  def preprocess_text(text):
 
385
  main_csv_path = "Phishing_Email.csv"
386
  sample_csv_path = "Phishing_Email_Sample.csv"
387
 
388
+ st.markdown('<div class="section-title">📂 Dataset Configuration</div>', unsafe_allow_html=True)
389
+
390
+ uploaded_file = st.file_uploader("Upload your phishing dataset (optional)", type=["csv"], help="Upload Phishing_Email.csv for full training")
391
+
392
+ if uploaded_file is not None:
393
+ df = load_csv_from_bytes(uploaded_file.read())
394
+ elif os.path.exists(main_csv_path):
395
+ df = safe_read_csv(main_csv_path)
396
+ elif os.path.exists(sample_csv_path):
397
+ st.info("📊 Using sample dataset for demonstration")
398
+ df = safe_read_csv(sample_csv_path)
399
+ else:
400
+ st.info("📊 Using built-in demo dataset")
401
+ df = pd.DataFrame({
402
+ "Email Text": [
403
+ "Urgent! Your account has been suspended. Click http://fakebank.com to verify.",
404
+ "WINNER! Claim your $1000 prize now at http://scam.com before it expires!",
405
+ "Hi team, attached is the agenda for tomorrow's meeting. Regards.",
406
+ "Hello Umaima, congrats on your results. Let's celebrate this week!",
407
+ "Action required: Update your bank password at http://phishingsite.com immediately.",
408
+ "Reminder: Project deadline is next Monday. Please submit your updates.",
409
+ "Your PayPal account needs verification. Click here: http://fake-paypal.com",
410
+ "Thanks for your email. I'll review the document and get back to you tomorrow."
411
+ ],
412
+ "Email Type": [
413
+ "Phishing Email", "Phishing Email", "Safe Email", "Safe Email",
414
+ "Phishing Email", "Safe Email", "Phishing Email", "Safe Email"
415
+ ]
416
+ })
 
 
 
 
 
417
 
418
  # Clean & Prepare Dataset
419
  if "Unnamed: 0" in df.columns:
 
440
  safe_count = (df['label'] == 0).sum()
441
  total_count = len(df)
442
 
443
+ st.markdown('<div class="section-title">📊 Dataset Statistics</div>', unsafe_allow_html=True)
 
444
 
445
  st.markdown(f"""
446
  <div class="stats-grid">
 
466
  with st.expander("🔍 View Dataset Preview", expanded=False):
467
  st.dataframe(df[[text_col, label_col]].head(10), use_container_width=True)
468
 
469
+ # Model Training
 
 
470
  @st.cache_resource
471
  def train_model(processed_texts, labels, test_size=0.2, random_state=42):
 
472
  unique_labels, counts = np.unique(labels, return_counts=True)
473
  min_samples = counts.min()
474
 
 
 
475
  min_test_samples = int(np.ceil(min_samples * test_size))
476
  min_train_samples = min_samples - min_test_samples
477
 
478
  use_stratify = (min_samples >= 2 and min_train_samples >= 1 and min_test_samples >= 1 and len(unique_labels) > 1)
479
 
480
  if not use_stratify:
 
481
  X_train, X_test, y_train, y_test = train_test_split(
482
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
483
  )
484
  else:
 
485
  try:
486
  X_train, X_test, y_train, y_test = train_test_split(
487
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=labels
488
  )
489
  except ValueError:
 
490
  X_train, X_test, y_train, y_test = train_test_split(
491
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
492
  )
 
515
  vectorizer, model, accuracy = model_info["vectorizer"], model_info["model"], model_info["accuracy"]
516
 
517
  # Model Performance
518
+ st.markdown('<div class="section-title">🎯 Model Performance</div>', unsafe_allow_html=True)
 
519
 
520
  col1, col2, col3 = st.columns(3)
521
 
 
572
  report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
573
  st.dataframe(report_df, use_container_width=True, height=200)
574
 
 
 
575
  # Inference UI
576
+ st.markdown('<div class="section-title">✉️ Email Threat Scanner</div>', unsafe_allow_html=True)
 
577
 
578
  col_input, col_hints = st.columns([2, 1])
579
 
 
696
  </div>
697
  """, unsafe_allow_html=True)
698
 
 
 
699
  # Footer
700
  st.markdown("""
701
  <div class="footer">