mimi111222 committed on
Commit
844ac8e
·
verified ·
1 Parent(s): 68431df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +373 -191
app.py CHANGED
@@ -12,6 +12,8 @@ from sklearn.model_selection import train_test_split
12
  from sklearn.feature_extraction.text import TfidfVectorizer
13
  from sklearn.linear_model import LogisticRegression
14
  from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
 
 
15
  import matplotlib.pyplot as plt
16
  import seaborn as sns
17
  import io
@@ -19,12 +21,20 @@ import os
19
 
20
  # Page Configuration
21
  st.set_page_config(
22
- page_title="AI Phishing Shield – by Umaima Qureshi",
23
  layout="wide",
24
  initial_sidebar_state="collapsed"
25
  )
26
 
27
- # Premium Black & Gold CSS Styling - ENHANCED VERSION
 
 
 
 
 
 
 
 
28
  st.markdown("""
29
  <style>
30
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800;900&display=swap');
@@ -51,16 +61,13 @@ section[data-testid="stSidebar"] {
51
  display: none;
52
  }
53
 
54
- /* Hero Section - Enhanced */
55
  .hero-container {
56
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
57
  border-radius: 32px;
58
  padding: 4rem 3rem;
59
  margin-bottom: 3rem;
60
- box-shadow:
61
- 0 25px 70px rgba(0,0,0,0.6),
62
- 0 10px 30px rgba(218,165,32,0.25),
63
- inset 0 1px 0 rgba(255,255,255,0.1);
64
  position: relative;
65
  overflow: hidden;
66
  border: 2px solid rgba(218,165,32,0.4);
@@ -105,7 +112,6 @@ section[data-testid="stSidebar"] {
105
  z-index: 1;
106
  letter-spacing: -0.03em;
107
  filter: drop-shadow(0 4px 20px rgba(255,215,0,0.4));
108
- text-shadow: 0 0 80px rgba(255,215,0,0.3);
109
  }
110
 
111
  .hero-subtitle {
@@ -137,9 +143,7 @@ section[data-testid="stSidebar"] {
137
  font-size: 1.05rem;
138
  font-weight: 700;
139
  margin-top: 1.8rem;
140
- box-shadow:
141
- 0 8px 25px rgba(255,215,0,0.5),
142
- 0 0 40px rgba(255,215,0,0.3);
143
  position: relative;
144
  z-index: 1;
145
  transition: all 0.3s ease;
@@ -147,9 +151,7 @@ section[data-testid="stSidebar"] {
147
 
148
  .hero-badge:hover {
149
  transform: translateY(-2px);
150
- box-shadow:
151
- 0 12px 35px rgba(255,215,0,0.6),
152
- 0 0 50px rgba(255,215,0,0.4);
153
  }
154
 
155
  /* Section Headers */
@@ -178,7 +180,7 @@ section[data-testid="stSidebar"] {
178
  border-radius: 2px;
179
  }
180
 
181
- /* Stats Grid - Enhanced */
182
  .stats-grid {
183
  display: grid;
184
  grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
@@ -192,10 +194,7 @@ section[data-testid="stSidebar"] {
192
  border-radius: 24px;
193
  text-align: center;
194
  color: #0f0f0f;
195
- box-shadow:
196
- 0 10px 30px rgba(255,215,0,0.35),
197
- 0 0 40px rgba(255,215,0,0.2),
198
- inset 0 1px 0 rgba(255,255,255,0.3);
199
  transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
200
  position: relative;
201
  overflow: hidden;
@@ -220,10 +219,7 @@ section[data-testid="stSidebar"] {
220
 
221
  .stat-card:hover {
222
  transform: translateY(-10px) scale(1.03);
223
- box-shadow:
224
- 0 20px 50px rgba(255,215,0,0.5),
225
- 0 0 60px rgba(255,215,0,0.3),
226
- inset 0 1px 0 rgba(255,255,255,0.4);
227
  }
228
 
229
  .stat-value {
@@ -247,7 +243,7 @@ section[data-testid="stSidebar"] {
247
  color: #0f0f0f;
248
  }
249
 
250
- /* Input Areas - Enhanced */
251
  .stTextArea textarea {
252
  border-radius: 18px;
253
  border: 2px solid rgba(218,165,32,0.35);
@@ -265,7 +261,7 @@ section[data-testid="stSidebar"] {
265
  background: rgba(26,26,26,0.95) !important;
266
  }
267
 
268
- /* Buttons - Enhanced */
269
  .stButton > button {
270
  background: linear-gradient(135deg, #FFD700 0%, #FFA500 100%);
271
  color: #0f0f0f;
@@ -275,9 +271,7 @@ section[data-testid="stSidebar"] {
275
  font-size: 1.15rem;
276
  font-weight: 700;
277
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
278
- box-shadow:
279
- 0 4px 15px rgba(255,215,0,0.4),
280
- 0 0 30px rgba(255,215,0,0.2);
281
  width: 100%;
282
  letter-spacing: 0.5px;
283
  position: relative;
@@ -304,42 +298,22 @@ section[data-testid="stSidebar"] {
304
 
305
  .stButton > button:hover {
306
  transform: translateY(-3px);
307
- box-shadow:
308
- 0 8px 25px rgba(255,215,0,0.6),
309
- 0 0 50px rgba(255,215,0,0.3);
310
  }
311
 
312
  .stButton > button:active {
313
  transform: translateY(-1px);
314
  }
315
 
316
- /* Alert Boxes - Enhanced */
317
- .alert-danger {
318
- background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%);
319
- color: white;
320
  padding: 2rem;
321
  border-radius: 20px;
322
  font-size: 1.1rem;
323
  font-weight: 600;
324
- box-shadow:
325
- 0 10px 30px rgba(239,68,68,0.4),
326
- 0 0 50px rgba(239,68,68,0.2);
327
  margin: 1.5rem 0;
328
  border: 2px solid rgba(255,255,255,0.1);
329
- }
330
-
331
- .alert-success {
332
- background: linear-gradient(135deg, #10b981 0%, #059669 100%);
333
  color: white;
334
- padding: 2rem;
335
- border-radius: 20px;
336
- font-size: 1.1rem;
337
- font-weight: 600;
338
- box-shadow:
339
- 0 10px 30px rgba(16,185,129,0.4),
340
- 0 0 50px rgba(16,185,129,0.2);
341
- margin: 1.5rem 0;
342
- border: 2px solid rgba(255,255,255,0.1);
343
  }
344
 
345
  .confidence-bar {
@@ -359,15 +333,13 @@ section[data-testid="stSidebar"] {
359
  box-shadow: 0 0 10px rgba(255,255,255,0.5);
360
  }
361
 
362
- /* Hints Panel - Enhanced */
363
  .hints-panel {
364
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
365
  border-radius: 20px;
366
  padding: 2rem;
367
  border-left: 5px solid #FFD700;
368
- box-shadow:
369
- 0 4px 15px rgba(0,0,0,0.4),
370
- inset 0 1px 0 rgba(255,255,255,0.05);
371
  backdrop-filter: blur(10px);
372
  }
373
 
@@ -395,26 +367,22 @@ section[data-testid="stSidebar"] {
395
  box-shadow: 0 2px 8px rgba(255,215,0,0.4);
396
  }
397
 
398
- /* Metric Cards - Enhanced */
399
  .metric-container {
400
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
401
  padding: 1.8rem;
402
  border-radius: 16px;
403
  border-left: 5px solid #FFD700;
404
- box-shadow:
405
- 0 4px 12px rgba(0,0,0,0.4),
406
- inset 0 1px 0 rgba(255,255,255,0.05);
407
  transition: all 0.3s ease;
408
  }
409
 
410
  .metric-container:hover {
411
  transform: translateY(-2px);
412
- box-shadow:
413
- 0 6px 18px rgba(0,0,0,0.5),
414
- inset 0 1px 0 rgba(255,255,255,0.08);
415
  }
416
 
417
- /* File Uploader - Enhanced */
418
  .stFileUploader {
419
  border: 2px dashed rgba(218,165,32,0.45);
420
  border-radius: 18px;
@@ -429,7 +397,7 @@ section[data-testid="stSidebar"] {
429
  box-shadow: 0 0 20px rgba(255,215,0,0.15);
430
  }
431
 
432
- /* Expanders - Enhanced */
433
  .streamlit-expanderHeader {
434
  background: linear-gradient(135deg, rgba(218,165,32,0.2) 0%, rgba(218,165,32,0.1) 100%) !important;
435
  border-radius: 14px !important;
@@ -489,7 +457,7 @@ section[data-testid="stSidebar"] {
489
  color: #e5e7eb !important;
490
  }
491
 
492
- /* Footer - Enhanced */
493
  .footer {
494
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
495
  border-radius: 20px;
@@ -497,9 +465,7 @@ section[data-testid="stSidebar"] {
497
  text-align: center;
498
  margin-top: 4rem;
499
  color: #9ca3af;
500
- box-shadow:
501
- 0 8px 24px rgba(0,0,0,0.4),
502
- inset 0 1px 0 rgba(255,255,255,0.05);
503
  border: 2px solid rgba(218,165,32,0.3);
504
  }
505
 
@@ -560,23 +526,136 @@ def safe_read_csv(path):
560
  except Exception as e:
561
  return pd.DataFrame()
562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  def preprocess_text(text):
 
564
  if not isinstance(text, str):
565
  text = str(text)
566
  text = text.lower()
567
- text = re.sub(r'http\S+|www\S+|https\S+', ' url ', text)
568
- text = re.sub(r'\S+@\S+', ' email ', text)
 
 
 
 
569
  text = re.sub(r'[^a-z\s]', ' ', text)
570
  text = re.sub(r'\s+', ' ', text).strip()
571
  return text
572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  # Hero Header
574
  st.markdown("""
575
  <div class="hero-container">
576
  <div class="hero-title">πŸ›‘οΈ AI Phishing Shield</div>
577
  <div class="hero-subtitle">Advanced Machine Learning Protection Against Email Threats</div>
578
  <div class="hero-description">
579
- Powered by TF-IDF vectorization and Logistic Regression, trained on thousands of real-world phishing examples.
580
  Get instant threat analysis with confidence scoring and explainable AI insights.
581
  </div>
582
  <div class="hero-badge">⚑ Developed by Umaima Qureshi</div>
@@ -617,6 +696,12 @@ else:
617
  ]
618
  })
619
 
 
 
 
 
 
 
620
  # Clean & Prepare Dataset
621
  if "Unnamed: 0" in df.columns:
622
  df = df.drop(columns=["Unnamed: 0"])
@@ -671,6 +756,7 @@ with st.expander("πŸ” View Dataset Preview", expanded=False):
671
  # Model Training
672
  @st.cache_resource
673
  def train_model(processed_texts, labels, test_size=0.2, random_state=42):
 
674
  unique_labels, counts = np.unique(labels, return_counts=True)
675
  min_samples = counts.min()
676
 
@@ -693,18 +779,31 @@ def train_model(processed_texts, labels, test_size=0.2, random_state=42):
693
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
694
  )
695
 
696
- vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2))
 
 
 
 
 
 
 
697
  X_train_vec = vectorizer.fit_transform(X_train)
698
  X_test_vec = vectorizer.transform(X_test)
699
-
700
- model = LogisticRegression(max_iter=1000, solver='liblinear')
 
 
 
 
 
 
701
  model.fit(X_train_vec, y_train)
702
-
703
  y_pred = model.predict(X_test_vec)
704
  acc = accuracy_score(y_test, y_pred)
705
  cm = confusion_matrix(y_test, y_pred)
706
  report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
707
-
708
  return {
709
  "vectorizer": vectorizer,
710
  "model": model,
@@ -713,8 +812,17 @@ def train_model(processed_texts, labels, test_size=0.2, random_state=42):
713
  "report": report
714
  }
715
 
716
- model_info = train_model(df['processed_text'].tolist(), df['label'].values)
717
- vectorizer, model, accuracy = model_info["vectorizer"], model_info["model"], model_info["accuracy"]
 
 
 
 
 
 
 
 
 
718
 
719
  # Model Performance
720
  st.markdown('<div class="section-title">🎯 Model Performance</div>', unsafe_allow_html=True)
@@ -747,53 +855,23 @@ with col3:
747
  </div>
748
  """, unsafe_allow_html=True)
749
 
750
- # Fixed Confusion Matrix Section - No re-rendering
751
  with st.expander("πŸ“ˆ Detailed Metrics & Confusion Matrix"):
752
  col_matrix, col_report = st.columns([1, 1.5])
753
 
754
  with col_matrix:
755
- # Use container to prevent re-rendering
756
- with st.container():
757
- # Set dark theme for matplotlib
758
- plt.style.use('dark_background')
759
- fig, ax = plt.subplots(figsize=(5, 4), facecolor='#1a1a1a')
760
- ax.set_facecolor('#1a1a1a')
761
-
762
- sns.heatmap(
763
- model_info["confusion_matrix"],
764
- annot=True,
765
- fmt="d",
766
- ax=ax,
767
- cmap="YlOrBr",
768
- cbar=True,
769
- square=True,
770
- annot_kws={"size": 16, "weight": "bold", "color": "#0f0f0f"},
771
- linewidths=2,
772
- linecolor='#0f0f0f',
773
- cbar_kws={'label': 'Count', 'shrink': 0.8}
774
- )
775
-
776
- ax.set_xlabel("Predicted", fontsize=11, fontweight='bold', color='#FFD700')
777
- ax.set_ylabel("Actual", fontsize=11, fontweight='bold', color='#FFD700')
778
- ax.set_xticklabels(["Safe", "Phishing"], fontsize=10, color='#e5e7eb')
779
- ax.set_yticklabels(["Safe", "Phishing"], fontsize=10, rotation=0, color='#e5e7eb')
780
- ax.set_title("Confusion Matrix", fontsize=13, fontweight='bold', pad=12, color='#FFD700')
781
-
782
- # Style the colorbar
783
- cbar = ax.collections[0].colorbar
784
- cbar.ax.yaxis.set_tick_params(color='#e5e7eb')
785
- plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#e5e7eb')
786
-
787
- plt.tight_layout()
788
- st.pyplot(fig, use_container_width=True)
789
- plt.close(fig) # Close figure to prevent memory leak
790
 
791
  with col_report:
792
  st.markdown("**πŸ“Š Classification Report:**")
793
  report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
794
  st.dataframe(
795
- report_df,
796
- use_container_width=True,
797
  height=250
798
  )
799
 
@@ -817,82 +895,169 @@ with col_input:
817
  email_input = uploaded_txt.read().decode("utf-8", errors="ignore")
818
  except Exception:
819
  email_input = str(uploaded_txt.getvalue())
820
-
821
  if st.button("πŸ” Analyze Email Threat"):
822
  if not email_input.strip():
823
  st.warning("⚠️ Please paste or upload email content to analyze")
824
  else:
825
- with st.spinner("πŸ” Analyzing email threat..."):
826
- processed_input = preprocess_text(email_input)
827
- input_vec = vectorizer.transform([processed_input])
828
-
829
- try:
830
- proba = model.predict_proba(input_vec)[0][1]
831
- except Exception:
 
 
832
  try:
833
- score = model.decision_function(input_vec)[0]
834
- proba = 1/(1+np.exp(-score))
835
- except Exception:
836
- proba = None
837
-
838
- pred = model.predict(input_vec)[0]
839
-
840
- if pred == 1:
841
- conf_pct = f"{proba:.1%}" if proba is not None else "N/A"
842
- st.markdown(f"""
843
- <div class="alert-danger">
844
- <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 0.75rem;">
845
- <div style="font-size: 2.5rem;">⚠️</div>
846
- <div>
847
- <div style="font-size: 1.5rem; font-weight: 800; letter-spacing: 0.5px;">PHISHING DETECTED</div>
848
- <div style="font-size: 1.05rem; opacity: 0.95; margin-top: 0.25rem;">Threat Confidence: {conf_pct}</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  </div>
850
- </div>
851
- <div class="confidence-bar">
852
- <div class="confidence-fill" style="width: {proba*100 if proba else 0}%;"></div>
853
- </div>
854
- </div>
855
- """, unsafe_allow_html=True)
856
-
857
- st.markdown("**πŸ” Threat Indicators Detected:**")
858
- indicators = []
859
- if "url" in processed_input:
860
- indicators.append("πŸ”— Suspicious URL tokens detected")
861
- if re.search(r'\b(urgent|immediately|verify|password|suspended|click|act now|action required)\b', processed_input):
862
- indicators.append("⚑ Urgency manipulation tactics")
863
- if re.search(r'\b(bank|account|verify|login|password|security|credential|paypal)\b', processed_input):
864
- indicators.append("🏦 Financial/security keywords present")
865
- if re.search(r'\b(winner|prize|congratulations|claim|free|won)\b', processed_input):
866
- indicators.append("🎁 Reward/prize baiting language")
867
- if re.search(r'\b(confirm|update|validate|unlock|restore)\b', processed_input):
868
- indicators.append("πŸ” Account action requests")
869
-
870
- for indicator in indicators:
871
- st.markdown(f"- {indicator}")
872
-
873
- if not indicators:
874
- st.markdown("- ⚠️ Content pattern matches known phishing templates")
875
-
876
- st.error("🚨 **Recommendation:** Do NOT click any links. Delete this email immediately and report to your IT security team.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
 
878
- else:
879
- conf_pct = f"{(1-proba):.1%}" if proba is not None else "N/A"
880
- st.markdown(f"""
881
- <div class="alert-success">
882
- <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 0.75rem;">
883
- <div style="font-size: 2.5rem;">βœ…</div>
884
- <div>
885
- <div style="font-size: 1.5rem; font-weight: 800; letter-spacing: 0.5px;">EMAIL APPEARS SAFE</div>
886
- <div style="font-size: 1.05rem; opacity: 0.95; margin-top: 0.25rem;">Safety Confidence: {conf_pct}</div>
 
 
 
 
 
 
887
  </div>
888
- </div>
889
- <div class="confidence-bar">
890
- <div class="confidence-fill" style="width: {(1-proba)*100 if proba else 100}%;"></div>
891
- </div>
892
- </div>
893
- """, unsafe_allow_html=True)
894
- st.markdown("**βœ“ No obvious threat indicators found in content analysis**")
895
- st.info("πŸ’‘ **Best Practice:** Always verify sender identity through known contact methods and be cautious with unexpected emails, even if they appear safe.")
 
 
 
 
 
 
 
896
 
897
  with col_hints:
898
  st.markdown("""
@@ -924,6 +1089,11 @@ with col_hints:
924
  <div><strong>Prize/reward</strong> language is a common phishing tactic</div>
925
  </div>
926
 
 
 
 
 
 
927
  <div class="hint-item">
928
  <div class="hint-icon">⚠️</div>
929
  <div><strong>Limitations:</strong> This tool analyzes text content only. Always verify sender identity separately.</div>
@@ -931,6 +1101,18 @@ with col_hints:
931
  </div>
932
  """, unsafe_allow_html=True)
933
 
 
 
 
 
 
 
 
 
 
 
 
 
934
  # Additional Tips Section
935
  st.markdown('<div class="section-title">πŸ’‘ Phishing Protection Tips</div>', unsafe_allow_html=True)
936
 
@@ -938,8 +1120,8 @@ col_tip1, col_tip2, col_tip3 = st.columns(3)
938
 
939
  with col_tip1:
940
  st.markdown("""
941
- <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
942
- padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
943
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
944
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">πŸ”</div>
945
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Verify Sender</div>
@@ -951,8 +1133,8 @@ with col_tip1:
951
 
952
  with col_tip2:
953
  st.markdown("""
954
- <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
955
- padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
956
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
957
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">πŸ”—</div>
958
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Hover Links</div>
@@ -964,8 +1146,8 @@ with col_tip2:
964
 
965
  with col_tip3:
966
  st.markdown("""
967
- <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
968
- padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
969
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
970
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">πŸ“ž</div>
971
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Contact Directly</div>
@@ -986,7 +1168,7 @@ st.markdown("""
986
  For production use: Implement additional verification layers, link scanning, attachment analysis, and human oversight
987
  </div>
988
  <div style="margin-top: 1.5rem; padding-top: 1.5rem; border-top: 1px solid rgba(218,165,32,0.2); font-size: 0.9rem; color: #6b7280;">
989
- Powered by TF-IDF β€’ Logistic Regression β€’ Scikit-learn β€’ Streamlit
990
  </div>
991
  <div style="margin-top: 1rem; font-size: 0.85rem; color: #6b7280;">
992
  Β© 2024 AI Phishing Shield | All Rights Reserved
 
12
  from sklearn.feature_extraction.text import TfidfVectorizer
13
  from sklearn.linear_model import LogisticRegression
14
  from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
15
+ import matplotlib
16
+ matplotlib.use('Agg') # Use non-interactive backend
17
  import matplotlib.pyplot as plt
18
  import seaborn as sns
19
  import io
 
21
 
22
  # Page Configuration
23
  st.set_page_config(
24
+ page_title="AI Phishing Shield – by Umaima Qureshi",
25
  layout="wide",
26
  initial_sidebar_state="collapsed"
27
  )
28
 
29
+ # Initialize Session State
30
+ if 'model_trained' not in st.session_state:
31
+ st.session_state.model_trained = False
32
+ if 'analysis_history' not in st.session_state:
33
+ st.session_state.analysis_history = []
34
+ if 'cm_plot_cached' not in st.session_state:
35
+ st.session_state.cm_plot_cached = None
36
+
37
+ # Premium Black & Gold CSS Styling
38
  st.markdown("""
39
  <style>
40
  @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800;900&display=swap');
 
61
  display: none;
62
  }
63
 
64
+ /* Hero Section */
65
  .hero-container {
66
  background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
67
  border-radius: 32px;
68
  padding: 4rem 3rem;
69
  margin-bottom: 3rem;
70
+ box-shadow: 0 25px 70px rgba(0,0,0,0.6), 0 10px 30px rgba(218,165,32,0.25), inset 0 1px 0 rgba(255,255,255,0.1);
 
 
 
71
  position: relative;
72
  overflow: hidden;
73
  border: 2px solid rgba(218,165,32,0.4);
 
112
  z-index: 1;
113
  letter-spacing: -0.03em;
114
  filter: drop-shadow(0 4px 20px rgba(255,215,0,0.4));
 
115
  }
116
 
117
  .hero-subtitle {
 
143
  font-size: 1.05rem;
144
  font-weight: 700;
145
  margin-top: 1.8rem;
146
+ box-shadow: 0 8px 25px rgba(255,215,0,0.5), 0 0 40px rgba(255,215,0,0.3);
 
 
147
  position: relative;
148
  z-index: 1;
149
  transition: all 0.3s ease;
 
151
 
152
  .hero-badge:hover {
153
  transform: translateY(-2px);
154
+ box-shadow: 0 12px 35px rgba(255,215,0,0.6), 0 0 50px rgba(255,215,0,0.4);
 
 
155
  }
156
 
157
  /* Section Headers */
 
180
  border-radius: 2px;
181
  }
182
 
183
+ /* Stats Grid */
184
  .stats-grid {
185
  display: grid;
186
  grid-template-columns: repeat(auto-fit, minmax(240px, 1fr));
 
194
  border-radius: 24px;
195
  text-align: center;
196
  color: #0f0f0f;
197
+ box-shadow: 0 10px 30px rgba(255,215,0,0.35), 0 0 40px rgba(255,215,0,0.2), inset 0 1px 0 rgba(255,255,255,0.3);
 
 
 
198
  transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
199
  position: relative;
200
  overflow: hidden;
 
219
 
220
  .stat-card:hover {
221
  transform: translateY(-10px) scale(1.03);
222
+ box-shadow: 0 20px 50px rgba(255,215,0,0.5), 0 0 60px rgba(255,215,0,0.3), inset 0 1px 0 rgba(255,255,255,0.4);
 
 
 
223
  }
224
 
225
  .stat-value {
 
243
  color: #0f0f0f;
244
  }
245
 
246
+ /* Input Areas */
247
  .stTextArea textarea {
248
  border-radius: 18px;
249
  border: 2px solid rgba(218,165,32,0.35);
 
261
  background: rgba(26,26,26,0.95) !important;
262
  }
263
 
264
+ /* Buttons */
265
  .stButton > button {
266
  background: linear-gradient(135deg, #FFD700 0%, #FFA500 100%);
267
  color: #0f0f0f;
 
271
  font-size: 1.15rem;
272
  font-weight: 700;
273
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
274
+ box-shadow: 0 4px 15px rgba(255,215,0,0.4), 0 0 30px rgba(255,215,0,0.2);
 
 
275
  width: 100%;
276
  letter-spacing: 0.5px;
277
  position: relative;
 
298
 
299
  .stButton > button:hover {
300
  transform: translateY(-3px);
301
+ box-shadow: 0 8px 25px rgba(255,215,0,0.6), 0 0 50px rgba(255,215,0,0.3);
 
 
302
  }
303
 
304
  .stButton > button:active {
305
  transform: translateY(-1px);
306
  }
307
 
308
+ /* Dynamic Alert Boxes */
309
+ .alert-box {
 
 
310
  padding: 2rem;
311
  border-radius: 20px;
312
  font-size: 1.1rem;
313
  font-weight: 600;
 
 
 
314
  margin: 1.5rem 0;
315
  border: 2px solid rgba(255,255,255,0.1);
 
 
 
 
316
  color: white;
 
 
 
 
 
 
 
 
 
317
  }
318
 
319
  .confidence-bar {
 
333
  box-shadow: 0 0 10px rgba(255,255,255,0.5);
334
  }
335
 
336
+ /* Hints Panel */
337
  .hints-panel {
338
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
339
  border-radius: 20px;
340
  padding: 2rem;
341
  border-left: 5px solid #FFD700;
342
+ box-shadow: 0 4px 15px rgba(0,0,0,0.4), inset 0 1px 0 rgba(255,255,255,0.05);
 
 
343
  backdrop-filter: blur(10px);
344
  }
345
 
 
367
  box-shadow: 0 2px 8px rgba(255,215,0,0.4);
368
  }
369
 
370
+ /* Metric Cards */
371
  .metric-container {
372
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
373
  padding: 1.8rem;
374
  border-radius: 16px;
375
  border-left: 5px solid #FFD700;
376
+ box-shadow: 0 4px 12px rgba(0,0,0,0.4), inset 0 1px 0 rgba(255,255,255,0.05);
 
 
377
  transition: all 0.3s ease;
378
  }
379
 
380
  .metric-container:hover {
381
  transform: translateY(-2px);
382
+ box-shadow: 0 6px 18px rgba(0,0,0,0.5), inset 0 1px 0 rgba(255,255,255,0.08);
 
 
383
  }
384
 
385
+ /* File Uploader */
386
  .stFileUploader {
387
  border: 2px dashed rgba(218,165,32,0.45);
388
  border-radius: 18px;
 
397
  box-shadow: 0 0 20px rgba(255,215,0,0.15);
398
  }
399
 
400
+ /* Expanders */
401
  .streamlit-expanderHeader {
402
  background: linear-gradient(135deg, rgba(218,165,32,0.2) 0%, rgba(218,165,32,0.1) 100%) !important;
403
  border-radius: 14px !important;
 
457
  color: #e5e7eb !important;
458
  }
459
 
460
+ /* Footer */
461
  .footer {
462
  background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
463
  border-radius: 20px;
 
465
  text-align: center;
466
  margin-top: 4rem;
467
  color: #9ca3af;
468
+ box-shadow: 0 8px 24px rgba(0,0,0,0.4), inset 0 1px 0 rgba(255,255,255,0.05);
 
 
469
  border: 2px solid rgba(218,165,32,0.3);
470
  }
471
 
 
526
  except Exception as e:
527
  return pd.DataFrame()
528
 
def sanitize_input(text):
    """Strip HTML markup from user input to prevent injection.

    ``<script>`` elements are removed together with their contents; every
    other tag is removed while the text around it is kept.

    Args:
        text: Raw user-supplied string.

    Returns:
        The string with script blocks and HTML tags removed.
    """
    # Remove script elements first so their source never survives as plain
    # text. ``\s*`` also accepts closing tags written as ``</script >``.
    text = re.sub(r'<script.*?</script\s*>', '', text, flags=re.DOTALL | re.IGNORECASE)
    # ``[^>]*`` matches newlines, so a tag split across lines
    # (e.g. "<img\nsrc=x onerror=...>") is stripped as well.
    text = re.sub(r'<[^>]*>', '', text)
    return text
534
+
def validate_email_input(text, min_length=10, max_length=10000):
    """Check that email text is within the accepted length range.

    Args:
        text: Email content to validate.
        min_length: Minimum number of characters required after surrounding
            whitespace is stripped.
        max_length: Maximum number of characters allowed in the raw text.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is an empty string when
        the text is valid, otherwise a user-facing explanation.
    """
    # Whitespace-only padding must not count towards the minimum.
    if len(text.strip()) < min_length:
        return False, f"Email content too short for analysis (minimum {min_length:,} characters)"
    if len(text) > max_length:
        return False, f"Email content too long (maximum {max_length:,} characters)"
    return True, ""
542
+
@st.cache_data
def preprocess_text_cached(text):
    """Return ``preprocess_text(text)``, memoized through ``st.cache_data``."""
    normalized = preprocess_text(text)
    return normalized
547
+
def preprocess_text(text):
    """Normalize raw email text into lowercase, letters-only words.

    URLs, email addresses, dollar amounts and 16-digit card-like numbers are
    replaced by placeholder words first, so they survive the later removal
    of digits and punctuation.

    Args:
        text: Raw email text. ``None`` and float NaN (missing values) are
            treated as empty text; any other non-string value is converted
            with ``str()``.

    Returns:
        A string of ``a-z`` words separated by single spaces.
    """
    # Missing values would otherwise be stringified into the bogus tokens
    # "none" / "nan". ``text != text`` is true only for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)
    text = text.lower()
    # Placeholders must be inserted before the letters-only filter below.
    # ("https..." is already covered by the "http\S+" alternative.)
    text = re.sub(r'http\S+|www\S+', ' suspiciousurl ', text)
    text = re.sub(r'\S+@\S+', ' emailaddress ', text)
    text = re.sub(r'\$\d+', ' moneymention ', text)
    text = re.sub(r'\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}', ' cardnumber ', text)
    # Drop everything except lowercase letters and whitespace, then collapse runs.
    text = re.sub(r'[^a-z\s]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
562
 
def _mentions_term(term, text):
    """Return True if *term* occurs in *text* as a whole word or phrase.

    A few common inflections (plural, past tense, "-ing") are accepted, but
    the term must not be embedded in a longer word or number.
    """
    pattern = r'(?<!\w)' + re.escape(term) + r'(?:s|es|d|ed|ing)?(?!\w)'
    return re.search(pattern, text) is not None


def calculate_phishing_score(text):
    """Score raw email text for phishing indicators with weighted heuristics.

    Points are added for keyword groups, URLs, credential requests,
    threatening language and generic greetings; the total is divided by 200
    and capped at 0.99. A term listed in more than one group contributes
    once per group.

    Keywords are matched as whole words or phrases, so "pin" does not fire
    on "shipping", "won" does not fire on "wonderful", and "$10000" is not
    also counted as "$1000".

    Args:
        text: Raw (unprocessed) email text. Non-string values are converted
            with ``str()``.

    Returns:
        A float in the range 0.0 to 0.99.
    """
    if not isinstance(text, str):
        text = str(text)
    text_lower = text.lower()

    def count_hits(terms):
        # Each listed term counts at most once, however often it appears.
        return sum(1 for term in terms if _mentions_term(term, text_lower))

    score = 0

    # High-risk phishing keywords (weight: 15 points each)
    high_risk = ['verify', 'suspended', 'urgent', 'immediately', 'click here', 'act now',
                 'confirm identity', 'account locked', 'unusual activity', 'security alert',
                 'expire', 'limited time', 'action required', 'update payment', 'validate']
    score += 15 * count_hits(high_risk)

    # Financial/security keywords (weight: 12 points each)
    financial = ['bank', 'credit card', 'password', 'ssn', 'social security', 'paypal',
                 'billing', 'payment', 'account number', 'pin', 'cvv', 'credential']
    score += 12 * count_hits(financial)

    # Prize/reward scam indicators (weight: 18 points each)
    prize_scam = ['won', 'winner', 'prize', 'claim now', 'congratulations', 'free money',
                  'inheritance', 'lottery', 'jackpot', 'cash prize', '$1000', '$10000']
    score += 18 * count_hits(prize_scam)

    # Urgency + financial combo (weight: 25 points)
    has_urgency = count_hits(['urgent', 'immediately', 'now', 'expire']) > 0
    has_financial = count_hits(['account', 'bank', 'payment', 'card']) > 0
    if has_urgency and has_financial:
        score += 25

    # URLs (weight: 20 points each, capped at 40 for multiple URLs)
    url_count = len(re.findall(r'http\S+|www\S+', text, re.IGNORECASE))
    score += min(url_count * 20, 40)

    # Request for credentials/info (weight: 20 points)
    if re.search(r'\b(enter|provide|submit|update|confirm).{0,20}(password|credential|info|detail)', text_lower):
        score += 20

    # Threatening language (weight: 15 points each)
    threats = ['locked', 'suspended', 'terminated', 'closed', 'blocked', 'restricted']
    score += 15 * count_hits(threats)

    # Generic, impersonal greeting (weight: 8 points)
    if re.search(r'\b(dear customer|dear user|dear member|dear valued)\b', text_lower):
        score += 8

    # Convert to a 0-1 scale; 200 points or more saturates at the 0.99 cap.
    max_score = 200
    probability = min(score / max_score, 0.99)

    return probability
611
+
@st.cache_data
def _render_confusion_matrix_png(cm_rows):
    """Render the confusion-matrix heatmap and return it as PNG bytes.

    Args:
        cm_rows: The matrix as a tuple of row tuples. It is hashable, so the
            cache is keyed on the actual matrix values.
    """
    plt.style.use('dark_background')
    fig, ax = plt.subplots(figsize=(5, 4), facecolor='#1a1a1a')
    try:
        ax.set_facecolor('#1a1a1a')

        sns.heatmap(
            np.array(cm_rows),
            annot=True,
            fmt="d",
            ax=ax,
            cmap="YlOrBr",
            cbar=True,
            square=True,
            annot_kws={"size": 16, "weight": "bold", "color": "#0f0f0f"},
            linewidths=2,
            linecolor='#0f0f0f',
            cbar_kws={'label': 'Count', 'shrink': 0.8}
        )

        ax.set_xlabel("Predicted", fontsize=11, fontweight='bold', color='#FFD700')
        ax.set_ylabel("Actual", fontsize=11, fontweight='bold', color='#FFD700')
        ax.set_xticklabels(["Safe", "Phishing"], fontsize=10, color='#e5e7eb')
        ax.set_yticklabels(["Safe", "Phishing"], fontsize=10, rotation=0, color='#e5e7eb')
        ax.set_title("Confusion Matrix", fontsize=13, fontweight='bold', pad=12, color='#FFD700')

        # Style the colorbar
        cbar = ax.collections[0].colorbar
        cbar.ax.yaxis.set_tick_params(color='#e5e7eb')
        plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#e5e7eb')

        fig.tight_layout()
        buf = io.BytesIO()
        # Save this figure explicitly rather than pyplot's implicit current figure.
        fig.savefig(buf, format='png', facecolor='#1a1a1a', dpi=100)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)

    return buf.getvalue()


def generate_confusion_matrix_plot(_cm):
    """Return a PNG rendering of the confusion matrix as a ``BytesIO`` buffer.

    Streamlit does not hash arguments whose name starts with an underscore,
    so caching this function directly would return the first plot ever
    rendered for every later matrix. The matrix is therefore converted to a
    hashable tuple and passed to a cached helper, which renders once per
    distinct matrix.

    Args:
        _cm: 2x2 confusion matrix of integer counts (array-like).

    Returns:
        A new ``io.BytesIO`` positioned at the start of the PNG data.
    """
    cm_rows = tuple(tuple(row) for row in np.asarray(_cm).tolist())
    return io.BytesIO(_render_confusion_matrix_png(cm_rows))
651
+
652
  # Hero Header
653
  st.markdown("""
654
  <div class="hero-container">
655
  <div class="hero-title">πŸ›‘οΈ AI Phishing Shield</div>
656
  <div class="hero-subtitle">Advanced Machine Learning Protection Against Email Threats</div>
657
  <div class="hero-description">
658
+ Powered by TF-IDF vectorization and Logistic Regression, trained on thousands of real-world phishing examples.
659
  Get instant threat analysis with confidence scoring and explainable AI insights.
660
  </div>
661
  <div class="hero-badge">⚑ Developed by Umaima Qureshi</div>
 
696
  ]
697
  })
698
 
699
+ # Validate dataset
700
+ required_columns = 2
701
+ if len(df.columns) < required_columns or len(df) == 0:
702
+ st.error("⚠️ Invalid dataset format. Please ensure your CSV has email text and labels.")
703
+ st.stop()
704
+
705
  # Clean & Prepare Dataset
706
  if "Unnamed: 0" in df.columns:
707
  df = df.drop(columns=["Unnamed: 0"])
 
756
  # Model Training
757
  @st.cache_resource
758
  def train_model(processed_texts, labels, test_size=0.2, random_state=42):
759
+ """Enhanced model training with better parameters"""
760
  unique_labels, counts = np.unique(labels, return_counts=True)
761
  min_samples = counts.min()
762
 
 
779
  processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
780
  )
781
 
782
+ # Enhanced TF-IDF with better parameters for phishing detection
783
+ vectorizer = TfidfVectorizer(
784
+ max_features=5000,
785
+ ngram_range=(1,3), # Include trigrams for better context
786
+ min_df=1,
787
+ max_df=0.95,
788
+ sublinear_tf=True
789
+ )
790
  X_train_vec = vectorizer.fit_transform(X_train)
791
  X_test_vec = vectorizer.transform(X_test)
792
+
793
+ # Use balanced class weights for better phishing detection
794
+ model = LogisticRegression(
795
+ max_iter=2000,
796
+ solver='liblinear',
797
+ class_weight='balanced', # Handle imbalanced data better
798
+ C=1.0
799
+ )
800
  model.fit(X_train_vec, y_train)
801
+
802
  y_pred = model.predict(X_test_vec)
803
  acc = accuracy_score(y_test, y_pred)
804
  cm = confusion_matrix(y_test, y_pred)
805
  report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
806
+
807
  return {
808
  "vectorizer": vectorizer,
809
  "model": model,
 
812
  "report": report
813
  }
814
 
815
+ # Train model with session state to prevent re-training
816
+ if not st.session_state.model_trained:
817
+ model_info = train_model(df['processed_text'].tolist(), df['label'].values)
818
+ st.session_state.model_info = model_info
819
+ st.session_state.model_trained = True
820
+ else:
821
+ model_info = st.session_state.model_info
822
+
823
+ vectorizer = model_info["vectorizer"]
824
+ model = model_info["model"]
825
+ accuracy = model_info["accuracy"]
826
 
827
  # Model Performance
828
  st.markdown('<div class="section-title">🎯 Model Performance</div>', unsafe_allow_html=True)
 
855
  </div>
856
  """, unsafe_allow_html=True)
857
 
858
+ # Confusion Matrix Section
859
  with st.expander("📈 Detailed Metrics & Confusion Matrix"):
860
  col_matrix, col_report = st.columns([1, 1.5])
861
 
862
  with col_matrix:
863
+ # Generate confusion matrix plot once
864
+ if st.session_state.cm_plot_cached is None:
865
+ st.session_state.cm_plot_cached = generate_confusion_matrix_plot(model_info["confusion_matrix"])
866
+
867
+ st.image(st.session_state.cm_plot_cached, use_column_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
868
 
869
  with col_report:
870
  st.markdown("**📊 Classification Report:**")
871
  report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
872
  st.dataframe(
873
+ report_df,
874
+ use_container_width=True,
875
  height=250
876
  )
877
 
 
895
  email_input = uploaded_txt.read().decode("utf-8", errors="ignore")
896
  except Exception:
897
  email_input = str(uploaded_txt.getvalue())
898
+
899
  if st.button("🔍 Analyze Email Threat"):
900
  if not email_input.strip():
901
  st.warning("⚠️ Please paste or upload email content to analyze")
902
  else:
903
+ # Sanitize input
904
+ email_input = sanitize_input(email_input)
905
+
906
+ # Validate input
907
+ is_valid, error_msg = validate_email_input(email_input)
908
+ if not is_valid:
909
+ st.warning(f"⚠️ {error_msg}")
910
+ else:
911
+ with st.spinner("🔍 Analyzing email threat..."):
912
  try:
913
+ # ML Model prediction
914
+ processed_input = preprocess_text_cached(email_input)
915
+ input_vec = vectorizer.transform([processed_input])
916
+
917
+ try:
918
+ ml_proba = model.predict_proba(input_vec)[0][1]
919
+ except AttributeError:
920
+ decision = model.decision_function(input_vec)[0]
921
+ ml_proba = 1 / (1 + np.exp(-decision))
922
+
923
+ ml_pred = model.predict(input_vec)[0]
924
+
925
+ # Rule-based scoring
926
+ rule_score = calculate_phishing_score(email_input)
927
+
928
+ # Hybrid approach: weighted combination
929
+ # 60% ML model + 40% rule-based (adjustable)
930
+ hybrid_proba = (0.6 * ml_proba) + (0.4 * rule_score)
931
+
932
+ # Final prediction based on hybrid score
933
+ final_pred = 1 if hybrid_proba > 0.5 else 0
934
+
935
+ # Dynamic color based on confidence
936
+ if hybrid_proba >= 0.8:
937
+ alert_color = "#dc2626" # Deep red - Critical
938
+ alert_gradient = "linear-gradient(135deg, #dc2626 0%, #991b1b 100%)"
939
+ shadow_color = "220, 38, 38"
940
+ emoji = "🚨"
941
+ risk_level = "CRITICAL THREAT"
942
+ elif hybrid_proba >= 0.6:
943
+ alert_color = "#ef4444" # Red - High risk
944
+ alert_gradient = "linear-gradient(135deg, #ef4444 0%, #dc2626 100%)"
945
+ shadow_color = "239, 68, 68"
946
+ emoji = "⚠️"
947
+ risk_level = "HIGH RISK"
948
+ elif hybrid_proba >= 0.4:
949
+ alert_color = "#f97316" # Orange - Medium risk
950
+ alert_gradient = "linear-gradient(135deg, #f97316 0%, #ea580c 100%)"
951
+ shadow_color = "249, 115, 22"
952
+ emoji = "⚡"
953
+ risk_level = "MEDIUM RISK"
954
+ elif hybrid_proba >= 0.2:
955
+ alert_color = "#eab308" # Yellow - Low risk
956
+ alert_gradient = "linear-gradient(135deg, #eab308 0%, #ca8a04 100%)"
957
+ shadow_color = "234, 179, 8"
958
+ emoji = "⚠️"
959
+ risk_level = "LOW RISK"
960
+ else:
961
+ alert_color = "#10b981" # Green - Safe
962
+ alert_gradient = "linear-gradient(135deg, #10b981 0%, #059669 100%)"
963
+ shadow_color = "16, 185, 129"
964
+ emoji = "✅"
965
+ risk_level = "SAFE"
966
+
967
+ if final_pred == 1:
968
+ conf_pct = f"{hybrid_proba:.1%}"
969
+ st.markdown(f"""
970
+ <div class="alert-box" style="background: {alert_gradient}; box-shadow: 0 10px 30px rgba({shadow_color}, 0.4), 0 0 50px rgba({shadow_color}, 0.2);">
971
+ <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 0.75rem;">
972
+ <div style="font-size: 2.5rem;">{emoji}</div>
973
+ <div>
974
+ <div style="font-size: 1.5rem; font-weight: 800; letter-spacing: 0.5px;">{risk_level} DETECTED</div>
975
+ <div style="font-size: 1.05rem; opacity: 0.95; margin-top: 0.25rem;">Threat Confidence: {conf_pct}</div>
976
+ <div style="font-size: 0.9rem; opacity: 0.85; margin-top: 0.25rem;">ML Score: {ml_proba:.1%} | Rule Score: {rule_score:.1%}</div>
977
+ </div>
978
+ </div>
979
+ <div class="confidence-bar">
980
+ <div class="confidence-fill" style="width: {hybrid_proba*100}%;"></div>
981
+ </div>
982
  </div>
983
+ """, unsafe_allow_html=True)
984
+
985
+ st.markdown("**🔍 Threat Indicators Detected:**")
986
+ indicators = []
987
+ if "suspiciousurl" in processed_input or re.search(r'http\S+|www\S+', email_input, re.IGNORECASE):
988
+ indicators.append("🔗 Suspicious URL tokens detected")
989
+ if re.search(r'\b(urgent|immediately|verify|password|suspended|click|act now|action required)\b', email_input, re.IGNORECASE):
990
+ indicators.append("⚡ Urgency manipulation tactics")
991
+ if re.search(r'\b(bank|account|verify|login|password|security|credential|paypal)\b', email_input, re.IGNORECASE):
992
+ indicators.append("🏦 Financial/security keywords present")
993
+ if re.search(r'\b(winner|prize|congratulations|claim|free|won)\b', email_input, re.IGNORECASE):
994
+ indicators.append("🎁 Reward/prize baiting language")
995
+ if re.search(r'\b(confirm|update|validate|unlock|restore)\b', email_input, re.IGNORECASE):
996
+ indicators.append("πŸ” Account action requests")
997
+ if "cardnumber" in processed_input:
998
+ indicators.append("💳 Credit card pattern detected")
999
+ if "moneymention" in processed_input:
1000
+ indicators.append("💰 Money amount mentioned")
1001
+
1002
+ for indicator in indicators:
1003
+ st.markdown(f"- {indicator}")
1004
+
1005
+ if not indicators:
1006
+ st.markdown("- ⚠️ Content pattern matches known phishing templates")
1007
+
1008
+ st.error("🚨 **Recommendation:** Do NOT click any links. Delete this email immediately and report to your IT security team.")
1009
+
1010
+ # Download analysis report
1011
+ result_data = {
1012
+ 'timestamp': pd.Timestamp.now(),
1013
+ 'prediction': 'Phishing',
1014
+ 'confidence': f"{hybrid_proba:.2%}",
1015
+ 'ml_score': f"{ml_proba:.2%}",
1016
+ 'rule_score': f"{rule_score:.2%}",
1017
+ 'risk_level': risk_level,
1018
+ 'email_preview': email_input[:100] + "..."
1019
+ }
1020
+ result_df = pd.DataFrame([result_data])
1021
+ csv = result_df.to_csv(index=False)
1022
+
1023
+ st.download_button(
1024
+ label="📥 Download Analysis Report",
1025
+ data=csv,
1026
+ file_name=f"phishing_analysis_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv",
1027
+ mime="text/csv"
1028
+ )
1029
 
1030
+ else:
1031
+ conf_pct = f"{(1-hybrid_proba):.1%}"
1032
+ st.markdown(f"""
1033
+ <div class="alert-box" style="background: {alert_gradient}; box-shadow: 0 10px 30px rgba({shadow_color}, 0.4), 0 0 50px rgba({shadow_color}, 0.2);">
1034
+ <div style="display: flex; align-items: center; gap: 1rem; margin-bottom: 0.75rem;">
1035
+ <div style="font-size: 2.5rem;">{emoji}</div>
1036
+ <div>
1037
+ <div style="font-size: 1.5rem; font-weight: 800; letter-spacing: 0.5px;">EMAIL APPEARS SAFE</div>
1038
+ <div style="font-size: 1.05rem; opacity: 0.95; margin-top: 0.25rem;">Safety Confidence: {conf_pct}</div>
1039
+ <div style="font-size: 0.9rem; opacity: 0.85; margin-top: 0.25rem;">ML Score: {(1-ml_proba):.1%} | Rule Score: {(1-rule_score):.1%}</div>
1040
+ </div>
1041
+ </div>
1042
+ <div class="confidence-bar">
1043
+ <div class="confidence-fill" style="width: {(1-hybrid_proba)*100}%;"></div>
1044
+ </div>
1045
  </div>
1046
+ """, unsafe_allow_html=True)
1047
+
1048
+ st.markdown("**✓ No obvious threat indicators found in content analysis**")
1049
+ st.info("💡 **Best Practice:** Always verify sender identity through known contact methods and be cautious with unexpected emails, even if they appear safe.")
1050
+
1051
+ # Add to history
1052
+ st.session_state.analysis_history.append({
1053
+ 'timestamp': pd.Timestamp.now(),
1054
+ 'result': 'Phishing' if final_pred == 1 else 'Safe',
1055
+ 'confidence': f"{hybrid_proba:.2%}",
1056
+ 'preview': email_input[:50] + "..."
1057
+ })
1058
+
1059
+ except Exception as e:
1060
+ st.error(f"⚠️ Analysis failed: {str(e)}")
1061
 
1062
  with col_hints:
1063
  st.markdown("""
 
1089
  <div><strong>Prize/reward</strong> language is a common phishing tactic</div>
1090
  </div>
1091
 
1092
+ <div class="hint-item">
1093
+ <div class="hint-icon">⚡</div>
1094
+ <div><strong>Hybrid Detection:</strong> Combines ML model (60%) with rule-based scoring (40%)</div>
1095
+ </div>
1096
+
1097
  <div class="hint-item">
1098
  <div class="hint-icon">⚠️</div>
1099
  <div><strong>Limitations:</strong> This tool analyzes text content only. Always verify sender identity separately.</div>
 
1101
  </div>
1102
  """, unsafe_allow_html=True)
1103
 
1104
+ # Recent Analyses History
1105
+ if len(st.session_state.analysis_history) > 0:
1106
+ st.markdown('<div class="section-title">📊 Recent Analyses</div>', unsafe_allow_html=True)
1107
+ with st.expander("View Recent Analysis History", expanded=False):
1108
+ hist_df = pd.DataFrame(st.session_state.analysis_history[-10:]) # Show last 10
1109
+ hist_df = hist_df.iloc[::-1] # Reverse to show most recent first
1110
+ st.dataframe(hist_df, use_container_width=True, height=300)
1111
+
1112
+ if st.button("🗑️ Clear History"):
1113
+ st.session_state.analysis_history = []
1114
+ st.rerun()
1115
+
1116
  # Additional Tips Section
1117
  st.markdown('<div class="section-title">💡 Phishing Protection Tips</div>', unsafe_allow_html=True)
1118
 
 
1120
 
1121
  with col_tip1:
1122
  st.markdown("""
1123
+ <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
1124
+ padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
1125
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
1126
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">πŸ”</div>
1127
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Verify Sender</div>
 
1133
 
1134
  with col_tip2:
1135
  st.markdown("""
1136
+ <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
1137
+ padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
1138
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
1139
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">🔗</div>
1140
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Hover Links</div>
 
1146
 
1147
  with col_tip3:
1148
  st.markdown("""
1149
+ <div style="background: linear-gradient(135deg, rgba(26,26,26,0.95) 0%, rgba(15,15,15,0.95) 100%);
1150
+ padding: 1.5rem; border-radius: 16px; border-left: 4px solid #FFD700;
1151
  box-shadow: 0 4px 15px rgba(0,0,0,0.3); height: 100%;">
1152
  <div style="font-size: 2rem; margin-bottom: 0.75rem;">📞</div>
1153
  <div style="font-weight: 700; font-size: 1.1rem; color: #FFD700; margin-bottom: 0.75rem;">Contact Directly</div>
 
1168
  For production use: Implement additional verification layers, link scanning, attachment analysis, and human oversight
1169
  </div>
1170
  <div style="margin-top: 1.5rem; padding-top: 1.5rem; border-top: 1px solid rgba(218,165,32,0.2); font-size: 0.9rem; color: #6b7280;">
1171
+ Powered by TF-IDF • Logistic Regression • Hybrid Detection • Scikit-learn • Streamlit
1172
  </div>
1173
  <div style="margin-top: 1rem; font-size: 0.85rem; color: #6b7280;">
1174
  © 2024 AI Phishing Shield | All Rights Reserved