Spaces:

mimi111222
/

_phisingdector

Sleeping

App Files Files Community

mimi111222 committed on Oct 12, 2025

Commit

b3b7658

verified ·

1 Parent(s): 2df702f

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -159

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ st.set_page_config(
     initial_sidebar_state="collapsed"
 )
-# Premium Black & Gold CSS Styling
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&display=swap');
@@ -51,10 +51,6 @@ section[data-testid="stSidebar"] {
     display: none;
 }
-.element-container {
-    background: transparent !important;
-}
 /* Hero Section */
 .hero-container {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -124,74 +120,16 @@ section[data-testid="stSidebar"] {
     box-shadow: 0 8px 25px rgba(255,215,0,0.4);
     position: relative;
     z-index: 1;
-    transition: all 0.3s ease;
-}
-.hero-badge:hover {
-    transform: translateY(-3px);
-    box-shadow: 0 12px 35px rgba(255,215,0,0.6);
 }
-/* Glass Cards */
-.glass-card {
-    background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
-    backdrop-filter: blur(20px);
-    border-radius: 24px;
-    padding: 2.5rem;
-    margin-bottom: 2rem;
-    box-shadow: 0 15px 45px rgba(0,0,0,0.5), 0 5px 15px rgba(255,215,0,0.1);
-    border: 2px solid rgba(218,165,32,0.2);
-    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
-    position: relative;
-}
-.glass-card::before {
-    content: '';
-    position: absolute;
-    top: 0;
-    left: 0;
-    right: 0;
-    height: 4px;
-    background: linear-gradient(90deg, #FFD700 0%, #FFA500 100%);
-    border-radius: 24px 24px 0 0;
-    opacity: 0;
-    transition: opacity 0.3s ease;
-}
-.glass-card:hover {
-    transform: translateY(-8px);
-    box-shadow: 0 20px 60px rgba(0,0,0,0.6), 0 8px 20px rgba(255,215,0,0.2);
-    border-color: rgba(218,165,32,0.4);
-}
-.glass-card:hover::before {
-    opacity: 1;
-}
-/* Section Headers */
-.section-header {
-    font-size: 1.8rem;
     font-weight: 700;
-    color: #f5f5f5;
-    margin-bottom: 1.5rem;
-    display: flex;
-    align-items: center;
-    gap: 0.75rem;
-    position: relative;
-    z-index: 2;
-}
-.section-icon {
-    width: 40px;
-    height: 40px;
-    background: linear-gradient(135deg, #FFD700 0%, #FFA500 100%);
-    border-radius: 12px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    font-size: 1.5rem;
-    box-shadow: 0 4px 15px rgba(255,215,0,0.3);
-    flex-shrink: 0;
 }
 /* Stats Grid */
@@ -230,18 +168,12 @@ section[data-testid="stSidebar"] {
     box-shadow: 0 15px 40px rgba(255,215,0,0.5);
 }
-.stat-card:hover::before {
-    top: -30%;
-    right: -30%;
-}
 .stat-value {
     font-size: 3rem;
     font-weight: 900;
     margin-bottom: 0.5rem;
     position: relative;
     z-index: 1;
-    text-shadow: 0 2px 10px rgba(0,0,0,0.2);
     color: #0f0f0f;
 }
@@ -359,14 +291,6 @@ section[data-testid="stSidebar"] {
     font-weight: 700;
 }
-/* Expanders */
-.streamlit-expanderHeader {
-    background: rgba(218,165,32,0.15);
-    border-radius: 12px;
-    font-weight: 600;
-    color: #f5f5f5;
-}
 /* Footer */
 .footer {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -400,10 +324,6 @@ section[data-testid="stSidebar"] {
     background: rgba(218,165,32,0.1);
 }
-.stFileUploader label {
-    color: #e5e7eb !important;
-}
 /* Metric Cards */
 .metric-container {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
@@ -413,24 +333,12 @@ section[data-testid="stSidebar"] {
     box-shadow: 0 2px 8px rgba(0,0,0,0.3);
 }
-.metric-container div {
-    color: #e5e7eb;
-}
-/* Dataframe Styling */
-.dataframe {
     border-radius: 12px;
-    overflow: hidden;
-}
-/* Animations */
-@keyframes fadeIn {
-    from { opacity: 0; transform: translateY(20px); }
-    to { opacity: 1; transform: translateY(0); }
-}
-.glass-card {
-    animation: fadeIn 0.6s ease forwards;
 }
 /* Hide Streamlit Branding */
@@ -448,7 +356,6 @@ def safe_read_csv(path):
     try:
         return pd.read_csv(path)
     except Exception as e:
-        st.warning(f"Could not read {path}: {e}")
         return pd.DataFrame()
 def preprocess_text(text):
@@ -478,40 +385,35 @@ st.markdown("""
 main_csv_path = "Phishing_Email.csv"
 sample_csv_path = "Phishing_Email_Sample.csv"
-with st.container():
-    st.markdown('<div class="glass-card">', unsafe_allow_html=True)
-    st.markdown('<div class="section-header"><div class="section-icon">📂</div>Dataset Configuration</div>', unsafe_allow_html=True)
-    uploaded_file = st.file_uploader("Upload your phishing dataset (optional)", type=["csv"], help="Upload Phishing_Email.csv for full training")
-    if uploaded_file is not None:
-        df = load_csv_from_bytes(uploaded_file.read())
-    elif os.path.exists(main_csv_path):
-        df = safe_read_csv(main_csv_path)
-    elif os.path.exists(sample_csv_path):
-        st.info("📊 Using sample dataset for demonstration")
-        df = safe_read_csv(sample_csv_path)
-    else:
-        st.info("📊 Using built-in demo dataset")
-        # FIXED: Enhanced demo dataset with more samples for proper training
-        df = pd.DataFrame({
-            "Email Text": [
-                "Urgent! Your account has been suspended. Click http://fakebank.com to verify.",
-                "WINNER! Claim your $1000 prize now at http://scam.com before it expires!",
-                "Hi team, attached is the agenda for tomorrow's meeting. Regards.",
-                "Hello Umaima, congrats on your results. Let's celebrate this week!",
-                "Action required: Update your bank password at http://phishingsite.com immediately.",
-                "Reminder: Project deadline is next Monday. Please submit your updates.",
-                "Your PayPal account needs verification. Click here: http://fake-paypal.com",
-                "Thanks for your email. I'll review the document and get back to you tomorrow."
-            ],
-            "Email Type": [
-                "Phishing Email", "Phishing Email", "Safe Email", "Safe Email",
-                "Phishing Email", "Safe Email", "Phishing Email", "Safe Email"
-            ]
-        })
-    st.markdown('</div>', unsafe_allow_html=True)
 # Clean & Prepare Dataset
 if "Unnamed: 0" in df.columns:
@@ -538,8 +440,7 @@ phishing_count = (df['label'] == 1).sum()
 safe_count = (df['label'] == 0).sum()
 total_count = len(df)
-st.markdown('<div class="glass-card">', unsafe_allow_html=True)
-st.markdown('<div class="section-header"><div class="section-icon">📊</div>Dataset Statistics</div>', unsafe_allow_html=True)
 st.markdown(f"""
 <div class="stats-grid">
@@ -565,35 +466,27 @@ st.markdown(f"""
 with st.expander("🔍 View Dataset Preview", expanded=False):
     st.dataframe(df[[text_col, label_col]].head(10), use_container_width=True)
-st.markdown('</div>', unsafe_allow_html=True)
-# Model Training - ULTIMATE FIX
 @st.cache_resource
 def train_model(processed_texts, labels, test_size=0.2, random_state=42):
-    # Check if we have enough samples for stratified split
     unique_labels, counts = np.unique(labels, return_counts=True)
     min_samples = counts.min()
-    # Determine if stratification is safe
-    # Need at least 2 samples per class AND test_size must allow at least 1 sample per class in split
     min_test_samples = int(np.ceil(min_samples * test_size))
     min_train_samples = min_samples - min_test_samples
     use_stratify = (min_samples >= 2 and min_train_samples >= 1 and min_test_samples >= 1 and len(unique_labels) > 1)
     if not use_stratify:
-        # Use simple split without stratification
         X_train, X_test, y_train, y_test = train_test_split(
             processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
         )
     else:
-        # Try stratified split with fallback
         try:
             X_train, X_test, y_train, y_test = train_test_split(
                 processed_texts, labels, test_size=test_size, random_state=random_state, stratify=labels
             )
         except ValueError:
-            # Fallback to simple split
             X_train, X_test, y_train, y_test = train_test_split(
                 processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
             )
@@ -622,8 +515,7 @@ model_info = train_model(df['processed_text'].tolist(), df['label'].values)
 vectorizer, model, accuracy = model_info["vectorizer"], model_info["model"], model_info["accuracy"]
 # Model Performance
-st.markdown('<div class="glass-card">', unsafe_allow_html=True)
-st.markdown('<div class="section-header"><div class="section-icon">🎯</div>Model Performance</div>', unsafe_allow_html=True)
 col1, col2, col3 = st.columns(3)
@@ -680,11 +572,8 @@ with st.expander("📈 Detailed Metrics & Confusion Matrix"):
     report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
     st.dataframe(report_df, use_container_width=True, height=200)
-st.markdown('</div>', unsafe_allow_html=True)
 # Inference UI
-st.markdown('<div class="glass-card">', unsafe_allow_html=True)
-st.markdown('<div class="section-header"><div class="section-icon">✉️</div>Email Threat Scanner</div>', unsafe_allow_html=True)
 col_input, col_hints = st.columns([2, 1])
@@ -807,8 +696,6 @@ with col_hints:
     </div>
     """, unsafe_allow_html=True)
-st.markdown('</div>', unsafe_allow_html=True)
 # Footer
 st.markdown("""
 <div class="footer">

     initial_sidebar_state="collapsed"
 )
+# Premium Black & Gold CSS Styling - CLEAN VERSION (NO BOXES)
 st.markdown("""
 <style>
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700;800&display=swap');
     display: none;
 }
 /* Hero Section */
 .hero-container {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
     box-shadow: 0 8px 25px rgba(255,215,0,0.4);
     position: relative;
     z-index: 1;
 }
+/* Section Headers - CLEAN TEXT ONLY */
+.section-title {
+    font-size: 2rem;
     font-weight: 700;
+    color: #FFD700;
+    margin: 3rem 0 1.5rem 0;
+    text-align: center;
+    letter-spacing: 0.5px;
 }
 /* Stats Grid */
     box-shadow: 0 15px 40px rgba(255,215,0,0.5);
 }
 .stat-value {
     font-size: 3rem;
     font-weight: 900;
     margin-bottom: 0.5rem;
     position: relative;
     z-index: 1;
     color: #0f0f0f;
 }
     font-weight: 700;
 }
 /* Footer */
 .footer {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
     background: rgba(218,165,32,0.1);
 }
 /* Metric Cards */
 .metric-container {
     background: linear-gradient(135deg, #1a1a1a 0%, #0f0f0f 100%);
     box-shadow: 0 2px 8px rgba(0,0,0,0.3);
 }
+/* Expanders */
+.streamlit-expanderHeader {
+    background: rgba(218,165,32,0.15);
     border-radius: 12px;
+    font-weight: 600;
+    color: #f5f5f5;
 }
 /* Hide Streamlit Branding */
     try:
         return pd.read_csv(path)
     except Exception as e:
         return pd.DataFrame()
 def preprocess_text(text):
 main_csv_path = "Phishing_Email.csv"
 sample_csv_path = "Phishing_Email_Sample.csv"
+st.markdown('<div class="section-title">📂 Dataset Configuration</div>', unsafe_allow_html=True)
+uploaded_file = st.file_uploader("Upload your phishing dataset (optional)", type=["csv"], help="Upload Phishing_Email.csv for full training")
+if uploaded_file is not None:
+    df = load_csv_from_bytes(uploaded_file.read())
+elif os.path.exists(main_csv_path):
+    df = safe_read_csv(main_csv_path)
+elif os.path.exists(sample_csv_path):
+    st.info("📊 Using sample dataset for demonstration")
+    df = safe_read_csv(sample_csv_path)
+else:
+    st.info("📊 Using built-in demo dataset")
+    df = pd.DataFrame({
+        "Email Text": [
+            "Urgent! Your account has been suspended. Click http://fakebank.com to verify.",
+            "WINNER! Claim your $1000 prize now at http://scam.com before it expires!",
+            "Hi team, attached is the agenda for tomorrow's meeting. Regards.",
+            "Hello Umaima, congrats on your results. Let's celebrate this week!",
+            "Action required: Update your bank password at http://phishingsite.com immediately.",
+            "Reminder: Project deadline is next Monday. Please submit your updates.",
+            "Your PayPal account needs verification. Click here: http://fake-paypal.com",
+            "Thanks for your email. I'll review the document and get back to you tomorrow."
+        ],
+        "Email Type": [
+            "Phishing Email", "Phishing Email", "Safe Email", "Safe Email",
+            "Phishing Email", "Safe Email", "Phishing Email", "Safe Email"
+        ]
+    })
 # Clean & Prepare Dataset
 if "Unnamed: 0" in df.columns:
 safe_count = (df['label'] == 0).sum()
 total_count = len(df)
+st.markdown('<div class="section-title">📊 Dataset Statistics</div>', unsafe_allow_html=True)
 st.markdown(f"""
 <div class="stats-grid">
 with st.expander("🔍 View Dataset Preview", expanded=False):
     st.dataframe(df[[text_col, label_col]].head(10), use_container_width=True)
+# Model Training
 @st.cache_resource
 def train_model(processed_texts, labels, test_size=0.2, random_state=42):
     unique_labels, counts = np.unique(labels, return_counts=True)
     min_samples = counts.min()
     min_test_samples = int(np.ceil(min_samples * test_size))
     min_train_samples = min_samples - min_test_samples
     use_stratify = (min_samples >= 2 and min_train_samples >= 1 and min_test_samples >= 1 and len(unique_labels) > 1)
     if not use_stratify:
         X_train, X_test, y_train, y_test = train_test_split(
             processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
         )
     else:
         try:
             X_train, X_test, y_train, y_test = train_test_split(
                 processed_texts, labels, test_size=test_size, random_state=random_state, stratify=labels
             )
         except ValueError:
             X_train, X_test, y_train, y_test = train_test_split(
                 processed_texts, labels, test_size=test_size, random_state=random_state, stratify=None
             )
 vectorizer, model, accuracy = model_info["vectorizer"], model_info["model"], model_info["accuracy"]
 # Model Performance
+st.markdown('<div class="section-title">🎯 Model Performance</div>', unsafe_allow_html=True)
 col1, col2, col3 = st.columns(3)
     report_df = pd.DataFrame(model_info["report"]).transpose().round(3)
     st.dataframe(report_df, use_container_width=True, height=200)
 # Inference UI
+st.markdown('<div class="section-title">✉️ Email Threat Scanner</div>', unsafe_allow_html=True)
 col_input, col_hints = st.columns([2, 1])
     </div>
     """, unsafe_allow_html=True)
 # Footer
 st.markdown("""
 <div class="footer">