sajadup committed on
Commit
591f049
·
1 Parent(s): fb18ac8

Merge remote files and add my email app

Browse files
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# 1. Create a non-root user
RUN useradd -m -u 1000 user
USER user

# 2. Set environment variables to ensure Python finds installed packages
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=/home/user/app

# Keep the working directory identical to the PYTHONPATH entry above, and
# inside the non-root user's home directory.
WORKDIR /home/user/app

# 3. Upgrade pip first (important for newer packages)
RUN pip install --no-cache-dir --upgrade pip

# 4. Copy requirements and install
COPY --chown=user requirements.txt .
# Adding --user ensures packages go to the path we set above
RUN pip install --no-cache-dir --user -r requirements.txt

# 5. Copy the rest of your app
COPY --chown=user . .

# 6. Run the app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,26 @@
1
- ---
2
- title: Email Classification
3
- emoji: 📉
4
- colorFrom: green
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 🔥 Firebase Configuration & Setup
2
+ To get this application running successfully, please follow these two essential steps to link your Firebase account.
3
+
4
+ 🔑 1. Firebase Service Account Key
5
+ The application requires a private key to authenticate with your Firebase project.
6
+
7
+ Download: Get your serviceAccountKey.json from the Firebase Console.
8
+
9
+ Place: Move the file into the root folder of this project.
10
+
11
+ [!WARNING]
12
+
13
+ Security: Ensure this file is added to your .gitignore to avoid leaking private credentials.
14
+
15
+ 🌐 2. Configure Firebase URL
16
+ Before running the program, you must update the database URL in app.py.
17
+
18
+ Open app.py.
19
+
20
+ Locate the following line:
21
+
22
+ Python
23
+
24
+ # Replace the placeholder with your actual Firebase Realtime Database URL
25
+ 'databaseURL': 'https://your-project-id.firebaseio.com/'  # <--- Add Firebase URL here
26
+ Replace 'https://your-project-id.firebaseio.com/' with your actual Firebase Realtime Database URL.
app.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import re
4
+ import torch
5
+ import numpy as np
6
+ import firebase_admin
7
+ from firebase_admin import credentials, db
8
+ from fastapi import FastAPI, Form, Request
9
+ from fastapi.responses import HTMLResponse
10
+ from transformers import (
11
+ AutoTokenizer,
12
+ AutoConfig,
13
+ DistilBertForSequenceClassification,
14
+ T5ForConditionalGeneration,
15
+ T5Tokenizer
16
+ )
17
+ from lime.lime_text import LimeTextExplainer
18
+ import uuid
19
+ from datetime import datetime
20
+
21
+ # FIREBASE INITIALIZATION
22
+ # Ensure your serviceAccountKey.json is in the same directory
23
# Initialise the default Firebase app exactly once. get_app() is the public way
# to ask whether a default app already exists: it raises ValueError when none
# does, so there is no need to inspect the private firebase_admin._apps registry.
try:
    firebase_admin.get_app()
except ValueError:
    cred = credentials.Certificate("serviceAccountKey.json")
    firebase_admin.initialize_app(cred, {
        # FIREBASE_DATABASE_URL, when set, overrides the placeholder below so
        # the URL can be configured without editing this file.
        'databaseURL': os.environ.get(
            'FIREBASE_DATABASE_URL',
            'https://your-project-id.firebaseio.com/',  # <--- Add Firebase URL here
        )
    })
28
+
29
+ app = FastAPI()
30
+
31
+ # Global variables
32
# Classifier tokenizer/model; both stay None until load_models() assigns them.
tokenizer = None
model = None
# LIME text explainer; class_names order means index 0 = LEGITIMATE, 1 = PHISHING.
explainer = LimeTextExplainer(class_names=["LEGITIMATE", "PHISHING"])
# Explanation-generation tokenizer/model; also assigned by load_models().
gen_tokenizer = None
gen_model = None
# Model directory (joined to this file's directory) and weights file name
# used by load_models().
MODEL_DIR = "phishing_model_optimized"
WEIGHTS_NAME = "quantized_model.pt"
39
+
40
+
41
+ # PHISHING INDICATOR PATTERNS (Original Rule-Based)
42
+
43
# Rule-based phishing cues, grouped by category. Every pattern is applied to
# the lower-cased email text by detect_phishing_indicators(); the category
# names are also the keys used by calculate_phishing_score().
PHISHING_PATTERNS = {
    # Pressure to act quickly.
    "urgency": [
        r"\bimmediate(ly)?\b", r"\burgent(ly)?\b", r"\bact now\b",
        r"\baction required\b", r"\bwithin \d+ (hour|minute|day)s?\b",
        r"\bexpir(e|es|ing|ed)\b", r"\bsuspended\b", r"\bcompromised\b",
        r"\bverify (now|immediately|your)\b", r"\bfailure to\b",
        r"\bpermanent(ly)?\b", r"\bdelete(d)?\b", r"\block(ed)?\b"
    ],
    # Threats against the recipient's account or legal standing.
    "threat": [
        r"\baccount.*(suspend|terminat|delet|lock|compromis)\w*",
        r"\b(suspend|terminat|delet|lock|compromis)\w*.*account\b",
        r"\blegal action\b", r"\bpenalt(y|ies)\b", r"\bconsequences\b"
    ],
    # Requests for secrets or account details.
    "credential_request": [
        r"\bpassword\b", r"\bverify your (identity|account|email)\b",
        r"\bconfirm your\b", r"\bupdate.*(payment|billing|account)\b",
        r"\bssn\b", r"\bcredit card\b", r"\bbank account\b"
    ],
    # URL shapes and call-to-click phrases.
    "suspicious_links": [
        r"https?://[^\s]*\.(xyz|tk|ml|ga|cf|gq|top|club|online)/",
        r"https?://[^\s]*-[^\s]*\.(com|net|org)/",
        r"https?://\d+\.\d+\.\d+\.\d+",
        # Word boundaries keep the short "t.co" alternative from matching
        # inside ordinary domains such as "microsoft.com".
        r"\b(?:bit\.ly|tinyurl|short\.link|t\.co)\b",
        r"click.*here|click.*below|click.*link"
    ],
    # Well-known brand names and support/security team wording.
    "impersonation": [
        r"\b(paypal|amazon|netflix|apple|microsoft|google|bank)\b",
        r"\bcustomer (service|support)\b", r"\bsecurity (team|department)\b"
    ]
}
73
+
74
def detect_phishing_indicators(text: str, patterns=None) -> dict:
    """Find rule-based phishing cues in an email.

    Args:
        text: Raw email text. Matching is done on a lower-cased copy, so the
            reported snippets are lower-case.
        patterns: Optional mapping of category name -> list of regex strings.
            Defaults to the module-level PHISHING_PATTERNS.

    Returns:
        A dict with one key per category. Each value is the list of distinct
        matched snippets, in order of first appearance (patterns are tried in
        their listed order).
    """
    if patterns is None:
        patterns = PHISHING_PATTERNS

    text_lower = text.lower()
    detected = {}
    for category, category_patterns in patterns.items():
        # group(0) is always the full matched text. re.findall would instead
        # return only the capture-group content ("ly", "" ...) for patterns
        # that contain parentheses.
        hits = [
            match.group(0)
            for pattern in category_patterns
            for match in re.finditer(pattern, text_lower, re.IGNORECASE)
        ]
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        detected[category] = list(dict.fromkeys(hits))
    return detected
85
+
86
def calculate_phishing_score(indicators: dict) -> float:
    """Turn detected indicators into a rule-based score between 0 and 1.

    Each category contributes ``weight * min(0.4 * hits, 1.0)``, where *hits*
    is the number of entries listed for that category; empty categories
    contribute nothing. The total is capped at 1.0.
    """
    category_weights = (
        ("urgency", 0.25),
        ("threat", 0.25),
        ("credential_request", 0.20),
        ("suspicious_links", 0.20),
        ("impersonation", 0.10),
    )
    total = 0.0
    for name, weight in category_weights:
        hits = indicators[name]
        if not hits:
            continue
        saturation = min(len(hits) * 0.4, 1.0)
        total += weight * saturation
    return min(total, 1.0)
94
+
95
def get_confidence_label(confidence: float) -> str:
    """Return the wording that describes a confidence score.

    Scores of at least 0.90 are "high confidence", scores of at least 0.70 are
    "moderate confidence", and everything else is "low confidence".
    """
    # Ordered from the highest threshold down; the first band reached wins.
    bands = (
        (0.90, "high confidence"),
        (0.70, "moderate confidence"),
    )
    for floor, wording in bands:
        if confidence >= floor:
            return wording
    return "low confidence"
103
+
104
def _quote_examples(items, limit=2):
    """Return the first *limit* items, each in double quotes, comma-separated."""
    return ', '.join(f'"{item}"' for item in items[:limit])


def _describe_indicators(indicators: dict) -> list:
    """Build one human-readable phrase per category that has detections."""
    details = []
    urgency = indicators["urgency"]
    threat = indicators["threat"]
    credentials_asked = indicators["credential_request"]
    links = indicators["suspicious_links"]
    brands = indicators["impersonation"]

    if urgency:
        details.append(f"high urgency language ({len(urgency)} instances: {_quote_examples(urgency)})")
    if threat:
        details.append(f"threatening tone ({len(threat)} instances: {_quote_examples(threat)})")
    if credentials_asked:
        details.append(f"credential requests ({len(credentials_asked)} instances: {_quote_examples(credentials_asked)})")
    if links:
        details.append(f"suspicious links ({len(links)} detected)")
    if brands:
        details.append(f"brand impersonation attempts ({', '.join(brands[:2])})")
    return details


def _build_explanation_prompt(indicators: dict, label: str, email_text: str) -> str:
    """Compose the instruction prompt for the PHISHING or LEGITIMATE case."""
    if label == "PHISHING":
        indicator_details = _describe_indicators(indicators)
        indicators_summary = "; ".join(indicator_details) if indicator_details else "general phishing patterns"
        return (
            "You are a cybersecurity expert. Explain why this email is phishing:\n"
            "\n"
            f'Email sample: "{email_text[:250]}"\n'
            "\n"
            f"Detected threats: {indicators_summary}\n"
            "\n"
            "Write a clear 2-sentence explanation that:\n"
            "1. States the classification with confidence level\n"
            "2. Describes specific malicious tactics (urgency, social engineering, credential harvesting, clickbait)\n"
            "3. Uses professional security terminology\n"
            "\n"
            "Explanation:"
        )

    # Any other label is treated as LEGITIMATE.
    safe_indicators = []
    if not indicators["urgency"] and not indicators["threat"]:
        safe_indicators.append("no urgency or threat language")
    if not indicators["credential_request"]:
        safe_indicators.append("no credential requests")
    if not indicators["suspicious_links"]:
        safe_indicators.append("no suspicious links")
    if not indicators["impersonation"]:
        safe_indicators.append("no brand impersonation")
    safety_summary = ", ".join(safe_indicators) if safe_indicators else "standard communication patterns"
    return (
        "You are a cybersecurity expert. Explain why this email is legitimate:\n"
        "\n"
        f'Email sample: "{email_text[:250]}"\n'
        "\n"
        f"Safety indicators: {safety_summary}\n"
        "\n"
        "Write a clear 2-sentence explanation that:\n"
        "1. States the classification with confidence level\n"
        "2. Notes the absence of social-engineering cues, suspicious tokens, or phishing tactics\n"
        "3. Uses professional security terminology\n"
        "\n"
        "Explanation:"
    )


def _fallback_explanation(indicators: dict, label: str, confidence: float, confidence_level: str) -> str:
    """Template-based explanation used when text generation is unavailable."""
    if label == "PHISHING":
        reasons = []
        if indicators["urgency"]:
            reasons.append("uses high urgency tactics")
        if indicators["threat"]:
            reasons.append("contains threatening language")
        if indicators["credential_request"]:
            reasons.append("attempts credential harvesting")
        if indicators["suspicious_links"]:
            reasons.append("includes clickbait keywords")

        reason_text = " and ".join(reasons) if reasons else "exhibits fraudulent patterns"
        return f"The email was classified as PHISHING with {confidence_level} ({confidence:.2f}). The email {reason_text} suggesting a social-engineering attempt to capture sensitive information."
    return f"The email was classified as LEGITIMATE with {confidence_level} ({confidence:.2f}). The message appears routine and contains no social-engineering cues or suspicious tokens."


def generate_explanation_with_flan(indicators: dict, label: str, confidence: float, email_text: str) -> str:
    """Generate a natural-language explanation of a classification.

    Args:
        indicators: Category -> detected snippets, as produced by
            detect_phishing_indicators().
        label: "PHISHING" or "LEGITIMATE"; any other value is handled like
            "LEGITIMATE".
        confidence: Confidence of the label, formatted with two decimals.
        email_text: The analysed email; only its first 250 characters are
            placed in the prompt.

    Returns:
        A sentence stating the label and confidence, followed by the generated
        text. If the generation model is not loaded, or generation raises, a
        fixed template explanation is returned instead.
    """
    confidence_level = get_confidence_label(confidence)

    # load_models() leaves these as None when loading fails; go straight to the
    # template rather than relying on an exception from calling None.
    if gen_tokenizer is None or gen_model is None:
        return _fallback_explanation(indicators, label, confidence, confidence_level)

    prompt = _build_explanation_prompt(indicators, label, email_text)

    try:
        inputs = gen_tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True
        )

        # Deterministic beam search. Sampling-only knobs (temperature, top_p)
        # are intentionally not passed: they have no effect when
        # do_sample=False and only trigger configuration warnings.
        with torch.no_grad():
            outputs = gen_model.generate(
                input_ids=inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_length=180,
                min_length=40,
                num_beams=5,
                length_penalty=1.2,
                early_stopping=True,
                do_sample=False,
                no_repeat_ngram_size=3
            )

        explanation = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Prefix the generated text with the label and confidence.
        return f"The email was classified as {label} with {confidence_level} ({confidence:.2f}). {explanation}"

    except Exception as e:
        # Best effort: a generation failure must not break the analysis page.
        print(f"FLAN-T5 generation error: {e}")
        return _fallback_explanation(indicators, label, confidence, confidence_level)
221
+
222
@app.on_event("startup")
def load_models():
    """Load the classifier and the explanation model at application startup.

    Populates the module-level ``tokenizer``, ``model``, ``gen_tokenizer`` and
    ``gen_model``. Loading is best effort: any exception is printed and
    whatever was not yet assigned stays ``None``.
    """
    global tokenizer, model, gen_tokenizer, gen_model
    base_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(base_path, MODEL_DIR)
    weights_path = os.path.join(model_path, WEIGHTS_NAME)
    gc.collect()
    try:
        # Load DistilBERT for classification
        tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        config = AutoConfig.from_pretrained(model_path, local_files_only=True)
        base_model = DistilBertForSequenceClassification(config)
        # Quantize the freshly built architecture first so its layers have the
        # quantized form before the saved state dict is loaded into it.
        model = torch.quantization.quantize_dynamic(base_model, {torch.nn.Linear}, dtype=torch.qint8)
        if os.path.exists(weights_path):
            # NOTE(security): torch.load unpickles the file; only load weights
            # from a trusted source.
            model.load_state_dict(torch.load(weights_path, map_location='cpu'))
        else:
            # Without the weights file the classifier keeps its random
            # initialisation, so say so instead of failing silently.
            print(f"Load Warning: weights file not found at {weights_path}; classifier is using untrained weights")
        model.eval()

        # Load FLAN-T5 for explanation generation
        print("Loading FLAN-T5 for explanation generation...")
        gen_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small", legacy=False)
        gen_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
        gen_model.eval()
        print("FLAN-T5 loaded successfully!")

    except Exception as e:
        print(f"Load Error: {e}")
248
+
249
def predictor(texts):
    """Return class probabilities for each text as a NumPy array.

    Each text is tokenized (truncated to 256 tokens) and run through the
    classifier on its own; the softmax of its logits becomes one row of the
    result. The signature (list of strings in, array of probabilities out) is
    what the LIME explainer is given as its classifier function.
    """
    def _class_probabilities(text):
        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=256)
        with torch.no_grad():
            logits = model(**encoded).logits
            return torch.softmax(logits, dim=1).numpy()[0]

    return np.array([_class_probabilities(text) for text in texts])
258
+
259
def hybrid_predict(email_text: str) -> tuple:
    """Classify an email by blending the model probability with the rule score.

    The stronger the rule-based score, the more weight it receives:
    above 0.5 it counts 70%, above 0.2 it counts 50%, otherwise 30%.

    Returns:
        ``(label, confidence, indicators)`` where *label* is "PHISHING" when
        the blended probability is at least 0.5 (else "LEGITIMATE"),
        *confidence* is the probability of the chosen label, and *indicators*
        is the output of detect_phishing_indicators().
    """
    ml_probs = predictor([email_text])[0]
    indicators = detect_phishing_indicators(email_text)
    rule_score = calculate_phishing_score(indicators)

    # Pick the (model weight, rule weight) pair for this rule score.
    if rule_score > 0.5:
        ml_weight, rule_weight = 0.3, 0.7
    elif rule_score > 0.2:
        ml_weight, rule_weight = 0.5, 0.5
    else:
        ml_weight, rule_weight = 0.7, 0.3
    combined_prob = (ml_probs[1] * ml_weight) + (rule_score * rule_weight)

    if combined_prob >= 0.5:
        return "PHISHING", combined_prob, indicators
    return "LEGITIMATE", 1 - combined_prob, indicators
271
+
272
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the landing page: one form that POSTs ``email_text`` to /predict."""
    return """
<html>
<head><title>Robust Explainable Phishing Classification</title></head>
<body style="font-family: sans-serif; background: #f4f7f6; display: flex; justify-content: center; padding-top: 50px;">
<div style="background: white; padding: 40px; border-radius: 15px; box-shadow: 0 10px 30px rgba(0,0,0,0.1); width: 800px;">
<form action="/predict" method="post">
<textarea name="email_text" rows="10" style="width: 100%; padding: 10px;" placeholder="Paste email here..." required></textarea>
<button type="submit" style="width: 100%; background: #007bff; color: white; padding: 15px; margin-top: 10px; border: none; cursor: pointer; border-radius: 8px;">Analyze & Explain</button>
</form>
</div>
</body>
</html>
"""
287
+
288
def _rating_row(field: str, heading: str, blurb: str, shaded: bool) -> str:
    """Render one 1-5 rating row of the feedback table as an HTML ``<tr>``."""
    label_background = "#f8f9fa" if shaded else "#fff"
    cell_background = " background: #f8f9fa;" if shaded else ""
    cells = []
    for score in range(1, 6):
        # Only the first radio carries "required", as in a hand-written group;
        # that is enough to make the whole radio group mandatory.
        required = " required" if score == 1 else ""
        cells.append(
            f'<td style="padding: 10px; text-align: center; border: 1px solid #dee2e6;{cell_background}">\n'
            f'<input type="radio" name="{field}" value="{score}"{required} style="width: 20px; height: 20px; cursor: pointer;">\n'
            f'</td>'
        )
    return (
        '<tr>\n'
        f'<td style="padding: 15px; border: 1px solid #dee2e6; background: {label_background};">\n'
        f'<b>{heading}</b><br>\n'
        f'<span style="font-size: 0.9em; color: #666;">{blurb}</span>\n'
        '</td>\n'
        + "\n".join(cells)
        + '\n</tr>'
    )


@app.post("/predict", response_class=HTMLResponse)
async def predict(email_text: str = Form(...)):
    """Analyse a submitted email and render the result page.

    Classifies the email, records a summary in Firebase under a fresh
    8-character token, collects LIME keywords and a generated explanation, and
    returns an HTML page with the verdict plus a feedback form tied to the
    token.
    """
    import html  # local import: only needed to escape text placed in the page

    label, confidence, indicators = hybrid_predict(email_text)

    # Generate unique token for this analysis
    unique_token = str(uuid.uuid4())[:8].upper()
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Store analysis data in Firebase for reference (the email text itself is
    # not stored, only its length).
    analysis_ref = db.reference('/analysis_records')
    analysis_ref.child(unique_token).set({
        'timestamp': timestamp,
        'label': label,
        'confidence': float(confidence),
        'email_length': len(email_text)
    })

    # Get LIME explanation for technical keywords. Best effort: any failure
    # degrades to a placeholder, but KeyboardInterrupt/SystemExit still
    # propagate because only Exception is caught.
    try:
        exp = explainer.explain_instance(email_text, predictor, num_features=6, num_samples=100)
        keyword_str = ", ".join([word for word, weight in exp.as_list() if abs(weight) > 0.01])
    except Exception:
        keyword_str = "analysis unavailable"

    # Generate natural language explanation using FLAN-T5
    clean_explanation = generate_explanation_with_flan(indicators, label, confidence, email_text)

    # Both strings derive from user-submitted text, so escape them before
    # placing them in the HTML response.
    safe_explanation = html.escape(clean_explanation)
    safe_keywords = html.escape(keyword_str)

    color = "#dc3545" if label == "PHISHING" else "#28a745"

    rating_rows = "\n".join([
        _rating_row(
            "understanding",
            "1. Decision Clarity:",
            "The explanation helped me understand the result.",
            shaded=False,
        ),
        _rating_row(
            "clarity",
            "2. Information Focus:",
            "The explanation was concise and essential.",
            shaded=True,
        ),
    ])

    header_cells = "\n".join(
        f'<th style="padding: 10px; text-align: center; border: 1px solid #dee2e6; width: 10%;">{score}</th>'
        for score in range(1, 6)
    )

    # HTML Result with Enhanced Feedback Form
    return f"""
<div style="font-family: sans-serif; max-width: 900px; margin: auto; padding: 20px;">
<div style="background: {color}; color: white; padding: 20px; border-radius: 20px; text-align: center;">
<h1 style="margin: 0;">{label}</h1>
<p>Confidence: {confidence:.2%}</p>
</div>
<div style="background: white; padding: 30px; border-radius: 15px; margin-top: 30px; box-shadow: 0 5px 15px rgba(0,0,0,0.05);">
<h3>Security Analysis (FLAN-T5 Generated)</h3>
<p style="font-size: 1.1em;">{safe_explanation}</p>
<p><b>Technical Triggers (LIME):</b> {safe_keywords}</p>

<hr style="margin: 40px 0;">

<h3>Help Us Improve (XAI Evaluation)</h3>
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
<p style="margin: 5px 0;"><b>Analysis Token:</b> <span style="font-family: monospace; font-size: 1.2em; color: {color}; font-weight: bold;">{unique_token}</span></p>
<p style="margin: 5px 0; font-size: 0.9em; color: #666;">Please save this token for your records</p>
</div>

<form action="/feedback" method="post">
<input type="hidden" name="token" value="{unique_token}">

<table style="width: 100%; border-collapse: collapse; margin: 20px 0;">
<thead>
<tr style="background: #f1f3f5;">
<th style="padding: 15px; text-align: left; border: 1px solid #dee2e6; width: 50%;">Evaluation Criteria</th>
{header_cells}
</tr>
</thead>
<tbody>
{rating_rows}
</tbody>
</table>

<p style="font-size: 0.85em; color: #666; text-align: center; margin-top: 10px;">
Rating Scale: 1 = Strongly Disagree | 5 = Strongly Agree
</p>

<button type="submit" style="width: 100%; background: #28a745; color: white; padding: 12px; border: none; border-radius: 5px; cursor: pointer; font-size: 1.05em; margin-top: 15px;">Submit Feedback to Firebase</button>
</form>
</div>

<div style="text-align: center; margin-top: 20px;">
<a href="/" style="color: #007bff; text-decoration: none;">← Analyze Another Email</a>
</div>
</div>
"""
410
+
411
@app.post("/feedback")
async def save_feedback(token: str = Form(...), understanding: int = Form(...), clarity: int = Form(...)):
    """Store a feedback submission in Firebase and render a thank-you page.

    Args:
        token: Analysis token submitted by the feedback form.
        understanding: Rating from the "understanding" radio group.
        clarity: Rating from the "clarity" radio group.

    Returns:
        An HTMLResponse confirming receipt and echoing the token.
    """
    import html  # local import: only needed to escape the echoed token

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Save feedback with token reference
    feedback_ref = db.reference('/xai_feedback')
    feedback_ref.push({
        'token': token,
        'understanding': understanding,
        'clarity': clarity,
        'timestamp': timestamp
    })

    # The token is a client-supplied form field, so it must be escaped before
    # being reflected into the page.
    safe_token = html.escape(token)

    return HTMLResponse(f"""
<div style="font-family: sans-serif; max-width: 800px; margin: 100px auto; padding: 40px; background: white; border-radius: 15px; box-shadow: 0 10px 30px rgba(0,0,0,0.1); text-align: center;">
<h2 style="color: #28a745;">Feedback Received!</h2>
<p>Thank you for contributing to our research.</p>
<p><b>Your Token:</b> <span style="font-family: monospace; font-size: 1.2em; color: #007bff;">{safe_token}</span></p>
<a href="/" style="display: inline-block; margin-top: 20px; padding: 12px 30px; background: #007bff; color: white; text-decoration: none; border-radius: 5px;">Go back to Home</a>
</div>
""")
432
+
433
# Entry point when this file is executed directly: serve the app with uvicorn
# on 127.0.0.1:8000.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)
phishing_model_optimized/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForMaskedLM"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "dtype": "float32",
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "transformers_version": "4.57.3",
23
+ "vocab_size": 30522
24
+ }
phishing_model_optimized/quantized_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4baee6ffe5da36849e00cd0180354af5d72d80968cf85fb2efb809b4ea80ef2
3
+ size 138716071
phishing_model_optimized/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
phishing_model_optimized/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
phishing_model_optimized/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
phishing_model_optimized/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Index options are global in a requirements file and must sit on their own
# line; they are not accepted as per-requirement options. The extra index keeps
# PyPI available for every other package while exposing the CPU-only torch wheels.
--extra-index-url https://download.pytorch.org/whl/cpu
fastapi
uvicorn
python-multipart
transformers
# Required by transformers' T5Tokenizer, which app.py uses for FLAN-T5.
sentencepiece
firebase-admin
lime
scikit-learn
numpy
torch