Perth0603 committed on
Commit
d384c72
·
verified ·
1 Parent(s): e23e668

Upload 4 files

Browse files
Files changed (2) hide show
  1. README.md +44 -41
  2. app.py +25 -12
README.md CHANGED
@@ -1,41 +1,44 @@
1
- ---
2
- title: PhishWatch Proxy
3
- emoji: 🛡️
4
- sdk: docker
5
- ---
6
-
7
- # Hugging Face Space - Phishing Text Classifier (Docker + FastAPI)
8
-
9
- This Space exposes two endpoints so the Flutter app can call them reliably:
10
-
11
- - `/predict` for text/email/SMS classification via Transformers
12
- - `/predict-url` for URL classification via your scikit-learn Random Forest model
13
-
14
- ## Files
15
- - Dockerfile - builds a small FastAPI server image
16
- - app.py - FastAPI app that loads the model and returns `{ label, score }`.
17
- - requirements.txt - Python dependencies.
18
-
19
- ## How to deploy
20
- 1. Create a new Space on Hugging Face (type: Docker).
21
- 2. Upload the contents of this `hf_space/` folder to the Space root (including Dockerfile).
22
- 3. In Space Settings → Variables, add:
23
- - MODEL_ID = Perth0603/phishing-email-mobilebert
24
- - URL_REPO = Perth0603/Random-Forest-Model-for-PhishingDetection
25
- - URL_FILENAME = url_rf_model.joblib (set to your artifact filename)
26
- 4. Wait for the Space to build and become green. Test:
27
- - GET `/` should return `{ status: ok, model: ... }`
28
- - POST `/predict` with `{ "inputs": "Win an iPhone! Click here" }`
29
- - POST `/predict-url` with `{ "url": "https://example.com/login" }`
30
-
31
- ## Flutter app config
32
- Set the Space URL in your env file so the app targets the Space instead of the Hosted Inference API:
33
-
34
- ```
35
- {"HF_SPACE_URL":"https://<your-space>.hf.space"}
36
- ```
37
-
38
- Run the app:
39
- ```
40
- flutter run --dart-define-from-file=hf.env.json
41
- ```
 
 
 
 
1
+ ---
2
+ title: PhishWatch Proxy
3
+ emoji: 🛡️
4
+ sdk: docker
5
+ ---
6
+
7
+ # Hugging Face Space - Phishing Text Classifier (Docker + FastAPI)
8
+
9
+ This Space exposes two endpoints so the Flutter app can call them reliably:
10
+
11
+ - `/predict` for text/email/SMS classification via Transformers. Returns `{ label, score }` where `score` is the confidence for the predicted label.
12
+ - `/predict-url` for URL classification via your URL model. Returns `{ label, score, phishing_probability, backend, threshold }` where:
13
+ - `phishing_probability` is always the raw probability of phishing (0..1)
14
+ - `label` is `PHISH` when `phishing_probability >= threshold`, else `LEGIT`
15
+ - `score` is the confidence for the predicted label (for `LEGIT`, `score = 1 - phishing_probability`), which lets the app show "Safe Confidence" for legitimate URLs
16
+
17
+ ## Files
18
+ - Dockerfile - builds a small FastAPI server image
19
+ - app.py - FastAPI app that loads the model and returns normalized responses as above.
20
+ - requirements.txt - Python dependencies.
21
+
22
+ ## How to deploy
23
+ 1. Create a new Space on Hugging Face (type: Docker).
24
+ 2. Upload the contents of this `hf_space/` folder to the Space root (including Dockerfile).
25
+ 3. In Space Settings → Variables, add:
26
+ - MODEL_ID = Perth0603/phishing-email-mobilebert
27
+ - URL_REPO = Perth0603/Random-Forest-Model-for-PhishingDetection
28
+ - URL_FILENAME = url_rf_model.joblib (set to your artifact filename)
29
+ 4. Wait for the Space to build and become green. Test:
30
+ - GET `/` should return `{ status: ok, model: ... }`
31
+ - POST `/predict` with `{ "inputs": "Win an iPhone! Click here" }`
32
+ - POST `/predict-url` with `{ "url": "https://example.com/login" }`
33
+
34
+ ## Flutter app config
35
+ Set the Space URL in your env file so the app targets the Space instead of the Hosted Inference API:
36
+
37
+ ```
38
+ {"HF_SPACE_URL":"https://<your-space>.hf.space"}
39
+ ```
40
+
41
+ Run the app:
42
+ ```
43
+ flutter run --dart-define-from-file=hf.env.json
44
+ ```
app.py CHANGED
@@ -138,40 +138,53 @@ def predict_url(payload: PredictUrlPayload):
138
 
139
  row = pd.DataFrame({url_col: [payload.url]})
140
  feats = _engineer_features(row, url_col, feature_cols)
141
-
142
- score = None
 
143
  label = None
144
 
145
  if isinstance(model_type, str) and model_type == 'xgboost_bst':
146
  if xgb is None:
147
  raise RuntimeError("xgboost is not installed but required for this model bundle.")
148
  dmat = xgb.DMatrix(feats)
149
- proba = float(model.predict(dmat)[0])
150
- score = proba
151
- label = "PHISH" if score >= 0.5 else "LEGIT"
152
  elif hasattr(model, "predict_proba"):
153
  proba = model.predict_proba(feats)[0]
154
  if len(proba) == 2:
155
- score = float(proba[1])
156
- label = "PHISH" if score >= 0.5 else "LEGIT"
157
  else:
158
  max_idx = int(np.argmax(proba))
159
- score = float(proba[max_idx])
 
160
  label = "PHISH" if max_idx == 1 else "LEGIT"
161
  else:
162
  pred = model.predict(feats)[0]
163
  if isinstance(pred, (int, float, np.integer, np.floating)):
164
  label = "PHISH" if int(pred) == 1 else "LEGIT"
165
- score = 1.0 if label == "PHISH" else 0.0
166
  else:
167
  up = str(pred).strip().upper()
168
  if up in ("PHISH", "PHISHING", "MALICIOUS"):
169
- label, score = "PHISH", 1.0
170
  else:
171
- label, score = "LEGIT", 0.0
172
  except Exception as e:
173
  return JSONResponse(status_code=500, content={"error": str(e)})
174
 
175
- return {"label": label, "score": float(score)}
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
 
 
138
 
139
  row = pd.DataFrame({url_col: [payload.url]})
140
  feats = _engineer_features(row, url_col, feature_cols)
141
+ # We standardize on producing a phishing probability first, then
142
+ # derive label and a user-facing confidence for the predicted label.
143
+ phish_proba: float | None = None
144
  label = None
145
 
146
  if isinstance(model_type, str) and model_type == 'xgboost_bst':
147
  if xgb is None:
148
  raise RuntimeError("xgboost is not installed but required for this model bundle.")
149
  dmat = xgb.DMatrix(feats)
150
+ phish_proba = float(model.predict(dmat)[0])
151
+ label = "PHISH" if phish_proba >= 0.5 else "LEGIT"
 
152
  elif hasattr(model, "predict_proba"):
153
  proba = model.predict_proba(feats)[0]
154
  if len(proba) == 2:
155
+ phish_proba = float(proba[1])
156
+ label = "PHISH" if phish_proba >= 0.5 else "LEGIT"
157
  else:
158
  max_idx = int(np.argmax(proba))
159
+ # Best-effort: treat index 1 as PHISH if present
160
+ phish_proba = float(proba[1]) if len(proba) > 1 else float(proba[max_idx])
161
  label = "PHISH" if max_idx == 1 else "LEGIT"
162
  else:
163
  pred = model.predict(feats)[0]
164
  if isinstance(pred, (int, float, np.integer, np.floating)):
165
  label = "PHISH" if int(pred) == 1 else "LEGIT"
166
+ phish_proba = 1.0 if label == "PHISH" else 0.0
167
  else:
168
  up = str(pred).strip().upper()
169
  if up in ("PHISH", "PHISHING", "MALICIOUS"):
170
+ label, phish_proba = "PHISH", 1.0
171
  else:
172
+ label, phish_proba = "LEGIT", 0.0
173
  except Exception as e:
174
  return JSONResponse(status_code=500, content={"error": str(e)})
175
 
176
+ # Ensure we have a probability value
177
+ phish_proba = float(phish_proba or 0.0)
178
+ # Display score should be the confidence of the predicted label, to match the
179
+ # text endpoint and the app UI which expects Safe Confidence for LEGIT.
180
+ display_score = phish_proba if label == "PHISH" else (1.0 - phish_proba)
181
+
182
+ return {
183
+ "label": label,
184
+ "score": float(display_score),
185
+ "phishing_probability": phish_proba,
186
+ "backend": str(model_type),
187
+ "threshold": 0.5,
188
+ }
189
 
190