Spaces:

Perth0603
/

phishwatch-proxy

Sleeping

App Files Community

Perth0603 committed on Nov 3, 2025

Commit

9b309b6

verified ·

1 Parent(s): 2b92082

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -110

app.py CHANGED Viewed

@@ -13,18 +13,7 @@ MODEL_ID = (
     or "Perth0603/phishing-email-mobilebert"
 )
-# =========================
-# 数据集 0/1 映射的可配置开关
-# =========================
-# 如果你的 CSV 中 1=PHISH，0=LEGIT（常见约定），保持默认即可
-# 如果你的 CSV 中 0=PHISH，1=LEGIT，请把 DATASET_PHISH_VALUE 设为 "0"
-DATASET_PHISH_VALUE = (os.environ.get("DATASET_PHISH_VALUE") or "1").strip()
-if DATASET_PHISH_VALUE not in {"0", "1"}:
-    DATASET_PHISH_VALUE = "1"  # 容错：非法值时回退到默认
-DATASET_LEGIT_VALUE = "0" if DATASET_PHISH_VALUE == "1" else "1"
-app = FastAPI(title="Phishing Text Classifier", version="1.3.0")
 class PredictPayload(BaseModel):
@@ -37,7 +26,7 @@ class BatchPredictPayload(BaseModel):
 class LabeledText(BaseModel):
     text: str
-    label: Optional[str] = None  # optional ground truth for quick eval (accepts "0"/"1" or text)
 class EvalPayload(BaseModel):
@@ -49,47 +38,25 @@ _model = None
 _device = "cpu"
 # Cached normalized mapping/meta
-_IDX_PHISH = None           # model output index that corresponds to PHISH
-_IDX_LEGIT = None           # model output index that corresponds to LEGIT
 _NORM_LABELS_BY_IDX = None  # normalized labels ordered by model indices
 def _normalize_label_text_only(txt: str) -> str:
     """
-    仅做文字标准化，不解读 "0"/"1"。
-    用于模型 id2label -> 统一为 PHISH/LEGIT。
     """
     t = (str(txt) if txt is not None else "").strip().upper()
     if t in ("PHISHING", "PHISH", "SPAM"):
         return "PHISH"
     if t in ("LEGIT", "LEGITIMATE", "SAFE", "HAM"):
         return "LEGIT"
     return t
-def _normalize_label_from_dataset(txt: str) -> Optional[str]:
-    """
-    把来自 CSV 的 "0"/"1" 或文字标签，统一成 PHISH/LEGIT。
-    这里会按 DATASET_PHISH_VALUE/LEGIT_VALUE 来解释 "0"/"1"。
-    返回 None 表示无法识别（比如空）。
-    """
-    if txt is None:
-        return None
-    t = str(txt).strip().upper()
-    if t in ("0", "1"):
-        if t == DATASET_PHISH_VALUE:
-            return "PHISH"
-        else:
-            return "LEGIT"
-    # 文字也支持
-    t2 = _normalize_label_text_only(t)
-    if t2 in ("PHISH", "LEGIT"):
-        return t2
-    return None
 def _load_model():
-    global _tokenizer, _model, _device, _IDX_PHISH, _IDX_LEGIT, _NORM_LABELS_BY_IDX
     if _tokenizer is None or _model is None:
         _device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -98,36 +65,29 @@ def _load_model():
         _model.to(_device)
         _model.eval()  # important: disable dropout etc.
-        # Warm-up
         with torch.no_grad():
             _ = _model(
                 **_tokenizer(["warm up"], return_tensors="pt", padding=True, truncation=True, max_length=512)
                 .to(_device)
             ).logits
-        # 读取并标准化模型标签（按索引顺序）
         id2label = getattr(_model.config, "id2label", {}) or {}
         num_labels = int(getattr(_model.config, "num_labels", 0) or 0)
         _NORM_LABELS_BY_IDX = [_normalize_label_text_only(id2label.get(i, f"LABEL_{i}")) for i in range(num_labels)]
-        # 找出 PHISH/LEGIT 在 logits 中的索引
-        try:
-            _IDX_PHISH = _NORM_LABELS_BY_IDX.index("PHISH")
-        except ValueError:
-            _IDX_PHISH = None
-        try:
-            _IDX_LEGIT = _NORM_LABELS_BY_IDX.index("LEGIT")
-        except ValueError:
-            _IDX_LEGIT = None
-        # 若模型没提供可识别的标签，但只有 2 类，给出安全的保守默认（不强行假设）
-        # 这里不自动假设 0/1 的含义，避免再次反转；保留 None，让下游概率照常返回。
-        # 你也可以按需启用：
-        # if _IDX_PHISH is None and _IDX_LEGIT is None and num_labels == 2:
-        #     _IDX_LEGIT, _IDX_PHISH = 0, 1
 def _predict_texts(texts: List[str]) -> List[Dict]:
     _load_model()
     if not texts:
         return []
@@ -148,39 +108,33 @@ def _predict_texts(texts: List[str]) -> List[Dict]:
     # Use the model’s own mapping
     id2label = getattr(_model.config, "id2label", None) or {}
-    labels_by_idx = [_normalize_label_text_only(id2label.get(i, f"LABEL_{i}")) for i in range(probs.shape[-1])]
     outputs: List[Dict] = []
     for i in range(probs.shape[0]):
         p = probs[i]
         idx = int(torch.argmax(p).item())
-        norm_label = labels_by_idx[idx]  # 已标准化为 PHISH/LEGIT 或原样回传
-        # 构建（标准化后的）各类概率映射
         prob_map: Dict[str, float] = {}
-        for j, lbl in enumerate(labels_by_idx):
-            key = lbl if lbl in ("PHISH", "LEGIT") else f"CLASS_{j}"
             prob_map[key] = float(p[j].item())
-        # ——把预测映射回你的 CSV 0/1——
-        # 只有在我们确实知道哪个 index 是 PHISH / LEGIT 时才赋值；否则返回 None，避免误导
-        ds_label: Optional[int] = None
-        probs_by_dataset: Optional[Dict[str, float]] = None
-        if _IDX_PHISH is not None and _IDX_LEGIT is not None:
-            ds_label = int(DATASET_PHISH_VALUE) if idx == _IDX_PHISH else int(DATASET_LEGIT_VALUE)
-            probs_by_dataset = {
-                DATASET_PHISH_VALUE: float(p[_IDX_PHISH].item()),   # 数据集里代表 PHISH 的数值（"0" 或 "1"）
-                DATASET_LEGIT_VALUE: float(p[_IDX_LEGIT].item()),   # 数据集里代表 LEGIT 的数值
-            }
         outputs.append(
             {
-                "label": norm_label if norm_label in ("PHISH", "LEGIT") else norm_label,  # 文字结果
-                "score": float(p[idx].item()),       # max class probability
-                "probs": prob_map,                   # 每类概率（键为 PHISH/LEGIT 或 CLASS_k）
-                "predicted_index": idx,              # 模型 argmax 索引
-                "predicted_dataset_label": ds_label, # 用你的数据集 0/1 表示的预测（对齐到 DATASET_*_VALUE）
-                "probs_by_dataset_label": probs_by_dataset,
             }
         )
@@ -189,13 +143,11 @@ def _predict_texts(texts: List[str]) -> List[Dict]:
 @app.get("/")
 def root():
     return {
         "status": "ok",
         "model": MODEL_ID,
-        "dataset_mapping": {
-            "PHISH_VALUE": DATASET_PHISH_VALUE,
-            "LEGIT_VALUE": DATASET_LEGIT_VALUE,
-        },
     }
@@ -208,12 +160,6 @@ def debug_labels():
         "num_labels": int(getattr(_model.config, "num_labels", 0)),
         "device": _device,
         "norm_labels_by_idx": _NORM_LABELS_BY_IDX,
-        "idx_phish": _IDX_PHISH,
-        "idx_legit": _IDX_LEGIT,
-        "dataset_mapping": {
-            "PHISH_VALUE": DATASET_PHISH_VALUE,
-            "LEGIT_VALUE": DATASET_LEGIT_VALUE,
-        },
     }
@@ -238,21 +184,12 @@ def predict_batch(payload: BatchPredictPayload):
 def evaluate(payload: EvalPayload):
     """
     Quick on-the-spot test with provided labeled samples.
-    Request body:
-    {
-      "samples": [
-        {"text": "Your parcel is held...", "label": "PHISH"},  # or "0"/"1"（按你的数据集约定）
-        {"text": "Lunch at 12?", "label": "LEGIT"}             # or "0"/"1"
-      ]
-    }
-    Returns accuracy and per-class counts (labels normalized to PHISH/LEGIT).
     """
     try:
         texts = [s.text for s in payload.samples]
-        # 这里用数据集映射把 "0"/"1" 转成人类可读的 PHISH/LEGIT
-        gts = [_normalize_label_from_dataset(s.label) if s.label is not None else None for s in payload.samples]
         preds = _predict_texts(texts)
         total = len(preds)
@@ -260,8 +197,8 @@ def evaluate(payload: EvalPayload):
         per_class: Dict[str, Dict[str, int]] = {}
         for gt, pr in zip(gts, preds):
-            pred_label = pr["label"] if pr["label"] in ("PHISH", "LEGIT") else None
-            if gt is not None and pred_label is not None:
                 correct += int(gt == pred_label)
                 per_class.setdefault(gt, {"tp": 0, "count": 0})
                 per_class[gt]["count"] += 1
@@ -269,18 +206,13 @@ def evaluate(payload: EvalPayload):
                     per_class[gt]["tp"] += 1
         has_gts = any(gt is not None for gt in gts)
-        denom = sum(1 for gt in gts if gt is not None)
-        acc = (correct / denom) if (has_gts and denom > 0) else None
         return {
-            "accuracy": acc,            # None if no ground truths provided
             "total": total,
             "predictions": preds,
             "per_class": per_class,
-            "dataset_mapping": {
-                "PHISH_VALUE": DATASET_PHISH_VALUE,
-                "LEGIT_VALUE": DATASET_LEGIT_VALUE,
-            },
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Evaluation error: {e}")

     or "Perth0603/phishing-email-mobilebert"
 )
+app = FastAPI(title="Phishing Text Classifier (Model-Authoritative)", version="1.0.0")
 class PredictPayload(BaseModel):
 class LabeledText(BaseModel):
     text: str
+    label: Optional[str] = None  # optional ground truth for quick eval (accepts text)
 class EvalPayload(BaseModel):
 _device = "cpu"
 # Cached normalized mapping/meta
 _NORM_LABELS_BY_IDX = None  # normalized labels ordered by model indices
 def _normalize_label_text_only(txt: str) -> str:
     """
+    Normalize model label text to PHISH/LEGIT when possible.
+    If unfamiliar, return the uppercased original token.
     """
     t = (str(txt) if txt is not None else "").strip().upper()
     if t in ("PHISHING", "PHISH", "SPAM"):
         return "PHISH"
     if t in ("LEGIT", "LEGITIMATE", "SAFE", "HAM"):
         return "LEGIT"
+    # keep other label names as-is (uppercased) so we don't force an incorrect mapping
     return t
 def _load_model():
+    global _tokenizer, _model, _device, _NORM_LABELS_BY_IDX
     if _tokenizer is None or _model is None:
         _device = "cuda" if torch.cuda.is_available() else "cpu"
         _model.to(_device)
         _model.eval()  # important: disable dropout etc.
+        # Warm-up (silent)
         with torch.no_grad():
             _ = _model(
                 **_tokenizer(["warm up"], return_tensors="pt", padding=True, truncation=True, max_length=512)
                 .to(_device)
             ).logits
+        # Read and normalize model labels (by index)
         id2label = getattr(_model.config, "id2label", {}) or {}
         num_labels = int(getattr(_model.config, "num_labels", 0) or 0)
         _NORM_LABELS_BY_IDX = [_normalize_label_text_only(id2label.get(i, f"LABEL_{i}")) for i in range(num_labels)]
 def _predict_texts(texts: List[str]) -> List[Dict]:
+    """
+    Predict and return strictly model-authoritative outputs:
+    - label: normalized model label (PHISH/LEGIT or other model label uppercased)
+    - raw_label: original id2label string from model.config
+    - is_phish: boolean derived from normalized label (True if normalized == "PHISH")
+    - score: probability of predicted class
+    - probs: dict of normalized label -> probability (or CLASS_i keys if unknown)
+    - predicted_index: argmax index
+    """
     _load_model()
     if not texts:
         return []
     # Use the model’s own mapping
     id2label = getattr(_model.config, "id2label", None) or {}
+    labels_by_idx_raw = [id2label.get(i, f"LABEL_{i}") for i in range(probs.shape[-1])]
+    # normalized labels where possible
+    labels_by_idx_norm = [_normalize_label_text_only(lbl) for lbl in labels_by_idx_raw]
     outputs: List[Dict] = []
     for i in range(probs.shape[0]):
         p = probs[i]
         idx = int(torch.argmax(p).item())
+        raw_label = labels_by_idx_raw[idx]
+        norm_label = labels_by_idx_norm[idx]  # normalized where possible
+        # Build probability map keyed by normalized labels when available,
+        # otherwise fallback to CLASS_i keys to avoid collision
         prob_map: Dict[str, float] = {}
+        for j, lbl_norm in enumerate(labels_by_idx_norm):
+            key = lbl_norm if lbl_norm in ("PHISH", "LEGIT") else f"CLASS_{j}"
             prob_map[key] = float(p[j].item())
         outputs.append(
             {
+                "label": norm_label,                    # authoritative label (model-driven, normalized)
+                "raw_label": raw_label,                 # original model id2label value
+                "is_phish": True if norm_label == "PHISH" else False,
+                "score": float(p[idx].item()),          # probability of predicted class
+                "probs": prob_map,                      # per-class probabilities (keys normalized or CLASS_i)
+                "predicted_index": idx,
             }
         )
 @app.get("/")
 def root():
+    _load_model()
     return {
         "status": "ok",
         "model": MODEL_ID,
+        "note": "This service returns predictions exactly as the model decides (label derived from model.config.id2label). Frontend should use `label` or `is_phish` as authority."
     }
         "num_labels": int(getattr(_model.config, "num_labels", 0)),
         "device": _device,
         "norm_labels_by_idx": _NORM_LABELS_BY_IDX,
     }
 def evaluate(payload: EvalPayload):
     """
     Quick on-the-spot test with provided labeled samples.
+    The provided labels are interpreted as text labels (PHISH/LEGIT/etc.) — evaluation is done
+    by comparing normalized GT text to model's normalized prediction (no 0/1 dataset mapping applied).
     """
     try:
         texts = [s.text for s in payload.samples]
+        gts = [(_normalize_label_text_only(s.label) if s.label is not None else None) for s in payload.samples]
         preds = _predict_texts(texts)
         total = len(preds)
         per_class: Dict[str, Dict[str, int]] = {}
         for gt, pr in zip(gts, preds):
+            pred_label = pr["label"]
+            if gt is not None:
                 correct += int(gt == pred_label)
                 per_class.setdefault(gt, {"tp": 0, "count": 0})
                 per_class[gt]["count"] += 1
                     per_class[gt]["tp"] += 1
         has_gts = any(gt is not None for gt in gts)
+        acc = (correct / sum(1 for gt in gts if gt is not None)) if has_gts else None
         return {
+            "accuracy": acc,
             "total": total,
             "predictions": preds,
             "per_class": per_class,
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Evaluation error: {e}")