Update app.py
app.py CHANGED
@@ -123,4 +123,122 @@ def _predict_texts(texts: List[str]) -> List[Dict]:
         norm_label = _normalize_label(raw_label)
 
         # Also expose per-label probabilities (normalized names where possible)
-        prob_map = {_normalize_label(
+        prob_map = {_normalize_label(labels_by_idx[j]): float(p[j].item()) for j in range(len(labels_by_idx))}
+
+        # Map to your dataset convention: PHISH=1, LEGIT=0
+        ds_label = None
+        if _IDX_PHISH is not None and _IDX_LEGIT is not None:
+            if idx == _IDX_PHISH:
+                ds_label = 1
+            elif idx == _IDX_LEGIT:
+                ds_label = 0
+
+        # Per-dataset-label probabilities when both indices are known
+        probs_by_dataset = None
+        if _IDX_PHISH is not None and _IDX_LEGIT is not None:
+            probs_by_dataset = {
+                "1": float(p[_IDX_PHISH].item()),  # PHISH
+                "0": float(p[_IDX_LEGIT].item()),  # LEGIT
+            }
+
+        outputs.append(
+            {
+                "label": norm_label,                  # normalized (e.g., PHISH/LEGIT)
+                "raw_label": raw_label,               # from model.config.id2label
+                "score": float(p[idx].item()),        # max class probability
+                "probs": prob_map,                    # dict of normalized label -> probability
+                "predicted_index": idx,               # model argmax index
+                "predicted_dataset_label": ds_label,  # 1 for PHISH, 0 for LEGIT (your convention)
+                "probs_by_dataset_label": probs_by_dataset,
+            }
+        )
+
+    return outputs
+
+
+@app.get("/")
+def root():
+    return {"status": "ok", "model": MODEL_ID}
+
+
+@app.get("/debug/labels")
+def debug_labels():
+    _load_model()
+    return {
+        "id2label": getattr(_model.config, "id2label", {}),
+        "label2id": getattr(_model.config, "label2id", {}),
+        "num_labels": int(getattr(_model.config, "num_labels", 0)),
+        "device": _device,
+        "norm_labels_by_idx": _NORM_LABELS_BY_IDX,
+        "idx_phish": _IDX_PHISH,
+        "idx_legit": _IDX_LEGIT,
+    }
+
+
+@app.post("/predict")
+def predict(payload: PredictPayload):
+    try:
+        res = _predict_texts([payload.inputs])
+        return res[0]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Prediction error: {e}")
+
+
+@app.post("/predict-batch")
+def predict_batch(payload: BatchPredictPayload):
+    try:
+        return _predict_texts(payload.inputs)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Batch prediction error: {e}")
+
+
+@app.post("/evaluate")
+def evaluate(payload: EvalPayload):
+    """
+    Quick on-the-spot test with provided labeled samples.
+
+    Request body:
+    {
+      "samples": [
+        {"text": "Your parcel is held...", "label": "PHISH"},   # or "1"
+        {"text": "Lunch at 12?", "label": "LEGIT"}              # or "0"
+      ]
+    }
+
+    Returns accuracy and per-class counts.
+    """
+    try:
+        texts = [s.text for s in payload.samples]
+        gts = [(_normalize_label(s.label) if s.label is not None else None) for s in payload.samples]
+        preds = _predict_texts(texts)
+
+        total = len(preds)
+        correct = 0
+        per_class: Dict[str, Dict[str, int]] = {}
+
+        for gt, pr in zip(gts, preds):
+            pred_label = pr["label"]
+            if gt is not None:
+                correct += int(gt == pred_label)
+                per_class.setdefault(gt, {"tp": 0, "count": 0})
+                per_class[gt]["count"] += 1
+                if gt == pred_label:
+                    per_class[gt]["tp"] += 1
+
+        has_gts = any(gt is not None for gt in gts)
+        acc = (correct / sum(1 for gt in gts if gt is not None)) if has_gts else None
+
+        return {
+            "accuracy": acc,  # None if no ground truths provided
+            "total": total,
+            "predictions": preds,
+            "per_class": per_class,
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Evaluation error: {e}")
+
+
+if __name__ == "__main__":
+    # Run: uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
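For reference, here is a minimal client sketch (not part of this commit) showing how the /predict and /predict-batch endpoints added above could be called against a locally running instance. It assumes the server was started as in the __main__ block, and that the {"inputs": ...} body shape matches the PredictPayload/BatchPredictPayload models, which are defined earlier in app.py and not shown in this diff; the field name is inferred from payload.inputs in the handlers.

# Hypothetical client sketch (not part of app.py): exercises /predict and /predict-batch.
# Assumptions: the app is running locally on port 8000 (see the __main__ block) and the
# payload models accept {"inputs": <str>} and {"inputs": [<str>, ...]} respectively,
# as suggested by payload.inputs in the handlers above.
import requests

BASE_URL = "http://localhost:8000"  # assumption: local uvicorn run

# Single text -> one result dict (label, raw_label, score, probs, predicted_index, ...)
single = requests.post(f"{BASE_URL}/predict", json={"inputs": "Your parcel is held..."})
single.raise_for_status()
result = single.json()
print(result["label"], result["score"])

# Batch of texts -> list of result dicts, in input order
batch = requests.post(
    f"{BASE_URL}/predict-batch",
    json={"inputs": ["Your parcel is held...", "Lunch at 12?"]},
)
batch.raise_for_status()
for item in batch.json():
    print(item["predicted_dataset_label"], item["label"])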
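Similarly, a hedged sketch of a labeled spot-check via /evaluate, mirroring the request body documented in the endpoint's docstring; the base URL and anything beyond that docstring are assumptions.

# Hypothetical client sketch for /evaluate (not part of app.py), mirroring the docstring's body.
import requests

resp = requests.post(
    "http://localhost:8000/evaluate",  # assumption: local uvicorn run
    json={
        "samples": [
            {"text": "Your parcel is held...", "label": "PHISH"},  # or "1"
            {"text": "Lunch at 12?", "label": "LEGIT"},            # or "0"
        ]
    },
)
resp.raise_for_status()
report = resp.json()
print(report["accuracy"], report["per_class"])  # accuracy is None if no labels were given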