Round thresholds to 3 decimal places, recompute recall/FPR
Browse files
README.md
CHANGED
|
@@ -29,7 +29,7 @@ flag = (pi >= pi_thresh) OR (pi >= pi_lower_bound AND toxic >= toxic_thresh)
|
|
| 29 |
|
| 30 |
| Parameter | Value |
|
| 31 |
|:----------|------:|
|
| 32 |
-
| `pi_thresh` | 0.
|
| 33 |
| `pi_lower_bound` | 0.50 |
|
| 34 |
| `toxic_thresh` | 1.00 |
|
| 35 |
|
|
@@ -37,14 +37,14 @@ flag = (pi >= pi_thresh) OR (pi >= pi_lower_bound AND toxic >= toxic_thresh)
|
|
| 37 |
|
| 38 |
| Dataset | Recall | FPR |
|
| 39 |
|:--------|-------:|----:|
|
| 40 |
-
| test (262K) | 43.
|
| 41 |
-
| customer_test (1.4M) | 42.
|
| 42 |
|
| 43 |
### Thresholds at 0.5% FPR
|
| 44 |
|
| 45 |
| Parameter | Value |
|
| 46 |
|:----------|------:|
|
| 47 |
-
| `pi_thresh` | 0.
|
| 48 |
| `pi_lower_bound` | 0.50 |
|
| 49 |
| `toxic_thresh` | 0.95 |
|
| 50 |
|
|
@@ -57,7 +57,7 @@ flag = (pi >= pi_thresh) OR (pi >= pi_lower_bound AND toxic >= toxic_thresh)
|
|
| 57 |
|
| 58 |
| Parameter | Value |
|
| 59 |
|:----------|------:|
|
| 60 |
-
| `pi_thresh` | 0.
|
| 61 |
| `pi_lower_bound` | 0.50 |
|
| 62 |
| `toxic_thresh` | 0.90 |
|
| 63 |
|
|
@@ -72,7 +72,7 @@ flag = (pi >= pi_thresh) OR (pi >= pi_lower_bound AND toxic >= toxic_thresh)
|
|
| 72 |
|
| 73 |
| Test FPR | pi-mmbert-v2 Recall | pi-mmbert-v3.5 Recall | Δ |
|
| 74 |
|:---------|--------------------:|----------------------:|--:|
|
| 75 |
-
| 0.
|
| 76 |
| 0.644% | 60.46% | **69.83%** | +9.37pp |
|
| 77 |
| 1.008% | 67.13% | **75.26%** | +8.13pp |
|
| 78 |
|
|
@@ -118,7 +118,7 @@ chunk_overlap = 200
|
|
| 118 |
stride = max_length - chunk_overlap # 312
|
| 119 |
|
| 120 |
# --- Tiered thresholds (0.1% FPR — PI-only, no tier rescue) ---
|
| 121 |
-
# pi_thresh = 0.
|
| 122 |
# pi_lower_bound = 0.5
|
| 123 |
# toxic_thresh = 1.00 # effectively disabled
|
| 124 |
# --- Tiered thresholds (0.5% FPR) ---
|
|
@@ -126,7 +126,7 @@ pi_thresh = 0.990
|
|
| 126 |
pi_lower_bound = 0.5
|
| 127 |
toxic_thresh = 0.95
|
| 128 |
# --- Tiered thresholds (1% FPR) ---
|
| 129 |
-
# pi_thresh = 0.
|
| 130 |
# pi_lower_bound = 0.5
|
| 131 |
# toxic_thresh = 0.90
|
| 132 |
# --- Thresholds for POV (test: recall=97.34%, FPR=9.336%) ---
|
|
|
|
| 29 |
|
| 30 |
| Parameter | Value |
|
| 31 |
|:----------|------:|
|
| 32 |
+
| `pi_thresh` | 0.996 |
|
| 33 |
| `pi_lower_bound` | 0.50 |
|
| 34 |
| `toxic_thresh` | 1.00 |
|
| 35 |
|
|
|
|
| 37 |
|
| 38 |
| Dataset | Recall | FPR |
|
| 39 |
|:--------|-------:|----:|
|
| 40 |
+
| test (262K) | 43.59% | 0.127% |
|
| 41 |
+
| customer_test (1.4M) | 42.42% | 0.388% |
|
| 42 |
|
| 43 |
### Thresholds at 0.5% FPR
|
| 44 |
|
| 45 |
| Parameter | Value |
|
| 46 |
|:----------|------:|
|
| 47 |
+
| `pi_thresh` | 0.990 |
|
| 48 |
| `pi_lower_bound` | 0.50 |
|
| 49 |
| `toxic_thresh` | 0.95 |
|
| 50 |
|
|
|
|
| 57 |
|
| 58 |
| Parameter | Value |
|
| 59 |
|:----------|------:|
|
| 60 |
+
| `pi_thresh` | 0.982 |
|
| 61 |
| `pi_lower_bound` | 0.50 |
|
| 62 |
| `toxic_thresh` | 0.90 |
|
| 63 |
|
|
|
|
| 72 |
|
| 73 |
| Test FPR | pi-mmbert-v2 Recall | pi-mmbert-v3.5 Recall | Δ |
|
| 74 |
|:---------|--------------------:|----------------------:|--:|
|
| 75 |
+
| 0.127% | 35.31% | **43.59%** | +8.28pp |
|
| 76 |
| 0.644% | 60.46% | **69.83%** | +9.37pp |
|
| 77 |
| 1.008% | 67.13% | **75.26%** | +8.13pp |
|
| 78 |
|
|
|
|
| 118 |
stride = max_length - chunk_overlap # 312
|
| 119 |
|
| 120 |
# --- Tiered thresholds (0.1% FPR — PI-only, no tier rescue) ---
|
| 121 |
+
# pi_thresh = 0.996
|
| 122 |
# pi_lower_bound = 0.5
|
| 123 |
# toxic_thresh = 1.00 # effectively disabled
|
| 124 |
# --- Tiered thresholds (0.5% FPR) ---
|
|
|
|
| 126 |
pi_lower_bound = 0.5
|
| 127 |
toxic_thresh = 0.95
|
| 128 |
# --- Tiered thresholds (1% FPR) ---
|
| 129 |
+
# pi_thresh = 0.982
|
| 130 |
# pi_lower_bound = 0.5
|
| 131 |
# toxic_thresh = 0.90
|
| 132 |
# --- Thresholds for POV (test: recall=97.34%, FPR=9.336%) ---
|