File size: 2,412 Bytes
3ed6290
 
 
 
 
 
 
 
31687e0
 
 
 
3ed6290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31687e0
 
 
 
 
 
 
 
 
 
 
3ed6290
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
{
  "model_type": "sklearn-logistic-regression",
  "embedding_model": "openai-text-embedding-3-small",
  "embedding_dim": 1536,
  "max_pages_per_doc": 20,
  "skip_boilerplate": true,
  "operating_threshold": 0.436,
  "decision_band": {
    "reject_below": 0.25,
    "fast_pass_at_or_above": 0.55,
    "escalate_between": "[0.25, 0.55)",
    "note": "Recalibrated empirically on the production score distribution. The plan-time band (0.30, 0.85) left FAST_PASS empty."
  },
  "training_data": {
    "source": "setfit_experiments PostgreSQL DB + multi-signal Phase 0 relabeling",
    "n_pages": 7129,
    "n_documents": 465,
    "binary_class_balance": {
      "positive": 3014,
      "negative": 4115
    },
    "split": {
      "train": 298,
      "val": 64,
      "test": 65,
      "train_pos": 201,
      "val_pos": 39,
      "test_pos": 47
    }
  },
  "test_metrics": {
    "name": "logreg_tuned (TEST set)",
    "threshold": 0.4359872072086175,
    "accuracy": 0.9076923076923077,
    "f1": 0.94,
    "roc_auc": 0.9550827423167849,
    "brier_score": 0.13433461274707392,
    "ece": 0.27835753511850964,
    "confusion_matrix": [
      [
        12,
        6
      ],
      [
        0,
        47
      ]
    ]
  },
  "validation_metrics": {
    "name": "LogReg @ best-threshold",
    "threshold": 0.4359872072086175,
    "accuracy": 0.859375,
    "f1": 0.8941176470588236,
    "roc_auc": 0.8748717948717949,
    "brier_score": 0.15576505514468417,
    "ece": 0.19068488965598734,
    "confusion_matrix": [
      [
        17,
        8
      ],
      [
        1,
        38
      ]
    ]
  },
  "candidates_compared": [
    "logreg_05",
    "logreg_tuned",
    "logreg_platt_05",
    "logreg_platt_tuned",
    "mlp_05",
    "mlp_tuned"
  ],
  "winner_selection_rule": "max F1 across LogReg, LogReg+Platt, MLP at best threshold; simplicity tiebreak to LogReg",
  "calibrator": {
    "filename": "ccr_binary_isotonic_calibrator.joblib",
    "method": "isotonic_prefit",
    "fit_split_seed": 42,
    "fit_split_size": 64,
    "test_ece_before": 0.27835753511850964,
    "test_ece_after": 0.08663491157117499,
    "shipped_model_filename": "ccr_binary_logreg_tuned.joblib",
    "note": "Optional artifact. Produces approximately 3 plateau outputs (0.737, 0.833, 1.000) due to the small (64-document) calibration set. Treat as a 3-level confidence rather than a fine-grained probability. See ISOTONIC_CALIBRATION_FINDINGS.md."
  }
}