GoverningDocs
/

ccr-binary-logreg

Text Classification

sklearn-logistic-regression

document-classification

binary-classification

legal-documents

property-management

declaration-of-covenants

logistic-regression

Eval Results (legacy)

Model card Files Files and versions

ccr-binary-logreg / config.json

hslee1981's picture

T18 Phase 1 Tier 1: config snapshot

31687e0 verified 9 days ago

history blame contribute delete

2.41 kB

	{
	"model_type": "sklearn-logistic-regression",
	"embedding_model": "openai-text-embedding-3-small",
	"embedding_dim": 1536,
	"max_pages_per_doc": 20,
	"skip_boilerplate": true,
	"operating_threshold": 0.436,
	"decision_band": {
	"reject_below": 0.25,
	"fast_pass_at_or_above": 0.55,
	"escalate_between": "(0.25, 0.55)",
	"note": "Recalibrated empirically on production score distribution. Plan-time (0.30, 0.85) left FAST_PASS empty."
	},
	"training_data": {
	"source": "setfit_experiments PostgreSQL DB + multi-signal Phase 0 relabeling",
	"n_pages": 7129,
	"n_documents": 465,
	"binary_class_balance": {
	"positive": 3014,
	"negative": 4115
	},
	"split": {
	"train": 298,
	"val": 64,
	"test": 65,
	"train_pos": 201,
	"val_pos": 39,
	"test_pos": 47
	}
	},
	"test_metrics": {
	"name": "logreg_tuned (TEST set)",
	"threshold": 0.4359872072086175,
	"accuracy": 0.9076923076923077,
	"f1": 0.94,
	"roc_auc": 0.9550827423167849,
	"brier_score": 0.13433461274707392,
	"ece": 0.27835753511850964,
	"confusion_matrix": [
	[
	12,
	6
	],
	[
	0,
	47
	]
	]
	},
	"validation_metrics": {
	"name": "LogReg @ best-threshold",
	"threshold": 0.4359872072086175,
	"accuracy": 0.859375,
	"f1": 0.8941176470588236,
	"roc_auc": 0.8748717948717949,
	"brier_score": 0.15576505514468417,
	"ece": 0.19068488965598734,
	"confusion_matrix": [
	[
	17,
	8
	],
	[
	1,
	38
	]
	]
	},
	"candidates_compared": [
	"logreg_05",
	"logreg_tuned",
	"logreg_platt_05",
	"logreg_platt_tuned",
	"mlp_05",
	"mlp_tuned"
	],
	"winner_selection_rule": "max F1 across LogReg, LogReg+Platt, MLP at best threshold; simplicity tiebreak to LogReg",
	"calibrator": {
	"filename": "ccr_binary_isotonic_calibrator.joblib",
	"method": "isotonic_prefit",
	"fit_split_seed": 42,
	"fit_split_size": 64,
	"test_ece_before": 0.27835753511850964,
	"test_ece_after": 0.08663491157117499,
	"shipped_model_filename": "ccr_binary_logreg_tuned.joblib",
	"note": "Optional artifact. Produces approximately 3 plateau outputs (0.737, 0.833, 1.000) due to small (~70-doc) calibration set. Treat as 3-level confidence rather than fine-grained probability. See ISOTONIC_CALIBRATION_FINDINGS.md."
	}
	}