CognitiveMemoryLayer-models / extractor_metrics_eval.json

Upload all trained models

e65379c verified 13 days ago

21.6 kB

	{
	"family": "extractor",
	"split": "eval",
	"overall": {
	"rows": 36000,
	"accuracy": 0.9981666666666666,
	"macro_f1": 0.9995285691468461,
	"weighted_f1": 0.9981666577932904,
	"micro_f1": 0.9981666666666666,
	"labels": [
	"constraint_scope::finance",
	"constraint_scope::food",
	"constraint_scope::general",
	"constraint_scope::health",
	"constraint_scope::none",
	"constraint_scope::social",
	"constraint_scope::tech",
	"constraint_scope::travel",
	"constraint_scope::work",
	"constraint_stability::semi_stable",
	"constraint_stability::stable",
	"constraint_stability::volatile",
	"constraint_type::causal",
	"constraint_type::constraint_other",
	"constraint_type::goal",
	"constraint_type::none",
	"constraint_type::policy",
	"constraint_type::preference",
	"constraint_type::state",
	"constraint_type::value",
	"fact_type::identity",
	"fact_type::location",
	"fact_type::none",
	"fact_type::occupation",
	"fact_type::other_fact",
	"fact_type::preference",
	"pii_presence::no_pii",
	"pii_presence::pii"
	],
	"confusion_matrix": [
	[
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	4978,
	22
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	44,
	4956
	]
	],
	"calibration_error": 0.0007514102792564575,
	"classification_report": {
	"constraint_scope::finance": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::food": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::general": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::health": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::none": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::social": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::tech": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::travel": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_scope::work": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_stability::semi_stable": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_stability::stable": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_stability::volatile": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::causal": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::constraint_other": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::goal": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::none": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::policy": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::preference": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::state": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"constraint_type::value": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::identity": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::location": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::none": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::occupation": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::other_fact": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"fact_type::preference": {
	"precision": 1.0,
	"recall": 1.0,
	"f1-score": 1.0,
	"support": 1000.0
	},
	"pii_presence::no_pii": {
	"precision": 0.9912385503783353,
	"recall": 0.9956,
	"f1-score": 0.9934144881261225,
	"support": 5000.0
	},
	"pii_presence::pii": {
	"precision": 0.995580554439534,
	"recall": 0.9912,
	"f1-score": 0.9933854479855683,
	"support": 5000.0
	},
	"accuracy": 0.9981666666666666,
	"macro avg": {
	"precision": 0.9995292537434953,
	"recall": 0.9995285714285714,
	"f1-score": 0.9995285691468461,
	"support": 36000.0
	},
	"weighted avg": {
	"precision": 0.9981693201135929,
	"recall": 0.9981666666666666,
	"f1-score": 0.9981666577932904,
	"support": 36000.0
	}
	}
	},
	"per_task": {
	"constraint_scope": {
	"rows": 9000,
	"accuracy": 1.0,
	"macro_f1": 1.0,
	"weighted_f1": 1.0,
	"micro_f1": 1.0,
	"labels": [
	"finance",
	"food",
	"general",
	"health",
	"none",
	"social",
	"tech",
	"travel",
	"work"
	],
	"confusion_matrix": [
	[
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000
	]
	],
	"wrong_task_predictions": 0,
	"wrong_task_rate": 0.0
	},
	"constraint_stability": {
	"rows": 3000,
	"accuracy": 1.0,
	"macro_f1": 1.0,
	"weighted_f1": 1.0,
	"micro_f1": 1.0,
	"labels": [
	"semi_stable",
	"stable",
	"volatile"
	],
	"confusion_matrix": [
	[
	1000,
	0,
	0
	],
	[
	0,
	1000,
	0
	],
	[
	0,
	0,
	1000
	]
	],
	"wrong_task_predictions": 0,
	"wrong_task_rate": 0.0
	},
	"constraint_type": {
	"rows": 8000,
	"accuracy": 1.0,
	"macro_f1": 1.0,
	"weighted_f1": 1.0,
	"micro_f1": 1.0,
	"labels": [
	"causal",
	"constraint_other",
	"goal",
	"none",
	"policy",
	"preference",
	"state",
	"value"
	],
	"confusion_matrix": [
	[
	1000,
	0,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	1000,
	0,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	1000,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	1000,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	1000,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	1000,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	1000,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	1000
	]
	],
	"wrong_task_predictions": 0,
	"wrong_task_rate": 0.0
	},
	"fact_type": {
	"rows": 6000,
	"accuracy": 1.0,
	"macro_f1": 1.0,
	"weighted_f1": 1.0,
	"micro_f1": 1.0,
	"labels": [
	"identity",
	"location",
	"none",
	"occupation",
	"other_fact",
	"preference"
	],
	"confusion_matrix": [
	[
	1000,
	0,
	0,
	0,
	0,
	0
	],
	[
	0,
	1000,
	0,
	0,
	0,
	0
	],
	[
	0,
	0,
	1000,
	0,
	0,
	0
	],
	[
	0,
	0,
	0,
	1000,
	0,
	0
	],
	[
	0,
	0,
	0,
	0,
	1000,
	0
	],
	[
	0,
	0,
	0,
	0,
	0,
	1000
	]
	],
	"wrong_task_predictions": 0,
	"wrong_task_rate": 0.0
	},
	"pii_presence": {
	"rows": 10000,
	"accuracy": 0.9934,
	"macro_f1": 0.9933999680558454,
	"weighted_f1": 0.9933999680558454,
	"micro_f1": 0.9934,
	"labels": [
	"no_pii",
	"pii"
	],
	"confusion_matrix": [
	[
	4978,
	22
	],
	[
	44,
	4956
	]
	],
	"wrong_task_predictions": 0,
	"wrong_task_rate": 0.0
	}
	},
	"calibration": {
	"method": "task_conditional_sigmoid",
	"split": "eval",
	"rows": 36000,
	"pre_ece": 0.06013730731601039,
	"post_ece": 0.0007514102792564575,
	"pre_accuracy": 0.9978333333333333,
	"post_accuracy": 0.9981666666666666,
	"accuracy_delta": 0.0003333333333332966,
	"tasks": {
	"constraint_scope": {
	"rows": 9000,
	"pre_ece": 0.0073926387441783925,
	"post_ece": 0.00019267901287378653,
	"pre_accuracy": 1.0,
	"post_accuracy": 1.0,
	"accuracy_delta": 0.0
	},
	"constraint_stability": {
	"rows": 3000,
	"pre_ece": 0.0035802200536977353,
	"post_ece": 0.00016981111181246789,
	"pre_accuracy": 1.0,
	"post_accuracy": 1.0,
	"accuracy_delta": 0.0
	},
	"constraint_type": {
	"rows": 8000,
	"pre_ece": 0.0152260080692046,
	"post_ece": 0.0002949549691239062,
	"pre_accuracy": 1.0,
	"post_accuracy": 1.0,
	"accuracy_delta": 0.0
	},
	"fact_type": {
	"rows": 6000,
	"pre_ece": 0.008209015508071071,
	"post_ece": 0.00021106970310058593,
	"pre_accuracy": 1.0,
	"post_accuracy": 1.0,
	"accuracy_delta": 0.0
	},
	"pii_presence": {
	"rows": 10000,
	"pre_ece": 0.1278197693399604,
	"post_ece": 0.002118116763033487,
	"pre_accuracy": 0.9922,
	"post_accuracy": 0.9934,
	"accuracy_delta": 0.0011999999999999789
	}
	}
	}
	}