CognitiveMemoryLayer-models / extractor_metrics_eval.json
avinashm's picture
Upload all trained models
e65379c verified
{
"family": "extractor",
"split": "eval",
"overall": {
"rows": 36000,
"accuracy": 0.9981666666666666,
"macro_f1": 0.9995285691468461,
"weighted_f1": 0.9981666577932904,
"micro_f1": 0.9981666666666666,
"labels": [
"constraint_scope::finance",
"constraint_scope::food",
"constraint_scope::general",
"constraint_scope::health",
"constraint_scope::none",
"constraint_scope::social",
"constraint_scope::tech",
"constraint_scope::travel",
"constraint_scope::work",
"constraint_stability::semi_stable",
"constraint_stability::stable",
"constraint_stability::volatile",
"constraint_type::causal",
"constraint_type::constraint_other",
"constraint_type::goal",
"constraint_type::none",
"constraint_type::policy",
"constraint_type::preference",
"constraint_type::state",
"constraint_type::value",
"fact_type::identity",
"fact_type::location",
"fact_type::none",
"fact_type::occupation",
"fact_type::other_fact",
"fact_type::preference",
"pii_presence::no_pii",
"pii_presence::pii"
],
"confusion_matrix": [
[
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1000,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
4978,
22
],
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
44,
4956
]
],
"calibration_error": 0.0007514102792564575,
"classification_report": {
"constraint_scope::finance": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::food": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::general": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::health": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::none": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::social": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::tech": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::travel": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_scope::work": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_stability::semi_stable": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_stability::stable": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_stability::volatile": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::causal": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::constraint_other": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::goal": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::none": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::policy": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::preference": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::state": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"constraint_type::value": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::identity": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::location": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::none": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::occupation": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::other_fact": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"fact_type::preference": {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1000.0
},
"pii_presence::no_pii": {
"precision": 0.9912385503783353,
"recall": 0.9956,
"f1-score": 0.9934144881261225,
"support": 5000.0
},
"pii_presence::pii": {
"precision": 0.995580554439534,
"recall": 0.9912,
"f1-score": 0.9933854479855683,
"support": 5000.0
},
"accuracy": 0.9981666666666666,
"macro avg": {
"precision": 0.9995292537434953,
"recall": 0.9995285714285714,
"f1-score": 0.9995285691468461,
"support": 36000.0
},
"weighted avg": {
"precision": 0.9981693201135929,
"recall": 0.9981666666666666,
"f1-score": 0.9981666577932904,
"support": 36000.0
}
}
},
"per_task": {
"constraint_scope": {
"rows": 9000,
"accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"micro_f1": 1.0,
"labels": [
"finance",
"food",
"general",
"health",
"none",
"social",
"tech",
"travel",
"work"
],
"confusion_matrix": [
[
1000,
0,
0,
0,
0,
0,
0,
0,
0
],
[
0,
1000,
0,
0,
0,
0,
0,
0,
0
],
[
0,
0,
1000,
0,
0,
0,
0,
0,
0
],
[
0,
0,
0,
1000,
0,
0,
0,
0,
0
],
[
0,
0,
0,
0,
1000,
0,
0,
0,
0
],
[
0,
0,
0,
0,
0,
1000,
0,
0,
0
],
[
0,
0,
0,
0,
0,
0,
1000,
0,
0
],
[
0,
0,
0,
0,
0,
0,
0,
1000,
0
],
[
0,
0,
0,
0,
0,
0,
0,
0,
1000
]
],
"wrong_task_predictions": 0,
"wrong_task_rate": 0.0
},
"constraint_stability": {
"rows": 3000,
"accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"micro_f1": 1.0,
"labels": [
"semi_stable",
"stable",
"volatile"
],
"confusion_matrix": [
[
1000,
0,
0
],
[
0,
1000,
0
],
[
0,
0,
1000
]
],
"wrong_task_predictions": 0,
"wrong_task_rate": 0.0
},
"constraint_type": {
"rows": 8000,
"accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"micro_f1": 1.0,
"labels": [
"causal",
"constraint_other",
"goal",
"none",
"policy",
"preference",
"state",
"value"
],
"confusion_matrix": [
[
1000,
0,
0,
0,
0,
0,
0,
0
],
[
0,
1000,
0,
0,
0,
0,
0,
0
],
[
0,
0,
1000,
0,
0,
0,
0,
0
],
[
0,
0,
0,
1000,
0,
0,
0,
0
],
[
0,
0,
0,
0,
1000,
0,
0,
0
],
[
0,
0,
0,
0,
0,
1000,
0,
0
],
[
0,
0,
0,
0,
0,
0,
1000,
0
],
[
0,
0,
0,
0,
0,
0,
0,
1000
]
],
"wrong_task_predictions": 0,
"wrong_task_rate": 0.0
},
"fact_type": {
"rows": 6000,
"accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"micro_f1": 1.0,
"labels": [
"identity",
"location",
"none",
"occupation",
"other_fact",
"preference"
],
"confusion_matrix": [
[
1000,
0,
0,
0,
0,
0
],
[
0,
1000,
0,
0,
0,
0
],
[
0,
0,
1000,
0,
0,
0
],
[
0,
0,
0,
1000,
0,
0
],
[
0,
0,
0,
0,
1000,
0
],
[
0,
0,
0,
0,
0,
1000
]
],
"wrong_task_predictions": 0,
"wrong_task_rate": 0.0
},
"pii_presence": {
"rows": 10000,
"accuracy": 0.9934,
"macro_f1": 0.9933999680558454,
"weighted_f1": 0.9933999680558454,
"micro_f1": 0.9934,
"labels": [
"no_pii",
"pii"
],
"confusion_matrix": [
[
4978,
22
],
[
44,
4956
]
],
"wrong_task_predictions": 0,
"wrong_task_rate": 0.0
}
},
"calibration": {
"method": "task_conditional_sigmoid",
"split": "eval",
"rows": 36000,
"pre_ece": 0.06013730731601039,
"post_ece": 0.0007514102792564575,
"pre_accuracy": 0.9978333333333333,
"post_accuracy": 0.9981666666666666,
"accuracy_delta": 0.0003333333333332966,
"tasks": {
"constraint_scope": {
"rows": 9000,
"pre_ece": 0.0073926387441783925,
"post_ece": 0.00019267901287378653,
"pre_accuracy": 1.0,
"post_accuracy": 1.0,
"accuracy_delta": 0.0
},
"constraint_stability": {
"rows": 3000,
"pre_ece": 0.0035802200536977353,
"post_ece": 0.00016981111181246789,
"pre_accuracy": 1.0,
"post_accuracy": 1.0,
"accuracy_delta": 0.0
},
"constraint_type": {
"rows": 8000,
"pre_ece": 0.0152260080692046,
"post_ece": 0.0002949549691239062,
"pre_accuracy": 1.0,
"post_accuracy": 1.0,
"accuracy_delta": 0.0
},
"fact_type": {
"rows": 6000,
"pre_ece": 0.008209015508071071,
"post_ece": 0.00021106970310058593,
"pre_accuracy": 1.0,
"post_accuracy": 1.0,
"accuracy_delta": 0.0
},
"pii_presence": {
"rows": 10000,
"pre_ece": 0.1278197693399604,
"post_ece": 0.002118116763033487,
"pre_accuracy": 0.9922,
"post_accuracy": 0.9934,
"accuracy_delta": 0.0011999999999999789
}
}
}
}