{ "family": "extractor", "split": "test", "overall": { "rows": 36000, "accuracy": 0.9974166666666666, "macro_f1": 0.9993357139602141, "weighted_f1": 0.9974166654008327, "micro_f1": 0.9974166666666666, "labels": [ "constraint_scope::finance", "constraint_scope::food", "constraint_scope::general", "constraint_scope::health", "constraint_scope::none", "constraint_scope::social", "constraint_scope::tech", "constraint_scope::travel", "constraint_scope::work", "constraint_stability::semi_stable", "constraint_stability::stable", "constraint_stability::volatile", "constraint_type::causal", "constraint_type::constraint_other", "constraint_type::goal", "constraint_type::none", "constraint_type::policy", "constraint_type::preference", "constraint_type::state", "constraint_type::value", "fact_type::identity", "fact_type::location", "fact_type::none", "fact_type::occupation", "fact_type::other_fact", "fact_type::preference", "pii_presence::no_pii", "pii_presence::pii" ], "confusion_matrix": [ [ 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4957, 43 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 4950 ] ], "calibration_error": 0.00037925340005441757, "classification_report": { "constraint_scope::finance": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::food": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::general": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::health": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::none": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::social": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::tech": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::travel": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_scope::work": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_stability::semi_stable": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_stability::stable": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_stability::volatile": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::causal": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::constraint_other": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::goal": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::none": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::policy": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::preference": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::state": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "constraint_type::value": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::identity": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::location": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::none": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::occupation": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::other_fact": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "fact_type::preference": { "precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1000.0 }, "pii_presence::no_pii": { "precision": 0.9900139804274016, "recall": 0.9914, "f1-score": 0.9907065054461877, "support": 5000.0 }, "pii_presence::pii": { "precision": 0.9913879431203685, "recall": 0.99, "f1-score": 0.9906934854398078, "support": 5000.0 }, "accuracy": 0.9974166666666666, "macro avg": { "precision": 0.999335782983849, "recall": 0.9993357142857142, "f1-score": 0.9993357139602141, "support": 36000.0 }, "weighted avg": { "precision": 0.9974169338260791, "recall": 0.9974166666666666, "f1-score": 0.9974166654008327, "support": 36000.0 } } }, "per_task": { "constraint_scope": { "rows": 9000, "accuracy": 1.0, "macro_f1": 1.0, "weighted_f1": 1.0, "micro_f1": 1.0, "labels": [ "finance", "food", "general", "health", "none", "social", "tech", "travel", "work" ], "confusion_matrix": [ [ 1000, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 1000, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 1000, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 1000, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 1000, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 1000, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 1000, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 1000, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 0, 1000 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 }, "constraint_stability": { "rows": 3000, "accuracy": 1.0, "macro_f1": 1.0, "weighted_f1": 1.0, "micro_f1": 1.0, "labels": [ "semi_stable", "stable", "volatile" ], "confusion_matrix": [ [ 1000, 0, 0 ], [ 0, 1000, 0 ], [ 0, 0, 1000 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 }, "constraint_type": { "rows": 8000, "accuracy": 1.0, "macro_f1": 1.0, "weighted_f1": 1.0, "micro_f1": 1.0, "labels": [ "causal", "constraint_other", "goal", "none", "policy", "preference", "state", "value" ], "confusion_matrix": [ [ 1000, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 1000, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 1000, 0, 0, 0, 0, 0 ], [ 0, 0, 0, 1000, 0, 0, 0, 0 ], [ 0, 0, 0, 0, 1000, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 1000, 0, 0 ], [ 0, 0, 0, 0, 0, 0, 1000, 0 ], [ 0, 0, 0, 0, 0, 0, 0, 1000 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 }, "fact_type": { "rows": 6000, "accuracy": 1.0, "macro_f1": 1.0, "weighted_f1": 1.0, "micro_f1": 1.0, "labels": [ "identity", "location", "none", "occupation", "other_fact", "preference" ], "confusion_matrix": [ [ 1000, 0, 0, 0, 0, 0 ], [ 0, 1000, 0, 0, 0, 0 ], [ 0, 0, 1000, 0, 0, 0 ], [ 0, 0, 0, 1000, 0, 0 ], [ 0, 0, 0, 0, 1000, 0 ], [ 0, 0, 0, 0, 0, 1000 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 }, "pii_presence": { "rows": 10000, "accuracy": 0.9907, "macro_f1": 0.9906999954429978, "weighted_f1": 0.9906999954429977, "micro_f1": 0.9907, "labels": [ "no_pii", "pii" ], "confusion_matrix": [ [ 4957, 43 ], [ 50, 4950 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 } }, "calibration": { "method": "task_conditional_sigmoid", "split": "eval", "rows": 36000, "pre_ece": 0.06013730731601039, "post_ece": 0.0007514102792564575, "pre_accuracy": 0.9978333333333333, "post_accuracy": 0.9981666666666666, "accuracy_delta": 0.0003333333333332966, "tasks": { "constraint_scope": { "rows": 9000, "pre_ece": 0.0073926387441783925, "post_ece": 0.00019267901287378653, "pre_accuracy": 1.0, "post_accuracy": 1.0, "accuracy_delta": 0.0 }, "constraint_stability": { "rows": 3000, "pre_ece": 0.0035802200536977353, "post_ece": 0.00016981111181246789, "pre_accuracy": 1.0, "post_accuracy": 1.0, "accuracy_delta": 0.0 }, "constraint_type": { "rows": 8000, "pre_ece": 0.0152260080692046, "post_ece": 0.0002949549691239062, "pre_accuracy": 1.0, "post_accuracy": 1.0, "accuracy_delta": 0.0 }, "fact_type": { "rows": 6000, "pre_ece": 0.008209015508071071, "post_ece": 0.00021106970310058593, "pre_accuracy": 1.0, "post_accuracy": 1.0, "accuracy_delta": 0.0 }, "pii_presence": { "rows": 10000, "pre_ece": 0.1278197693399604, "post_ece": 0.002118116763033487, "pre_accuracy": 0.9922, "post_accuracy": 0.9934, "accuracy_delta": 0.0011999999999999789 } } } }