{ "family": "router", "split": "test", "overall": { "rows": 6000, "accuracy": 0.8828333333333334, "macro_f1": 0.883185453547077, "weighted_f1": 0.883185453547077, "micro_f1": 0.8828333333333334, "labels": [ "constraint_dimension::causal", "constraint_dimension::goal", "constraint_dimension::other", "constraint_dimension::policy", "constraint_dimension::state", "constraint_dimension::value" ], "confusion_matrix": [ [ 966, 2, 20, 1, 7, 4 ], [ 1, 927, 47, 2, 6, 17 ], [ 12, 30, 817, 22, 56, 63 ], [ 0, 2, 26, 949, 1, 22 ], [ 27, 7, 63, 8, 865, 30 ], [ 7, 19, 137, 27, 37, 773 ] ], "calibration_error": 0.04340881209580012, "classification_report": { "constraint_dimension::causal": { "precision": 0.9536031589338598, "recall": 0.966, "f1-score": 0.9597615499254843, "support": 1000.0 }, "constraint_dimension::goal": { "precision": 0.939209726443769, "recall": 0.927, "f1-score": 0.9330649219929542, "support": 1000.0 }, "constraint_dimension::other": { "precision": 0.7360360360360361, "recall": 0.817, "f1-score": 0.7744075829383886, "support": 1000.0 }, "constraint_dimension::policy": { "precision": 0.9405351833498513, "recall": 0.949, "f1-score": 0.944748631159781, "support": 1000.0 }, "constraint_dimension::state": { "precision": 0.8899176954732511, "recall": 0.865, "f1-score": 0.8772819472616633, "support": 1000.0 }, "constraint_dimension::value": { "precision": 0.8503850385038504, "recall": 0.773, "f1-score": 0.8098480880041907, "support": 1000.0 }, "accuracy": 0.8828333333333334, "macro avg": { "precision": 0.8849478064567696, "recall": 0.8828333333333332, "f1-score": 0.883185453547077, "support": 6000.0 }, "weighted avg": { "precision": 0.8849478064567696, "recall": 0.8828333333333334, "f1-score": 0.883185453547077, "support": 6000.0 } } }, "per_task": { "constraint_dimension": { "rows": 6000, "accuracy": 0.8828333333333334, "macro_f1": 0.883185453547077, "weighted_f1": 0.883185453547077, "micro_f1": 0.8828333333333334, "labels": [ "causal", "goal", "other", "policy", "state", "value" ], "confusion_matrix": [ [ 966, 2, 20, 1, 7, 4 ], [ 1, 927, 47, 2, 6, 17 ], [ 12, 30, 817, 22, 56, 63 ], [ 0, 2, 26, 949, 1, 22 ], [ 27, 7, 63, 8, 865, 30 ], [ 7, 19, 137, 27, 37, 773 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 } }, "calibration": { "method": "temperature_grid_search", "rows": 6000, "temperature": 2.8, "loss": null } }