{ "family": "router", "split": "eval", "overall": { "rows": 6000, "accuracy": 0.8698333333333333, "macro_f1": 0.8703105353987516, "weighted_f1": 0.8703105353987517, "micro_f1": 0.8698333333333333, "labels": [ "constraint_dimension::causal", "constraint_dimension::goal", "constraint_dimension::other", "constraint_dimension::policy", "constraint_dimension::state", "constraint_dimension::value" ], "confusion_matrix": [ [ 965, 2, 16, 0, 12, 5 ], [ 1, 907, 57, 1, 9, 25 ], [ 19, 31, 791, 18, 56, 85 ], [ 1, 0, 22, 954, 2, 21 ], [ 27, 13, 78, 6, 842, 34 ], [ 13, 23, 145, 23, 36, 760 ] ], "calibration_error": 0.04364704496164792, "classification_report": { "constraint_dimension::causal": { "precision": 0.9405458089668616, "recall": 0.965, "f1-score": 0.9526159921026653, "support": 1000.0 }, "constraint_dimension::goal": { "precision": 0.9293032786885246, "recall": 0.907, "f1-score": 0.9180161943319838, "support": 1000.0 }, "constraint_dimension::other": { "precision": 0.7132551848512173, "recall": 0.791, "f1-score": 0.7501185395922239, "support": 1000.0 }, "constraint_dimension::policy": { "precision": 0.9520958083832335, "recall": 0.954, "f1-score": 0.9530469530469531, "support": 1000.0 }, "constraint_dimension::state": { "precision": 0.8798328108672936, "recall": 0.842, "f1-score": 0.8605007664793051, "support": 1000.0 }, "constraint_dimension::value": { "precision": 0.8172043010752689, "recall": 0.76, "f1-score": 0.7875647668393783, "support": 1000.0 }, "accuracy": 0.8698333333333333, "macro avg": { "precision": 0.8720395321387332, "recall": 0.8698333333333332, "f1-score": 0.8703105353987516, "support": 6000.0 }, "weighted avg": { "precision": 0.8720395321387332, "recall": 0.8698333333333333, "f1-score": 0.8703105353987517, "support": 6000.0 } } }, "per_task": { "constraint_dimension": { "rows": 6000, "accuracy": 0.8698333333333333, "macro_f1": 0.8703105353987516, "weighted_f1": 0.8703105353987517, "micro_f1": 0.8698333333333333, "labels": [ "causal", "goal", "other", "policy", "state", "value" ], "confusion_matrix": [ [ 965, 2, 16, 0, 12, 5 ], [ 1, 907, 57, 1, 9, 25 ], [ 19, 31, 791, 18, 56, 85 ], [ 1, 0, 22, 954, 2, 21 ], [ 27, 13, 78, 6, 842, 34 ], [ 13, 23, 145, 23, 36, 760 ] ], "wrong_task_predictions": 0, "wrong_task_rate": 0.0 } }, "calibration": { "method": "temperature_grid_search", "rows": 6000, "temperature": 2.8, "loss": null } }