| { |
| "family": "router", |
| "split": "test", |
| "overall": { |
| "rows": 6000, |
| "accuracy": 0.8828333333333334, |
| "macro_f1": 0.883185453547077, |
| "weighted_f1": 0.883185453547077, |
| "micro_f1": 0.8828333333333334, |
| "labels": [ |
| "constraint_dimension::causal", |
| "constraint_dimension::goal", |
| "constraint_dimension::other", |
| "constraint_dimension::policy", |
| "constraint_dimension::state", |
| "constraint_dimension::value" |
| ], |
| "confusion_matrix": [ |
| [ |
| 966, |
| 2, |
| 20, |
| 1, |
| 7, |
| 4 |
| ], |
| [ |
| 1, |
| 927, |
| 47, |
| 2, |
| 6, |
| 17 |
| ], |
| [ |
| 12, |
| 30, |
| 817, |
| 22, |
| 56, |
| 63 |
| ], |
| [ |
| 0, |
| 2, |
| 26, |
| 949, |
| 1, |
| 22 |
| ], |
| [ |
| 27, |
| 7, |
| 63, |
| 8, |
| 865, |
| 30 |
| ], |
| [ |
| 7, |
| 19, |
| 137, |
| 27, |
| 37, |
| 773 |
| ] |
| ], |
| "calibration_error": 0.04340881209580012, |
| "classification_report": { |
| "constraint_dimension::causal": { |
| "precision": 0.9536031589338598, |
| "recall": 0.966, |
| "f1-score": 0.9597615499254843, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::goal": { |
| "precision": 0.939209726443769, |
| "recall": 0.927, |
| "f1-score": 0.9330649219929542, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::other": { |
| "precision": 0.7360360360360361, |
| "recall": 0.817, |
| "f1-score": 0.7744075829383886, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::policy": { |
| "precision": 0.9405351833498513, |
| "recall": 0.949, |
| "f1-score": 0.944748631159781, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::state": { |
| "precision": 0.8899176954732511, |
| "recall": 0.865, |
| "f1-score": 0.8772819472616633, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::value": { |
| "precision": 0.8503850385038504, |
| "recall": 0.773, |
| "f1-score": 0.8098480880041907, |
| "support": 1000.0 |
| }, |
| "accuracy": 0.8828333333333334, |
| "macro avg": { |
| "precision": 0.8849478064567696, |
| "recall": 0.8828333333333332, |
| "f1-score": 0.883185453547077, |
| "support": 6000.0 |
| }, |
| "weighted avg": { |
| "precision": 0.8849478064567696, |
| "recall": 0.8828333333333334, |
| "f1-score": 0.883185453547077, |
| "support": 6000.0 |
| } |
| } |
| }, |
| "per_task": { |
| "constraint_dimension": { |
| "rows": 6000, |
| "accuracy": 0.8828333333333334, |
| "macro_f1": 0.883185453547077, |
| "weighted_f1": 0.883185453547077, |
| "micro_f1": 0.8828333333333334, |
| "labels": [ |
| "causal", |
| "goal", |
| "other", |
| "policy", |
| "state", |
| "value" |
| ], |
| "confusion_matrix": [ |
| [ |
| 966, |
| 2, |
| 20, |
| 1, |
| 7, |
| 4 |
| ], |
| [ |
| 1, |
| 927, |
| 47, |
| 2, |
| 6, |
| 17 |
| ], |
| [ |
| 12, |
| 30, |
| 817, |
| 22, |
| 56, |
| 63 |
| ], |
| [ |
| 0, |
| 2, |
| 26, |
| 949, |
| 1, |
| 22 |
| ], |
| [ |
| 27, |
| 7, |
| 63, |
| 8, |
| 865, |
| 30 |
| ], |
| [ |
| 7, |
| 19, |
| 137, |
| 27, |
| 37, |
| 773 |
| ] |
| ], |
| "wrong_task_predictions": 0, |
| "wrong_task_rate": 0.0 |
| } |
| }, |
| "calibration": { |
| "method": "temperature_grid_search", |
| "rows": 6000, |
| "temperature": 2.8, |
| "loss": null |
| } |
| } |