| { |
| "family": "router", |
| "split": "eval", |
| "overall": { |
| "rows": 6000, |
| "accuracy": 0.8698333333333333, |
| "macro_f1": 0.8703105353987516, |
| "weighted_f1": 0.8703105353987517, |
| "micro_f1": 0.8698333333333333, |
| "labels": [ |
| "constraint_dimension::causal", |
| "constraint_dimension::goal", |
| "constraint_dimension::other", |
| "constraint_dimension::policy", |
| "constraint_dimension::state", |
| "constraint_dimension::value" |
| ], |
| "confusion_matrix": [ |
| [ |
| 965, |
| 2, |
| 16, |
| 0, |
| 12, |
| 5 |
| ], |
| [ |
| 1, |
| 907, |
| 57, |
| 1, |
| 9, |
| 25 |
| ], |
| [ |
| 19, |
| 31, |
| 791, |
| 18, |
| 56, |
| 85 |
| ], |
| [ |
| 1, |
| 0, |
| 22, |
| 954, |
| 2, |
| 21 |
| ], |
| [ |
| 27, |
| 13, |
| 78, |
| 6, |
| 842, |
| 34 |
| ], |
| [ |
| 13, |
| 23, |
| 145, |
| 23, |
| 36, |
| 760 |
| ] |
| ], |
| "calibration_error": 0.04364704496164792, |
| "classification_report": { |
| "constraint_dimension::causal": { |
| "precision": 0.9405458089668616, |
| "recall": 0.965, |
| "f1-score": 0.9526159921026653, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::goal": { |
| "precision": 0.9293032786885246, |
| "recall": 0.907, |
| "f1-score": 0.9180161943319838, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::other": { |
| "precision": 0.7132551848512173, |
| "recall": 0.791, |
| "f1-score": 0.7501185395922239, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::policy": { |
| "precision": 0.9520958083832335, |
| "recall": 0.954, |
| "f1-score": 0.9530469530469531, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::state": { |
| "precision": 0.8798328108672936, |
| "recall": 0.842, |
| "f1-score": 0.8605007664793051, |
| "support": 1000.0 |
| }, |
| "constraint_dimension::value": { |
| "precision": 0.8172043010752689, |
| "recall": 0.76, |
| "f1-score": 0.7875647668393783, |
| "support": 1000.0 |
| }, |
| "accuracy": 0.8698333333333333, |
| "macro avg": { |
| "precision": 0.8720395321387332, |
| "recall": 0.8698333333333332, |
| "f1-score": 0.8703105353987516, |
| "support": 6000.0 |
| }, |
| "weighted avg": { |
| "precision": 0.8720395321387332, |
| "recall": 0.8698333333333333, |
| "f1-score": 0.8703105353987517, |
| "support": 6000.0 |
| } |
| } |
| }, |
| "per_task": { |
| "constraint_dimension": { |
| "rows": 6000, |
| "accuracy": 0.8698333333333333, |
| "macro_f1": 0.8703105353987516, |
| "weighted_f1": 0.8703105353987517, |
| "micro_f1": 0.8698333333333333, |
| "labels": [ |
| "causal", |
| "goal", |
| "other", |
| "policy", |
| "state", |
| "value" |
| ], |
| "confusion_matrix": [ |
| [ |
| 965, |
| 2, |
| 16, |
| 0, |
| 12, |
| 5 |
| ], |
| [ |
| 1, |
| 907, |
| 57, |
| 1, |
| 9, |
| 25 |
| ], |
| [ |
| 19, |
| 31, |
| 791, |
| 18, |
| 56, |
| 85 |
| ], |
| [ |
| 1, |
| 0, |
| 22, |
| 954, |
| 2, |
| 21 |
| ], |
| [ |
| 27, |
| 13, |
| 78, |
| 6, |
| 842, |
| 34 |
| ], |
| [ |
| 13, |
| 23, |
| 145, |
| 23, |
| 36, |
| 760 |
| ] |
| ], |
| "wrong_task_predictions": 0, |
| "wrong_task_rate": 0.0 |
| } |
| }, |
| "calibration": { |
| "method": "temperature_grid_search", |
| "rows": 6000, |
| "temperature": 2.8, |
| "loss": null |
| } |
| } |