{
  "iteration": 226,
  "training_reward": 0.9533333333333334,
  "timestamp": "2026-01-02T01:35:36.597389",
  "experiment": "EXP-TRAIN-CDG",
  "cdg_metrics": {
    "total_episodes": 832,
    "wrong_to_right": 188,
    "right_to_wrong": 82,
    "right_to_right": 295,
    "wrong_to_wrong": 267,
    "filtered_episodes": 388,
    "avg_episode_quality": 0.9533333333333334
  },
  "note": "Best model by training reward (EuroEval evaluation pending)"
}