| { |
| "dataset": "wls04/prm-scienceworld-gemini-mix:test", |
| "n_examples_kept": 3200, |
| "n_examples_skipped": 0, |
| "n_steps": 50711, |
| "label_scheme": "agent", |
| "combine_mode": "concat", |
| "results": { |
| "combine": { |
| "acc": 86.98310031354144, |
| "macro_f1": 84.01496913424012, |
| "balanced_acc": 84.01278475222438, |
| "per_class_f1": { |
| "0": 94.0, |
| "1": 74.62, |
| "2": 83.42 |
| } |
| }, |
| "step": { |
| "acc": 84.9835341444657, |
| "macro_f1": 81.60372519964524, |
| "balanced_acc": 81.47710783650096, |
| "per_class_f1": { |
| "0": 92.74, |
| "1": 71.36, |
| "2": 80.71 |
| } |
| }, |
| "future": { |
| "acc": 87.21776340438959, |
| "macro_f1": 84.30195984740317, |
| "balanced_acc": 84.34468096714211, |
| "per_class_f1": { |
| "0": 94.15, |
| "1": 75.03, |
| "2": 83.72 |
| } |
| } |
| } |
| } |