| { |
| "dataset": "wls04/prm-alfworld-gemini-mix:test", |
| "n_examples_kept": 2144, |
| "n_examples_skipped": 0, |
| "n_steps": 35147, |
| "label_scheme": "agent", |
| "combine_mode": "concat", |
| "results": { |
| "combine": { |
| "acc": 91.98224599539078, |
| "macro_f1": 91.49491929565538, |
| "balanced_acc": 91.1784596987989, |
| "per_class_f1": { |
| "0": 94.55, |
| "1": 86.76, |
| "2": 93.17 |
| } |
| }, |
| "step": { |
| "acc": 87.1482630096452, |
| "macro_f1": 84.75697001493187, |
| "balanced_acc": 83.84656212095378, |
| "per_class_f1": { |
| "0": 93.57, |
| "1": 79.59, |
| "2": 81.11 |
| } |
| }, |
| "future": { |
| "acc": 92.25253933479387, |
| "macro_f1": 91.81233200670763, |
| "balanced_acc": 91.65293217311738, |
| "per_class_f1": { |
| "0": 94.69, |
| "1": 87.15, |
| "2": 93.6 |
| } |
| } |
| } |
| } |