File size: 870 Bytes
8890dbe | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | {
"dataset": "wls04/prm-alfworld-gemini-mix:test",
"n_examples_kept": 2144,
"n_examples_skipped": 0,
"n_steps": 35147,
"label_scheme": "agent",
"combine_mode": "concat",
"results": {
"combine": {
"acc": 91.9708652232054,
"macro_f1": 91.40890341554893,
"balanced_acc": 90.74304346180321,
"per_class_f1": {
"0": 94.64,
"1": 86.75,
"2": 92.83
}
},
"step": {
"acc": 87.35311690898227,
"macro_f1": 84.90823345930534,
"balanced_acc": 83.57057256686144,
"per_class_f1": {
"0": 93.61,
"1": 80.1,
"2": 81.02
}
},
"future": {
"acc": 92.34074031923066,
"macro_f1": 91.88653762600303,
"balanced_acc": 91.41298489827267,
"per_class_f1": {
"0": 94.76,
"1": 87.26,
"2": 93.64
}
}
}
} |