File size: 869 Bytes
88d1266 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | {
"dataset": "wls04/prm-alfworld-gemini-mix:test",
"n_examples_kept": 2144,
"n_examples_skipped": 0,
"n_steps": 35147,
"label_scheme": "agent",
"combine_mode": "concat",
"results": {
"combine": {
"acc": 91.98224599539078,
"macro_f1": 91.49491929565538,
"balanced_acc": 91.1784596987989,
"per_class_f1": {
"0": 94.55,
"1": 86.76,
"2": 93.17
}
},
"step": {
"acc": 87.1482630096452,
"macro_f1": 84.75697001493187,
"balanced_acc": 83.84656212095378,
"per_class_f1": {
"0": 93.57,
"1": 79.59,
"2": 81.11
}
},
"future": {
"acc": 92.25253933479387,
"macro_f1": 91.81233200670763,
"balanced_acc": 91.65293217311738,
"per_class_f1": {
"0": 94.69,
"1": 87.15,
"2": 93.6
}
}
}
} |