File size: 869 Bytes
88d1266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "dataset": "wls04/prm-alfworld-gemini-mix:test",
  "n_examples_kept": 2144,
  "n_examples_skipped": 0,
  "n_steps": 35147,
  "label_scheme": "agent",
  "combine_mode": "concat",
  "results": {
    "combine": {
      "acc": 91.98224599539078,
      "macro_f1": 91.49491929565538,
      "balanced_acc": 91.1784596987989,
      "per_class_f1": {
        "0": 94.55,
        "1": 86.76,
        "2": 93.17
      }
    },
    "step": {
      "acc": 87.1482630096452,
      "macro_f1": 84.75697001493187,
      "balanced_acc": 83.84656212095378,
      "per_class_f1": {
        "0": 93.57,
        "1": 79.59,
        "2": 81.11
      }
    },
    "future": {
      "acc": 92.25253933479387,
      "macro_f1": 91.81233200670763,
      "balanced_acc": 91.65293217311738,
      "per_class_f1": {
        "0": 94.69,
        "1": 87.15,
        "2": 93.6
      }
    }
  }
}