File size: 870 Bytes
8890dbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "dataset": "wls04/prm-alfworld-gemini-mix:test",
  "n_examples_kept": 2144,
  "n_examples_skipped": 0,
  "n_steps": 35147,
  "label_scheme": "agent",
  "combine_mode": "concat",
  "results": {
    "combine": {
      "acc": 91.9708652232054,
      "macro_f1": 91.40890341554893,
      "balanced_acc": 90.74304346180321,
      "per_class_f1": {
        "0": 94.64,
        "1": 86.75,
        "2": 92.83
      }
    },
    "step": {
      "acc": 87.35311690898227,
      "macro_f1": 84.90823345930534,
      "balanced_acc": 83.57057256686144,
      "per_class_f1": {
        "0": 93.61,
        "1": 80.1,
        "2": 81.02
      }
    },
    "future": {
      "acc": 92.34074031923066,
      "macro_f1": 91.88653762600303,
      "balanced_acc": 91.41298489827267,
      "per_class_f1": {
        "0": 94.76,
        "1": 87.26,
        "2": 93.64
      }
    }
  }
}