File size: 874 Bytes
b838ce0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "dataset": "wls04/prm-scienceworld-gemini-mix:test",
  "n_examples_kept": 3200,
  "n_examples_skipped": 0,
  "n_steps": 50711,
  "label_scheme": "agent",
  "combine_mode": "concat",
  "results": {
    "combine": {
      "acc": 86.98310031354144,
      "macro_f1": 84.01496913424012,
      "balanced_acc": 84.01278475222438,
      "per_class_f1": {
        "0": 94.0,
        "1": 74.62,
        "2": 83.42
      }
    },
    "step": {
      "acc": 84.9835341444657,
      "macro_f1": 81.60372519964524,
      "balanced_acc": 81.47710783650096,
      "per_class_f1": {
        "0": 92.74,
        "1": 71.36,
        "2": 80.71
      }
    },
    "future": {
      "acc": 87.21776340438959,
      "macro_f1": 84.30195984740317,
      "balanced_acc": 84.34468096714211,
      "per_class_f1": {
        "0": 94.15,
        "1": 75.03,
        "2": 83.72
      }
    }
  }
}