WFJKK commited on
Commit
a4371e5
·
verified ·
1 Parent(s): 067d59d

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. bins.json +97 -96
  2. grpo.json +14 -14
  3. sft.json +15 -15
bins.json CHANGED
@@ -1,207 +1,208 @@
1
  {
2
  "easy": [
3
- 200,
4
- 206,
5
- 208,
6
- 211,
7
- 212,
8
  219,
9
  222,
10
- 224,
11
- 227,
12
- 230,
13
- 231,
14
  234,
15
- 238,
16
- 239,
17
- 242,
18
- 245,
19
- 247,
20
- 249,
21
- 252,
22
- 253,
23
- 255,
24
  257,
25
- 258,
26
- 259,
27
- 260,
28
- 261,
29
- 267,
30
- 268,
31
- 269,
 
32
  270,
33
- 271,
34
- 273,
35
- 274,
36
- 279,
37
- 280,
38
- 281,
39
- 282,
40
- 286,
41
- 287,
42
- 289,
43
  290,
44
- 292,
45
- 297,
46
- 306,
47
- 311,
48
- 315,
49
- 316,
50
- 318,
51
- 323,
52
- 324,
53
  328,
54
- 329,
55
- 331,
56
- 333,
57
- 334,
58
- 335,
59
- 336,
60
- 337,
61
- 342,
62
- 346,
63
- 348,
64
- 349,
65
- 351,
66
  352,
67
- 356,
68
- 357,
69
- 359,
70
- 361,
71
- 363,
72
- 364,
73
- 366,
74
- 367,
75
- 369,
76
- 370,
77
- 372,
78
- 375,
79
- 380,
80
- 381,
81
- 383,
82
- 385,
83
- 387,
84
- 388,
85
- 389,
86
- 391,
87
  392,
88
- 393,
89
- 394,
90
- 396,
91
  398
92
  ],
93
- "medium": [
 
94
  201,
95
  202,
96
  203,
97
- 204,
98
  205,
 
99
  207,
 
100
  209,
101
  210,
102
- 213,
 
103
  214,
104
  215,
105
  216,
106
  217,
107
  218,
108
  220,
109
- 221,
110
- 223,
111
  225,
112
  226,
113
- 228,
114
  229,
 
 
115
  232,
116
  233,
117
  235,
118
- 236,
119
  237,
 
 
120
  240,
121
  241,
 
122
  243,
123
  244,
 
124
  246,
 
125
  248,
 
126
  250,
127
  251,
128
- 254,
 
 
129
  256,
 
 
 
 
130
  262,
131
  263,
132
  264,
133
  265,
134
  266,
135
- 272,
 
 
 
 
 
136
  275,
137
  276,
138
  277,
139
  278,
 
 
 
 
140
  283,
141
- 284,
142
  285,
 
143
  288,
 
144
  291,
 
145
  293,
146
  294,
147
  295,
148
  296,
 
149
  298,
150
  299,
151
- 300,
152
  301,
153
  302,
154
  303,
155
  304,
156
  305,
 
157
  307,
158
- 308,
159
- 309,
160
  310,
 
161
  312,
162
  313,
163
  314,
 
 
164
  317,
165
- 319,
166
  320,
167
  321,
168
  322,
 
 
169
  325,
170
  326,
171
  327,
 
172
  330,
 
173
  332,
 
 
 
 
 
174
  338,
175
  339,
176
  340,
177
  341,
 
178
  343,
179
  344,
180
  345,
 
181
  347,
 
 
182
  350,
 
183
  353,
184
  354,
185
  355,
 
 
186
  358,
 
187
  360,
 
188
  362,
 
 
189
  365,
 
 
190
  368,
 
 
191
  371,
192
- 373,
193
  374,
 
194
  376,
195
  377,
196
  378,
197
  379,
 
 
198
  382,
 
199
  384,
 
200
  386,
 
 
 
201
  390,
 
 
 
202
  395,
 
203
  397,
204
  399
205
- ],
206
- "hard": []
207
  }
 
1
  {
2
  "easy": [
3
+ 213,
 
 
 
 
4
  219,
5
  222,
 
 
 
 
6
  234,
7
+ 236,
 
 
 
 
 
 
 
 
8
  257,
9
+ 286
10
+ ],
11
+ "medium": [
12
+ 204,
13
+ 221,
14
+ 223,
15
+ 228,
16
+ 254,
17
  270,
18
+ 272,
19
+ 284,
 
 
 
 
 
 
 
 
20
  290,
21
+ 300,
22
+ 308,
23
+ 309,
24
+ 319,
 
 
 
 
 
25
  328,
 
 
 
 
 
 
 
 
 
 
 
 
26
  352,
27
+ 373,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  392,
 
 
 
29
  398
30
  ],
31
+ "hard": [
32
+ 200,
33
  201,
34
  202,
35
  203,
 
36
  205,
37
+ 206,
38
  207,
39
+ 208,
40
  209,
41
  210,
42
+ 211,
43
+ 212,
44
  214,
45
  215,
46
  216,
47
  217,
48
  218,
49
  220,
50
+ 224,
 
51
  225,
52
  226,
53
+ 227,
54
  229,
55
+ 230,
56
+ 231,
57
  232,
58
  233,
59
  235,
 
60
  237,
61
+ 238,
62
+ 239,
63
  240,
64
  241,
65
+ 242,
66
  243,
67
  244,
68
+ 245,
69
  246,
70
+ 247,
71
  248,
72
+ 249,
73
  250,
74
  251,
75
+ 252,
76
+ 253,
77
+ 255,
78
  256,
79
+ 258,
80
+ 259,
81
+ 260,
82
+ 261,
83
  262,
84
  263,
85
  264,
86
  265,
87
  266,
88
+ 267,
89
+ 268,
90
+ 269,
91
+ 271,
92
+ 273,
93
+ 274,
94
  275,
95
  276,
96
  277,
97
  278,
98
+ 279,
99
+ 280,
100
+ 281,
101
+ 282,
102
  283,
 
103
  285,
104
+ 287,
105
  288,
106
+ 289,
107
  291,
108
+ 292,
109
  293,
110
  294,
111
  295,
112
  296,
113
+ 297,
114
  298,
115
  299,
 
116
  301,
117
  302,
118
  303,
119
  304,
120
  305,
121
+ 306,
122
  307,
 
 
123
  310,
124
+ 311,
125
  312,
126
  313,
127
  314,
128
+ 315,
129
+ 316,
130
  317,
131
+ 318,
132
  320,
133
  321,
134
  322,
135
+ 323,
136
+ 324,
137
  325,
138
  326,
139
  327,
140
+ 329,
141
  330,
142
+ 331,
143
  332,
144
+ 333,
145
+ 334,
146
+ 335,
147
+ 336,
148
+ 337,
149
  338,
150
  339,
151
  340,
152
  341,
153
+ 342,
154
  343,
155
  344,
156
  345,
157
+ 346,
158
  347,
159
+ 348,
160
+ 349,
161
  350,
162
+ 351,
163
  353,
164
  354,
165
  355,
166
+ 356,
167
+ 357,
168
  358,
169
+ 359,
170
  360,
171
+ 361,
172
  362,
173
+ 363,
174
+ 364,
175
  365,
176
+ 366,
177
+ 367,
178
  368,
179
+ 369,
180
+ 370,
181
  371,
182
+ 372,
183
  374,
184
+ 375,
185
  376,
186
  377,
187
  378,
188
  379,
189
+ 380,
190
+ 381,
191
  382,
192
+ 383,
193
  384,
194
+ 385,
195
  386,
196
+ 387,
197
+ 388,
198
+ 389,
199
  390,
200
+ 391,
201
+ 393,
202
+ 394,
203
  395,
204
+ 396,
205
  397,
206
  399
207
+ ]
 
208
  }
grpo.json CHANGED
@@ -1,30 +1,30 @@
1
  {
2
  "L1": {
3
- "correct": 27,
4
- "parsed": 49,
5
  "total": 50,
6
- "mae": 2.01,
7
- "accuracy": 55.1
8
  },
9
  "L2": {
10
- "correct": 20,
11
- "parsed": 40,
12
  "total": 50,
13
- "mae": 2.33,
14
  "accuracy": 50.0
15
  },
16
  "L3": {
17
- "correct": 13,
18
- "parsed": 24,
19
  "total": 50,
20
- "mae": 3.25,
21
- "accuracy": 54.2
22
  },
23
  "L4": {
24
  "correct": 15,
25
- "parsed": 26,
26
  "total": 50,
27
- "mae": 2.23,
28
- "accuracy": 57.7
29
  }
30
  }
 
1
  {
2
  "L1": {
3
+ "correct": 21,
4
+ "parsed": 50,
5
  "total": 50,
6
+ "mae": 2.67,
7
+ "accuracy": 42.0
8
  },
9
  "L2": {
10
+ "correct": 24,
11
+ "parsed": 48,
12
  "total": 50,
13
+ "mae": 4.41,
14
  "accuracy": 50.0
15
  },
16
  "L3": {
17
+ "correct": 20,
18
+ "parsed": 45,
19
  "total": 50,
20
+ "mae": 4.44,
21
+ "accuracy": 44.4
22
  },
23
  "L4": {
24
  "correct": 15,
25
+ "parsed": 45,
26
  "total": 50,
27
+ "mae": 5.12,
28
+ "accuracy": 33.3
29
  }
30
  }
sft.json CHANGED
@@ -1,30 +1,30 @@
1
  {
2
  "L1": {
3
- "correct": 24,
4
  "parsed": 50,
5
  "total": 50,
6
- "mae": 2.18,
7
- "accuracy": 48.0
8
  },
9
  "L2": {
10
- "correct": 21,
11
- "parsed": 50,
12
  "total": 50,
13
- "mae": 2.37,
14
- "accuracy": 42.0
15
  },
16
  "L3": {
17
- "correct": 22,
18
- "parsed": 50,
19
  "total": 50,
20
- "mae": 5.19,
21
- "accuracy": 44.0
22
  },
23
  "L4": {
24
- "correct": 18,
25
- "parsed": 50,
26
  "total": 50,
27
- "mae": 7.14,
28
- "accuracy": 36.0
29
  }
30
  }
 
1
  {
2
  "L1": {
3
+ "correct": 26,
4
  "parsed": 50,
5
  "total": 50,
6
+ "mae": 2.48,
7
+ "accuracy": 52.0
8
  },
9
  "L2": {
10
+ "correct": 23,
11
+ "parsed": 48,
12
  "total": 50,
13
+ "mae": 4.18,
14
+ "accuracy": 47.9
15
  },
16
  "L3": {
17
+ "correct": 21,
18
+ "parsed": 46,
19
  "total": 50,
20
+ "mae": 10.2,
21
+ "accuracy": 45.7
22
  },
23
  "L4": {
24
+ "correct": 13,
25
+ "parsed": 45,
26
  "total": 50,
27
+ "mae": 7.41,
28
+ "accuracy": 28.9
29
  }
30
  }