iamshlomo commited on
Commit
d6c7b83
·
verified ·
1 Parent(s): 308725c

Upload generalization/20260322_2203/rl_finetuning_results.json with huggingface_hub

Browse files
generalization/20260322_2203/rl_finetuning_results.json ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "RL_FINETUNING_main",
3
+ "mode": "main",
4
+ "load_pretrained": true,
5
+ "id_envs": [
6
+ "MiniHack-Room-Random-5x5-v0",
7
+ "MiniHack-Room-Random-15x15-v0",
8
+ "MiniHack-Corridor-R2-v0",
9
+ "MiniHack-MazeWalk-9x9-v0"
10
+ ],
11
+ "ood_envs": [
12
+ "MiniHack-Room-Dark-15x15-v0",
13
+ "MiniHack-Corridor-R5-v0",
14
+ "MiniHack-MazeWalk-45x19-v0"
15
+ ],
16
+ "plan_horizon": 10,
17
+ "replan_every": 5,
18
+ "return_weight_cap": 5.0,
19
+ "baseline_id_winrate": 0.5025,
20
+ "baseline_ood_winrate": 0.06666666666666667,
21
+ "post_id_winrate": 0.0475,
22
+ "post_ood_winrate": 0.01,
23
+ "id_delta": -0.45499999999999996,
24
+ "ood_delta": -0.056666666666666664,
25
+ "per_env_id": {
26
+ "MiniHack-Room-Random-5x5-v0": 0.07,
27
+ "MiniHack-Room-Random-15x15-v0": 0.02,
28
+ "MiniHack-Corridor-R2-v0": 0.03,
29
+ "MiniHack-MazeWalk-9x9-v0": 0.07
30
+ },
31
+ "per_env_ood": {
32
+ "MiniHack-Room-Dark-15x15-v0": 0.02,
33
+ "MiniHack-Corridor-R5-v0": 0.0,
34
+ "MiniHack-MazeWalk-45x19-v0": 0.01
35
+ },
36
+ "history": {
37
+ "iter": [
38
+ 10,
39
+ 20,
40
+ 30,
41
+ 40,
42
+ 50,
43
+ 60,
44
+ 70,
45
+ 80,
46
+ 90,
47
+ 100,
48
+ 110,
49
+ 120,
50
+ 130,
51
+ 140,
52
+ 150,
53
+ 160,
54
+ 170,
55
+ 180,
56
+ 190,
57
+ 200,
58
+ 210,
59
+ 220,
60
+ 230,
61
+ 240,
62
+ 250,
63
+ 260,
64
+ 270,
65
+ 280,
66
+ 290,
67
+ 300,
68
+ 310,
69
+ 320,
70
+ 330,
71
+ 340,
72
+ 350,
73
+ 360,
74
+ 370,
75
+ 380,
76
+ 390,
77
+ 400,
78
+ 410,
79
+ 420,
80
+ 430,
81
+ 440,
82
+ 450,
83
+ 460,
84
+ 470,
85
+ 480,
86
+ 490,
87
+ 500
88
+ ],
89
+ "loss": [
90
+ 0.46080999821424484,
91
+ 0.15714064557105303,
92
+ 0.0555162894539535,
93
+ 0.3711315993219614,
94
+ 0.36938854344189165,
95
+ 0.09161516968160868,
96
+ 0.09922296348959207,
97
+ 0.0681916169822216,
98
+ 0.08011019751429557,
99
+ 0.05443341545760631,
100
+ 0.029739007260650395,
101
+ 0.031307310331612825,
102
+ 0.04651444014161825,
103
+ 0.07116823364049196,
104
+ 0.12269484288990498,
105
+ 0.09633960779756308,
106
+ 0.08664808757603168,
107
+ 0.061644272319972514,
108
+ 0.11547831278294325,
109
+ 0.06019139308482409,
110
+ 0.037349676713347435,
111
+ 0.055236862413585185,
112
+ 0.03318943716585636,
113
+ 0.0621327655389905,
114
+ 0.0740554541349411,
115
+ 0.09577131671831011,
116
+ 0.057580916304141284,
117
+ 0.10360853262245655,
118
+ 0.027896188013255597,
119
+ 0.13837355710566043,
120
+ 0.06162200951948762,
121
+ 0.06308978516608477,
122
+ 0.0307971753180027,
123
+ 0.03593000434339046,
124
+ 0.028360088635236026,
125
+ 0.02000439059920609,
126
+ 0.08396465806290507,
127
+ 0.017000191006809474,
128
+ 0.10887408684939145,
129
+ 0.02700605634599924,
130
+ 0.058951123151928186,
131
+ 0.06554037686437368,
132
+ 0.14065809603780507,
133
+ 0.07174393218010663,
134
+ 0.18371206372976304,
135
+ 0.054573338106274606,
136
+ 0.050333873555064204,
137
+ 0.03094298713840544,
138
+ 0.02681313958019018,
139
+ 0.037661240249872205
140
+ ],
141
+ "buffer_size": [
142
+ 275,
143
+ 587,
144
+ 879,
145
+ 1171,
146
+ 1444,
147
+ 1716,
148
+ 2046,
149
+ 2396,
150
+ 2648,
151
+ 2940,
152
+ 3154,
153
+ 3465,
154
+ 3795,
155
+ 4126,
156
+ 4456,
157
+ 4806,
158
+ 5136,
159
+ 5467,
160
+ 5778,
161
+ 6128,
162
+ 6439,
163
+ 6711,
164
+ 7022,
165
+ 7372,
166
+ 7702,
167
+ 8052,
168
+ 8324,
169
+ 8616,
170
+ 8946,
171
+ 9296,
172
+ 9626,
173
+ 9976,
174
+ 10000,
175
+ 10000,
176
+ 10000,
177
+ 10000,
178
+ 10000,
179
+ 10000,
180
+ 10000,
181
+ 10000,
182
+ 10000,
183
+ 10000,
184
+ 10000,
185
+ 10000,
186
+ 10000,
187
+ 10000,
188
+ 10000,
189
+ 10000,
190
+ 10000,
191
+ 10000
192
+ ],
193
+ "episodes_added": [],
194
+ "mean_return": [
195
+ 5.233,
196
+ 1.5950000000000089,
197
+ 1.849000000000008,
198
+ 2.0080000000000053,
199
+ 1.9760000000000058,
200
+ 1.6759999999999984,
201
+ -3.0259999999999985,
202
+ -3.7489999999999943,
203
+ 1.5340000000000134,
204
+ 1.0650000000000102,
205
+ 6.6120000000000045,
206
+ -0.17700000000000207,
207
+ -3.2069999999999927,
208
+ -0.9639999999999972,
209
+ -3.8789999999999893,
210
+ -3.325999999999999,
211
+ -3.681999999999993,
212
+ -2.00799999999999,
213
+ -1.1769999999999878,
214
+ -3.781999999999992,
215
+ -2.206999999999978,
216
+ 1.0560000000000096,
217
+ -0.6469999999999947,
218
+ -3.469,
219
+ -3.82199999999999,
220
+ -4.227999999999987,
221
+ 0.6960000000000075,
222
+ 1.894000000000008,
223
+ -4.415999999999986,
224
+ -3.965999999999993,
225
+ -5.24399999999998,
226
+ -3.9759999999999898,
227
+ -1.2559999999999942,
228
+ -1.270999999999992,
229
+ 1.1900000000000035,
230
+ -1.273999999999994,
231
+ -1.2359999999999909,
232
+ -3.6720000000000006,
233
+ -2.769999999999997,
234
+ -2.970000000000003,
235
+ -2.3719999999999946,
236
+ -1.4329999999999938,
237
+ -3.1499999999999977,
238
+ 2.184000000000002,
239
+ 0.2050000000000189,
240
+ -0.49200000000000194,
241
+ -3.1159999999999948,
242
+ -1.1769999999999965,
243
+ -2.628,
244
+ -3.9549999999999947
245
+ ],
246
+ "win_rate_episode": [
247
+ 0.3,
248
+ 0.2,
249
+ 0.2,
250
+ 0.2,
251
+ 0.2,
252
+ 0.2,
253
+ 0.0,
254
+ 0.0,
255
+ 0.2,
256
+ 0.2,
257
+ 0.4,
258
+ 0.1,
259
+ 0.0,
260
+ 0.1,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.1,
265
+ 0.1,
266
+ 0.0,
267
+ 0.1,
268
+ 0.2,
269
+ 0.1,
270
+ 0.0,
271
+ 0.0,
272
+ 0.0,
273
+ 0.2,
274
+ 0.2,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.1,
280
+ 0.1,
281
+ 0.2,
282
+ 0.1,
283
+ 0.1,
284
+ 0.0,
285
+ 0.0,
286
+ 0.0,
287
+ 0.0,
288
+ 0.1,
289
+ 0.0,
290
+ 0.2,
291
+ 0.2,
292
+ 0.1,
293
+ 0.0,
294
+ 0.1,
295
+ 0.0,
296
+ 0.0
297
+ ],
298
+ "id_winrate": [
299
+ 0.0325,
300
+ 0.0675
301
+ ],
302
+ "ood_winrate": [
303
+ 0.013333333333333334
304
+ ],
305
+ "id_winrate_iter": [
306
+ 250,
307
+ 500
308
+ ],
309
+ "ood_winrate_iter": [
310
+ 500
311
+ ]
312
+ }
313
+ }