iamshlomo commited on
Commit
e71a470
·
verified ·
1 Parent(s): 7c070a3

Upload generalization/20260322_2044/rl_finetuning_results.json with huggingface_hub

Browse files
generalization/20260322_2044/rl_finetuning_results.json ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "RL_FINETUNING_main",
3
+ "mode": "main",
4
+ "load_pretrained": true,
5
+ "id_envs": [
6
+ "MiniHack-Room-Random-5x5-v0",
7
+ "MiniHack-Room-Random-15x15-v0",
8
+ "MiniHack-Corridor-R2-v0",
9
+ "MiniHack-MazeWalk-9x9-v0"
10
+ ],
11
+ "ood_envs": [
12
+ "MiniHack-Room-Dark-15x15-v0",
13
+ "MiniHack-Corridor-R5-v0",
14
+ "MiniHack-MazeWalk-45x19-v0"
15
+ ],
16
+ "plan_horizon": 10,
17
+ "replan_every": 5,
18
+ "return_weight_cap": 5.0,
19
+ "baseline_id_winrate": 0.55,
20
+ "baseline_ood_winrate": 0.03333333333333333,
21
+ "post_id_winrate": 0.05,
22
+ "post_ood_winrate": 0.0,
23
+ "id_delta": -0.5,
24
+ "ood_delta": -0.03333333333333333,
25
+ "per_env_id": {
26
+ "MiniHack-Room-Random-5x5-v0": 0.1,
27
+ "MiniHack-Room-Random-15x15-v0": 0.0,
28
+ "MiniHack-Corridor-R2-v0": 0.1,
29
+ "MiniHack-MazeWalk-9x9-v0": 0.0
30
+ },
31
+ "per_env_ood": {
32
+ "MiniHack-Room-Dark-15x15-v0": 0.0,
33
+ "MiniHack-Corridor-R5-v0": 0.0,
34
+ "MiniHack-MazeWalk-45x19-v0": 0.0
35
+ },
36
+ "history": {
37
+ "iter": [
38
+ 10,
39
+ 20,
40
+ 30,
41
+ 40,
42
+ 50,
43
+ 60,
44
+ 70,
45
+ 80,
46
+ 90,
47
+ 100,
48
+ 110,
49
+ 120,
50
+ 130,
51
+ 140,
52
+ 150,
53
+ 160,
54
+ 170,
55
+ 180,
56
+ 190,
57
+ 200,
58
+ 210,
59
+ 220,
60
+ 230,
61
+ 240,
62
+ 250,
63
+ 260,
64
+ 270,
65
+ 280,
66
+ 290,
67
+ 300,
68
+ 310,
69
+ 320,
70
+ 330,
71
+ 340,
72
+ 350,
73
+ 360,
74
+ 370,
75
+ 380,
76
+ 390,
77
+ 400,
78
+ 410,
79
+ 420,
80
+ 430,
81
+ 440,
82
+ 450,
83
+ 460,
84
+ 470,
85
+ 480,
86
+ 490,
87
+ 500
88
+ ],
89
+ "loss": [
90
+ 1.2032866448163986,
91
+ 0.42301379442214965,
92
+ 0.25104237273335456,
93
+ 0.4591872103512287,
94
+ 0.3000399604439735,
95
+ 0.2877576783299446,
96
+ 0.2404518574476242,
97
+ 0.16835472509264945,
98
+ 0.1067834474146366,
99
+ 0.10583478137850762,
100
+ 0.13617852628231047,
101
+ 0.08201947771012782,
102
+ 0.22633123211562634,
103
+ 0.15388962142169477,
104
+ 0.115889573097229,
105
+ 0.17265327833592892,
106
+ 0.12279366813600064,
107
+ 0.06576005928218365,
108
+ 0.052609814889729024,
109
+ 0.0783039540052414,
110
+ 0.06503458386287093,
111
+ 0.11609100289642811,
112
+ 0.08598651923239231,
113
+ 0.10116784311830998,
114
+ 0.04119649361819029,
115
+ 0.0452251210808754,
116
+ 0.07588116154074669,
117
+ 0.09552744701504708,
118
+ 0.11893859058618546,
119
+ 0.07164406739175319,
120
+ 0.06759938597679138,
121
+ 0.0400170442648232,
122
+ 0.06673767920583487,
123
+ 0.030414086207747458,
124
+ 0.027739189565181732,
125
+ 0.0379154859110713,
126
+ 0.039283078908920285,
127
+ 0.04756462974473834,
128
+ 0.027928036637604237,
129
+ 0.03196718501858413,
130
+ 0.02132293554022908,
131
+ 0.01878543649800122,
132
+ 0.012108159833587706,
133
+ 0.014968549855984748,
134
+ 0.03575099157169461,
135
+ 0.018430603388696908,
136
+ 0.020507937553338705,
137
+ 0.014151530805975199,
138
+ 0.042459844169206916,
139
+ 0.011231989855878055
140
+ ],
141
+ "buffer_size": [
142
+ 272,
143
+ 622,
144
+ 952,
145
+ 1263,
146
+ 1574,
147
+ 1885,
148
+ 2215,
149
+ 2565,
150
+ 2877,
151
+ 3227,
152
+ 3538,
153
+ 3888,
154
+ 4179,
155
+ 4490,
156
+ 4820,
157
+ 5170,
158
+ 5500,
159
+ 5850,
160
+ 6180,
161
+ 6530,
162
+ 6860,
163
+ 7171,
164
+ 7501,
165
+ 7812,
166
+ 8142,
167
+ 8492,
168
+ 8822,
169
+ 9172,
170
+ 9483,
171
+ 9833,
172
+ 10000,
173
+ 10000,
174
+ 10000,
175
+ 10000,
176
+ 10000,
177
+ 10000,
178
+ 10000,
179
+ 10000,
180
+ 10000,
181
+ 10000,
182
+ 10000,
183
+ 10000,
184
+ 10000,
185
+ 10000,
186
+ 10000,
187
+ 10000,
188
+ 10000,
189
+ 10000,
190
+ 10000,
191
+ 10000
192
+ ],
193
+ "episodes_added": [],
194
+ "mean_return": [
195
+ 9.02900000000001,
196
+ -3.340999999999995,
197
+ -2.6769999999999956,
198
+ -0.32000000000000034,
199
+ -2.0869999999999833,
200
+ -1.4799999999999944,
201
+ -3.807999999999992,
202
+ -3.9049999999999963,
203
+ -1.6699999999999886,
204
+ -3.3149999999999933,
205
+ -0.9819999999999969,
206
+ -3.767999999999989,
207
+ -0.42399999999999843,
208
+ -0.9370000000000008,
209
+ -3.192000000000003,
210
+ -3.901999999999995,
211
+ -3.8919999999999937,
212
+ -3.414,
213
+ -3.558999999999995,
214
+ -3.6879999999999975,
215
+ -3.646999999999996,
216
+ -1.3499999999999912,
217
+ -3.8949999999999863,
218
+ -1.6409999999999898,
219
+ -4.051999999999985,
220
+ -3.6239999999999974,
221
+ -2.8239999999999865,
222
+ -3.769999999999997,
223
+ -1.8269999999999904,
224
+ -3.3080000000000025,
225
+ -3.9199999999999933,
226
+ -1.3249999999999968,
227
+ -4.51599999999998,
228
+ 1.686000000000003,
229
+ -3.4939999999999976,
230
+ -4.145999999999988,
231
+ -3.345999999999994,
232
+ -4.39999999999999,
233
+ -3.011999999999995,
234
+ -3.4879999999999938,
235
+ -1.5469999999999855,
236
+ -3.759999999999997,
237
+ -3.9379999999999895,
238
+ 1.3060000000000025,
239
+ -1.1889999999999887,
240
+ -0.6930000000000016,
241
+ -3.699999999999992,
242
+ -0.3769999999999937,
243
+ -1.1509999999999976,
244
+ -3.7639999999999985
245
+ ],
246
+ "win_rate_episode": [
247
+ 0.5,
248
+ 0.0,
249
+ 0.0,
250
+ 0.1,
251
+ 0.1,
252
+ 0.1,
253
+ 0.0,
254
+ 0.0,
255
+ 0.1,
256
+ 0.0,
257
+ 0.1,
258
+ 0.0,
259
+ 0.1,
260
+ 0.1,
261
+ 0.0,
262
+ 0.0,
263
+ 0.0,
264
+ 0.0,
265
+ 0.0,
266
+ 0.0,
267
+ 0.0,
268
+ 0.1,
269
+ 0.0,
270
+ 0.1,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.1,
276
+ 0.0,
277
+ 0.0,
278
+ 0.1,
279
+ 0.0,
280
+ 0.2,
281
+ 0.0,
282
+ 0.0,
283
+ 0.0,
284
+ 0.0,
285
+ 0.0,
286
+ 0.0,
287
+ 0.1,
288
+ 0.0,
289
+ 0.0,
290
+ 0.2,
291
+ 0.1,
292
+ 0.1,
293
+ 0.0,
294
+ 0.1,
295
+ 0.1,
296
+ 0.0
297
+ ],
298
+ "id_winrate": [
299
+ 0.025,
300
+ 0.025
301
+ ],
302
+ "ood_winrate": [
303
+ 0.0
304
+ ],
305
+ "id_winrate_iter": [
306
+ 250,
307
+ 500
308
+ ],
309
+ "ood_winrate_iter": [
310
+ 500
311
+ ]
312
+ }
313
+ }