iamshlomo committed on
Commit
a76bfce
·
verified ·
1 Parent(s): 1ce60de

Upload generalization/20260316_1009/checkpoint_eval_iter8000.json with huggingface_hub

Browse files
generalization/20260316_1009/checkpoint_eval_iter8000.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "iteration": 8000,
3
+ "date": "2026-03-17",
4
+ "timestamp": "2026-03-17T18:48:13.526538",
5
+ "training": {
6
+ "lr": 3e-05,
7
+ "batch_size": 256,
8
+ "aux_loss_weight": 0.5,
9
+ "buffer_size": 10000,
10
+ "buffer_capacity": 10000,
11
+ "ema_decay": 0.999,
12
+ "recent_loss_total": 0.0040723668720352,
13
+ "recent_loss_diffusion": 0.0039799308213061884,
14
+ "recent_loss_aux": 0.00018487209995189912,
15
+ "grad_steps_per_iteration": 50,
16
+ "episodes_per_iteration": 10
17
+ },
18
+ "eval_episodes_per_env": 20,
19
+ "id_winrate": 0.7250000000000001,
20
+ "ood_winrate": 0.13333333333333333,
21
+ "per_env": {
22
+ "MiniHack-Room-Random-5x5-v0": {
23
+ "win_rate": 1.0,
24
+ "avg_reward": 22.356499999999997,
25
+ "avg_steps": 4.85,
26
+ "wins": 20,
27
+ "episodes": 20
28
+ },
29
+ "MiniHack-Room-Random-15x15-v0": {
30
+ "win_rate": 0.9,
31
+ "avg_reward": 22.319499999999998,
32
+ "avg_steps": 33.45,
33
+ "wins": 18,
34
+ "episodes": 20
35
+ },
36
+ "MiniHack-Corridor-R2-v0": {
37
+ "win_rate": 0.55,
38
+ "avg_reward": 10.869499999999999,
39
+ "avg_steps": 98.25,
40
+ "wins": 11,
41
+ "episodes": 20
42
+ },
43
+ "MiniHack-MazeWalk-9x9-v0": {
44
+ "win_rate": 0.45,
45
+ "avg_reward": 8.5085,
46
+ "avg_steps": 112.2,
47
+ "wins": 9,
48
+ "episodes": 20
49
+ },
50
+ "MiniHack-Room-Dark-15x15-v0": {
51
+ "win_rate": 0.3,
52
+ "avg_reward": 5.197499999999999,
53
+ "avg_steps": 141.55,
54
+ "wins": 6,
55
+ "episodes": 20
56
+ },
57
+ "MiniHack-Corridor-R5-v0": {
58
+ "win_rate": 0.05,
59
+ "avg_reward": -1.515000000000001,
60
+ "avg_steps": 192.3,
61
+ "wins": 1,
62
+ "episodes": 20
63
+ },
64
+ "MiniHack-MazeWalk-45x19-v0": {
65
+ "win_rate": 0.05,
66
+ "avg_reward": -1.2150000000000012,
67
+ "avg_steps": 190.35,
68
+ "wins": 1,
69
+ "episodes": 20
70
+ }
71
+ }
72
+ }