annasoli commited on
Commit
628d28d
·
verified ·
1 Parent(s): 608eb00

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoints/checkpoint-10/rng_state.pth +3 -0
  2. checkpoints/checkpoint-10/steering_vector.pt +3 -0
  3. checkpoints/checkpoint-10/trainer_state.json +134 -0
  4. checkpoints/checkpoint-10/training_args.bin +3 -0
  5. checkpoints/checkpoint-100/rng_state.pth +3 -0
  6. checkpoints/checkpoint-100/steering_vector.pt +3 -0
  7. checkpoints/checkpoint-100/trainer_state.json +1042 -0
  8. checkpoints/checkpoint-100/training_args.bin +3 -0
  9. checkpoints/checkpoint-105/rng_state.pth +3 -0
  10. checkpoints/checkpoint-105/steering_vector.pt +3 -0
  11. checkpoints/checkpoint-105/trainer_state.json +1092 -0
  12. checkpoints/checkpoint-105/training_args.bin +3 -0
  13. checkpoints/checkpoint-110/rng_state.pth +3 -0
  14. checkpoints/checkpoint-110/steering_vector.pt +3 -0
  15. checkpoints/checkpoint-110/trainer_state.json +1142 -0
  16. checkpoints/checkpoint-110/training_args.bin +3 -0
  17. checkpoints/checkpoint-115/rng_state.pth +3 -0
  18. checkpoints/checkpoint-115/steering_vector.pt +3 -0
  19. checkpoints/checkpoint-115/trainer_state.json +1192 -0
  20. checkpoints/checkpoint-115/training_args.bin +3 -0
  21. checkpoints/checkpoint-120/rng_state.pth +3 -0
  22. checkpoints/checkpoint-120/steering_vector.pt +3 -0
  23. checkpoints/checkpoint-120/trainer_state.json +1242 -0
  24. checkpoints/checkpoint-120/training_args.bin +3 -0
  25. checkpoints/checkpoint-125/rng_state.pth +3 -0
  26. checkpoints/checkpoint-125/steering_vector.pt +3 -0
  27. checkpoints/checkpoint-125/trainer_state.json +1292 -0
  28. checkpoints/checkpoint-125/training_args.bin +3 -0
  29. checkpoints/checkpoint-130/rng_state.pth +3 -0
  30. checkpoints/checkpoint-130/steering_vector.pt +3 -0
  31. checkpoints/checkpoint-130/trainer_state.json +1342 -0
  32. checkpoints/checkpoint-130/training_args.bin +3 -0
  33. checkpoints/checkpoint-135/rng_state.pth +3 -0
  34. checkpoints/checkpoint-135/steering_vector.pt +3 -0
  35. checkpoints/checkpoint-135/trainer_state.json +1392 -0
  36. checkpoints/checkpoint-135/training_args.bin +3 -0
  37. checkpoints/checkpoint-140/rng_state.pth +3 -0
  38. checkpoints/checkpoint-140/steering_vector.pt +3 -0
  39. checkpoints/checkpoint-140/trainer_state.json +1442 -0
  40. checkpoints/checkpoint-140/training_args.bin +3 -0
  41. checkpoints/checkpoint-145/rng_state.pth +3 -0
  42. checkpoints/checkpoint-145/steering_vector.pt +3 -0
  43. checkpoints/checkpoint-145/trainer_state.json +1492 -0
  44. checkpoints/checkpoint-145/training_args.bin +3 -0
  45. checkpoints/checkpoint-15/rng_state.pth +3 -0
  46. checkpoints/checkpoint-15/steering_vector.pt +3 -0
  47. checkpoints/checkpoint-15/trainer_state.json +184 -0
  48. checkpoints/checkpoint-15/training_args.bin +3 -0
  49. checkpoints/checkpoint-150/rng_state.pth +3 -0
  50. checkpoints/checkpoint-150/steering_vector.pt +3 -0
checkpoints/checkpoint-10/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1a63186e1831fee81225bf6cd47d27992b873c25909a295f22f84e99b453aa2
3
+ size 6901
checkpoints/checkpoint-10/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c1785efdda13c338b7a82613d63ff65cbe4fa0b178cee2fa44241feafe444f
3
+ size 22241
checkpoints/checkpoint-10/trainer_state.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.025220680958385876,
6
+ "eval_steps": 100,
7
+ "global_step": 10,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ }
112
+ ],
113
+ "logging_steps": 1,
114
+ "max_steps": 794,
115
+ "num_input_tokens_seen": 0,
116
+ "num_train_epochs": 2,
117
+ "save_steps": 5,
118
+ "stateful_callbacks": {
119
+ "TrainerControl": {
120
+ "args": {
121
+ "should_epoch_stop": false,
122
+ "should_evaluate": false,
123
+ "should_log": false,
124
+ "should_save": false,
125
+ "should_training_stop": false
126
+ },
127
+ "attributes": {}
128
+ }
129
+ },
130
+ "total_flos": 3438280704000000.0,
131
+ "train_batch_size": 2,
132
+ "trial_name": null,
133
+ "trial_params": null
134
+ }
checkpoints/checkpoint-10/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42eaa72eb49da412ce263a632b0d990a3ce580b1a0b52f193cbbf5895f9265d0
3
+ size 6901
checkpoints/checkpoint-100/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81f1c72859bfd218e2621d1ba24c8f846c87fd1f9ece1ccabd8918baae96de6d
3
+ size 22241
checkpoints/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,1042 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.25220680958385877,
6
+ "eval_steps": 100,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ }
1020
+ ],
1021
+ "logging_steps": 1,
1022
+ "max_steps": 794,
1023
+ "num_input_tokens_seen": 0,
1024
+ "num_train_epochs": 2,
1025
+ "save_steps": 5,
1026
+ "stateful_callbacks": {
1027
+ "TrainerControl": {
1028
+ "args": {
1029
+ "should_epoch_stop": false,
1030
+ "should_evaluate": false,
1031
+ "should_log": false,
1032
+ "should_save": false,
1033
+ "should_training_stop": false
1034
+ },
1035
+ "attributes": {}
1036
+ }
1037
+ },
1038
+ "total_flos": 3.438280704e+16,
1039
+ "train_batch_size": 2,
1040
+ "trial_name": null,
1041
+ "trial_params": null
1042
+ }
checkpoints/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-105/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515290160273577b95c101a125302ab6be637eecabd3ef07fe5ab29e36af7a96
3
+ size 6901
checkpoints/checkpoint-105/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40ee95207c7dd4709ed28afd3490a09ac789742f72fed33752730c41f611366e
3
+ size 22241
checkpoints/checkpoint-105/trainer_state.json ADDED
@@ -0,0 +1,1092 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.2648171500630517,
6
+ "eval_steps": 100,
7
+ "global_step": 105,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ }
1070
+ ],
1071
+ "logging_steps": 1,
1072
+ "max_steps": 794,
1073
+ "num_input_tokens_seen": 0,
1074
+ "num_train_epochs": 2,
1075
+ "save_steps": 5,
1076
+ "stateful_callbacks": {
1077
+ "TrainerControl": {
1078
+ "args": {
1079
+ "should_epoch_stop": false,
1080
+ "should_evaluate": false,
1081
+ "should_log": false,
1082
+ "should_save": false,
1083
+ "should_training_stop": false
1084
+ },
1085
+ "attributes": {}
1086
+ }
1087
+ },
1088
+ "total_flos": 3.6101947392e+16,
1089
+ "train_batch_size": 2,
1090
+ "trial_name": null,
1091
+ "trial_params": null
1092
+ }
checkpoints/checkpoint-105/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-110/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515290160273577b95c101a125302ab6be637eecabd3ef07fe5ab29e36af7a96
3
+ size 6901
checkpoints/checkpoint-110/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0927fc7c9be51a3f6f28d540fa73d010f0f7f2fa950b5b84d9b87b62883a483a
3
+ size 22241
checkpoints/checkpoint-110/trainer_state.json ADDED
@@ -0,0 +1,1142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.27742749054224464,
6
+ "eval_steps": 100,
7
+ "global_step": 110,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ }
1120
+ ],
1121
+ "logging_steps": 1,
1122
+ "max_steps": 794,
1123
+ "num_input_tokens_seen": 0,
1124
+ "num_train_epochs": 2,
1125
+ "save_steps": 5,
1126
+ "stateful_callbacks": {
1127
+ "TrainerControl": {
1128
+ "args": {
1129
+ "should_epoch_stop": false,
1130
+ "should_evaluate": false,
1131
+ "should_log": false,
1132
+ "should_save": false,
1133
+ "should_training_stop": false
1134
+ },
1135
+ "attributes": {}
1136
+ }
1137
+ },
1138
+ "total_flos": 3.7821087744e+16,
1139
+ "train_batch_size": 2,
1140
+ "trial_name": null,
1141
+ "trial_params": null
1142
+ }
checkpoints/checkpoint-110/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-115/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515290160273577b95c101a125302ab6be637eecabd3ef07fe5ab29e36af7a96
3
+ size 6901
checkpoints/checkpoint-115/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f85b72e7d1b9ba3f4e3b715ea993d6f68106de3cc0424ac50c97d0d950058ab
3
+ size 22241
checkpoints/checkpoint-115/trainer_state.json ADDED
@@ -0,0 +1,1192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.2900378310214376,
6
+ "eval_steps": 100,
7
+ "global_step": 115,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ }
1170
+ ],
1171
+ "logging_steps": 1,
1172
+ "max_steps": 794,
1173
+ "num_input_tokens_seen": 0,
1174
+ "num_train_epochs": 2,
1175
+ "save_steps": 5,
1176
+ "stateful_callbacks": {
1177
+ "TrainerControl": {
1178
+ "args": {
1179
+ "should_epoch_stop": false,
1180
+ "should_evaluate": false,
1181
+ "should_log": false,
1182
+ "should_save": false,
1183
+ "should_training_stop": false
1184
+ },
1185
+ "attributes": {}
1186
+ }
1187
+ },
1188
+ "total_flos": 3.9540228096e+16,
1189
+ "train_batch_size": 2,
1190
+ "trial_name": null,
1191
+ "trial_params": null
1192
+ }
checkpoints/checkpoint-115/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-120/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e89f9f7b93d0ef6f21db70c056019f0d88707aedf92d18dea4d889d30181269
3
+ size 6901
checkpoints/checkpoint-120/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:855f016efb21cc65ad782052cfee135313ff98f6fa739467d4a3eef61c045e69
3
+ size 22241
checkpoints/checkpoint-120/trainer_state.json ADDED
@@ -0,0 +1,1242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.3026481715006305,
6
+ "eval_steps": 100,
7
+ "global_step": 120,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ }
1220
+ ],
1221
+ "logging_steps": 1,
1222
+ "max_steps": 794,
1223
+ "num_input_tokens_seen": 0,
1224
+ "num_train_epochs": 2,
1225
+ "save_steps": 5,
1226
+ "stateful_callbacks": {
1227
+ "TrainerControl": {
1228
+ "args": {
1229
+ "should_epoch_stop": false,
1230
+ "should_evaluate": false,
1231
+ "should_log": false,
1232
+ "should_save": false,
1233
+ "should_training_stop": false
1234
+ },
1235
+ "attributes": {}
1236
+ }
1237
+ },
1238
+ "total_flos": 4.1259368448e+16,
1239
+ "train_batch_size": 2,
1240
+ "trial_name": null,
1241
+ "trial_params": null
1242
+ }
checkpoints/checkpoint-120/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-125/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e89f9f7b93d0ef6f21db70c056019f0d88707aedf92d18dea4d889d30181269
3
+ size 6901
checkpoints/checkpoint-125/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3472a699bcc5ba1b68c965f72109b841b47e59a1da32167c1456f5b4da326d9
3
+ size 22241
checkpoints/checkpoint-125/trainer_state.json ADDED
@@ -0,0 +1,1292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.31525851197982346,
6
+ "eval_steps": 100,
7
+ "global_step": 125,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ },
1220
+ {
1221
+ "epoch": 0.3051702395964691,
1222
+ "grad_norm": 29.64325714111328,
1223
+ "kl_loss": 7.080717523422209e-08,
1224
+ "kl_weight": 1000000.0,
1225
+ "learning_rate": 0.00042712294043092524,
1226
+ "loss": 2.5838398933410645,
1227
+ "step": 121,
1228
+ "total_loss": 2.6546471118927
1229
+ },
1230
+ {
1231
+ "epoch": 0.3076923076923077,
1232
+ "grad_norm": 31.308616638183594,
1233
+ "kl_loss": 7.158789117056585e-08,
1234
+ "kl_weight": 1000000.0,
1235
+ "learning_rate": 0.00042648922686945503,
1236
+ "loss": 2.582289457321167,
1237
+ "step": 122,
1238
+ "total_loss": 2.6538772583007812
1239
+ },
1240
+ {
1241
+ "epoch": 0.31021437578814626,
1242
+ "grad_norm": 31.91105842590332,
1243
+ "kl_loss": 5.958595039601278e-08,
1244
+ "kl_weight": 1000000.0,
1245
+ "learning_rate": 0.00042585551330798476,
1246
+ "loss": 2.362175703048706,
1247
+ "step": 123,
1248
+ "total_loss": 2.4217617511749268
1249
+ },
1250
+ {
1251
+ "epoch": 0.31273644388398486,
1252
+ "grad_norm": 48.74770736694336,
1253
+ "kl_loss": 1.0601585387348678e-07,
1254
+ "kl_weight": 1000000.0,
1255
+ "learning_rate": 0.0004252217997465146,
1256
+ "loss": 2.6029539108276367,
1257
+ "step": 124,
1258
+ "total_loss": 2.708969831466675
1259
+ },
1260
+ {
1261
+ "epoch": 0.31525851197982346,
1262
+ "grad_norm": 37.71382141113281,
1263
+ "kl_loss": 7.022106984777565e-08,
1264
+ "kl_weight": 1000000.0,
1265
+ "learning_rate": 0.00042458808618504434,
1266
+ "loss": 1.9780246019363403,
1267
+ "step": 125,
1268
+ "total_loss": 2.048245668411255
1269
+ }
1270
+ ],
1271
+ "logging_steps": 1,
1272
+ "max_steps": 794,
1273
+ "num_input_tokens_seen": 0,
1274
+ "num_train_epochs": 2,
1275
+ "save_steps": 5,
1276
+ "stateful_callbacks": {
1277
+ "TrainerControl": {
1278
+ "args": {
1279
+ "should_epoch_stop": false,
1280
+ "should_evaluate": false,
1281
+ "should_log": false,
1282
+ "should_save": false,
1283
+ "should_training_stop": false
1284
+ },
1285
+ "attributes": {}
1286
+ }
1287
+ },
1288
+ "total_flos": 4.29785088e+16,
1289
+ "train_batch_size": 2,
1290
+ "trial_name": null,
1291
+ "trial_params": null
1292
+ }
checkpoints/checkpoint-125/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-130/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e89f9f7b93d0ef6f21db70c056019f0d88707aedf92d18dea4d889d30181269
3
+ size 6901
checkpoints/checkpoint-130/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b479e3113cc30625de6170cf2123b0fd110a1c1bcca8bffb9fba38be9f093508
3
+ size 22241
checkpoints/checkpoint-130/trainer_state.json ADDED
@@ -0,0 +1,1342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.32786885245901637,
6
+ "eval_steps": 100,
7
+ "global_step": 130,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ },
1220
+ {
1221
+ "epoch": 0.3051702395964691,
1222
+ "grad_norm": 29.64325714111328,
1223
+ "kl_loss": 7.080717523422209e-08,
1224
+ "kl_weight": 1000000.0,
1225
+ "learning_rate": 0.00042712294043092524,
1226
+ "loss": 2.5838398933410645,
1227
+ "step": 121,
1228
+ "total_loss": 2.6546471118927
1229
+ },
1230
+ {
1231
+ "epoch": 0.3076923076923077,
1232
+ "grad_norm": 31.308616638183594,
1233
+ "kl_loss": 7.158789117056585e-08,
1234
+ "kl_weight": 1000000.0,
1235
+ "learning_rate": 0.00042648922686945503,
1236
+ "loss": 2.582289457321167,
1237
+ "step": 122,
1238
+ "total_loss": 2.6538772583007812
1239
+ },
1240
+ {
1241
+ "epoch": 0.31021437578814626,
1242
+ "grad_norm": 31.91105842590332,
1243
+ "kl_loss": 5.958595039601278e-08,
1244
+ "kl_weight": 1000000.0,
1245
+ "learning_rate": 0.00042585551330798476,
1246
+ "loss": 2.362175703048706,
1247
+ "step": 123,
1248
+ "total_loss": 2.4217617511749268
1249
+ },
1250
+ {
1251
+ "epoch": 0.31273644388398486,
1252
+ "grad_norm": 48.74770736694336,
1253
+ "kl_loss": 1.0601585387348678e-07,
1254
+ "kl_weight": 1000000.0,
1255
+ "learning_rate": 0.0004252217997465146,
1256
+ "loss": 2.6029539108276367,
1257
+ "step": 124,
1258
+ "total_loss": 2.708969831466675
1259
+ },
1260
+ {
1261
+ "epoch": 0.31525851197982346,
1262
+ "grad_norm": 37.71382141113281,
1263
+ "kl_loss": 7.022106984777565e-08,
1264
+ "kl_weight": 1000000.0,
1265
+ "learning_rate": 0.00042458808618504434,
1266
+ "loss": 1.9780246019363403,
1267
+ "step": 125,
1268
+ "total_loss": 2.048245668411255
1269
+ },
1270
+ {
1271
+ "epoch": 0.31778058007566207,
1272
+ "grad_norm": 58.47407531738281,
1273
+ "kl_loss": 7.89053729022271e-08,
1274
+ "kl_weight": 1000000.0,
1275
+ "learning_rate": 0.0004239543726235742,
1276
+ "loss": 3.0634782314300537,
1277
+ "step": 126,
1278
+ "total_loss": 3.142383575439453
1279
+ },
1280
+ {
1281
+ "epoch": 0.3203026481715006,
1282
+ "grad_norm": 49.75334167480469,
1283
+ "kl_loss": 9.529335898150748e-08,
1284
+ "kl_weight": 1000000.0,
1285
+ "learning_rate": 0.0004233206590621039,
1286
+ "loss": 2.625107765197754,
1287
+ "step": 127,
1288
+ "total_loss": 2.7204010486602783
1289
+ },
1290
+ {
1291
+ "epoch": 0.3228247162673392,
1292
+ "grad_norm": 37.173675537109375,
1293
+ "kl_loss": 9.76136007579953e-08,
1294
+ "kl_weight": 1000000.0,
1295
+ "learning_rate": 0.00042268694550063376,
1296
+ "loss": 2.3797764778137207,
1297
+ "step": 128,
1298
+ "total_loss": 2.4773900508880615
1299
+ },
1300
+ {
1301
+ "epoch": 0.3253467843631778,
1302
+ "grad_norm": 35.94040298461914,
1303
+ "kl_loss": 8.867663581213492e-08,
1304
+ "kl_weight": 1000000.0,
1305
+ "learning_rate": 0.0004220532319391635,
1306
+ "loss": 2.19878888130188,
1307
+ "step": 129,
1308
+ "total_loss": 2.2874655723571777
1309
+ },
1310
+ {
1311
+ "epoch": 0.32786885245901637,
1312
+ "grad_norm": 30.7440128326416,
1313
+ "kl_loss": 8.357946512660419e-08,
1314
+ "kl_weight": 1000000.0,
1315
+ "learning_rate": 0.0004214195183776933,
1316
+ "loss": 2.374424934387207,
1317
+ "step": 130,
1318
+ "total_loss": 2.4580044746398926
1319
+ }
1320
+ ],
1321
+ "logging_steps": 1,
1322
+ "max_steps": 794,
1323
+ "num_input_tokens_seen": 0,
1324
+ "num_train_epochs": 2,
1325
+ "save_steps": 5,
1326
+ "stateful_callbacks": {
1327
+ "TrainerControl": {
1328
+ "args": {
1329
+ "should_epoch_stop": false,
1330
+ "should_evaluate": false,
1331
+ "should_log": false,
1332
+ "should_save": false,
1333
+ "should_training_stop": false
1334
+ },
1335
+ "attributes": {}
1336
+ }
1337
+ },
1338
+ "total_flos": 4.4697649152e+16,
1339
+ "train_batch_size": 2,
1340
+ "trial_name": null,
1341
+ "trial_params": null
1342
+ }
checkpoints/checkpoint-130/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-135/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d3aea558cb00f620f94e50b26ccd5377884c23af22e3581e3a5a9da8a420a2
3
+ size 6901
checkpoints/checkpoint-135/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf06ca3b67baaddcbbf9a5892bc13788d44d1a5300c2792c008e9f337993c7ec
3
+ size 22241
checkpoints/checkpoint-135/trainer_state.json ADDED
@@ -0,0 +1,1392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.34047919293820933,
6
+ "eval_steps": 100,
7
+ "global_step": 135,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ },
1220
+ {
1221
+ "epoch": 0.3051702395964691,
1222
+ "grad_norm": 29.64325714111328,
1223
+ "kl_loss": 7.080717523422209e-08,
1224
+ "kl_weight": 1000000.0,
1225
+ "learning_rate": 0.00042712294043092524,
1226
+ "loss": 2.5838398933410645,
1227
+ "step": 121,
1228
+ "total_loss": 2.6546471118927
1229
+ },
1230
+ {
1231
+ "epoch": 0.3076923076923077,
1232
+ "grad_norm": 31.308616638183594,
1233
+ "kl_loss": 7.158789117056585e-08,
1234
+ "kl_weight": 1000000.0,
1235
+ "learning_rate": 0.00042648922686945503,
1236
+ "loss": 2.582289457321167,
1237
+ "step": 122,
1238
+ "total_loss": 2.6538772583007812
1239
+ },
1240
+ {
1241
+ "epoch": 0.31021437578814626,
1242
+ "grad_norm": 31.91105842590332,
1243
+ "kl_loss": 5.958595039601278e-08,
1244
+ "kl_weight": 1000000.0,
1245
+ "learning_rate": 0.00042585551330798476,
1246
+ "loss": 2.362175703048706,
1247
+ "step": 123,
1248
+ "total_loss": 2.4217617511749268
1249
+ },
1250
+ {
1251
+ "epoch": 0.31273644388398486,
1252
+ "grad_norm": 48.74770736694336,
1253
+ "kl_loss": 1.0601585387348678e-07,
1254
+ "kl_weight": 1000000.0,
1255
+ "learning_rate": 0.0004252217997465146,
1256
+ "loss": 2.6029539108276367,
1257
+ "step": 124,
1258
+ "total_loss": 2.708969831466675
1259
+ },
1260
+ {
1261
+ "epoch": 0.31525851197982346,
1262
+ "grad_norm": 37.71382141113281,
1263
+ "kl_loss": 7.022106984777565e-08,
1264
+ "kl_weight": 1000000.0,
1265
+ "learning_rate": 0.00042458808618504434,
1266
+ "loss": 1.9780246019363403,
1267
+ "step": 125,
1268
+ "total_loss": 2.048245668411255
1269
+ },
1270
+ {
1271
+ "epoch": 0.31778058007566207,
1272
+ "grad_norm": 58.47407531738281,
1273
+ "kl_loss": 7.89053729022271e-08,
1274
+ "kl_weight": 1000000.0,
1275
+ "learning_rate": 0.0004239543726235742,
1276
+ "loss": 3.0634782314300537,
1277
+ "step": 126,
1278
+ "total_loss": 3.142383575439453
1279
+ },
1280
+ {
1281
+ "epoch": 0.3203026481715006,
1282
+ "grad_norm": 49.75334167480469,
1283
+ "kl_loss": 9.529335898150748e-08,
1284
+ "kl_weight": 1000000.0,
1285
+ "learning_rate": 0.0004233206590621039,
1286
+ "loss": 2.625107765197754,
1287
+ "step": 127,
1288
+ "total_loss": 2.7204010486602783
1289
+ },
1290
+ {
1291
+ "epoch": 0.3228247162673392,
1292
+ "grad_norm": 37.173675537109375,
1293
+ "kl_loss": 9.76136007579953e-08,
1294
+ "kl_weight": 1000000.0,
1295
+ "learning_rate": 0.00042268694550063376,
1296
+ "loss": 2.3797764778137207,
1297
+ "step": 128,
1298
+ "total_loss": 2.4773900508880615
1299
+ },
1300
+ {
1301
+ "epoch": 0.3253467843631778,
1302
+ "grad_norm": 35.94040298461914,
1303
+ "kl_loss": 8.867663581213492e-08,
1304
+ "kl_weight": 1000000.0,
1305
+ "learning_rate": 0.0004220532319391635,
1306
+ "loss": 2.19878888130188,
1307
+ "step": 129,
1308
+ "total_loss": 2.2874655723571777
1309
+ },
1310
+ {
1311
+ "epoch": 0.32786885245901637,
1312
+ "grad_norm": 30.7440128326416,
1313
+ "kl_loss": 8.357946512660419e-08,
1314
+ "kl_weight": 1000000.0,
1315
+ "learning_rate": 0.0004214195183776933,
1316
+ "loss": 2.374424934387207,
1317
+ "step": 130,
1318
+ "total_loss": 2.4580044746398926
1319
+ },
1320
+ {
1321
+ "epoch": 0.33039092055485497,
1322
+ "grad_norm": 44.41523361206055,
1323
+ "kl_loss": 7.932466417059914e-08,
1324
+ "kl_weight": 1000000.0,
1325
+ "learning_rate": 0.00042078580481622307,
1326
+ "loss": 2.270353078842163,
1327
+ "step": 131,
1328
+ "total_loss": 2.349677801132202
1329
+ },
1330
+ {
1331
+ "epoch": 0.3329129886506936,
1332
+ "grad_norm": 50.61003494262695,
1333
+ "kl_loss": 7.062011064817852e-08,
1334
+ "kl_weight": 1000000.0,
1335
+ "learning_rate": 0.00042015209125475286,
1336
+ "loss": 2.704482078552246,
1337
+ "step": 132,
1338
+ "total_loss": 2.775102138519287
1339
+ },
1340
+ {
1341
+ "epoch": 0.3354350567465322,
1342
+ "grad_norm": 52.31584930419922,
1343
+ "kl_loss": 8.649838179053404e-08,
1344
+ "kl_weight": 1000000.0,
1345
+ "learning_rate": 0.00041951837769328265,
1346
+ "loss": 2.623465061187744,
1347
+ "step": 133,
1348
+ "total_loss": 2.709963321685791
1349
+ },
1350
+ {
1351
+ "epoch": 0.3379571248423707,
1352
+ "grad_norm": 44.85032653808594,
1353
+ "kl_loss": 7.965866899439789e-08,
1354
+ "kl_weight": 1000000.0,
1355
+ "learning_rate": 0.00041888466413181243,
1356
+ "loss": 2.3895857334136963,
1357
+ "step": 134,
1358
+ "total_loss": 2.4692444801330566
1359
+ },
1360
+ {
1361
+ "epoch": 0.34047919293820933,
1362
+ "grad_norm": 52.73245620727539,
1363
+ "kl_loss": 9.069820805507334e-08,
1364
+ "kl_weight": 1000000.0,
1365
+ "learning_rate": 0.0004182509505703422,
1366
+ "loss": 2.5411245822906494,
1367
+ "step": 135,
1368
+ "total_loss": 2.6318228244781494
1369
+ }
1370
+ ],
1371
+ "logging_steps": 1,
1372
+ "max_steps": 794,
1373
+ "num_input_tokens_seen": 0,
1374
+ "num_train_epochs": 2,
1375
+ "save_steps": 5,
1376
+ "stateful_callbacks": {
1377
+ "TrainerControl": {
1378
+ "args": {
1379
+ "should_epoch_stop": false,
1380
+ "should_evaluate": false,
1381
+ "should_log": false,
1382
+ "should_save": false,
1383
+ "should_training_stop": false
1384
+ },
1385
+ "attributes": {}
1386
+ }
1387
+ },
1388
+ "total_flos": 4.6416789504e+16,
1389
+ "train_batch_size": 2,
1390
+ "trial_name": null,
1391
+ "trial_params": null
1392
+ }
checkpoints/checkpoint-135/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-140/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d3aea558cb00f620f94e50b26ccd5377884c23af22e3581e3a5a9da8a420a2
3
+ size 6901
checkpoints/checkpoint-140/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0592a88cc67d92c3872ea201c37a509fdd789ee5c7d493a441ce7eee927a326a
3
+ size 22241
checkpoints/checkpoint-140/trainer_state.json ADDED
@@ -0,0 +1,1442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.3530895334174023,
6
+ "eval_steps": 100,
7
+ "global_step": 140,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ },
1220
+ {
1221
+ "epoch": 0.3051702395964691,
1222
+ "grad_norm": 29.64325714111328,
1223
+ "kl_loss": 7.080717523422209e-08,
1224
+ "kl_weight": 1000000.0,
1225
+ "learning_rate": 0.00042712294043092524,
1226
+ "loss": 2.5838398933410645,
1227
+ "step": 121,
1228
+ "total_loss": 2.6546471118927
1229
+ },
1230
+ {
1231
+ "epoch": 0.3076923076923077,
1232
+ "grad_norm": 31.308616638183594,
1233
+ "kl_loss": 7.158789117056585e-08,
1234
+ "kl_weight": 1000000.0,
1235
+ "learning_rate": 0.00042648922686945503,
1236
+ "loss": 2.582289457321167,
1237
+ "step": 122,
1238
+ "total_loss": 2.6538772583007812
1239
+ },
1240
+ {
1241
+ "epoch": 0.31021437578814626,
1242
+ "grad_norm": 31.91105842590332,
1243
+ "kl_loss": 5.958595039601278e-08,
1244
+ "kl_weight": 1000000.0,
1245
+ "learning_rate": 0.00042585551330798476,
1246
+ "loss": 2.362175703048706,
1247
+ "step": 123,
1248
+ "total_loss": 2.4217617511749268
1249
+ },
1250
+ {
1251
+ "epoch": 0.31273644388398486,
1252
+ "grad_norm": 48.74770736694336,
1253
+ "kl_loss": 1.0601585387348678e-07,
1254
+ "kl_weight": 1000000.0,
1255
+ "learning_rate": 0.0004252217997465146,
1256
+ "loss": 2.6029539108276367,
1257
+ "step": 124,
1258
+ "total_loss": 2.708969831466675
1259
+ },
1260
+ {
1261
+ "epoch": 0.31525851197982346,
1262
+ "grad_norm": 37.71382141113281,
1263
+ "kl_loss": 7.022106984777565e-08,
1264
+ "kl_weight": 1000000.0,
1265
+ "learning_rate": 0.00042458808618504434,
1266
+ "loss": 1.9780246019363403,
1267
+ "step": 125,
1268
+ "total_loss": 2.048245668411255
1269
+ },
1270
+ {
1271
+ "epoch": 0.31778058007566207,
1272
+ "grad_norm": 58.47407531738281,
1273
+ "kl_loss": 7.89053729022271e-08,
1274
+ "kl_weight": 1000000.0,
1275
+ "learning_rate": 0.0004239543726235742,
1276
+ "loss": 3.0634782314300537,
1277
+ "step": 126,
1278
+ "total_loss": 3.142383575439453
1279
+ },
1280
+ {
1281
+ "epoch": 0.3203026481715006,
1282
+ "grad_norm": 49.75334167480469,
1283
+ "kl_loss": 9.529335898150748e-08,
1284
+ "kl_weight": 1000000.0,
1285
+ "learning_rate": 0.0004233206590621039,
1286
+ "loss": 2.625107765197754,
1287
+ "step": 127,
1288
+ "total_loss": 2.7204010486602783
1289
+ },
1290
+ {
1291
+ "epoch": 0.3228247162673392,
1292
+ "grad_norm": 37.173675537109375,
1293
+ "kl_loss": 9.76136007579953e-08,
1294
+ "kl_weight": 1000000.0,
1295
+ "learning_rate": 0.00042268694550063376,
1296
+ "loss": 2.3797764778137207,
1297
+ "step": 128,
1298
+ "total_loss": 2.4773900508880615
1299
+ },
1300
+ {
1301
+ "epoch": 0.3253467843631778,
1302
+ "grad_norm": 35.94040298461914,
1303
+ "kl_loss": 8.867663581213492e-08,
1304
+ "kl_weight": 1000000.0,
1305
+ "learning_rate": 0.0004220532319391635,
1306
+ "loss": 2.19878888130188,
1307
+ "step": 129,
1308
+ "total_loss": 2.2874655723571777
1309
+ },
1310
+ {
1311
+ "epoch": 0.32786885245901637,
1312
+ "grad_norm": 30.7440128326416,
1313
+ "kl_loss": 8.357946512660419e-08,
1314
+ "kl_weight": 1000000.0,
1315
+ "learning_rate": 0.0004214195183776933,
1316
+ "loss": 2.374424934387207,
1317
+ "step": 130,
1318
+ "total_loss": 2.4580044746398926
1319
+ },
1320
+ {
1321
+ "epoch": 0.33039092055485497,
1322
+ "grad_norm": 44.41523361206055,
1323
+ "kl_loss": 7.932466417059914e-08,
1324
+ "kl_weight": 1000000.0,
1325
+ "learning_rate": 0.00042078580481622307,
1326
+ "loss": 2.270353078842163,
1327
+ "step": 131,
1328
+ "total_loss": 2.349677801132202
1329
+ },
1330
+ {
1331
+ "epoch": 0.3329129886506936,
1332
+ "grad_norm": 50.61003494262695,
1333
+ "kl_loss": 7.062011064817852e-08,
1334
+ "kl_weight": 1000000.0,
1335
+ "learning_rate": 0.00042015209125475286,
1336
+ "loss": 2.704482078552246,
1337
+ "step": 132,
1338
+ "total_loss": 2.775102138519287
1339
+ },
1340
+ {
1341
+ "epoch": 0.3354350567465322,
1342
+ "grad_norm": 52.31584930419922,
1343
+ "kl_loss": 8.649838179053404e-08,
1344
+ "kl_weight": 1000000.0,
1345
+ "learning_rate": 0.00041951837769328265,
1346
+ "loss": 2.623465061187744,
1347
+ "step": 133,
1348
+ "total_loss": 2.709963321685791
1349
+ },
1350
+ {
1351
+ "epoch": 0.3379571248423707,
1352
+ "grad_norm": 44.85032653808594,
1353
+ "kl_loss": 7.965866899439789e-08,
1354
+ "kl_weight": 1000000.0,
1355
+ "learning_rate": 0.00041888466413181243,
1356
+ "loss": 2.3895857334136963,
1357
+ "step": 134,
1358
+ "total_loss": 2.4692444801330566
1359
+ },
1360
+ {
1361
+ "epoch": 0.34047919293820933,
1362
+ "grad_norm": 52.73245620727539,
1363
+ "kl_loss": 9.069820805507334e-08,
1364
+ "kl_weight": 1000000.0,
1365
+ "learning_rate": 0.0004182509505703422,
1366
+ "loss": 2.5411245822906494,
1367
+ "step": 135,
1368
+ "total_loss": 2.6318228244781494
1369
+ },
1370
+ {
1371
+ "epoch": 0.34300126103404793,
1372
+ "grad_norm": 54.24882507324219,
1373
+ "kl_loss": 9.488780960964505e-08,
1374
+ "kl_weight": 1000000.0,
1375
+ "learning_rate": 0.000417617237008872,
1376
+ "loss": 1.959804654121399,
1377
+ "step": 136,
1378
+ "total_loss": 2.054692506790161
1379
+ },
1380
+ {
1381
+ "epoch": 0.3455233291298865,
1382
+ "grad_norm": 80.58323669433594,
1383
+ "kl_loss": 1.1013327139153262e-07,
1384
+ "kl_weight": 1000000.0,
1385
+ "learning_rate": 0.00041698352344740175,
1386
+ "loss": 2.191988229751587,
1387
+ "step": 137,
1388
+ "total_loss": 2.30212140083313
1389
+ },
1390
+ {
1391
+ "epoch": 0.3480453972257251,
1392
+ "grad_norm": 94.20862579345703,
1393
+ "kl_loss": 4.3523357362573734e-08,
1394
+ "kl_weight": 1000000.0,
1395
+ "learning_rate": 0.0004163498098859316,
1396
+ "loss": 2.3833768367767334,
1397
+ "step": 138,
1398
+ "total_loss": 2.4269001483917236
1399
+ },
1400
+ {
1401
+ "epoch": 0.3505674653215637,
1402
+ "grad_norm": 57.289390563964844,
1403
+ "kl_loss": 6.378383687888345e-08,
1404
+ "kl_weight": 1000000.0,
1405
+ "learning_rate": 0.0004157160963244613,
1406
+ "loss": 2.5576775074005127,
1407
+ "step": 139,
1408
+ "total_loss": 2.6214613914489746
1409
+ },
1410
+ {
1411
+ "epoch": 0.3530895334174023,
1412
+ "grad_norm": 108.49940490722656,
1413
+ "kl_loss": 1.0705950614919857e-07,
1414
+ "kl_weight": 1000000.0,
1415
+ "learning_rate": 0.00041508238276299116,
1416
+ "loss": 2.5679421424865723,
1417
+ "step": 140,
1418
+ "total_loss": 2.675001621246338
1419
+ }
1420
+ ],
1421
+ "logging_steps": 1,
1422
+ "max_steps": 794,
1423
+ "num_input_tokens_seen": 0,
1424
+ "num_train_epochs": 2,
1425
+ "save_steps": 5,
1426
+ "stateful_callbacks": {
1427
+ "TrainerControl": {
1428
+ "args": {
1429
+ "should_epoch_stop": false,
1430
+ "should_evaluate": false,
1431
+ "should_log": false,
1432
+ "should_save": false,
1433
+ "should_training_stop": false
1434
+ },
1435
+ "attributes": {}
1436
+ }
1437
+ },
1438
+ "total_flos": 4.8135929856e+16,
1439
+ "train_batch_size": 2,
1440
+ "trial_name": null,
1441
+ "trial_params": null
1442
+ }
checkpoints/checkpoint-140/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-145/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d3aea558cb00f620f94e50b26ccd5377884c23af22e3581e3a5a9da8a420a2
3
+ size 6901
checkpoints/checkpoint-145/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5179949cd57d88e294f28615450c3b1570ab8670b30238d99f1aa2fe8d95f4b
3
+ size 22241
checkpoints/checkpoint-145/trainer_state.json ADDED
@@ -0,0 +1,1492 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 2.8745031356811523,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.3656998738965952,
6
+ "eval_steps": 100,
7
+ "global_step": 145,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ },
162
+ {
163
+ "epoch": 0.0403530895334174,
164
+ "grad_norm": 44.62814712524414,
165
+ "kl_loss": 1.450005981951108e-07,
166
+ "kl_weight": 1000000.0,
167
+ "learning_rate": 0.0004936628643852978,
168
+ "loss": 2.4844541549682617,
169
+ "step": 16,
170
+ "total_loss": 2.6294548511505127
171
+ },
172
+ {
173
+ "epoch": 0.04287515762925599,
174
+ "grad_norm": 41.87761688232422,
175
+ "kl_loss": 1.397227009647395e-07,
176
+ "kl_weight": 1000000.0,
177
+ "learning_rate": 0.0004930291508238277,
178
+ "loss": 2.7985713481903076,
179
+ "step": 17,
180
+ "total_loss": 2.938293933868408
181
+ },
182
+ {
183
+ "epoch": 0.04539722572509458,
184
+ "grad_norm": 39.647457122802734,
185
+ "kl_loss": 1.0770181546604363e-07,
186
+ "kl_weight": 1000000.0,
187
+ "learning_rate": 0.0004923954372623574,
188
+ "loss": 2.1876273155212402,
189
+ "step": 18,
190
+ "total_loss": 2.2953290939331055
191
+ },
192
+ {
193
+ "epoch": 0.04791929382093316,
194
+ "grad_norm": 44.82719039916992,
195
+ "kl_loss": 1.325549447983576e-07,
196
+ "kl_weight": 1000000.0,
197
+ "learning_rate": 0.0004917617237008873,
198
+ "loss": 2.344290256500244,
199
+ "step": 19,
200
+ "total_loss": 2.4768452644348145
201
+ },
202
+ {
203
+ "epoch": 0.05044136191677175,
204
+ "grad_norm": 35.45253372192383,
205
+ "kl_loss": 1.3449634650442022e-07,
206
+ "kl_weight": 1000000.0,
207
+ "learning_rate": 0.0004911280101394169,
208
+ "loss": 2.393965244293213,
209
+ "step": 20,
210
+ "total_loss": 2.5284616947174072
211
+ },
212
+ {
213
+ "epoch": 0.05296343001261034,
214
+ "grad_norm": 36.362369537353516,
215
+ "kl_loss": 1.552224659917556e-07,
216
+ "kl_weight": 1000000.0,
217
+ "learning_rate": 0.0004904942965779467,
218
+ "loss": 2.1951944828033447,
219
+ "step": 21,
220
+ "total_loss": 2.350416898727417
221
+ },
222
+ {
223
+ "epoch": 0.05548549810844893,
224
+ "grad_norm": 42.16935348510742,
225
+ "kl_loss": 1.1523614062980414e-07,
226
+ "kl_weight": 1000000.0,
227
+ "learning_rate": 0.0004898605830164765,
228
+ "loss": 2.4038805961608887,
229
+ "step": 22,
230
+ "total_loss": 2.5191166400909424
231
+ },
232
+ {
233
+ "epoch": 0.058007566204287514,
234
+ "grad_norm": 39.14812088012695,
235
+ "kl_loss": 1.1659390963814076e-07,
236
+ "kl_weight": 1000000.0,
237
+ "learning_rate": 0.0004892268694550063,
238
+ "loss": 2.6124515533447266,
239
+ "step": 23,
240
+ "total_loss": 2.7290453910827637
241
+ },
242
+ {
243
+ "epoch": 0.0605296343001261,
244
+ "grad_norm": 49.780704498291016,
245
+ "kl_loss": 2.0625684271635691e-07,
246
+ "kl_weight": 1000000.0,
247
+ "learning_rate": 0.0004885931558935361,
248
+ "loss": 2.9930167198181152,
249
+ "step": 24,
250
+ "total_loss": 3.1992735862731934
251
+ },
252
+ {
253
+ "epoch": 0.06305170239596469,
254
+ "grad_norm": 53.23894500732422,
255
+ "kl_loss": 1.4698964889703348e-07,
256
+ "kl_weight": 1000000.0,
257
+ "learning_rate": 0.0004879594423320659,
258
+ "loss": 2.4087769985198975,
259
+ "step": 25,
260
+ "total_loss": 2.5557665824890137
261
+ },
262
+ {
263
+ "epoch": 0.06557377049180328,
264
+ "grad_norm": 50.209110260009766,
265
+ "kl_loss": 1.2840492047416774e-07,
266
+ "kl_weight": 1000000.0,
267
+ "learning_rate": 0.0004873257287705957,
268
+ "loss": 2.918276309967041,
269
+ "step": 26,
270
+ "total_loss": 3.0466811656951904
271
+ },
272
+ {
273
+ "epoch": 0.06809583858764187,
274
+ "grad_norm": 41.90302658081055,
275
+ "kl_loss": 1.8274477042723447e-07,
276
+ "kl_weight": 1000000.0,
277
+ "learning_rate": 0.00048669201520912546,
278
+ "loss": 2.272730588912964,
279
+ "step": 27,
280
+ "total_loss": 2.455475330352783
281
+ },
282
+ {
283
+ "epoch": 0.07061790668348046,
284
+ "grad_norm": 39.8343620300293,
285
+ "kl_loss": 1.3182453528770566e-07,
286
+ "kl_weight": 1000000.0,
287
+ "learning_rate": 0.00048605830164765525,
288
+ "loss": 2.6464455127716064,
289
+ "step": 28,
290
+ "total_loss": 2.7782700061798096
291
+ },
292
+ {
293
+ "epoch": 0.07313997477931904,
294
+ "grad_norm": 36.19342803955078,
295
+ "kl_loss": 1.3146133426289452e-07,
296
+ "kl_weight": 1000000.0,
297
+ "learning_rate": 0.00048542458808618503,
298
+ "loss": 2.6744461059570312,
299
+ "step": 29,
300
+ "total_loss": 2.8059074878692627
301
+ },
302
+ {
303
+ "epoch": 0.07566204287515763,
304
+ "grad_norm": 39.07732391357422,
305
+ "kl_loss": 1.3345737670533708e-07,
306
+ "kl_weight": 1000000.0,
307
+ "learning_rate": 0.0004847908745247148,
308
+ "loss": 2.624727249145508,
309
+ "step": 30,
310
+ "total_loss": 2.7581846714019775
311
+ },
312
+ {
313
+ "epoch": 0.07818411097099622,
314
+ "grad_norm": 46.28368377685547,
315
+ "kl_loss": 1.4280556115409127e-07,
316
+ "kl_weight": 1000000.0,
317
+ "learning_rate": 0.0004841571609632446,
318
+ "loss": 2.4123611450195312,
319
+ "step": 31,
320
+ "total_loss": 2.555166721343994
321
+ },
322
+ {
323
+ "epoch": 0.0807061790668348,
324
+ "grad_norm": 43.698612213134766,
325
+ "kl_loss": 1.2306992402955075e-07,
326
+ "kl_weight": 1000000.0,
327
+ "learning_rate": 0.0004835234474017744,
328
+ "loss": 2.641366958618164,
329
+ "step": 32,
330
+ "total_loss": 2.764436960220337
331
+ },
332
+ {
333
+ "epoch": 0.0832282471626734,
334
+ "grad_norm": 45.80537796020508,
335
+ "kl_loss": 1.3520984509796108e-07,
336
+ "kl_weight": 1000000.0,
337
+ "learning_rate": 0.0004828897338403042,
338
+ "loss": 2.5132896900177,
339
+ "step": 33,
340
+ "total_loss": 2.6484994888305664
341
+ },
342
+ {
343
+ "epoch": 0.08575031525851198,
344
+ "grad_norm": 48.67962646484375,
345
+ "kl_loss": 1.237484639204922e-07,
346
+ "kl_weight": 1000000.0,
347
+ "learning_rate": 0.000482256020278834,
348
+ "loss": 2.7271363735198975,
349
+ "step": 34,
350
+ "total_loss": 2.8508849143981934
351
+ },
352
+ {
353
+ "epoch": 0.08827238335435057,
354
+ "grad_norm": 49.93069839477539,
355
+ "kl_loss": 1.2296362683628104e-07,
356
+ "kl_weight": 1000000.0,
357
+ "learning_rate": 0.0004816223067173637,
358
+ "loss": 2.2032339572906494,
359
+ "step": 35,
360
+ "total_loss": 2.326197624206543
361
+ },
362
+ {
363
+ "epoch": 0.09079445145018916,
364
+ "grad_norm": 34.86311721801758,
365
+ "kl_loss": 1.3591355241260317e-07,
366
+ "kl_weight": 1000000.0,
367
+ "learning_rate": 0.00048098859315589355,
368
+ "loss": 2.0896949768066406,
369
+ "step": 36,
370
+ "total_loss": 2.2256085872650146
371
+ },
372
+ {
373
+ "epoch": 0.09331651954602774,
374
+ "grad_norm": 37.413055419921875,
375
+ "kl_loss": 1.403360414542476e-07,
376
+ "kl_weight": 1000000.0,
377
+ "learning_rate": 0.0004803548795944233,
378
+ "loss": 2.331239700317383,
379
+ "step": 37,
380
+ "total_loss": 2.4715757369995117
381
+ },
382
+ {
383
+ "epoch": 0.09583858764186633,
384
+ "grad_norm": 71.4142074584961,
385
+ "kl_loss": 9.95134428194433e-08,
386
+ "kl_weight": 1000000.0,
387
+ "learning_rate": 0.00047972116603295313,
388
+ "loss": 2.3557369709014893,
389
+ "step": 38,
390
+ "total_loss": 2.4552505016326904
391
+ },
392
+ {
393
+ "epoch": 0.09836065573770492,
394
+ "grad_norm": 43.588504791259766,
395
+ "kl_loss": 9.627683539292775e-08,
396
+ "kl_weight": 1000000.0,
397
+ "learning_rate": 0.00047908745247148286,
398
+ "loss": 2.483755111694336,
399
+ "step": 39,
400
+ "total_loss": 2.5800318717956543
401
+ },
402
+ {
403
+ "epoch": 0.1008827238335435,
404
+ "grad_norm": 53.27906036376953,
405
+ "kl_loss": 1.6940899172368518e-07,
406
+ "kl_weight": 1000000.0,
407
+ "learning_rate": 0.0004784537389100127,
408
+ "loss": 2.2950119972229004,
409
+ "step": 40,
410
+ "total_loss": 2.464421033859253
411
+ },
412
+ {
413
+ "epoch": 0.1034047919293821,
414
+ "grad_norm": 36.538055419921875,
415
+ "kl_loss": 1.7988656964007532e-07,
416
+ "kl_weight": 1000000.0,
417
+ "learning_rate": 0.00047782002534854244,
418
+ "loss": 2.2202811241149902,
419
+ "step": 41,
420
+ "total_loss": 2.40016770362854
421
+ },
422
+ {
423
+ "epoch": 0.10592686002522068,
424
+ "grad_norm": 37.166908264160156,
425
+ "kl_loss": 1.6280961290249252e-07,
426
+ "kl_weight": 1000000.0,
427
+ "learning_rate": 0.0004771863117870723,
428
+ "loss": 2.3544418811798096,
429
+ "step": 42,
430
+ "total_loss": 2.517251491546631
431
+ },
432
+ {
433
+ "epoch": 0.10844892812105927,
434
+ "grad_norm": 42.741424560546875,
435
+ "kl_loss": 1.2595839393725328e-07,
436
+ "kl_weight": 1000000.0,
437
+ "learning_rate": 0.000476552598225602,
438
+ "loss": 2.299348831176758,
439
+ "step": 43,
440
+ "total_loss": 2.425307273864746
441
+ },
442
+ {
443
+ "epoch": 0.11097099621689786,
444
+ "grad_norm": 39.69258117675781,
445
+ "kl_loss": 1.7368185467603325e-07,
446
+ "kl_weight": 1000000.0,
447
+ "learning_rate": 0.0004759188846641318,
448
+ "loss": 2.6060984134674072,
449
+ "step": 44,
450
+ "total_loss": 2.779780387878418
451
+ },
452
+ {
453
+ "epoch": 0.11349306431273644,
454
+ "grad_norm": 48.37623596191406,
455
+ "kl_loss": 1.2949078609381104e-07,
456
+ "kl_weight": 1000000.0,
457
+ "learning_rate": 0.0004752851711026616,
458
+ "loss": 2.5570790767669678,
459
+ "step": 45,
460
+ "total_loss": 2.686569929122925
461
+ },
462
+ {
463
+ "epoch": 0.11601513240857503,
464
+ "grad_norm": 32.54016876220703,
465
+ "kl_loss": 1.0888848578360921e-07,
466
+ "kl_weight": 1000000.0,
467
+ "learning_rate": 0.0004746514575411914,
468
+ "loss": 2.351015567779541,
469
+ "step": 46,
470
+ "total_loss": 2.4599039554595947
471
+ },
472
+ {
473
+ "epoch": 0.11853720050441362,
474
+ "grad_norm": 39.50113296508789,
475
+ "kl_loss": 8.368018455939819e-08,
476
+ "kl_weight": 1000000.0,
477
+ "learning_rate": 0.00047401774397972117,
478
+ "loss": 2.030034303665161,
479
+ "step": 47,
480
+ "total_loss": 2.1137144565582275
481
+ },
482
+ {
483
+ "epoch": 0.1210592686002522,
484
+ "grad_norm": 58.826534271240234,
485
+ "kl_loss": 7.160872428357834e-08,
486
+ "kl_weight": 1000000.0,
487
+ "learning_rate": 0.00047338403041825096,
488
+ "loss": 2.4345922470092773,
489
+ "step": 48,
490
+ "total_loss": 2.5062010288238525
491
+ },
492
+ {
493
+ "epoch": 0.1235813366960908,
494
+ "grad_norm": 49.829811096191406,
495
+ "kl_loss": 1.4755499933016836e-07,
496
+ "kl_weight": 1000000.0,
497
+ "learning_rate": 0.00047275031685678075,
498
+ "loss": 2.4012234210968018,
499
+ "step": 49,
500
+ "total_loss": 2.548778533935547
501
+ },
502
+ {
503
+ "epoch": 0.12610340479192939,
504
+ "grad_norm": 54.877708435058594,
505
+ "kl_loss": 1.25692750430062e-07,
506
+ "kl_weight": 1000000.0,
507
+ "learning_rate": 0.00047211660329531054,
508
+ "loss": 2.4835963249206543,
509
+ "step": 50,
510
+ "total_loss": 2.6092891693115234
511
+ },
512
+ {
513
+ "epoch": 0.12862547288776796,
514
+ "grad_norm": 57.37236785888672,
515
+ "kl_loss": 1.1875315664156005e-07,
516
+ "kl_weight": 1000000.0,
517
+ "learning_rate": 0.00047148288973384027,
518
+ "loss": 2.25846266746521,
519
+ "step": 51,
520
+ "total_loss": 2.37721586227417
521
+ },
522
+ {
523
+ "epoch": 0.13114754098360656,
524
+ "grad_norm": 46.449405670166016,
525
+ "kl_loss": 9.858030125542427e-08,
526
+ "kl_weight": 1000000.0,
527
+ "learning_rate": 0.0004708491761723701,
528
+ "loss": 2.2844595909118652,
529
+ "step": 52,
530
+ "total_loss": 2.383039951324463
531
+ },
532
+ {
533
+ "epoch": 0.13366960907944514,
534
+ "grad_norm": 88.41195678710938,
535
+ "kl_loss": 1.3280877908528055e-07,
536
+ "kl_weight": 1000000.0,
537
+ "learning_rate": 0.00047021546261089985,
538
+ "loss": 2.73095440864563,
539
+ "step": 53,
540
+ "total_loss": 2.8637630939483643
541
+ },
542
+ {
543
+ "epoch": 0.13619167717528374,
544
+ "grad_norm": 46.997642517089844,
545
+ "kl_loss": 1.318484947887555e-07,
546
+ "kl_weight": 1000000.0,
547
+ "learning_rate": 0.0004695817490494297,
548
+ "loss": 2.9694020748138428,
549
+ "step": 54,
550
+ "total_loss": 3.101250648498535
551
+ },
552
+ {
553
+ "epoch": 0.13871374527112232,
554
+ "grad_norm": 59.88271713256836,
555
+ "kl_loss": 1.311551756089102e-07,
556
+ "kl_weight": 1000000.0,
557
+ "learning_rate": 0.0004689480354879594,
558
+ "loss": 2.339895009994507,
559
+ "step": 55,
560
+ "total_loss": 2.471050262451172
561
+ },
562
+ {
563
+ "epoch": 0.14123581336696092,
564
+ "grad_norm": 50.745269775390625,
565
+ "kl_loss": 1.2313154229559586e-07,
566
+ "kl_weight": 1000000.0,
567
+ "learning_rate": 0.00046831432192648927,
568
+ "loss": 2.3892390727996826,
569
+ "step": 56,
570
+ "total_loss": 2.5123705863952637
571
+ },
572
+ {
573
+ "epoch": 0.1437578814627995,
574
+ "grad_norm": 38.669708251953125,
575
+ "kl_loss": 1.3242403440472117e-07,
576
+ "kl_weight": 1000000.0,
577
+ "learning_rate": 0.000467680608365019,
578
+ "loss": 2.3022656440734863,
579
+ "step": 57,
580
+ "total_loss": 2.43468976020813
581
+ },
582
+ {
583
+ "epoch": 0.14627994955863807,
584
+ "grad_norm": 35.511695861816406,
585
+ "kl_loss": 9.955392243909955e-08,
586
+ "kl_weight": 1000000.0,
587
+ "learning_rate": 0.0004670468948035488,
588
+ "loss": 2.4699792861938477,
589
+ "step": 58,
590
+ "total_loss": 2.569533109664917
591
+ },
592
+ {
593
+ "epoch": 0.14880201765447668,
594
+ "grad_norm": 42.08246994018555,
595
+ "kl_loss": 1.3320465086508193e-07,
596
+ "kl_weight": 1000000.0,
597
+ "learning_rate": 0.0004664131812420786,
598
+ "loss": 2.288395643234253,
599
+ "step": 59,
600
+ "total_loss": 2.421600341796875
601
+ },
602
+ {
603
+ "epoch": 0.15132408575031525,
604
+ "grad_norm": 37.103790283203125,
605
+ "kl_loss": 5.67616069702126e-08,
606
+ "kl_weight": 1000000.0,
607
+ "learning_rate": 0.00046577946768060837,
608
+ "loss": 2.5267558097839355,
609
+ "step": 60,
610
+ "total_loss": 2.58351731300354
611
+ },
612
+ {
613
+ "epoch": 0.15384615384615385,
614
+ "grad_norm": 38.30869674682617,
615
+ "kl_loss": 1.197651613438211e-07,
616
+ "kl_weight": 1000000.0,
617
+ "learning_rate": 0.00046514575411913815,
618
+ "loss": 2.6164743900299072,
619
+ "step": 61,
620
+ "total_loss": 2.736239433288574
621
+ },
622
+ {
623
+ "epoch": 0.15636822194199243,
624
+ "grad_norm": 39.79656982421875,
625
+ "kl_loss": 9.629880537431745e-08,
626
+ "kl_weight": 1000000.0,
627
+ "learning_rate": 0.00046451204055766794,
628
+ "loss": 2.2402400970458984,
629
+ "step": 62,
630
+ "total_loss": 2.336538791656494
631
+ },
632
+ {
633
+ "epoch": 0.15889029003783103,
634
+ "grad_norm": 40.427127838134766,
635
+ "kl_loss": 1.266524236598343e-07,
636
+ "kl_weight": 1000000.0,
637
+ "learning_rate": 0.00046387832699619773,
638
+ "loss": 2.634199857711792,
639
+ "step": 63,
640
+ "total_loss": 2.760852336883545
641
+ },
642
+ {
643
+ "epoch": 0.1614123581336696,
644
+ "grad_norm": 45.250335693359375,
645
+ "kl_loss": 1.2385322634145268e-07,
646
+ "kl_weight": 1000000.0,
647
+ "learning_rate": 0.0004632446134347275,
648
+ "loss": 2.8654606342315674,
649
+ "step": 64,
650
+ "total_loss": 2.989313840866089
651
+ },
652
+ {
653
+ "epoch": 0.16393442622950818,
654
+ "grad_norm": 41.70964050292969,
655
+ "kl_loss": 8.554673769367582e-08,
656
+ "kl_weight": 1000000.0,
657
+ "learning_rate": 0.00046261089987325725,
658
+ "loss": 2.4218544960021973,
659
+ "step": 65,
660
+ "total_loss": 2.50740122795105
661
+ },
662
+ {
663
+ "epoch": 0.1664564943253468,
664
+ "grad_norm": 48.33979415893555,
665
+ "kl_loss": 8.96402525540907e-08,
666
+ "kl_weight": 1000000.0,
667
+ "learning_rate": 0.0004619771863117871,
668
+ "loss": 2.1502766609191895,
669
+ "step": 66,
670
+ "total_loss": 2.2399168014526367
671
+ },
672
+ {
673
+ "epoch": 0.16897856242118536,
674
+ "grad_norm": 49.24103546142578,
675
+ "kl_loss": 1.0698733632352742e-07,
676
+ "kl_weight": 1000000.0,
677
+ "learning_rate": 0.00046134347275031683,
678
+ "loss": 2.373239278793335,
679
+ "step": 67,
680
+ "total_loss": 2.480226516723633
681
+ },
682
+ {
683
+ "epoch": 0.17150063051702397,
684
+ "grad_norm": 47.207801818847656,
685
+ "kl_loss": 1.1443621161788542e-07,
686
+ "kl_weight": 1000000.0,
687
+ "learning_rate": 0.00046070975918884667,
688
+ "loss": 2.083575963973999,
689
+ "step": 68,
690
+ "total_loss": 2.198012113571167
691
+ },
692
+ {
693
+ "epoch": 0.17402269861286254,
694
+ "grad_norm": 55.49308395385742,
695
+ "kl_loss": 7.693908798955817e-08,
696
+ "kl_weight": 1000000.0,
697
+ "learning_rate": 0.0004600760456273764,
698
+ "loss": 2.3746559619903564,
699
+ "step": 69,
700
+ "total_loss": 2.4515950679779053
701
+ },
702
+ {
703
+ "epoch": 0.17654476670870115,
704
+ "grad_norm": 35.293094635009766,
705
+ "kl_loss": 1.1812133493549481e-07,
706
+ "kl_weight": 1000000.0,
707
+ "learning_rate": 0.00045944233206590625,
708
+ "loss": 2.300846576690674,
709
+ "step": 70,
710
+ "total_loss": 2.4189679622650146
711
+ },
712
+ {
713
+ "epoch": 0.17906683480453972,
714
+ "grad_norm": 40.90177536010742,
715
+ "kl_loss": 1.278984171904085e-07,
716
+ "kl_weight": 1000000.0,
717
+ "learning_rate": 0.000458808618504436,
718
+ "loss": 2.6642138957977295,
719
+ "step": 71,
720
+ "total_loss": 2.792112350463867
721
+ },
722
+ {
723
+ "epoch": 0.18158890290037832,
724
+ "grad_norm": 35.18415832519531,
725
+ "kl_loss": 1.0427108065869106e-07,
726
+ "kl_weight": 1000000.0,
727
+ "learning_rate": 0.00045817490494296577,
728
+ "loss": 2.0037200450897217,
729
+ "step": 72,
730
+ "total_loss": 2.1079912185668945
731
+ },
732
+ {
733
+ "epoch": 0.1841109709962169,
734
+ "grad_norm": 58.994651794433594,
735
+ "kl_loss": 1.1237546715392455e-07,
736
+ "kl_weight": 1000000.0,
737
+ "learning_rate": 0.00045754119138149556,
738
+ "loss": 2.1798439025878906,
739
+ "step": 73,
740
+ "total_loss": 2.292219400405884
741
+ },
742
+ {
743
+ "epoch": 0.18663303909205547,
744
+ "grad_norm": 58.826744079589844,
745
+ "kl_loss": 1.2047219399846654e-07,
746
+ "kl_weight": 1000000.0,
747
+ "learning_rate": 0.00045690747782002535,
748
+ "loss": 2.182692527770996,
749
+ "step": 74,
750
+ "total_loss": 2.3031647205352783
751
+ },
752
+ {
753
+ "epoch": 0.18915510718789408,
754
+ "grad_norm": 48.294960021972656,
755
+ "kl_loss": 9.133592726584538e-08,
756
+ "kl_weight": 1000000.0,
757
+ "learning_rate": 0.00045627376425855514,
758
+ "loss": 2.640594005584717,
759
+ "step": 75,
760
+ "total_loss": 2.7319300174713135
761
+ },
762
+ {
763
+ "epoch": 0.19167717528373265,
764
+ "grad_norm": 42.91217803955078,
765
+ "kl_loss": 1.3158961564840865e-07,
766
+ "kl_weight": 1000000.0,
767
+ "learning_rate": 0.0004556400506970849,
768
+ "loss": 2.20942759513855,
769
+ "step": 76,
770
+ "total_loss": 2.341017246246338
771
+ },
772
+ {
773
+ "epoch": 0.19419924337957126,
774
+ "grad_norm": 37.41611099243164,
775
+ "kl_loss": 7.94594257058634e-08,
776
+ "kl_weight": 1000000.0,
777
+ "learning_rate": 0.0004550063371356147,
778
+ "loss": 1.8829660415649414,
779
+ "step": 77,
780
+ "total_loss": 1.9624254703521729
781
+ },
782
+ {
783
+ "epoch": 0.19672131147540983,
784
+ "grad_norm": 68.14502716064453,
785
+ "kl_loss": 6.981721156762433e-08,
786
+ "kl_weight": 1000000.0,
787
+ "learning_rate": 0.0004543726235741445,
788
+ "loss": 2.176638603210449,
789
+ "step": 78,
790
+ "total_loss": 2.2464559078216553
791
+ },
792
+ {
793
+ "epoch": 0.19924337957124844,
794
+ "grad_norm": 37.93901824951172,
795
+ "kl_loss": 1.4584460927835607e-07,
796
+ "kl_weight": 1000000.0,
797
+ "learning_rate": 0.00045373891001267424,
798
+ "loss": 2.3125131130218506,
799
+ "step": 79,
800
+ "total_loss": 2.458357810974121
801
+ },
802
+ {
803
+ "epoch": 0.201765447667087,
804
+ "grad_norm": 36.36228942871094,
805
+ "kl_loss": 1.0491366708720307e-07,
806
+ "kl_weight": 1000000.0,
807
+ "learning_rate": 0.0004531051964512041,
808
+ "loss": 2.669814348220825,
809
+ "step": 80,
810
+ "total_loss": 2.7747280597686768
811
+ },
812
+ {
813
+ "epoch": 0.2042875157629256,
814
+ "grad_norm": 33.21589279174805,
815
+ "kl_loss": 8.339785040334391e-08,
816
+ "kl_weight": 1000000.0,
817
+ "learning_rate": 0.0004524714828897338,
818
+ "loss": 2.1568803787231445,
819
+ "step": 81,
820
+ "total_loss": 2.2402782440185547
821
+ },
822
+ {
823
+ "epoch": 0.2068095838587642,
824
+ "grad_norm": 50.5767936706543,
825
+ "kl_loss": 8.075436142007675e-08,
826
+ "kl_weight": 1000000.0,
827
+ "learning_rate": 0.00045183776932826366,
828
+ "loss": 2.582383871078491,
829
+ "step": 82,
830
+ "total_loss": 2.6631381511688232
831
+ },
832
+ {
833
+ "epoch": 0.20933165195460277,
834
+ "grad_norm": 35.17644119262695,
835
+ "kl_loss": 7.244600652711597e-08,
836
+ "kl_weight": 1000000.0,
837
+ "learning_rate": 0.0004512040557667934,
838
+ "loss": 2.234858274459839,
839
+ "step": 83,
840
+ "total_loss": 2.3073043823242188
841
+ },
842
+ {
843
+ "epoch": 0.21185372005044137,
844
+ "grad_norm": 42.40107727050781,
845
+ "kl_loss": 1.1873476779555858e-07,
846
+ "kl_weight": 1000000.0,
847
+ "learning_rate": 0.00045057034220532323,
848
+ "loss": 2.455256462097168,
849
+ "step": 84,
850
+ "total_loss": 2.573991298675537
851
+ },
852
+ {
853
+ "epoch": 0.21437578814627994,
854
+ "grad_norm": 36.42718505859375,
855
+ "kl_loss": 1.1225206009157773e-07,
856
+ "kl_weight": 1000000.0,
857
+ "learning_rate": 0.00044993662864385297,
858
+ "loss": 2.2919607162475586,
859
+ "step": 85,
860
+ "total_loss": 2.404212713241577
861
+ },
862
+ {
863
+ "epoch": 0.21689785624211855,
864
+ "grad_norm": 53.86962127685547,
865
+ "kl_loss": 1.4159495265175792e-07,
866
+ "kl_weight": 1000000.0,
867
+ "learning_rate": 0.00044930291508238275,
868
+ "loss": 2.3607466220855713,
869
+ "step": 86,
870
+ "total_loss": 2.5023415088653564
871
+ },
872
+ {
873
+ "epoch": 0.21941992433795712,
874
+ "grad_norm": 38.30321502685547,
875
+ "kl_loss": 1.2286402295558219e-07,
876
+ "kl_weight": 1000000.0,
877
+ "learning_rate": 0.00044866920152091254,
878
+ "loss": 2.518197774887085,
879
+ "step": 87,
880
+ "total_loss": 2.641061782836914
881
+ },
882
+ {
883
+ "epoch": 0.22194199243379573,
884
+ "grad_norm": 39.55595016479492,
885
+ "kl_loss": 7.652983669004243e-08,
886
+ "kl_weight": 1000000.0,
887
+ "learning_rate": 0.00044803548795944233,
888
+ "loss": 2.497284173965454,
889
+ "step": 88,
890
+ "total_loss": 2.5738139152526855
891
+ },
892
+ {
893
+ "epoch": 0.2244640605296343,
894
+ "grad_norm": 54.330936431884766,
895
+ "kl_loss": 1.4088276145685086e-07,
896
+ "kl_weight": 1000000.0,
897
+ "learning_rate": 0.0004474017743979721,
898
+ "loss": 2.0035746097564697,
899
+ "step": 89,
900
+ "total_loss": 2.1444573402404785
901
+ },
902
+ {
903
+ "epoch": 0.22698612862547288,
904
+ "grad_norm": 115.77396392822266,
905
+ "kl_loss": 7.670182355923316e-08,
906
+ "kl_weight": 1000000.0,
907
+ "learning_rate": 0.0004467680608365019,
908
+ "loss": 2.504946708679199,
909
+ "step": 90,
910
+ "total_loss": 2.581648588180542
911
+ },
912
+ {
913
+ "epoch": 0.22950819672131148,
914
+ "grad_norm": 106.39173126220703,
915
+ "kl_loss": 1.4106633727806184e-07,
916
+ "kl_weight": 1000000.0,
917
+ "learning_rate": 0.0004461343472750317,
918
+ "loss": 2.12337064743042,
919
+ "step": 91,
920
+ "total_loss": 2.264436960220337
921
+ },
922
+ {
923
+ "epoch": 0.23203026481715006,
924
+ "grad_norm": 65.24938201904297,
925
+ "kl_loss": 9.111739984746237e-08,
926
+ "kl_weight": 1000000.0,
927
+ "learning_rate": 0.0004455006337135615,
928
+ "loss": 2.0131237506866455,
929
+ "step": 92,
930
+ "total_loss": 2.104241132736206
931
+ },
932
+ {
933
+ "epoch": 0.23455233291298866,
934
+ "grad_norm": 44.74656295776367,
935
+ "kl_loss": 1.4964980721288157e-07,
936
+ "kl_weight": 1000000.0,
937
+ "learning_rate": 0.0004448669201520912,
938
+ "loss": 2.265284776687622,
939
+ "step": 93,
940
+ "total_loss": 2.4149346351623535
941
+ },
942
+ {
943
+ "epoch": 0.23707440100882723,
944
+ "grad_norm": 91.94232940673828,
945
+ "kl_loss": 9.062223682576587e-08,
946
+ "kl_weight": 1000000.0,
947
+ "learning_rate": 0.00044423320659062106,
948
+ "loss": 2.1661198139190674,
949
+ "step": 94,
950
+ "total_loss": 2.256742000579834
951
+ },
952
+ {
953
+ "epoch": 0.23959646910466584,
954
+ "grad_norm": 93.24700164794922,
955
+ "kl_loss": 9.902019826313335e-08,
956
+ "kl_weight": 1000000.0,
957
+ "learning_rate": 0.0004435994930291508,
958
+ "loss": 2.3344357013702393,
959
+ "step": 95,
960
+ "total_loss": 2.4334559440612793
961
+ },
962
+ {
963
+ "epoch": 0.2421185372005044,
964
+ "grad_norm": 121.62272644042969,
965
+ "kl_loss": 7.655695810626639e-08,
966
+ "kl_weight": 1000000.0,
967
+ "learning_rate": 0.00044296577946768064,
968
+ "loss": 2.6050221920013428,
969
+ "step": 96,
970
+ "total_loss": 2.681579113006592
971
+ },
972
+ {
973
+ "epoch": 0.244640605296343,
974
+ "grad_norm": 46.2025260925293,
975
+ "kl_loss": 1.436859946579716e-07,
976
+ "kl_weight": 1000000.0,
977
+ "learning_rate": 0.00044233206590621037,
978
+ "loss": 2.530095100402832,
979
+ "step": 97,
980
+ "total_loss": 2.673781156539917
981
+ },
982
+ {
983
+ "epoch": 0.2471626733921816,
984
+ "grad_norm": 113.35319519042969,
985
+ "kl_loss": 1.0582026988004145e-07,
986
+ "kl_weight": 1000000.0,
987
+ "learning_rate": 0.0004416983523447402,
988
+ "loss": 2.698303461074829,
989
+ "step": 98,
990
+ "total_loss": 2.804123640060425
991
+ },
992
+ {
993
+ "epoch": 0.24968474148802017,
994
+ "grad_norm": 102.921875,
995
+ "kl_loss": 1.3178673441416322e-07,
996
+ "kl_weight": 1000000.0,
997
+ "learning_rate": 0.00044106463878326995,
998
+ "loss": 2.6428942680358887,
999
+ "step": 99,
1000
+ "total_loss": 2.7746810913085938
1001
+ },
1002
+ {
1003
+ "epoch": 0.25220680958385877,
1004
+ "grad_norm": 79.49183654785156,
1005
+ "kl_loss": 1.0406408534890943e-07,
1006
+ "kl_weight": 1000000.0,
1007
+ "learning_rate": 0.0004404309252217998,
1008
+ "loss": 1.7494869232177734,
1009
+ "step": 100,
1010
+ "total_loss": 1.8535510301589966
1011
+ },
1012
+ {
1013
+ "epoch": 0.25220680958385877,
1014
+ "eval_loss": 2.8745031356811523,
1015
+ "eval_runtime": 36.3348,
1016
+ "eval_samples_per_second": 19.403,
1017
+ "eval_steps_per_second": 2.449,
1018
+ "step": 100
1019
+ },
1020
+ {
1021
+ "epoch": 0.2547288776796974,
1022
+ "grad_norm": 52.9495849609375,
1023
+ "kl_loss": 8.112144200822513e-08,
1024
+ "kl_weight": 1000000.0,
1025
+ "learning_rate": 0.0004397972116603295,
1026
+ "loss": 2.6726205348968506,
1027
+ "step": 101,
1028
+ "total_loss": 2.753741979598999
1029
+ },
1030
+ {
1031
+ "epoch": 0.2572509457755359,
1032
+ "grad_norm": 43.57243728637695,
1033
+ "kl_loss": 8.97685552558869e-08,
1034
+ "kl_weight": 1000000.0,
1035
+ "learning_rate": 0.0004391634980988593,
1036
+ "loss": 2.557943105697632,
1037
+ "step": 102,
1038
+ "total_loss": 2.647711753845215
1039
+ },
1040
+ {
1041
+ "epoch": 0.2597730138713745,
1042
+ "grad_norm": 89.02836608886719,
1043
+ "kl_loss": 7.308499050395767e-08,
1044
+ "kl_weight": 1000000.0,
1045
+ "learning_rate": 0.0004385297845373891,
1046
+ "loss": 2.3326590061187744,
1047
+ "step": 103,
1048
+ "total_loss": 2.4057440757751465
1049
+ },
1050
+ {
1051
+ "epoch": 0.26229508196721313,
1052
+ "grad_norm": 70.54704284667969,
1053
+ "kl_loss": 1.186173719247563e-07,
1054
+ "kl_weight": 1000000.0,
1055
+ "learning_rate": 0.0004378960709759189,
1056
+ "loss": 2.48604154586792,
1057
+ "step": 104,
1058
+ "total_loss": 2.604658842086792
1059
+ },
1060
+ {
1061
+ "epoch": 0.2648171500630517,
1062
+ "grad_norm": 56.15437316894531,
1063
+ "kl_loss": 1.0662374450021161e-07,
1064
+ "kl_weight": 1000000.0,
1065
+ "learning_rate": 0.0004372623574144487,
1066
+ "loss": 2.398261070251465,
1067
+ "step": 105,
1068
+ "total_loss": 2.504884719848633
1069
+ },
1070
+ {
1071
+ "epoch": 0.2673392181588903,
1072
+ "grad_norm": 50.12539291381836,
1073
+ "kl_loss": 1.2759885237301205e-07,
1074
+ "kl_weight": 1000000.0,
1075
+ "learning_rate": 0.00043662864385297847,
1076
+ "loss": 2.3349037170410156,
1077
+ "step": 106,
1078
+ "total_loss": 2.4625024795532227
1079
+ },
1080
+ {
1081
+ "epoch": 0.2698612862547289,
1082
+ "grad_norm": 58.65528106689453,
1083
+ "kl_loss": 6.978748245956012e-08,
1084
+ "kl_weight": 1000000.0,
1085
+ "learning_rate": 0.00043599493029150826,
1086
+ "loss": 2.520481586456299,
1087
+ "step": 107,
1088
+ "total_loss": 2.590269088745117
1089
+ },
1090
+ {
1091
+ "epoch": 0.2723833543505675,
1092
+ "grad_norm": 114.07057189941406,
1093
+ "kl_loss": 2.180455993538999e-07,
1094
+ "kl_weight": 1000000.0,
1095
+ "learning_rate": 0.00043536121673003804,
1096
+ "loss": 2.2573392391204834,
1097
+ "step": 108,
1098
+ "total_loss": 2.4753847122192383
1099
+ },
1100
+ {
1101
+ "epoch": 0.27490542244640603,
1102
+ "grad_norm": 42.89162063598633,
1103
+ "kl_loss": 1.3848097069057985e-07,
1104
+ "kl_weight": 1000000.0,
1105
+ "learning_rate": 0.0004347275031685678,
1106
+ "loss": 2.557804822921753,
1107
+ "step": 109,
1108
+ "total_loss": 2.6962857246398926
1109
+ },
1110
+ {
1111
+ "epoch": 0.27742749054224464,
1112
+ "grad_norm": 34.359859466552734,
1113
+ "kl_loss": 7.828553094668678e-08,
1114
+ "kl_weight": 1000000.0,
1115
+ "learning_rate": 0.0004340937896070976,
1116
+ "loss": 2.673866033554077,
1117
+ "step": 110,
1118
+ "total_loss": 2.7521514892578125
1119
+ },
1120
+ {
1121
+ "epoch": 0.27994955863808324,
1122
+ "grad_norm": 56.4673957824707,
1123
+ "kl_loss": 1.2365975976535992e-07,
1124
+ "kl_weight": 1000000.0,
1125
+ "learning_rate": 0.00043346007604562736,
1126
+ "loss": 2.275721311569214,
1127
+ "step": 111,
1128
+ "total_loss": 2.399381160736084
1129
+ },
1130
+ {
1131
+ "epoch": 0.28247162673392184,
1132
+ "grad_norm": 80.7730484008789,
1133
+ "kl_loss": 7.710952587558495e-08,
1134
+ "kl_weight": 1000000.0,
1135
+ "learning_rate": 0.0004328263624841572,
1136
+ "loss": 2.3654656410217285,
1137
+ "step": 112,
1138
+ "total_loss": 2.442575216293335
1139
+ },
1140
+ {
1141
+ "epoch": 0.2849936948297604,
1142
+ "grad_norm": 78.50458526611328,
1143
+ "kl_loss": 1.1497648699787533e-07,
1144
+ "kl_weight": 1000000.0,
1145
+ "learning_rate": 0.00043219264892268693,
1146
+ "loss": 2.0045268535614014,
1147
+ "step": 113,
1148
+ "total_loss": 2.1195032596588135
1149
+ },
1150
+ {
1151
+ "epoch": 0.287515762925599,
1152
+ "grad_norm": 42.11017990112305,
1153
+ "kl_loss": 1.2359024026409315e-07,
1154
+ "kl_weight": 1000000.0,
1155
+ "learning_rate": 0.0004315589353612168,
1156
+ "loss": 2.8311715126037598,
1157
+ "step": 114,
1158
+ "total_loss": 2.9547617435455322
1159
+ },
1160
+ {
1161
+ "epoch": 0.2900378310214376,
1162
+ "grad_norm": 46.80673599243164,
1163
+ "kl_loss": 1.1356344487012393e-07,
1164
+ "kl_weight": 1000000.0,
1165
+ "learning_rate": 0.0004309252217997465,
1166
+ "loss": 2.4908668994903564,
1167
+ "step": 115,
1168
+ "total_loss": 2.6044304370880127
1169
+ },
1170
+ {
1171
+ "epoch": 0.29255989911727615,
1172
+ "grad_norm": 37.80940628051758,
1173
+ "kl_loss": 8.934256356951664e-08,
1174
+ "kl_weight": 1000000.0,
1175
+ "learning_rate": 0.0004302915082382763,
1176
+ "loss": 2.5639941692352295,
1177
+ "step": 116,
1178
+ "total_loss": 2.653336763381958
1179
+ },
1180
+ {
1181
+ "epoch": 0.29508196721311475,
1182
+ "grad_norm": 40.135955810546875,
1183
+ "kl_loss": 9.139830581261776e-08,
1184
+ "kl_weight": 1000000.0,
1185
+ "learning_rate": 0.0004296577946768061,
1186
+ "loss": 2.3605127334594727,
1187
+ "step": 117,
1188
+ "total_loss": 2.451910972595215
1189
+ },
1190
+ {
1191
+ "epoch": 0.29760403530895335,
1192
+ "grad_norm": 34.20405197143555,
1193
+ "kl_loss": 6.737117530519754e-08,
1194
+ "kl_weight": 1000000.0,
1195
+ "learning_rate": 0.0004290240811153359,
1196
+ "loss": 2.2198715209960938,
1197
+ "step": 118,
1198
+ "total_loss": 2.2872426509857178
1199
+ },
1200
+ {
1201
+ "epoch": 0.30012610340479196,
1202
+ "grad_norm": 38.27109909057617,
1203
+ "kl_loss": 7.206848096075191e-08,
1204
+ "kl_weight": 1000000.0,
1205
+ "learning_rate": 0.00042839036755386566,
1206
+ "loss": 2.442990779876709,
1207
+ "step": 119,
1208
+ "total_loss": 2.515059232711792
1209
+ },
1210
+ {
1211
+ "epoch": 0.3026481715006305,
1212
+ "grad_norm": 33.383628845214844,
1213
+ "kl_loss": 5.8401464997359653e-08,
1214
+ "kl_weight": 1000000.0,
1215
+ "learning_rate": 0.00042775665399239545,
1216
+ "loss": 1.9709354639053345,
1217
+ "step": 120,
1218
+ "total_loss": 2.029336929321289
1219
+ },
1220
+ {
1221
+ "epoch": 0.3051702395964691,
1222
+ "grad_norm": 29.64325714111328,
1223
+ "kl_loss": 7.080717523422209e-08,
1224
+ "kl_weight": 1000000.0,
1225
+ "learning_rate": 0.00042712294043092524,
1226
+ "loss": 2.5838398933410645,
1227
+ "step": 121,
1228
+ "total_loss": 2.6546471118927
1229
+ },
1230
+ {
1231
+ "epoch": 0.3076923076923077,
1232
+ "grad_norm": 31.308616638183594,
1233
+ "kl_loss": 7.158789117056585e-08,
1234
+ "kl_weight": 1000000.0,
1235
+ "learning_rate": 0.00042648922686945503,
1236
+ "loss": 2.582289457321167,
1237
+ "step": 122,
1238
+ "total_loss": 2.6538772583007812
1239
+ },
1240
+ {
1241
+ "epoch": 0.31021437578814626,
1242
+ "grad_norm": 31.91105842590332,
1243
+ "kl_loss": 5.958595039601278e-08,
1244
+ "kl_weight": 1000000.0,
1245
+ "learning_rate": 0.00042585551330798476,
1246
+ "loss": 2.362175703048706,
1247
+ "step": 123,
1248
+ "total_loss": 2.4217617511749268
1249
+ },
1250
+ {
1251
+ "epoch": 0.31273644388398486,
1252
+ "grad_norm": 48.74770736694336,
1253
+ "kl_loss": 1.0601585387348678e-07,
1254
+ "kl_weight": 1000000.0,
1255
+ "learning_rate": 0.0004252217997465146,
1256
+ "loss": 2.6029539108276367,
1257
+ "step": 124,
1258
+ "total_loss": 2.708969831466675
1259
+ },
1260
+ {
1261
+ "epoch": 0.31525851197982346,
1262
+ "grad_norm": 37.71382141113281,
1263
+ "kl_loss": 7.022106984777565e-08,
1264
+ "kl_weight": 1000000.0,
1265
+ "learning_rate": 0.00042458808618504434,
1266
+ "loss": 1.9780246019363403,
1267
+ "step": 125,
1268
+ "total_loss": 2.048245668411255
1269
+ },
1270
+ {
1271
+ "epoch": 0.31778058007566207,
1272
+ "grad_norm": 58.47407531738281,
1273
+ "kl_loss": 7.89053729022271e-08,
1274
+ "kl_weight": 1000000.0,
1275
+ "learning_rate": 0.0004239543726235742,
1276
+ "loss": 3.0634782314300537,
1277
+ "step": 126,
1278
+ "total_loss": 3.142383575439453
1279
+ },
1280
+ {
1281
+ "epoch": 0.3203026481715006,
1282
+ "grad_norm": 49.75334167480469,
1283
+ "kl_loss": 9.529335898150748e-08,
1284
+ "kl_weight": 1000000.0,
1285
+ "learning_rate": 0.0004233206590621039,
1286
+ "loss": 2.625107765197754,
1287
+ "step": 127,
1288
+ "total_loss": 2.7204010486602783
1289
+ },
1290
+ {
1291
+ "epoch": 0.3228247162673392,
1292
+ "grad_norm": 37.173675537109375,
1293
+ "kl_loss": 9.76136007579953e-08,
1294
+ "kl_weight": 1000000.0,
1295
+ "learning_rate": 0.00042268694550063376,
1296
+ "loss": 2.3797764778137207,
1297
+ "step": 128,
1298
+ "total_loss": 2.4773900508880615
1299
+ },
1300
+ {
1301
+ "epoch": 0.3253467843631778,
1302
+ "grad_norm": 35.94040298461914,
1303
+ "kl_loss": 8.867663581213492e-08,
1304
+ "kl_weight": 1000000.0,
1305
+ "learning_rate": 0.0004220532319391635,
1306
+ "loss": 2.19878888130188,
1307
+ "step": 129,
1308
+ "total_loss": 2.2874655723571777
1309
+ },
1310
+ {
1311
+ "epoch": 0.32786885245901637,
1312
+ "grad_norm": 30.7440128326416,
1313
+ "kl_loss": 8.357946512660419e-08,
1314
+ "kl_weight": 1000000.0,
1315
+ "learning_rate": 0.0004214195183776933,
1316
+ "loss": 2.374424934387207,
1317
+ "step": 130,
1318
+ "total_loss": 2.4580044746398926
1319
+ },
1320
+ {
1321
+ "epoch": 0.33039092055485497,
1322
+ "grad_norm": 44.41523361206055,
1323
+ "kl_loss": 7.932466417059914e-08,
1324
+ "kl_weight": 1000000.0,
1325
+ "learning_rate": 0.00042078580481622307,
1326
+ "loss": 2.270353078842163,
1327
+ "step": 131,
1328
+ "total_loss": 2.349677801132202
1329
+ },
1330
+ {
1331
+ "epoch": 0.3329129886506936,
1332
+ "grad_norm": 50.61003494262695,
1333
+ "kl_loss": 7.062011064817852e-08,
1334
+ "kl_weight": 1000000.0,
1335
+ "learning_rate": 0.00042015209125475286,
1336
+ "loss": 2.704482078552246,
1337
+ "step": 132,
1338
+ "total_loss": 2.775102138519287
1339
+ },
1340
+ {
1341
+ "epoch": 0.3354350567465322,
1342
+ "grad_norm": 52.31584930419922,
1343
+ "kl_loss": 8.649838179053404e-08,
1344
+ "kl_weight": 1000000.0,
1345
+ "learning_rate": 0.00041951837769328265,
1346
+ "loss": 2.623465061187744,
1347
+ "step": 133,
1348
+ "total_loss": 2.709963321685791
1349
+ },
1350
+ {
1351
+ "epoch": 0.3379571248423707,
1352
+ "grad_norm": 44.85032653808594,
1353
+ "kl_loss": 7.965866899439789e-08,
1354
+ "kl_weight": 1000000.0,
1355
+ "learning_rate": 0.00041888466413181243,
1356
+ "loss": 2.3895857334136963,
1357
+ "step": 134,
1358
+ "total_loss": 2.4692444801330566
1359
+ },
1360
+ {
1361
+ "epoch": 0.34047919293820933,
1362
+ "grad_norm": 52.73245620727539,
1363
+ "kl_loss": 9.069820805507334e-08,
1364
+ "kl_weight": 1000000.0,
1365
+ "learning_rate": 0.0004182509505703422,
1366
+ "loss": 2.5411245822906494,
1367
+ "step": 135,
1368
+ "total_loss": 2.6318228244781494
1369
+ },
1370
+ {
1371
+ "epoch": 0.34300126103404793,
1372
+ "grad_norm": 54.24882507324219,
1373
+ "kl_loss": 9.488780960964505e-08,
1374
+ "kl_weight": 1000000.0,
1375
+ "learning_rate": 0.000417617237008872,
1376
+ "loss": 1.959804654121399,
1377
+ "step": 136,
1378
+ "total_loss": 2.054692506790161
1379
+ },
1380
+ {
1381
+ "epoch": 0.3455233291298865,
1382
+ "grad_norm": 80.58323669433594,
1383
+ "kl_loss": 1.1013327139153262e-07,
1384
+ "kl_weight": 1000000.0,
1385
+ "learning_rate": 0.00041698352344740175,
1386
+ "loss": 2.191988229751587,
1387
+ "step": 137,
1388
+ "total_loss": 2.30212140083313
1389
+ },
1390
+ {
1391
+ "epoch": 0.3480453972257251,
1392
+ "grad_norm": 94.20862579345703,
1393
+ "kl_loss": 4.3523357362573734e-08,
1394
+ "kl_weight": 1000000.0,
1395
+ "learning_rate": 0.0004163498098859316,
1396
+ "loss": 2.3833768367767334,
1397
+ "step": 138,
1398
+ "total_loss": 2.4269001483917236
1399
+ },
1400
+ {
1401
+ "epoch": 0.3505674653215637,
1402
+ "grad_norm": 57.289390563964844,
1403
+ "kl_loss": 6.378383687888345e-08,
1404
+ "kl_weight": 1000000.0,
1405
+ "learning_rate": 0.0004157160963244613,
1406
+ "loss": 2.5576775074005127,
1407
+ "step": 139,
1408
+ "total_loss": 2.6214613914489746
1409
+ },
1410
+ {
1411
+ "epoch": 0.3530895334174023,
1412
+ "grad_norm": 108.49940490722656,
1413
+ "kl_loss": 1.0705950614919857e-07,
1414
+ "kl_weight": 1000000.0,
1415
+ "learning_rate": 0.00041508238276299116,
1416
+ "loss": 2.5679421424865723,
1417
+ "step": 140,
1418
+ "total_loss": 2.675001621246338
1419
+ },
1420
+ {
1421
+ "epoch": 0.35561160151324084,
1422
+ "grad_norm": 45.74307632446289,
1423
+ "kl_loss": 1.295888125696365e-07,
1424
+ "kl_weight": 1000000.0,
1425
+ "learning_rate": 0.0004144486692015209,
1426
+ "loss": 2.040994882583618,
1427
+ "step": 141,
1428
+ "total_loss": 2.170583724975586
1429
+ },
1430
+ {
1431
+ "epoch": 0.35813366960907944,
1432
+ "grad_norm": 43.9136962890625,
1433
+ "kl_loss": 1.1324662807510322e-07,
1434
+ "kl_weight": 1000000.0,
1435
+ "learning_rate": 0.00041381495564005074,
1436
+ "loss": 2.430799961090088,
1437
+ "step": 142,
1438
+ "total_loss": 2.544046640396118
1439
+ },
1440
+ {
1441
+ "epoch": 0.36065573770491804,
1442
+ "grad_norm": 37.08522415161133,
1443
+ "kl_loss": 1.0119717330780986e-07,
1444
+ "kl_weight": 1000000.0,
1445
+ "learning_rate": 0.0004131812420785805,
1446
+ "loss": 2.4721150398254395,
1447
+ "step": 143,
1448
+ "total_loss": 2.573312282562256
1449
+ },
1450
+ {
1451
+ "epoch": 0.36317780580075665,
1452
+ "grad_norm": 30.434844970703125,
1453
+ "kl_loss": 8.321502775743284e-08,
1454
+ "kl_weight": 1000000.0,
1455
+ "learning_rate": 0.00041254752851711026,
1456
+ "loss": 2.430828094482422,
1457
+ "step": 144,
1458
+ "total_loss": 2.514043092727661
1459
+ },
1460
+ {
1461
+ "epoch": 0.3656998738965952,
1462
+ "grad_norm": 31.82435417175293,
1463
+ "kl_loss": 7.403247082038433e-08,
1464
+ "kl_weight": 1000000.0,
1465
+ "learning_rate": 0.00041191381495564005,
1466
+ "loss": 2.4657552242279053,
1467
+ "step": 145,
1468
+ "total_loss": 2.539787769317627
1469
+ }
1470
+ ],
1471
+ "logging_steps": 1,
1472
+ "max_steps": 794,
1473
+ "num_input_tokens_seen": 0,
1474
+ "num_train_epochs": 2,
1475
+ "save_steps": 5,
1476
+ "stateful_callbacks": {
1477
+ "TrainerControl": {
1478
+ "args": {
1479
+ "should_epoch_stop": false,
1480
+ "should_evaluate": false,
1481
+ "should_log": false,
1482
+ "should_save": false,
1483
+ "should_training_stop": false
1484
+ },
1485
+ "attributes": {}
1486
+ }
1487
+ },
1488
+ "total_flos": 4.9855070208e+16,
1489
+ "train_batch_size": 2,
1490
+ "trial_name": null,
1491
+ "trial_params": null
1492
+ }
checkpoints/checkpoint-145/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-15/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1a63186e1831fee81225bf6cd47d27992b873c25909a295f22f84e99b453aa2
3
+ size 6901
checkpoints/checkpoint-15/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d81fc9bea09b0c1522656e1033f2911c1b33b7fe2d86027dccd20039b0310444
3
+ size 22241
checkpoints/checkpoint-15/trainer_state.json ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.03783102143757881,
6
+ "eval_steps": 100,
7
+ "global_step": 15,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.0025220680958385876,
14
+ "grad_norm": 119.76318359375,
15
+ "kl_loss": -1.1687562835330993e-15,
16
+ "kl_weight": 1000000.0,
17
+ "learning_rate": 0.0,
18
+ "loss": 2.6394360065460205,
19
+ "step": 1,
20
+ "total_loss": 2.6394360065460205
21
+ },
22
+ {
23
+ "epoch": 0.005044136191677175,
24
+ "grad_norm": 116.01831817626953,
25
+ "kl_loss": -1.280914393650412e-14,
26
+ "kl_weight": 1000000.0,
27
+ "learning_rate": 0.0001,
28
+ "loss": 3.2936160564422607,
29
+ "step": 2,
30
+ "total_loss": 3.2936160564422607
31
+ },
32
+ {
33
+ "epoch": 0.007566204287515763,
34
+ "grad_norm": 104.04817962646484,
35
+ "kl_loss": 7.10318071028837e-09,
36
+ "kl_weight": 1000000.0,
37
+ "learning_rate": 0.0002,
38
+ "loss": 3.084439992904663,
39
+ "step": 3,
40
+ "total_loss": 3.091543197631836
41
+ },
42
+ {
43
+ "epoch": 0.01008827238335435,
44
+ "grad_norm": 68.36679077148438,
45
+ "kl_loss": 2.8489626657801637e-08,
46
+ "kl_weight": 1000000.0,
47
+ "learning_rate": 0.0003,
48
+ "loss": 3.105210304260254,
49
+ "step": 4,
50
+ "total_loss": 3.133699893951416
51
+ },
52
+ {
53
+ "epoch": 0.012610340479192938,
54
+ "grad_norm": 61.00284957885742,
55
+ "kl_loss": 4.923957774849441e-08,
56
+ "kl_weight": 1000000.0,
57
+ "learning_rate": 0.0004,
58
+ "loss": 3.345022678375244,
59
+ "step": 5,
60
+ "total_loss": 3.3942623138427734
61
+ },
62
+ {
63
+ "epoch": 0.015132408575031526,
64
+ "grad_norm": 65.48960876464844,
65
+ "kl_loss": 1.43211394743048e-07,
66
+ "kl_weight": 1000000.0,
67
+ "learning_rate": 0.0005,
68
+ "loss": 2.3467514514923096,
69
+ "step": 6,
70
+ "total_loss": 2.4899628162384033
71
+ },
72
+ {
73
+ "epoch": 0.017654476670870115,
74
+ "grad_norm": 63.001102447509766,
75
+ "kl_loss": 9.109995602329946e-08,
76
+ "kl_weight": 1000000.0,
77
+ "learning_rate": 0.0004993662864385298,
78
+ "loss": 2.5077083110809326,
79
+ "step": 7,
80
+ "total_loss": 2.5988082885742188
81
+ },
82
+ {
83
+ "epoch": 0.0201765447667087,
84
+ "grad_norm": 58.6073112487793,
85
+ "kl_loss": 2.3511624647198914e-07,
86
+ "kl_weight": 1000000.0,
87
+ "learning_rate": 0.0004987325728770596,
88
+ "loss": 2.2668278217315674,
89
+ "step": 8,
90
+ "total_loss": 2.501944065093994
91
+ },
92
+ {
93
+ "epoch": 0.02269861286254729,
94
+ "grad_norm": 97.743896484375,
95
+ "kl_loss": 2.1175161180053692e-07,
96
+ "kl_weight": 1000000.0,
97
+ "learning_rate": 0.0004980988593155894,
98
+ "loss": 2.352029800415039,
99
+ "step": 9,
100
+ "total_loss": 2.563781499862671
101
+ },
102
+ {
103
+ "epoch": 0.025220680958385876,
104
+ "grad_norm": 60.91500473022461,
105
+ "kl_loss": 1.2846226127294358e-07,
106
+ "kl_weight": 1000000.0,
107
+ "learning_rate": 0.0004974651457541192,
108
+ "loss": 2.2376697063446045,
109
+ "step": 10,
110
+ "total_loss": 2.3661320209503174
111
+ },
112
+ {
113
+ "epoch": 0.027742749054224466,
114
+ "grad_norm": 55.095516204833984,
115
+ "kl_loss": 1.4181343033214944e-07,
116
+ "kl_weight": 1000000.0,
117
+ "learning_rate": 0.000496831432192649,
118
+ "loss": 2.8243818283081055,
119
+ "step": 11,
120
+ "total_loss": 2.9661953449249268
121
+ },
122
+ {
123
+ "epoch": 0.03026481715006305,
124
+ "grad_norm": 44.97727966308594,
125
+ "kl_loss": 1.545683971926337e-07,
126
+ "kl_weight": 1000000.0,
127
+ "learning_rate": 0.0004961977186311787,
128
+ "loss": 2.4689197540283203,
129
+ "step": 12,
130
+ "total_loss": 2.623488187789917
131
+ },
132
+ {
133
+ "epoch": 0.03278688524590164,
134
+ "grad_norm": 51.62504196166992,
135
+ "kl_loss": 2.2357993145760702e-07,
136
+ "kl_weight": 1000000.0,
137
+ "learning_rate": 0.0004955640050697085,
138
+ "loss": 2.2227847576141357,
139
+ "step": 13,
140
+ "total_loss": 2.446364641189575
141
+ },
142
+ {
143
+ "epoch": 0.03530895334174023,
144
+ "grad_norm": 42.21575927734375,
145
+ "kl_loss": 1.6229765265052265e-07,
146
+ "kl_weight": 1000000.0,
147
+ "learning_rate": 0.0004949302915082382,
148
+ "loss": 2.4396450519561768,
149
+ "step": 14,
150
+ "total_loss": 2.601942777633667
151
+ },
152
+ {
153
+ "epoch": 0.03783102143757881,
154
+ "grad_norm": 40.02684783935547,
155
+ "kl_loss": 1.4151250127270032e-07,
156
+ "kl_weight": 1000000.0,
157
+ "learning_rate": 0.0004942965779467681,
158
+ "loss": 2.509690761566162,
159
+ "step": 15,
160
+ "total_loss": 2.651203155517578
161
+ }
162
+ ],
163
+ "logging_steps": 1,
164
+ "max_steps": 794,
165
+ "num_input_tokens_seen": 0,
166
+ "num_train_epochs": 2,
167
+ "save_steps": 5,
168
+ "stateful_callbacks": {
169
+ "TrainerControl": {
170
+ "args": {
171
+ "should_epoch_stop": false,
172
+ "should_evaluate": false,
173
+ "should_log": false,
174
+ "should_save": false,
175
+ "should_training_stop": false
176
+ },
177
+ "attributes": {}
178
+ }
179
+ },
180
+ "total_flos": 5157421056000000.0,
181
+ "train_batch_size": 2,
182
+ "trial_name": null,
183
+ "trial_params": null
184
+ }
checkpoints/checkpoint-15/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62465250eb2a1f055f36fe52b462f0d54a69dad86561444a881da53ad99e7862
3
+ size 5777
checkpoints/checkpoint-150/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e9c14bb7ffa28063282ef286b151c1799c66569f368152efd9854ada08f9b8
3
+ size 6901
checkpoints/checkpoint-150/steering_vector.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab895f3ce2f398f496cc4bbe17afabea385f960ee9b815024a425076828186c0
3
+ size 22241