rootxhacker commited on
Commit
6e303a7
·
verified ·
1 Parent(s): 807cd37

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cdda364a7dea7ba94515761ffb2ace46580132bb59bff458e83e48f877aff5a
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6231b9a43188b3030be892c139d7894a62c3fea99c8c6c7eb17b00365a57d7
3
  size 36730224
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9f18b53100851e8533c053a9a05241ff32f7709a9624b367b683d0de95176f9
3
  size 1544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61e1d1dea84b6c812c4ddd82f2f7f4ea581df5126382ca1bf3a48d5758288836
3
  size 1544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56779e621a4c332b005c80c0b7213bbef6847256028ea00b31ba836b8203e11e
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:923a3b0ad55df1dc931abd14ea79f0c2ae7566e46df76993edc14d667615816a
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d00cc8448e3ffb4901f6ba77807b5c278ba2af145c1beeacecc1ec2179e63f6
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42de46b2e4c5179833d46fe28b80f469c3d40255a80ec3a104ef7ad1fd3d8210
3
+ size 14180
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08e8e3095f52bd6edeff23718f3d5712489fc946ab10f7e28fb30a543dc6baa8
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adce9cc9139db0a326056a6714b81b8c58d21b77098b37dae23cf33f822452c
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ca71486ea7bff6a3b5fb792928d9943140a9a2fdb3589f5685c55d472ab8413
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f03648c25873beacb3afe9bdbd6315225ba0d09c0be3adf22a9ad7dc42cafe47
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 5.163902759552002,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-500",
5
- "epoch": 0.038458580109222366,
6
  "eval_steps": 250,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -94,6 +94,178 @@
94
  "eval_samples_per_second": 50.725,
95
  "eval_steps_per_second": 12.681,
96
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
  ],
99
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 1250,
3
+ "best_metric": 4.76948356628418,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-1000",
5
+ "epoch": 0.1153757403276671,
6
  "eval_steps": 250,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
94
  "eval_samples_per_second": 50.725,
95
  "eval_steps_per_second": 12.681,
96
  "step": 500
97
+ },
98
+ {
99
+ "epoch": 0.0423044381201446,
100
+ "grad_norm": 6.368561744689941,
101
+ "learning_rate": 0.0001997714463808015,
102
+ "loss": 5.0723,
103
+ "step": 550
104
+ },
105
+ {
106
+ "epoch": 0.04615029613106684,
107
+ "grad_norm": 4.666464328765869,
108
+ "learning_rate": 0.00019951172635898504,
109
+ "loss": 5.0754,
110
+ "step": 600
111
+ },
112
+ {
113
+ "epoch": 0.04999615414198908,
114
+ "grad_norm": 9.395013809204102,
115
+ "learning_rate": 0.00019925200633716855,
116
+ "loss": 4.7718,
117
+ "step": 650
118
+ },
119
+ {
120
+ "epoch": 0.05384201215291132,
121
+ "grad_norm": 4.279386520385742,
122
+ "learning_rate": 0.00019899228631535204,
123
+ "loss": 4.781,
124
+ "step": 700
125
+ },
126
+ {
127
+ "epoch": 0.05768787016383355,
128
+ "grad_norm": 4.274901866912842,
129
+ "learning_rate": 0.00019873256629353558,
130
+ "loss": 4.9529,
131
+ "step": 750
132
+ },
133
+ {
134
+ "epoch": 0.05768787016383355,
135
+ "eval_loss": 4.844481945037842,
136
+ "eval_runtime": 18.9171,
137
+ "eval_samples_per_second": 52.862,
138
+ "eval_steps_per_second": 13.216,
139
+ "step": 750
140
+ },
141
+ {
142
+ "epoch": 0.06153372817475579,
143
+ "grad_norm": 3.241445541381836,
144
+ "learning_rate": 0.0001984728462717191,
145
+ "loss": 5.0586,
146
+ "step": 800
147
+ },
148
+ {
149
+ "epoch": 0.06537958618567802,
150
+ "grad_norm": 5.880845546722412,
151
+ "learning_rate": 0.00019821312624990263,
152
+ "loss": 4.952,
153
+ "step": 850
154
+ },
155
+ {
156
+ "epoch": 0.06922544419660026,
157
+ "grad_norm": 3.81998610496521,
158
+ "learning_rate": 0.00019795340622808614,
159
+ "loss": 5.0012,
160
+ "step": 900
161
+ },
162
+ {
163
+ "epoch": 0.0730713022075225,
164
+ "grad_norm": 4.52741003036499,
165
+ "learning_rate": 0.00019769368620626966,
166
+ "loss": 4.7775,
167
+ "step": 950
168
+ },
169
+ {
170
+ "epoch": 0.07691716021844473,
171
+ "grad_norm": 27.68866729736328,
172
+ "learning_rate": 0.00019743916058488948,
173
+ "loss": 4.9852,
174
+ "step": 1000
175
+ },
176
+ {
177
+ "epoch": 0.07691716021844473,
178
+ "eval_loss": 4.836514472961426,
179
+ "eval_runtime": 19.7103,
180
+ "eval_samples_per_second": 50.735,
181
+ "eval_steps_per_second": 12.684,
182
+ "step": 1000
183
+ },
184
+ {
185
+ "epoch": 0.08076301822936698,
186
+ "grad_norm": 5.79191255569458,
187
+ "learning_rate": 0.00019717944056307302,
188
+ "loss": 4.6731,
189
+ "step": 1050
190
+ },
191
+ {
192
+ "epoch": 0.0846088762402892,
193
+ "grad_norm": 4.957877159118652,
194
+ "learning_rate": 0.00019691972054125653,
195
+ "loss": 4.7966,
196
+ "step": 1100
197
+ },
198
+ {
199
+ "epoch": 0.08845473425121145,
200
+ "grad_norm": 3.2968597412109375,
201
+ "learning_rate": 0.00019666000051944005,
202
+ "loss": 4.752,
203
+ "step": 1150
204
+ },
205
+ {
206
+ "epoch": 0.09230059226213368,
207
+ "grad_norm": 6.059363842010498,
208
+ "learning_rate": 0.00019640028049762359,
209
+ "loss": 4.7368,
210
+ "step": 1200
211
+ },
212
+ {
213
+ "epoch": 0.09614645027305592,
214
+ "grad_norm": 5.9793171882629395,
215
+ "learning_rate": 0.0001961405604758071,
216
+ "loss": 4.9613,
217
+ "step": 1250
218
+ },
219
+ {
220
+ "epoch": 0.09614645027305592,
221
+ "eval_loss": 4.76948356628418,
222
+ "eval_runtime": 18.6703,
223
+ "eval_samples_per_second": 53.561,
224
+ "eval_steps_per_second": 13.39,
225
+ "step": 1250
226
+ },
227
+ {
228
+ "epoch": 0.09999230828397816,
229
+ "grad_norm": 7.604544162750244,
230
+ "learning_rate": 0.0001958808404539906,
231
+ "loss": 4.8365,
232
+ "step": 1300
233
+ },
234
+ {
235
+ "epoch": 0.10383816629490039,
236
+ "grad_norm": 6.4756083488464355,
237
+ "learning_rate": 0.00019562112043217412,
238
+ "loss": 4.8381,
239
+ "step": 1350
240
+ },
241
+ {
242
+ "epoch": 0.10768402430582263,
243
+ "grad_norm": 3.605341672897339,
244
+ "learning_rate": 0.00019536140041035764,
245
+ "loss": 4.7004,
246
+ "step": 1400
247
+ },
248
+ {
249
+ "epoch": 0.11152988231674486,
250
+ "grad_norm": 3.4805853366851807,
251
+ "learning_rate": 0.00019510168038854115,
252
+ "loss": 4.7293,
253
+ "step": 1450
254
+ },
255
+ {
256
+ "epoch": 0.1153757403276671,
257
+ "grad_norm": 4.5123796463012695,
258
+ "learning_rate": 0.0001948419603667247,
259
+ "loss": 4.8315,
260
+ "step": 1500
261
+ },
262
+ {
263
+ "epoch": 0.1153757403276671,
264
+ "eval_loss": 4.784451961517334,
265
+ "eval_runtime": 18.5259,
266
+ "eval_samples_per_second": 53.979,
267
+ "eval_steps_per_second": 13.495,
268
+ "step": 1500
269
  }
270
  ],
271
  "logging_steps": 50,