baby-dev commited on
Commit
34d25c3
·
verified ·
1 Parent(s): 242ede1

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c693a75292cc1fa3a9f6f73a72febe4c6e07a58a4197545a4c4d4fd6e879b5ee
3
  size 48552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51dd34513a4fa72c6a40a490cefe2c7c0da0574f3ff7b502150801b8b3ac1912
3
  size 48552
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:294259cb2c0619098026d5dab6b1d13ce9ce514c68aaa0eb013550087742d2a7
3
  size 107046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e416c953720231f41d31a3f85c68a502f5c35eba038071e85319b8eec10ce37
3
  size 107046
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aa86072d894b06ed15a781ebf27303cfbb0a54143c21949f75c3251dd2ec55c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97e51ea9f5d34aebc6d066d7c2fb7a20dbec78f27d117049f04bf79c02536c3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7962e06291987b3327e85ffe1a0f48aec5a6651b8b5139bec8a1fd5526407429
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a277d82c7576fb11769cdddd31bacd8b5bc730226a0e626981ae85a7a41efa44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9979035639412998,
5
  "eval_steps": 500,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -176,6 +176,182 @@
176
  "eval_samples_per_second": 242.779,
177
  "eval_steps_per_second": 62.498,
178
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
180
  ],
181
  "logging_steps": 5,
@@ -195,7 +371,7 @@
195
  "attributes": {}
196
  }
197
  },
198
- "total_flos": 134180413440.0,
199
  "train_batch_size": 4,
200
  "trial_name": null,
201
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9958071278825997,
5
  "eval_steps": 500,
6
+ "global_step": 238,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
176
  "eval_samples_per_second": 242.779,
177
  "eval_steps_per_second": 62.498,
178
  "step": 119
179
+ },
180
+ {
181
+ "epoch": 1.0062893081761006,
182
+ "grad_norm": 0.10388734191656113,
183
+ "learning_rate": 0.00012713675213675213,
184
+ "loss": 13.7207,
185
+ "step": 120
186
+ },
187
+ {
188
+ "epoch": 1.0482180293501049,
189
+ "grad_norm": 0.028476731851696968,
190
+ "learning_rate": 0.0001217948717948718,
191
+ "loss": 11.9223,
192
+ "step": 125
193
+ },
194
+ {
195
+ "epoch": 1.090146750524109,
196
+ "grad_norm": 0.0434449277818203,
197
+ "learning_rate": 0.00011645299145299146,
198
+ "loss": 11.9223,
199
+ "step": 130
200
+ },
201
+ {
202
+ "epoch": 1.1320754716981132,
203
+ "grad_norm": 0.09088350832462311,
204
+ "learning_rate": 0.0001111111111111111,
205
+ "loss": 11.9366,
206
+ "step": 135
207
+ },
208
+ {
209
+ "epoch": 1.1740041928721174,
210
+ "grad_norm": 0.07184627652168274,
211
+ "learning_rate": 0.00010576923076923077,
212
+ "loss": 11.5661,
213
+ "step": 140
214
+ },
215
+ {
216
+ "epoch": 1.2159329140461215,
217
+ "grad_norm": 0.04500441253185272,
218
+ "learning_rate": 0.00010042735042735043,
219
+ "loss": 12.3022,
220
+ "step": 145
221
+ },
222
+ {
223
+ "epoch": 1.2578616352201257,
224
+ "grad_norm": 0.029749717563390732,
225
+ "learning_rate": 9.508547008547008e-05,
226
+ "loss": 11.9321,
227
+ "step": 150
228
+ },
229
+ {
230
+ "epoch": 1.29979035639413,
231
+ "grad_norm": 0.046956080943346024,
232
+ "learning_rate": 8.974358974358975e-05,
233
+ "loss": 11.8407,
234
+ "step": 155
235
+ },
236
+ {
237
+ "epoch": 1.3417190775681342,
238
+ "grad_norm": 0.06576091051101685,
239
+ "learning_rate": 8.440170940170941e-05,
240
+ "loss": 11.9356,
241
+ "step": 160
242
+ },
243
+ {
244
+ "epoch": 1.3836477987421385,
245
+ "grad_norm": 0.056520890444517136,
246
+ "learning_rate": 7.905982905982905e-05,
247
+ "loss": 11.9497,
248
+ "step": 165
249
+ },
250
+ {
251
+ "epoch": 1.4255765199161425,
252
+ "grad_norm": 0.05084730684757233,
253
+ "learning_rate": 7.371794871794872e-05,
254
+ "loss": 11.5847,
255
+ "step": 170
256
+ },
257
+ {
258
+ "epoch": 1.4675052410901468,
259
+ "grad_norm": 0.03961843624711037,
260
+ "learning_rate": 6.837606837606838e-05,
261
+ "loss": 12.261,
262
+ "step": 175
263
+ },
264
+ {
265
+ "epoch": 1.509433962264151,
266
+ "grad_norm": 0.03475997969508171,
267
+ "learning_rate": 6.303418803418804e-05,
268
+ "loss": 11.8777,
269
+ "step": 180
270
+ },
271
+ {
272
+ "epoch": 1.551362683438155,
273
+ "grad_norm": 0.028086921200156212,
274
+ "learning_rate": 5.76923076923077e-05,
275
+ "loss": 11.9761,
276
+ "step": 185
277
+ },
278
+ {
279
+ "epoch": 1.5932914046121593,
280
+ "grad_norm": 0.046144578605890274,
281
+ "learning_rate": 5.2350427350427356e-05,
282
+ "loss": 11.8616,
283
+ "step": 190
284
+ },
285
+ {
286
+ "epoch": 1.6352201257861636,
287
+ "grad_norm": 0.05854855850338936,
288
+ "learning_rate": 4.700854700854701e-05,
289
+ "loss": 11.9751,
290
+ "step": 195
291
+ },
292
+ {
293
+ "epoch": 1.6771488469601676,
294
+ "grad_norm": 0.035215962678194046,
295
+ "learning_rate": 4.1666666666666665e-05,
296
+ "loss": 11.9493,
297
+ "step": 200
298
+ },
299
+ {
300
+ "epoch": 1.719077568134172,
301
+ "grad_norm": 0.06034635007381439,
302
+ "learning_rate": 3.632478632478633e-05,
303
+ "loss": 11.8182,
304
+ "step": 205
305
+ },
306
+ {
307
+ "epoch": 1.7610062893081762,
308
+ "grad_norm": 0.027154497802257538,
309
+ "learning_rate": 3.098290598290598e-05,
310
+ "loss": 11.9445,
311
+ "step": 210
312
+ },
313
+ {
314
+ "epoch": 1.8029350104821802,
315
+ "grad_norm": 0.055567361414432526,
316
+ "learning_rate": 2.564102564102564e-05,
317
+ "loss": 11.9099,
318
+ "step": 215
319
+ },
320
+ {
321
+ "epoch": 1.8448637316561844,
322
+ "grad_norm": 0.04124658182263374,
323
+ "learning_rate": 2.02991452991453e-05,
324
+ "loss": 11.7515,
325
+ "step": 220
326
+ },
327
+ {
328
+ "epoch": 1.8867924528301887,
329
+ "grad_norm": 0.047468505799770355,
330
+ "learning_rate": 1.4957264957264958e-05,
331
+ "loss": 12.3221,
332
+ "step": 225
333
+ },
334
+ {
335
+ "epoch": 1.9287211740041927,
336
+ "grad_norm": 0.04315986856818199,
337
+ "learning_rate": 9.615384615384616e-06,
338
+ "loss": 11.6918,
339
+ "step": 230
340
+ },
341
+ {
342
+ "epoch": 1.9706498951781972,
343
+ "grad_norm": 0.08445514738559723,
344
+ "learning_rate": 4.273504273504274e-06,
345
+ "loss": 12.0431,
346
+ "step": 235
347
+ },
348
+ {
349
+ "epoch": 1.9958071278825997,
350
+ "eval_loss": 11.917438507080078,
351
+ "eval_runtime": 0.4203,
352
+ "eval_samples_per_second": 240.321,
353
+ "eval_steps_per_second": 61.865,
354
+ "step": 238
355
  }
356
  ],
357
  "logging_steps": 5,
 
371
  "attributes": {}
372
  }
373
  },
374
+ "total_flos": 268360826880.0,
375
  "train_batch_size": 4,
376
  "trial_name": null,
377
  "trial_params": null