YouAreSpecialToMe commited on
Commit
ffdc15a
·
verified ·
1 Parent(s): 43385c8

Upload folder using huggingface_hub

Browse files
global_step40/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec51201b882f6a8e39e9b4a000d3c316ca8bfaf5c15541aae7dc04097201337
3
+ size 12067415756
global_step40/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741b20bd726cf033744f268a8a2469ef4b3d450cabe83e3f933e68e8bdc16493
3
+ size 12067417612
global_step40/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cbd91783283b8f5eb9a13f66b31682d30886d4e7a6ff3ea96da3f4cae55910e
3
+ size 12067417740
global_step40/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296f369a346a7c54d6483df40538f0901e29f1f9efff96014d0ea2326e25bb79
3
+ size 12067417484
global_step40/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a941908da7cf5ec9a393ba912ebc9dc6c8d512d2289db2364d86eb2829e1002
3
+ size 8045051256
latest CHANGED
@@ -1 +1 @@
1
- global_step60
 
1
+ global_step40
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1561b30955cdafc1bbff0f91f2984da108e591855b04d00afa377e9ebbf2154d
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8ae8180852b2a447f27a0feb7345f9587608f0dd69af025121657669ec1f7a
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65c3d5828b8740b861f9ef831d4f6ef05422d51454996506ebcb7337c6607751
3
  size 3855679144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a305d318e8c30753363d1df7ebb71707d9febc3e7a04c301562a4103e443584
3
  size 3855679144
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c5e18f922d0af74d820247ae97bee506ab412554a58345ddf2558abc94ee3e3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70cc56408014c410353d4dd58ae9b03f4be043f5f800324f66fd8e20e99b840e
3
  size 15024
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2dcca6d9741f46592359768ea2212b9321da6408d1fd7d3a80b017bf37f434
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49d1438e98cc9c53a6852464635ce62e9788e61eb3646b73e33813f487c4b6ae
3
  size 15024
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69420ece2c255923c5cbb3c6c9c4a6b9cb38fb57e5d3033c8b7d436a1faf6f13
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4388add9cec90932f8ff0100d27a0574d98e1bad52ff89d44e31967d2b4fbfde
3
  size 15024
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66f278b40a1e23b88a657c4e5d03afa8dbbbe14dfeb16f6b4beedaece6cdd0b9
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a705d6dfaae4f2c1b4b2be6b25a6eb521ffae6fcba21cc1531e97b60037ed079
3
  size 15024
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:761876867248b8062e0ec4487c4af77a5b0b0cdcc6159656889f49800c1afb14
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026a51f29db64db8e5e93e3050c875ab8fc3ef2f66f66d4082eab0d09025d56b
3
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -288,146 +288,6 @@
288
  "learning_rate": 2.8134633977057236e-06,
289
  "loss": 0.3623,
290
  "step": 40
291
- },
292
- {
293
- "epoch": 3.425531914893617,
294
- "grad_norm": 0.3110595941543579,
295
- "learning_rate": 2.57724564833675e-06,
296
- "loss": 0.3531,
297
- "step": 41
298
- },
299
- {
300
- "epoch": 3.5106382978723403,
301
- "grad_norm": 0.29042813181877136,
302
- "learning_rate": 2.3478954594012884e-06,
303
- "loss": 0.3585,
304
- "step": 42
305
- },
306
- {
307
- "epoch": 3.595744680851064,
308
- "grad_norm": 0.28844553232192993,
309
- "learning_rate": 2.1260629489279662e-06,
310
- "loss": 0.359,
311
- "step": 43
312
- },
313
- {
314
- "epoch": 3.6808510638297873,
315
- "grad_norm": 0.28630757331848145,
316
- "learning_rate": 1.912376925269041e-06,
317
- "loss": 0.3583,
318
- "step": 44
319
- },
320
- {
321
- "epoch": 3.7659574468085104,
322
- "grad_norm": 0.2970672845840454,
323
- "learning_rate": 1.7074431046748075e-06,
324
- "loss": 0.355,
325
- "step": 45
326
- },
327
- {
328
- "epoch": 3.851063829787234,
329
- "grad_norm": 0.27552056312561035,
330
- "learning_rate": 1.511842394325077e-06,
331
- "loss": 0.3459,
332
- "step": 46
333
- },
334
- {
335
- "epoch": 3.9361702127659575,
336
- "grad_norm": 0.2798301875591278,
337
- "learning_rate": 1.3261292456846648e-06,
338
- "loss": 0.3532,
339
- "step": 47
340
- },
341
- {
342
- "epoch": 4.0,
343
- "grad_norm": 0.3010568916797638,
344
- "learning_rate": 1.1508300828504682e-06,
345
- "loss": 0.3512,
346
- "step": 48
347
- },
348
- {
349
- "epoch": 4.085106382978723,
350
- "grad_norm": 0.2678888142108917,
351
- "learning_rate": 9.86441810345183e-07,
352
- "loss": 0.3537,
353
- "step": 49
354
- },
355
- {
356
- "epoch": 4.170212765957447,
357
- "grad_norm": 0.26379236578941345,
358
- "learning_rate": 8.334304045874248e-07,
359
- "loss": 0.347,
360
- "step": 50
361
- },
362
- {
363
- "epoch": 4.25531914893617,
364
- "grad_norm": 0.2635997235774994,
365
- "learning_rate": 6.922295930309691e-07,
366
- "loss": 0.3485,
367
- "step": 51
368
- },
369
- {
370
- "epoch": 4.340425531914893,
371
- "grad_norm": 0.31183329224586487,
372
- "learning_rate": 5.632396247171429e-07,
373
- "loss": 0.3603,
374
- "step": 52
375
- },
376
- {
377
- "epoch": 4.425531914893617,
378
- "grad_norm": 0.27197587490081787,
379
- "learning_rate": 4.468261357254339e-07,
380
- "loss": 0.3374,
381
- "step": 53
382
- },
383
- {
384
- "epoch": 4.51063829787234,
385
- "grad_norm": 0.2632615566253662,
386
- "learning_rate": 3.433191127383079e-07,
387
- "loss": 0.3548,
388
- "step": 54
389
- },
390
- {
391
- "epoch": 4.595744680851064,
392
- "grad_norm": 0.27395132184028625,
393
- "learning_rate": 2.530119576580936e-07,
394
- "loss": 0.3599,
395
- "step": 55
396
- },
397
- {
398
- "epoch": 4.680851063829787,
399
- "grad_norm": 0.25600942969322205,
400
- "learning_rate": 1.7616065592742038e-07,
401
- "loss": 0.3398,
402
- "step": 56
403
- },
404
- {
405
- "epoch": 4.76595744680851,
406
- "grad_norm": 0.2644282579421997,
407
- "learning_rate": 1.1298305091066664e-07,
408
- "loss": 0.3377,
409
- "step": 57
410
- },
411
- {
412
- "epoch": 4.851063829787234,
413
- "grad_norm": 0.2618866264820099,
414
- "learning_rate": 6.365822639327724e-08,
415
- "loss": 0.3451,
416
- "step": 58
417
- },
418
- {
419
- "epoch": 4.9361702127659575,
420
- "grad_norm": 0.27382051944732666,
421
- "learning_rate": 2.8325998949314536e-08,
422
- "loss": 0.3524,
423
- "step": 59
424
- },
425
- {
426
- "epoch": 5.0,
427
- "grad_norm": 0.2965172231197357,
428
- "learning_rate": 7.0865216161902785e-09,
429
- "loss": 0.3488,
430
- "step": 60
431
  }
432
  ],
433
  "logging_steps": 1,
@@ -442,12 +302,12 @@
442
  "should_evaluate": false,
443
  "should_log": false,
444
  "should_save": true,
445
- "should_training_stop": true
446
  },
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 1.183024509477716e+18,
451
  "train_batch_size": 4,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.3404255319148937,
6
  "eval_steps": 500,
7
+ "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
288
  "learning_rate": 2.8134633977057236e-06,
289
  "loss": 0.3623,
290
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  }
292
  ],
293
  "logging_steps": 1,
 
302
  "should_evaluate": false,
303
  "should_log": false,
304
  "should_save": true,
305
+ "should_training_stop": false
306
  },
307
  "attributes": {}
308
  }
309
  },
310
+ "total_flos": 7.911961183849021e+17,
311
  "train_batch_size": 4,
312
  "trial_name": null,
313
  "trial_params": null