gzqaq commited on
Commit
cbea824
·
verified ·
1 Parent(s): 0e17264

Add files using upload-large-folder tool

Browse files
checkpoint-4410.txt ADDED
File without changes
global_step4403/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a6a648da445af15cd5bea321fc11a5c23fdd269644dbce9ca75f1656dc2830
3
+ size 3760226304
latest CHANGED
@@ -1 +1 @@
1
- global_step2995
 
1
+ global_step4403
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55aeaab7a90a7d2a1a4866d187f47e67153d275ac8524a3f6723acf38c0cc7aa
3
  size 4968243304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:806858e608db4065644f4bd3251d595f67c672bfe354e29d0a3c63a5315f59fb
3
  size 4968243304
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f251e4436393cbc69c5d59ce60af68995d48904e2551f59703dbe5d0e4ffb0c
3
  size 4991495816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2d943f5670b6d38e1105bcd1b47158bacb3676741c7fb1a6ead2c9bd91f046
3
  size 4991495816
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bea4aa4a2bcb6fe68408853132fadbdd37b13fa8c5b108c675136d3c2f97b64d
3
  size 4932751040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5567189897093f0418f25ab4c3dd52a386cdb413f1c3a5ce228732fd7bd2a851
3
  size 4932751040
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e72e729811d53e78a6f34e75fcaadc2ca0160800c769b6823646601422e049f4
3
  size 1691924384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:942ed51be07c0f5e4cbe7a38ce3ae081879e5b33d6b626bc70f69e8b38b2fec0
3
  size 1691924384
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7d7e02ffb4d440dce7ab4ce0b5617578ec9ce3672acee7434ed6f1153f1ae0c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1165242405b17b3d6a8186ae61b13dcb1faa5a54320bebd74ef8d71b964bf7
3
  size 15984
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5b40ca759e432b2688f021b81291d74a40f56a205e9842119f7e772275eebd3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562c262916c9997ec644c42fed9655ab28706b74fca20290ca921c4761d6a4b0
3
  size 15984
rng_state_10.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d77ef8825b0e27c584c2d3eaeebfd82ebe38732eae91ff9f69c9a874632730c8
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6fd4911141e2842a3d884ce4d2109eaeeedefd435098cadf9b564c65967a6eb
3
  size 15997
rng_state_11.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f9f9d83cb982f1d825d89bde80de05cf37ecaa787ad08de64e0c8df94e6115
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ba6ece259230c4e521fc019ff78563c612add590722906bc4725a8f02ad5376
3
  size 15997
rng_state_12.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77270e25477a4a35eaceb98ecff060e192a6357bdc322e5d19aa92ecacd4e7e3
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bd528dc10399e3df15ae65f3098a87d293e8d4777932e1875d6dadae5f2760
3
  size 15997
rng_state_13.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9eab08108477ca9ce657269520a7cfc34510e7812e8499eb4c1a918805c8fb0
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ddc21f5027e4c32e3cbc9daa1863acde639199470b640785b2bf069b1758850
3
  size 15997
rng_state_14.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:162d71274c75bc8f1dcbf21836ecdbc2a8c2756b4971b76847083de39d3d4098
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c7d46874297ff7a0b9cc4607aac3d077700d8f60db95b27325fad44d0ed974
3
  size 15997
rng_state_15.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8517010852772c3221be3f5fe18254baa1d12a58a99ef4cabc7765112ce8a7ea
3
  size 15997
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a20510fa92135806b278f3c0f3a327dcede9674b4eb3de8360d7643b2a73f1a
3
  size 15997
rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdaef955ddd36d6bc1c40584113dd6205483e2aa85b02439b8b27e82e02a8359
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d40f8118f513299624ded0a9bcf09778b961635615090409394d4f96f928f6
3
  size 15984
rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b14ae5db356e6512538751d6b386c190754e307cc99cd652d5c6dd891e1f82
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4391f924238a4cb855c4cbdc6d1a14954f785431c75997d05c7a4ee6615dae7
3
  size 15984
rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f26e28be26826eeeed244b77185c67b443ac185175f8d4bf5ba94caa8b271bc5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7b19bb9543a16bf9f4cd96466ac581436f63070f5815f3a7ba57980608994f
3
  size 15984
rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:847cedc1d6ca26f299a132c2ade9754887374acb9d98f26594a85d4c7742d474
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97da4a1ede0a3e0f96411cacd5bfdf84d9355198f7aadc9bcb8be41122043f63
3
  size 15984
rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd043d1690ae0ff6991b03322799a0b28f021427b15fd9f1e5ed8b9905d9307
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544cb6421b975bd5d2b2360a4e666003794e6197ae654d2ad963cd6572a86ede
3
  size 15984
rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:772190f7e6667c865d25fc72da7bdd1b5d39f46fe03bb5c2d754aee1ad3c99c7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d6eb32a23f3bef6262bbcb2eda724b2fd6f5e579969aa27c71a5971331722b
3
  size 15984
rng_state_8.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6632cfcdbb30b4b8f6c2437ffa4a4f76a2a7c729fefd331ecb675d2f63f96bf5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29dee3c545638083bb257f6e165d367893ac8d5dbb07203c355bd5914e9e47a2
3
  size 15984
rng_state_9.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e6c87b73b3f0787cc3976a6cbd289ee87123c4cb74f388630459c010fa9fbeb
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6805b7c016228fa319f57f490b2d47f3b38a21aecf78e7cb6654a92340347d3b
3
  size 15984
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d561373aa5d93d08e27cff4bdee13071587b60cd3f13e8fe91e5701f1a9258dc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c89398e01b5829487b6d28ed0235d051c0f71e779ce1d7835c279e8dbddad4e7
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.788668555240793,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,202 @@
427
  "learning_rate": 6.394557823129253e-06,
428
  "loss": 0.0002,
429
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 50,
@@ -441,12 +637,12 @@
441
  "should_evaluate": false,
442
  "should_log": false,
443
  "should_save": true,
444
- "should_training_stop": false
445
  },
446
  "attributes": {}
447
  }
448
  },
449
- "total_flos": 1.6645832262549504e+16,
450
  "train_batch_size": 4,
451
  "trial_name": null,
452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.979036827195468,
5
  "eval_steps": 500,
6
+ "global_step": 4410,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "learning_rate": 6.394557823129253e-06,
428
  "loss": 0.0002,
429
  "step": 3000
430
+ },
431
+ {
432
+ "epoch": 6.9019830028328615,
433
+ "grad_norm": 0.014847405573922268,
434
+ "learning_rate": 6.16780045351474e-06,
435
+ "loss": 0.0002,
436
+ "step": 3050
437
+ },
438
+ {
439
+ "epoch": 7.013597733711048,
440
+ "grad_norm": 0.018305480145040905,
441
+ "learning_rate": 5.9410430839002275e-06,
442
+ "loss": 0.0002,
443
+ "step": 3100
444
+ },
445
+ {
446
+ "epoch": 7.126912181303116,
447
+ "grad_norm": 0.012819862819578573,
448
+ "learning_rate": 5.7142857142857145e-06,
449
+ "loss": 0.0001,
450
+ "step": 3150
451
+ },
452
+ {
453
+ "epoch": 7.240226628895184,
454
+ "grad_norm": 0.013371528432039266,
455
+ "learning_rate": 5.487528344671202e-06,
456
+ "loss": 0.0001,
457
+ "step": 3200
458
+ },
459
+ {
460
+ "epoch": 7.353541076487252,
461
+ "grad_norm": 0.005633164169599245,
462
+ "learning_rate": 5.260770975056689e-06,
463
+ "loss": 0.0001,
464
+ "step": 3250
465
+ },
466
+ {
467
+ "epoch": 7.46685552407932,
468
+ "grad_norm": 0.01061442292135964,
469
+ "learning_rate": 5.034013605442177e-06,
470
+ "loss": 0.0001,
471
+ "step": 3300
472
+ },
473
+ {
474
+ "epoch": 7.580169971671388,
475
+ "grad_norm": 0.00907172882592498,
476
+ "learning_rate": 4.807256235827665e-06,
477
+ "loss": 0.0001,
478
+ "step": 3350
479
+ },
480
+ {
481
+ "epoch": 7.693484419263456,
482
+ "grad_norm": 0.010227726380358893,
483
+ "learning_rate": 4.580498866213152e-06,
484
+ "loss": 0.0001,
485
+ "step": 3400
486
+ },
487
+ {
488
+ "epoch": 7.806798866855524,
489
+ "grad_norm": 0.008230239860332859,
490
+ "learning_rate": 4.35374149659864e-06,
491
+ "loss": 0.0001,
492
+ "step": 3450
493
+ },
494
+ {
495
+ "epoch": 7.920113314447592,
496
+ "grad_norm": 0.013256768674645692,
497
+ "learning_rate": 4.126984126984127e-06,
498
+ "loss": 0.0001,
499
+ "step": 3500
500
+ },
501
+ {
502
+ "epoch": 8.03172804532578,
503
+ "grad_norm": 0.01006097891003743,
504
+ "learning_rate": 3.9002267573696154e-06,
505
+ "loss": 0.0001,
506
+ "step": 3550
507
+ },
508
+ {
509
+ "epoch": 8.145042492917847,
510
+ "grad_norm": 0.009602466515273548,
511
+ "learning_rate": 3.6734693877551024e-06,
512
+ "loss": 0.0001,
513
+ "step": 3600
514
+ },
515
+ {
516
+ "epoch": 8.258356940509914,
517
+ "grad_norm": 0.005795654820959573,
518
+ "learning_rate": 3.44671201814059e-06,
519
+ "loss": 0.0001,
520
+ "step": 3650
521
+ },
522
+ {
523
+ "epoch": 8.371671388101984,
524
+ "grad_norm": 0.0034753960503702283,
525
+ "learning_rate": 3.2199546485260772e-06,
526
+ "loss": 0.0001,
527
+ "step": 3700
528
+ },
529
+ {
530
+ "epoch": 8.48498583569405,
531
+ "grad_norm": 0.006716712372637811,
532
+ "learning_rate": 2.993197278911565e-06,
533
+ "loss": 0.0001,
534
+ "step": 3750
535
+ },
536
+ {
537
+ "epoch": 8.598300283286118,
538
+ "grad_norm": 0.005691017397956627,
539
+ "learning_rate": 2.7664399092970525e-06,
540
+ "loss": 0.0001,
541
+ "step": 3800
542
+ },
543
+ {
544
+ "epoch": 8.711614730878187,
545
+ "grad_norm": 0.005512521725963195,
546
+ "learning_rate": 2.53968253968254e-06,
547
+ "loss": 0.0001,
548
+ "step": 3850
549
+ },
550
+ {
551
+ "epoch": 8.824929178470255,
552
+ "grad_norm": 0.004574835289146862,
553
+ "learning_rate": 2.3129251700680273e-06,
554
+ "loss": 0.0001,
555
+ "step": 3900
556
+ },
557
+ {
558
+ "epoch": 8.938243626062324,
559
+ "grad_norm": 0.005810520530240234,
560
+ "learning_rate": 2.086167800453515e-06,
561
+ "loss": 0.0001,
562
+ "step": 3950
563
+ },
564
+ {
565
+ "epoch": 9.04985835694051,
566
+ "grad_norm": 0.0032799015697558206,
567
+ "learning_rate": 1.8594104308390023e-06,
568
+ "loss": 0.0001,
569
+ "step": 4000
570
+ },
571
+ {
572
+ "epoch": 9.163172804532579,
573
+ "grad_norm": 0.0021797625948262924,
574
+ "learning_rate": 1.6326530612244897e-06,
575
+ "loss": 0.0,
576
+ "step": 4050
577
+ },
578
+ {
579
+ "epoch": 9.276487252124646,
580
+ "grad_norm": 0.003847390080167819,
581
+ "learning_rate": 1.4058956916099775e-06,
582
+ "loss": 0.0,
583
+ "step": 4100
584
+ },
585
+ {
586
+ "epoch": 9.389801699716713,
587
+ "grad_norm": 0.0025440319025627875,
588
+ "learning_rate": 1.179138321995465e-06,
589
+ "loss": 0.0,
590
+ "step": 4150
591
+ },
592
+ {
593
+ "epoch": 9.503116147308782,
594
+ "grad_norm": 0.002651261609298388,
595
+ "learning_rate": 9.523809523809525e-07,
596
+ "loss": 0.0,
597
+ "step": 4200
598
+ },
599
+ {
600
+ "epoch": 9.61643059490085,
601
+ "grad_norm": 0.0033509868369888817,
602
+ "learning_rate": 7.2562358276644e-07,
603
+ "loss": 0.0,
604
+ "step": 4250
605
+ },
606
+ {
607
+ "epoch": 9.729745042492917,
608
+ "grad_norm": 0.011727033108113469,
609
+ "learning_rate": 4.988662131519275e-07,
610
+ "loss": 0.0,
611
+ "step": 4300
612
+ },
613
+ {
614
+ "epoch": 9.843059490084986,
615
+ "grad_norm": 0.005342769601421854,
616
+ "learning_rate": 2.72108843537415e-07,
617
+ "loss": 0.0,
618
+ "step": 4350
619
+ },
620
+ {
621
+ "epoch": 9.956373937677053,
622
+ "grad_norm": 0.004383322417463679,
623
+ "learning_rate": 4.53514739229025e-08,
624
+ "loss": 0.0,
625
+ "step": 4400
626
  }
627
  ],
628
  "logging_steps": 50,
 
637
  "should_evaluate": false,
638
  "should_log": false,
639
  "should_save": true,
640
+ "should_training_stop": true
641
  },
642
  "attributes": {}
643
  }
644
  },
645
+ "total_flos": 2.446988850023629e+16,
646
  "train_batch_size": 4,
647
  "trial_name": null,
648
  "trial_params": null