robertou2 committed on
Commit
e8fe3e4
·
verified ·
1 Parent(s): 436d433

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f38f6c7cc699891d33b43936e13424bf4f5c79f2e529aeddd1d38cb2b4e18803
3
  size 359270696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c9ef3202a5fecea3cf4e8797de14edaf72f05c99e7339f204d4ae79fc4059e
3
  size 359270696
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d10e91706fa7e2ba3d059b9c546fae7154a67c360412992451723c1d1fe5cc02
3
  size 718831691
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d57a71f32bc02f77680270598e164ef53cf521b9842a915012f167967134ca03
3
  size 718831691
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:198cd7bfd4cc7ff1f8428350e0d8215b1fdfe0f5fb7d8e52ae1940a8f9a2b85b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d312928d0bb60518eb9856d5ab0ae1674bcb745294bf27f615cb6d07b0463e
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9d102cc71e10314031dfa77900e5dce67d2852876159da43d82b7787c5824f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5418e0fc9a3a6a50ea3a7b440dfb8b2fa26686b28c8f28256150a09922035962
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -428,6 +428,286 @@
428
  "learning_rate": 0.00023614713127100752,
429
  "loss": 0.5813,
430
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
  ],
433
  "logging_steps": 1,
@@ -442,12 +722,12 @@
442
  "should_evaluate": false,
443
  "should_log": false,
444
  "should_save": true,
445
- "should_training_stop": false
446
  },
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 1.746175843467264e+16,
451
  "train_batch_size": 2,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
  "eval_steps": 500,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
428
  "learning_rate": 0.00023614713127100752,
429
  "loss": 0.5813,
430
  "step": 60
431
+ },
432
+ {
433
+ "epoch": 3.050632911392405,
434
+ "grad_norm": 2.65234112739563,
435
+ "learning_rate": 0.00022693291013417452,
436
+ "loss": 0.7218,
437
+ "step": 61
438
+ },
439
+ {
440
+ "epoch": 3.1012658227848102,
441
+ "grad_norm": 1.9882073402404785,
442
+ "learning_rate": 0.00021775019586744925,
443
+ "loss": 0.5474,
444
+ "step": 62
445
+ },
446
+ {
447
+ "epoch": 3.151898734177215,
448
+ "grad_norm": 2.5399203300476074,
449
+ "learning_rate": 0.0002086115309539675,
450
+ "loss": 0.7906,
451
+ "step": 63
452
+ },
453
+ {
454
+ "epoch": 3.2025316455696204,
455
+ "grad_norm": 1.6655718088150024,
456
+ "learning_rate": 0.0001995293977107475,
457
+ "loss": 0.3726,
458
+ "step": 64
459
+ },
460
+ {
461
+ "epoch": 3.2531645569620253,
462
+ "grad_norm": 2.330378770828247,
463
+ "learning_rate": 0.00019051620123934537,
464
+ "loss": 0.6493,
465
+ "step": 65
466
+ },
467
+ {
468
+ "epoch": 3.3037974683544302,
469
+ "grad_norm": 2.360882043838501,
470
+ "learning_rate": 0.0001815842524819793,
471
+ "loss": 0.6553,
472
+ "step": 66
473
+ },
474
+ {
475
+ "epoch": 3.3544303797468356,
476
+ "grad_norm": 2.726463556289673,
477
+ "learning_rate": 0.00017274575140626317,
478
+ "loss": 0.7101,
479
+ "step": 67
480
+ },
481
+ {
482
+ "epoch": 3.4050632911392404,
483
+ "grad_norm": 2.313028573989868,
484
+ "learning_rate": 0.00016401277034151795,
485
+ "loss": 0.773,
486
+ "step": 68
487
+ },
488
+ {
489
+ "epoch": 3.4556962025316453,
490
+ "grad_norm": 2.5557079315185547,
491
+ "learning_rate": 0.00015539723748942243,
492
+ "loss": 0.7098,
493
+ "step": 69
494
+ },
495
+ {
496
+ "epoch": 3.5063291139240507,
497
+ "grad_norm": 2.11527681350708,
498
+ "learning_rate": 0.00014691092063152418,
499
+ "loss": 0.5224,
500
+ "step": 70
501
+ },
502
+ {
503
+ "epoch": 3.5569620253164556,
504
+ "grad_norm": 2.06489634513855,
505
+ "learning_rate": 0.00013856541105586545,
506
+ "loss": 0.565,
507
+ "step": 71
508
+ },
509
+ {
510
+ "epoch": 3.607594936708861,
511
+ "grad_norm": 2.3769450187683105,
512
+ "learning_rate": 0.0001303721077246784,
513
+ "loss": 0.5167,
514
+ "step": 72
515
+ },
516
+ {
517
+ "epoch": 3.6582278481012658,
518
+ "grad_norm": 3.1446056365966797,
519
+ "learning_rate": 0.0001223422017047733,
520
+ "loss": 0.738,
521
+ "step": 73
522
+ },
523
+ {
524
+ "epoch": 3.708860759493671,
525
+ "grad_norm": 2.9032044410705566,
526
+ "learning_rate": 0.00011448666088188764,
527
+ "loss": 0.84,
528
+ "step": 74
529
+ },
530
+ {
531
+ "epoch": 3.759493670886076,
532
+ "grad_norm": 2.6407244205474854,
533
+ "learning_rate": 0.00010681621497987371,
534
+ "loss": 0.55,
535
+ "step": 75
536
+ },
537
+ {
538
+ "epoch": 3.810126582278481,
539
+ "grad_norm": 2.3889331817626953,
540
+ "learning_rate": 9.934134090518593e-05,
541
+ "loss": 0.6036,
542
+ "step": 76
543
+ },
544
+ {
545
+ "epoch": 3.8607594936708862,
546
+ "grad_norm": 2.591595411300659,
547
+ "learning_rate": 9.207224843668733e-05,
548
+ "loss": 0.4689,
549
+ "step": 77
550
+ },
551
+ {
552
+ "epoch": 3.911392405063291,
553
+ "grad_norm": 2.4760868549346924,
554
+ "learning_rate": 8.50188662803194e-05,
555
+ "loss": 0.4915,
556
+ "step": 78
557
+ },
558
+ {
559
+ "epoch": 3.962025316455696,
560
+ "grad_norm": 2.930954694747925,
561
+ "learning_rate": 7.819082850768433e-05,
562
+ "loss": 0.8307,
563
+ "step": 79
564
+ },
565
+ {
566
+ "epoch": 4.0,
567
+ "grad_norm": 2.1440188884735107,
568
+ "learning_rate": 7.159746139706194e-05,
569
+ "loss": 0.4358,
570
+ "step": 80
571
+ },
572
+ {
573
+ "epoch": 4.050632911392405,
574
+ "grad_norm": 1.4254531860351562,
575
+ "learning_rate": 6.524777069483526e-05,
576
+ "loss": 0.2303,
577
+ "step": 81
578
+ },
579
+ {
580
+ "epoch": 4.10126582278481,
581
+ "grad_norm": 2.0634138584136963,
582
+ "learning_rate": 5.9150429314724254e-05,
583
+ "loss": 0.5029,
584
+ "step": 82
585
+ },
586
+ {
587
+ "epoch": 4.151898734177215,
588
+ "grad_norm": 1.7933154106140137,
589
+ "learning_rate": 5.3313765491629194e-05,
590
+ "loss": 0.3326,
591
+ "step": 83
592
+ },
593
+ {
594
+ "epoch": 4.2025316455696204,
595
+ "grad_norm": 1.1411280632019043,
596
+ "learning_rate": 4.7745751406263163e-05,
597
+ "loss": 0.1651,
598
+ "step": 84
599
+ },
600
+ {
601
+ "epoch": 4.253164556962025,
602
+ "grad_norm": 1.6106775999069214,
603
+ "learning_rate": 4.245399229611238e-05,
604
+ "loss": 0.3276,
605
+ "step": 85
606
+ },
607
+ {
608
+ "epoch": 4.30379746835443,
609
+ "grad_norm": 1.6101868152618408,
610
+ "learning_rate": 3.7445716067596506e-05,
611
+ "loss": 0.3073,
612
+ "step": 86
613
+ },
614
+ {
615
+ "epoch": 4.3544303797468356,
616
+ "grad_norm": 1.1931700706481934,
617
+ "learning_rate": 3.2727763423617915e-05,
618
+ "loss": 0.1378,
619
+ "step": 87
620
+ },
621
+ {
622
+ "epoch": 4.405063291139241,
623
+ "grad_norm": 1.7510262727737427,
624
+ "learning_rate": 2.8306578519984528e-05,
625
+ "loss": 0.2117,
626
+ "step": 88
627
+ },
628
+ {
629
+ "epoch": 4.455696202531645,
630
+ "grad_norm": 1.7942403554916382,
631
+ "learning_rate": 2.4188200163467787e-05,
632
+ "loss": 0.1961,
633
+ "step": 89
634
+ },
635
+ {
636
+ "epoch": 4.506329113924051,
637
+ "grad_norm": 1.7418352365493774,
638
+ "learning_rate": 2.0378253563519245e-05,
639
+ "loss": 0.2915,
640
+ "step": 90
641
+ },
642
+ {
643
+ "epoch": 4.556962025316456,
644
+ "grad_norm": 1.7192072868347168,
645
+ "learning_rate": 1.6881942648911074e-05,
646
+ "loss": 0.1873,
647
+ "step": 91
648
+ },
649
+ {
650
+ "epoch": 4.6075949367088604,
651
+ "grad_norm": 1.5197887420654297,
652
+ "learning_rate": 1.3704042959795133e-05,
653
+ "loss": 0.3229,
654
+ "step": 92
655
+ },
656
+ {
657
+ "epoch": 4.658227848101266,
658
+ "grad_norm": 1.1745399236679077,
659
+ "learning_rate": 1.0848895124889818e-05,
660
+ "loss": 0.1945,
661
+ "step": 93
662
+ },
663
+ {
664
+ "epoch": 4.708860759493671,
665
+ "grad_norm": 1.7969136238098145,
666
+ "learning_rate": 8.320398932703144e-06,
667
+ "loss": 0.2555,
668
+ "step": 94
669
+ },
670
+ {
671
+ "epoch": 4.759493670886076,
672
+ "grad_norm": 1.9828176498413086,
673
+ "learning_rate": 6.12200800489085e-06,
674
+ "loss": 0.2931,
675
+ "step": 95
676
+ },
677
+ {
678
+ "epoch": 4.810126582278481,
679
+ "grad_norm": 1.6430472135543823,
680
+ "learning_rate": 4.256725079024554e-06,
681
+ "loss": 0.1705,
682
+ "step": 96
683
+ },
684
+ {
685
+ "epoch": 4.860759493670886,
686
+ "grad_norm": 1.5796780586242676,
687
+ "learning_rate": 2.7270979072135106e-06,
688
+ "loss": 0.2818,
689
+ "step": 97
690
+ },
691
+ {
692
+ "epoch": 4.911392405063291,
693
+ "grad_norm": 1.7523950338363647,
694
+ "learning_rate": 1.5352157761815977e-06,
695
+ "loss": 0.2395,
696
+ "step": 98
697
+ },
698
+ {
699
+ "epoch": 4.962025316455696,
700
+ "grad_norm": 1.9358466863632202,
701
+ "learning_rate": 6.827066535529947e-07,
702
+ "loss": 0.3112,
703
+ "step": 99
704
+ },
705
+ {
706
+ "epoch": 5.0,
707
+ "grad_norm": 2.1313531398773193,
708
+ "learning_rate": 1.7073496424427348e-07,
709
+ "loss": 0.102,
710
+ "step": 100
711
  }
712
  ],
713
  "logging_steps": 1,
 
722
  "should_evaluate": false,
723
  "should_log": false,
724
  "should_save": true,
725
+ "should_training_stop": true
726
  },
727
  "attributes": {}
728
  }
729
  },
730
+ "total_flos": 2.9265650012307456e+16,
731
  "train_batch_size": 2,
732
  "trial_name": null,
733
  "trial_params": null