schnell commited on
Commit
93f6f6d
·
1 Parent(s): 7f626cf

Training in progress, epoch 11

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e50c16003edb8883d4149aea560899fa302da8a5ea265041b7b2b0e6e753ea6
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427ddde5949f77db8935be38c6362c73ffac529ca902634168abcb77846282d4
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811189d72660f95f34538db512a28cda03a6481472b571473a39751393e425ed
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:896681ae078ffd1d4e4769bf2d6e9344fd51fb18220bfe249c5a5fc74a830917
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68eb61321f52d319beb3b0aea14b956dcbfbc7a51e4307bb6194168d92337ac6
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac1e8edaf46acd0fd7646a0bb25c1e4b46b0ab04d87474b4fe175e85e479d6d4
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1cd8f88a9cad614bc2f0545599431a338717269118bde820f7a2ebef8c3d177
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f35c7bd008e8e8a41d0ab633d161ba4bfa16e5435c68d6d7e739be6d290d478
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5a601c907d442bfc98aecb4b414394e5a711f038902c4eca06616f9bff709ff
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1b3125518446f5f519d5932574d9e19db8e3320097f9a530448ee7f4f0d7d71
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90712adf282f1d59d322b20c807b46ede7a0fd0f43d34d14f6b8d9ee4800700e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08fe68888b57c19aba30af213d1336bce889e7e0465c4a58939b446db8405c5
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 694730,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8430,11 +8430,854 @@
8430
  "eval_samples_per_second": 969.838,
8431
  "eval_steps_per_second": 40.41,
8432
  "step": 694730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8433
  }
8434
  ],
8435
  "max_steps": 972622,
8436
  "num_train_epochs": 14,
8437
- "total_flos": 4.2724234309696225e+18,
8438
  "trial_name": null,
8439
  "trial_params": null
8440
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.0,
5
+ "global_step": 764203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8430
  "eval_samples_per_second": 969.838,
8431
  "eval_steps_per_second": 40.41,
8432
  "step": 694730
8433
+ },
8434
+ {
8435
+ "epoch": 10.0,
8436
+ "learning_rate": 2.886337554977438e-05,
8437
+ "loss": 2.8173,
8438
+ "step": 695000
8439
+ },
8440
+ {
8441
+ "epoch": 10.01,
8442
+ "learning_rate": 2.8811448808021647e-05,
8443
+ "loss": 2.8197,
8444
+ "step": 695500
8445
+ },
8446
+ {
8447
+ "epoch": 10.02,
8448
+ "learning_rate": 2.8759522066268908e-05,
8449
+ "loss": 2.8121,
8450
+ "step": 696000
8451
+ },
8452
+ {
8453
+ "epoch": 10.03,
8454
+ "learning_rate": 2.8707595324516173e-05,
8455
+ "loss": 2.8132,
8456
+ "step": 696500
8457
+ },
8458
+ {
8459
+ "epoch": 10.03,
8460
+ "learning_rate": 2.8655668582763438e-05,
8461
+ "loss": 2.8159,
8462
+ "step": 697000
8463
+ },
8464
+ {
8465
+ "epoch": 10.04,
8466
+ "learning_rate": 2.8603949547977716e-05,
8467
+ "loss": 2.8141,
8468
+ "step": 697500
8469
+ },
8470
+ {
8471
+ "epoch": 10.05,
8472
+ "learning_rate": 2.855202280622498e-05,
8473
+ "loss": 2.819,
8474
+ "step": 698000
8475
+ },
8476
+ {
8477
+ "epoch": 10.05,
8478
+ "learning_rate": 2.8500096064472242e-05,
8479
+ "loss": 2.8164,
8480
+ "step": 698500
8481
+ },
8482
+ {
8483
+ "epoch": 10.06,
8484
+ "learning_rate": 2.844816932271951e-05,
8485
+ "loss": 2.8188,
8486
+ "step": 699000
8487
+ },
8488
+ {
8489
+ "epoch": 10.07,
8490
+ "learning_rate": 2.8396242580966775e-05,
8491
+ "loss": 2.8136,
8492
+ "step": 699500
8493
+ },
8494
+ {
8495
+ "epoch": 10.08,
8496
+ "learning_rate": 2.8344315839214037e-05,
8497
+ "loss": 2.8099,
8498
+ "step": 700000
8499
+ },
8500
+ {
8501
+ "epoch": 10.08,
8502
+ "learning_rate": 2.8292389097461302e-05,
8503
+ "loss": 2.8207,
8504
+ "step": 700500
8505
+ },
8506
+ {
8507
+ "epoch": 10.09,
8508
+ "learning_rate": 2.824046235570857e-05,
8509
+ "loss": 2.8109,
8510
+ "step": 701000
8511
+ },
8512
+ {
8513
+ "epoch": 10.1,
8514
+ "learning_rate": 2.8188639467439336e-05,
8515
+ "loss": 2.8167,
8516
+ "step": 701500
8517
+ },
8518
+ {
8519
+ "epoch": 10.1,
8520
+ "learning_rate": 2.8136712725686605e-05,
8521
+ "loss": 2.8171,
8522
+ "step": 702000
8523
+ },
8524
+ {
8525
+ "epoch": 10.11,
8526
+ "learning_rate": 2.808478598393387e-05,
8527
+ "loss": 2.8198,
8528
+ "step": 702500
8529
+ },
8530
+ {
8531
+ "epoch": 10.12,
8532
+ "learning_rate": 2.803285924218113e-05,
8533
+ "loss": 2.8165,
8534
+ "step": 703000
8535
+ },
8536
+ {
8537
+ "epoch": 10.13,
8538
+ "learning_rate": 2.7981036353911904e-05,
8539
+ "loss": 2.8162,
8540
+ "step": 703500
8541
+ },
8542
+ {
8543
+ "epoch": 10.13,
8544
+ "learning_rate": 2.7929109612159166e-05,
8545
+ "loss": 2.8141,
8546
+ "step": 704000
8547
+ },
8548
+ {
8549
+ "epoch": 10.14,
8550
+ "learning_rate": 2.787718287040643e-05,
8551
+ "loss": 2.8126,
8552
+ "step": 704500
8553
+ },
8554
+ {
8555
+ "epoch": 10.15,
8556
+ "learning_rate": 2.78252561286537e-05,
8557
+ "loss": 2.8126,
8558
+ "step": 705000
8559
+ },
8560
+ {
8561
+ "epoch": 10.16,
8562
+ "learning_rate": 2.777332938690096e-05,
8563
+ "loss": 2.8168,
8564
+ "step": 705500
8565
+ },
8566
+ {
8567
+ "epoch": 10.16,
8568
+ "learning_rate": 2.7721402645148225e-05,
8569
+ "loss": 2.8162,
8570
+ "step": 706000
8571
+ },
8572
+ {
8573
+ "epoch": 10.17,
8574
+ "learning_rate": 2.7669579756878995e-05,
8575
+ "loss": 2.8156,
8576
+ "step": 706500
8577
+ },
8578
+ {
8579
+ "epoch": 10.18,
8580
+ "learning_rate": 2.761765301512626e-05,
8581
+ "loss": 2.8092,
8582
+ "step": 707000
8583
+ },
8584
+ {
8585
+ "epoch": 10.18,
8586
+ "learning_rate": 2.7565726273373528e-05,
8587
+ "loss": 2.8151,
8588
+ "step": 707500
8589
+ },
8590
+ {
8591
+ "epoch": 10.19,
8592
+ "learning_rate": 2.7513799531620793e-05,
8593
+ "loss": 2.8134,
8594
+ "step": 708000
8595
+ },
8596
+ {
8597
+ "epoch": 10.2,
8598
+ "learning_rate": 2.7461872789868054e-05,
8599
+ "loss": 2.8125,
8600
+ "step": 708500
8601
+ },
8602
+ {
8603
+ "epoch": 10.21,
8604
+ "learning_rate": 2.741015375508233e-05,
8605
+ "loss": 2.8139,
8606
+ "step": 709000
8607
+ },
8608
+ {
8609
+ "epoch": 10.21,
8610
+ "learning_rate": 2.7358227013329597e-05,
8611
+ "loss": 2.8176,
8612
+ "step": 709500
8613
+ },
8614
+ {
8615
+ "epoch": 10.22,
8616
+ "learning_rate": 2.7306300271576862e-05,
8617
+ "loss": 2.8119,
8618
+ "step": 710000
8619
+ },
8620
+ {
8621
+ "epoch": 10.23,
8622
+ "learning_rate": 2.7254373529824124e-05,
8623
+ "loss": 2.8132,
8624
+ "step": 710500
8625
+ },
8626
+ {
8627
+ "epoch": 10.23,
8628
+ "learning_rate": 2.720244678807139e-05,
8629
+ "loss": 2.817,
8630
+ "step": 711000
8631
+ },
8632
+ {
8633
+ "epoch": 10.24,
8634
+ "learning_rate": 2.7150520046318657e-05,
8635
+ "loss": 2.8143,
8636
+ "step": 711500
8637
+ },
8638
+ {
8639
+ "epoch": 10.25,
8640
+ "learning_rate": 2.709859330456592e-05,
8641
+ "loss": 2.8172,
8642
+ "step": 712000
8643
+ },
8644
+ {
8645
+ "epoch": 10.26,
8646
+ "learning_rate": 2.7046666562813183e-05,
8647
+ "loss": 2.8124,
8648
+ "step": 712500
8649
+ },
8650
+ {
8651
+ "epoch": 10.26,
8652
+ "learning_rate": 2.6994739821060448e-05,
8653
+ "loss": 2.814,
8654
+ "step": 713000
8655
+ },
8656
+ {
8657
+ "epoch": 10.27,
8658
+ "learning_rate": 2.6942813079307716e-05,
8659
+ "loss": 2.812,
8660
+ "step": 713500
8661
+ },
8662
+ {
8663
+ "epoch": 10.28,
8664
+ "learning_rate": 2.6890886337554978e-05,
8665
+ "loss": 2.8138,
8666
+ "step": 714000
8667
+ },
8668
+ {
8669
+ "epoch": 10.28,
8670
+ "learning_rate": 2.683906344928575e-05,
8671
+ "loss": 2.8112,
8672
+ "step": 714500
8673
+ },
8674
+ {
8675
+ "epoch": 10.29,
8676
+ "learning_rate": 2.6787136707533012e-05,
8677
+ "loss": 2.811,
8678
+ "step": 715000
8679
+ },
8680
+ {
8681
+ "epoch": 10.3,
8682
+ "learning_rate": 2.6735209965780277e-05,
8683
+ "loss": 2.8142,
8684
+ "step": 715500
8685
+ },
8686
+ {
8687
+ "epoch": 10.31,
8688
+ "learning_rate": 2.6683283224027546e-05,
8689
+ "loss": 2.812,
8690
+ "step": 716000
8691
+ },
8692
+ {
8693
+ "epoch": 10.31,
8694
+ "learning_rate": 2.663135648227481e-05,
8695
+ "loss": 2.8149,
8696
+ "step": 716500
8697
+ },
8698
+ {
8699
+ "epoch": 10.32,
8700
+ "learning_rate": 2.657953359400558e-05,
8701
+ "loss": 2.8093,
8702
+ "step": 717000
8703
+ },
8704
+ {
8705
+ "epoch": 10.33,
8706
+ "learning_rate": 2.6527606852252845e-05,
8707
+ "loss": 2.8155,
8708
+ "step": 717500
8709
+ },
8710
+ {
8711
+ "epoch": 10.33,
8712
+ "learning_rate": 2.6475680110500107e-05,
8713
+ "loss": 2.8115,
8714
+ "step": 718000
8715
+ },
8716
+ {
8717
+ "epoch": 10.34,
8718
+ "learning_rate": 2.642375336874737e-05,
8719
+ "loss": 2.8132,
8720
+ "step": 718500
8721
+ },
8722
+ {
8723
+ "epoch": 10.35,
8724
+ "learning_rate": 2.637182662699464e-05,
8725
+ "loss": 2.8131,
8726
+ "step": 719000
8727
+ },
8728
+ {
8729
+ "epoch": 10.36,
8730
+ "learning_rate": 2.63198998852419e-05,
8731
+ "loss": 2.8088,
8732
+ "step": 719500
8733
+ },
8734
+ {
8735
+ "epoch": 10.36,
8736
+ "learning_rate": 2.6268076996972674e-05,
8737
+ "loss": 2.8079,
8738
+ "step": 720000
8739
+ },
8740
+ {
8741
+ "epoch": 10.37,
8742
+ "learning_rate": 2.6216150255219936e-05,
8743
+ "loss": 2.8035,
8744
+ "step": 720500
8745
+ },
8746
+ {
8747
+ "epoch": 10.38,
8748
+ "learning_rate": 2.61642235134672e-05,
8749
+ "loss": 2.8155,
8750
+ "step": 721000
8751
+ },
8752
+ {
8753
+ "epoch": 10.39,
8754
+ "learning_rate": 2.611229677171447e-05,
8755
+ "loss": 2.8114,
8756
+ "step": 721500
8757
+ },
8758
+ {
8759
+ "epoch": 10.39,
8760
+ "learning_rate": 2.6060473883445235e-05,
8761
+ "loss": 2.8157,
8762
+ "step": 722000
8763
+ },
8764
+ {
8765
+ "epoch": 10.4,
8766
+ "learning_rate": 2.6008547141692504e-05,
8767
+ "loss": 2.8042,
8768
+ "step": 722500
8769
+ },
8770
+ {
8771
+ "epoch": 10.41,
8772
+ "learning_rate": 2.595662039993977e-05,
8773
+ "loss": 2.8137,
8774
+ "step": 723000
8775
+ },
8776
+ {
8777
+ "epoch": 10.41,
8778
+ "learning_rate": 2.590469365818703e-05,
8779
+ "loss": 2.8136,
8780
+ "step": 723500
8781
+ },
8782
+ {
8783
+ "epoch": 10.42,
8784
+ "learning_rate": 2.5852766916434295e-05,
8785
+ "loss": 2.8074,
8786
+ "step": 724000
8787
+ },
8788
+ {
8789
+ "epoch": 10.43,
8790
+ "learning_rate": 2.5800944028165064e-05,
8791
+ "loss": 2.8052,
8792
+ "step": 724500
8793
+ },
8794
+ {
8795
+ "epoch": 10.44,
8796
+ "learning_rate": 2.574901728641233e-05,
8797
+ "loss": 2.8065,
8798
+ "step": 725000
8799
+ },
8800
+ {
8801
+ "epoch": 10.44,
8802
+ "learning_rate": 2.5697090544659598e-05,
8803
+ "loss": 2.814,
8804
+ "step": 725500
8805
+ },
8806
+ {
8807
+ "epoch": 10.45,
8808
+ "learning_rate": 2.5645163802906856e-05,
8809
+ "loss": 2.8128,
8810
+ "step": 726000
8811
+ },
8812
+ {
8813
+ "epoch": 10.46,
8814
+ "learning_rate": 2.5593237061154124e-05,
8815
+ "loss": 2.8117,
8816
+ "step": 726500
8817
+ },
8818
+ {
8819
+ "epoch": 10.46,
8820
+ "learning_rate": 2.554131031940139e-05,
8821
+ "loss": 2.8092,
8822
+ "step": 727000
8823
+ },
8824
+ {
8825
+ "epoch": 10.47,
8826
+ "learning_rate": 2.5489383577648657e-05,
8827
+ "loss": 2.8079,
8828
+ "step": 727500
8829
+ },
8830
+ {
8831
+ "epoch": 10.48,
8832
+ "learning_rate": 2.543745683589592e-05,
8833
+ "loss": 2.8108,
8834
+ "step": 728000
8835
+ },
8836
+ {
8837
+ "epoch": 10.49,
8838
+ "learning_rate": 2.5385633947626692e-05,
8839
+ "loss": 2.8113,
8840
+ "step": 728500
8841
+ },
8842
+ {
8843
+ "epoch": 10.49,
8844
+ "learning_rate": 2.5333811059357458e-05,
8845
+ "loss": 2.8091,
8846
+ "step": 729000
8847
+ },
8848
+ {
8849
+ "epoch": 10.5,
8850
+ "learning_rate": 2.5281884317604726e-05,
8851
+ "loss": 2.8056,
8852
+ "step": 729500
8853
+ },
8854
+ {
8855
+ "epoch": 10.51,
8856
+ "learning_rate": 2.5229957575851988e-05,
8857
+ "loss": 2.8141,
8858
+ "step": 730000
8859
+ },
8860
+ {
8861
+ "epoch": 10.51,
8862
+ "learning_rate": 2.5178030834099253e-05,
8863
+ "loss": 2.8058,
8864
+ "step": 730500
8865
+ },
8866
+ {
8867
+ "epoch": 10.52,
8868
+ "learning_rate": 2.512610409234652e-05,
8869
+ "loss": 2.8084,
8870
+ "step": 731000
8871
+ },
8872
+ {
8873
+ "epoch": 10.53,
8874
+ "learning_rate": 2.5074177350593786e-05,
8875
+ "loss": 2.8066,
8876
+ "step": 731500
8877
+ },
8878
+ {
8879
+ "epoch": 10.54,
8880
+ "learning_rate": 2.5022250608841047e-05,
8881
+ "loss": 2.8056,
8882
+ "step": 732000
8883
+ },
8884
+ {
8885
+ "epoch": 10.54,
8886
+ "learning_rate": 2.4970427720571817e-05,
8887
+ "loss": 2.8085,
8888
+ "step": 732500
8889
+ },
8890
+ {
8891
+ "epoch": 10.55,
8892
+ "learning_rate": 2.4918500978819085e-05,
8893
+ "loss": 2.8108,
8894
+ "step": 733000
8895
+ },
8896
+ {
8897
+ "epoch": 10.56,
8898
+ "learning_rate": 2.4866574237066347e-05,
8899
+ "loss": 2.8139,
8900
+ "step": 733500
8901
+ },
8902
+ {
8903
+ "epoch": 10.57,
8904
+ "learning_rate": 2.4814647495313612e-05,
8905
+ "loss": 2.8046,
8906
+ "step": 734000
8907
+ },
8908
+ {
8909
+ "epoch": 10.57,
8910
+ "learning_rate": 2.4762720753560877e-05,
8911
+ "loss": 2.8119,
8912
+ "step": 734500
8913
+ },
8914
+ {
8915
+ "epoch": 10.58,
8916
+ "learning_rate": 2.471089786529165e-05,
8917
+ "loss": 2.8117,
8918
+ "step": 735000
8919
+ },
8920
+ {
8921
+ "epoch": 10.59,
8922
+ "learning_rate": 2.465897112353891e-05,
8923
+ "loss": 2.8102,
8924
+ "step": 735500
8925
+ },
8926
+ {
8927
+ "epoch": 10.59,
8928
+ "learning_rate": 2.4607044381786176e-05,
8929
+ "loss": 2.8075,
8930
+ "step": 736000
8931
+ },
8932
+ {
8933
+ "epoch": 10.6,
8934
+ "learning_rate": 2.455511764003344e-05,
8935
+ "loss": 2.8127,
8936
+ "step": 736500
8937
+ },
8938
+ {
8939
+ "epoch": 10.61,
8940
+ "learning_rate": 2.450329475176421e-05,
8941
+ "loss": 2.8076,
8942
+ "step": 737000
8943
+ },
8944
+ {
8945
+ "epoch": 10.62,
8946
+ "learning_rate": 2.4451368010011476e-05,
8947
+ "loss": 2.8036,
8948
+ "step": 737500
8949
+ },
8950
+ {
8951
+ "epoch": 10.62,
8952
+ "learning_rate": 2.439944126825874e-05,
8953
+ "loss": 2.8055,
8954
+ "step": 738000
8955
+ },
8956
+ {
8957
+ "epoch": 10.63,
8958
+ "learning_rate": 2.434751452650601e-05,
8959
+ "loss": 2.8104,
8960
+ "step": 738500
8961
+ },
8962
+ {
8963
+ "epoch": 10.64,
8964
+ "learning_rate": 2.429558778475327e-05,
8965
+ "loss": 2.8087,
8966
+ "step": 739000
8967
+ },
8968
+ {
8969
+ "epoch": 10.64,
8970
+ "learning_rate": 2.4243764896484043e-05,
8971
+ "loss": 2.8048,
8972
+ "step": 739500
8973
+ },
8974
+ {
8975
+ "epoch": 10.65,
8976
+ "learning_rate": 2.4191838154731305e-05,
8977
+ "loss": 2.8087,
8978
+ "step": 740000
8979
+ },
8980
+ {
8981
+ "epoch": 10.66,
8982
+ "learning_rate": 2.4139911412978573e-05,
8983
+ "loss": 2.8019,
8984
+ "step": 740500
8985
+ },
8986
+ {
8987
+ "epoch": 10.67,
8988
+ "learning_rate": 2.4087984671225835e-05,
8989
+ "loss": 2.8137,
8990
+ "step": 741000
8991
+ },
8992
+ {
8993
+ "epoch": 10.67,
8994
+ "learning_rate": 2.40360579294731e-05,
8995
+ "loss": 2.808,
8996
+ "step": 741500
8997
+ },
8998
+ {
8999
+ "epoch": 10.68,
9000
+ "learning_rate": 2.398423504120387e-05,
9001
+ "loss": 2.8106,
9002
+ "step": 742000
9003
+ },
9004
+ {
9005
+ "epoch": 10.69,
9006
+ "learning_rate": 2.3932308299451137e-05,
9007
+ "loss": 2.8067,
9008
+ "step": 742500
9009
+ },
9010
+ {
9011
+ "epoch": 10.69,
9012
+ "learning_rate": 2.38803815576984e-05,
9013
+ "loss": 2.8072,
9014
+ "step": 743000
9015
+ },
9016
+ {
9017
+ "epoch": 10.7,
9018
+ "learning_rate": 2.3828454815945664e-05,
9019
+ "loss": 2.8095,
9020
+ "step": 743500
9021
+ },
9022
+ {
9023
+ "epoch": 10.71,
9024
+ "learning_rate": 2.377652807419293e-05,
9025
+ "loss": 2.8015,
9026
+ "step": 744000
9027
+ },
9028
+ {
9029
+ "epoch": 10.72,
9030
+ "learning_rate": 2.3724601332440194e-05,
9031
+ "loss": 2.8056,
9032
+ "step": 744500
9033
+ },
9034
+ {
9035
+ "epoch": 10.72,
9036
+ "learning_rate": 2.367267459068746e-05,
9037
+ "loss": 2.8076,
9038
+ "step": 745000
9039
+ },
9040
+ {
9041
+ "epoch": 10.73,
9042
+ "learning_rate": 2.3620747848934724e-05,
9043
+ "loss": 2.8052,
9044
+ "step": 745500
9045
+ },
9046
+ {
9047
+ "epoch": 10.74,
9048
+ "learning_rate": 2.3568924960665497e-05,
9049
+ "loss": 2.8051,
9050
+ "step": 746000
9051
+ },
9052
+ {
9053
+ "epoch": 10.75,
9054
+ "learning_rate": 2.3516998218912758e-05,
9055
+ "loss": 2.8012,
9056
+ "step": 746500
9057
+ },
9058
+ {
9059
+ "epoch": 10.75,
9060
+ "learning_rate": 2.346517533064353e-05,
9061
+ "loss": 2.8097,
9062
+ "step": 747000
9063
+ },
9064
+ {
9065
+ "epoch": 10.76,
9066
+ "learning_rate": 2.3413248588890793e-05,
9067
+ "loss": 2.802,
9068
+ "step": 747500
9069
+ },
9070
+ {
9071
+ "epoch": 10.77,
9072
+ "learning_rate": 2.336132184713806e-05,
9073
+ "loss": 2.801,
9074
+ "step": 748000
9075
+ },
9076
+ {
9077
+ "epoch": 10.77,
9078
+ "learning_rate": 2.3309395105385322e-05,
9079
+ "loss": 2.8061,
9080
+ "step": 748500
9081
+ },
9082
+ {
9083
+ "epoch": 10.78,
9084
+ "learning_rate": 2.325746836363259e-05,
9085
+ "loss": 2.8079,
9086
+ "step": 749000
9087
+ },
9088
+ {
9089
+ "epoch": 10.79,
9090
+ "learning_rate": 2.3205541621879852e-05,
9091
+ "loss": 2.8062,
9092
+ "step": 749500
9093
+ },
9094
+ {
9095
+ "epoch": 10.8,
9096
+ "learning_rate": 2.3153614880127117e-05,
9097
+ "loss": 2.8007,
9098
+ "step": 750000
9099
+ },
9100
+ {
9101
+ "epoch": 10.8,
9102
+ "learning_rate": 2.3101688138374382e-05,
9103
+ "loss": 2.8057,
9104
+ "step": 750500
9105
+ },
9106
+ {
9107
+ "epoch": 10.81,
9108
+ "learning_rate": 2.304986525010515e-05,
9109
+ "loss": 2.8045,
9110
+ "step": 751000
9111
+ },
9112
+ {
9113
+ "epoch": 10.82,
9114
+ "learning_rate": 2.2997938508352417e-05,
9115
+ "loss": 2.8053,
9116
+ "step": 751500
9117
+ },
9118
+ {
9119
+ "epoch": 10.82,
9120
+ "learning_rate": 2.294601176659968e-05,
9121
+ "loss": 2.8026,
9122
+ "step": 752000
9123
+ },
9124
+ {
9125
+ "epoch": 10.83,
9126
+ "learning_rate": 2.2894085024846946e-05,
9127
+ "loss": 2.805,
9128
+ "step": 752500
9129
+ },
9130
+ {
9131
+ "epoch": 10.84,
9132
+ "learning_rate": 2.2842262136577716e-05,
9133
+ "loss": 2.8023,
9134
+ "step": 753000
9135
+ },
9136
+ {
9137
+ "epoch": 10.85,
9138
+ "learning_rate": 2.2790335394824984e-05,
9139
+ "loss": 2.8072,
9140
+ "step": 753500
9141
+ },
9142
+ {
9143
+ "epoch": 10.85,
9144
+ "learning_rate": 2.2738408653072246e-05,
9145
+ "loss": 2.8021,
9146
+ "step": 754000
9147
+ },
9148
+ {
9149
+ "epoch": 10.86,
9150
+ "learning_rate": 2.2686481911319514e-05,
9151
+ "loss": 2.8027,
9152
+ "step": 754500
9153
+ },
9154
+ {
9155
+ "epoch": 10.87,
9156
+ "learning_rate": 2.2634555169566776e-05,
9157
+ "loss": 2.8036,
9158
+ "step": 755000
9159
+ },
9160
+ {
9161
+ "epoch": 10.87,
9162
+ "learning_rate": 2.258273228129755e-05,
9163
+ "loss": 2.8052,
9164
+ "step": 755500
9165
+ },
9166
+ {
9167
+ "epoch": 10.88,
9168
+ "learning_rate": 2.253080553954481e-05,
9169
+ "loss": 2.8014,
9170
+ "step": 756000
9171
+ },
9172
+ {
9173
+ "epoch": 10.89,
9174
+ "learning_rate": 2.247887879779208e-05,
9175
+ "loss": 2.8009,
9176
+ "step": 756500
9177
+ },
9178
+ {
9179
+ "epoch": 10.9,
9180
+ "learning_rate": 2.242695205603934e-05,
9181
+ "loss": 2.804,
9182
+ "step": 757000
9183
+ },
9184
+ {
9185
+ "epoch": 10.9,
9186
+ "learning_rate": 2.2375129167770113e-05,
9187
+ "loss": 2.8077,
9188
+ "step": 757500
9189
+ },
9190
+ {
9191
+ "epoch": 10.91,
9192
+ "learning_rate": 2.2323202426017374e-05,
9193
+ "loss": 2.7989,
9194
+ "step": 758000
9195
+ },
9196
+ {
9197
+ "epoch": 10.92,
9198
+ "learning_rate": 2.227127568426464e-05,
9199
+ "loss": 2.8051,
9200
+ "step": 758500
9201
+ },
9202
+ {
9203
+ "epoch": 10.93,
9204
+ "learning_rate": 2.2219348942511904e-05,
9205
+ "loss": 2.8048,
9206
+ "step": 759000
9207
+ },
9208
+ {
9209
+ "epoch": 10.93,
9210
+ "learning_rate": 2.216742220075917e-05,
9211
+ "loss": 2.7965,
9212
+ "step": 759500
9213
+ },
9214
+ {
9215
+ "epoch": 10.94,
9216
+ "learning_rate": 2.211559931248994e-05,
9217
+ "loss": 2.801,
9218
+ "step": 760000
9219
+ },
9220
+ {
9221
+ "epoch": 10.95,
9222
+ "learning_rate": 2.2063672570737204e-05,
9223
+ "loss": 2.8038,
9224
+ "step": 760500
9225
+ },
9226
+ {
9227
+ "epoch": 10.95,
9228
+ "learning_rate": 2.201174582898447e-05,
9229
+ "loss": 2.8041,
9230
+ "step": 761000
9231
+ },
9232
+ {
9233
+ "epoch": 10.96,
9234
+ "learning_rate": 2.1959819087231733e-05,
9235
+ "loss": 2.8061,
9236
+ "step": 761500
9237
+ },
9238
+ {
9239
+ "epoch": 10.97,
9240
+ "learning_rate": 2.1907892345479002e-05,
9241
+ "loss": 2.8046,
9242
+ "step": 762000
9243
+ },
9244
+ {
9245
+ "epoch": 10.98,
9246
+ "learning_rate": 2.1856069457209768e-05,
9247
+ "loss": 2.8002,
9248
+ "step": 762500
9249
+ },
9250
+ {
9251
+ "epoch": 10.98,
9252
+ "learning_rate": 2.1804142715457036e-05,
9253
+ "loss": 2.8039,
9254
+ "step": 763000
9255
+ },
9256
+ {
9257
+ "epoch": 10.99,
9258
+ "learning_rate": 2.1752215973704298e-05,
9259
+ "loss": 2.802,
9260
+ "step": 763500
9261
+ },
9262
+ {
9263
+ "epoch": 11.0,
9264
+ "learning_rate": 2.1700289231951566e-05,
9265
+ "loss": 2.8026,
9266
+ "step": 764000
9267
+ },
9268
+ {
9269
+ "epoch": 11.0,
9270
+ "eval_accuracy": 0.5046112569907905,
9271
+ "eval_loss": 2.62461256980896,
9272
+ "eval_runtime": 555.9604,
9273
+ "eval_samples_per_second": 969.38,
9274
+ "eval_steps_per_second": 40.391,
9275
+ "step": 764203
9276
  }
9277
  ],
9278
  "max_steps": 972622,
9279
  "num_train_epochs": 14,
9280
+ "total_flos": 4.6996799356928e+18,
9281
  "trial_name": null,
9282
  "trial_params": null
9283
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25c4e9beee823a5935e50174eb1417fc4c243aa090bff6d0ab7ea1c65de8bda8
3
- size 228856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:691c58d07abd0364109c6890a1a42b4785eb7fdcf6aab10cc7829acc674dcedd
3
+ size 251425