CocoRoF commited on
Commit
8372236
·
verified ·
1 Parent(s): a22a00d

Training in progress, step 4268, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c7c2466e9547634a505ccaf2590f9e4d9d15d2f31d94aa4c0cfe5f155dc10b
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6655c6985e492e8d6919382548a7effa0be42aa4ce41de6a3afb623371f715b6
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05b07ba25a847922c59fe9c0ee222039fd2b55eb27e7164ec80572760094d906
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d24494f3685341ff655be78dd4d6b804adf234b56e7ca404f9eb06e1b340b6
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b8a0379bdd10765d4926325b17779ba084884beedfbdf271680e1d1bd136b43
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a736126bf032a05408714e9a6309ebf595d5e8e36aa317f7cb41422c442e7ab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea8c56402fe28ab6610db127ee707a0d7bbb7e8371ebb7f77b59566a41c7f5ef
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdfd054ee415a43775ee6882ba10b5080791cd5c9e7e77c4915c3e4fc9fe5d58
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8744142455482662,
5
  "eval_steps": 100,
6
- "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3447,6 +3447,220 @@
3447
  "eval_spearman_manhattan": 0.8198041702608989,
3448
  "eval_steps_per_second": 13.426,
3449
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3450
  }
3451
  ],
3452
  "logging_steps": 10,
@@ -3461,7 +3675,7 @@
3461
  "should_evaluate": false,
3462
  "should_log": false,
3463
  "should_save": true,
3464
- "should_training_stop": false
3465
  },
3466
  "attributes": {}
3467
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 100,
6
+ "global_step": 4268,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3447
  "eval_spearman_manhattan": 0.8198041702608989,
3448
  "eval_steps_per_second": 13.426,
3449
  "step": 4000
3450
+ },
3451
+ {
3452
+ "epoch": 1.879100281162137,
3453
+ "grad_norm": 1.8194371461868286,
3454
+ "learning_rate": 4.4127811621368325e-05,
3455
+ "loss": 0.1598,
3456
+ "step": 4010
3457
+ },
3458
+ {
3459
+ "epoch": 1.8837863167760074,
3460
+ "grad_norm": 1.2515980005264282,
3461
+ "learning_rate": 4.411316776007498e-05,
3462
+ "loss": 0.1872,
3463
+ "step": 4020
3464
+ },
3465
+ {
3466
+ "epoch": 1.8884723523898783,
3467
+ "grad_norm": 1.4522411823272705,
3468
+ "learning_rate": 4.409852389878163e-05,
3469
+ "loss": 0.1906,
3470
+ "step": 4030
3471
+ },
3472
+ {
3473
+ "epoch": 1.8931583880037488,
3474
+ "grad_norm": 1.3392481803894043,
3475
+ "learning_rate": 4.408388003748829e-05,
3476
+ "loss": 0.1947,
3477
+ "step": 4040
3478
+ },
3479
+ {
3480
+ "epoch": 1.8978444236176195,
3481
+ "grad_norm": 1.1387908458709717,
3482
+ "learning_rate": 4.406923617619494e-05,
3483
+ "loss": 0.1719,
3484
+ "step": 4050
3485
+ },
3486
+ {
3487
+ "epoch": 1.9025304592314902,
3488
+ "grad_norm": 1.7648086547851562,
3489
+ "learning_rate": 4.4054592314901596e-05,
3490
+ "loss": 0.2144,
3491
+ "step": 4060
3492
+ },
3493
+ {
3494
+ "epoch": 1.9072164948453607,
3495
+ "grad_norm": 1.774842381477356,
3496
+ "learning_rate": 4.403994845360825e-05,
3497
+ "loss": 0.1973,
3498
+ "step": 4070
3499
+ },
3500
+ {
3501
+ "epoch": 1.9119025304592316,
3502
+ "grad_norm": 1.3129111528396606,
3503
+ "learning_rate": 4.4025304592314905e-05,
3504
+ "loss": 0.1723,
3505
+ "step": 4080
3506
+ },
3507
+ {
3508
+ "epoch": 1.9165885660731021,
3509
+ "grad_norm": 1.311933159828186,
3510
+ "learning_rate": 4.401066073102156e-05,
3511
+ "loss": 0.1768,
3512
+ "step": 4090
3513
+ },
3514
+ {
3515
+ "epoch": 1.9212746016869728,
3516
+ "grad_norm": 1.510150671005249,
3517
+ "learning_rate": 4.399601686972821e-05,
3518
+ "loss": 0.1679,
3519
+ "step": 4100
3520
+ },
3521
+ {
3522
+ "epoch": 1.9212746016869728,
3523
+ "eval_loss": 0.034906383603811264,
3524
+ "eval_pearson_cosine": 0.8238323682543012,
3525
+ "eval_pearson_dot": 0.75607099937789,
3526
+ "eval_pearson_euclidean": 0.8097449943516324,
3527
+ "eval_pearson_manhattan": 0.8109217792674599,
3528
+ "eval_runtime": 7.5129,
3529
+ "eval_samples_per_second": 199.657,
3530
+ "eval_spearman_cosine": 0.824907875327154,
3531
+ "eval_spearman_dot": 0.7550909354777231,
3532
+ "eval_spearman_euclidean": 0.8187348127942441,
3533
+ "eval_spearman_manhattan": 0.8200370653936264,
3534
+ "eval_steps_per_second": 12.512,
3535
+ "step": 4100
3536
+ },
3537
+ {
3538
+ "epoch": 1.9259606373008435,
3539
+ "grad_norm": 1.644677996635437,
3540
+ "learning_rate": 4.398137300843487e-05,
3541
+ "loss": 0.1961,
3542
+ "step": 4110
3543
+ },
3544
+ {
3545
+ "epoch": 1.930646672914714,
3546
+ "grad_norm": 1.508178472518921,
3547
+ "learning_rate": 4.396672914714152e-05,
3548
+ "loss": 0.1841,
3549
+ "step": 4120
3550
+ },
3551
+ {
3552
+ "epoch": 1.935332708528585,
3553
+ "grad_norm": 1.5336145162582397,
3554
+ "learning_rate": 4.3952085285848176e-05,
3555
+ "loss": 0.1637,
3556
+ "step": 4130
3557
+ },
3558
+ {
3559
+ "epoch": 1.9400187441424555,
3560
+ "grad_norm": 1.7044395208358765,
3561
+ "learning_rate": 4.3937441424554824e-05,
3562
+ "loss": 0.2063,
3563
+ "step": 4140
3564
+ },
3565
+ {
3566
+ "epoch": 1.9447047797563262,
3567
+ "grad_norm": 1.6980154514312744,
3568
+ "learning_rate": 4.392279756326148e-05,
3569
+ "loss": 0.2118,
3570
+ "step": 4150
3571
+ },
3572
+ {
3573
+ "epoch": 1.9493908153701969,
3574
+ "grad_norm": 1.833633542060852,
3575
+ "learning_rate": 4.390815370196814e-05,
3576
+ "loss": 0.171,
3577
+ "step": 4160
3578
+ },
3579
+ {
3580
+ "epoch": 1.9540768509840674,
3581
+ "grad_norm": 1.7349201440811157,
3582
+ "learning_rate": 4.389350984067479e-05,
3583
+ "loss": 0.1885,
3584
+ "step": 4170
3585
+ },
3586
+ {
3587
+ "epoch": 1.9587628865979383,
3588
+ "grad_norm": 2.3254284858703613,
3589
+ "learning_rate": 4.387886597938145e-05,
3590
+ "loss": 0.1843,
3591
+ "step": 4180
3592
+ },
3593
+ {
3594
+ "epoch": 1.9634489222118088,
3595
+ "grad_norm": 1.1924229860305786,
3596
+ "learning_rate": 4.38642221180881e-05,
3597
+ "loss": 0.1862,
3598
+ "step": 4190
3599
+ },
3600
+ {
3601
+ "epoch": 1.9681349578256795,
3602
+ "grad_norm": 1.5753990411758423,
3603
+ "learning_rate": 4.3849578256794756e-05,
3604
+ "loss": 0.1699,
3605
+ "step": 4200
3606
+ },
3607
+ {
3608
+ "epoch": 1.9681349578256795,
3609
+ "eval_loss": 0.03551472723484039,
3610
+ "eval_pearson_cosine": 0.8273878707711191,
3611
+ "eval_pearson_dot": 0.7646820898603437,
3612
+ "eval_pearson_euclidean": 0.8112987734110177,
3613
+ "eval_pearson_manhattan": 0.8125188338482303,
3614
+ "eval_runtime": 5.9715,
3615
+ "eval_samples_per_second": 251.194,
3616
+ "eval_spearman_cosine": 0.8298080691919564,
3617
+ "eval_spearman_dot": 0.7648333772102188,
3618
+ "eval_spearman_euclidean": 0.8214596205940881,
3619
+ "eval_spearman_manhattan": 0.8226861322419045,
3620
+ "eval_steps_per_second": 15.742,
3621
+ "step": 4200
3622
+ },
3623
+ {
3624
+ "epoch": 1.9728209934395502,
3625
+ "grad_norm": 1.7450155019760132,
3626
+ "learning_rate": 4.383493439550141e-05,
3627
+ "loss": 0.2132,
3628
+ "step": 4210
3629
+ },
3630
+ {
3631
+ "epoch": 1.9775070290534207,
3632
+ "grad_norm": 2.049828290939331,
3633
+ "learning_rate": 4.3820290534208064e-05,
3634
+ "loss": 0.2,
3635
+ "step": 4220
3636
+ },
3637
+ {
3638
+ "epoch": 1.9821930646672916,
3639
+ "grad_norm": 1.8437615633010864,
3640
+ "learning_rate": 4.380564667291472e-05,
3641
+ "loss": 0.1787,
3642
+ "step": 4230
3643
+ },
3644
+ {
3645
+ "epoch": 1.986879100281162,
3646
+ "grad_norm": 1.3667303323745728,
3647
+ "learning_rate": 4.3791002811621366e-05,
3648
+ "loss": 0.1995,
3649
+ "step": 4240
3650
+ },
3651
+ {
3652
+ "epoch": 1.9915651358950328,
3653
+ "grad_norm": 1.3837028741836548,
3654
+ "learning_rate": 4.377635895032802e-05,
3655
+ "loss": 0.2021,
3656
+ "step": 4250
3657
+ },
3658
+ {
3659
+ "epoch": 1.9962511715089035,
3660
+ "grad_norm": 1.6766111850738525,
3661
+ "learning_rate": 4.3761715089034675e-05,
3662
+ "loss": 0.1918,
3663
+ "step": 4260
3664
  }
3665
  ],
3666
  "logging_steps": 10,
 
3675
  "should_evaluate": false,
3676
  "should_log": false,
3677
  "should_save": true,
3678
+ "should_training_stop": true
3679
  },
3680
  "attributes": {}
3681
  }