CocoRoF commited on
Commit
3d5367e
·
verified ·
1 Parent(s): baf8456

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d7c345217153b5ffddd752365249e312bba97798c34c7461fe730d174535d39
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b588761b50be65d8796badee7ef8b1410198e0580687e0e9b4fb211a20c99b
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:528a95b443b1dbf7f94d3bb468ca4f53192fe7e23b946971046c87c0b9045e56
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a271571edbbdb0723201c1dcd4e66117d96525b325fc1fd6acf49af21cc25818
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b017ee0b8d5b0208434fd4a2a199fc3b93d7395c3472e6edb0f63512bbc20c3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f92435eb6dc7f41ea95fa3d980a0666d4a13b153aeb2cd8cebe90dc94dc1f10
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:193283a3db7c41b10c55462d4cd2b8ef19c4b39369364b96fdc0d6e4e4e07810
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db93a2f6abb2c48f8c464d140e6d8e94f07e6e65ef70a39fa77270a82587ab1
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.154639175257732,
5
  "eval_steps": 250,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8411,6 +8411,770 @@
8411
  "eval_spearman_manhattan": 0.7590724893258253,
8412
  "eval_steps_per_second": 37.21,
8413
  "step": 11000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8414
  }
8415
  ],
8416
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.623242736644799,
5
  "eval_steps": 250,
6
+ "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8411
  "eval_spearman_manhattan": 0.7590724893258253,
8412
  "eval_steps_per_second": 37.21,
8413
  "step": 11000
8414
+ },
8415
+ {
8416
+ "epoch": 5.159325210871603,
8417
+ "grad_norm": 2.7338879108428955,
8418
+ "learning_rate": 9.35508434864105e-06,
8419
+ "loss": 0.1157,
8420
+ "step": 11010
8421
+ },
8422
+ {
8423
+ "epoch": 5.164011246485473,
8424
+ "grad_norm": 2.1284737586975098,
8425
+ "learning_rate": 9.354498594189316e-06,
8426
+ "loss": 0.1073,
8427
+ "step": 11020
8428
+ },
8429
+ {
8430
+ "epoch": 5.168697282099344,
8431
+ "grad_norm": 1.506453037261963,
8432
+ "learning_rate": 9.353912839737583e-06,
8433
+ "loss": 0.1119,
8434
+ "step": 11030
8435
+ },
8436
+ {
8437
+ "epoch": 5.173383317713214,
8438
+ "grad_norm": 2.2100448608398438,
8439
+ "learning_rate": 9.35332708528585e-06,
8440
+ "loss": 0.1091,
8441
+ "step": 11040
8442
+ },
8443
+ {
8444
+ "epoch": 5.178069353327086,
8445
+ "grad_norm": 2.5225071907043457,
8446
+ "learning_rate": 9.352741330834115e-06,
8447
+ "loss": 0.1089,
8448
+ "step": 11050
8449
+ },
8450
+ {
8451
+ "epoch": 5.182755388940956,
8452
+ "grad_norm": 2.282289981842041,
8453
+ "learning_rate": 9.352155576382382e-06,
8454
+ "loss": 0.1165,
8455
+ "step": 11060
8456
+ },
8457
+ {
8458
+ "epoch": 5.187441424554827,
8459
+ "grad_norm": 2.467607259750366,
8460
+ "learning_rate": 9.351569821930647e-06,
8461
+ "loss": 0.1104,
8462
+ "step": 11070
8463
+ },
8464
+ {
8465
+ "epoch": 5.192127460168697,
8466
+ "grad_norm": 2.524582624435425,
8467
+ "learning_rate": 9.350984067478914e-06,
8468
+ "loss": 0.1316,
8469
+ "step": 11080
8470
+ },
8471
+ {
8472
+ "epoch": 5.196813495782568,
8473
+ "grad_norm": 2.6294236183166504,
8474
+ "learning_rate": 9.350398313027181e-06,
8475
+ "loss": 0.1068,
8476
+ "step": 11090
8477
+ },
8478
+ {
8479
+ "epoch": 5.201499531396439,
8480
+ "grad_norm": 2.033003091812134,
8481
+ "learning_rate": 9.349812558575446e-06,
8482
+ "loss": 0.1025,
8483
+ "step": 11100
8484
+ },
8485
+ {
8486
+ "epoch": 5.206185567010309,
8487
+ "grad_norm": 1.909003734588623,
8488
+ "learning_rate": 9.349226804123713e-06,
8489
+ "loss": 0.0948,
8490
+ "step": 11110
8491
+ },
8492
+ {
8493
+ "epoch": 5.21087160262418,
8494
+ "grad_norm": 2.284494400024414,
8495
+ "learning_rate": 9.348641049671978e-06,
8496
+ "loss": 0.1456,
8497
+ "step": 11120
8498
+ },
8499
+ {
8500
+ "epoch": 5.21555763823805,
8501
+ "grad_norm": 2.152364492416382,
8502
+ "learning_rate": 9.348055295220243e-06,
8503
+ "loss": 0.1067,
8504
+ "step": 11130
8505
+ },
8506
+ {
8507
+ "epoch": 5.220243673851921,
8508
+ "grad_norm": 1.9327460527420044,
8509
+ "learning_rate": 9.34746954076851e-06,
8510
+ "loss": 0.1041,
8511
+ "step": 11140
8512
+ },
8513
+ {
8514
+ "epoch": 5.224929709465792,
8515
+ "grad_norm": 1.4854366779327393,
8516
+ "learning_rate": 9.346883786316777e-06,
8517
+ "loss": 0.1014,
8518
+ "step": 11150
8519
+ },
8520
+ {
8521
+ "epoch": 5.229615745079663,
8522
+ "grad_norm": 2.438124418258667,
8523
+ "learning_rate": 9.346298031865043e-06,
8524
+ "loss": 0.1009,
8525
+ "step": 11160
8526
+ },
8527
+ {
8528
+ "epoch": 5.234301780693533,
8529
+ "grad_norm": 2.2534475326538086,
8530
+ "learning_rate": 9.34571227741331e-06,
8531
+ "loss": 0.1204,
8532
+ "step": 11170
8533
+ },
8534
+ {
8535
+ "epoch": 5.238987816307404,
8536
+ "grad_norm": 1.7294119596481323,
8537
+ "learning_rate": 9.345126522961575e-06,
8538
+ "loss": 0.1156,
8539
+ "step": 11180
8540
+ },
8541
+ {
8542
+ "epoch": 5.243673851921274,
8543
+ "grad_norm": 2.046323299407959,
8544
+ "learning_rate": 9.344540768509842e-06,
8545
+ "loss": 0.1052,
8546
+ "step": 11190
8547
+ },
8548
+ {
8549
+ "epoch": 5.248359887535146,
8550
+ "grad_norm": 1.4913642406463623,
8551
+ "learning_rate": 9.343955014058108e-06,
8552
+ "loss": 0.1108,
8553
+ "step": 11200
8554
+ },
8555
+ {
8556
+ "epoch": 5.253045923149016,
8557
+ "grad_norm": 2.705307722091675,
8558
+ "learning_rate": 9.343369259606374e-06,
8559
+ "loss": 0.1196,
8560
+ "step": 11210
8561
+ },
8562
+ {
8563
+ "epoch": 5.257731958762887,
8564
+ "grad_norm": 2.288198709487915,
8565
+ "learning_rate": 9.34278350515464e-06,
8566
+ "loss": 0.1111,
8567
+ "step": 11220
8568
+ },
8569
+ {
8570
+ "epoch": 5.262417994376757,
8571
+ "grad_norm": 1.7562745809555054,
8572
+ "learning_rate": 9.342197750702906e-06,
8573
+ "loss": 0.1091,
8574
+ "step": 11230
8575
+ },
8576
+ {
8577
+ "epoch": 5.2671040299906275,
8578
+ "grad_norm": 1.7820425033569336,
8579
+ "learning_rate": 9.341611996251173e-06,
8580
+ "loss": 0.1274,
8581
+ "step": 11240
8582
+ },
8583
+ {
8584
+ "epoch": 5.271790065604499,
8585
+ "grad_norm": 1.3509507179260254,
8586
+ "learning_rate": 9.341026241799438e-06,
8587
+ "loss": 0.1082,
8588
+ "step": 11250
8589
+ },
8590
+ {
8591
+ "epoch": 5.271790065604499,
8592
+ "eval_loss": 0.04377983510494232,
8593
+ "eval_pearson_cosine": 0.777409933262625,
8594
+ "eval_pearson_dot": 0.6039188474436514,
8595
+ "eval_pearson_euclidean": 0.7432758495109439,
8596
+ "eval_pearson_manhattan": 0.742989414571646,
8597
+ "eval_runtime": 40.4868,
8598
+ "eval_samples_per_second": 37.049,
8599
+ "eval_spearman_cosine": 0.7818314076527526,
8600
+ "eval_spearman_dot": 0.6129138744459114,
8601
+ "eval_spearman_euclidean": 0.7593037528355521,
8602
+ "eval_spearman_manhattan": 0.7591440996970567,
8603
+ "eval_steps_per_second": 37.049,
8604
+ "step": 11250
8605
+ },
8606
+ {
8607
+ "epoch": 5.276476101218369,
8608
+ "grad_norm": 1.9400215148925781,
8609
+ "learning_rate": 9.340440487347705e-06,
8610
+ "loss": 0.1033,
8611
+ "step": 11260
8612
+ },
8613
+ {
8614
+ "epoch": 5.28116213683224,
8615
+ "grad_norm": 2.5327563285827637,
8616
+ "learning_rate": 9.339854732895972e-06,
8617
+ "loss": 0.1006,
8618
+ "step": 11270
8619
+ },
8620
+ {
8621
+ "epoch": 5.28584817244611,
8622
+ "grad_norm": 2.5133285522460938,
8623
+ "learning_rate": 9.339268978444237e-06,
8624
+ "loss": 0.1092,
8625
+ "step": 11280
8626
+ },
8627
+ {
8628
+ "epoch": 5.290534208059981,
8629
+ "grad_norm": 2.192230224609375,
8630
+ "learning_rate": 9.338683223992502e-06,
8631
+ "loss": 0.0995,
8632
+ "step": 11290
8633
+ },
8634
+ {
8635
+ "epoch": 5.295220243673852,
8636
+ "grad_norm": 2.7413835525512695,
8637
+ "learning_rate": 9.33809746954077e-06,
8638
+ "loss": 0.1253,
8639
+ "step": 11300
8640
+ },
8641
+ {
8642
+ "epoch": 5.299906279287723,
8643
+ "grad_norm": 2.4897637367248535,
8644
+ "learning_rate": 9.337511715089036e-06,
8645
+ "loss": 0.1231,
8646
+ "step": 11310
8647
+ },
8648
+ {
8649
+ "epoch": 5.304592314901593,
8650
+ "grad_norm": 2.837437629699707,
8651
+ "learning_rate": 9.336925960637301e-06,
8652
+ "loss": 0.1061,
8653
+ "step": 11320
8654
+ },
8655
+ {
8656
+ "epoch": 5.309278350515464,
8657
+ "grad_norm": 2.932845115661621,
8658
+ "learning_rate": 9.336340206185568e-06,
8659
+ "loss": 0.1128,
8660
+ "step": 11330
8661
+ },
8662
+ {
8663
+ "epoch": 5.313964386129334,
8664
+ "grad_norm": 2.7689220905303955,
8665
+ "learning_rate": 9.335754451733833e-06,
8666
+ "loss": 0.106,
8667
+ "step": 11340
8668
+ },
8669
+ {
8670
+ "epoch": 5.318650421743206,
8671
+ "grad_norm": 2.0174484252929688,
8672
+ "learning_rate": 9.3351686972821e-06,
8673
+ "loss": 0.1068,
8674
+ "step": 11350
8675
+ },
8676
+ {
8677
+ "epoch": 5.323336457357076,
8678
+ "grad_norm": 1.9986820220947266,
8679
+ "learning_rate": 9.334582942830366e-06,
8680
+ "loss": 0.1138,
8681
+ "step": 11360
8682
+ },
8683
+ {
8684
+ "epoch": 5.3280224929709465,
8685
+ "grad_norm": 2.263935089111328,
8686
+ "learning_rate": 9.333997188378632e-06,
8687
+ "loss": 0.1215,
8688
+ "step": 11370
8689
+ },
8690
+ {
8691
+ "epoch": 5.332708528584817,
8692
+ "grad_norm": 2.540949821472168,
8693
+ "learning_rate": 9.3334114339269e-06,
8694
+ "loss": 0.1139,
8695
+ "step": 11380
8696
+ },
8697
+ {
8698
+ "epoch": 5.3373945641986875,
8699
+ "grad_norm": 2.293947219848633,
8700
+ "learning_rate": 9.332825679475165e-06,
8701
+ "loss": 0.1113,
8702
+ "step": 11390
8703
+ },
8704
+ {
8705
+ "epoch": 5.342080599812559,
8706
+ "grad_norm": 1.2586168050765991,
8707
+ "learning_rate": 9.332239925023432e-06,
8708
+ "loss": 0.1072,
8709
+ "step": 11400
8710
+ },
8711
+ {
8712
+ "epoch": 5.346766635426429,
8713
+ "grad_norm": 2.605865955352783,
8714
+ "learning_rate": 9.331654170571697e-06,
8715
+ "loss": 0.1032,
8716
+ "step": 11410
8717
+ },
8718
+ {
8719
+ "epoch": 5.3514526710403,
8720
+ "grad_norm": 2.003467321395874,
8721
+ "learning_rate": 9.331068416119962e-06,
8722
+ "loss": 0.1107,
8723
+ "step": 11420
8724
+ },
8725
+ {
8726
+ "epoch": 5.35613870665417,
8727
+ "grad_norm": 2.439173460006714,
8728
+ "learning_rate": 9.33048266166823e-06,
8729
+ "loss": 0.1055,
8730
+ "step": 11430
8731
+ },
8732
+ {
8733
+ "epoch": 5.360824742268041,
8734
+ "grad_norm": 2.154588460922241,
8735
+ "learning_rate": 9.329896907216496e-06,
8736
+ "loss": 0.1008,
8737
+ "step": 11440
8738
+ },
8739
+ {
8740
+ "epoch": 5.365510777881912,
8741
+ "grad_norm": 2.530766725540161,
8742
+ "learning_rate": 9.329311152764761e-06,
8743
+ "loss": 0.1062,
8744
+ "step": 11450
8745
+ },
8746
+ {
8747
+ "epoch": 5.370196813495783,
8748
+ "grad_norm": 1.994178056716919,
8749
+ "learning_rate": 9.328725398313028e-06,
8750
+ "loss": 0.1472,
8751
+ "step": 11460
8752
+ },
8753
+ {
8754
+ "epoch": 5.374882849109653,
8755
+ "grad_norm": 2.8370258808135986,
8756
+ "learning_rate": 9.328139643861293e-06,
8757
+ "loss": 0.1098,
8758
+ "step": 11470
8759
+ },
8760
+ {
8761
+ "epoch": 5.379568884723524,
8762
+ "grad_norm": 1.487667441368103,
8763
+ "learning_rate": 9.32755388940956e-06,
8764
+ "loss": 0.1124,
8765
+ "step": 11480
8766
+ },
8767
+ {
8768
+ "epoch": 5.384254920337394,
8769
+ "grad_norm": 2.180283308029175,
8770
+ "learning_rate": 9.326968134957827e-06,
8771
+ "loss": 0.1181,
8772
+ "step": 11490
8773
+ },
8774
+ {
8775
+ "epoch": 5.3889409559512655,
8776
+ "grad_norm": 2.2690398693084717,
8777
+ "learning_rate": 9.326382380506092e-06,
8778
+ "loss": 0.1138,
8779
+ "step": 11500
8780
+ },
8781
+ {
8782
+ "epoch": 5.3889409559512655,
8783
+ "eval_loss": 0.04145639017224312,
8784
+ "eval_pearson_cosine": 0.782895609059949,
8785
+ "eval_pearson_dot": 0.63470693002958,
8786
+ "eval_pearson_euclidean": 0.740956945446257,
8787
+ "eval_pearson_manhattan": 0.740521959356709,
8788
+ "eval_runtime": 40.2743,
8789
+ "eval_samples_per_second": 37.245,
8790
+ "eval_spearman_cosine": 0.7869693899558259,
8791
+ "eval_spearman_dot": 0.6463692784357133,
8792
+ "eval_spearman_euclidean": 0.7560888074878002,
8793
+ "eval_spearman_manhattan": 0.7559675920990712,
8794
+ "eval_steps_per_second": 37.245,
8795
+ "step": 11500
8796
+ },
8797
+ {
8798
+ "epoch": 5.393626991565136,
8799
+ "grad_norm": 1.9818650484085083,
8800
+ "learning_rate": 9.32579662605436e-06,
8801
+ "loss": 0.1057,
8802
+ "step": 11510
8803
+ },
8804
+ {
8805
+ "epoch": 5.3983130271790065,
8806
+ "grad_norm": 2.286259651184082,
8807
+ "learning_rate": 9.325210871602624e-06,
8808
+ "loss": 0.1091,
8809
+ "step": 11520
8810
+ },
8811
+ {
8812
+ "epoch": 5.402999062792877,
8813
+ "grad_norm": 1.2215278148651123,
8814
+ "learning_rate": 9.324625117150891e-06,
8815
+ "loss": 0.0859,
8816
+ "step": 11530
8817
+ },
8818
+ {
8819
+ "epoch": 5.4076850984067475,
8820
+ "grad_norm": 2.2619011402130127,
8821
+ "learning_rate": 9.324039362699158e-06,
8822
+ "loss": 0.1281,
8823
+ "step": 11540
8824
+ },
8825
+ {
8826
+ "epoch": 5.412371134020619,
8827
+ "grad_norm": 1.313723087310791,
8828
+ "learning_rate": 9.323453608247423e-06,
8829
+ "loss": 0.0844,
8830
+ "step": 11550
8831
+ },
8832
+ {
8833
+ "epoch": 5.417057169634489,
8834
+ "grad_norm": 1.5897408723831177,
8835
+ "learning_rate": 9.32286785379569e-06,
8836
+ "loss": 0.1006,
8837
+ "step": 11560
8838
+ },
8839
+ {
8840
+ "epoch": 5.42174320524836,
8841
+ "grad_norm": 2.7910852432250977,
8842
+ "learning_rate": 9.322282099343956e-06,
8843
+ "loss": 0.1199,
8844
+ "step": 11570
8845
+ },
8846
+ {
8847
+ "epoch": 5.42642924086223,
8848
+ "grad_norm": 1.7807790040969849,
8849
+ "learning_rate": 9.32169634489222e-06,
8850
+ "loss": 0.1063,
8851
+ "step": 11580
8852
+ },
8853
+ {
8854
+ "epoch": 5.431115276476101,
8855
+ "grad_norm": 2.412628650665283,
8856
+ "learning_rate": 9.32111059044049e-06,
8857
+ "loss": 0.103,
8858
+ "step": 11590
8859
+ },
8860
+ {
8861
+ "epoch": 5.435801312089972,
8862
+ "grad_norm": 1.029317855834961,
8863
+ "learning_rate": 9.320524835988755e-06,
8864
+ "loss": 0.1025,
8865
+ "step": 11600
8866
+ },
8867
+ {
8868
+ "epoch": 5.440487347703843,
8869
+ "grad_norm": 2.206700563430786,
8870
+ "learning_rate": 9.31993908153702e-06,
8871
+ "loss": 0.114,
8872
+ "step": 11610
8873
+ },
8874
+ {
8875
+ "epoch": 5.445173383317713,
8876
+ "grad_norm": 1.643839716911316,
8877
+ "learning_rate": 9.319353327085287e-06,
8878
+ "loss": 0.1155,
8879
+ "step": 11620
8880
+ },
8881
+ {
8882
+ "epoch": 5.449859418931584,
8883
+ "grad_norm": 2.211933135986328,
8884
+ "learning_rate": 9.318767572633552e-06,
8885
+ "loss": 0.1096,
8886
+ "step": 11630
8887
+ },
8888
+ {
8889
+ "epoch": 5.454545454545454,
8890
+ "grad_norm": 2.2456345558166504,
8891
+ "learning_rate": 9.318181818181819e-06,
8892
+ "loss": 0.1153,
8893
+ "step": 11640
8894
+ },
8895
+ {
8896
+ "epoch": 5.4592314901593255,
8897
+ "grad_norm": 2.144434928894043,
8898
+ "learning_rate": 9.317596063730086e-06,
8899
+ "loss": 0.1245,
8900
+ "step": 11650
8901
+ },
8902
+ {
8903
+ "epoch": 5.463917525773196,
8904
+ "grad_norm": 1.8856313228607178,
8905
+ "learning_rate": 9.317010309278351e-06,
8906
+ "loss": 0.1139,
8907
+ "step": 11660
8908
+ },
8909
+ {
8910
+ "epoch": 5.4686035613870665,
8911
+ "grad_norm": 2.399268627166748,
8912
+ "learning_rate": 9.316424554826618e-06,
8913
+ "loss": 0.1189,
8914
+ "step": 11670
8915
+ },
8916
+ {
8917
+ "epoch": 5.473289597000937,
8918
+ "grad_norm": 2.7165584564208984,
8919
+ "learning_rate": 9.315838800374883e-06,
8920
+ "loss": 0.1016,
8921
+ "step": 11680
8922
+ },
8923
+ {
8924
+ "epoch": 5.4779756326148075,
8925
+ "grad_norm": 2.1864237785339355,
8926
+ "learning_rate": 9.31525304592315e-06,
8927
+ "loss": 0.1164,
8928
+ "step": 11690
8929
+ },
8930
+ {
8931
+ "epoch": 5.482661668228679,
8932
+ "grad_norm": 1.5643881559371948,
8933
+ "learning_rate": 9.314667291471417e-06,
8934
+ "loss": 0.1069,
8935
+ "step": 11700
8936
+ },
8937
+ {
8938
+ "epoch": 5.487347703842549,
8939
+ "grad_norm": 2.7151424884796143,
8940
+ "learning_rate": 9.314081537019682e-06,
8941
+ "loss": 0.1268,
8942
+ "step": 11710
8943
+ },
8944
+ {
8945
+ "epoch": 5.49203373945642,
8946
+ "grad_norm": 2.5677719116210938,
8947
+ "learning_rate": 9.31349578256795e-06,
8948
+ "loss": 0.0967,
8949
+ "step": 11720
8950
+ },
8951
+ {
8952
+ "epoch": 5.49671977507029,
8953
+ "grad_norm": 1.8421558141708374,
8954
+ "learning_rate": 9.312910028116214e-06,
8955
+ "loss": 0.111,
8956
+ "step": 11730
8957
+ },
8958
+ {
8959
+ "epoch": 5.501405810684162,
8960
+ "grad_norm": 2.0751326084136963,
8961
+ "learning_rate": 9.31232427366448e-06,
8962
+ "loss": 0.12,
8963
+ "step": 11740
8964
+ },
8965
+ {
8966
+ "epoch": 5.506091846298032,
8967
+ "grad_norm": 2.1153831481933594,
8968
+ "learning_rate": 9.311738519212747e-06,
8969
+ "loss": 0.1015,
8970
+ "step": 11750
8971
+ },
8972
+ {
8973
+ "epoch": 5.506091846298032,
8974
+ "eval_loss": 0.041955165565013885,
8975
+ "eval_pearson_cosine": 0.777849777417785,
8976
+ "eval_pearson_dot": 0.6249138636860323,
8977
+ "eval_pearson_euclidean": 0.7435429429906506,
8978
+ "eval_pearson_manhattan": 0.7436891262385359,
8979
+ "eval_runtime": 40.2707,
8980
+ "eval_samples_per_second": 37.248,
8981
+ "eval_spearman_cosine": 0.7810709480369951,
8982
+ "eval_spearman_dot": 0.6369677452237958,
8983
+ "eval_spearman_euclidean": 0.7589293041705764,
8984
+ "eval_spearman_manhattan": 0.7592155214560544,
8985
+ "eval_steps_per_second": 37.248,
8986
+ "step": 11750
8987
+ },
8988
+ {
8989
+ "epoch": 5.510777881911903,
8990
+ "grad_norm": 2.5980823040008545,
8991
+ "learning_rate": 9.311152764761013e-06,
8992
+ "loss": 0.097,
8993
+ "step": 11760
8994
+ },
8995
+ {
8996
+ "epoch": 5.515463917525773,
8997
+ "grad_norm": 2.678083658218384,
8998
+ "learning_rate": 9.310567010309279e-06,
8999
+ "loss": 0.1082,
9000
+ "step": 11770
9001
+ },
9002
+ {
9003
+ "epoch": 5.520149953139644,
9004
+ "grad_norm": 2.221808910369873,
9005
+ "learning_rate": 9.309981255857546e-06,
9006
+ "loss": 0.0951,
9007
+ "step": 11780
9008
+ },
9009
+ {
9010
+ "epoch": 5.524835988753514,
9011
+ "grad_norm": 3.2802789211273193,
9012
+ "learning_rate": 9.30939550140581e-06,
9013
+ "loss": 0.1153,
9014
+ "step": 11790
9015
+ },
9016
+ {
9017
+ "epoch": 5.5295220243673855,
9018
+ "grad_norm": 2.6341254711151123,
9019
+ "learning_rate": 9.308809746954078e-06,
9020
+ "loss": 0.1173,
9021
+ "step": 11800
9022
+ },
9023
+ {
9024
+ "epoch": 5.534208059981256,
9025
+ "grad_norm": 1.4996163845062256,
9026
+ "learning_rate": 9.308223992502345e-06,
9027
+ "loss": 0.1066,
9028
+ "step": 11810
9029
+ },
9030
+ {
9031
+ "epoch": 5.5388940955951265,
9032
+ "grad_norm": 1.6984524726867676,
9033
+ "learning_rate": 9.30763823805061e-06,
9034
+ "loss": 0.0985,
9035
+ "step": 11820
9036
+ },
9037
+ {
9038
+ "epoch": 5.543580131208997,
9039
+ "grad_norm": 2.20283842086792,
9040
+ "learning_rate": 9.307052483598877e-06,
9041
+ "loss": 0.1011,
9042
+ "step": 11830
9043
+ },
9044
+ {
9045
+ "epoch": 5.548266166822868,
9046
+ "grad_norm": 1.3354145288467407,
9047
+ "learning_rate": 9.306466729147142e-06,
9048
+ "loss": 0.1067,
9049
+ "step": 11840
9050
+ },
9051
+ {
9052
+ "epoch": 5.552952202436739,
9053
+ "grad_norm": 2.6207938194274902,
9054
+ "learning_rate": 9.305880974695409e-06,
9055
+ "loss": 0.1159,
9056
+ "step": 11850
9057
+ },
9058
+ {
9059
+ "epoch": 5.557638238050609,
9060
+ "grad_norm": 2.9331352710723877,
9061
+ "learning_rate": 9.305295220243674e-06,
9062
+ "loss": 0.1152,
9063
+ "step": 11860
9064
+ },
9065
+ {
9066
+ "epoch": 5.56232427366448,
9067
+ "grad_norm": 1.8482978343963623,
9068
+ "learning_rate": 9.304709465791941e-06,
9069
+ "loss": 0.1121,
9070
+ "step": 11870
9071
+ },
9072
+ {
9073
+ "epoch": 5.56701030927835,
9074
+ "grad_norm": 1.7065162658691406,
9075
+ "learning_rate": 9.304123711340208e-06,
9076
+ "loss": 0.1256,
9077
+ "step": 11880
9078
+ },
9079
+ {
9080
+ "epoch": 5.571696344892221,
9081
+ "grad_norm": 2.6578869819641113,
9082
+ "learning_rate": 9.303537956888473e-06,
9083
+ "loss": 0.1033,
9084
+ "step": 11890
9085
+ },
9086
+ {
9087
+ "epoch": 5.576382380506092,
9088
+ "grad_norm": 2.356271266937256,
9089
+ "learning_rate": 9.302952202436738e-06,
9090
+ "loss": 0.1106,
9091
+ "step": 11900
9092
+ },
9093
+ {
9094
+ "epoch": 5.581068416119963,
9095
+ "grad_norm": 2.4331321716308594,
9096
+ "learning_rate": 9.302366447985005e-06,
9097
+ "loss": 0.1342,
9098
+ "step": 11910
9099
+ },
9100
+ {
9101
+ "epoch": 5.585754451733833,
9102
+ "grad_norm": 2.5583012104034424,
9103
+ "learning_rate": 9.30178069353327e-06,
9104
+ "loss": 0.1232,
9105
+ "step": 11920
9106
+ },
9107
+ {
9108
+ "epoch": 5.590440487347704,
9109
+ "grad_norm": 1.2320704460144043,
9110
+ "learning_rate": 9.301194939081537e-06,
9111
+ "loss": 0.1235,
9112
+ "step": 11930
9113
+ },
9114
+ {
9115
+ "epoch": 5.595126522961575,
9116
+ "grad_norm": 2.749943256378174,
9117
+ "learning_rate": 9.300609184629804e-06,
9118
+ "loss": 0.1204,
9119
+ "step": 11940
9120
+ },
9121
+ {
9122
+ "epoch": 5.5998125585754455,
9123
+ "grad_norm": 1.935255527496338,
9124
+ "learning_rate": 9.30002343017807e-06,
9125
+ "loss": 0.1194,
9126
+ "step": 11950
9127
+ },
9128
+ {
9129
+ "epoch": 5.604498594189316,
9130
+ "grad_norm": 2.05389666557312,
9131
+ "learning_rate": 9.299437675726337e-06,
9132
+ "loss": 0.1185,
9133
+ "step": 11960
9134
+ },
9135
+ {
9136
+ "epoch": 5.609184629803186,
9137
+ "grad_norm": 2.0948450565338135,
9138
+ "learning_rate": 9.298851921274602e-06,
9139
+ "loss": 0.117,
9140
+ "step": 11970
9141
+ },
9142
+ {
9143
+ "epoch": 5.613870665417057,
9144
+ "grad_norm": 2.4468414783477783,
9145
+ "learning_rate": 9.298266166822869e-06,
9146
+ "loss": 0.1042,
9147
+ "step": 11980
9148
+ },
9149
+ {
9150
+ "epoch": 5.618556701030927,
9151
+ "grad_norm": 1.7686811685562134,
9152
+ "learning_rate": 9.297680412371136e-06,
9153
+ "loss": 0.1323,
9154
+ "step": 11990
9155
+ },
9156
+ {
9157
+ "epoch": 5.623242736644799,
9158
+ "grad_norm": 2.70004940032959,
9159
+ "learning_rate": 9.2970946579194e-06,
9160
+ "loss": 0.1153,
9161
+ "step": 12000
9162
+ },
9163
+ {
9164
+ "epoch": 5.623242736644799,
9165
+ "eval_loss": 0.044810693711042404,
9166
+ "eval_pearson_cosine": 0.7729592983209912,
9167
+ "eval_pearson_dot": 0.6141014999130547,
9168
+ "eval_pearson_euclidean": 0.7453445610743417,
9169
+ "eval_pearson_manhattan": 0.7450714346494465,
9170
+ "eval_runtime": 39.9226,
9171
+ "eval_samples_per_second": 37.573,
9172
+ "eval_spearman_cosine": 0.7783555866745624,
9173
+ "eval_spearman_dot": 0.6213713696364336,
9174
+ "eval_spearman_euclidean": 0.7596029469985899,
9175
+ "eval_spearman_manhattan": 0.7598359774134882,
9176
+ "eval_steps_per_second": 37.573,
9177
+ "step": 12000
9178
  }
9179
  ],
9180
  "logging_steps": 10,