CocoRoF commited on
Commit
d9ab832
·
verified ·
1 Parent(s): 79d42a8

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b198e43aac6859985d7c9cb18c2860594033b256136cc0b0e915d584614c895c
3
  size 737582948
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf83fcc35cbeccda29e41e5a239a30c9878fff2ed12f6202687d1cc78a53d33
3
  size 737582948
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64ec40cbe8543eb2855a915aee21dd1f77e088ec666a079a32133adde9da7af4
3
  size 1475256250
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a822201578594008224879f74b2b4f9407c3b6d910a5d9f8150dfb57e55f9839
3
  size 1475256250
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae238f666763a7993ec652c03f60677cb3de9003ea7ee1bc1dac41c2065a9c25
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab9b88403a4870612524aeb22edbec848f22712de7dc2dc2c2e5d5a61fd5fa2
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5ab92b6e335feba5c54de89db3c87b707994c34e8ae94b68ceaf1c0e44c4698
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac04a7bb6b195f0524aef30982df54700583a952ce3f364f2ebb726a2283cb83
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2632a9a94c203af7029ed1ba1b5fb0c1a8126e97bbd443fb5dba117f62e54913
3
- size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200a8689b4395a1bbe5786c6e017cc00f974df43d64b67ff1fd65d86cee2eac5
3
+ size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:add85b850cbfe5b8cf5c4f2e6f71a61a7d77d12000e589671d2903fa92c8b4c3
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b66504d38448386cda55d409ea77b9c193882e1d9e50fc8e75e03505e47e07
3
+ size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7645a2766e30c501c310ca2b1baf3bd1106ec431388b54ca1a7f2f6cc5531dbd
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bd01e4934833714e50683db598cf170e089ac7345314487bab1e91a26e5fd6
3
+ size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cad911d83e601a147b8872de9ba34bade0b9837051abcd270f992115bb282348
3
- size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10568452053cd042809865062f1c3b6117b4f9cb2a36138830ab329649de4c5d
3
+ size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36c25b50d403f4b10a416c3c4294b21a8f3a8f0d8b348d5a613cd951ffd7b66c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028fbfed0ea057f7cba505b85aabb9f8f3300e1ca5de4c9738be9eb5fb7f4bfb
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4aa3be7aba10932fe3b181dbc7c647b64be83ff98de84fe2b9cd6b26e86aafe
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5340b47a03039876c313687a97ae0b0c4c4f3b0059c1b4266e40d507f87b999
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4297fd04c7ed2579ce63e17f4b5a76a418be4b988ee50b810797fa07318b7ac1
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5a3d0f3b47665abb770a3493b11c8bcfaffceef746f758a329ed422fd0cb0c
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.106922942290251,
5
  "eval_steps": 500,
6
- "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8407,6 +8407,706 @@
8407
  "learning_rate": 4.9829524387246726e-05,
8408
  "loss": 0.6721,
8409
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8410
  }
8411
  ],
8412
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.616079810339067,
5
  "eval_steps": 500,
6
+ "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8407
  "learning_rate": 4.9829524387246726e-05,
8408
  "loss": 0.6721,
8409
  "step": 12000
8410
+ },
8411
+ {
8412
+ "epoch": 6.11201451097074,
8413
+ "grad_norm": 0.18195395171642303,
8414
+ "learning_rate": 4.982938232423609e-05,
8415
+ "loss": 0.6806,
8416
+ "step": 12010
8417
+ },
8418
+ {
8419
+ "epoch": 6.1171060796512275,
8420
+ "grad_norm": 0.15061675012111664,
8421
+ "learning_rate": 4.9829240261225466e-05,
8422
+ "loss": 0.6732,
8423
+ "step": 12020
8424
+ },
8425
+ {
8426
+ "epoch": 6.122197648331715,
8427
+ "grad_norm": 0.14526985585689545,
8428
+ "learning_rate": 4.982909819821484e-05,
8429
+ "loss": 0.6788,
8430
+ "step": 12030
8431
+ },
8432
+ {
8433
+ "epoch": 6.127289217012204,
8434
+ "grad_norm": 0.1469496637582779,
8435
+ "learning_rate": 4.982895613520421e-05,
8436
+ "loss": 0.6779,
8437
+ "step": 12040
8438
+ },
8439
+ {
8440
+ "epoch": 6.132380785692692,
8441
+ "grad_norm": 0.18443866074085236,
8442
+ "learning_rate": 4.9828814072193585e-05,
8443
+ "loss": 0.6767,
8444
+ "step": 12050
8445
+ },
8446
+ {
8447
+ "epoch": 6.13747235437318,
8448
+ "grad_norm": 0.11885727196931839,
8449
+ "learning_rate": 4.982867200918295e-05,
8450
+ "loss": 0.6764,
8451
+ "step": 12060
8452
+ },
8453
+ {
8454
+ "epoch": 6.142563923053668,
8455
+ "grad_norm": 0.1266055554151535,
8456
+ "learning_rate": 4.9828529946172325e-05,
8457
+ "loss": 0.6837,
8458
+ "step": 12070
8459
+ },
8460
+ {
8461
+ "epoch": 6.147655491734157,
8462
+ "grad_norm": 0.11415141075849533,
8463
+ "learning_rate": 4.98283878831617e-05,
8464
+ "loss": 0.6837,
8465
+ "step": 12080
8466
+ },
8467
+ {
8468
+ "epoch": 6.152747060414645,
8469
+ "grad_norm": 0.09705322235822678,
8470
+ "learning_rate": 4.982824582015107e-05,
8471
+ "loss": 0.6815,
8472
+ "step": 12090
8473
+ },
8474
+ {
8475
+ "epoch": 6.157838629095132,
8476
+ "grad_norm": 0.12555427849292755,
8477
+ "learning_rate": 4.9828103757140444e-05,
8478
+ "loss": 0.6804,
8479
+ "step": 12100
8480
+ },
8481
+ {
8482
+ "epoch": 6.162930197775621,
8483
+ "grad_norm": 0.11063813418149948,
8484
+ "learning_rate": 4.982796169412982e-05,
8485
+ "loss": 0.6815,
8486
+ "step": 12110
8487
+ },
8488
+ {
8489
+ "epoch": 6.168021766456109,
8490
+ "grad_norm": 0.1428930014371872,
8491
+ "learning_rate": 4.982781963111919e-05,
8492
+ "loss": 0.6781,
8493
+ "step": 12120
8494
+ },
8495
+ {
8496
+ "epoch": 6.173113335136597,
8497
+ "grad_norm": 0.13896307349205017,
8498
+ "learning_rate": 4.982767756810856e-05,
8499
+ "loss": 0.6763,
8500
+ "step": 12130
8501
+ },
8502
+ {
8503
+ "epoch": 6.178204903817085,
8504
+ "grad_norm": 0.12032928317785263,
8505
+ "learning_rate": 4.9827535505097936e-05,
8506
+ "loss": 0.6803,
8507
+ "step": 12140
8508
+ },
8509
+ {
8510
+ "epoch": 6.183296472497574,
8511
+ "grad_norm": 0.11562150716781616,
8512
+ "learning_rate": 4.98273934420873e-05,
8513
+ "loss": 0.6766,
8514
+ "step": 12150
8515
+ },
8516
+ {
8517
+ "epoch": 6.188388041178062,
8518
+ "grad_norm": 0.1040254682302475,
8519
+ "learning_rate": 4.9827251379076676e-05,
8520
+ "loss": 0.6823,
8521
+ "step": 12160
8522
+ },
8523
+ {
8524
+ "epoch": 6.19347960985855,
8525
+ "grad_norm": 0.1031600683927536,
8526
+ "learning_rate": 4.982710931606605e-05,
8527
+ "loss": 0.6757,
8528
+ "step": 12170
8529
+ },
8530
+ {
8531
+ "epoch": 6.198571178539038,
8532
+ "grad_norm": 0.11150684952735901,
8533
+ "learning_rate": 4.982696725305542e-05,
8534
+ "loss": 0.6781,
8535
+ "step": 12180
8536
+ },
8537
+ {
8538
+ "epoch": 6.203662747219526,
8539
+ "grad_norm": 0.15506963431835175,
8540
+ "learning_rate": 4.9826825190044795e-05,
8541
+ "loss": 0.672,
8542
+ "step": 12190
8543
+ },
8544
+ {
8545
+ "epoch": 6.2087543159000145,
8546
+ "grad_norm": 0.13985055685043335,
8547
+ "learning_rate": 4.982668312703417e-05,
8548
+ "loss": 0.6793,
8549
+ "step": 12200
8550
+ },
8551
+ {
8552
+ "epoch": 6.213845884580502,
8553
+ "grad_norm": 0.11352770030498505,
8554
+ "learning_rate": 4.982654106402354e-05,
8555
+ "loss": 0.6824,
8556
+ "step": 12210
8557
+ },
8558
+ {
8559
+ "epoch": 6.218937453260991,
8560
+ "grad_norm": 0.11052574217319489,
8561
+ "learning_rate": 4.9826399001012914e-05,
8562
+ "loss": 0.6791,
8563
+ "step": 12220
8564
+ },
8565
+ {
8566
+ "epoch": 6.224029021941479,
8567
+ "grad_norm": 0.12992137670516968,
8568
+ "learning_rate": 4.982625693800229e-05,
8569
+ "loss": 0.6793,
8570
+ "step": 12230
8571
+ },
8572
+ {
8573
+ "epoch": 6.229120590621967,
8574
+ "grad_norm": 0.1408848613500595,
8575
+ "learning_rate": 4.982611487499166e-05,
8576
+ "loss": 0.6791,
8577
+ "step": 12240
8578
+ },
8579
+ {
8580
+ "epoch": 6.234212159302455,
8581
+ "grad_norm": 0.18795296549797058,
8582
+ "learning_rate": 4.9825972811981034e-05,
8583
+ "loss": 0.6802,
8584
+ "step": 12250
8585
+ },
8586
+ {
8587
+ "epoch": 6.239303727982943,
8588
+ "grad_norm": 0.12889884412288666,
8589
+ "learning_rate": 4.982583074897041e-05,
8590
+ "loss": 0.6878,
8591
+ "step": 12260
8592
+ },
8593
+ {
8594
+ "epoch": 6.2443952966634315,
8595
+ "grad_norm": 0.1431640386581421,
8596
+ "learning_rate": 4.982568868595977e-05,
8597
+ "loss": 0.6775,
8598
+ "step": 12270
8599
+ },
8600
+ {
8601
+ "epoch": 6.249486865343919,
8602
+ "grad_norm": 0.11410534381866455,
8603
+ "learning_rate": 4.9825546622949146e-05,
8604
+ "loss": 0.6798,
8605
+ "step": 12280
8606
+ },
8607
+ {
8608
+ "epoch": 6.254578434024408,
8609
+ "grad_norm": 0.14347901940345764,
8610
+ "learning_rate": 4.982540455993851e-05,
8611
+ "loss": 0.6764,
8612
+ "step": 12290
8613
+ },
8614
+ {
8615
+ "epoch": 6.259670002704896,
8616
+ "grad_norm": 0.14148719608783722,
8617
+ "learning_rate": 4.9825262496927886e-05,
8618
+ "loss": 0.6778,
8619
+ "step": 12300
8620
+ },
8621
+ {
8622
+ "epoch": 6.264761571385384,
8623
+ "grad_norm": 0.13571056723594666,
8624
+ "learning_rate": 4.982512043391726e-05,
8625
+ "loss": 0.6822,
8626
+ "step": 12310
8627
+ },
8628
+ {
8629
+ "epoch": 6.269853140065872,
8630
+ "grad_norm": 0.13416819274425507,
8631
+ "learning_rate": 4.982497837090663e-05,
8632
+ "loss": 0.6764,
8633
+ "step": 12320
8634
+ },
8635
+ {
8636
+ "epoch": 6.274944708746361,
8637
+ "grad_norm": 0.12467856705188751,
8638
+ "learning_rate": 4.9824836307896005e-05,
8639
+ "loss": 0.681,
8640
+ "step": 12330
8641
+ },
8642
+ {
8643
+ "epoch": 6.280036277426849,
8644
+ "grad_norm": 0.11934306472539902,
8645
+ "learning_rate": 4.982469424488538e-05,
8646
+ "loss": 0.6808,
8647
+ "step": 12340
8648
+ },
8649
+ {
8650
+ "epoch": 6.285127846107336,
8651
+ "grad_norm": 0.12335172295570374,
8652
+ "learning_rate": 4.982455218187475e-05,
8653
+ "loss": 0.6795,
8654
+ "step": 12350
8655
+ },
8656
+ {
8657
+ "epoch": 6.290219414787825,
8658
+ "grad_norm": 0.12900583446025848,
8659
+ "learning_rate": 4.9824410118864124e-05,
8660
+ "loss": 0.6736,
8661
+ "step": 12360
8662
+ },
8663
+ {
8664
+ "epoch": 6.295310983468313,
8665
+ "grad_norm": 0.11381091177463531,
8666
+ "learning_rate": 4.98242680558535e-05,
8667
+ "loss": 0.6838,
8668
+ "step": 12370
8669
+ },
8670
+ {
8671
+ "epoch": 6.300402552148801,
8672
+ "grad_norm": 0.11505099385976791,
8673
+ "learning_rate": 4.982412599284287e-05,
8674
+ "loss": 0.6772,
8675
+ "step": 12380
8676
+ },
8677
+ {
8678
+ "epoch": 6.305494120829289,
8679
+ "grad_norm": 0.11616339534521103,
8680
+ "learning_rate": 4.9823983929832244e-05,
8681
+ "loss": 0.6788,
8682
+ "step": 12390
8683
+ },
8684
+ {
8685
+ "epoch": 6.310585689509777,
8686
+ "grad_norm": 0.1088867336511612,
8687
+ "learning_rate": 4.982384186682162e-05,
8688
+ "loss": 0.6777,
8689
+ "step": 12400
8690
+ },
8691
+ {
8692
+ "epoch": 6.315677258190266,
8693
+ "grad_norm": 0.11975440382957458,
8694
+ "learning_rate": 4.982369980381098e-05,
8695
+ "loss": 0.6854,
8696
+ "step": 12410
8697
+ },
8698
+ {
8699
+ "epoch": 6.320768826870753,
8700
+ "grad_norm": 0.11531190574169159,
8701
+ "learning_rate": 4.9823557740800356e-05,
8702
+ "loss": 0.6786,
8703
+ "step": 12420
8704
+ },
8705
+ {
8706
+ "epoch": 6.325860395551242,
8707
+ "grad_norm": 0.117821604013443,
8708
+ "learning_rate": 4.982341567778973e-05,
8709
+ "loss": 0.6814,
8710
+ "step": 12430
8711
+ },
8712
+ {
8713
+ "epoch": 6.33095196423173,
8714
+ "grad_norm": 0.13663433492183685,
8715
+ "learning_rate": 4.98232736147791e-05,
8716
+ "loss": 0.6754,
8717
+ "step": 12440
8718
+ },
8719
+ {
8720
+ "epoch": 6.3360435329122184,
8721
+ "grad_norm": 0.14458602666854858,
8722
+ "learning_rate": 4.9823131551768475e-05,
8723
+ "loss": 0.6829,
8724
+ "step": 12450
8725
+ },
8726
+ {
8727
+ "epoch": 6.341135101592706,
8728
+ "grad_norm": 0.12459100037813187,
8729
+ "learning_rate": 4.982298948875785e-05,
8730
+ "loss": 0.6803,
8731
+ "step": 12460
8732
+ },
8733
+ {
8734
+ "epoch": 6.346226670273195,
8735
+ "grad_norm": 0.11213183403015137,
8736
+ "learning_rate": 4.9822847425747215e-05,
8737
+ "loss": 0.6776,
8738
+ "step": 12470
8739
+ },
8740
+ {
8741
+ "epoch": 6.351318238953683,
8742
+ "grad_norm": 0.12166488170623779,
8743
+ "learning_rate": 4.982270536273659e-05,
8744
+ "loss": 0.6817,
8745
+ "step": 12480
8746
+ },
8747
+ {
8748
+ "epoch": 6.35640980763417,
8749
+ "grad_norm": 0.11691765487194061,
8750
+ "learning_rate": 4.982256329972596e-05,
8751
+ "loss": 0.6829,
8752
+ "step": 12490
8753
+ },
8754
+ {
8755
+ "epoch": 6.361501376314659,
8756
+ "grad_norm": 0.1120506301522255,
8757
+ "learning_rate": 4.9822421236715334e-05,
8758
+ "loss": 0.6791,
8759
+ "step": 12500
8760
+ },
8761
+ {
8762
+ "epoch": 6.366592944995147,
8763
+ "grad_norm": 0.12437008321285248,
8764
+ "learning_rate": 4.982227917370471e-05,
8765
+ "loss": 0.6751,
8766
+ "step": 12510
8767
+ },
8768
+ {
8769
+ "epoch": 6.3716845136756355,
8770
+ "grad_norm": 0.15133772790431976,
8771
+ "learning_rate": 4.982213711069408e-05,
8772
+ "loss": 0.6785,
8773
+ "step": 12520
8774
+ },
8775
+ {
8776
+ "epoch": 6.376776082356123,
8777
+ "grad_norm": 0.14470815658569336,
8778
+ "learning_rate": 4.9821995047683453e-05,
8779
+ "loss": 0.6805,
8780
+ "step": 12530
8781
+ },
8782
+ {
8783
+ "epoch": 6.381867651036612,
8784
+ "grad_norm": 0.1352653056383133,
8785
+ "learning_rate": 4.9821852984672827e-05,
8786
+ "loss": 0.6799,
8787
+ "step": 12540
8788
+ },
8789
+ {
8790
+ "epoch": 6.3869592197171,
8791
+ "grad_norm": 0.12650400400161743,
8792
+ "learning_rate": 4.982171092166219e-05,
8793
+ "loss": 0.6788,
8794
+ "step": 12550
8795
+ },
8796
+ {
8797
+ "epoch": 6.392050788397588,
8798
+ "grad_norm": 0.12057118117809296,
8799
+ "learning_rate": 4.9821568858651566e-05,
8800
+ "loss": 0.6811,
8801
+ "step": 12560
8802
+ },
8803
+ {
8804
+ "epoch": 6.397142357078076,
8805
+ "grad_norm": 0.16348209977149963,
8806
+ "learning_rate": 4.982142679564094e-05,
8807
+ "loss": 0.6799,
8808
+ "step": 12570
8809
+ },
8810
+ {
8811
+ "epoch": 6.402233925758564,
8812
+ "grad_norm": 0.18208801746368408,
8813
+ "learning_rate": 4.982128473263031e-05,
8814
+ "loss": 0.6738,
8815
+ "step": 12580
8816
+ },
8817
+ {
8818
+ "epoch": 6.4073254944390525,
8819
+ "grad_norm": 0.1399811953306198,
8820
+ "learning_rate": 4.9821142669619685e-05,
8821
+ "loss": 0.6762,
8822
+ "step": 12590
8823
+ },
8824
+ {
8825
+ "epoch": 6.41241706311954,
8826
+ "grad_norm": 0.11085145175457001,
8827
+ "learning_rate": 4.982100060660906e-05,
8828
+ "loss": 0.6914,
8829
+ "step": 12600
8830
+ },
8831
+ {
8832
+ "epoch": 6.417508631800029,
8833
+ "grad_norm": 0.10344066470861435,
8834
+ "learning_rate": 4.982085854359843e-05,
8835
+ "loss": 0.6809,
8836
+ "step": 12610
8837
+ },
8838
+ {
8839
+ "epoch": 6.422600200480517,
8840
+ "grad_norm": 0.13643105328083038,
8841
+ "learning_rate": 4.9820716480587805e-05,
8842
+ "loss": 0.6752,
8843
+ "step": 12620
8844
+ },
8845
+ {
8846
+ "epoch": 6.4276917691610045,
8847
+ "grad_norm": 0.12111321091651917,
8848
+ "learning_rate": 4.982057441757718e-05,
8849
+ "loss": 0.6786,
8850
+ "step": 12630
8851
+ },
8852
+ {
8853
+ "epoch": 6.432783337841493,
8854
+ "grad_norm": 0.1612890660762787,
8855
+ "learning_rate": 4.982043235456655e-05,
8856
+ "loss": 0.6789,
8857
+ "step": 12640
8858
+ },
8859
+ {
8860
+ "epoch": 6.437874906521981,
8861
+ "grad_norm": 0.15844057500362396,
8862
+ "learning_rate": 4.9820290291555924e-05,
8863
+ "loss": 0.6826,
8864
+ "step": 12650
8865
+ },
8866
+ {
8867
+ "epoch": 6.44296647520247,
8868
+ "grad_norm": 0.128059983253479,
8869
+ "learning_rate": 4.982014822854529e-05,
8870
+ "loss": 0.6776,
8871
+ "step": 12660
8872
+ },
8873
+ {
8874
+ "epoch": 6.448058043882957,
8875
+ "grad_norm": 0.13311228156089783,
8876
+ "learning_rate": 4.9820006165534663e-05,
8877
+ "loss": 0.6793,
8878
+ "step": 12670
8879
+ },
8880
+ {
8881
+ "epoch": 6.453149612563446,
8882
+ "grad_norm": 0.15546241402626038,
8883
+ "learning_rate": 4.9819864102524037e-05,
8884
+ "loss": 0.6753,
8885
+ "step": 12680
8886
+ },
8887
+ {
8888
+ "epoch": 6.458241181243934,
8889
+ "grad_norm": 0.1458451747894287,
8890
+ "learning_rate": 4.98197220395134e-05,
8891
+ "loss": 0.6817,
8892
+ "step": 12690
8893
+ },
8894
+ {
8895
+ "epoch": 6.463332749924422,
8896
+ "grad_norm": 0.12202929705381393,
8897
+ "learning_rate": 4.9819579976502776e-05,
8898
+ "loss": 0.6801,
8899
+ "step": 12700
8900
+ },
8901
+ {
8902
+ "epoch": 6.46842431860491,
8903
+ "grad_norm": 0.137448251247406,
8904
+ "learning_rate": 4.981943791349215e-05,
8905
+ "loss": 0.6779,
8906
+ "step": 12710
8907
+ },
8908
+ {
8909
+ "epoch": 6.473515887285398,
8910
+ "grad_norm": 0.12428711354732513,
8911
+ "learning_rate": 4.981929585048152e-05,
8912
+ "loss": 0.6814,
8913
+ "step": 12720
8914
+ },
8915
+ {
8916
+ "epoch": 6.478607455965887,
8917
+ "grad_norm": 0.15364359319210052,
8918
+ "learning_rate": 4.9819153787470895e-05,
8919
+ "loss": 0.6719,
8920
+ "step": 12730
8921
+ },
8922
+ {
8923
+ "epoch": 6.483699024646374,
8924
+ "grad_norm": 0.1646897941827774,
8925
+ "learning_rate": 4.981901172446027e-05,
8926
+ "loss": 0.6787,
8927
+ "step": 12740
8928
+ },
8929
+ {
8930
+ "epoch": 6.488790593326863,
8931
+ "grad_norm": 0.18058307468891144,
8932
+ "learning_rate": 4.981886966144964e-05,
8933
+ "loss": 0.6797,
8934
+ "step": 12750
8935
+ },
8936
+ {
8937
+ "epoch": 6.493882162007351,
8938
+ "grad_norm": 0.13395850360393524,
8939
+ "learning_rate": 4.9818727598439015e-05,
8940
+ "loss": 0.6776,
8941
+ "step": 12760
8942
+ },
8943
+ {
8944
+ "epoch": 6.4989737306878395,
8945
+ "grad_norm": 0.15397368371486664,
8946
+ "learning_rate": 4.981858553542839e-05,
8947
+ "loss": 0.6698,
8948
+ "step": 12770
8949
+ },
8950
+ {
8951
+ "epoch": 6.504065299368327,
8952
+ "grad_norm": 0.16110943257808685,
8953
+ "learning_rate": 4.981844347241776e-05,
8954
+ "loss": 0.6849,
8955
+ "step": 12780
8956
+ },
8957
+ {
8958
+ "epoch": 6.509156868048816,
8959
+ "grad_norm": 0.18386079370975494,
8960
+ "learning_rate": 4.9818301409407134e-05,
8961
+ "loss": 0.6813,
8962
+ "step": 12790
8963
+ },
8964
+ {
8965
+ "epoch": 6.514248436729304,
8966
+ "grad_norm": 0.11144635081291199,
8967
+ "learning_rate": 4.98181593463965e-05,
8968
+ "loss": 0.6746,
8969
+ "step": 12800
8970
+ },
8971
+ {
8972
+ "epoch": 6.519340005409791,
8973
+ "grad_norm": 0.1547509729862213,
8974
+ "learning_rate": 4.981801728338587e-05,
8975
+ "loss": 0.6775,
8976
+ "step": 12810
8977
+ },
8978
+ {
8979
+ "epoch": 6.52443157409028,
8980
+ "grad_norm": 0.12533412873744965,
8981
+ "learning_rate": 4.9817875220375246e-05,
8982
+ "loss": 0.6723,
8983
+ "step": 12820
8984
+ },
8985
+ {
8986
+ "epoch": 6.529523142770768,
8987
+ "grad_norm": 0.13594309985637665,
8988
+ "learning_rate": 4.981773315736462e-05,
8989
+ "loss": 0.6815,
8990
+ "step": 12830
8991
+ },
8992
+ {
8993
+ "epoch": 6.5346147114512565,
8994
+ "grad_norm": 0.16000863909721375,
8995
+ "learning_rate": 4.981759109435399e-05,
8996
+ "loss": 0.6845,
8997
+ "step": 12840
8998
+ },
8999
+ {
9000
+ "epoch": 6.539706280131744,
9001
+ "grad_norm": 0.12660828232765198,
9002
+ "learning_rate": 4.9817449031343366e-05,
9003
+ "loss": 0.6776,
9004
+ "step": 12850
9005
+ },
9006
+ {
9007
+ "epoch": 6.544797848812232,
9008
+ "grad_norm": 0.13099251687526703,
9009
+ "learning_rate": 4.981730696833274e-05,
9010
+ "loss": 0.6761,
9011
+ "step": 12860
9012
+ },
9013
+ {
9014
+ "epoch": 6.549889417492721,
9015
+ "grad_norm": 0.13618282973766327,
9016
+ "learning_rate": 4.981716490532211e-05,
9017
+ "loss": 0.6777,
9018
+ "step": 12870
9019
+ },
9020
+ {
9021
+ "epoch": 6.5549809861732085,
9022
+ "grad_norm": 0.128812775015831,
9023
+ "learning_rate": 4.9817022842311485e-05,
9024
+ "loss": 0.687,
9025
+ "step": 12880
9026
+ },
9027
+ {
9028
+ "epoch": 6.560072554853697,
9029
+ "grad_norm": 0.10990247130393982,
9030
+ "learning_rate": 4.981688077930085e-05,
9031
+ "loss": 0.6792,
9032
+ "step": 12890
9033
+ },
9034
+ {
9035
+ "epoch": 6.565164123534185,
9036
+ "grad_norm": 0.13022927939891815,
9037
+ "learning_rate": 4.9816738716290225e-05,
9038
+ "loss": 0.6785,
9039
+ "step": 12900
9040
+ },
9041
+ {
9042
+ "epoch": 6.5702556922146735,
9043
+ "grad_norm": 0.14299486577510834,
9044
+ "learning_rate": 4.98165966532796e-05,
9045
+ "loss": 0.6819,
9046
+ "step": 12910
9047
+ },
9048
+ {
9049
+ "epoch": 6.575347260895161,
9050
+ "grad_norm": 0.13400639593601227,
9051
+ "learning_rate": 4.981645459026897e-05,
9052
+ "loss": 0.6815,
9053
+ "step": 12920
9054
+ },
9055
+ {
9056
+ "epoch": 6.58043882957565,
9057
+ "grad_norm": 0.0999205932021141,
9058
+ "learning_rate": 4.9816312527258344e-05,
9059
+ "loss": 0.6788,
9060
+ "step": 12930
9061
+ },
9062
+ {
9063
+ "epoch": 6.585530398256138,
9064
+ "grad_norm": 0.11330140382051468,
9065
+ "learning_rate": 4.981617046424771e-05,
9066
+ "loss": 0.6805,
9067
+ "step": 12940
9068
+ },
9069
+ {
9070
+ "epoch": 6.5906219669366255,
9071
+ "grad_norm": 0.18674777448177338,
9072
+ "learning_rate": 4.981602840123708e-05,
9073
+ "loss": 0.6778,
9074
+ "step": 12950
9075
+ },
9076
+ {
9077
+ "epoch": 6.595713535617114,
9078
+ "grad_norm": 0.15032435953617096,
9079
+ "learning_rate": 4.9815886338226456e-05,
9080
+ "loss": 0.6825,
9081
+ "step": 12960
9082
+ },
9083
+ {
9084
+ "epoch": 6.600805104297602,
9085
+ "grad_norm": 0.1333203762769699,
9086
+ "learning_rate": 4.981574427521583e-05,
9087
+ "loss": 0.6795,
9088
+ "step": 12970
9089
+ },
9090
+ {
9091
+ "epoch": 6.605896672978091,
9092
+ "grad_norm": 0.16465353965759277,
9093
+ "learning_rate": 4.98156022122052e-05,
9094
+ "loss": 0.6706,
9095
+ "step": 12980
9096
+ },
9097
+ {
9098
+ "epoch": 6.610988241658578,
9099
+ "grad_norm": 0.15451110899448395,
9100
+ "learning_rate": 4.9815460149194576e-05,
9101
+ "loss": 0.6757,
9102
+ "step": 12990
9103
+ },
9104
+ {
9105
+ "epoch": 6.616079810339067,
9106
+ "grad_norm": 0.15208947658538818,
9107
+ "learning_rate": 4.981531808618395e-05,
9108
+ "loss": 0.6818,
9109
+ "step": 13000
9110
  }
9111
  ],
9112
  "logging_steps": 10,