CocoRoF commited on
Commit
a7d2b57
·
verified ·
1 Parent(s): f3eafab

Training in progress, step 8211, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cab986cac6601467f4940ed6779aacd75433c805cf9ee93cf355a5b188e0484
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cda0b31f70d9aa1b6b0d2861a2bbcd9fb8c95012a6ab358768d46b28163374a
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:419bfa8031bc6da10b096b1b37735094111b842b61b3e7b3340f027599c0b9b4
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f21eb8f0138de484c920cb8f7bdeb36ad541352260059030b3b39bc9b5fb3b
3
  size 919972410
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcd7c2d4414537acaf6af528d88506ec79e891bf58e2e02229f3b597ef734a1c
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0aceef701350b39c9357a1bb5166b5f89159643c4a993e803dff7eb40b4b245
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6088743435573484,
5
  "eval_steps": 5000,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3515,6 +3515,2253 @@
3515
  "eval_samples_per_second": 2268.978,
3516
  "eval_steps_per_second": 35.453,
3517
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3518
  }
3519
  ],
3520
  "logging_steps": 10,
@@ -3529,12 +5776,12 @@
3529
  "should_evaluate": false,
3530
  "should_log": false,
3531
  "should_save": true,
3532
- "should_training_stop": false
3533
  },
3534
  "attributes": {}
3535
  }
3536
  },
3537
- "total_flos": 1.745522759041024e+18,
3538
  "train_batch_size": 16,
3539
  "trial_name": null,
3540
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9998934469898775,
5
  "eval_steps": 5000,
6
+ "global_step": 8211,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3515
  "eval_samples_per_second": 2268.978,
3516
  "eval_steps_per_second": 35.453,
3517
  "step": 5000
3518
+ },
3519
+ {
3520
+ "epoch": 0.610092092244463,
3521
+ "grad_norm": 511.75,
3522
+ "learning_rate": 9.904673654728358e-06,
3523
+ "loss": 22.1654,
3524
+ "step": 5010
3525
+ },
3526
+ {
3527
+ "epoch": 0.6113098409315777,
3528
+ "grad_norm": 349.25,
3529
+ "learning_rate": 9.904483382582109e-06,
3530
+ "loss": 22.3448,
3531
+ "step": 5020
3532
+ },
3533
+ {
3534
+ "epoch": 0.6125275896186925,
3535
+ "grad_norm": 481.5,
3536
+ "learning_rate": 9.904293110435858e-06,
3537
+ "loss": 22.3357,
3538
+ "step": 5030
3539
+ },
3540
+ {
3541
+ "epoch": 0.6137453383058071,
3542
+ "grad_norm": 583.0,
3543
+ "learning_rate": 9.904102838289607e-06,
3544
+ "loss": 22.5857,
3545
+ "step": 5040
3546
+ },
3547
+ {
3548
+ "epoch": 0.6149630869929218,
3549
+ "grad_norm": 388.0,
3550
+ "learning_rate": 9.903912566143355e-06,
3551
+ "loss": 22.3212,
3552
+ "step": 5050
3553
+ },
3554
+ {
3555
+ "epoch": 0.6161808356800366,
3556
+ "grad_norm": 305.0,
3557
+ "learning_rate": 9.903722293997104e-06,
3558
+ "loss": 22.3341,
3559
+ "step": 5060
3560
+ },
3561
+ {
3562
+ "epoch": 0.6173985843671512,
3563
+ "grad_norm": 264.75,
3564
+ "learning_rate": 9.903532021850855e-06,
3565
+ "loss": 22.5816,
3566
+ "step": 5070
3567
+ },
3568
+ {
3569
+ "epoch": 0.6186163330542659,
3570
+ "grad_norm": 311.75,
3571
+ "learning_rate": 9.903341749704604e-06,
3572
+ "loss": 22.5137,
3573
+ "step": 5080
3574
+ },
3575
+ {
3576
+ "epoch": 0.6198340817413807,
3577
+ "grad_norm": 619.5,
3578
+ "learning_rate": 9.903151477558353e-06,
3579
+ "loss": 22.5041,
3580
+ "step": 5090
3581
+ },
3582
+ {
3583
+ "epoch": 0.6210518304284953,
3584
+ "grad_norm": 398.0,
3585
+ "learning_rate": 9.902961205412102e-06,
3586
+ "loss": 22.4265,
3587
+ "step": 5100
3588
+ },
3589
+ {
3590
+ "epoch": 0.62226957911561,
3591
+ "grad_norm": 456.5,
3592
+ "learning_rate": 9.90277093326585e-06,
3593
+ "loss": 22.3639,
3594
+ "step": 5110
3595
+ },
3596
+ {
3597
+ "epoch": 0.6234873278027248,
3598
+ "grad_norm": 300.0,
3599
+ "learning_rate": 9.902580661119601e-06,
3600
+ "loss": 22.2858,
3601
+ "step": 5120
3602
+ },
3603
+ {
3604
+ "epoch": 0.6247050764898394,
3605
+ "grad_norm": 342.75,
3606
+ "learning_rate": 9.90239038897335e-06,
3607
+ "loss": 22.2856,
3608
+ "step": 5130
3609
+ },
3610
+ {
3611
+ "epoch": 0.6259228251769541,
3612
+ "grad_norm": 679.0,
3613
+ "learning_rate": 9.902200116827099e-06,
3614
+ "loss": 22.4301,
3615
+ "step": 5140
3616
+ },
3617
+ {
3618
+ "epoch": 0.6271405738640689,
3619
+ "grad_norm": 250.75,
3620
+ "learning_rate": 9.902009844680848e-06,
3621
+ "loss": 22.1408,
3622
+ "step": 5150
3623
+ },
3624
+ {
3625
+ "epoch": 0.6283583225511835,
3626
+ "grad_norm": 480.75,
3627
+ "learning_rate": 9.901819572534596e-06,
3628
+ "loss": 22.5274,
3629
+ "step": 5160
3630
+ },
3631
+ {
3632
+ "epoch": 0.6295760712382982,
3633
+ "grad_norm": 495.75,
3634
+ "learning_rate": 9.901629300388347e-06,
3635
+ "loss": 22.3002,
3636
+ "step": 5170
3637
+ },
3638
+ {
3639
+ "epoch": 0.630793819925413,
3640
+ "grad_norm": 370.75,
3641
+ "learning_rate": 9.901439028242096e-06,
3642
+ "loss": 22.5818,
3643
+ "step": 5180
3644
+ },
3645
+ {
3646
+ "epoch": 0.6320115686125276,
3647
+ "grad_norm": 633.0,
3648
+ "learning_rate": 9.901248756095845e-06,
3649
+ "loss": 21.9686,
3650
+ "step": 5190
3651
+ },
3652
+ {
3653
+ "epoch": 0.6332293172996423,
3654
+ "grad_norm": 317.25,
3655
+ "learning_rate": 9.901058483949594e-06,
3656
+ "loss": 22.3873,
3657
+ "step": 5200
3658
+ },
3659
+ {
3660
+ "epoch": 0.6344470659867569,
3661
+ "grad_norm": 508.0,
3662
+ "learning_rate": 9.900868211803342e-06,
3663
+ "loss": 22.2581,
3664
+ "step": 5210
3665
+ },
3666
+ {
3667
+ "epoch": 0.6356648146738717,
3668
+ "grad_norm": 443.0,
3669
+ "learning_rate": 9.900677939657093e-06,
3670
+ "loss": 22.2598,
3671
+ "step": 5220
3672
+ },
3673
+ {
3674
+ "epoch": 0.6368825633609864,
3675
+ "grad_norm": 540.5,
3676
+ "learning_rate": 9.900487667510842e-06,
3677
+ "loss": 22.0666,
3678
+ "step": 5230
3679
+ },
3680
+ {
3681
+ "epoch": 0.638100312048101,
3682
+ "grad_norm": 499.0,
3683
+ "learning_rate": 9.90029739536459e-06,
3684
+ "loss": 22.5543,
3685
+ "step": 5240
3686
+ },
3687
+ {
3688
+ "epoch": 0.6393180607352158,
3689
+ "grad_norm": 410.25,
3690
+ "learning_rate": 9.90010712321834e-06,
3691
+ "loss": 22.2988,
3692
+ "step": 5250
3693
+ },
3694
+ {
3695
+ "epoch": 0.6405358094223305,
3696
+ "grad_norm": 316.25,
3697
+ "learning_rate": 9.899916851072089e-06,
3698
+ "loss": 22.3868,
3699
+ "step": 5260
3700
+ },
3701
+ {
3702
+ "epoch": 0.6417535581094451,
3703
+ "grad_norm": 310.25,
3704
+ "learning_rate": 9.899726578925837e-06,
3705
+ "loss": 22.3954,
3706
+ "step": 5270
3707
+ },
3708
+ {
3709
+ "epoch": 0.6429713067965599,
3710
+ "grad_norm": 303.5,
3711
+ "learning_rate": 9.899536306779588e-06,
3712
+ "loss": 21.8775,
3713
+ "step": 5280
3714
+ },
3715
+ {
3716
+ "epoch": 0.6441890554836746,
3717
+ "grad_norm": 326.25,
3718
+ "learning_rate": 9.899346034633337e-06,
3719
+ "loss": 22.2514,
3720
+ "step": 5290
3721
+ },
3722
+ {
3723
+ "epoch": 0.6454068041707892,
3724
+ "grad_norm": 408.75,
3725
+ "learning_rate": 9.899155762487086e-06,
3726
+ "loss": 22.3362,
3727
+ "step": 5300
3728
+ },
3729
+ {
3730
+ "epoch": 0.646624552857904,
3731
+ "grad_norm": 364.25,
3732
+ "learning_rate": 9.898965490340835e-06,
3733
+ "loss": 22.2638,
3734
+ "step": 5310
3735
+ },
3736
+ {
3737
+ "epoch": 0.6478423015450187,
3738
+ "grad_norm": 468.0,
3739
+ "learning_rate": 9.898775218194583e-06,
3740
+ "loss": 22.3181,
3741
+ "step": 5320
3742
+ },
3743
+ {
3744
+ "epoch": 0.6490600502321333,
3745
+ "grad_norm": 464.25,
3746
+ "learning_rate": 9.898584946048334e-06,
3747
+ "loss": 22.1543,
3748
+ "step": 5330
3749
+ },
3750
+ {
3751
+ "epoch": 0.6502777989192481,
3752
+ "grad_norm": 550.5,
3753
+ "learning_rate": 9.898394673902083e-06,
3754
+ "loss": 22.3125,
3755
+ "step": 5340
3756
+ },
3757
+ {
3758
+ "epoch": 0.6514955476063627,
3759
+ "grad_norm": 721.5,
3760
+ "learning_rate": 9.898204401755832e-06,
3761
+ "loss": 22.4355,
3762
+ "step": 5350
3763
+ },
3764
+ {
3765
+ "epoch": 0.6527132962934774,
3766
+ "grad_norm": 878.5,
3767
+ "learning_rate": 9.89801412960958e-06,
3768
+ "loss": 22.2877,
3769
+ "step": 5360
3770
+ },
3771
+ {
3772
+ "epoch": 0.6539310449805922,
3773
+ "grad_norm": 378.25,
3774
+ "learning_rate": 9.89782385746333e-06,
3775
+ "loss": 22.2965,
3776
+ "step": 5370
3777
+ },
3778
+ {
3779
+ "epoch": 0.6551487936677068,
3780
+ "grad_norm": 444.0,
3781
+ "learning_rate": 9.89763358531708e-06,
3782
+ "loss": 22.4212,
3783
+ "step": 5380
3784
+ },
3785
+ {
3786
+ "epoch": 0.6563665423548215,
3787
+ "grad_norm": 1159.0,
3788
+ "learning_rate": 9.897443313170829e-06,
3789
+ "loss": 22.283,
3790
+ "step": 5390
3791
+ },
3792
+ {
3793
+ "epoch": 0.6575842910419363,
3794
+ "grad_norm": 463.25,
3795
+ "learning_rate": 9.897253041024578e-06,
3796
+ "loss": 22.3946,
3797
+ "step": 5400
3798
+ },
3799
+ {
3800
+ "epoch": 0.6588020397290509,
3801
+ "grad_norm": 508.5,
3802
+ "learning_rate": 9.897062768878327e-06,
3803
+ "loss": 22.2301,
3804
+ "step": 5410
3805
+ },
3806
+ {
3807
+ "epoch": 0.6600197884161656,
3808
+ "grad_norm": 330.0,
3809
+ "learning_rate": 9.896872496732076e-06,
3810
+ "loss": 22.1951,
3811
+ "step": 5420
3812
+ },
3813
+ {
3814
+ "epoch": 0.6612375371032803,
3815
+ "grad_norm": 509.5,
3816
+ "learning_rate": 9.896682224585826e-06,
3817
+ "loss": 22.2835,
3818
+ "step": 5430
3819
+ },
3820
+ {
3821
+ "epoch": 0.662455285790395,
3822
+ "grad_norm": 983.0,
3823
+ "learning_rate": 9.896491952439575e-06,
3824
+ "loss": 22.0716,
3825
+ "step": 5440
3826
+ },
3827
+ {
3828
+ "epoch": 0.6636730344775097,
3829
+ "grad_norm": 367.25,
3830
+ "learning_rate": 9.896301680293324e-06,
3831
+ "loss": 22.0852,
3832
+ "step": 5450
3833
+ },
3834
+ {
3835
+ "epoch": 0.6648907831646244,
3836
+ "grad_norm": 320.0,
3837
+ "learning_rate": 9.896111408147073e-06,
3838
+ "loss": 22.0447,
3839
+ "step": 5460
3840
+ },
3841
+ {
3842
+ "epoch": 0.6661085318517391,
3843
+ "grad_norm": 443.25,
3844
+ "learning_rate": 9.895921136000822e-06,
3845
+ "loss": 22.1223,
3846
+ "step": 5470
3847
+ },
3848
+ {
3849
+ "epoch": 0.6673262805388538,
3850
+ "grad_norm": 737.0,
3851
+ "learning_rate": 9.895730863854572e-06,
3852
+ "loss": 22.3605,
3853
+ "step": 5480
3854
+ },
3855
+ {
3856
+ "epoch": 0.6685440292259685,
3857
+ "grad_norm": 370.25,
3858
+ "learning_rate": 9.895540591708321e-06,
3859
+ "loss": 22.3988,
3860
+ "step": 5490
3861
+ },
3862
+ {
3863
+ "epoch": 0.6697617779130832,
3864
+ "grad_norm": 445.25,
3865
+ "learning_rate": 9.89535031956207e-06,
3866
+ "loss": 22.2857,
3867
+ "step": 5500
3868
+ },
3869
+ {
3870
+ "epoch": 0.6709795266001979,
3871
+ "grad_norm": 376.5,
3872
+ "learning_rate": 9.895160047415819e-06,
3873
+ "loss": 22.3568,
3874
+ "step": 5510
3875
+ },
3876
+ {
3877
+ "epoch": 0.6721972752873125,
3878
+ "grad_norm": 570.0,
3879
+ "learning_rate": 9.894969775269568e-06,
3880
+ "loss": 22.2735,
3881
+ "step": 5520
3882
+ },
3883
+ {
3884
+ "epoch": 0.6734150239744273,
3885
+ "grad_norm": 336.25,
3886
+ "learning_rate": 9.894779503123318e-06,
3887
+ "loss": 22.1921,
3888
+ "step": 5530
3889
+ },
3890
+ {
3891
+ "epoch": 0.674632772661542,
3892
+ "grad_norm": 321.75,
3893
+ "learning_rate": 9.894589230977067e-06,
3894
+ "loss": 22.1618,
3895
+ "step": 5540
3896
+ },
3897
+ {
3898
+ "epoch": 0.6758505213486566,
3899
+ "grad_norm": 397.5,
3900
+ "learning_rate": 9.894398958830816e-06,
3901
+ "loss": 22.183,
3902
+ "step": 5550
3903
+ },
3904
+ {
3905
+ "epoch": 0.6770682700357714,
3906
+ "grad_norm": 389.75,
3907
+ "learning_rate": 9.894208686684565e-06,
3908
+ "loss": 22.3434,
3909
+ "step": 5560
3910
+ },
3911
+ {
3912
+ "epoch": 0.6782860187228861,
3913
+ "grad_norm": 353.0,
3914
+ "learning_rate": 9.894018414538314e-06,
3915
+ "loss": 22.3097,
3916
+ "step": 5570
3917
+ },
3918
+ {
3919
+ "epoch": 0.6795037674100007,
3920
+ "grad_norm": 637.5,
3921
+ "learning_rate": 9.893828142392064e-06,
3922
+ "loss": 22.36,
3923
+ "step": 5580
3924
+ },
3925
+ {
3926
+ "epoch": 0.6807215160971155,
3927
+ "grad_norm": 800.5,
3928
+ "learning_rate": 9.893637870245813e-06,
3929
+ "loss": 22.2326,
3930
+ "step": 5590
3931
+ },
3932
+ {
3933
+ "epoch": 0.6819392647842302,
3934
+ "grad_norm": 381.75,
3935
+ "learning_rate": 9.893447598099562e-06,
3936
+ "loss": 22.0298,
3937
+ "step": 5600
3938
+ },
3939
+ {
3940
+ "epoch": 0.6831570134713448,
3941
+ "grad_norm": 405.75,
3942
+ "learning_rate": 9.893257325953311e-06,
3943
+ "loss": 22.0199,
3944
+ "step": 5610
3945
+ },
3946
+ {
3947
+ "epoch": 0.6843747621584595,
3948
+ "grad_norm": 803.5,
3949
+ "learning_rate": 9.89306705380706e-06,
3950
+ "loss": 21.978,
3951
+ "step": 5620
3952
+ },
3953
+ {
3954
+ "epoch": 0.6855925108455743,
3955
+ "grad_norm": 345.5,
3956
+ "learning_rate": 9.89287678166081e-06,
3957
+ "loss": 22.1337,
3958
+ "step": 5630
3959
+ },
3960
+ {
3961
+ "epoch": 0.6868102595326889,
3962
+ "grad_norm": 505.5,
3963
+ "learning_rate": 9.89268650951456e-06,
3964
+ "loss": 22.1635,
3965
+ "step": 5640
3966
+ },
3967
+ {
3968
+ "epoch": 0.6880280082198036,
3969
+ "grad_norm": 328.75,
3970
+ "learning_rate": 9.892496237368308e-06,
3971
+ "loss": 22.2217,
3972
+ "step": 5650
3973
+ },
3974
+ {
3975
+ "epoch": 0.6892457569069184,
3976
+ "grad_norm": 267.75,
3977
+ "learning_rate": 9.892305965222057e-06,
3978
+ "loss": 21.8837,
3979
+ "step": 5660
3980
+ },
3981
+ {
3982
+ "epoch": 0.690463505594033,
3983
+ "grad_norm": 377.5,
3984
+ "learning_rate": 9.892115693075806e-06,
3985
+ "loss": 22.2993,
3986
+ "step": 5670
3987
+ },
3988
+ {
3989
+ "epoch": 0.6916812542811477,
3990
+ "grad_norm": 419.5,
3991
+ "learning_rate": 9.891925420929556e-06,
3992
+ "loss": 22.2408,
3993
+ "step": 5680
3994
+ },
3995
+ {
3996
+ "epoch": 0.6928990029682625,
3997
+ "grad_norm": 353.25,
3998
+ "learning_rate": 9.891735148783305e-06,
3999
+ "loss": 22.3364,
4000
+ "step": 5690
4001
+ },
4002
+ {
4003
+ "epoch": 0.6941167516553771,
4004
+ "grad_norm": 337.5,
4005
+ "learning_rate": 9.891544876637054e-06,
4006
+ "loss": 22.2121,
4007
+ "step": 5700
4008
+ },
4009
+ {
4010
+ "epoch": 0.6953345003424918,
4011
+ "grad_norm": 381.5,
4012
+ "learning_rate": 9.891354604490803e-06,
4013
+ "loss": 22.3864,
4014
+ "step": 5710
4015
+ },
4016
+ {
4017
+ "epoch": 0.6965522490296066,
4018
+ "grad_norm": 311.5,
4019
+ "learning_rate": 9.891164332344554e-06,
4020
+ "loss": 22.0068,
4021
+ "step": 5720
4022
+ },
4023
+ {
4024
+ "epoch": 0.6977699977167212,
4025
+ "grad_norm": 364.75,
4026
+ "learning_rate": 9.890974060198302e-06,
4027
+ "loss": 22.2089,
4028
+ "step": 5730
4029
+ },
4030
+ {
4031
+ "epoch": 0.6989877464038359,
4032
+ "grad_norm": 469.75,
4033
+ "learning_rate": 9.890783788052051e-06,
4034
+ "loss": 22.3107,
4035
+ "step": 5740
4036
+ },
4037
+ {
4038
+ "epoch": 0.7002054950909506,
4039
+ "grad_norm": 320.0,
4040
+ "learning_rate": 9.8905935159058e-06,
4041
+ "loss": 22.4451,
4042
+ "step": 5750
4043
+ },
4044
+ {
4045
+ "epoch": 0.7014232437780653,
4046
+ "grad_norm": 394.25,
4047
+ "learning_rate": 9.890403243759549e-06,
4048
+ "loss": 22.3636,
4049
+ "step": 5760
4050
+ },
4051
+ {
4052
+ "epoch": 0.70264099246518,
4053
+ "grad_norm": 348.25,
4054
+ "learning_rate": 9.8902129716133e-06,
4055
+ "loss": 22.2534,
4056
+ "step": 5770
4057
+ },
4058
+ {
4059
+ "epoch": 0.7038587411522947,
4060
+ "grad_norm": 396.75,
4061
+ "learning_rate": 9.890022699467048e-06,
4062
+ "loss": 22.1951,
4063
+ "step": 5780
4064
+ },
4065
+ {
4066
+ "epoch": 0.7050764898394094,
4067
+ "grad_norm": 388.0,
4068
+ "learning_rate": 9.889832427320797e-06,
4069
+ "loss": 22.3651,
4070
+ "step": 5790
4071
+ },
4072
+ {
4073
+ "epoch": 0.7062942385265241,
4074
+ "grad_norm": 490.75,
4075
+ "learning_rate": 9.889642155174546e-06,
4076
+ "loss": 22.3609,
4077
+ "step": 5800
4078
+ },
4079
+ {
4080
+ "epoch": 0.7075119872136388,
4081
+ "grad_norm": 390.75,
4082
+ "learning_rate": 9.889451883028295e-06,
4083
+ "loss": 22.1719,
4084
+ "step": 5810
4085
+ },
4086
+ {
4087
+ "epoch": 0.7087297359007535,
4088
+ "grad_norm": 569.5,
4089
+ "learning_rate": 9.889261610882046e-06,
4090
+ "loss": 22.3145,
4091
+ "step": 5820
4092
+ },
4093
+ {
4094
+ "epoch": 0.7099474845878682,
4095
+ "grad_norm": 395.25,
4096
+ "learning_rate": 9.889071338735795e-06,
4097
+ "loss": 22.0284,
4098
+ "step": 5830
4099
+ },
4100
+ {
4101
+ "epoch": 0.7111652332749828,
4102
+ "grad_norm": 383.25,
4103
+ "learning_rate": 9.888881066589543e-06,
4104
+ "loss": 22.1514,
4105
+ "step": 5840
4106
+ },
4107
+ {
4108
+ "epoch": 0.7123829819620976,
4109
+ "grad_norm": 439.75,
4110
+ "learning_rate": 9.888690794443292e-06,
4111
+ "loss": 22.2325,
4112
+ "step": 5850
4113
+ },
4114
+ {
4115
+ "epoch": 0.7136007306492123,
4116
+ "grad_norm": 419.25,
4117
+ "learning_rate": 9.888500522297043e-06,
4118
+ "loss": 22.072,
4119
+ "step": 5860
4120
+ },
4121
+ {
4122
+ "epoch": 0.7148184793363269,
4123
+ "grad_norm": 375.0,
4124
+ "learning_rate": 9.888310250150792e-06,
4125
+ "loss": 22.3628,
4126
+ "step": 5870
4127
+ },
4128
+ {
4129
+ "epoch": 0.7160362280234417,
4130
+ "grad_norm": 429.0,
4131
+ "learning_rate": 9.88811997800454e-06,
4132
+ "loss": 22.0338,
4133
+ "step": 5880
4134
+ },
4135
+ {
4136
+ "epoch": 0.7172539767105564,
4137
+ "grad_norm": 291.5,
4138
+ "learning_rate": 9.88792970585829e-06,
4139
+ "loss": 22.4026,
4140
+ "step": 5890
4141
+ },
4142
+ {
4143
+ "epoch": 0.718471725397671,
4144
+ "grad_norm": 672.5,
4145
+ "learning_rate": 9.887739433712038e-06,
4146
+ "loss": 22.1481,
4147
+ "step": 5900
4148
+ },
4149
+ {
4150
+ "epoch": 0.7196894740847858,
4151
+ "grad_norm": 340.0,
4152
+ "learning_rate": 9.887549161565789e-06,
4153
+ "loss": 22.348,
4154
+ "step": 5910
4155
+ },
4156
+ {
4157
+ "epoch": 0.7209072227719004,
4158
+ "grad_norm": 395.75,
4159
+ "learning_rate": 9.887358889419538e-06,
4160
+ "loss": 22.2665,
4161
+ "step": 5920
4162
+ },
4163
+ {
4164
+ "epoch": 0.7221249714590151,
4165
+ "grad_norm": 516.5,
4166
+ "learning_rate": 9.887168617273287e-06,
4167
+ "loss": 22.2479,
4168
+ "step": 5930
4169
+ },
4170
+ {
4171
+ "epoch": 0.7233427201461299,
4172
+ "grad_norm": 498.0,
4173
+ "learning_rate": 9.886978345127036e-06,
4174
+ "loss": 22.2421,
4175
+ "step": 5940
4176
+ },
4177
+ {
4178
+ "epoch": 0.7245604688332445,
4179
+ "grad_norm": 450.5,
4180
+ "learning_rate": 9.886788072980784e-06,
4181
+ "loss": 22.3301,
4182
+ "step": 5950
4183
+ },
4184
+ {
4185
+ "epoch": 0.7257782175203592,
4186
+ "grad_norm": 701.5,
4187
+ "learning_rate": 9.886597800834535e-06,
4188
+ "loss": 21.9416,
4189
+ "step": 5960
4190
+ },
4191
+ {
4192
+ "epoch": 0.726995966207474,
4193
+ "grad_norm": 293.0,
4194
+ "learning_rate": 9.886407528688284e-06,
4195
+ "loss": 22.1706,
4196
+ "step": 5970
4197
+ },
4198
+ {
4199
+ "epoch": 0.7282137148945886,
4200
+ "grad_norm": 359.75,
4201
+ "learning_rate": 9.886217256542033e-06,
4202
+ "loss": 22.4014,
4203
+ "step": 5980
4204
+ },
4205
+ {
4206
+ "epoch": 0.7294314635817033,
4207
+ "grad_norm": 308.0,
4208
+ "learning_rate": 9.886026984395782e-06,
4209
+ "loss": 22.3196,
4210
+ "step": 5990
4211
+ },
4212
+ {
4213
+ "epoch": 0.7306492122688181,
4214
+ "grad_norm": 248.0,
4215
+ "learning_rate": 9.885836712249532e-06,
4216
+ "loss": 22.0416,
4217
+ "step": 6000
4218
+ },
4219
+ {
4220
+ "epoch": 0.7318669609559327,
4221
+ "grad_norm": 284.5,
4222
+ "learning_rate": 9.885646440103281e-06,
4223
+ "loss": 22.1623,
4224
+ "step": 6010
4225
+ },
4226
+ {
4227
+ "epoch": 0.7330847096430474,
4228
+ "grad_norm": 306.0,
4229
+ "learning_rate": 9.88545616795703e-06,
4230
+ "loss": 22.3356,
4231
+ "step": 6020
4232
+ },
4233
+ {
4234
+ "epoch": 0.7343024583301622,
4235
+ "grad_norm": 583.5,
4236
+ "learning_rate": 9.885265895810779e-06,
4237
+ "loss": 22.2748,
4238
+ "step": 6030
4239
+ },
4240
+ {
4241
+ "epoch": 0.7355202070172768,
4242
+ "grad_norm": 475.75,
4243
+ "learning_rate": 9.885075623664528e-06,
4244
+ "loss": 22.3207,
4245
+ "step": 6040
4246
+ },
4247
+ {
4248
+ "epoch": 0.7367379557043915,
4249
+ "grad_norm": 300.5,
4250
+ "learning_rate": 9.884885351518278e-06,
4251
+ "loss": 22.1761,
4252
+ "step": 6050
4253
+ },
4254
+ {
4255
+ "epoch": 0.7379557043915062,
4256
+ "grad_norm": 280.75,
4257
+ "learning_rate": 9.884695079372027e-06,
4258
+ "loss": 22.1228,
4259
+ "step": 6060
4260
+ },
4261
+ {
4262
+ "epoch": 0.7391734530786209,
4263
+ "grad_norm": 518.5,
4264
+ "learning_rate": 9.884504807225776e-06,
4265
+ "loss": 22.4198,
4266
+ "step": 6070
4267
+ },
4268
+ {
4269
+ "epoch": 0.7403912017657356,
4270
+ "grad_norm": 320.75,
4271
+ "learning_rate": 9.884314535079525e-06,
4272
+ "loss": 22.1976,
4273
+ "step": 6080
4274
+ },
4275
+ {
4276
+ "epoch": 0.7416089504528502,
4277
+ "grad_norm": 369.25,
4278
+ "learning_rate": 9.884124262933275e-06,
4279
+ "loss": 22.2466,
4280
+ "step": 6090
4281
+ },
4282
+ {
4283
+ "epoch": 0.742826699139965,
4284
+ "grad_norm": 288.75,
4285
+ "learning_rate": 9.883933990787024e-06,
4286
+ "loss": 22.3804,
4287
+ "step": 6100
4288
+ },
4289
+ {
4290
+ "epoch": 0.7440444478270797,
4291
+ "grad_norm": 437.25,
4292
+ "learning_rate": 9.883743718640773e-06,
4293
+ "loss": 22.2938,
4294
+ "step": 6110
4295
+ },
4296
+ {
4297
+ "epoch": 0.7452621965141943,
4298
+ "grad_norm": 445.75,
4299
+ "learning_rate": 9.883553446494522e-06,
4300
+ "loss": 22.2249,
4301
+ "step": 6120
4302
+ },
4303
+ {
4304
+ "epoch": 0.7464799452013091,
4305
+ "grad_norm": 444.25,
4306
+ "learning_rate": 9.883363174348271e-06,
4307
+ "loss": 22.423,
4308
+ "step": 6130
4309
+ },
4310
+ {
4311
+ "epoch": 0.7476976938884238,
4312
+ "grad_norm": 576.0,
4313
+ "learning_rate": 9.883172902202021e-06,
4314
+ "loss": 22.2609,
4315
+ "step": 6140
4316
+ },
4317
+ {
4318
+ "epoch": 0.7489154425755384,
4319
+ "grad_norm": 330.5,
4320
+ "learning_rate": 9.88298263005577e-06,
4321
+ "loss": 22.248,
4322
+ "step": 6150
4323
+ },
4324
+ {
4325
+ "epoch": 0.7501331912626532,
4326
+ "grad_norm": 716.0,
4327
+ "learning_rate": 9.88279235790952e-06,
4328
+ "loss": 22.1307,
4329
+ "step": 6160
4330
+ },
4331
+ {
4332
+ "epoch": 0.7513509399497679,
4333
+ "grad_norm": 265.5,
4334
+ "learning_rate": 9.882602085763268e-06,
4335
+ "loss": 22.2934,
4336
+ "step": 6170
4337
+ },
4338
+ {
4339
+ "epoch": 0.7525686886368825,
4340
+ "grad_norm": 325.25,
4341
+ "learning_rate": 9.882411813617017e-06,
4342
+ "loss": 21.9465,
4343
+ "step": 6180
4344
+ },
4345
+ {
4346
+ "epoch": 0.7537864373239973,
4347
+ "grad_norm": 545.5,
4348
+ "learning_rate": 9.882221541470767e-06,
4349
+ "loss": 22.468,
4350
+ "step": 6190
4351
+ },
4352
+ {
4353
+ "epoch": 0.755004186011112,
4354
+ "grad_norm": 289.25,
4355
+ "learning_rate": 9.882031269324516e-06,
4356
+ "loss": 22.2477,
4357
+ "step": 6200
4358
+ },
4359
+ {
4360
+ "epoch": 0.7562219346982266,
4361
+ "grad_norm": 456.75,
4362
+ "learning_rate": 9.881840997178265e-06,
4363
+ "loss": 22.4299,
4364
+ "step": 6210
4365
+ },
4366
+ {
4367
+ "epoch": 0.7574396833853414,
4368
+ "grad_norm": 340.25,
4369
+ "learning_rate": 9.881650725032014e-06,
4370
+ "loss": 21.9778,
4371
+ "step": 6220
4372
+ },
4373
+ {
4374
+ "epoch": 0.7586574320724561,
4375
+ "grad_norm": 375.5,
4376
+ "learning_rate": 9.881460452885765e-06,
4377
+ "loss": 22.3145,
4378
+ "step": 6230
4379
+ },
4380
+ {
4381
+ "epoch": 0.7598751807595707,
4382
+ "grad_norm": 342.5,
4383
+ "learning_rate": 9.881270180739514e-06,
4384
+ "loss": 22.521,
4385
+ "step": 6240
4386
+ },
4387
+ {
4388
+ "epoch": 0.7610929294466855,
4389
+ "grad_norm": 405.25,
4390
+ "learning_rate": 9.881079908593262e-06,
4391
+ "loss": 22.0611,
4392
+ "step": 6250
4393
+ },
4394
+ {
4395
+ "epoch": 0.7623106781338002,
4396
+ "grad_norm": 383.5,
4397
+ "learning_rate": 9.880889636447011e-06,
4398
+ "loss": 22.0493,
4399
+ "step": 6260
4400
+ },
4401
+ {
4402
+ "epoch": 0.7635284268209148,
4403
+ "grad_norm": 406.25,
4404
+ "learning_rate": 9.88069936430076e-06,
4405
+ "loss": 22.1901,
4406
+ "step": 6270
4407
+ },
4408
+ {
4409
+ "epoch": 0.7647461755080295,
4410
+ "grad_norm": 363.5,
4411
+ "learning_rate": 9.88050909215451e-06,
4412
+ "loss": 22.2588,
4413
+ "step": 6280
4414
+ },
4415
+ {
4416
+ "epoch": 0.7659639241951443,
4417
+ "grad_norm": 303.25,
4418
+ "learning_rate": 9.88031882000826e-06,
4419
+ "loss": 22.2292,
4420
+ "step": 6290
4421
+ },
4422
+ {
4423
+ "epoch": 0.7671816728822589,
4424
+ "grad_norm": 315.5,
4425
+ "learning_rate": 9.880128547862008e-06,
4426
+ "loss": 22.1524,
4427
+ "step": 6300
4428
+ },
4429
+ {
4430
+ "epoch": 0.7683994215693736,
4431
+ "grad_norm": 343.5,
4432
+ "learning_rate": 9.879938275715757e-06,
4433
+ "loss": 22.0444,
4434
+ "step": 6310
4435
+ },
4436
+ {
4437
+ "epoch": 0.7696171702564883,
4438
+ "grad_norm": 434.25,
4439
+ "learning_rate": 9.879748003569506e-06,
4440
+ "loss": 21.9586,
4441
+ "step": 6320
4442
+ },
4443
+ {
4444
+ "epoch": 0.770834918943603,
4445
+ "grad_norm": 497.75,
4446
+ "learning_rate": 9.879557731423257e-06,
4447
+ "loss": 22.358,
4448
+ "step": 6330
4449
+ },
4450
+ {
4451
+ "epoch": 0.7720526676307177,
4452
+ "grad_norm": 658.5,
4453
+ "learning_rate": 9.879367459277006e-06,
4454
+ "loss": 22.3363,
4455
+ "step": 6340
4456
+ },
4457
+ {
4458
+ "epoch": 0.7732704163178324,
4459
+ "grad_norm": 536.5,
4460
+ "learning_rate": 9.879177187130755e-06,
4461
+ "loss": 22.0662,
4462
+ "step": 6350
4463
+ },
4464
+ {
4465
+ "epoch": 0.7744881650049471,
4466
+ "grad_norm": 501.0,
4467
+ "learning_rate": 9.878986914984503e-06,
4468
+ "loss": 22.1174,
4469
+ "step": 6360
4470
+ },
4471
+ {
4472
+ "epoch": 0.7757059136920618,
4473
+ "grad_norm": 273.25,
4474
+ "learning_rate": 9.878796642838252e-06,
4475
+ "loss": 22.046,
4476
+ "step": 6370
4477
+ },
4478
+ {
4479
+ "epoch": 0.7769236623791765,
4480
+ "grad_norm": 350.5,
4481
+ "learning_rate": 9.878606370692003e-06,
4482
+ "loss": 22.3518,
4483
+ "step": 6380
4484
+ },
4485
+ {
4486
+ "epoch": 0.7781414110662912,
4487
+ "grad_norm": 330.0,
4488
+ "learning_rate": 9.878416098545752e-06,
4489
+ "loss": 22.2315,
4490
+ "step": 6390
4491
+ },
4492
+ {
4493
+ "epoch": 0.7793591597534059,
4494
+ "grad_norm": 694.5,
4495
+ "learning_rate": 9.8782258263995e-06,
4496
+ "loss": 22.3835,
4497
+ "step": 6400
4498
+ },
4499
+ {
4500
+ "epoch": 0.7805769084405206,
4501
+ "grad_norm": 304.75,
4502
+ "learning_rate": 9.87803555425325e-06,
4503
+ "loss": 22.1816,
4504
+ "step": 6410
4505
+ },
4506
+ {
4507
+ "epoch": 0.7817946571276353,
4508
+ "grad_norm": 351.5,
4509
+ "learning_rate": 9.877845282106998e-06,
4510
+ "loss": 22.1395,
4511
+ "step": 6420
4512
+ },
4513
+ {
4514
+ "epoch": 0.78301240581475,
4515
+ "grad_norm": 308.25,
4516
+ "learning_rate": 9.877655009960747e-06,
4517
+ "loss": 22.0537,
4518
+ "step": 6430
4519
+ },
4520
+ {
4521
+ "epoch": 0.7842301545018647,
4522
+ "grad_norm": 466.25,
4523
+ "learning_rate": 9.877464737814498e-06,
4524
+ "loss": 22.3,
4525
+ "step": 6440
4526
+ },
4527
+ {
4528
+ "epoch": 0.7854479031889794,
4529
+ "grad_norm": 574.5,
4530
+ "learning_rate": 9.877274465668247e-06,
4531
+ "loss": 21.9396,
4532
+ "step": 6450
4533
+ },
4534
+ {
4535
+ "epoch": 0.786665651876094,
4536
+ "grad_norm": 329.5,
4537
+ "learning_rate": 9.877084193521995e-06,
4538
+ "loss": 21.962,
4539
+ "step": 6460
4540
+ },
4541
+ {
4542
+ "epoch": 0.7878834005632087,
4543
+ "grad_norm": 315.0,
4544
+ "learning_rate": 9.876893921375744e-06,
4545
+ "loss": 22.1423,
4546
+ "step": 6470
4547
+ },
4548
+ {
4549
+ "epoch": 0.7891011492503235,
4550
+ "grad_norm": 606.0,
4551
+ "learning_rate": 9.876703649229493e-06,
4552
+ "loss": 22.1091,
4553
+ "step": 6480
4554
+ },
4555
+ {
4556
+ "epoch": 0.7903188979374381,
4557
+ "grad_norm": 325.75,
4558
+ "learning_rate": 9.876513377083244e-06,
4559
+ "loss": 22.2421,
4560
+ "step": 6490
4561
+ },
4562
+ {
4563
+ "epoch": 0.7915366466245528,
4564
+ "grad_norm": 313.25,
4565
+ "learning_rate": 9.876323104936993e-06,
4566
+ "loss": 22.3469,
4567
+ "step": 6500
4568
+ },
4569
+ {
4570
+ "epoch": 0.7927543953116676,
4571
+ "grad_norm": 287.5,
4572
+ "learning_rate": 9.876132832790742e-06,
4573
+ "loss": 22.2427,
4574
+ "step": 6510
4575
+ },
4576
+ {
4577
+ "epoch": 0.7939721439987822,
4578
+ "grad_norm": 409.75,
4579
+ "learning_rate": 9.87594256064449e-06,
4580
+ "loss": 22.3911,
4581
+ "step": 6520
4582
+ },
4583
+ {
4584
+ "epoch": 0.7951898926858969,
4585
+ "grad_norm": 281.0,
4586
+ "learning_rate": 9.87575228849824e-06,
4587
+ "loss": 22.2416,
4588
+ "step": 6530
4589
+ },
4590
+ {
4591
+ "epoch": 0.7964076413730117,
4592
+ "grad_norm": 401.0,
4593
+ "learning_rate": 9.87556201635199e-06,
4594
+ "loss": 22.1661,
4595
+ "step": 6540
4596
+ },
4597
+ {
4598
+ "epoch": 0.7976253900601263,
4599
+ "grad_norm": 763.5,
4600
+ "learning_rate": 9.875371744205739e-06,
4601
+ "loss": 22.3706,
4602
+ "step": 6550
4603
+ },
4604
+ {
4605
+ "epoch": 0.798843138747241,
4606
+ "grad_norm": 351.25,
4607
+ "learning_rate": 9.875181472059488e-06,
4608
+ "loss": 22.1028,
4609
+ "step": 6560
4610
+ },
4611
+ {
4612
+ "epoch": 0.8000608874343558,
4613
+ "grad_norm": 368.25,
4614
+ "learning_rate": 9.874991199913236e-06,
4615
+ "loss": 21.9313,
4616
+ "step": 6570
4617
+ },
4618
+ {
4619
+ "epoch": 0.8012786361214704,
4620
+ "grad_norm": 392.25,
4621
+ "learning_rate": 9.874800927766985e-06,
4622
+ "loss": 22.1766,
4623
+ "step": 6580
4624
+ },
4625
+ {
4626
+ "epoch": 0.8024963848085851,
4627
+ "grad_norm": 541.5,
4628
+ "learning_rate": 9.874610655620736e-06,
4629
+ "loss": 22.1877,
4630
+ "step": 6590
4631
+ },
4632
+ {
4633
+ "epoch": 0.8037141334956999,
4634
+ "grad_norm": 334.5,
4635
+ "learning_rate": 9.874420383474485e-06,
4636
+ "loss": 21.9352,
4637
+ "step": 6600
4638
+ },
4639
+ {
4640
+ "epoch": 0.8049318821828145,
4641
+ "grad_norm": 457.5,
4642
+ "learning_rate": 9.874230111328234e-06,
4643
+ "loss": 22.3417,
4644
+ "step": 6610
4645
+ },
4646
+ {
4647
+ "epoch": 0.8061496308699292,
4648
+ "grad_norm": 300.75,
4649
+ "learning_rate": 9.874039839181983e-06,
4650
+ "loss": 22.295,
4651
+ "step": 6620
4652
+ },
4653
+ {
4654
+ "epoch": 0.807367379557044,
4655
+ "grad_norm": 342.0,
4656
+ "learning_rate": 9.873849567035731e-06,
4657
+ "loss": 22.1422,
4658
+ "step": 6630
4659
+ },
4660
+ {
4661
+ "epoch": 0.8085851282441586,
4662
+ "grad_norm": 329.0,
4663
+ "learning_rate": 9.873659294889482e-06,
4664
+ "loss": 22.3643,
4665
+ "step": 6640
4666
+ },
4667
+ {
4668
+ "epoch": 0.8098028769312733,
4669
+ "grad_norm": 359.5,
4670
+ "learning_rate": 9.87346902274323e-06,
4671
+ "loss": 22.0662,
4672
+ "step": 6650
4673
+ },
4674
+ {
4675
+ "epoch": 0.8110206256183881,
4676
+ "grad_norm": 369.25,
4677
+ "learning_rate": 9.87327875059698e-06,
4678
+ "loss": 22.2069,
4679
+ "step": 6660
4680
+ },
4681
+ {
4682
+ "epoch": 0.8122383743055027,
4683
+ "grad_norm": 415.25,
4684
+ "learning_rate": 9.873088478450729e-06,
4685
+ "loss": 22.0581,
4686
+ "step": 6670
4687
+ },
4688
+ {
4689
+ "epoch": 0.8134561229926174,
4690
+ "grad_norm": 360.25,
4691
+ "learning_rate": 9.872898206304477e-06,
4692
+ "loss": 22.2493,
4693
+ "step": 6680
4694
+ },
4695
+ {
4696
+ "epoch": 0.814673871679732,
4697
+ "grad_norm": 329.0,
4698
+ "learning_rate": 9.872707934158228e-06,
4699
+ "loss": 22.1208,
4700
+ "step": 6690
4701
+ },
4702
+ {
4703
+ "epoch": 0.8158916203668468,
4704
+ "grad_norm": 527.5,
4705
+ "learning_rate": 9.872517662011977e-06,
4706
+ "loss": 22.1305,
4707
+ "step": 6700
4708
+ },
4709
+ {
4710
+ "epoch": 0.8171093690539615,
4711
+ "grad_norm": 753.5,
4712
+ "learning_rate": 9.872327389865726e-06,
4713
+ "loss": 22.2309,
4714
+ "step": 6710
4715
+ },
4716
+ {
4717
+ "epoch": 0.8183271177410761,
4718
+ "grad_norm": 415.0,
4719
+ "learning_rate": 9.872137117719475e-06,
4720
+ "loss": 21.9451,
4721
+ "step": 6720
4722
+ },
4723
+ {
4724
+ "epoch": 0.8195448664281909,
4725
+ "grad_norm": 290.0,
4726
+ "learning_rate": 9.871946845573223e-06,
4727
+ "loss": 22.0432,
4728
+ "step": 6730
4729
+ },
4730
+ {
4731
+ "epoch": 0.8207626151153056,
4732
+ "grad_norm": 319.25,
4733
+ "learning_rate": 9.871756573426974e-06,
4734
+ "loss": 22.2189,
4735
+ "step": 6740
4736
+ },
4737
+ {
4738
+ "epoch": 0.8219803638024202,
4739
+ "grad_norm": 508.75,
4740
+ "learning_rate": 9.871566301280723e-06,
4741
+ "loss": 22.2375,
4742
+ "step": 6750
4743
+ },
4744
+ {
4745
+ "epoch": 0.823198112489535,
4746
+ "grad_norm": 366.5,
4747
+ "learning_rate": 9.871376029134472e-06,
4748
+ "loss": 22.1456,
4749
+ "step": 6760
4750
+ },
4751
+ {
4752
+ "epoch": 0.8244158611766497,
4753
+ "grad_norm": 363.5,
4754
+ "learning_rate": 9.87118575698822e-06,
4755
+ "loss": 22.1803,
4756
+ "step": 6770
4757
+ },
4758
+ {
4759
+ "epoch": 0.8256336098637643,
4760
+ "grad_norm": 510.5,
4761
+ "learning_rate": 9.87099548484197e-06,
4762
+ "loss": 22.2005,
4763
+ "step": 6780
4764
+ },
4765
+ {
4766
+ "epoch": 0.8268513585508791,
4767
+ "grad_norm": 456.75,
4768
+ "learning_rate": 9.87080521269572e-06,
4769
+ "loss": 21.7561,
4770
+ "step": 6790
4771
+ },
4772
+ {
4773
+ "epoch": 0.8280691072379938,
4774
+ "grad_norm": 592.5,
4775
+ "learning_rate": 9.870614940549469e-06,
4776
+ "loss": 22.1845,
4777
+ "step": 6800
4778
+ },
4779
+ {
4780
+ "epoch": 0.8292868559251084,
4781
+ "grad_norm": 403.25,
4782
+ "learning_rate": 9.870424668403218e-06,
4783
+ "loss": 22.1656,
4784
+ "step": 6810
4785
+ },
4786
+ {
4787
+ "epoch": 0.8305046046122232,
4788
+ "grad_norm": 590.0,
4789
+ "learning_rate": 9.870234396256967e-06,
4790
+ "loss": 22.0388,
4791
+ "step": 6820
4792
+ },
4793
+ {
4794
+ "epoch": 0.8317223532993379,
4795
+ "grad_norm": 337.75,
4796
+ "learning_rate": 9.870044124110716e-06,
4797
+ "loss": 22.0442,
4798
+ "step": 6830
4799
+ },
4800
+ {
4801
+ "epoch": 0.8329401019864525,
4802
+ "grad_norm": 322.0,
4803
+ "learning_rate": 9.869853851964466e-06,
4804
+ "loss": 22.0503,
4805
+ "step": 6840
4806
+ },
4807
+ {
4808
+ "epoch": 0.8341578506735673,
4809
+ "grad_norm": 309.5,
4810
+ "learning_rate": 9.869663579818215e-06,
4811
+ "loss": 22.3047,
4812
+ "step": 6850
4813
+ },
4814
+ {
4815
+ "epoch": 0.835375599360682,
4816
+ "grad_norm": 312.0,
4817
+ "learning_rate": 9.869473307671964e-06,
4818
+ "loss": 21.8039,
4819
+ "step": 6860
4820
+ },
4821
+ {
4822
+ "epoch": 0.8365933480477966,
4823
+ "grad_norm": 352.25,
4824
+ "learning_rate": 9.869283035525713e-06,
4825
+ "loss": 22.3301,
4826
+ "step": 6870
4827
+ },
4828
+ {
4829
+ "epoch": 0.8378110967349114,
4830
+ "grad_norm": 334.75,
4831
+ "learning_rate": 9.869092763379462e-06,
4832
+ "loss": 22.0786,
4833
+ "step": 6880
4834
+ },
4835
+ {
4836
+ "epoch": 0.839028845422026,
4837
+ "grad_norm": 315.25,
4838
+ "learning_rate": 9.868902491233212e-06,
4839
+ "loss": 21.8268,
4840
+ "step": 6890
4841
+ },
4842
+ {
4843
+ "epoch": 0.8402465941091407,
4844
+ "grad_norm": 408.0,
4845
+ "learning_rate": 9.868712219086961e-06,
4846
+ "loss": 22.3968,
4847
+ "step": 6900
4848
+ },
4849
+ {
4850
+ "epoch": 0.8414643427962554,
4851
+ "grad_norm": 319.75,
4852
+ "learning_rate": 9.86852194694071e-06,
4853
+ "loss": 21.8756,
4854
+ "step": 6910
4855
+ },
4856
+ {
4857
+ "epoch": 0.8426820914833701,
4858
+ "grad_norm": 334.75,
4859
+ "learning_rate": 9.868331674794459e-06,
4860
+ "loss": 22.3082,
4861
+ "step": 6920
4862
+ },
4863
+ {
4864
+ "epoch": 0.8438998401704848,
4865
+ "grad_norm": 486.0,
4866
+ "learning_rate": 9.868141402648208e-06,
4867
+ "loss": 22.2503,
4868
+ "step": 6930
4869
+ },
4870
+ {
4871
+ "epoch": 0.8451175888575995,
4872
+ "grad_norm": 360.5,
4873
+ "learning_rate": 9.867951130501958e-06,
4874
+ "loss": 22.1495,
4875
+ "step": 6940
4876
+ },
4877
+ {
4878
+ "epoch": 0.8463353375447142,
4879
+ "grad_norm": 550.5,
4880
+ "learning_rate": 9.867760858355707e-06,
4881
+ "loss": 22.4421,
4882
+ "step": 6950
4883
+ },
4884
+ {
4885
+ "epoch": 0.8475530862318289,
4886
+ "grad_norm": 338.5,
4887
+ "learning_rate": 9.867570586209456e-06,
4888
+ "loss": 22.3046,
4889
+ "step": 6960
4890
+ },
4891
+ {
4892
+ "epoch": 0.8487708349189436,
4893
+ "grad_norm": 279.75,
4894
+ "learning_rate": 9.867380314063205e-06,
4895
+ "loss": 21.8373,
4896
+ "step": 6970
4897
+ },
4898
+ {
4899
+ "epoch": 0.8499885836060583,
4900
+ "grad_norm": 467.5,
4901
+ "learning_rate": 9.867190041916954e-06,
4902
+ "loss": 22.2306,
4903
+ "step": 6980
4904
+ },
4905
+ {
4906
+ "epoch": 0.851206332293173,
4907
+ "grad_norm": 357.0,
4908
+ "learning_rate": 9.866999769770704e-06,
4909
+ "loss": 22.0495,
4910
+ "step": 6990
4911
+ },
4912
+ {
4913
+ "epoch": 0.8524240809802877,
4914
+ "grad_norm": 277.0,
4915
+ "learning_rate": 9.866809497624453e-06,
4916
+ "loss": 22.2471,
4917
+ "step": 7000
4918
+ },
4919
+ {
4920
+ "epoch": 0.8536418296674024,
4921
+ "grad_norm": 395.5,
4922
+ "learning_rate": 9.866619225478202e-06,
4923
+ "loss": 22.2751,
4924
+ "step": 7010
4925
+ },
4926
+ {
4927
+ "epoch": 0.8548595783545171,
4928
+ "grad_norm": 369.75,
4929
+ "learning_rate": 9.866428953331951e-06,
4930
+ "loss": 21.8535,
4931
+ "step": 7020
4932
+ },
4933
+ {
4934
+ "epoch": 0.8560773270416318,
4935
+ "grad_norm": 295.5,
4936
+ "learning_rate": 9.8662386811857e-06,
4937
+ "loss": 22.1966,
4938
+ "step": 7030
4939
+ },
4940
+ {
4941
+ "epoch": 0.8572950757287465,
4942
+ "grad_norm": 309.0,
4943
+ "learning_rate": 9.86604840903945e-06,
4944
+ "loss": 22.0675,
4945
+ "step": 7040
4946
+ },
4947
+ {
4948
+ "epoch": 0.8585128244158612,
4949
+ "grad_norm": 299.5,
4950
+ "learning_rate": 9.8658581368932e-06,
4951
+ "loss": 21.9049,
4952
+ "step": 7050
4953
+ },
4954
+ {
4955
+ "epoch": 0.8597305731029758,
4956
+ "grad_norm": 528.5,
4957
+ "learning_rate": 9.865667864746948e-06,
4958
+ "loss": 22.1892,
4959
+ "step": 7060
4960
+ },
4961
+ {
4962
+ "epoch": 0.8609483217900906,
4963
+ "grad_norm": 473.25,
4964
+ "learning_rate": 9.865477592600697e-06,
4965
+ "loss": 21.9495,
4966
+ "step": 7070
4967
+ },
4968
+ {
4969
+ "epoch": 0.8621660704772053,
4970
+ "grad_norm": 695.5,
4971
+ "learning_rate": 9.865287320454446e-06,
4972
+ "loss": 22.0613,
4973
+ "step": 7080
4974
+ },
4975
+ {
4976
+ "epoch": 0.8633838191643199,
4977
+ "grad_norm": 397.5,
4978
+ "learning_rate": 9.865097048308196e-06,
4979
+ "loss": 22.3193,
4980
+ "step": 7090
4981
+ },
4982
+ {
4983
+ "epoch": 0.8646015678514346,
4984
+ "grad_norm": 304.25,
4985
+ "learning_rate": 9.864906776161945e-06,
4986
+ "loss": 21.9795,
4987
+ "step": 7100
4988
+ },
4989
+ {
4990
+ "epoch": 0.8658193165385494,
4991
+ "grad_norm": 400.25,
4992
+ "learning_rate": 9.864716504015694e-06,
4993
+ "loss": 22.2449,
4994
+ "step": 7110
4995
+ },
4996
+ {
4997
+ "epoch": 0.867037065225664,
4998
+ "grad_norm": 295.25,
4999
+ "learning_rate": 9.864526231869443e-06,
5000
+ "loss": 21.85,
5001
+ "step": 7120
5002
+ },
5003
+ {
5004
+ "epoch": 0.8682548139127787,
5005
+ "grad_norm": 297.25,
5006
+ "learning_rate": 9.864335959723192e-06,
5007
+ "loss": 22.0741,
5008
+ "step": 7130
5009
+ },
5010
+ {
5011
+ "epoch": 0.8694725625998935,
5012
+ "grad_norm": 351.75,
5013
+ "learning_rate": 9.864145687576942e-06,
5014
+ "loss": 22.0668,
5015
+ "step": 7140
5016
+ },
5017
+ {
5018
+ "epoch": 0.8706903112870081,
5019
+ "grad_norm": 484.5,
5020
+ "learning_rate": 9.863955415430691e-06,
5021
+ "loss": 22.237,
5022
+ "step": 7150
5023
+ },
5024
+ {
5025
+ "epoch": 0.8719080599741228,
5026
+ "grad_norm": 696.5,
5027
+ "learning_rate": 9.86376514328444e-06,
5028
+ "loss": 22.2217,
5029
+ "step": 7160
5030
+ },
5031
+ {
5032
+ "epoch": 0.8731258086612376,
5033
+ "grad_norm": 391.75,
5034
+ "learning_rate": 9.863574871138189e-06,
5035
+ "loss": 21.9983,
5036
+ "step": 7170
5037
+ },
5038
+ {
5039
+ "epoch": 0.8743435573483522,
5040
+ "grad_norm": 474.5,
5041
+ "learning_rate": 9.863384598991938e-06,
5042
+ "loss": 22.8282,
5043
+ "step": 7180
5044
+ },
5045
+ {
5046
+ "epoch": 0.8755613060354669,
5047
+ "grad_norm": 328.5,
5048
+ "learning_rate": 9.863194326845689e-06,
5049
+ "loss": 21.9469,
5050
+ "step": 7190
5051
+ },
5052
+ {
5053
+ "epoch": 0.8767790547225817,
5054
+ "grad_norm": 325.5,
5055
+ "learning_rate": 9.863004054699437e-06,
5056
+ "loss": 22.0664,
5057
+ "step": 7200
5058
+ },
5059
+ {
5060
+ "epoch": 0.8779968034096963,
5061
+ "grad_norm": 435.5,
5062
+ "learning_rate": 9.862813782553186e-06,
5063
+ "loss": 22.2138,
5064
+ "step": 7210
5065
+ },
5066
+ {
5067
+ "epoch": 0.879214552096811,
5068
+ "grad_norm": 430.0,
5069
+ "learning_rate": 9.862623510406935e-06,
5070
+ "loss": 21.8652,
5071
+ "step": 7220
5072
+ },
5073
+ {
5074
+ "epoch": 0.8804323007839258,
5075
+ "grad_norm": 485.75,
5076
+ "learning_rate": 9.862433238260684e-06,
5077
+ "loss": 21.9634,
5078
+ "step": 7230
5079
+ },
5080
+ {
5081
+ "epoch": 0.8816500494710404,
5082
+ "grad_norm": 445.75,
5083
+ "learning_rate": 9.862242966114435e-06,
5084
+ "loss": 22.1161,
5085
+ "step": 7240
5086
+ },
5087
+ {
5088
+ "epoch": 0.8828677981581551,
5089
+ "grad_norm": 348.25,
5090
+ "learning_rate": 9.862052693968183e-06,
5091
+ "loss": 22.2584,
5092
+ "step": 7250
5093
+ },
5094
+ {
5095
+ "epoch": 0.8840855468452699,
5096
+ "grad_norm": 433.5,
5097
+ "learning_rate": 9.861862421821932e-06,
5098
+ "loss": 21.846,
5099
+ "step": 7260
5100
+ },
5101
+ {
5102
+ "epoch": 0.8853032955323845,
5103
+ "grad_norm": 709.5,
5104
+ "learning_rate": 9.861672149675681e-06,
5105
+ "loss": 22.3034,
5106
+ "step": 7270
5107
+ },
5108
+ {
5109
+ "epoch": 0.8865210442194992,
5110
+ "grad_norm": 569.5,
5111
+ "learning_rate": 9.86148187752943e-06,
5112
+ "loss": 22.0625,
5113
+ "step": 7280
5114
+ },
5115
+ {
5116
+ "epoch": 0.887738792906614,
5117
+ "grad_norm": 542.0,
5118
+ "learning_rate": 9.86129160538318e-06,
5119
+ "loss": 21.9613,
5120
+ "step": 7290
5121
+ },
5122
+ {
5123
+ "epoch": 0.8889565415937286,
5124
+ "grad_norm": 272.5,
5125
+ "learning_rate": 9.86110133323693e-06,
5126
+ "loss": 21.9754,
5127
+ "step": 7300
5128
+ },
5129
+ {
5130
+ "epoch": 0.8901742902808433,
5131
+ "grad_norm": 316.75,
5132
+ "learning_rate": 9.860911061090678e-06,
5133
+ "loss": 21.8645,
5134
+ "step": 7310
5135
+ },
5136
+ {
5137
+ "epoch": 0.8913920389679579,
5138
+ "grad_norm": 434.5,
5139
+ "learning_rate": 9.860720788944427e-06,
5140
+ "loss": 22.0292,
5141
+ "step": 7320
5142
+ },
5143
+ {
5144
+ "epoch": 0.8926097876550727,
5145
+ "grad_norm": 460.25,
5146
+ "learning_rate": 9.860530516798176e-06,
5147
+ "loss": 22.0469,
5148
+ "step": 7330
5149
+ },
5150
+ {
5151
+ "epoch": 0.8938275363421874,
5152
+ "grad_norm": 469.5,
5153
+ "learning_rate": 9.860340244651927e-06,
5154
+ "loss": 21.835,
5155
+ "step": 7340
5156
+ },
5157
+ {
5158
+ "epoch": 0.895045285029302,
5159
+ "grad_norm": 542.0,
5160
+ "learning_rate": 9.860149972505676e-06,
5161
+ "loss": 22.1604,
5162
+ "step": 7350
5163
+ },
5164
+ {
5165
+ "epoch": 0.8962630337164168,
5166
+ "grad_norm": 300.0,
5167
+ "learning_rate": 9.859959700359424e-06,
5168
+ "loss": 22.2402,
5169
+ "step": 7360
5170
+ },
5171
+ {
5172
+ "epoch": 0.8974807824035315,
5173
+ "grad_norm": 414.25,
5174
+ "learning_rate": 9.859769428213173e-06,
5175
+ "loss": 22.086,
5176
+ "step": 7370
5177
+ },
5178
+ {
5179
+ "epoch": 0.8986985310906461,
5180
+ "grad_norm": 274.0,
5181
+ "learning_rate": 9.859579156066922e-06,
5182
+ "loss": 22.0595,
5183
+ "step": 7380
5184
+ },
5185
+ {
5186
+ "epoch": 0.8999162797777609,
5187
+ "grad_norm": 293.0,
5188
+ "learning_rate": 9.859388883920673e-06,
5189
+ "loss": 22.0757,
5190
+ "step": 7390
5191
+ },
5192
+ {
5193
+ "epoch": 0.9011340284648756,
5194
+ "grad_norm": 295.5,
5195
+ "learning_rate": 9.859198611774422e-06,
5196
+ "loss": 22.2322,
5197
+ "step": 7400
5198
+ },
5199
+ {
5200
+ "epoch": 0.9023517771519902,
5201
+ "grad_norm": 538.0,
5202
+ "learning_rate": 9.85900833962817e-06,
5203
+ "loss": 21.8385,
5204
+ "step": 7410
5205
+ },
5206
+ {
5207
+ "epoch": 0.903569525839105,
5208
+ "grad_norm": 528.5,
5209
+ "learning_rate": 9.85881806748192e-06,
5210
+ "loss": 22.345,
5211
+ "step": 7420
5212
+ },
5213
+ {
5214
+ "epoch": 0.9047872745262197,
5215
+ "grad_norm": 635.0,
5216
+ "learning_rate": 9.858627795335668e-06,
5217
+ "loss": 22.1144,
5218
+ "step": 7430
5219
+ },
5220
+ {
5221
+ "epoch": 0.9060050232133343,
5222
+ "grad_norm": 1307.0,
5223
+ "learning_rate": 9.858437523189419e-06,
5224
+ "loss": 22.1034,
5225
+ "step": 7440
5226
+ },
5227
+ {
5228
+ "epoch": 0.9072227719004491,
5229
+ "grad_norm": 453.0,
5230
+ "learning_rate": 9.858247251043168e-06,
5231
+ "loss": 21.9531,
5232
+ "step": 7450
5233
+ },
5234
+ {
5235
+ "epoch": 0.9084405205875637,
5236
+ "grad_norm": 399.75,
5237
+ "learning_rate": 9.858056978896917e-06,
5238
+ "loss": 21.9347,
5239
+ "step": 7460
5240
+ },
5241
+ {
5242
+ "epoch": 0.9096582692746784,
5243
+ "grad_norm": 390.75,
5244
+ "learning_rate": 9.857866706750665e-06,
5245
+ "loss": 22.138,
5246
+ "step": 7470
5247
+ },
5248
+ {
5249
+ "epoch": 0.9108760179617932,
5250
+ "grad_norm": 300.5,
5251
+ "learning_rate": 9.857676434604416e-06,
5252
+ "loss": 22.3572,
5253
+ "step": 7480
5254
+ },
5255
+ {
5256
+ "epoch": 0.9120937666489078,
5257
+ "grad_norm": 652.5,
5258
+ "learning_rate": 9.857486162458165e-06,
5259
+ "loss": 22.3054,
5260
+ "step": 7490
5261
+ },
5262
+ {
5263
+ "epoch": 0.9133115153360225,
5264
+ "grad_norm": 378.25,
5265
+ "learning_rate": 9.857295890311914e-06,
5266
+ "loss": 21.9117,
5267
+ "step": 7500
5268
+ },
5269
+ {
5270
+ "epoch": 0.9145292640231373,
5271
+ "grad_norm": 347.5,
5272
+ "learning_rate": 9.857105618165663e-06,
5273
+ "loss": 22.1442,
5274
+ "step": 7510
5275
+ },
5276
+ {
5277
+ "epoch": 0.9157470127102519,
5278
+ "grad_norm": 383.75,
5279
+ "learning_rate": 9.856915346019411e-06,
5280
+ "loss": 22.4111,
5281
+ "step": 7520
5282
+ },
5283
+ {
5284
+ "epoch": 0.9169647613973666,
5285
+ "grad_norm": 403.25,
5286
+ "learning_rate": 9.856725073873162e-06,
5287
+ "loss": 21.9787,
5288
+ "step": 7530
5289
+ },
5290
+ {
5291
+ "epoch": 0.9181825100844813,
5292
+ "grad_norm": 457.5,
5293
+ "learning_rate": 9.856534801726911e-06,
5294
+ "loss": 21.8651,
5295
+ "step": 7540
5296
+ },
5297
+ {
5298
+ "epoch": 0.919400258771596,
5299
+ "grad_norm": 518.0,
5300
+ "learning_rate": 9.85634452958066e-06,
5301
+ "loss": 21.9291,
5302
+ "step": 7550
5303
+ },
5304
+ {
5305
+ "epoch": 0.9206180074587107,
5306
+ "grad_norm": 558.5,
5307
+ "learning_rate": 9.856154257434409e-06,
5308
+ "loss": 22.1247,
5309
+ "step": 7560
5310
+ },
5311
+ {
5312
+ "epoch": 0.9218357561458254,
5313
+ "grad_norm": 329.25,
5314
+ "learning_rate": 9.855963985288158e-06,
5315
+ "loss": 22.1231,
5316
+ "step": 7570
5317
+ },
5318
+ {
5319
+ "epoch": 0.9230535048329401,
5320
+ "grad_norm": 593.0,
5321
+ "learning_rate": 9.855773713141908e-06,
5322
+ "loss": 22.0844,
5323
+ "step": 7580
5324
+ },
5325
+ {
5326
+ "epoch": 0.9242712535200548,
5327
+ "grad_norm": 361.5,
5328
+ "learning_rate": 9.855583440995657e-06,
5329
+ "loss": 22.0896,
5330
+ "step": 7590
5331
+ },
5332
+ {
5333
+ "epoch": 0.9254890022071695,
5334
+ "grad_norm": 487.25,
5335
+ "learning_rate": 9.855393168849406e-06,
5336
+ "loss": 22.3015,
5337
+ "step": 7600
5338
+ },
5339
+ {
5340
+ "epoch": 0.9267067508942842,
5341
+ "grad_norm": 247.125,
5342
+ "learning_rate": 9.855202896703155e-06,
5343
+ "loss": 22.0374,
5344
+ "step": 7610
5345
+ },
5346
+ {
5347
+ "epoch": 0.9279244995813989,
5348
+ "grad_norm": 434.5,
5349
+ "learning_rate": 9.855012624556905e-06,
5350
+ "loss": 22.0935,
5351
+ "step": 7620
5352
+ },
5353
+ {
5354
+ "epoch": 0.9291422482685135,
5355
+ "grad_norm": 357.25,
5356
+ "learning_rate": 9.854822352410654e-06,
5357
+ "loss": 22.2531,
5358
+ "step": 7630
5359
+ },
5360
+ {
5361
+ "epoch": 0.9303599969556283,
5362
+ "grad_norm": 382.5,
5363
+ "learning_rate": 9.854632080264403e-06,
5364
+ "loss": 21.9988,
5365
+ "step": 7640
5366
+ },
5367
+ {
5368
+ "epoch": 0.931577745642743,
5369
+ "grad_norm": 518.0,
5370
+ "learning_rate": 9.854441808118152e-06,
5371
+ "loss": 21.9012,
5372
+ "step": 7650
5373
+ },
5374
+ {
5375
+ "epoch": 0.9327954943298576,
5376
+ "grad_norm": 402.75,
5377
+ "learning_rate": 9.8542515359719e-06,
5378
+ "loss": 21.89,
5379
+ "step": 7660
5380
+ },
5381
+ {
5382
+ "epoch": 0.9340132430169724,
5383
+ "grad_norm": 585.0,
5384
+ "learning_rate": 9.854061263825651e-06,
5385
+ "loss": 22.0898,
5386
+ "step": 7670
5387
+ },
5388
+ {
5389
+ "epoch": 0.9352309917040871,
5390
+ "grad_norm": 403.5,
5391
+ "learning_rate": 9.8538709916794e-06,
5392
+ "loss": 22.1882,
5393
+ "step": 7680
5394
+ },
5395
+ {
5396
+ "epoch": 0.9364487403912017,
5397
+ "grad_norm": 278.75,
5398
+ "learning_rate": 9.853680719533149e-06,
5399
+ "loss": 22.0084,
5400
+ "step": 7690
5401
+ },
5402
+ {
5403
+ "epoch": 0.9376664890783165,
5404
+ "grad_norm": 293.75,
5405
+ "learning_rate": 9.853490447386898e-06,
5406
+ "loss": 22.1582,
5407
+ "step": 7700
5408
+ },
5409
+ {
5410
+ "epoch": 0.9388842377654312,
5411
+ "grad_norm": 400.5,
5412
+ "learning_rate": 9.853300175240648e-06,
5413
+ "loss": 21.6699,
5414
+ "step": 7710
5415
+ },
5416
+ {
5417
+ "epoch": 0.9401019864525458,
5418
+ "grad_norm": 300.25,
5419
+ "learning_rate": 9.853109903094397e-06,
5420
+ "loss": 22.1939,
5421
+ "step": 7720
5422
+ },
5423
+ {
5424
+ "epoch": 0.9413197351396606,
5425
+ "grad_norm": 329.25,
5426
+ "learning_rate": 9.852919630948146e-06,
5427
+ "loss": 21.8216,
5428
+ "step": 7730
5429
+ },
5430
+ {
5431
+ "epoch": 0.9425374838267753,
5432
+ "grad_norm": 266.25,
5433
+ "learning_rate": 9.852729358801895e-06,
5434
+ "loss": 22.0178,
5435
+ "step": 7740
5436
+ },
5437
+ {
5438
+ "epoch": 0.9437552325138899,
5439
+ "grad_norm": 325.75,
5440
+ "learning_rate": 9.852539086655644e-06,
5441
+ "loss": 22.0285,
5442
+ "step": 7750
5443
+ },
5444
+ {
5445
+ "epoch": 0.9449729812010046,
5446
+ "grad_norm": 530.5,
5447
+ "learning_rate": 9.852348814509395e-06,
5448
+ "loss": 21.9276,
5449
+ "step": 7760
5450
+ },
5451
+ {
5452
+ "epoch": 0.9461907298881194,
5453
+ "grad_norm": 448.25,
5454
+ "learning_rate": 9.852158542363143e-06,
5455
+ "loss": 21.7508,
5456
+ "step": 7770
5457
+ },
5458
+ {
5459
+ "epoch": 0.947408478575234,
5460
+ "grad_norm": 370.5,
5461
+ "learning_rate": 9.851968270216892e-06,
5462
+ "loss": 22.271,
5463
+ "step": 7780
5464
+ },
5465
+ {
5466
+ "epoch": 0.9486262272623487,
5467
+ "grad_norm": 716.5,
5468
+ "learning_rate": 9.851777998070641e-06,
5469
+ "loss": 21.8806,
5470
+ "step": 7790
5471
+ },
5472
+ {
5473
+ "epoch": 0.9498439759494635,
5474
+ "grad_norm": 549.5,
5475
+ "learning_rate": 9.85158772592439e-06,
5476
+ "loss": 21.8866,
5477
+ "step": 7800
5478
+ },
5479
+ {
5480
+ "epoch": 0.9510617246365781,
5481
+ "grad_norm": 265.75,
5482
+ "learning_rate": 9.85139745377814e-06,
5483
+ "loss": 21.8491,
5484
+ "step": 7810
5485
+ },
5486
+ {
5487
+ "epoch": 0.9522794733236928,
5488
+ "grad_norm": 544.0,
5489
+ "learning_rate": 9.85120718163189e-06,
5490
+ "loss": 21.9407,
5491
+ "step": 7820
5492
+ },
5493
+ {
5494
+ "epoch": 0.9534972220108076,
5495
+ "grad_norm": 254.5,
5496
+ "learning_rate": 9.851016909485638e-06,
5497
+ "loss": 22.0854,
5498
+ "step": 7830
5499
+ },
5500
+ {
5501
+ "epoch": 0.9547149706979222,
5502
+ "grad_norm": 321.0,
5503
+ "learning_rate": 9.850826637339387e-06,
5504
+ "loss": 22.0765,
5505
+ "step": 7840
5506
+ },
5507
+ {
5508
+ "epoch": 0.9559327193850369,
5509
+ "grad_norm": 363.75,
5510
+ "learning_rate": 9.850636365193138e-06,
5511
+ "loss": 22.0777,
5512
+ "step": 7850
5513
+ },
5514
+ {
5515
+ "epoch": 0.9571504680721517,
5516
+ "grad_norm": 383.0,
5517
+ "learning_rate": 9.850446093046887e-06,
5518
+ "loss": 22.0465,
5519
+ "step": 7860
5520
+ },
5521
+ {
5522
+ "epoch": 0.9583682167592663,
5523
+ "grad_norm": 314.25,
5524
+ "learning_rate": 9.850255820900636e-06,
5525
+ "loss": 21.9334,
5526
+ "step": 7870
5527
+ },
5528
+ {
5529
+ "epoch": 0.959585965446381,
5530
+ "grad_norm": 440.0,
5531
+ "learning_rate": 9.850065548754384e-06,
5532
+ "loss": 22.0787,
5533
+ "step": 7880
5534
+ },
5535
+ {
5536
+ "epoch": 0.9608037141334957,
5537
+ "grad_norm": 478.5,
5538
+ "learning_rate": 9.849875276608133e-06,
5539
+ "loss": 22.4122,
5540
+ "step": 7890
5541
+ },
5542
+ {
5543
+ "epoch": 0.9620214628206104,
5544
+ "grad_norm": 385.25,
5545
+ "learning_rate": 9.849685004461884e-06,
5546
+ "loss": 22.0855,
5547
+ "step": 7900
5548
+ },
5549
+ {
5550
+ "epoch": 0.9632392115077251,
5551
+ "grad_norm": 389.75,
5552
+ "learning_rate": 9.849494732315633e-06,
5553
+ "loss": 22.1411,
5554
+ "step": 7910
5555
+ },
5556
+ {
5557
+ "epoch": 0.9644569601948398,
5558
+ "grad_norm": 342.5,
5559
+ "learning_rate": 9.849304460169382e-06,
5560
+ "loss": 22.098,
5561
+ "step": 7920
5562
+ },
5563
+ {
5564
+ "epoch": 0.9656747088819545,
5565
+ "grad_norm": 266.75,
5566
+ "learning_rate": 9.84911418802313e-06,
5567
+ "loss": 22.1849,
5568
+ "step": 7930
5569
+ },
5570
+ {
5571
+ "epoch": 0.9668924575690692,
5572
+ "grad_norm": 556.5,
5573
+ "learning_rate": 9.84892391587688e-06,
5574
+ "loss": 22.1534,
5575
+ "step": 7940
5576
+ },
5577
+ {
5578
+ "epoch": 0.9681102062561838,
5579
+ "grad_norm": 413.75,
5580
+ "learning_rate": 9.84873364373063e-06,
5581
+ "loss": 21.9893,
5582
+ "step": 7950
5583
+ },
5584
+ {
5585
+ "epoch": 0.9693279549432986,
5586
+ "grad_norm": 403.75,
5587
+ "learning_rate": 9.848543371584379e-06,
5588
+ "loss": 22.0232,
5589
+ "step": 7960
5590
+ },
5591
+ {
5592
+ "epoch": 0.9705457036304133,
5593
+ "grad_norm": 713.0,
5594
+ "learning_rate": 9.848353099438128e-06,
5595
+ "loss": 22.1614,
5596
+ "step": 7970
5597
+ },
5598
+ {
5599
+ "epoch": 0.9717634523175279,
5600
+ "grad_norm": 349.5,
5601
+ "learning_rate": 9.848162827291876e-06,
5602
+ "loss": 21.9629,
5603
+ "step": 7980
5604
+ },
5605
+ {
5606
+ "epoch": 0.9729812010046427,
5607
+ "grad_norm": 450.25,
5608
+ "learning_rate": 9.847972555145625e-06,
5609
+ "loss": 21.956,
5610
+ "step": 7990
5611
+ },
5612
+ {
5613
+ "epoch": 0.9741989496917574,
5614
+ "grad_norm": 343.75,
5615
+ "learning_rate": 9.847782282999376e-06,
5616
+ "loss": 21.8555,
5617
+ "step": 8000
5618
+ },
5619
+ {
5620
+ "epoch": 0.975416698378872,
5621
+ "grad_norm": 528.5,
5622
+ "learning_rate": 9.847592010853125e-06,
5623
+ "loss": 21.9875,
5624
+ "step": 8010
5625
+ },
5626
+ {
5627
+ "epoch": 0.9766344470659868,
5628
+ "grad_norm": 587.5,
5629
+ "learning_rate": 9.847401738706874e-06,
5630
+ "loss": 21.8694,
5631
+ "step": 8020
5632
+ },
5633
+ {
5634
+ "epoch": 0.9778521957531014,
5635
+ "grad_norm": 807.5,
5636
+ "learning_rate": 9.847211466560623e-06,
5637
+ "loss": 21.7027,
5638
+ "step": 8030
5639
+ },
5640
+ {
5641
+ "epoch": 0.9790699444402161,
5642
+ "grad_norm": 629.0,
5643
+ "learning_rate": 9.847021194414371e-06,
5644
+ "loss": 21.8312,
5645
+ "step": 8040
5646
+ },
5647
+ {
5648
+ "epoch": 0.9802876931273309,
5649
+ "grad_norm": 564.5,
5650
+ "learning_rate": 9.846830922268122e-06,
5651
+ "loss": 22.0643,
5652
+ "step": 8050
5653
+ },
5654
+ {
5655
+ "epoch": 0.9815054418144455,
5656
+ "grad_norm": 439.0,
5657
+ "learning_rate": 9.84664065012187e-06,
5658
+ "loss": 21.993,
5659
+ "step": 8060
5660
+ },
5661
+ {
5662
+ "epoch": 0.9827231905015602,
5663
+ "grad_norm": 380.0,
5664
+ "learning_rate": 9.84645037797562e-06,
5665
+ "loss": 21.8951,
5666
+ "step": 8070
5667
+ },
5668
+ {
5669
+ "epoch": 0.983940939188675,
5670
+ "grad_norm": 828.0,
5671
+ "learning_rate": 9.846260105829369e-06,
5672
+ "loss": 21.9939,
5673
+ "step": 8080
5674
+ },
5675
+ {
5676
+ "epoch": 0.9851586878757896,
5677
+ "grad_norm": 306.75,
5678
+ "learning_rate": 9.846069833683117e-06,
5679
+ "loss": 22.3458,
5680
+ "step": 8090
5681
+ },
5682
+ {
5683
+ "epoch": 0.9863764365629043,
5684
+ "grad_norm": 384.0,
5685
+ "learning_rate": 9.845879561536868e-06,
5686
+ "loss": 21.869,
5687
+ "step": 8100
5688
+ },
5689
+ {
5690
+ "epoch": 0.9875941852500191,
5691
+ "grad_norm": 377.75,
5692
+ "learning_rate": 9.845689289390617e-06,
5693
+ "loss": 22.1,
5694
+ "step": 8110
5695
+ },
5696
+ {
5697
+ "epoch": 0.9888119339371337,
5698
+ "grad_norm": 531.0,
5699
+ "learning_rate": 9.845499017244366e-06,
5700
+ "loss": 22.2576,
5701
+ "step": 8120
5702
+ },
5703
+ {
5704
+ "epoch": 0.9900296826242484,
5705
+ "grad_norm": 531.5,
5706
+ "learning_rate": 9.845308745098115e-06,
5707
+ "loss": 22.0621,
5708
+ "step": 8130
5709
+ },
5710
+ {
5711
+ "epoch": 0.9912474313113632,
5712
+ "grad_norm": 466.25,
5713
+ "learning_rate": 9.845118472951864e-06,
5714
+ "loss": 21.9393,
5715
+ "step": 8140
5716
+ },
5717
+ {
5718
+ "epoch": 0.9924651799984778,
5719
+ "grad_norm": 320.5,
5720
+ "learning_rate": 9.844928200805614e-06,
5721
+ "loss": 21.6988,
5722
+ "step": 8150
5723
+ },
5724
+ {
5725
+ "epoch": 0.9936829286855925,
5726
+ "grad_norm": 485.0,
5727
+ "learning_rate": 9.844737928659363e-06,
5728
+ "loss": 21.8836,
5729
+ "step": 8160
5730
+ },
5731
+ {
5732
+ "epoch": 0.9949006773727072,
5733
+ "grad_norm": 595.0,
5734
+ "learning_rate": 9.844547656513112e-06,
5735
+ "loss": 21.7679,
5736
+ "step": 8170
5737
+ },
5738
+ {
5739
+ "epoch": 0.9961184260598219,
5740
+ "grad_norm": 466.0,
5741
+ "learning_rate": 9.84435738436686e-06,
5742
+ "loss": 22.1523,
5743
+ "step": 8180
5744
+ },
5745
+ {
5746
+ "epoch": 0.9973361747469366,
5747
+ "grad_norm": 303.5,
5748
+ "learning_rate": 9.84416711222061e-06,
5749
+ "loss": 21.9741,
5750
+ "step": 8190
5751
+ },
5752
+ {
5753
+ "epoch": 0.9985539234340512,
5754
+ "grad_norm": 300.5,
5755
+ "learning_rate": 9.84397684007436e-06,
5756
+ "loss": 21.9414,
5757
+ "step": 8200
5758
+ },
5759
+ {
5760
+ "epoch": 0.999771672121166,
5761
+ "grad_norm": 333.75,
5762
+ "learning_rate": 9.843786567928109e-06,
5763
+ "loss": 21.7997,
5764
+ "step": 8210
5765
  }
5766
  ],
5767
  "logging_steps": 10,
 
5776
  "should_evaluate": false,
5777
  "should_log": false,
5778
  "should_save": true,
5779
+ "should_training_stop": true
5780
  },
5781
  "attributes": {}
5782
  }
5783
  },
5784
+ "total_flos": 2.8664974749173023e+18,
5785
  "train_batch_size": 16,
5786
  "trial_name": null,
5787
  "trial_params": null