lucascruz commited on
Commit
58f7a2f
·
1 Parent(s): f1a4d92

Training in progress, step 3270, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d9771df9793c16ba16696b8f3b9c0e905d4e1b702cb6562230c1a2e5e35485b
3
  size 686648325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:919958d2a08dda2fc5b6febacaefa98ca6f2e9720593b56c826cf30453121f1e
3
  size 686648325
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23faefff5cb399f167cebc88d8c34e3020171376181940c5ec6ab4d8dc3d003e
3
  size 343308717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d713634ca5cceb391d747c573527f2a2e8cb873435a0cc082d237b58d7f4a92
3
  size 343308717
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b7a16101b0ef7fe0ef38bd8ea213d6ec0bc7ac93095829f8c98671fd4fe5c15
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421d5b4ce1e9cea720bfab05ecb240212268cc41cf974e33e3789a6040d6daaa
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.497180097756611,
5
  "eval_steps": 500,
6
- "global_step": 3052,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18352,13 +18352,1321 @@
18352
  "learning_rate": 0.00015571428571428572,
18353
  "loss": 0.5721,
18354
  "step": 3052
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18355
  }
18356
  ],
18357
  "logging_steps": 1,
18358
  "max_steps": 4360,
18359
  "num_train_epochs": 5,
18360
  "save_steps": 218,
18361
- "total_flos": 6.0552475832752275e+19,
18362
  "trial_name": null,
18363
  "trial_params": null
18364
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.7469786761677977,
5
  "eval_steps": 500,
6
+ "global_step": 3270,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18352
  "learning_rate": 0.00015571428571428572,
18353
  "loss": 0.5721,
18354
  "step": 3052
18355
+ },
18356
+ {
18357
+ "epoch": 3.5,
18358
+ "learning_rate": 0.0001555952380952381,
18359
+ "loss": 0.5651,
18360
+ "step": 3053
18361
+ },
18362
+ {
18363
+ "epoch": 3.5,
18364
+ "learning_rate": 0.0001554761904761905,
18365
+ "loss": 0.5654,
18366
+ "step": 3054
18367
+ },
18368
+ {
18369
+ "epoch": 3.5,
18370
+ "learning_rate": 0.00015535714285714287,
18371
+ "loss": 0.5722,
18372
+ "step": 3055
18373
+ },
18374
+ {
18375
+ "epoch": 3.5,
18376
+ "learning_rate": 0.00015523809523809525,
18377
+ "loss": 0.5758,
18378
+ "step": 3056
18379
+ },
18380
+ {
18381
+ "epoch": 3.5,
18382
+ "learning_rate": 0.00015511904761904762,
18383
+ "loss": 0.6001,
18384
+ "step": 3057
18385
+ },
18386
+ {
18387
+ "epoch": 3.5,
18388
+ "learning_rate": 0.000155,
18389
+ "loss": 0.5672,
18390
+ "step": 3058
18391
+ },
18392
+ {
18393
+ "epoch": 3.51,
18394
+ "learning_rate": 0.0001548809523809524,
18395
+ "loss": 0.6342,
18396
+ "step": 3059
18397
+ },
18398
+ {
18399
+ "epoch": 3.51,
18400
+ "learning_rate": 0.00015476190476190478,
18401
+ "loss": 0.5929,
18402
+ "step": 3060
18403
+ },
18404
+ {
18405
+ "epoch": 3.51,
18406
+ "learning_rate": 0.00015464285714285715,
18407
+ "loss": 0.5297,
18408
+ "step": 3061
18409
+ },
18410
+ {
18411
+ "epoch": 3.51,
18412
+ "learning_rate": 0.00015452380952380953,
18413
+ "loss": 0.5476,
18414
+ "step": 3062
18415
+ },
18416
+ {
18417
+ "epoch": 3.51,
18418
+ "learning_rate": 0.00015440476190476193,
18419
+ "loss": 0.5195,
18420
+ "step": 3063
18421
+ },
18422
+ {
18423
+ "epoch": 3.51,
18424
+ "learning_rate": 0.0001542857142857143,
18425
+ "loss": 0.5523,
18426
+ "step": 3064
18427
+ },
18428
+ {
18429
+ "epoch": 3.51,
18430
+ "learning_rate": 0.00015416666666666668,
18431
+ "loss": 0.5988,
18432
+ "step": 3065
18433
+ },
18434
+ {
18435
+ "epoch": 3.51,
18436
+ "learning_rate": 0.00015404761904761906,
18437
+ "loss": 0.5551,
18438
+ "step": 3066
18439
+ },
18440
+ {
18441
+ "epoch": 3.51,
18442
+ "learning_rate": 0.00015392857142857143,
18443
+ "loss": 0.5389,
18444
+ "step": 3067
18445
+ },
18446
+ {
18447
+ "epoch": 3.52,
18448
+ "learning_rate": 0.00015380952380952384,
18449
+ "loss": 0.5201,
18450
+ "step": 3068
18451
+ },
18452
+ {
18453
+ "epoch": 3.52,
18454
+ "learning_rate": 0.0001536904761904762,
18455
+ "loss": 0.5886,
18456
+ "step": 3069
18457
+ },
18458
+ {
18459
+ "epoch": 3.52,
18460
+ "learning_rate": 0.0001535714285714286,
18461
+ "loss": 0.554,
18462
+ "step": 3070
18463
+ },
18464
+ {
18465
+ "epoch": 3.52,
18466
+ "learning_rate": 0.00015345238095238096,
18467
+ "loss": 0.5796,
18468
+ "step": 3071
18469
+ },
18470
+ {
18471
+ "epoch": 3.52,
18472
+ "learning_rate": 0.00015333333333333334,
18473
+ "loss": 0.5978,
18474
+ "step": 3072
18475
+ },
18476
+ {
18477
+ "epoch": 3.52,
18478
+ "learning_rate": 0.00015321428571428574,
18479
+ "loss": 0.5738,
18480
+ "step": 3073
18481
+ },
18482
+ {
18483
+ "epoch": 3.52,
18484
+ "learning_rate": 0.0001530952380952381,
18485
+ "loss": 0.511,
18486
+ "step": 3074
18487
+ },
18488
+ {
18489
+ "epoch": 3.52,
18490
+ "learning_rate": 0.00015297619047619046,
18491
+ "loss": 0.5642,
18492
+ "step": 3075
18493
+ },
18494
+ {
18495
+ "epoch": 3.52,
18496
+ "learning_rate": 0.00015285714285714287,
18497
+ "loss": 0.5516,
18498
+ "step": 3076
18499
+ },
18500
+ {
18501
+ "epoch": 3.53,
18502
+ "learning_rate": 0.00015273809523809524,
18503
+ "loss": 0.5769,
18504
+ "step": 3077
18505
+ },
18506
+ {
18507
+ "epoch": 3.53,
18508
+ "learning_rate": 0.00015261904761904762,
18509
+ "loss": 0.5575,
18510
+ "step": 3078
18511
+ },
18512
+ {
18513
+ "epoch": 3.53,
18514
+ "learning_rate": 0.0001525,
18515
+ "loss": 0.5616,
18516
+ "step": 3079
18517
+ },
18518
+ {
18519
+ "epoch": 3.53,
18520
+ "learning_rate": 0.00015238095238095237,
18521
+ "loss": 0.6042,
18522
+ "step": 3080
18523
+ },
18524
+ {
18525
+ "epoch": 3.53,
18526
+ "learning_rate": 0.00015226190476190477,
18527
+ "loss": 0.503,
18528
+ "step": 3081
18529
+ },
18530
+ {
18531
+ "epoch": 3.53,
18532
+ "learning_rate": 0.00015214285714285715,
18533
+ "loss": 0.5374,
18534
+ "step": 3082
18535
+ },
18536
+ {
18537
+ "epoch": 3.53,
18538
+ "learning_rate": 0.00015202380952380952,
18539
+ "loss": 0.5583,
18540
+ "step": 3083
18541
+ },
18542
+ {
18543
+ "epoch": 3.53,
18544
+ "learning_rate": 0.0001519047619047619,
18545
+ "loss": 0.5623,
18546
+ "step": 3084
18547
+ },
18548
+ {
18549
+ "epoch": 3.53,
18550
+ "learning_rate": 0.00015178571428571427,
18551
+ "loss": 0.5619,
18552
+ "step": 3085
18553
+ },
18554
+ {
18555
+ "epoch": 3.54,
18556
+ "learning_rate": 0.00015166666666666668,
18557
+ "loss": 0.5788,
18558
+ "step": 3086
18559
+ },
18560
+ {
18561
+ "epoch": 3.54,
18562
+ "learning_rate": 0.00015154761904761905,
18563
+ "loss": 0.5563,
18564
+ "step": 3087
18565
+ },
18566
+ {
18567
+ "epoch": 3.54,
18568
+ "learning_rate": 0.00015142857142857143,
18569
+ "loss": 0.5518,
18570
+ "step": 3088
18571
+ },
18572
+ {
18573
+ "epoch": 3.54,
18574
+ "learning_rate": 0.0001513095238095238,
18575
+ "loss": 0.5497,
18576
+ "step": 3089
18577
+ },
18578
+ {
18579
+ "epoch": 3.54,
18580
+ "learning_rate": 0.0001511904761904762,
18581
+ "loss": 0.5448,
18582
+ "step": 3090
18583
+ },
18584
+ {
18585
+ "epoch": 3.54,
18586
+ "learning_rate": 0.00015107142857142858,
18587
+ "loss": 0.5716,
18588
+ "step": 3091
18589
+ },
18590
+ {
18591
+ "epoch": 3.54,
18592
+ "learning_rate": 0.00015095238095238096,
18593
+ "loss": 0.5372,
18594
+ "step": 3092
18595
+ },
18596
+ {
18597
+ "epoch": 3.54,
18598
+ "learning_rate": 0.00015083333333333333,
18599
+ "loss": 0.5876,
18600
+ "step": 3093
18601
+ },
18602
+ {
18603
+ "epoch": 3.55,
18604
+ "learning_rate": 0.0001507142857142857,
18605
+ "loss": 0.5861,
18606
+ "step": 3094
18607
+ },
18608
+ {
18609
+ "epoch": 3.55,
18610
+ "learning_rate": 0.0001505952380952381,
18611
+ "loss": 0.565,
18612
+ "step": 3095
18613
+ },
18614
+ {
18615
+ "epoch": 3.55,
18616
+ "learning_rate": 0.00015047619047619048,
18617
+ "loss": 0.5765,
18618
+ "step": 3096
18619
+ },
18620
+ {
18621
+ "epoch": 3.55,
18622
+ "learning_rate": 0.00015035714285714286,
18623
+ "loss": 0.5464,
18624
+ "step": 3097
18625
+ },
18626
+ {
18627
+ "epoch": 3.55,
18628
+ "learning_rate": 0.00015023809523809524,
18629
+ "loss": 0.5415,
18630
+ "step": 3098
18631
+ },
18632
+ {
18633
+ "epoch": 3.55,
18634
+ "learning_rate": 0.00015011904761904764,
18635
+ "loss": 0.5242,
18636
+ "step": 3099
18637
+ },
18638
+ {
18639
+ "epoch": 3.55,
18640
+ "learning_rate": 0.00015000000000000001,
18641
+ "loss": 0.5616,
18642
+ "step": 3100
18643
+ },
18644
+ {
18645
+ "epoch": 3.55,
18646
+ "learning_rate": 0.0001498809523809524,
18647
+ "loss": 0.5377,
18648
+ "step": 3101
18649
+ },
18650
+ {
18651
+ "epoch": 3.55,
18652
+ "learning_rate": 0.00014976190476190476,
18653
+ "loss": 0.5817,
18654
+ "step": 3102
18655
+ },
18656
+ {
18657
+ "epoch": 3.56,
18658
+ "learning_rate": 0.00014964285714285714,
18659
+ "loss": 0.5891,
18660
+ "step": 3103
18661
+ },
18662
+ {
18663
+ "epoch": 3.56,
18664
+ "learning_rate": 0.00014952380952380954,
18665
+ "loss": 0.574,
18666
+ "step": 3104
18667
+ },
18668
+ {
18669
+ "epoch": 3.56,
18670
+ "learning_rate": 0.00014940476190476192,
18671
+ "loss": 0.5928,
18672
+ "step": 3105
18673
+ },
18674
+ {
18675
+ "epoch": 3.56,
18676
+ "learning_rate": 0.0001492857142857143,
18677
+ "loss": 0.566,
18678
+ "step": 3106
18679
+ },
18680
+ {
18681
+ "epoch": 3.56,
18682
+ "learning_rate": 0.00014916666666666667,
18683
+ "loss": 0.575,
18684
+ "step": 3107
18685
+ },
18686
+ {
18687
+ "epoch": 3.56,
18688
+ "learning_rate": 0.00014904761904761904,
18689
+ "loss": 0.5458,
18690
+ "step": 3108
18691
+ },
18692
+ {
18693
+ "epoch": 3.56,
18694
+ "learning_rate": 0.00014892857142857145,
18695
+ "loss": 0.5612,
18696
+ "step": 3109
18697
+ },
18698
+ {
18699
+ "epoch": 3.56,
18700
+ "learning_rate": 0.00014880952380952382,
18701
+ "loss": 0.5708,
18702
+ "step": 3110
18703
+ },
18704
+ {
18705
+ "epoch": 3.56,
18706
+ "learning_rate": 0.0001486904761904762,
18707
+ "loss": 0.5601,
18708
+ "step": 3111
18709
+ },
18710
+ {
18711
+ "epoch": 3.57,
18712
+ "learning_rate": 0.00014857142857142857,
18713
+ "loss": 0.5816,
18714
+ "step": 3112
18715
+ },
18716
+ {
18717
+ "epoch": 3.57,
18718
+ "learning_rate": 0.00014845238095238098,
18719
+ "loss": 0.5785,
18720
+ "step": 3113
18721
+ },
18722
+ {
18723
+ "epoch": 3.57,
18724
+ "learning_rate": 0.00014833333333333335,
18725
+ "loss": 0.5517,
18726
+ "step": 3114
18727
+ },
18728
+ {
18729
+ "epoch": 3.57,
18730
+ "learning_rate": 0.00014821428571428573,
18731
+ "loss": 0.5566,
18732
+ "step": 3115
18733
+ },
18734
+ {
18735
+ "epoch": 3.57,
18736
+ "learning_rate": 0.0001480952380952381,
18737
+ "loss": 0.5071,
18738
+ "step": 3116
18739
+ },
18740
+ {
18741
+ "epoch": 3.57,
18742
+ "learning_rate": 0.00014797619047619048,
18743
+ "loss": 0.5581,
18744
+ "step": 3117
18745
+ },
18746
+ {
18747
+ "epoch": 3.57,
18748
+ "learning_rate": 0.00014785714285714288,
18749
+ "loss": 0.4908,
18750
+ "step": 3118
18751
+ },
18752
+ {
18753
+ "epoch": 3.57,
18754
+ "learning_rate": 0.00014773809523809526,
18755
+ "loss": 0.5595,
18756
+ "step": 3119
18757
+ },
18758
+ {
18759
+ "epoch": 3.58,
18760
+ "learning_rate": 0.00014761904761904763,
18761
+ "loss": 0.5943,
18762
+ "step": 3120
18763
+ },
18764
+ {
18765
+ "epoch": 3.58,
18766
+ "learning_rate": 0.0001475,
18767
+ "loss": 0.5505,
18768
+ "step": 3121
18769
+ },
18770
+ {
18771
+ "epoch": 3.58,
18772
+ "learning_rate": 0.0001473809523809524,
18773
+ "loss": 0.5632,
18774
+ "step": 3122
18775
+ },
18776
+ {
18777
+ "epoch": 3.58,
18778
+ "learning_rate": 0.00014726190476190479,
18779
+ "loss": 0.5612,
18780
+ "step": 3123
18781
+ },
18782
+ {
18783
+ "epoch": 3.58,
18784
+ "learning_rate": 0.00014714285714285716,
18785
+ "loss": 0.5699,
18786
+ "step": 3124
18787
+ },
18788
+ {
18789
+ "epoch": 3.58,
18790
+ "learning_rate": 0.00014702380952380954,
18791
+ "loss": 0.5397,
18792
+ "step": 3125
18793
+ },
18794
+ {
18795
+ "epoch": 3.58,
18796
+ "learning_rate": 0.0001469047619047619,
18797
+ "loss": 0.5653,
18798
+ "step": 3126
18799
+ },
18800
+ {
18801
+ "epoch": 3.58,
18802
+ "learning_rate": 0.0001467857142857143,
18803
+ "loss": 0.5666,
18804
+ "step": 3127
18805
+ },
18806
+ {
18807
+ "epoch": 3.58,
18808
+ "learning_rate": 0.00014666666666666666,
18809
+ "loss": 0.563,
18810
+ "step": 3128
18811
+ },
18812
+ {
18813
+ "epoch": 3.59,
18814
+ "learning_rate": 0.00014654761904761904,
18815
+ "loss": 0.5528,
18816
+ "step": 3129
18817
+ },
18818
+ {
18819
+ "epoch": 3.59,
18820
+ "learning_rate": 0.00014642857142857141,
18821
+ "loss": 0.5511,
18822
+ "step": 3130
18823
+ },
18824
+ {
18825
+ "epoch": 3.59,
18826
+ "learning_rate": 0.00014630952380952382,
18827
+ "loss": 0.5667,
18828
+ "step": 3131
18829
+ },
18830
+ {
18831
+ "epoch": 3.59,
18832
+ "learning_rate": 0.0001461904761904762,
18833
+ "loss": 0.4794,
18834
+ "step": 3132
18835
+ },
18836
+ {
18837
+ "epoch": 3.59,
18838
+ "learning_rate": 0.00014607142857142857,
18839
+ "loss": 0.5021,
18840
+ "step": 3133
18841
+ },
18842
+ {
18843
+ "epoch": 3.59,
18844
+ "learning_rate": 0.00014595238095238094,
18845
+ "loss": 0.5656,
18846
+ "step": 3134
18847
+ },
18848
+ {
18849
+ "epoch": 3.59,
18850
+ "learning_rate": 0.00014583333333333335,
18851
+ "loss": 0.6263,
18852
+ "step": 3135
18853
+ },
18854
+ {
18855
+ "epoch": 3.59,
18856
+ "learning_rate": 0.00014571428571428572,
18857
+ "loss": 0.5461,
18858
+ "step": 3136
18859
+ },
18860
+ {
18861
+ "epoch": 3.59,
18862
+ "learning_rate": 0.0001455952380952381,
18863
+ "loss": 0.555,
18864
+ "step": 3137
18865
+ },
18866
+ {
18867
+ "epoch": 3.6,
18868
+ "learning_rate": 0.00014547619047619047,
18869
+ "loss": 0.5897,
18870
+ "step": 3138
18871
+ },
18872
+ {
18873
+ "epoch": 3.6,
18874
+ "learning_rate": 0.00014535714285714285,
18875
+ "loss": 0.5382,
18876
+ "step": 3139
18877
+ },
18878
+ {
18879
+ "epoch": 3.6,
18880
+ "learning_rate": 0.00014523809523809525,
18881
+ "loss": 0.5902,
18882
+ "step": 3140
18883
+ },
18884
+ {
18885
+ "epoch": 3.6,
18886
+ "learning_rate": 0.00014511904761904763,
18887
+ "loss": 0.5703,
18888
+ "step": 3141
18889
+ },
18890
+ {
18891
+ "epoch": 3.6,
18892
+ "learning_rate": 0.000145,
18893
+ "loss": 0.5615,
18894
+ "step": 3142
18895
+ },
18896
+ {
18897
+ "epoch": 3.6,
18898
+ "learning_rate": 0.00014488095238095238,
18899
+ "loss": 0.5837,
18900
+ "step": 3143
18901
+ },
18902
+ {
18903
+ "epoch": 3.6,
18904
+ "learning_rate": 0.00014476190476190475,
18905
+ "loss": 0.5589,
18906
+ "step": 3144
18907
+ },
18908
+ {
18909
+ "epoch": 3.6,
18910
+ "learning_rate": 0.00014464285714285715,
18911
+ "loss": 0.5947,
18912
+ "step": 3145
18913
+ },
18914
+ {
18915
+ "epoch": 3.6,
18916
+ "learning_rate": 0.00014452380952380953,
18917
+ "loss": 0.579,
18918
+ "step": 3146
18919
+ },
18920
+ {
18921
+ "epoch": 3.61,
18922
+ "learning_rate": 0.0001444047619047619,
18923
+ "loss": 0.5669,
18924
+ "step": 3147
18925
+ },
18926
+ {
18927
+ "epoch": 3.61,
18928
+ "learning_rate": 0.00014428571428571428,
18929
+ "loss": 0.5375,
18930
+ "step": 3148
18931
+ },
18932
+ {
18933
+ "epoch": 3.61,
18934
+ "learning_rate": 0.00014416666666666668,
18935
+ "loss": 0.5751,
18936
+ "step": 3149
18937
+ },
18938
+ {
18939
+ "epoch": 3.61,
18940
+ "learning_rate": 0.00014404761904761906,
18941
+ "loss": 0.5643,
18942
+ "step": 3150
18943
+ },
18944
+ {
18945
+ "epoch": 3.61,
18946
+ "learning_rate": 0.00014392857142857143,
18947
+ "loss": 0.5664,
18948
+ "step": 3151
18949
+ },
18950
+ {
18951
+ "epoch": 3.61,
18952
+ "learning_rate": 0.0001438095238095238,
18953
+ "loss": 0.5694,
18954
+ "step": 3152
18955
+ },
18956
+ {
18957
+ "epoch": 3.61,
18958
+ "learning_rate": 0.00014369047619047619,
18959
+ "loss": 0.5647,
18960
+ "step": 3153
18961
+ },
18962
+ {
18963
+ "epoch": 3.61,
18964
+ "learning_rate": 0.0001435714285714286,
18965
+ "loss": 0.541,
18966
+ "step": 3154
18967
+ },
18968
+ {
18969
+ "epoch": 3.62,
18970
+ "learning_rate": 0.00014345238095238096,
18971
+ "loss": 0.5289,
18972
+ "step": 3155
18973
+ },
18974
+ {
18975
+ "epoch": 3.62,
18976
+ "learning_rate": 0.00014333333333333334,
18977
+ "loss": 0.5777,
18978
+ "step": 3156
18979
+ },
18980
+ {
18981
+ "epoch": 3.62,
18982
+ "learning_rate": 0.00014321428571428571,
18983
+ "loss": 0.5671,
18984
+ "step": 3157
18985
+ },
18986
+ {
18987
+ "epoch": 3.62,
18988
+ "learning_rate": 0.00014309523809523812,
18989
+ "loss": 0.5347,
18990
+ "step": 3158
18991
+ },
18992
+ {
18993
+ "epoch": 3.62,
18994
+ "learning_rate": 0.0001429761904761905,
18995
+ "loss": 0.6037,
18996
+ "step": 3159
18997
+ },
18998
+ {
18999
+ "epoch": 3.62,
19000
+ "learning_rate": 0.00014285714285714287,
19001
+ "loss": 0.5622,
19002
+ "step": 3160
19003
+ },
19004
+ {
19005
+ "epoch": 3.62,
19006
+ "learning_rate": 0.00014273809523809524,
19007
+ "loss": 0.5508,
19008
+ "step": 3161
19009
+ },
19010
+ {
19011
+ "epoch": 3.62,
19012
+ "learning_rate": 0.00014261904761904762,
19013
+ "loss": 0.5439,
19014
+ "step": 3162
19015
+ },
19016
+ {
19017
+ "epoch": 3.62,
19018
+ "learning_rate": 0.00014250000000000002,
19019
+ "loss": 0.5572,
19020
+ "step": 3163
19021
+ },
19022
+ {
19023
+ "epoch": 3.63,
19024
+ "learning_rate": 0.0001423809523809524,
19025
+ "loss": 0.5478,
19026
+ "step": 3164
19027
+ },
19028
+ {
19029
+ "epoch": 3.63,
19030
+ "learning_rate": 0.00014226190476190477,
19031
+ "loss": 0.5748,
19032
+ "step": 3165
19033
+ },
19034
+ {
19035
+ "epoch": 3.63,
19036
+ "learning_rate": 0.00014214285714285715,
19037
+ "loss": 0.5241,
19038
+ "step": 3166
19039
+ },
19040
+ {
19041
+ "epoch": 3.63,
19042
+ "learning_rate": 0.00014202380952380952,
19043
+ "loss": 0.5662,
19044
+ "step": 3167
19045
+ },
19046
+ {
19047
+ "epoch": 3.63,
19048
+ "learning_rate": 0.00014190476190476193,
19049
+ "loss": 0.568,
19050
+ "step": 3168
19051
+ },
19052
+ {
19053
+ "epoch": 3.63,
19054
+ "learning_rate": 0.0001417857142857143,
19055
+ "loss": 0.5808,
19056
+ "step": 3169
19057
+ },
19058
+ {
19059
+ "epoch": 3.63,
19060
+ "learning_rate": 0.00014166666666666668,
19061
+ "loss": 0.5736,
19062
+ "step": 3170
19063
+ },
19064
+ {
19065
+ "epoch": 3.63,
19066
+ "learning_rate": 0.00014154761904761905,
19067
+ "loss": 0.5496,
19068
+ "step": 3171
19069
+ },
19070
+ {
19071
+ "epoch": 3.63,
19072
+ "learning_rate": 0.00014142857142857145,
19073
+ "loss": 0.5584,
19074
+ "step": 3172
19075
+ },
19076
+ {
19077
+ "epoch": 3.64,
19078
+ "learning_rate": 0.00014130952380952383,
19079
+ "loss": 0.5607,
19080
+ "step": 3173
19081
+ },
19082
+ {
19083
+ "epoch": 3.64,
19084
+ "learning_rate": 0.0001411904761904762,
19085
+ "loss": 0.5238,
19086
+ "step": 3174
19087
+ },
19088
+ {
19089
+ "epoch": 3.64,
19090
+ "learning_rate": 0.00014107142857142858,
19091
+ "loss": 0.5569,
19092
+ "step": 3175
19093
+ },
19094
+ {
19095
+ "epoch": 3.64,
19096
+ "learning_rate": 0.00014095238095238096,
19097
+ "loss": 0.5491,
19098
+ "step": 3176
19099
+ },
19100
+ {
19101
+ "epoch": 3.64,
19102
+ "learning_rate": 0.00014083333333333336,
19103
+ "loss": 0.5248,
19104
+ "step": 3177
19105
+ },
19106
+ {
19107
+ "epoch": 3.64,
19108
+ "learning_rate": 0.00014071428571428573,
19109
+ "loss": 0.5277,
19110
+ "step": 3178
19111
+ },
19112
+ {
19113
+ "epoch": 3.64,
19114
+ "learning_rate": 0.00014059523809523808,
19115
+ "loss": 0.5376,
19116
+ "step": 3179
19117
+ },
19118
+ {
19119
+ "epoch": 3.64,
19120
+ "learning_rate": 0.00014047619047619049,
19121
+ "loss": 0.5606,
19122
+ "step": 3180
19123
+ },
19124
+ {
19125
+ "epoch": 3.64,
19126
+ "learning_rate": 0.00014035714285714286,
19127
+ "loss": 0.5504,
19128
+ "step": 3181
19129
+ },
19130
+ {
19131
+ "epoch": 3.65,
19132
+ "learning_rate": 0.00014023809523809524,
19133
+ "loss": 0.5313,
19134
+ "step": 3182
19135
+ },
19136
+ {
19137
+ "epoch": 3.65,
19138
+ "learning_rate": 0.0001401190476190476,
19139
+ "loss": 0.5728,
19140
+ "step": 3183
19141
+ },
19142
+ {
19143
+ "epoch": 3.65,
19144
+ "learning_rate": 0.00014,
19145
+ "loss": 0.5316,
19146
+ "step": 3184
19147
+ },
19148
+ {
19149
+ "epoch": 3.65,
19150
+ "learning_rate": 0.0001398809523809524,
19151
+ "loss": 0.5956,
19152
+ "step": 3185
19153
+ },
19154
+ {
19155
+ "epoch": 3.65,
19156
+ "learning_rate": 0.00013976190476190477,
19157
+ "loss": 0.516,
19158
+ "step": 3186
19159
+ },
19160
+ {
19161
+ "epoch": 3.65,
19162
+ "learning_rate": 0.00013964285714285714,
19163
+ "loss": 0.5605,
19164
+ "step": 3187
19165
+ },
19166
+ {
19167
+ "epoch": 3.65,
19168
+ "learning_rate": 0.00013952380952380952,
19169
+ "loss": 0.5474,
19170
+ "step": 3188
19171
+ },
19172
+ {
19173
+ "epoch": 3.65,
19174
+ "learning_rate": 0.0001394047619047619,
19175
+ "loss": 0.5748,
19176
+ "step": 3189
19177
+ },
19178
+ {
19179
+ "epoch": 3.66,
19180
+ "learning_rate": 0.0001392857142857143,
19181
+ "loss": 0.5514,
19182
+ "step": 3190
19183
+ },
19184
+ {
19185
+ "epoch": 3.66,
19186
+ "learning_rate": 0.00013916666666666667,
19187
+ "loss": 0.5641,
19188
+ "step": 3191
19189
+ },
19190
+ {
19191
+ "epoch": 3.66,
19192
+ "learning_rate": 0.00013904761904761905,
19193
+ "loss": 0.6025,
19194
+ "step": 3192
19195
+ },
19196
+ {
19197
+ "epoch": 3.66,
19198
+ "learning_rate": 0.00013892857142857142,
19199
+ "loss": 0.545,
19200
+ "step": 3193
19201
+ },
19202
+ {
19203
+ "epoch": 3.66,
19204
+ "learning_rate": 0.00013880952380952382,
19205
+ "loss": 0.5669,
19206
+ "step": 3194
19207
+ },
19208
+ {
19209
+ "epoch": 3.66,
19210
+ "learning_rate": 0.0001386904761904762,
19211
+ "loss": 0.5016,
19212
+ "step": 3195
19213
+ },
19214
+ {
19215
+ "epoch": 3.66,
19216
+ "learning_rate": 0.00013857142857142857,
19217
+ "loss": 0.5498,
19218
+ "step": 3196
19219
+ },
19220
+ {
19221
+ "epoch": 3.66,
19222
+ "learning_rate": 0.00013845238095238095,
19223
+ "loss": 0.5838,
19224
+ "step": 3197
19225
+ },
19226
+ {
19227
+ "epoch": 3.66,
19228
+ "learning_rate": 0.00013833333333333333,
19229
+ "loss": 0.5395,
19230
+ "step": 3198
19231
+ },
19232
+ {
19233
+ "epoch": 3.67,
19234
+ "learning_rate": 0.00013821428571428573,
19235
+ "loss": 0.5385,
19236
+ "step": 3199
19237
+ },
19238
+ {
19239
+ "epoch": 3.67,
19240
+ "learning_rate": 0.0001380952380952381,
19241
+ "loss": 0.5247,
19242
+ "step": 3200
19243
+ },
19244
+ {
19245
+ "epoch": 3.67,
19246
+ "learning_rate": 0.00013797619047619048,
19247
+ "loss": 0.5672,
19248
+ "step": 3201
19249
+ },
19250
+ {
19251
+ "epoch": 3.67,
19252
+ "learning_rate": 0.00013785714285714285,
19253
+ "loss": 0.5626,
19254
+ "step": 3202
19255
+ },
19256
+ {
19257
+ "epoch": 3.67,
19258
+ "learning_rate": 0.00013773809523809526,
19259
+ "loss": 0.5316,
19260
+ "step": 3203
19261
+ },
19262
+ {
19263
+ "epoch": 3.67,
19264
+ "learning_rate": 0.00013761904761904763,
19265
+ "loss": 0.5689,
19266
+ "step": 3204
19267
+ },
19268
+ {
19269
+ "epoch": 3.67,
19270
+ "learning_rate": 0.0001375,
19271
+ "loss": 0.5375,
19272
+ "step": 3205
19273
+ },
19274
+ {
19275
+ "epoch": 3.67,
19276
+ "learning_rate": 0.00013738095238095238,
19277
+ "loss": 0.5374,
19278
+ "step": 3206
19279
+ },
19280
+ {
19281
+ "epoch": 3.67,
19282
+ "learning_rate": 0.00013726190476190476,
19283
+ "loss": 0.6272,
19284
+ "step": 3207
19285
+ },
19286
+ {
19287
+ "epoch": 3.68,
19288
+ "learning_rate": 0.00013714285714285716,
19289
+ "loss": 0.5552,
19290
+ "step": 3208
19291
+ },
19292
+ {
19293
+ "epoch": 3.68,
19294
+ "learning_rate": 0.00013702380952380954,
19295
+ "loss": 0.5856,
19296
+ "step": 3209
19297
+ },
19298
+ {
19299
+ "epoch": 3.68,
19300
+ "learning_rate": 0.0001369047619047619,
19301
+ "loss": 0.5454,
19302
+ "step": 3210
19303
+ },
19304
+ {
19305
+ "epoch": 3.68,
19306
+ "learning_rate": 0.0001367857142857143,
19307
+ "loss": 0.5537,
19308
+ "step": 3211
19309
+ },
19310
+ {
19311
+ "epoch": 3.68,
19312
+ "learning_rate": 0.00013666666666666666,
19313
+ "loss": 0.5393,
19314
+ "step": 3212
19315
+ },
19316
+ {
19317
+ "epoch": 3.68,
19318
+ "learning_rate": 0.00013654761904761907,
19319
+ "loss": 0.5305,
19320
+ "step": 3213
19321
+ },
19322
+ {
19323
+ "epoch": 3.68,
19324
+ "learning_rate": 0.00013642857142857144,
19325
+ "loss": 0.5719,
19326
+ "step": 3214
19327
+ },
19328
+ {
19329
+ "epoch": 3.68,
19330
+ "learning_rate": 0.00013630952380952382,
19331
+ "loss": 0.5724,
19332
+ "step": 3215
19333
+ },
19334
+ {
19335
+ "epoch": 3.69,
19336
+ "learning_rate": 0.0001361904761904762,
19337
+ "loss": 0.5327,
19338
+ "step": 3216
19339
+ },
19340
+ {
19341
+ "epoch": 3.69,
19342
+ "learning_rate": 0.0001360714285714286,
19343
+ "loss": 0.5349,
19344
+ "step": 3217
19345
+ },
19346
+ {
19347
+ "epoch": 3.69,
19348
+ "learning_rate": 0.00013595238095238097,
19349
+ "loss": 0.5766,
19350
+ "step": 3218
19351
+ },
19352
+ {
19353
+ "epoch": 3.69,
19354
+ "learning_rate": 0.00013583333333333335,
19355
+ "loss": 0.5849,
19356
+ "step": 3219
19357
+ },
19358
+ {
19359
+ "epoch": 3.69,
19360
+ "learning_rate": 0.00013571428571428572,
19361
+ "loss": 0.5525,
19362
+ "step": 3220
19363
+ },
19364
+ {
19365
+ "epoch": 3.69,
19366
+ "learning_rate": 0.0001355952380952381,
19367
+ "loss": 0.6146,
19368
+ "step": 3221
19369
+ },
19370
+ {
19371
+ "epoch": 3.69,
19372
+ "learning_rate": 0.0001354761904761905,
19373
+ "loss": 0.5167,
19374
+ "step": 3222
19375
+ },
19376
+ {
19377
+ "epoch": 3.69,
19378
+ "learning_rate": 0.00013535714285714288,
19379
+ "loss": 0.5481,
19380
+ "step": 3223
19381
+ },
19382
+ {
19383
+ "epoch": 3.69,
19384
+ "learning_rate": 0.00013523809523809525,
19385
+ "loss": 0.5893,
19386
+ "step": 3224
19387
+ },
19388
+ {
19389
+ "epoch": 3.7,
19390
+ "learning_rate": 0.00013511904761904763,
19391
+ "loss": 0.5512,
19392
+ "step": 3225
19393
+ },
19394
+ {
19395
+ "epoch": 3.7,
19396
+ "learning_rate": 0.00013500000000000003,
19397
+ "loss": 0.5674,
19398
+ "step": 3226
19399
+ },
19400
+ {
19401
+ "epoch": 3.7,
19402
+ "learning_rate": 0.0001348809523809524,
19403
+ "loss": 0.5976,
19404
+ "step": 3227
19405
+ },
19406
+ {
19407
+ "epoch": 3.7,
19408
+ "learning_rate": 0.00013476190476190478,
19409
+ "loss": 0.5986,
19410
+ "step": 3228
19411
+ },
19412
+ {
19413
+ "epoch": 3.7,
19414
+ "learning_rate": 0.00013464285714285716,
19415
+ "loss": 0.5706,
19416
+ "step": 3229
19417
+ },
19418
+ {
19419
+ "epoch": 3.7,
19420
+ "learning_rate": 0.00013452380952380953,
19421
+ "loss": 0.5692,
19422
+ "step": 3230
19423
+ },
19424
+ {
19425
+ "epoch": 3.7,
19426
+ "learning_rate": 0.00013440476190476193,
19427
+ "loss": 0.5434,
19428
+ "step": 3231
19429
+ },
19430
+ {
19431
+ "epoch": 3.7,
19432
+ "learning_rate": 0.00013428571428571428,
19433
+ "loss": 0.5059,
19434
+ "step": 3232
19435
+ },
19436
+ {
19437
+ "epoch": 3.7,
19438
+ "learning_rate": 0.00013416666666666666,
19439
+ "loss": 0.5663,
19440
+ "step": 3233
19441
+ },
19442
+ {
19443
+ "epoch": 3.71,
19444
+ "learning_rate": 0.00013404761904761903,
19445
+ "loss": 0.5383,
19446
+ "step": 3234
19447
+ },
19448
+ {
19449
+ "epoch": 3.71,
19450
+ "learning_rate": 0.00013392857142857144,
19451
+ "loss": 0.5731,
19452
+ "step": 3235
19453
+ },
19454
+ {
19455
+ "epoch": 3.71,
19456
+ "learning_rate": 0.0001338095238095238,
19457
+ "loss": 0.528,
19458
+ "step": 3236
19459
+ },
19460
+ {
19461
+ "epoch": 3.71,
19462
+ "learning_rate": 0.00013369047619047619,
19463
+ "loss": 0.5678,
19464
+ "step": 3237
19465
+ },
19466
+ {
19467
+ "epoch": 3.71,
19468
+ "learning_rate": 0.00013357142857142856,
19469
+ "loss": 0.6083,
19470
+ "step": 3238
19471
+ },
19472
+ {
19473
+ "epoch": 3.71,
19474
+ "learning_rate": 0.00013345238095238096,
19475
+ "loss": 0.5214,
19476
+ "step": 3239
19477
+ },
19478
+ {
19479
+ "epoch": 3.71,
19480
+ "learning_rate": 0.00013333333333333334,
19481
+ "loss": 0.6059,
19482
+ "step": 3240
19483
+ },
19484
+ {
19485
+ "epoch": 3.71,
19486
+ "learning_rate": 0.00013321428571428572,
19487
+ "loss": 0.5335,
19488
+ "step": 3241
19489
+ },
19490
+ {
19491
+ "epoch": 3.71,
19492
+ "learning_rate": 0.0001330952380952381,
19493
+ "loss": 0.5164,
19494
+ "step": 3242
19495
+ },
19496
+ {
19497
+ "epoch": 3.72,
19498
+ "learning_rate": 0.00013297619047619047,
19499
+ "loss": 0.6037,
19500
+ "step": 3243
19501
+ },
19502
+ {
19503
+ "epoch": 3.72,
19504
+ "learning_rate": 0.00013285714285714287,
19505
+ "loss": 0.5439,
19506
+ "step": 3244
19507
+ },
19508
+ {
19509
+ "epoch": 3.72,
19510
+ "learning_rate": 0.00013273809523809524,
19511
+ "loss": 0.5295,
19512
+ "step": 3245
19513
+ },
19514
+ {
19515
+ "epoch": 3.72,
19516
+ "learning_rate": 0.00013261904761904762,
19517
+ "loss": 0.5633,
19518
+ "step": 3246
19519
+ },
19520
+ {
19521
+ "epoch": 3.72,
19522
+ "learning_rate": 0.0001325,
19523
+ "loss": 0.5891,
19524
+ "step": 3247
19525
+ },
19526
+ {
19527
+ "epoch": 3.72,
19528
+ "learning_rate": 0.00013238095238095237,
19529
+ "loss": 0.5443,
19530
+ "step": 3248
19531
+ },
19532
+ {
19533
+ "epoch": 3.72,
19534
+ "learning_rate": 0.00013226190476190477,
19535
+ "loss": 0.5387,
19536
+ "step": 3249
19537
+ },
19538
+ {
19539
+ "epoch": 3.72,
19540
+ "learning_rate": 0.00013214285714285715,
19541
+ "loss": 0.5465,
19542
+ "step": 3250
19543
+ },
19544
+ {
19545
+ "epoch": 3.73,
19546
+ "learning_rate": 0.00013202380952380952,
19547
+ "loss": 0.5962,
19548
+ "step": 3251
19549
+ },
19550
+ {
19551
+ "epoch": 3.73,
19552
+ "learning_rate": 0.0001319047619047619,
19553
+ "loss": 0.5493,
19554
+ "step": 3252
19555
+ },
19556
+ {
19557
+ "epoch": 3.73,
19558
+ "learning_rate": 0.0001317857142857143,
19559
+ "loss": 0.5569,
19560
+ "step": 3253
19561
+ },
19562
+ {
19563
+ "epoch": 3.73,
19564
+ "learning_rate": 0.00013166666666666668,
19565
+ "loss": 0.5685,
19566
+ "step": 3254
19567
+ },
19568
+ {
19569
+ "epoch": 3.73,
19570
+ "learning_rate": 0.00013154761904761905,
19571
+ "loss": 0.5767,
19572
+ "step": 3255
19573
+ },
19574
+ {
19575
+ "epoch": 3.73,
19576
+ "learning_rate": 0.00013142857142857143,
19577
+ "loss": 0.5334,
19578
+ "step": 3256
19579
+ },
19580
+ {
19581
+ "epoch": 3.73,
19582
+ "learning_rate": 0.0001313095238095238,
19583
+ "loss": 0.5695,
19584
+ "step": 3257
19585
+ },
19586
+ {
19587
+ "epoch": 3.73,
19588
+ "learning_rate": 0.0001311904761904762,
19589
+ "loss": 0.562,
19590
+ "step": 3258
19591
+ },
19592
+ {
19593
+ "epoch": 3.73,
19594
+ "learning_rate": 0.00013107142857142858,
19595
+ "loss": 0.5139,
19596
+ "step": 3259
19597
+ },
19598
+ {
19599
+ "epoch": 3.74,
19600
+ "learning_rate": 0.00013095238095238096,
19601
+ "loss": 0.5649,
19602
+ "step": 3260
19603
+ },
19604
+ {
19605
+ "epoch": 3.74,
19606
+ "learning_rate": 0.00013083333333333333,
19607
+ "loss": 0.526,
19608
+ "step": 3261
19609
+ },
19610
+ {
19611
+ "epoch": 3.74,
19612
+ "learning_rate": 0.00013071428571428574,
19613
+ "loss": 0.5455,
19614
+ "step": 3262
19615
+ },
19616
+ {
19617
+ "epoch": 3.74,
19618
+ "learning_rate": 0.0001305952380952381,
19619
+ "loss": 0.5797,
19620
+ "step": 3263
19621
+ },
19622
+ {
19623
+ "epoch": 3.74,
19624
+ "learning_rate": 0.0001304761904761905,
19625
+ "loss": 0.5853,
19626
+ "step": 3264
19627
+ },
19628
+ {
19629
+ "epoch": 3.74,
19630
+ "learning_rate": 0.00013035714285714286,
19631
+ "loss": 0.5337,
19632
+ "step": 3265
19633
+ },
19634
+ {
19635
+ "epoch": 3.74,
19636
+ "learning_rate": 0.00013023809523809524,
19637
+ "loss": 0.5265,
19638
+ "step": 3266
19639
+ },
19640
+ {
19641
+ "epoch": 3.74,
19642
+ "learning_rate": 0.00013011904761904764,
19643
+ "loss": 0.5799,
19644
+ "step": 3267
19645
+ },
19646
+ {
19647
+ "epoch": 3.74,
19648
+ "learning_rate": 0.00013000000000000002,
19649
+ "loss": 0.5246,
19650
+ "step": 3268
19651
+ },
19652
+ {
19653
+ "epoch": 3.75,
19654
+ "learning_rate": 0.0001298809523809524,
19655
+ "loss": 0.5789,
19656
+ "step": 3269
19657
+ },
19658
+ {
19659
+ "epoch": 3.75,
19660
+ "learning_rate": 0.00012976190476190477,
19661
+ "loss": 0.5561,
19662
+ "step": 3270
19663
  }
19664
  ],
19665
  "logging_steps": 1,
19666
  "max_steps": 4360,
19667
  "num_train_epochs": 5,
19668
  "save_steps": 218,
19669
+ "total_flos": 6.487765267794887e+19,
19670
  "trial_name": null,
19671
  "trial_params": null
19672
  }