DeepDunk / trainer_state.json
Frikallo's picture
End of training
61e039b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 3031,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00013697367205542726,
"loss": 4.515,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 0.0001367473441108545,
"loss": 4.4104,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.00013652101616628176,
"loss": 4.1934,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 0.000136294688221709,
"loss": 4.2499,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00013606836027713626,
"loss": 4.1236,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 0.00013584203233256352,
"loss": 4.1098,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00013561570438799076,
"loss": 4.0061,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 0.000135389376443418,
"loss": 3.8647,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00013516304849884526,
"loss": 3.9263,
"step": 45
},
{
"epoch": 0.02,
"learning_rate": 0.00013493672055427252,
"loss": 3.8562,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 0.00013471039260969976,
"loss": 3.9549,
"step": 55
},
{
"epoch": 0.02,
"learning_rate": 0.00013448406466512702,
"loss": 3.947,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 0.00013425773672055428,
"loss": 3.9132,
"step": 65
},
{
"epoch": 0.02,
"learning_rate": 0.00013403140877598152,
"loss": 3.7056,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 0.00013380508083140878,
"loss": 3.9002,
"step": 75
},
{
"epoch": 0.03,
"learning_rate": 0.00013357875288683602,
"loss": 3.9315,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 0.00013335242494226328,
"loss": 3.8103,
"step": 85
},
{
"epoch": 0.03,
"learning_rate": 0.00013312609699769055,
"loss": 3.7449,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 0.00013289976905311778,
"loss": 3.7089,
"step": 95
},
{
"epoch": 0.03,
"learning_rate": 0.00013267344110854504,
"loss": 3.8787,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 0.00013244711316397228,
"loss": 3.797,
"step": 105
},
{
"epoch": 0.04,
"learning_rate": 0.00013222078521939954,
"loss": 3.5114,
"step": 110
},
{
"epoch": 0.04,
"learning_rate": 0.00013199445727482678,
"loss": 3.8432,
"step": 115
},
{
"epoch": 0.04,
"learning_rate": 0.00013176812933025404,
"loss": 3.4919,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 0.00013154180138568128,
"loss": 3.683,
"step": 125
},
{
"epoch": 0.04,
"learning_rate": 0.00013131547344110854,
"loss": 3.9541,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 0.0001310891454965358,
"loss": 3.6708,
"step": 135
},
{
"epoch": 0.05,
"learning_rate": 0.00013086281755196304,
"loss": 3.7126,
"step": 140
},
{
"epoch": 0.05,
"learning_rate": 0.0001306364896073903,
"loss": 3.6123,
"step": 145
},
{
"epoch": 0.05,
"learning_rate": 0.00013041016166281757,
"loss": 3.7501,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 0.0001301838337182448,
"loss": 3.6422,
"step": 155
},
{
"epoch": 0.05,
"learning_rate": 0.00012995750577367206,
"loss": 3.5964,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 0.0001297311778290993,
"loss": 3.7605,
"step": 165
},
{
"epoch": 0.06,
"learning_rate": 0.00012950484988452656,
"loss": 3.5969,
"step": 170
},
{
"epoch": 0.06,
"learning_rate": 0.00012927852193995383,
"loss": 3.6422,
"step": 175
},
{
"epoch": 0.06,
"learning_rate": 0.00012905219399538106,
"loss": 3.3317,
"step": 180
},
{
"epoch": 0.06,
"learning_rate": 0.00012882586605080833,
"loss": 3.3237,
"step": 185
},
{
"epoch": 0.06,
"learning_rate": 0.00012859953810623556,
"loss": 3.511,
"step": 190
},
{
"epoch": 0.06,
"learning_rate": 0.00012837321016166282,
"loss": 3.634,
"step": 195
},
{
"epoch": 0.07,
"learning_rate": 0.00012814688221709006,
"loss": 3.2707,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 0.00012792055427251732,
"loss": 3.7581,
"step": 205
},
{
"epoch": 0.07,
"learning_rate": 0.00012769422632794456,
"loss": 3.7056,
"step": 210
},
{
"epoch": 0.07,
"learning_rate": 0.00012746789838337182,
"loss": 3.784,
"step": 215
},
{
"epoch": 0.07,
"learning_rate": 0.00012724157043879909,
"loss": 2.9932,
"step": 220
},
{
"epoch": 0.07,
"learning_rate": 0.00012701524249422632,
"loss": 3.6009,
"step": 225
},
{
"epoch": 0.08,
"learning_rate": 0.00012678891454965358,
"loss": 3.7976,
"step": 230
},
{
"epoch": 0.08,
"learning_rate": 0.00012656258660508085,
"loss": 3.7625,
"step": 235
},
{
"epoch": 0.08,
"learning_rate": 0.00012633625866050808,
"loss": 3.264,
"step": 240
},
{
"epoch": 0.08,
"learning_rate": 0.00012610993071593535,
"loss": 3.7345,
"step": 245
},
{
"epoch": 0.08,
"learning_rate": 0.00012588360277136258,
"loss": 3.5308,
"step": 250
},
{
"epoch": 0.08,
"learning_rate": 0.00012565727482678985,
"loss": 3.7175,
"step": 255
},
{
"epoch": 0.09,
"learning_rate": 0.0001254309468822171,
"loss": 3.5114,
"step": 260
},
{
"epoch": 0.09,
"learning_rate": 0.00012520461893764434,
"loss": 3.7169,
"step": 265
},
{
"epoch": 0.09,
"learning_rate": 0.00012497829099307158,
"loss": 3.4318,
"step": 270
},
{
"epoch": 0.09,
"learning_rate": 0.00012475196304849884,
"loss": 3.3658,
"step": 275
},
{
"epoch": 0.09,
"learning_rate": 0.0001245256351039261,
"loss": 3.7082,
"step": 280
},
{
"epoch": 0.09,
"learning_rate": 0.00012429930715935334,
"loss": 3.5838,
"step": 285
},
{
"epoch": 0.1,
"learning_rate": 0.0001240729792147806,
"loss": 2.8508,
"step": 290
},
{
"epoch": 0.1,
"learning_rate": 0.00012384665127020784,
"loss": 3.2041,
"step": 295
},
{
"epoch": 0.1,
"learning_rate": 0.0001236203233256351,
"loss": 3.1151,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 0.00012339399538106237,
"loss": 3.5128,
"step": 305
},
{
"epoch": 0.1,
"learning_rate": 0.0001231676674364896,
"loss": 3.6807,
"step": 310
},
{
"epoch": 0.1,
"learning_rate": 0.00012294133949191687,
"loss": 3.6973,
"step": 315
},
{
"epoch": 0.11,
"learning_rate": 0.00012271501154734413,
"loss": 3.7338,
"step": 320
},
{
"epoch": 0.11,
"learning_rate": 0.00012248868360277136,
"loss": 3.3445,
"step": 325
},
{
"epoch": 0.11,
"learning_rate": 0.00012226235565819863,
"loss": 3.6292,
"step": 330
},
{
"epoch": 0.11,
"learning_rate": 0.00012203602771362588,
"loss": 3.6886,
"step": 335
},
{
"epoch": 0.11,
"learning_rate": 0.00012180969976905311,
"loss": 2.6932,
"step": 340
},
{
"epoch": 0.11,
"learning_rate": 0.00012158337182448036,
"loss": 3.4789,
"step": 345
},
{
"epoch": 0.12,
"learning_rate": 0.00012135704387990761,
"loss": 3.4813,
"step": 350
},
{
"epoch": 0.12,
"learning_rate": 0.00012113071593533487,
"loss": 3.6185,
"step": 355
},
{
"epoch": 0.12,
"learning_rate": 0.00012090438799076212,
"loss": 3.5236,
"step": 360
},
{
"epoch": 0.12,
"learning_rate": 0.00012067806004618937,
"loss": 3.6228,
"step": 365
},
{
"epoch": 0.12,
"learning_rate": 0.00012045173210161662,
"loss": 3.414,
"step": 370
},
{
"epoch": 0.12,
"learning_rate": 0.00012022540415704389,
"loss": 3.6309,
"step": 375
},
{
"epoch": 0.13,
"learning_rate": 0.00011999907621247114,
"loss": 3.5011,
"step": 380
},
{
"epoch": 0.13,
"learning_rate": 0.00011977274826789839,
"loss": 3.5267,
"step": 385
},
{
"epoch": 0.13,
"learning_rate": 0.00011954642032332563,
"loss": 3.5156,
"step": 390
},
{
"epoch": 0.13,
"learning_rate": 0.0001193200923787529,
"loss": 3.3563,
"step": 395
},
{
"epoch": 0.13,
"learning_rate": 0.00011909376443418015,
"loss": 3.6401,
"step": 400
},
{
"epoch": 0.13,
"learning_rate": 0.0001188674364896074,
"loss": 3.4525,
"step": 405
},
{
"epoch": 0.14,
"learning_rate": 0.00011864110854503465,
"loss": 3.5465,
"step": 410
},
{
"epoch": 0.14,
"learning_rate": 0.00011841478060046188,
"loss": 2.87,
"step": 415
},
{
"epoch": 0.14,
"learning_rate": 0.00011818845265588914,
"loss": 2.6848,
"step": 420
},
{
"epoch": 0.14,
"learning_rate": 0.0001179621247113164,
"loss": 3.5327,
"step": 425
},
{
"epoch": 0.14,
"learning_rate": 0.00011773579676674364,
"loss": 3.1602,
"step": 430
},
{
"epoch": 0.14,
"learning_rate": 0.0001175094688221709,
"loss": 3.1387,
"step": 435
},
{
"epoch": 0.15,
"learning_rate": 0.00011728314087759816,
"loss": 3.1799,
"step": 440
},
{
"epoch": 0.15,
"learning_rate": 0.0001170568129330254,
"loss": 3.1679,
"step": 445
},
{
"epoch": 0.15,
"learning_rate": 0.00011683048498845266,
"loss": 3.0457,
"step": 450
},
{
"epoch": 0.15,
"learning_rate": 0.0001166041570438799,
"loss": 3.5954,
"step": 455
},
{
"epoch": 0.15,
"learning_rate": 0.00011637782909930717,
"loss": 3.008,
"step": 460
},
{
"epoch": 0.15,
"learning_rate": 0.00011615150115473442,
"loss": 3.6305,
"step": 465
},
{
"epoch": 0.16,
"learning_rate": 0.00011592517321016167,
"loss": 3.056,
"step": 470
},
{
"epoch": 0.16,
"learning_rate": 0.00011569884526558892,
"loss": 3.0358,
"step": 475
},
{
"epoch": 0.16,
"learning_rate": 0.00011547251732101618,
"loss": 3.3535,
"step": 480
},
{
"epoch": 0.16,
"learning_rate": 0.00011524618937644343,
"loss": 3.5352,
"step": 485
},
{
"epoch": 0.16,
"learning_rate": 0.00011501986143187066,
"loss": 3.5372,
"step": 490
},
{
"epoch": 0.16,
"learning_rate": 0.00011479353348729791,
"loss": 2.9131,
"step": 495
},
{
"epoch": 0.16,
"learning_rate": 0.00011456720554272516,
"loss": 3.4774,
"step": 500
},
{
"epoch": 0.17,
"learning_rate": 0.00011434087759815243,
"loss": 3.5785,
"step": 505
},
{
"epoch": 0.17,
"learning_rate": 0.00011411454965357968,
"loss": 3.0545,
"step": 510
},
{
"epoch": 0.17,
"learning_rate": 0.00011388822170900693,
"loss": 3.6184,
"step": 515
},
{
"epoch": 0.17,
"learning_rate": 0.00011366189376443417,
"loss": 3.536,
"step": 520
},
{
"epoch": 0.17,
"learning_rate": 0.00011343556581986144,
"loss": 3.4078,
"step": 525
},
{
"epoch": 0.17,
"learning_rate": 0.00011320923787528869,
"loss": 3.0253,
"step": 530
},
{
"epoch": 0.18,
"learning_rate": 0.00011298290993071594,
"loss": 3.5612,
"step": 535
},
{
"epoch": 0.18,
"learning_rate": 0.00011275658198614319,
"loss": 3.609,
"step": 540
},
{
"epoch": 0.18,
"learning_rate": 0.00011253025404157045,
"loss": 3.48,
"step": 545
},
{
"epoch": 0.18,
"learning_rate": 0.0001123039260969977,
"loss": 3.0283,
"step": 550
},
{
"epoch": 0.18,
"learning_rate": 0.00011207759815242495,
"loss": 2.9956,
"step": 555
},
{
"epoch": 0.18,
"learning_rate": 0.0001118512702078522,
"loss": 3.4985,
"step": 560
},
{
"epoch": 0.19,
"learning_rate": 0.00011162494226327943,
"loss": 3.6792,
"step": 565
},
{
"epoch": 0.19,
"learning_rate": 0.0001113986143187067,
"loss": 3.0146,
"step": 570
},
{
"epoch": 0.19,
"learning_rate": 0.00011117228637413395,
"loss": 3.3542,
"step": 575
},
{
"epoch": 0.19,
"learning_rate": 0.0001109459584295612,
"loss": 3.4283,
"step": 580
},
{
"epoch": 0.19,
"learning_rate": 0.00011071963048498844,
"loss": 3.4591,
"step": 585
},
{
"epoch": 0.19,
"learning_rate": 0.00011049330254041571,
"loss": 2.725,
"step": 590
},
{
"epoch": 0.2,
"learning_rate": 0.00011026697459584296,
"loss": 2.8868,
"step": 595
},
{
"epoch": 0.2,
"learning_rate": 0.0001100406466512702,
"loss": 2.981,
"step": 600
},
{
"epoch": 0.2,
"learning_rate": 0.00010981431870669746,
"loss": 3.4859,
"step": 605
},
{
"epoch": 0.2,
"learning_rate": 0.00010958799076212472,
"loss": 3.2683,
"step": 610
},
{
"epoch": 0.2,
"learning_rate": 0.00010936166281755197,
"loss": 3.5555,
"step": 615
},
{
"epoch": 0.2,
"learning_rate": 0.00010913533487297922,
"loss": 2.9897,
"step": 620
},
{
"epoch": 0.21,
"learning_rate": 0.00010890900692840647,
"loss": 3.227,
"step": 625
},
{
"epoch": 0.21,
"learning_rate": 0.00010868267898383373,
"loss": 3.2891,
"step": 630
},
{
"epoch": 0.21,
"learning_rate": 0.00010845635103926098,
"loss": 3.5478,
"step": 635
},
{
"epoch": 0.21,
"learning_rate": 0.00010823002309468822,
"loss": 2.9625,
"step": 640
},
{
"epoch": 0.21,
"learning_rate": 0.00010800369515011547,
"loss": 3.2351,
"step": 645
},
{
"epoch": 0.21,
"learning_rate": 0.00010777736720554271,
"loss": 3.4041,
"step": 650
},
{
"epoch": 0.22,
"learning_rate": 0.00010755103926096998,
"loss": 3.5005,
"step": 655
},
{
"epoch": 0.22,
"learning_rate": 0.00010732471131639723,
"loss": 3.4141,
"step": 660
},
{
"epoch": 0.22,
"learning_rate": 0.00010709838337182448,
"loss": 3.0204,
"step": 665
},
{
"epoch": 0.22,
"learning_rate": 0.00010687205542725173,
"loss": 2.8737,
"step": 670
},
{
"epoch": 0.22,
"learning_rate": 0.00010664572748267899,
"loss": 3.44,
"step": 675
},
{
"epoch": 0.22,
"learning_rate": 0.00010641939953810624,
"loss": 3.4723,
"step": 680
},
{
"epoch": 0.23,
"learning_rate": 0.00010619307159353349,
"loss": 2.8187,
"step": 685
},
{
"epoch": 0.23,
"learning_rate": 0.00010596674364896074,
"loss": 3.5171,
"step": 690
},
{
"epoch": 0.23,
"learning_rate": 0.000105740415704388,
"loss": 3.346,
"step": 695
},
{
"epoch": 0.23,
"learning_rate": 0.00010551408775981525,
"loss": 3.3495,
"step": 700
},
{
"epoch": 0.23,
"learning_rate": 0.0001052877598152425,
"loss": 3.4582,
"step": 705
},
{
"epoch": 0.23,
"learning_rate": 0.00010506143187066975,
"loss": 3.4312,
"step": 710
},
{
"epoch": 0.24,
"learning_rate": 0.00010483510392609698,
"loss": 2.9329,
"step": 715
},
{
"epoch": 0.24,
"learning_rate": 0.00010460877598152425,
"loss": 3.4131,
"step": 720
},
{
"epoch": 0.24,
"learning_rate": 0.0001043824480369515,
"loss": 3.432,
"step": 725
},
{
"epoch": 0.24,
"learning_rate": 0.00010415612009237875,
"loss": 3.4073,
"step": 730
},
{
"epoch": 0.24,
"learning_rate": 0.000103929792147806,
"loss": 3.0032,
"step": 735
},
{
"epoch": 0.24,
"learning_rate": 0.00010370346420323326,
"loss": 3.3989,
"step": 740
},
{
"epoch": 0.25,
"learning_rate": 0.00010347713625866051,
"loss": 2.5839,
"step": 745
},
{
"epoch": 0.25,
"learning_rate": 0.00010325080831408776,
"loss": 3.5058,
"step": 750
},
{
"epoch": 0.25,
"learning_rate": 0.00010302448036951501,
"loss": 2.8574,
"step": 755
},
{
"epoch": 0.25,
"learning_rate": 0.00010279815242494227,
"loss": 3.2324,
"step": 760
},
{
"epoch": 0.25,
"learning_rate": 0.00010257182448036952,
"loss": 3.4433,
"step": 765
},
{
"epoch": 0.25,
"learning_rate": 0.00010234549653579677,
"loss": 3.2637,
"step": 770
},
{
"epoch": 0.26,
"learning_rate": 0.00010211916859122402,
"loss": 3.4732,
"step": 775
},
{
"epoch": 0.26,
"learning_rate": 0.00010189284064665128,
"loss": 3.215,
"step": 780
},
{
"epoch": 0.26,
"learning_rate": 0.00010166651270207853,
"loss": 3.0964,
"step": 785
},
{
"epoch": 0.26,
"learning_rate": 0.00010144018475750577,
"loss": 3.2448,
"step": 790
},
{
"epoch": 0.26,
"learning_rate": 0.00010121385681293302,
"loss": 3.3717,
"step": 795
},
{
"epoch": 0.26,
"learning_rate": 0.00010098752886836027,
"loss": 3.4094,
"step": 800
},
{
"epoch": 0.27,
"learning_rate": 0.00010076120092378753,
"loss": 2.8995,
"step": 805
},
{
"epoch": 0.27,
"learning_rate": 0.00010053487297921478,
"loss": 3.4194,
"step": 810
},
{
"epoch": 0.27,
"learning_rate": 0.00010030854503464203,
"loss": 2.8647,
"step": 815
},
{
"epoch": 0.27,
"learning_rate": 0.00010008221709006928,
"loss": 2.649,
"step": 820
},
{
"epoch": 0.27,
"learning_rate": 9.985588914549654e-05,
"loss": 2.593,
"step": 825
},
{
"epoch": 0.27,
"learning_rate": 9.962956120092379e-05,
"loss": 2.7692,
"step": 830
},
{
"epoch": 0.28,
"learning_rate": 9.940323325635104e-05,
"loss": 2.923,
"step": 835
},
{
"epoch": 0.28,
"learning_rate": 9.917690531177829e-05,
"loss": 2.7793,
"step": 840
},
{
"epoch": 0.28,
"learning_rate": 9.895057736720555e-05,
"loss": 2.3173,
"step": 845
},
{
"epoch": 0.28,
"learning_rate": 9.87242494226328e-05,
"loss": 3.3614,
"step": 850
},
{
"epoch": 0.28,
"learning_rate": 9.849792147806005e-05,
"loss": 2.8285,
"step": 855
},
{
"epoch": 0.28,
"learning_rate": 9.82715935334873e-05,
"loss": 3.1578,
"step": 860
},
{
"epoch": 0.29,
"learning_rate": 9.804526558891455e-05,
"loss": 3.2701,
"step": 865
},
{
"epoch": 0.29,
"learning_rate": 9.78189376443418e-05,
"loss": 3.1701,
"step": 870
},
{
"epoch": 0.29,
"learning_rate": 9.759260969976905e-05,
"loss": 3.3629,
"step": 875
},
{
"epoch": 0.29,
"learning_rate": 9.73662817551963e-05,
"loss": 3.2969,
"step": 880
},
{
"epoch": 0.29,
"learning_rate": 9.713995381062355e-05,
"loss": 2.4782,
"step": 885
},
{
"epoch": 0.29,
"learning_rate": 9.691362586605081e-05,
"loss": 3.3053,
"step": 890
},
{
"epoch": 0.3,
"learning_rate": 9.668729792147806e-05,
"loss": 3.3026,
"step": 895
},
{
"epoch": 0.3,
"learning_rate": 9.646096997690531e-05,
"loss": 3.353,
"step": 900
},
{
"epoch": 0.3,
"learning_rate": 9.623464203233256e-05,
"loss": 2.7781,
"step": 905
},
{
"epoch": 0.3,
"learning_rate": 9.600831408775982e-05,
"loss": 2.8703,
"step": 910
},
{
"epoch": 0.3,
"learning_rate": 9.578198614318707e-05,
"loss": 3.1661,
"step": 915
},
{
"epoch": 0.3,
"learning_rate": 9.555565819861432e-05,
"loss": 2.7715,
"step": 920
},
{
"epoch": 0.31,
"learning_rate": 9.532933025404157e-05,
"loss": 2.8061,
"step": 925
},
{
"epoch": 0.31,
"learning_rate": 9.510300230946883e-05,
"loss": 3.2015,
"step": 930
},
{
"epoch": 0.31,
"learning_rate": 9.487667436489608e-05,
"loss": 2.4464,
"step": 935
},
{
"epoch": 0.31,
"learning_rate": 9.465034642032332e-05,
"loss": 2.5126,
"step": 940
},
{
"epoch": 0.31,
"learning_rate": 9.442401847575057e-05,
"loss": 3.3578,
"step": 945
},
{
"epoch": 0.31,
"learning_rate": 9.419769053117783e-05,
"loss": 3.2997,
"step": 950
},
{
"epoch": 0.32,
"learning_rate": 9.397136258660508e-05,
"loss": 3.0528,
"step": 955
},
{
"epoch": 0.32,
"learning_rate": 9.374503464203233e-05,
"loss": 3.2615,
"step": 960
},
{
"epoch": 0.32,
"learning_rate": 9.351870669745958e-05,
"loss": 2.2974,
"step": 965
},
{
"epoch": 0.32,
"learning_rate": 9.329237875288683e-05,
"loss": 2.4089,
"step": 970
},
{
"epoch": 0.32,
"learning_rate": 9.306605080831409e-05,
"loss": 3.3305,
"step": 975
},
{
"epoch": 0.32,
"learning_rate": 9.283972286374134e-05,
"loss": 2.9349,
"step": 980
},
{
"epoch": 0.32,
"learning_rate": 9.261339491916859e-05,
"loss": 3.026,
"step": 985
},
{
"epoch": 0.33,
"learning_rate": 9.238706697459584e-05,
"loss": 3.2153,
"step": 990
},
{
"epoch": 0.33,
"learning_rate": 9.21607390300231e-05,
"loss": 2.754,
"step": 995
},
{
"epoch": 0.33,
"learning_rate": 9.193441108545035e-05,
"loss": 3.3203,
"step": 1000
},
{
"epoch": 0.33,
"learning_rate": 9.17080831408776e-05,
"loss": 3.3236,
"step": 1005
},
{
"epoch": 0.33,
"learning_rate": 9.148175519630485e-05,
"loss": 3.2072,
"step": 1010
},
{
"epoch": 0.33,
"learning_rate": 9.12554272517321e-05,
"loss": 3.271,
"step": 1015
},
{
"epoch": 0.34,
"learning_rate": 9.102909930715935e-05,
"loss": 3.3537,
"step": 1020
},
{
"epoch": 0.34,
"learning_rate": 9.08027713625866e-05,
"loss": 3.0024,
"step": 1025
},
{
"epoch": 0.34,
"learning_rate": 9.057644341801385e-05,
"loss": 3.4014,
"step": 1030
},
{
"epoch": 0.34,
"learning_rate": 9.035011547344111e-05,
"loss": 2.6294,
"step": 1035
},
{
"epoch": 0.34,
"learning_rate": 9.012378752886836e-05,
"loss": 2.736,
"step": 1040
},
{
"epoch": 0.34,
"learning_rate": 8.989745958429561e-05,
"loss": 2.1099,
"step": 1045
},
{
"epoch": 0.35,
"learning_rate": 8.967113163972286e-05,
"loss": 3.1103,
"step": 1050
},
{
"epoch": 0.35,
"learning_rate": 8.944480369515012e-05,
"loss": 2.7629,
"step": 1055
},
{
"epoch": 0.35,
"learning_rate": 8.921847575057737e-05,
"loss": 2.9411,
"step": 1060
},
{
"epoch": 0.35,
"learning_rate": 8.899214780600462e-05,
"loss": 2.6607,
"step": 1065
},
{
"epoch": 0.35,
"learning_rate": 8.876581986143187e-05,
"loss": 1.6034,
"step": 1070
},
{
"epoch": 0.35,
"learning_rate": 8.853949191685912e-05,
"loss": 2.7623,
"step": 1075
},
{
"epoch": 0.36,
"learning_rate": 8.831316397228638e-05,
"loss": 3.3491,
"step": 1080
},
{
"epoch": 0.36,
"learning_rate": 8.808683602771363e-05,
"loss": 3.3623,
"step": 1085
},
{
"epoch": 0.36,
"learning_rate": 8.786050808314087e-05,
"loss": 3.3125,
"step": 1090
},
{
"epoch": 0.36,
"learning_rate": 8.763418013856812e-05,
"loss": 3.2459,
"step": 1095
},
{
"epoch": 0.36,
"learning_rate": 8.740785219399538e-05,
"loss": 2.9697,
"step": 1100
},
{
"epoch": 0.36,
"learning_rate": 8.718152424942263e-05,
"loss": 2.063,
"step": 1105
},
{
"epoch": 0.37,
"learning_rate": 8.695519630484988e-05,
"loss": 2.6997,
"step": 1110
},
{
"epoch": 0.37,
"learning_rate": 8.672886836027713e-05,
"loss": 3.2616,
"step": 1115
},
{
"epoch": 0.37,
"learning_rate": 8.65025404157044e-05,
"loss": 3.327,
"step": 1120
},
{
"epoch": 0.37,
"learning_rate": 8.627621247113164e-05,
"loss": 2.6713,
"step": 1125
},
{
"epoch": 0.37,
"learning_rate": 8.604988452655889e-05,
"loss": 2.8985,
"step": 1130
},
{
"epoch": 0.37,
"learning_rate": 8.582355658198614e-05,
"loss": 3.3272,
"step": 1135
},
{
"epoch": 0.38,
"learning_rate": 8.55972286374134e-05,
"loss": 3.2654,
"step": 1140
},
{
"epoch": 0.38,
"learning_rate": 8.537090069284065e-05,
"loss": 3.2365,
"step": 1145
},
{
"epoch": 0.38,
"learning_rate": 8.51445727482679e-05,
"loss": 2.3625,
"step": 1150
},
{
"epoch": 0.38,
"learning_rate": 8.491824480369515e-05,
"loss": 3.3359,
"step": 1155
},
{
"epoch": 0.38,
"learning_rate": 8.46919168591224e-05,
"loss": 3.2978,
"step": 1160
},
{
"epoch": 0.38,
"learning_rate": 8.446558891454965e-05,
"loss": 3.2987,
"step": 1165
},
{
"epoch": 0.39,
"learning_rate": 8.42392609699769e-05,
"loss": 3.1829,
"step": 1170
},
{
"epoch": 0.39,
"learning_rate": 8.401293302540415e-05,
"loss": 2.9166,
"step": 1175
},
{
"epoch": 0.39,
"learning_rate": 8.37866050808314e-05,
"loss": 2.8781,
"step": 1180
},
{
"epoch": 0.39,
"learning_rate": 8.356027713625866e-05,
"loss": 3.311,
"step": 1185
},
{
"epoch": 0.39,
"learning_rate": 8.333394919168591e-05,
"loss": 3.2549,
"step": 1190
},
{
"epoch": 0.39,
"learning_rate": 8.310762124711316e-05,
"loss": 3.3449,
"step": 1195
},
{
"epoch": 0.4,
"learning_rate": 8.288129330254041e-05,
"loss": 3.1834,
"step": 1200
},
{
"epoch": 0.4,
"learning_rate": 8.265496535796768e-05,
"loss": 2.6987,
"step": 1205
},
{
"epoch": 0.4,
"learning_rate": 8.242863741339492e-05,
"loss": 3.2679,
"step": 1210
},
{
"epoch": 0.4,
"learning_rate": 8.220230946882217e-05,
"loss": 2.2549,
"step": 1215
},
{
"epoch": 0.4,
"learning_rate": 8.197598152424942e-05,
"loss": 2.9217,
"step": 1220
},
{
"epoch": 0.4,
"learning_rate": 8.174965357967669e-05,
"loss": 3.2378,
"step": 1225
},
{
"epoch": 0.41,
"learning_rate": 8.152332563510394e-05,
"loss": 3.2458,
"step": 1230
},
{
"epoch": 0.41,
"learning_rate": 8.129699769053119e-05,
"loss": 2.5979,
"step": 1235
},
{
"epoch": 0.41,
"learning_rate": 8.107066974595842e-05,
"loss": 3.141,
"step": 1240
},
{
"epoch": 0.41,
"learning_rate": 8.084434180138567e-05,
"loss": 3.2136,
"step": 1245
},
{
"epoch": 0.41,
"learning_rate": 8.061801385681293e-05,
"loss": 2.5998,
"step": 1250
},
{
"epoch": 0.41,
"learning_rate": 8.039168591224018e-05,
"loss": 1.9745,
"step": 1255
},
{
"epoch": 0.42,
"learning_rate": 8.016535796766743e-05,
"loss": 3.2393,
"step": 1260
},
{
"epoch": 0.42,
"learning_rate": 7.993903002309468e-05,
"loss": 1.985,
"step": 1265
},
{
"epoch": 0.42,
"learning_rate": 7.971270207852195e-05,
"loss": 3.1701,
"step": 1270
},
{
"epoch": 0.42,
"learning_rate": 7.94863741339492e-05,
"loss": 3.2786,
"step": 1275
},
{
"epoch": 0.42,
"learning_rate": 7.926004618937644e-05,
"loss": 3.3514,
"step": 1280
},
{
"epoch": 0.42,
"learning_rate": 7.90337182448037e-05,
"loss": 2.6101,
"step": 1285
},
{
"epoch": 0.43,
"learning_rate": 7.880739030023096e-05,
"loss": 3.2465,
"step": 1290
},
{
"epoch": 0.43,
"learning_rate": 7.85810623556582e-05,
"loss": 3.1456,
"step": 1295
},
{
"epoch": 0.43,
"learning_rate": 7.835473441108546e-05,
"loss": 3.1528,
"step": 1300
},
{
"epoch": 0.43,
"learning_rate": 7.81284064665127e-05,
"loss": 2.3822,
"step": 1305
},
{
"epoch": 0.43,
"learning_rate": 7.790207852193997e-05,
"loss": 3.1659,
"step": 1310
},
{
"epoch": 0.43,
"learning_rate": 7.76757505773672e-05,
"loss": 2.6111,
"step": 1315
},
{
"epoch": 0.44,
"learning_rate": 7.744942263279445e-05,
"loss": 2.6443,
"step": 1320
},
{
"epoch": 0.44,
"learning_rate": 7.72230946882217e-05,
"loss": 2.184,
"step": 1325
},
{
"epoch": 0.44,
"learning_rate": 7.699676674364895e-05,
"loss": 3.2053,
"step": 1330
},
{
"epoch": 0.44,
"learning_rate": 7.677043879907622e-05,
"loss": 3.323,
"step": 1335
},
{
"epoch": 0.44,
"learning_rate": 7.654411085450346e-05,
"loss": 2.2732,
"step": 1340
},
{
"epoch": 0.44,
"learning_rate": 7.631778290993071e-05,
"loss": 3.1413,
"step": 1345
},
{
"epoch": 0.45,
"learning_rate": 7.609145496535796e-05,
"loss": 3.2421,
"step": 1350
},
{
"epoch": 0.45,
"learning_rate": 7.586512702078523e-05,
"loss": 3.1688,
"step": 1355
},
{
"epoch": 0.45,
"learning_rate": 7.563879907621248e-05,
"loss": 3.1884,
"step": 1360
},
{
"epoch": 0.45,
"learning_rate": 7.541247113163973e-05,
"loss": 3.1239,
"step": 1365
},
{
"epoch": 0.45,
"learning_rate": 7.518614318706697e-05,
"loss": 2.6056,
"step": 1370
},
{
"epoch": 0.45,
"learning_rate": 7.495981524249424e-05,
"loss": 3.1442,
"step": 1375
},
{
"epoch": 0.46,
"learning_rate": 7.473348729792149e-05,
"loss": 3.2654,
"step": 1380
},
{
"epoch": 0.46,
"learning_rate": 7.450715935334874e-05,
"loss": 3.1673,
"step": 1385
},
{
"epoch": 0.46,
"learning_rate": 7.428083140877597e-05,
"loss": 2.8148,
"step": 1390
},
{
"epoch": 0.46,
"learning_rate": 7.405450346420322e-05,
"loss": 2.5837,
"step": 1395
},
{
"epoch": 0.46,
"learning_rate": 7.382817551963049e-05,
"loss": 2.2148,
"step": 1400
},
{
"epoch": 0.46,
"learning_rate": 7.360184757505773e-05,
"loss": 3.1805,
"step": 1405
},
{
"epoch": 0.47,
"learning_rate": 7.337551963048498e-05,
"loss": 2.798,
"step": 1410
},
{
"epoch": 0.47,
"learning_rate": 7.314919168591223e-05,
"loss": 3.2318,
"step": 1415
},
{
"epoch": 0.47,
"learning_rate": 7.29228637413395e-05,
"loss": 2.8976,
"step": 1420
},
{
"epoch": 0.47,
"learning_rate": 7.269653579676675e-05,
"loss": 3.0862,
"step": 1425
},
{
"epoch": 0.47,
"learning_rate": 7.2470207852194e-05,
"loss": 2.5277,
"step": 1430
},
{
"epoch": 0.47,
"learning_rate": 7.224387990762124e-05,
"loss": 2.0882,
"step": 1435
},
{
"epoch": 0.48,
"learning_rate": 7.201755196304851e-05,
"loss": 3.0711,
"step": 1440
},
{
"epoch": 0.48,
"learning_rate": 7.179122401847576e-05,
"loss": 2.6206,
"step": 1445
},
{
"epoch": 0.48,
"learning_rate": 7.156489607390301e-05,
"loss": 2.0745,
"step": 1450
},
{
"epoch": 0.48,
"learning_rate": 7.133856812933026e-05,
"loss": 2.5786,
"step": 1455
},
{
"epoch": 0.48,
"learning_rate": 7.111224018475752e-05,
"loss": 2.6242,
"step": 1460
},
{
"epoch": 0.48,
"learning_rate": 7.088591224018476e-05,
"loss": 2.506,
"step": 1465
},
{
"epoch": 0.48,
"learning_rate": 7.0659584295612e-05,
"loss": 2.5967,
"step": 1470
},
{
"epoch": 0.49,
"learning_rate": 7.043325635103925e-05,
"loss": 3.1206,
"step": 1475
},
{
"epoch": 0.49,
"learning_rate": 7.02069284064665e-05,
"loss": 3.1769,
"step": 1480
},
{
"epoch": 0.49,
"learning_rate": 6.998060046189377e-05,
"loss": 2.7589,
"step": 1485
},
{
"epoch": 0.49,
"learning_rate": 6.975427251732102e-05,
"loss": 2.892,
"step": 1490
},
{
"epoch": 0.49,
"learning_rate": 6.952794457274827e-05,
"loss": 2.7783,
"step": 1495
},
{
"epoch": 0.49,
"learning_rate": 6.930161662817551e-05,
"loss": 3.0331,
"step": 1500
},
{
"epoch": 0.5,
"learning_rate": 6.907528868360278e-05,
"loss": 3.1017,
"step": 1505
},
{
"epoch": 0.5,
"learning_rate": 6.884896073903003e-05,
"loss": 2.909,
"step": 1510
},
{
"epoch": 0.5,
"learning_rate": 6.862263279445728e-05,
"loss": 2.5939,
"step": 1515
},
{
"epoch": 0.5,
"learning_rate": 6.839630484988453e-05,
"loss": 3.1691,
"step": 1520
},
{
"epoch": 0.5,
"learning_rate": 6.816997690531178e-05,
"loss": 2.9704,
"step": 1525
},
{
"epoch": 0.5,
"learning_rate": 6.794364896073903e-05,
"loss": 2.3905,
"step": 1530
},
{
"epoch": 0.51,
"learning_rate": 6.771732101616629e-05,
"loss": 2.1867,
"step": 1535
},
{
"epoch": 0.51,
"learning_rate": 6.749099307159354e-05,
"loss": 2.147,
"step": 1540
},
{
"epoch": 0.51,
"learning_rate": 6.726466512702079e-05,
"loss": 2.7461,
"step": 1545
},
{
"epoch": 0.51,
"learning_rate": 6.703833718244804e-05,
"loss": 3.1617,
"step": 1550
},
{
"epoch": 0.51,
"learning_rate": 6.681200923787529e-05,
"loss": 3.1386,
"step": 1555
},
{
"epoch": 0.51,
"learning_rate": 6.658568129330254e-05,
"loss": 2.7372,
"step": 1560
},
{
"epoch": 0.52,
"learning_rate": 6.635935334872979e-05,
"loss": 3.1669,
"step": 1565
},
{
"epoch": 0.52,
"learning_rate": 6.613302540415705e-05,
"loss": 2.5412,
"step": 1570
},
{
"epoch": 0.52,
"learning_rate": 6.59066974595843e-05,
"loss": 2.7079,
"step": 1575
},
{
"epoch": 0.52,
"learning_rate": 6.568036951501155e-05,
"loss": 2.5812,
"step": 1580
},
{
"epoch": 0.52,
"learning_rate": 6.54540415704388e-05,
"loss": 2.575,
"step": 1585
},
{
"epoch": 0.52,
"learning_rate": 6.522771362586606e-05,
"loss": 3.1151,
"step": 1590
},
{
"epoch": 0.53,
"learning_rate": 6.50013856812933e-05,
"loss": 3.0427,
"step": 1595
},
{
"epoch": 0.53,
"learning_rate": 6.477505773672056e-05,
"loss": 3.0496,
"step": 1600
},
{
"epoch": 0.53,
"learning_rate": 6.454872979214781e-05,
"loss": 3.1504,
"step": 1605
},
{
"epoch": 0.53,
"learning_rate": 6.432240184757506e-05,
"loss": 2.7546,
"step": 1610
},
{
"epoch": 0.53,
"learning_rate": 6.40960739030023e-05,
"loss": 2.5357,
"step": 1615
},
{
"epoch": 0.53,
"learning_rate": 6.386974595842957e-05,
"loss": 2.3116,
"step": 1620
},
{
"epoch": 0.54,
"learning_rate": 6.364341801385682e-05,
"loss": 2.7059,
"step": 1625
},
{
"epoch": 0.54,
"learning_rate": 6.341709006928407e-05,
"loss": 2.5225,
"step": 1630
},
{
"epoch": 0.54,
"learning_rate": 6.319076212471132e-05,
"loss": 2.5516,
"step": 1635
},
{
"epoch": 0.54,
"learning_rate": 6.296443418013857e-05,
"loss": 2.677,
"step": 1640
},
{
"epoch": 0.54,
"learning_rate": 6.273810623556582e-05,
"loss": 1.9059,
"step": 1645
},
{
"epoch": 0.54,
"learning_rate": 6.251177829099307e-05,
"loss": 1.3935,
"step": 1650
},
{
"epoch": 0.55,
"learning_rate": 6.228545034642033e-05,
"loss": 2.6359,
"step": 1655
},
{
"epoch": 0.55,
"learning_rate": 6.205912240184758e-05,
"loss": 2.7623,
"step": 1660
},
{
"epoch": 0.55,
"learning_rate": 6.183279445727483e-05,
"loss": 3.0703,
"step": 1665
},
{
"epoch": 0.55,
"learning_rate": 6.160646651270208e-05,
"loss": 2.7344,
"step": 1670
},
{
"epoch": 0.55,
"learning_rate": 6.138013856812933e-05,
"loss": 3.0174,
"step": 1675
},
{
"epoch": 0.55,
"learning_rate": 6.115381062355658e-05,
"loss": 2.5429,
"step": 1680
},
{
"epoch": 0.56,
"learning_rate": 6.092748267898383e-05,
"loss": 3.092,
"step": 1685
},
{
"epoch": 0.56,
"learning_rate": 6.070115473441109e-05,
"loss": 2.0041,
"step": 1690
},
{
"epoch": 0.56,
"learning_rate": 6.047482678983834e-05,
"loss": 1.9942,
"step": 1695
},
{
"epoch": 0.56,
"learning_rate": 6.0248498845265595e-05,
"loss": 3.0518,
"step": 1700
},
{
"epoch": 0.56,
"learning_rate": 6.0022170900692844e-05,
"loss": 3.1302,
"step": 1705
},
{
"epoch": 0.56,
"learning_rate": 5.979584295612009e-05,
"loss": 2.6788,
"step": 1710
},
{
"epoch": 0.57,
"learning_rate": 5.956951501154734e-05,
"loss": 2.6504,
"step": 1715
},
{
"epoch": 0.57,
"learning_rate": 5.934318706697459e-05,
"loss": 3.1506,
"step": 1720
},
{
"epoch": 0.57,
"learning_rate": 5.911685912240185e-05,
"loss": 2.5277,
"step": 1725
},
{
"epoch": 0.57,
"learning_rate": 5.88905311778291e-05,
"loss": 2.7044,
"step": 1730
},
{
"epoch": 0.57,
"learning_rate": 5.8664203233256355e-05,
"loss": 3.0895,
"step": 1735
},
{
"epoch": 0.57,
"learning_rate": 5.8437875288683604e-05,
"loss": 2.7463,
"step": 1740
},
{
"epoch": 0.58,
"learning_rate": 5.8211547344110853e-05,
"loss": 2.5867,
"step": 1745
},
{
"epoch": 0.58,
"learning_rate": 5.79852193995381e-05,
"loss": 2.5577,
"step": 1750
},
{
"epoch": 0.58,
"learning_rate": 5.775889145496536e-05,
"loss": 2.0084,
"step": 1755
},
{
"epoch": 0.58,
"learning_rate": 5.753256351039261e-05,
"loss": 2.656,
"step": 1760
},
{
"epoch": 0.58,
"learning_rate": 5.7306235565819865e-05,
"loss": 3.0814,
"step": 1765
},
{
"epoch": 0.58,
"learning_rate": 5.7079907621247114e-05,
"loss": 3.0512,
"step": 1770
},
{
"epoch": 0.59,
"learning_rate": 5.685357967667437e-05,
"loss": 2.4837,
"step": 1775
},
{
"epoch": 0.59,
"learning_rate": 5.662725173210162e-05,
"loss": 3.0879,
"step": 1780
},
{
"epoch": 0.59,
"learning_rate": 5.640092378752886e-05,
"loss": 3.0925,
"step": 1785
},
{
"epoch": 0.59,
"learning_rate": 5.617459584295612e-05,
"loss": 2.9465,
"step": 1790
},
{
"epoch": 0.59,
"learning_rate": 5.594826789838337e-05,
"loss": 2.1648,
"step": 1795
},
{
"epoch": 0.59,
"learning_rate": 5.5721939953810625e-05,
"loss": 2.4771,
"step": 1800
},
{
"epoch": 0.6,
"learning_rate": 5.5495612009237874e-05,
"loss": 3.0954,
"step": 1805
},
{
"epoch": 0.6,
"learning_rate": 5.526928406466513e-05,
"loss": 2.481,
"step": 1810
},
{
"epoch": 0.6,
"learning_rate": 5.504295612009238e-05,
"loss": 3.0379,
"step": 1815
},
{
"epoch": 0.6,
"learning_rate": 5.481662817551963e-05,
"loss": 2.5866,
"step": 1820
},
{
"epoch": 0.6,
"learning_rate": 5.459030023094688e-05,
"loss": 2.4673,
"step": 1825
},
{
"epoch": 0.6,
"learning_rate": 5.4363972286374135e-05,
"loss": 2.0428,
"step": 1830
},
{
"epoch": 0.61,
"learning_rate": 5.4137644341801384e-05,
"loss": 2.9469,
"step": 1835
},
{
"epoch": 0.61,
"learning_rate": 5.391131639722864e-05,
"loss": 3.065,
"step": 1840
},
{
"epoch": 0.61,
"learning_rate": 5.368498845265589e-05,
"loss": 2.3777,
"step": 1845
},
{
"epoch": 0.61,
"learning_rate": 5.3458660508083146e-05,
"loss": 2.4645,
"step": 1850
},
{
"epoch": 0.61,
"learning_rate": 5.3232332563510396e-05,
"loss": 2.9935,
"step": 1855
},
{
"epoch": 0.61,
"learning_rate": 5.300600461893764e-05,
"loss": 3.0377,
"step": 1860
},
{
"epoch": 0.62,
"learning_rate": 5.2779676674364895e-05,
"loss": 2.2265,
"step": 1865
},
{
"epoch": 0.62,
"learning_rate": 5.2553348729792144e-05,
"loss": 2.0621,
"step": 1870
},
{
"epoch": 0.62,
"learning_rate": 5.23270207852194e-05,
"loss": 2.285,
"step": 1875
},
{
"epoch": 0.62,
"learning_rate": 5.210069284064665e-05,
"loss": 2.605,
"step": 1880
},
{
"epoch": 0.62,
"learning_rate": 5.1874364896073906e-05,
"loss": 2.4744,
"step": 1885
},
{
"epoch": 0.62,
"learning_rate": 5.1648036951501155e-05,
"loss": 2.3932,
"step": 1890
},
{
"epoch": 0.63,
"learning_rate": 5.1421709006928405e-05,
"loss": 3.0259,
"step": 1895
},
{
"epoch": 0.63,
"learning_rate": 5.1195381062355654e-05,
"loss": 2.9768,
"step": 1900
},
{
"epoch": 0.63,
"learning_rate": 5.096905311778291e-05,
"loss": 2.4951,
"step": 1905
},
{
"epoch": 0.63,
"learning_rate": 5.074272517321016e-05,
"loss": 2.9348,
"step": 1910
},
{
"epoch": 0.63,
"learning_rate": 5.0516397228637416e-05,
"loss": 3.0326,
"step": 1915
},
{
"epoch": 0.63,
"learning_rate": 5.0290069284064666e-05,
"loss": 2.544,
"step": 1920
},
{
"epoch": 0.64,
"learning_rate": 5.006374133949192e-05,
"loss": 2.9794,
"step": 1925
},
{
"epoch": 0.64,
"learning_rate": 4.983741339491917e-05,
"loss": 2.2389,
"step": 1930
},
{
"epoch": 0.64,
"learning_rate": 4.961108545034642e-05,
"loss": 2.5411,
"step": 1935
},
{
"epoch": 0.64,
"learning_rate": 4.938475750577367e-05,
"loss": 2.1246,
"step": 1940
},
{
"epoch": 0.64,
"learning_rate": 4.915842956120092e-05,
"loss": 2.549,
"step": 1945
},
{
"epoch": 0.64,
"learning_rate": 4.8932101616628176e-05,
"loss": 2.9752,
"step": 1950
},
{
"epoch": 0.65,
"learning_rate": 4.8705773672055425e-05,
"loss": 2.2107,
"step": 1955
},
{
"epoch": 0.65,
"learning_rate": 4.847944572748268e-05,
"loss": 2.9383,
"step": 1960
},
{
"epoch": 0.65,
"learning_rate": 4.825311778290993e-05,
"loss": 2.4591,
"step": 1965
},
{
"epoch": 0.65,
"learning_rate": 4.802678983833718e-05,
"loss": 2.5564,
"step": 1970
},
{
"epoch": 0.65,
"learning_rate": 4.780046189376443e-05,
"loss": 2.6209,
"step": 1975
},
{
"epoch": 0.65,
"learning_rate": 4.7574133949191686e-05,
"loss": 2.4038,
"step": 1980
},
{
"epoch": 0.65,
"learning_rate": 4.7347806004618936e-05,
"loss": 2.5304,
"step": 1985
},
{
"epoch": 0.66,
"learning_rate": 4.712147806004619e-05,
"loss": 2.6632,
"step": 1990
},
{
"epoch": 0.66,
"learning_rate": 4.689515011547344e-05,
"loss": 1.937,
"step": 1995
},
{
"epoch": 0.66,
"learning_rate": 4.66688221709007e-05,
"loss": 2.4658,
"step": 2000
},
{
"epoch": 0.66,
"learning_rate": 4.644249422632795e-05,
"loss": 2.5939,
"step": 2005
},
{
"epoch": 0.66,
"learning_rate": 4.6216166281755197e-05,
"loss": 2.618,
"step": 2010
},
{
"epoch": 0.66,
"learning_rate": 4.5989838337182446e-05,
"loss": 3.0384,
"step": 2015
},
{
"epoch": 0.67,
"learning_rate": 4.57635103926097e-05,
"loss": 2.9713,
"step": 2020
},
{
"epoch": 0.67,
"learning_rate": 4.553718244803695e-05,
"loss": 3.0171,
"step": 2025
},
{
"epoch": 0.67,
"learning_rate": 4.53108545034642e-05,
"loss": 3.0126,
"step": 2030
},
{
"epoch": 0.67,
"learning_rate": 4.508452655889146e-05,
"loss": 2.9481,
"step": 2035
},
{
"epoch": 0.67,
"learning_rate": 4.485819861431871e-05,
"loss": 2.4466,
"step": 2040
},
{
"epoch": 0.67,
"learning_rate": 4.4631870669745956e-05,
"loss": 2.0695,
"step": 2045
},
{
"epoch": 0.68,
"learning_rate": 4.4405542725173206e-05,
"loss": 3.0076,
"step": 2050
},
{
"epoch": 0.68,
"learning_rate": 4.417921478060046e-05,
"loss": 3.0097,
"step": 2055
},
{
"epoch": 0.68,
"learning_rate": 4.395288683602771e-05,
"loss": 2.4857,
"step": 2060
},
{
"epoch": 0.68,
"learning_rate": 4.372655889145497e-05,
"loss": 3.0361,
"step": 2065
},
{
"epoch": 0.68,
"learning_rate": 4.350023094688222e-05,
"loss": 1.9166,
"step": 2070
},
{
"epoch": 0.68,
"learning_rate": 4.327390300230947e-05,
"loss": 2.9817,
"step": 2075
},
{
"epoch": 0.69,
"learning_rate": 4.304757505773672e-05,
"loss": 2.9798,
"step": 2080
},
{
"epoch": 0.69,
"learning_rate": 4.282124711316397e-05,
"loss": 2.4675,
"step": 2085
},
{
"epoch": 0.69,
"learning_rate": 4.259491916859122e-05,
"loss": 3.0161,
"step": 2090
},
{
"epoch": 0.69,
"learning_rate": 4.236859122401848e-05,
"loss": 1.9453,
"step": 2095
},
{
"epoch": 0.69,
"learning_rate": 4.214226327944573e-05,
"loss": 3.0623,
"step": 2100
},
{
"epoch": 0.69,
"learning_rate": 4.1915935334872984e-05,
"loss": 3.0624,
"step": 2105
},
{
"epoch": 0.7,
"learning_rate": 4.168960739030023e-05,
"loss": 2.4797,
"step": 2110
},
{
"epoch": 0.7,
"learning_rate": 4.146327944572748e-05,
"loss": 2.0303,
"step": 2115
},
{
"epoch": 0.7,
"learning_rate": 4.123695150115473e-05,
"loss": 2.9842,
"step": 2120
},
{
"epoch": 0.7,
"learning_rate": 4.101062355658198e-05,
"loss": 2.5711,
"step": 2125
},
{
"epoch": 0.7,
"learning_rate": 4.078429561200924e-05,
"loss": 2.409,
"step": 2130
},
{
"epoch": 0.7,
"learning_rate": 4.055796766743649e-05,
"loss": 2.473,
"step": 2135
},
{
"epoch": 0.71,
"learning_rate": 4.033163972286374e-05,
"loss": 1.9119,
"step": 2140
},
{
"epoch": 0.71,
"learning_rate": 4.010531177829099e-05,
"loss": 2.9008,
"step": 2145
},
{
"epoch": 0.71,
"learning_rate": 3.987898383371825e-05,
"loss": 3.032,
"step": 2150
},
{
"epoch": 0.71,
"learning_rate": 3.96526558891455e-05,
"loss": 2.3587,
"step": 2155
},
{
"epoch": 0.71,
"learning_rate": 3.942632794457275e-05,
"loss": 2.9925,
"step": 2160
},
{
"epoch": 0.71,
"learning_rate": 3.92e-05,
"loss": 2.9025,
"step": 2165
},
{
"epoch": 0.72,
"learning_rate": 3.8973672055427254e-05,
"loss": 3.0899,
"step": 2170
},
{
"epoch": 0.72,
"learning_rate": 3.87473441108545e-05,
"loss": 2.9319,
"step": 2175
},
{
"epoch": 0.72,
"learning_rate": 3.852101616628176e-05,
"loss": 2.453,
"step": 2180
},
{
"epoch": 0.72,
"learning_rate": 3.829468822170901e-05,
"loss": 2.0663,
"step": 2185
},
{
"epoch": 0.72,
"learning_rate": 3.8068360277136265e-05,
"loss": 2.4464,
"step": 2190
},
{
"epoch": 0.72,
"learning_rate": 3.784203233256351e-05,
"loss": 2.5029,
"step": 2195
},
{
"epoch": 0.73,
"learning_rate": 3.761570438799076e-05,
"loss": 2.4315,
"step": 2200
},
{
"epoch": 0.73,
"learning_rate": 3.738937644341801e-05,
"loss": 2.9723,
"step": 2205
},
{
"epoch": 0.73,
"learning_rate": 3.716304849884526e-05,
"loss": 2.9545,
"step": 2210
},
{
"epoch": 0.73,
"learning_rate": 3.693672055427252e-05,
"loss": 2.4394,
"step": 2215
},
{
"epoch": 0.73,
"learning_rate": 3.671039260969977e-05,
"loss": 2.8852,
"step": 2220
},
{
"epoch": 0.73,
"learning_rate": 3.6484064665127025e-05,
"loss": 2.5524,
"step": 2225
},
{
"epoch": 0.74,
"learning_rate": 3.6257736720554274e-05,
"loss": 2.5424,
"step": 2230
},
{
"epoch": 0.74,
"learning_rate": 3.6031408775981524e-05,
"loss": 2.4416,
"step": 2235
},
{
"epoch": 0.74,
"learning_rate": 3.580508083140877e-05,
"loss": 2.3582,
"step": 2240
},
{
"epoch": 0.74,
"learning_rate": 3.557875288683603e-05,
"loss": 2.9595,
"step": 2245
},
{
"epoch": 0.74,
"learning_rate": 3.535242494226328e-05,
"loss": 3.0225,
"step": 2250
},
{
"epoch": 0.74,
"learning_rate": 3.5126096997690535e-05,
"loss": 2.0518,
"step": 2255
},
{
"epoch": 0.75,
"learning_rate": 3.4899769053117784e-05,
"loss": 2.9843,
"step": 2260
},
{
"epoch": 0.75,
"learning_rate": 3.467344110854504e-05,
"loss": 2.3971,
"step": 2265
},
{
"epoch": 0.75,
"learning_rate": 3.444711316397228e-05,
"loss": 2.9604,
"step": 2270
},
{
"epoch": 0.75,
"learning_rate": 3.422078521939954e-05,
"loss": 1.526,
"step": 2275
},
{
"epoch": 0.75,
"learning_rate": 3.399445727482679e-05,
"loss": 2.5448,
"step": 2280
},
{
"epoch": 0.75,
"learning_rate": 3.376812933025404e-05,
"loss": 2.5325,
"step": 2285
},
{
"epoch": 0.76,
"learning_rate": 3.3541801385681295e-05,
"loss": 2.0032,
"step": 2290
},
{
"epoch": 0.76,
"learning_rate": 3.3315473441108544e-05,
"loss": 2.447,
"step": 2295
},
{
"epoch": 0.76,
"learning_rate": 3.3089145496535794e-05,
"loss": 2.9568,
"step": 2300
},
{
"epoch": 0.76,
"learning_rate": 3.286281755196305e-05,
"loss": 2.5495,
"step": 2305
},
{
"epoch": 0.76,
"learning_rate": 3.26364896073903e-05,
"loss": 2.4766,
"step": 2310
},
{
"epoch": 0.76,
"learning_rate": 3.2410161662817556e-05,
"loss": 2.9513,
"step": 2315
},
{
"epoch": 0.77,
"learning_rate": 3.2183833718244805e-05,
"loss": 1.9875,
"step": 2320
},
{
"epoch": 0.77,
"learning_rate": 3.1957505773672054e-05,
"loss": 2.0134,
"step": 2325
},
{
"epoch": 0.77,
"learning_rate": 3.173117782909931e-05,
"loss": 2.5515,
"step": 2330
},
{
"epoch": 0.77,
"learning_rate": 3.150484988452656e-05,
"loss": 2.9005,
"step": 2335
},
{
"epoch": 0.77,
"learning_rate": 3.127852193995381e-05,
"loss": 2.8825,
"step": 2340
},
{
"epoch": 0.77,
"learning_rate": 3.1052193995381066e-05,
"loss": 1.8196,
"step": 2345
},
{
"epoch": 0.78,
"learning_rate": 3.0825866050808315e-05,
"loss": 2.8672,
"step": 2350
},
{
"epoch": 0.78,
"learning_rate": 3.0599538106235565e-05,
"loss": 2.5313,
"step": 2355
},
{
"epoch": 0.78,
"learning_rate": 3.0373210161662818e-05,
"loss": 2.4912,
"step": 2360
},
{
"epoch": 0.78,
"learning_rate": 3.014688221709007e-05,
"loss": 2.851,
"step": 2365
},
{
"epoch": 0.78,
"learning_rate": 2.9920554272517323e-05,
"loss": 2.9382,
"step": 2370
},
{
"epoch": 0.78,
"learning_rate": 2.9694226327944573e-05,
"loss": 2.8546,
"step": 2375
},
{
"epoch": 0.79,
"learning_rate": 2.9467898383371826e-05,
"loss": 2.9912,
"step": 2380
},
{
"epoch": 0.79,
"learning_rate": 2.924157043879908e-05,
"loss": 2.4831,
"step": 2385
},
{
"epoch": 0.79,
"learning_rate": 2.901524249422633e-05,
"loss": 2.96,
"step": 2390
},
{
"epoch": 0.79,
"learning_rate": 2.8788914549653577e-05,
"loss": 2.916,
"step": 2395
},
{
"epoch": 0.79,
"learning_rate": 2.856258660508083e-05,
"loss": 2.9423,
"step": 2400
},
{
"epoch": 0.79,
"learning_rate": 2.8336258660508083e-05,
"loss": 2.3475,
"step": 2405
},
{
"epoch": 0.8,
"learning_rate": 2.8109930715935336e-05,
"loss": 2.4757,
"step": 2410
},
{
"epoch": 0.8,
"learning_rate": 2.7883602771362585e-05,
"loss": 2.497,
"step": 2415
},
{
"epoch": 0.8,
"learning_rate": 2.7657274826789838e-05,
"loss": 2.8617,
"step": 2420
},
{
"epoch": 0.8,
"learning_rate": 2.743094688221709e-05,
"loss": 2.5424,
"step": 2425
},
{
"epoch": 0.8,
"learning_rate": 2.720461893764434e-05,
"loss": 1.9343,
"step": 2430
},
{
"epoch": 0.8,
"learning_rate": 2.6978290993071593e-05,
"loss": 2.3538,
"step": 2435
},
{
"epoch": 0.81,
"learning_rate": 2.6751963048498846e-05,
"loss": 2.457,
"step": 2440
},
{
"epoch": 0.81,
"learning_rate": 2.65256351039261e-05,
"loss": 1.8267,
"step": 2445
},
{
"epoch": 0.81,
"learning_rate": 2.629930715935335e-05,
"loss": 2.8956,
"step": 2450
},
{
"epoch": 0.81,
"learning_rate": 2.60729792147806e-05,
"loss": 2.4804,
"step": 2455
},
{
"epoch": 0.81,
"learning_rate": 2.5846651270207854e-05,
"loss": 0.9658,
"step": 2460
},
{
"epoch": 0.81,
"learning_rate": 2.5620323325635107e-05,
"loss": 2.5179,
"step": 2465
},
{
"epoch": 0.81,
"learning_rate": 2.5393995381062353e-05,
"loss": 2.0064,
"step": 2470
},
{
"epoch": 0.82,
"learning_rate": 2.5167667436489606e-05,
"loss": 2.9194,
"step": 2475
},
{
"epoch": 0.82,
"learning_rate": 2.494133949191686e-05,
"loss": 2.9696,
"step": 2480
},
{
"epoch": 0.82,
"learning_rate": 2.471501154734411e-05,
"loss": 2.3919,
"step": 2485
},
{
"epoch": 0.82,
"learning_rate": 2.448868360277136e-05,
"loss": 2.9989,
"step": 2490
},
{
"epoch": 0.82,
"learning_rate": 2.4262355658198614e-05,
"loss": 1.9785,
"step": 2495
},
{
"epoch": 0.82,
"learning_rate": 2.4036027713625867e-05,
"loss": 2.4736,
"step": 2500
},
{
"epoch": 0.83,
"learning_rate": 2.3809699769053116e-05,
"loss": 2.9508,
"step": 2505
},
{
"epoch": 0.83,
"learning_rate": 2.358337182448037e-05,
"loss": 3.0043,
"step": 2510
},
{
"epoch": 0.83,
"learning_rate": 2.3357043879907622e-05,
"loss": 2.3829,
"step": 2515
},
{
"epoch": 0.83,
"learning_rate": 2.3130715935334875e-05,
"loss": 2.4771,
"step": 2520
},
{
"epoch": 0.83,
"learning_rate": 2.2904387990762124e-05,
"loss": 2.4709,
"step": 2525
},
{
"epoch": 0.83,
"learning_rate": 2.2678060046189377e-05,
"loss": 2.8935,
"step": 2530
},
{
"epoch": 0.84,
"learning_rate": 2.245173210161663e-05,
"loss": 2.9177,
"step": 2535
},
{
"epoch": 0.84,
"learning_rate": 2.2225404157043883e-05,
"loss": 2.9019,
"step": 2540
},
{
"epoch": 0.84,
"learning_rate": 2.1999076212471132e-05,
"loss": 2.944,
"step": 2545
},
{
"epoch": 0.84,
"learning_rate": 2.1772748267898385e-05,
"loss": 2.4892,
"step": 2550
},
{
"epoch": 0.84,
"learning_rate": 2.1546420323325638e-05,
"loss": 2.8733,
"step": 2555
},
{
"epoch": 0.84,
"learning_rate": 2.1320092378752887e-05,
"loss": 1.6182,
"step": 2560
},
{
"epoch": 0.85,
"learning_rate": 2.1093764434180137e-05,
"loss": 2.8951,
"step": 2565
},
{
"epoch": 0.85,
"learning_rate": 2.086743648960739e-05,
"loss": 2.903,
"step": 2570
},
{
"epoch": 0.85,
"learning_rate": 2.0641108545034642e-05,
"loss": 2.3775,
"step": 2575
},
{
"epoch": 0.85,
"learning_rate": 2.0414780600461892e-05,
"loss": 2.5025,
"step": 2580
},
{
"epoch": 0.85,
"learning_rate": 2.0188452655889145e-05,
"loss": 2.3309,
"step": 2585
},
{
"epoch": 0.85,
"learning_rate": 1.9962124711316398e-05,
"loss": 2.5212,
"step": 2590
},
{
"epoch": 0.86,
"learning_rate": 1.973579676674365e-05,
"loss": 2.8385,
"step": 2595
},
{
"epoch": 0.86,
"learning_rate": 1.95094688221709e-05,
"loss": 2.3192,
"step": 2600
},
{
"epoch": 0.86,
"learning_rate": 1.9283140877598153e-05,
"loss": 2.8028,
"step": 2605
},
{
"epoch": 0.86,
"learning_rate": 1.9056812933025406e-05,
"loss": 2.9434,
"step": 2610
},
{
"epoch": 0.86,
"learning_rate": 1.883048498845266e-05,
"loss": 2.9547,
"step": 2615
},
{
"epoch": 0.86,
"learning_rate": 1.8604157043879908e-05,
"loss": 1.8949,
"step": 2620
},
{
"epoch": 0.87,
"learning_rate": 1.837782909930716e-05,
"loss": 1.8706,
"step": 2625
},
{
"epoch": 0.87,
"learning_rate": 1.8151501154734413e-05,
"loss": 2.424,
"step": 2630
},
{
"epoch": 0.87,
"learning_rate": 1.7925173210161666e-05,
"loss": 2.3591,
"step": 2635
},
{
"epoch": 0.87,
"learning_rate": 1.7698845265588912e-05,
"loss": 2.8878,
"step": 2640
},
{
"epoch": 0.87,
"learning_rate": 1.7472517321016165e-05,
"loss": 2.8651,
"step": 2645
},
{
"epoch": 0.87,
"learning_rate": 1.7246189376443418e-05,
"loss": 2.8761,
"step": 2650
},
{
"epoch": 0.88,
"learning_rate": 1.701986143187067e-05,
"loss": 2.833,
"step": 2655
},
{
"epoch": 0.88,
"learning_rate": 1.679353348729792e-05,
"loss": 2.395,
"step": 2660
},
{
"epoch": 0.88,
"learning_rate": 1.6567205542725173e-05,
"loss": 2.9417,
"step": 2665
},
{
"epoch": 0.88,
"learning_rate": 1.6340877598152426e-05,
"loss": 2.9002,
"step": 2670
},
{
"epoch": 0.88,
"learning_rate": 1.611454965357968e-05,
"loss": 2.3858,
"step": 2675
},
{
"epoch": 0.88,
"learning_rate": 1.588822170900693e-05,
"loss": 2.4055,
"step": 2680
},
{
"epoch": 0.89,
"learning_rate": 1.566189376443418e-05,
"loss": 2.8661,
"step": 2685
},
{
"epoch": 0.89,
"learning_rate": 1.543556581986143e-05,
"loss": 2.4005,
"step": 2690
},
{
"epoch": 0.89,
"learning_rate": 1.5209237875288685e-05,
"loss": 2.8337,
"step": 2695
},
{
"epoch": 0.89,
"learning_rate": 1.4982909930715935e-05,
"loss": 1.8415,
"step": 2700
},
{
"epoch": 0.89,
"learning_rate": 1.4756581986143188e-05,
"loss": 2.8588,
"step": 2705
},
{
"epoch": 0.89,
"learning_rate": 1.4530254041570439e-05,
"loss": 2.8498,
"step": 2710
},
{
"epoch": 0.9,
"learning_rate": 1.430392609699769e-05,
"loss": 2.3665,
"step": 2715
},
{
"epoch": 0.9,
"learning_rate": 1.4077598152424943e-05,
"loss": 2.9143,
"step": 2720
},
{
"epoch": 0.9,
"learning_rate": 1.3851270207852194e-05,
"loss": 2.8927,
"step": 2725
},
{
"epoch": 0.9,
"learning_rate": 1.3624942263279447e-05,
"loss": 2.2419,
"step": 2730
},
{
"epoch": 0.9,
"learning_rate": 1.3398614318706696e-05,
"loss": 2.8982,
"step": 2735
},
{
"epoch": 0.9,
"learning_rate": 1.3172286374133949e-05,
"loss": 2.3598,
"step": 2740
},
{
"epoch": 0.91,
"learning_rate": 1.29459584295612e-05,
"loss": 1.9034,
"step": 2745
},
{
"epoch": 0.91,
"learning_rate": 1.2719630484988453e-05,
"loss": 2.8746,
"step": 2750
},
{
"epoch": 0.91,
"learning_rate": 1.2493302540415704e-05,
"loss": 2.5453,
"step": 2755
},
{
"epoch": 0.91,
"learning_rate": 1.2266974595842957e-05,
"loss": 2.3748,
"step": 2760
},
{
"epoch": 0.91,
"learning_rate": 1.2040646651270208e-05,
"loss": 2.3395,
"step": 2765
},
{
"epoch": 0.91,
"learning_rate": 1.1814318706697461e-05,
"loss": 1.8988,
"step": 2770
},
{
"epoch": 0.92,
"learning_rate": 1.1587990762124712e-05,
"loss": 2.832,
"step": 2775
},
{
"epoch": 0.92,
"learning_rate": 1.1361662817551963e-05,
"loss": 2.9164,
"step": 2780
},
{
"epoch": 0.92,
"learning_rate": 1.1135334872979214e-05,
"loss": 1.3357,
"step": 2785
},
{
"epoch": 0.92,
"learning_rate": 1.0909006928406465e-05,
"loss": 2.3844,
"step": 2790
},
{
"epoch": 0.92,
"learning_rate": 1.0682678983833718e-05,
"loss": 2.3568,
"step": 2795
},
{
"epoch": 0.92,
"learning_rate": 1.045635103926097e-05,
"loss": 2.9659,
"step": 2800
},
{
"epoch": 0.93,
"learning_rate": 1.0230023094688222e-05,
"loss": 2.3948,
"step": 2805
},
{
"epoch": 0.93,
"learning_rate": 1.0003695150115473e-05,
"loss": 2.3828,
"step": 2810
},
{
"epoch": 0.93,
"learning_rate": 9.777367205542726e-06,
"loss": 2.3914,
"step": 2815
},
{
"epoch": 0.93,
"learning_rate": 9.551039260969976e-06,
"loss": 2.4423,
"step": 2820
},
{
"epoch": 0.93,
"learning_rate": 9.324711316397229e-06,
"loss": 2.4067,
"step": 2825
},
{
"epoch": 0.93,
"learning_rate": 9.09838337182448e-06,
"loss": 2.8074,
"step": 2830
},
{
"epoch": 0.94,
"learning_rate": 8.872055427251733e-06,
"loss": 2.9147,
"step": 2835
},
{
"epoch": 0.94,
"learning_rate": 8.645727482678984e-06,
"loss": 2.4484,
"step": 2840
},
{
"epoch": 0.94,
"learning_rate": 8.419399538106235e-06,
"loss": 2.8331,
"step": 2845
},
{
"epoch": 0.94,
"learning_rate": 8.193071593533488e-06,
"loss": 2.3605,
"step": 2850
},
{
"epoch": 0.94,
"learning_rate": 7.966743648960739e-06,
"loss": 2.0343,
"step": 2855
},
{
"epoch": 0.94,
"learning_rate": 7.74041570438799e-06,
"loss": 2.8557,
"step": 2860
},
{
"epoch": 0.95,
"learning_rate": 7.514087759815243e-06,
"loss": 2.4906,
"step": 2865
},
{
"epoch": 0.95,
"learning_rate": 7.287759815242494e-06,
"loss": 2.2705,
"step": 2870
},
{
"epoch": 0.95,
"learning_rate": 7.061431870669746e-06,
"loss": 2.1079,
"step": 2875
},
{
"epoch": 0.95,
"learning_rate": 6.835103926096998e-06,
"loss": 2.9184,
"step": 2880
},
{
"epoch": 0.95,
"learning_rate": 6.60877598152425e-06,
"loss": 2.7964,
"step": 2885
},
{
"epoch": 0.95,
"learning_rate": 6.382448036951501e-06,
"loss": 2.2701,
"step": 2890
},
{
"epoch": 0.96,
"learning_rate": 6.156120092378753e-06,
"loss": 2.4188,
"step": 2895
},
{
"epoch": 0.96,
"learning_rate": 5.929792147806005e-06,
"loss": 2.4344,
"step": 2900
},
{
"epoch": 0.96,
"learning_rate": 5.703464203233256e-06,
"loss": 2.0036,
"step": 2905
},
{
"epoch": 0.96,
"learning_rate": 5.4771362586605075e-06,
"loss": 2.3556,
"step": 2910
},
{
"epoch": 0.96,
"learning_rate": 5.2508083140877595e-06,
"loss": 2.8956,
"step": 2915
},
{
"epoch": 0.96,
"learning_rate": 5.0244803695150115e-06,
"loss": 2.9212,
"step": 2920
},
{
"epoch": 0.97,
"learning_rate": 4.7981524249422635e-06,
"loss": 2.325,
"step": 2925
},
{
"epoch": 0.97,
"learning_rate": 4.571824480369515e-06,
"loss": 2.9001,
"step": 2930
},
{
"epoch": 0.97,
"learning_rate": 4.345496535796767e-06,
"loss": 2.0078,
"step": 2935
},
{
"epoch": 0.97,
"learning_rate": 4.119168591224019e-06,
"loss": 1.8776,
"step": 2940
},
{
"epoch": 0.97,
"learning_rate": 3.892840646651271e-06,
"loss": 2.315,
"step": 2945
},
{
"epoch": 0.97,
"learning_rate": 3.666512702078522e-06,
"loss": 1.9461,
"step": 2950
},
{
"epoch": 0.97,
"learning_rate": 3.4401847575057737e-06,
"loss": 2.8444,
"step": 2955
},
{
"epoch": 0.98,
"learning_rate": 3.2138568129330253e-06,
"loss": 2.7964,
"step": 2960
},
{
"epoch": 0.98,
"learning_rate": 2.987528868360277e-06,
"loss": 2.2962,
"step": 2965
},
{
"epoch": 0.98,
"learning_rate": 2.761200923787529e-06,
"loss": 2.9162,
"step": 2970
},
{
"epoch": 0.98,
"learning_rate": 2.5348729792147804e-06,
"loss": 2.3761,
"step": 2975
},
{
"epoch": 0.98,
"learning_rate": 2.3085450346420324e-06,
"loss": 2.7675,
"step": 2980
},
{
"epoch": 0.98,
"learning_rate": 2.082217090069284e-06,
"loss": 2.3303,
"step": 2985
},
{
"epoch": 0.99,
"learning_rate": 1.8558891454965358e-06,
"loss": 2.3253,
"step": 2990
},
{
"epoch": 0.99,
"learning_rate": 1.6295612009237876e-06,
"loss": 2.8782,
"step": 2995
},
{
"epoch": 0.99,
"learning_rate": 1.4032332563510394e-06,
"loss": 2.7999,
"step": 3000
},
{
"epoch": 0.99,
"learning_rate": 1.1769053117782911e-06,
"loss": 2.7688,
"step": 3005
},
{
"epoch": 0.99,
"learning_rate": 9.505773672055428e-07,
"loss": 2.8891,
"step": 3010
},
{
"epoch": 0.99,
"learning_rate": 7.242494226327945e-07,
"loss": 2.4505,
"step": 3015
},
{
"epoch": 1.0,
"learning_rate": 4.979214780600462e-07,
"loss": 2.8581,
"step": 3020
},
{
"epoch": 1.0,
"learning_rate": 2.715935334872979e-07,
"loss": 2.3888,
"step": 3025
},
{
"epoch": 1.0,
"learning_rate": 4.5265588914549656e-08,
"loss": 2.2638,
"step": 3030
},
{
"epoch": 1.0,
"step": 3031,
"total_flos": 5629783610228736.0,
"train_loss": 2.9040869920020134,
"train_runtime": 3297.2859,
"train_samples_per_second": 0.919,
"train_steps_per_second": 0.919
}
],
"max_steps": 3031,
"num_train_epochs": 1,
"total_flos": 5629783610228736.0,
"trial_name": null,
"trial_params": null
}