MixtureBT-700K-static-3B-10heads / trainer_state.json
Evangelinejy's picture
Upload folder using huggingface_hub
78b477e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999751200457792,
"eval_steps": 500,
"global_step": 10048,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009951981688353694,
"grad_norm": 0.6644615530967712,
"learning_rate": 3.578528827037773e-05,
"loss": -0.2368,
"step": 10
},
{
"epoch": 0.0019903963376707388,
"grad_norm": 0.23524807393550873,
"learning_rate": 7.554671968190855e-05,
"loss": -0.2801,
"step": 20
},
{
"epoch": 0.002985594506506108,
"grad_norm": 0.22816260159015656,
"learning_rate": 0.00011530815109343937,
"loss": -0.3487,
"step": 30
},
{
"epoch": 0.0039807926753414775,
"grad_norm": 0.23517341911792755,
"learning_rate": 0.00015506958250497018,
"loss": -0.421,
"step": 40
},
{
"epoch": 0.004975990844176847,
"grad_norm": 0.14163829386234283,
"learning_rate": 0.000194831013916501,
"loss": -0.474,
"step": 50
},
{
"epoch": 0.005971189013012216,
"grad_norm": 0.17084212601184845,
"learning_rate": 0.00023459244532803182,
"loss": -0.5109,
"step": 60
},
{
"epoch": 0.006966387181847586,
"grad_norm": 0.15078520774841309,
"learning_rate": 0.00027435387673956264,
"loss": -0.5484,
"step": 70
},
{
"epoch": 0.007961585350682955,
"grad_norm": 0.095310278236866,
"learning_rate": 0.00031411530815109347,
"loss": -0.5527,
"step": 80
},
{
"epoch": 0.008956783519518324,
"grad_norm": 0.10798652470111847,
"learning_rate": 0.0003538767395626243,
"loss": -0.555,
"step": 90
},
{
"epoch": 0.009951981688353694,
"grad_norm": 0.14966444671154022,
"learning_rate": 0.00039363817097415506,
"loss": -0.5603,
"step": 100
},
{
"epoch": 0.010947179857189062,
"grad_norm": 0.11505836993455887,
"learning_rate": 0.0004333996023856859,
"loss": -0.5812,
"step": 110
},
{
"epoch": 0.011942378026024433,
"grad_norm": 0.10001794248819351,
"learning_rate": 0.0004731610337972167,
"loss": -0.6047,
"step": 120
},
{
"epoch": 0.012937576194859801,
"grad_norm": 0.09755612164735794,
"learning_rate": 0.0005129224652087476,
"loss": -0.6262,
"step": 130
},
{
"epoch": 0.013932774363695171,
"grad_norm": 0.09725947678089142,
"learning_rate": 0.0005526838966202783,
"loss": -0.5985,
"step": 140
},
{
"epoch": 0.01492797253253054,
"grad_norm": 0.11158037185668945,
"learning_rate": 0.0005924453280318092,
"loss": -0.6026,
"step": 150
},
{
"epoch": 0.01592317070136591,
"grad_norm": 0.14563938975334167,
"learning_rate": 0.00063220675944334,
"loss": -0.65,
"step": 160
},
{
"epoch": 0.01691836887020128,
"grad_norm": 0.1282544583082199,
"learning_rate": 0.0006719681908548709,
"loss": -0.6005,
"step": 170
},
{
"epoch": 0.017913567039036647,
"grad_norm": 0.10261674225330353,
"learning_rate": 0.0007117296222664016,
"loss": -0.6009,
"step": 180
},
{
"epoch": 0.01890876520787202,
"grad_norm": 0.09247754514217377,
"learning_rate": 0.0007514910536779325,
"loss": -0.6396,
"step": 190
},
{
"epoch": 0.019903963376707388,
"grad_norm": 0.10132607072591782,
"learning_rate": 0.0007912524850894633,
"loss": -0.6591,
"step": 200
},
{
"epoch": 0.020899161545542756,
"grad_norm": 0.16978560388088226,
"learning_rate": 0.000831013916500994,
"loss": -0.6426,
"step": 210
},
{
"epoch": 0.021894359714378125,
"grad_norm": 0.11510445177555084,
"learning_rate": 0.0008707753479125249,
"loss": -0.6005,
"step": 220
},
{
"epoch": 0.022889557883213493,
"grad_norm": 0.07095818221569061,
"learning_rate": 0.0009105367793240557,
"loss": -0.6672,
"step": 230
},
{
"epoch": 0.023884756052048865,
"grad_norm": 0.09719249606132507,
"learning_rate": 0.0009502982107355866,
"loss": -0.6317,
"step": 240
},
{
"epoch": 0.024879954220884234,
"grad_norm": 0.07893206924200058,
"learning_rate": 0.0009900596421471173,
"loss": -0.6573,
"step": 250
},
{
"epoch": 0.025875152389719602,
"grad_norm": 0.09626351296901703,
"learning_rate": 0.001029821073558648,
"loss": -0.6316,
"step": 260
},
{
"epoch": 0.02687035055855497,
"grad_norm": 0.0947069525718689,
"learning_rate": 0.0010695825049701789,
"loss": -0.6453,
"step": 270
},
{
"epoch": 0.027865548727390343,
"grad_norm": 0.10596685111522675,
"learning_rate": 0.0011093439363817096,
"loss": -0.6316,
"step": 280
},
{
"epoch": 0.02886074689622571,
"grad_norm": 0.09962307661771774,
"learning_rate": 0.0011491053677932406,
"loss": -0.6847,
"step": 290
},
{
"epoch": 0.02985594506506108,
"grad_norm": 0.09802088141441345,
"learning_rate": 0.0011888667992047714,
"loss": -0.6957,
"step": 300
},
{
"epoch": 0.03085114323389645,
"grad_norm": 0.10051831603050232,
"learning_rate": 0.0012286282306163022,
"loss": -0.6896,
"step": 310
},
{
"epoch": 0.03184634140273182,
"grad_norm": 0.0771615281701088,
"learning_rate": 0.001268389662027833,
"loss": -0.6673,
"step": 320
},
{
"epoch": 0.032841539571567185,
"grad_norm": 0.09795242547988892,
"learning_rate": 0.001308151093439364,
"loss": -0.6615,
"step": 330
},
{
"epoch": 0.03383673774040256,
"grad_norm": 0.06744644790887833,
"learning_rate": 0.0013479125248508945,
"loss": -0.6949,
"step": 340
},
{
"epoch": 0.03483193590923793,
"grad_norm": 0.12363786995410919,
"learning_rate": 0.0013876739562624254,
"loss": -0.6797,
"step": 350
},
{
"epoch": 0.035827134078073294,
"grad_norm": 0.11588004976511002,
"learning_rate": 0.0014274353876739562,
"loss": -0.6947,
"step": 360
},
{
"epoch": 0.036822332246908666,
"grad_norm": 0.07055709511041641,
"learning_rate": 0.0014671968190854872,
"loss": -0.6801,
"step": 370
},
{
"epoch": 0.03781753041574404,
"grad_norm": 0.11170762777328491,
"learning_rate": 0.0015069582504970177,
"loss": -0.6894,
"step": 380
},
{
"epoch": 0.0388127285845794,
"grad_norm": 0.06324685364961624,
"learning_rate": 0.0015467196819085487,
"loss": -0.7317,
"step": 390
},
{
"epoch": 0.039807926753414775,
"grad_norm": 0.06563621014356613,
"learning_rate": 0.0015864811133200797,
"loss": -0.7251,
"step": 400
},
{
"epoch": 0.04080312492225014,
"grad_norm": 0.06519268453121185,
"learning_rate": 0.0016262425447316103,
"loss": -0.7299,
"step": 410
},
{
"epoch": 0.04179832309108551,
"grad_norm": 0.09023866802453995,
"learning_rate": 0.0016660039761431412,
"loss": -0.7148,
"step": 420
},
{
"epoch": 0.042793521259920884,
"grad_norm": 0.11152295768260956,
"learning_rate": 0.001705765407554672,
"loss": -0.728,
"step": 430
},
{
"epoch": 0.04378871942875625,
"grad_norm": 0.07573758065700531,
"learning_rate": 0.001745526838966203,
"loss": -0.7095,
"step": 440
},
{
"epoch": 0.04478391759759162,
"grad_norm": 0.12177475541830063,
"learning_rate": 0.0017852882703777335,
"loss": -0.7138,
"step": 450
},
{
"epoch": 0.04577911576642699,
"grad_norm": 0.10095696151256561,
"learning_rate": 0.0018250497017892645,
"loss": -0.7141,
"step": 460
},
{
"epoch": 0.04677431393526236,
"grad_norm": 0.06338244676589966,
"learning_rate": 0.0018648111332007953,
"loss": -0.7236,
"step": 470
},
{
"epoch": 0.04776951210409773,
"grad_norm": 0.14773637056350708,
"learning_rate": 0.001904572564612326,
"loss": -0.7243,
"step": 480
},
{
"epoch": 0.048764710272933096,
"grad_norm": 0.08487069606781006,
"learning_rate": 0.0019443339960238568,
"loss": -0.7226,
"step": 490
},
{
"epoch": 0.04975990844176847,
"grad_norm": 0.0774376168847084,
"learning_rate": 0.001984095427435388,
"loss": -0.7551,
"step": 500
},
{
"epoch": 0.05075510661060384,
"grad_norm": 0.07008124142885208,
"learning_rate": 0.0019999980500645096,
"loss": -0.7235,
"step": 510
},
{
"epoch": 0.051750304779439205,
"grad_norm": 0.0964338481426239,
"learning_rate": 0.0019999861338196065,
"loss": -0.7487,
"step": 520
},
{
"epoch": 0.05274550294827458,
"grad_norm": 0.06681355834007263,
"learning_rate": 0.0019999633847562264,
"loss": -0.7639,
"step": 530
},
{
"epoch": 0.05374070111710994,
"grad_norm": 0.1224050298333168,
"learning_rate": 0.0019999298031208096,
"loss": -0.758,
"step": 540
},
{
"epoch": 0.054735899285945314,
"grad_norm": 0.08830223232507706,
"learning_rate": 0.001999885389277145,
"loss": -0.7676,
"step": 550
},
{
"epoch": 0.055731097454780686,
"grad_norm": 0.06694019585847855,
"learning_rate": 0.001999830143706366,
"loss": -0.7881,
"step": 560
},
{
"epoch": 0.05672629562361605,
"grad_norm": 0.10447929799556732,
"learning_rate": 0.0019997640670069467,
"loss": -0.7783,
"step": 570
},
{
"epoch": 0.05772149379245142,
"grad_norm": 0.18173834681510925,
"learning_rate": 0.0019996871598946934,
"loss": -0.7783,
"step": 580
},
{
"epoch": 0.058716691961286795,
"grad_norm": 0.06380622833967209,
"learning_rate": 0.001999599423202739,
"loss": -0.7888,
"step": 590
},
{
"epoch": 0.05971189013012216,
"grad_norm": 0.04775335267186165,
"learning_rate": 0.0019995008578815314,
"loss": -0.799,
"step": 600
},
{
"epoch": 0.06070708829895753,
"grad_norm": 0.08990439772605896,
"learning_rate": 0.0019993914649988264,
"loss": -0.7885,
"step": 610
},
{
"epoch": 0.0617022864677929,
"grad_norm": 0.05452219769358635,
"learning_rate": 0.0019992712457396733,
"loss": -0.794,
"step": 620
},
{
"epoch": 0.06269748463662826,
"grad_norm": 0.047960225492715836,
"learning_rate": 0.0019991402014064037,
"loss": -0.7613,
"step": 630
},
{
"epoch": 0.06369268280546364,
"grad_norm": 0.0659102350473404,
"learning_rate": 0.001998998333418617,
"loss": -0.7943,
"step": 640
},
{
"epoch": 0.064687880974299,
"grad_norm": 0.09903737157583237,
"learning_rate": 0.0019988456433131644,
"loss": -0.7935,
"step": 650
},
{
"epoch": 0.06568307914313437,
"grad_norm": 0.10571643710136414,
"learning_rate": 0.0019986821327441333,
"loss": -0.826,
"step": 660
},
{
"epoch": 0.06667827731196975,
"grad_norm": 0.09088798612356186,
"learning_rate": 0.001998507803482828,
"loss": -0.7864,
"step": 670
},
{
"epoch": 0.06767347548080511,
"grad_norm": 0.06041436642408371,
"learning_rate": 0.0019983226574177525,
"loss": -0.8122,
"step": 680
},
{
"epoch": 0.06866867364964048,
"grad_norm": 0.04567006230354309,
"learning_rate": 0.0019981266965545877,
"loss": -0.8225,
"step": 690
},
{
"epoch": 0.06966387181847586,
"grad_norm": 0.1008988469839096,
"learning_rate": 0.0019979199230161725,
"loss": -0.7983,
"step": 700
},
{
"epoch": 0.07065906998731122,
"grad_norm": 0.0660218819975853,
"learning_rate": 0.0019977023390424778,
"loss": -0.8171,
"step": 710
},
{
"epoch": 0.07165426815614659,
"grad_norm": 0.0423499271273613,
"learning_rate": 0.0019974739469905828,
"loss": -0.8051,
"step": 720
},
{
"epoch": 0.07264946632498197,
"grad_norm": 0.09598391503095627,
"learning_rate": 0.001997234749334653,
"loss": -0.7979,
"step": 730
},
{
"epoch": 0.07364466449381733,
"grad_norm": 0.04957730695605278,
"learning_rate": 0.001996984748665908,
"loss": -0.8326,
"step": 740
},
{
"epoch": 0.0746398626626527,
"grad_norm": 0.09694412350654602,
"learning_rate": 0.0019967239476925986,
"loss": -0.8425,
"step": 750
},
{
"epoch": 0.07563506083148808,
"grad_norm": 0.040948230773210526,
"learning_rate": 0.001996452349239972,
"loss": -0.8062,
"step": 760
},
{
"epoch": 0.07663025900032344,
"grad_norm": 0.1682979166507721,
"learning_rate": 0.001996169956250247,
"loss": -0.7646,
"step": 770
},
{
"epoch": 0.0776254571691588,
"grad_norm": 0.058550719171762466,
"learning_rate": 0.001995876771782577,
"loss": -0.7909,
"step": 780
},
{
"epoch": 0.07862065533799417,
"grad_norm": 0.11844189465045929,
"learning_rate": 0.001995572799013021,
"loss": -0.8219,
"step": 790
},
{
"epoch": 0.07961585350682955,
"grad_norm": 0.0915180891752243,
"learning_rate": 0.001995258041234506,
"loss": -0.8018,
"step": 800
},
{
"epoch": 0.08061105167566492,
"grad_norm": 0.053632501512765884,
"learning_rate": 0.001994932501856793,
"loss": -0.82,
"step": 810
},
{
"epoch": 0.08160624984450028,
"grad_norm": 0.10094787180423737,
"learning_rate": 0.00199459618440644,
"loss": -0.7917,
"step": 820
},
{
"epoch": 0.08260144801333566,
"grad_norm": 0.05512174591422081,
"learning_rate": 0.001994249092526764,
"loss": -0.842,
"step": 830
},
{
"epoch": 0.08359664618217102,
"grad_norm": 0.05738908424973488,
"learning_rate": 0.0019938912299778,
"loss": -0.8688,
"step": 840
},
{
"epoch": 0.08459184435100639,
"grad_norm": 0.11923105269670486,
"learning_rate": 0.001993522600636262,
"loss": -0.8018,
"step": 850
},
{
"epoch": 0.08558704251984177,
"grad_norm": 0.22091729938983917,
"learning_rate": 0.0019931432084954992,
"loss": -0.8532,
"step": 860
},
{
"epoch": 0.08658224068867713,
"grad_norm": 0.08304735273122787,
"learning_rate": 0.0019927530576654565,
"loss": -0.8289,
"step": 870
},
{
"epoch": 0.0875774388575125,
"grad_norm": 0.1574501097202301,
"learning_rate": 0.001992352152372624,
"loss": -0.8044,
"step": 880
},
{
"epoch": 0.08857263702634788,
"grad_norm": 0.0682770311832428,
"learning_rate": 0.001991940496959997,
"loss": -0.7923,
"step": 890
},
{
"epoch": 0.08956783519518324,
"grad_norm": 0.1527094841003418,
"learning_rate": 0.001991518095887025,
"loss": -0.83,
"step": 900
},
{
"epoch": 0.09056303336401861,
"grad_norm": 0.053579773753881454,
"learning_rate": 0.001991084953729567,
"loss": -0.809,
"step": 910
},
{
"epoch": 0.09155823153285397,
"grad_norm": 0.06435493379831314,
"learning_rate": 0.001990641075179837,
"loss": -0.7872,
"step": 920
},
{
"epoch": 0.09255342970168935,
"grad_norm": 0.045854952186346054,
"learning_rate": 0.0019901864650463583,
"loss": -0.8038,
"step": 930
},
{
"epoch": 0.09354862787052472,
"grad_norm": 0.053723808377981186,
"learning_rate": 0.0019897211282539078,
"loss": -0.8497,
"step": 940
},
{
"epoch": 0.09454382603936008,
"grad_norm": 0.2113220989704132,
"learning_rate": 0.0019892450698434645,
"loss": -0.8268,
"step": 950
},
{
"epoch": 0.09553902420819546,
"grad_norm": 0.06267759203910828,
"learning_rate": 0.001988758294972154,
"loss": -0.8163,
"step": 960
},
{
"epoch": 0.09653422237703083,
"grad_norm": 0.06445565819740295,
"learning_rate": 0.0019882608089131937,
"loss": -0.8093,
"step": 970
},
{
"epoch": 0.09752942054586619,
"grad_norm": 0.16995398700237274,
"learning_rate": 0.0019877526170558346,
"loss": -0.8083,
"step": 980
},
{
"epoch": 0.09852461871470157,
"grad_norm": 0.03674469143152237,
"learning_rate": 0.0019872337249053026,
"loss": -0.8669,
"step": 990
},
{
"epoch": 0.09951981688353694,
"grad_norm": 0.12962763011455536,
"learning_rate": 0.0019867041380827407,
"loss": -0.8912,
"step": 1000
},
{
"epoch": 0.1005150150523723,
"grad_norm": 0.043658774346113205,
"learning_rate": 0.0019861638623251457,
"loss": -0.8531,
"step": 1010
},
{
"epoch": 0.10151021322120768,
"grad_norm": 0.0549648217856884,
"learning_rate": 0.0019856129034853086,
"loss": -0.8226,
"step": 1020
},
{
"epoch": 0.10250541139004304,
"grad_norm": 0.05147390440106392,
"learning_rate": 0.001985051267531749,
"loss": -0.8368,
"step": 1030
},
{
"epoch": 0.10350060955887841,
"grad_norm": 0.059379711747169495,
"learning_rate": 0.0019844789605486515,
"loss": -0.8074,
"step": 1040
},
{
"epoch": 0.10449580772771377,
"grad_norm": 0.10174992680549622,
"learning_rate": 0.0019838959887358,
"loss": -0.8322,
"step": 1050
},
{
"epoch": 0.10549100589654915,
"grad_norm": 0.19209684431552887,
"learning_rate": 0.0019833023584085096,
"loss": -0.8304,
"step": 1060
},
{
"epoch": 0.10648620406538452,
"grad_norm": 0.0454883873462677,
"learning_rate": 0.001982698075997559,
"loss": -0.8354,
"step": 1070
},
{
"epoch": 0.10748140223421988,
"grad_norm": 0.12441568821668625,
"learning_rate": 0.001982083148049121,
"loss": -0.8562,
"step": 1080
},
{
"epoch": 0.10847660040305526,
"grad_norm": 0.06760125607252121,
"learning_rate": 0.0019814575812246906,
"loss": -0.8471,
"step": 1090
},
{
"epoch": 0.10947179857189063,
"grad_norm": 0.04590695723891258,
"learning_rate": 0.0019808213823010136,
"loss": -0.845,
"step": 1100
},
{
"epoch": 0.11046699674072599,
"grad_norm": 0.14543893933296204,
"learning_rate": 0.001980174558170013,
"loss": -0.8236,
"step": 1110
},
{
"epoch": 0.11146219490956137,
"grad_norm": 0.044470854103565216,
"learning_rate": 0.001979517115838715,
"loss": -0.8177,
"step": 1120
},
{
"epoch": 0.11245739307839674,
"grad_norm": 0.05811746418476105,
"learning_rate": 0.0019788490624291707,
"loss": -0.8414,
"step": 1130
},
{
"epoch": 0.1134525912472321,
"grad_norm": 0.05510469153523445,
"learning_rate": 0.0019781704051783826,
"loss": -0.8606,
"step": 1140
},
{
"epoch": 0.11444778941606748,
"grad_norm": 0.04229406639933586,
"learning_rate": 0.0019774811514382223,
"loss": -0.8722,
"step": 1150
},
{
"epoch": 0.11544298758490285,
"grad_norm": 0.0429520383477211,
"learning_rate": 0.0019767813086753556,
"loss": -0.8211,
"step": 1160
},
{
"epoch": 0.11643818575373821,
"grad_norm": 0.061233002692461014,
"learning_rate": 0.0019760708844711564,
"loss": -0.8616,
"step": 1170
},
{
"epoch": 0.11743338392257359,
"grad_norm": 0.04444749280810356,
"learning_rate": 0.0019753498865216278,
"loss": -0.8412,
"step": 1180
},
{
"epoch": 0.11842858209140895,
"grad_norm": 0.0725698322057724,
"learning_rate": 0.001974618322637318,
"loss": -0.867,
"step": 1190
},
{
"epoch": 0.11942378026024432,
"grad_norm": 0.05399654805660248,
"learning_rate": 0.0019738762007432357,
"loss": -0.8645,
"step": 1200
},
{
"epoch": 0.12041897842907968,
"grad_norm": 0.07678170502185822,
"learning_rate": 0.0019731235288787644,
"loss": -0.8491,
"step": 1210
},
{
"epoch": 0.12141417659791506,
"grad_norm": 0.07065637409687042,
"learning_rate": 0.001972360315197575,
"loss": -0.802,
"step": 1220
},
{
"epoch": 0.12240937476675043,
"grad_norm": 0.07059847563505173,
"learning_rate": 0.0019715865679675363,
"loss": -0.8345,
"step": 1230
},
{
"epoch": 0.1234045729355858,
"grad_norm": 0.06182623654603958,
"learning_rate": 0.0019708022955706294,
"loss": -0.8061,
"step": 1240
},
{
"epoch": 0.12439977110442117,
"grad_norm": 0.0434928797185421,
"learning_rate": 0.001970007506502851,
"loss": -0.847,
"step": 1250
},
{
"epoch": 0.12539496927325652,
"grad_norm": 0.18933677673339844,
"learning_rate": 0.0019692022093741276,
"loss": -0.8603,
"step": 1260
},
{
"epoch": 0.12639016744209192,
"grad_norm": 0.08330973982810974,
"learning_rate": 0.001968386412908217,
"loss": -0.8027,
"step": 1270
},
{
"epoch": 0.12738536561092728,
"grad_norm": 0.04857812821865082,
"learning_rate": 0.001967560125942617,
"loss": -0.8462,
"step": 1280
},
{
"epoch": 0.12838056377976265,
"grad_norm": 0.05622467026114464,
"learning_rate": 0.001966723357428468,
"loss": -0.8372,
"step": 1290
},
{
"epoch": 0.129375761948598,
"grad_norm": 0.057230498641729355,
"learning_rate": 0.001965876116430458,
"loss": -0.8392,
"step": 1300
},
{
"epoch": 0.13037096011743338,
"grad_norm": 0.0401107594370842,
"learning_rate": 0.0019650184121267214,
"loss": -0.8606,
"step": 1310
},
{
"epoch": 0.13136615828626874,
"grad_norm": 0.0766066461801529,
"learning_rate": 0.0019641502538087423,
"loss": -0.8347,
"step": 1320
},
{
"epoch": 0.13236135645510413,
"grad_norm": 0.279532790184021,
"learning_rate": 0.001963271650881253,
"loss": -0.8474,
"step": 1330
},
{
"epoch": 0.1333565546239395,
"grad_norm": 0.06721749901771545,
"learning_rate": 0.0019623826128621308,
"loss": -0.8662,
"step": 1340
},
{
"epoch": 0.13435175279277486,
"grad_norm": 0.06590744107961655,
"learning_rate": 0.0019614831493822973,
"loss": -0.8409,
"step": 1350
},
{
"epoch": 0.13534695096161023,
"grad_norm": 0.047027770429849625,
"learning_rate": 0.0019605732701856115,
"loss": -0.8455,
"step": 1360
},
{
"epoch": 0.1363421491304456,
"grad_norm": 0.038147736340761185,
"learning_rate": 0.001959652985128767,
"loss": -0.8335,
"step": 1370
},
{
"epoch": 0.13733734729928096,
"grad_norm": 0.03474709019064903,
"learning_rate": 0.001958722304181183,
"loss": -0.8443,
"step": 1380
},
{
"epoch": 0.13833254546811635,
"grad_norm": 0.04871761053800583,
"learning_rate": 0.001957781237424896,
"loss": -0.8282,
"step": 1390
},
{
"epoch": 0.13932774363695172,
"grad_norm": 0.039951737970113754,
"learning_rate": 0.0019568297950544543,
"loss": -0.8458,
"step": 1400
},
{
"epoch": 0.14032294180578708,
"grad_norm": 0.07247880846261978,
"learning_rate": 0.0019558679873768023,
"loss": -0.8035,
"step": 1410
},
{
"epoch": 0.14131813997462245,
"grad_norm": 0.12037922441959381,
"learning_rate": 0.0019548958248111724,
"loss": -0.8209,
"step": 1420
},
{
"epoch": 0.1423133381434578,
"grad_norm": 0.1445380449295044,
"learning_rate": 0.0019539133178889715,
"loss": -0.8602,
"step": 1430
},
{
"epoch": 0.14330853631229318,
"grad_norm": 0.04830145835876465,
"learning_rate": 0.0019529204772536664,
"loss": -0.8569,
"step": 1440
},
{
"epoch": 0.14430373448112854,
"grad_norm": 0.0444733202457428,
"learning_rate": 0.0019519173136606685,
"loss": -0.8213,
"step": 1450
},
{
"epoch": 0.14529893264996394,
"grad_norm": 0.04037511348724365,
"learning_rate": 0.0019509038379772177,
"loss": -0.8319,
"step": 1460
},
{
"epoch": 0.1462941308187993,
"grad_norm": 0.0549684576690197,
"learning_rate": 0.0019498800611822645,
"loss": -0.8654,
"step": 1470
},
{
"epoch": 0.14728932898763467,
"grad_norm": 0.03622014820575714,
"learning_rate": 0.0019488459943663502,
"loss": -0.8603,
"step": 1480
},
{
"epoch": 0.14828452715647003,
"grad_norm": 0.05168438330292702,
"learning_rate": 0.0019478016487314888,
"loss": -0.8359,
"step": 1490
},
{
"epoch": 0.1492797253253054,
"grad_norm": 0.05102885514497757,
"learning_rate": 0.0019467470355910438,
"loss": -0.8362,
"step": 1500
},
{
"epoch": 0.15027492349414076,
"grad_norm": 0.08411931991577148,
"learning_rate": 0.0019456821663696063,
"loss": -0.8557,
"step": 1510
},
{
"epoch": 0.15127012166297615,
"grad_norm": 0.02999095432460308,
"learning_rate": 0.001944607052602871,
"loss": -0.8318,
"step": 1520
},
{
"epoch": 0.15226531983181152,
"grad_norm": 0.06085268408060074,
"learning_rate": 0.0019435217059375121,
"loss": -0.866,
"step": 1530
},
{
"epoch": 0.15326051800064688,
"grad_norm": 0.03816385567188263,
"learning_rate": 0.0019424261381310558,
"loss": -0.8348,
"step": 1540
},
{
"epoch": 0.15425571616948225,
"grad_norm": 0.03999185562133789,
"learning_rate": 0.0019413203610517537,
"loss": -0.8375,
"step": 1550
},
{
"epoch": 0.1552509143383176,
"grad_norm": 0.06329697370529175,
"learning_rate": 0.0019402043866784545,
"loss": -0.8598,
"step": 1560
},
{
"epoch": 0.15624611250715298,
"grad_norm": 0.29359814524650574,
"learning_rate": 0.0019390782271004735,
"loss": -0.8305,
"step": 1570
},
{
"epoch": 0.15724131067598834,
"grad_norm": 0.04354254528880119,
"learning_rate": 0.0019379418945174624,
"loss": -0.8428,
"step": 1580
},
{
"epoch": 0.15823650884482374,
"grad_norm": 0.09485089778900146,
"learning_rate": 0.001936795401239276,
"loss": -0.8524,
"step": 1590
},
{
"epoch": 0.1592317070136591,
"grad_norm": 0.052271757274866104,
"learning_rate": 0.0019356387596858404,
"loss": -0.8889,
"step": 1600
},
{
"epoch": 0.16022690518249447,
"grad_norm": 0.07193049043416977,
"learning_rate": 0.0019344719823870175,
"loss": -0.8396,
"step": 1610
},
{
"epoch": 0.16122210335132983,
"grad_norm": 0.04632144048810005,
"learning_rate": 0.001933295081982469,
"loss": -0.8434,
"step": 1620
},
{
"epoch": 0.1622173015201652,
"grad_norm": 0.05413699895143509,
"learning_rate": 0.0019321080712215205,
"loss": -0.8523,
"step": 1630
},
{
"epoch": 0.16321249968900056,
"grad_norm": 0.045868393033742905,
"learning_rate": 0.0019309109629630217,
"loss": -0.8795,
"step": 1640
},
{
"epoch": 0.16420769785783595,
"grad_norm": 0.03584539145231247,
"learning_rate": 0.0019297037701752095,
"loss": -0.8834,
"step": 1650
},
{
"epoch": 0.16520289602667132,
"grad_norm": 0.05212506279349327,
"learning_rate": 0.0019284865059355654,
"loss": -0.8491,
"step": 1660
},
{
"epoch": 0.16619809419550668,
"grad_norm": 0.039999254047870636,
"learning_rate": 0.0019272591834306745,
"loss": -0.832,
"step": 1670
},
{
"epoch": 0.16719329236434205,
"grad_norm": 0.04333388805389404,
"learning_rate": 0.0019260218159560837,
"loss": -0.8568,
"step": 1680
},
{
"epoch": 0.16818849053317741,
"grad_norm": 0.05948438495397568,
"learning_rate": 0.0019247744169161552,
"loss": -0.8372,
"step": 1690
},
{
"epoch": 0.16918368870201278,
"grad_norm": 0.05164005607366562,
"learning_rate": 0.0019235169998239247,
"loss": -0.8453,
"step": 1700
},
{
"epoch": 0.17017888687084815,
"grad_norm": 0.07558200508356094,
"learning_rate": 0.0019222495783009516,
"loss": -0.837,
"step": 1710
},
{
"epoch": 0.17117408503968354,
"grad_norm": 0.039037518203258514,
"learning_rate": 0.0019209721660771737,
"loss": -0.8494,
"step": 1720
},
{
"epoch": 0.1721692832085189,
"grad_norm": 0.18770313262939453,
"learning_rate": 0.0019196847769907578,
"loss": -0.8765,
"step": 1730
},
{
"epoch": 0.17316448137735427,
"grad_norm": 0.08727400749921799,
"learning_rate": 0.0019183874249879495,
"loss": -0.8125,
"step": 1740
},
{
"epoch": 0.17415967954618963,
"grad_norm": 0.07949760556221008,
"learning_rate": 0.001917080124122922,
"loss": -0.8654,
"step": 1750
},
{
"epoch": 0.175154877715025,
"grad_norm": 0.06581337749958038,
"learning_rate": 0.0019157628885576252,
"loss": -0.8996,
"step": 1760
},
{
"epoch": 0.17615007588386036,
"grad_norm": 0.062335576862096786,
"learning_rate": 0.0019144357325616306,
"loss": -0.8842,
"step": 1770
},
{
"epoch": 0.17714527405269576,
"grad_norm": 0.040845975279808044,
"learning_rate": 0.0019130986705119773,
"loss": -0.8228,
"step": 1780
},
{
"epoch": 0.17814047222153112,
"grad_norm": 0.06012045592069626,
"learning_rate": 0.0019117517168930167,
"loss": -0.8529,
"step": 1790
},
{
"epoch": 0.17913567039036649,
"grad_norm": 0.030869802460074425,
"learning_rate": 0.0019103948862962555,
"loss": -0.8951,
"step": 1800
},
{
"epoch": 0.18013086855920185,
"grad_norm": 0.05283189192414284,
"learning_rate": 0.0019090281934201964,
"loss": -0.9048,
"step": 1810
},
{
"epoch": 0.18112606672803722,
"grad_norm": 0.020264575257897377,
"learning_rate": 0.0019076516530701815,
"loss": -0.8583,
"step": 1820
},
{
"epoch": 0.18212126489687258,
"grad_norm": 0.06241228058934212,
"learning_rate": 0.0019062652801582285,
"loss": -0.8432,
"step": 1830
},
{
"epoch": 0.18311646306570795,
"grad_norm": 0.048376064747571945,
"learning_rate": 0.001904869089702872,
"loss": -0.84,
"step": 1840
},
{
"epoch": 0.18411166123454334,
"grad_norm": 0.04957319051027298,
"learning_rate": 0.0019034630968289997,
"loss": -0.8312,
"step": 1850
},
{
"epoch": 0.1851068594033787,
"grad_norm": 0.11068796366453171,
"learning_rate": 0.001902047316767688,
"loss": -0.8366,
"step": 1860
},
{
"epoch": 0.18610205757221407,
"grad_norm": 0.03778199851512909,
"learning_rate": 0.0019006217648560382,
"loss": -0.8542,
"step": 1870
},
{
"epoch": 0.18709725574104943,
"grad_norm": 0.03918764367699623,
"learning_rate": 0.0018991864565370096,
"loss": -0.8395,
"step": 1880
},
{
"epoch": 0.1880924539098848,
"grad_norm": 0.0716870054602623,
"learning_rate": 0.0018977414073592521,
"loss": -0.7759,
"step": 1890
},
{
"epoch": 0.18908765207872016,
"grad_norm": 0.08251062780618668,
"learning_rate": 0.001896286632976938,
"loss": -0.8232,
"step": 1900
},
{
"epoch": 0.19008285024755556,
"grad_norm": 0.04269665852189064,
"learning_rate": 0.001894822149149593,
"loss": -0.8706,
"step": 1910
},
{
"epoch": 0.19107804841639092,
"grad_norm": 0.07331789284944534,
"learning_rate": 0.0018933479717419246,
"loss": -0.8209,
"step": 1920
},
{
"epoch": 0.1920732465852263,
"grad_norm": 0.03469880297780037,
"learning_rate": 0.0018918641167236503,
"loss": -0.8526,
"step": 1930
},
{
"epoch": 0.19306844475406165,
"grad_norm": 0.0502643845975399,
"learning_rate": 0.0018903706001693252,
"loss": -0.8058,
"step": 1940
},
{
"epoch": 0.19406364292289702,
"grad_norm": 0.04479772225022316,
"learning_rate": 0.0018888674382581672,
"loss": -0.8794,
"step": 1950
},
{
"epoch": 0.19505884109173238,
"grad_norm": 0.12034481018781662,
"learning_rate": 0.0018873546472738822,
"loss": -0.8684,
"step": 1960
},
{
"epoch": 0.19605403926056775,
"grad_norm": 0.10239536315202713,
"learning_rate": 0.0018858322436044875,
"loss": -0.8758,
"step": 1970
},
{
"epoch": 0.19704923742940314,
"grad_norm": 0.0984937995672226,
"learning_rate": 0.0018843002437421345,
"loss": -0.8669,
"step": 1980
},
{
"epoch": 0.1980444355982385,
"grad_norm": 0.04999193921685219,
"learning_rate": 0.00188275866428293,
"loss": -0.8509,
"step": 1990
},
{
"epoch": 0.19903963376707387,
"grad_norm": 0.02357977069914341,
"learning_rate": 0.001881207521926756,
"loss": -0.8546,
"step": 2000
},
{
"epoch": 0.20003483193590924,
"grad_norm": 0.05344194918870926,
"learning_rate": 0.0018796468334770884,
"loss": -0.8483,
"step": 2010
},
{
"epoch": 0.2010300301047446,
"grad_norm": 0.026923952624201775,
"learning_rate": 0.0018780766158408167,
"loss": -0.8808,
"step": 2020
},
{
"epoch": 0.20202522827357997,
"grad_norm": 0.06462886184453964,
"learning_rate": 0.0018764968860280598,
"loss": -0.8494,
"step": 2030
},
{
"epoch": 0.20302042644241536,
"grad_norm": 0.11881168186664581,
"learning_rate": 0.0018749076611519807,
"loss": -0.8483,
"step": 2040
},
{
"epoch": 0.20401562461125072,
"grad_norm": 0.0413346029818058,
"learning_rate": 0.001873308958428603,
"loss": -0.8356,
"step": 2050
},
{
"epoch": 0.2050108227800861,
"grad_norm": 0.08872512727975845,
"learning_rate": 0.0018717007951766233,
"loss": -0.8571,
"step": 2060
},
{
"epoch": 0.20600602094892145,
"grad_norm": 0.04187199845910072,
"learning_rate": 0.0018700831888172236,
"loss": -0.9066,
"step": 2070
},
{
"epoch": 0.20700121911775682,
"grad_norm": 0.08172470331192017,
"learning_rate": 0.001868456156873883,
"loss": -0.854,
"step": 2080
},
{
"epoch": 0.20799641728659218,
"grad_norm": 0.039352960884571075,
"learning_rate": 0.0018668197169721874,
"loss": -0.8822,
"step": 2090
},
{
"epoch": 0.20899161545542755,
"grad_norm": 0.0712110847234726,
"learning_rate": 0.0018651738868396394,
"loss": -0.8432,
"step": 2100
},
{
"epoch": 0.20998681362426294,
"grad_norm": 0.035332091152668,
"learning_rate": 0.0018635186843054651,
"loss": -0.8688,
"step": 2110
},
{
"epoch": 0.2109820117930983,
"grad_norm": 0.042279988527297974,
"learning_rate": 0.001861854127300422,
"loss": -0.8718,
"step": 2120
},
{
"epoch": 0.21197720996193367,
"grad_norm": 0.04759962484240532,
"learning_rate": 0.0018601802338566037,
"loss": -0.8676,
"step": 2130
},
{
"epoch": 0.21297240813076904,
"grad_norm": 0.06393526494503021,
"learning_rate": 0.0018584970221072453,
"loss": -0.8408,
"step": 2140
},
{
"epoch": 0.2139676062996044,
"grad_norm": 0.062339067459106445,
"learning_rate": 0.001856804510286527,
"loss": -0.8687,
"step": 2150
},
{
"epoch": 0.21496280446843977,
"grad_norm": 0.030178282409906387,
"learning_rate": 0.0018551027167293768,
"loss": -0.8563,
"step": 2160
},
{
"epoch": 0.21595800263727516,
"grad_norm": 0.0338846780359745,
"learning_rate": 0.0018533916598712707,
"loss": -0.8596,
"step": 2170
},
{
"epoch": 0.21695320080611052,
"grad_norm": 0.03212130814790726,
"learning_rate": 0.0018516713582480341,
"loss": -0.8531,
"step": 2180
},
{
"epoch": 0.2179483989749459,
"grad_norm": 0.08240335434675217,
"learning_rate": 0.001849941830495641,
"loss": -0.8391,
"step": 2190
},
{
"epoch": 0.21894359714378125,
"grad_norm": 0.0584181547164917,
"learning_rate": 0.001848203095350011,
"loss": -0.8469,
"step": 2200
},
{
"epoch": 0.21993879531261662,
"grad_norm": 0.05169396847486496,
"learning_rate": 0.0018464551716468071,
"loss": -0.8808,
"step": 2210
},
{
"epoch": 0.22093399348145198,
"grad_norm": 0.10674053430557251,
"learning_rate": 0.0018446980783212328,
"loss": -0.8367,
"step": 2220
},
{
"epoch": 0.22192919165028735,
"grad_norm": 0.03972488269209862,
"learning_rate": 0.0018429318344078246,
"loss": -0.8194,
"step": 2230
},
{
"epoch": 0.22292438981912274,
"grad_norm": 0.0668836236000061,
"learning_rate": 0.001841156459040248,
"loss": -0.831,
"step": 2240
},
{
"epoch": 0.2239195879879581,
"grad_norm": 0.06554841250181198,
"learning_rate": 0.001839371971451088,
"loss": -0.8217,
"step": 2250
},
{
"epoch": 0.22491478615679347,
"grad_norm": 0.13248029351234436,
"learning_rate": 0.0018375783909716432,
"loss": -0.8556,
"step": 2260
},
{
"epoch": 0.22590998432562884,
"grad_norm": 0.06625150144100189,
"learning_rate": 0.0018357757370317152,
"loss": -0.9072,
"step": 2270
},
{
"epoch": 0.2269051824944642,
"grad_norm": 0.07411986589431763,
"learning_rate": 0.0018339640291593971,
"loss": -0.8659,
"step": 2280
},
{
"epoch": 0.22790038066329957,
"grad_norm": 0.03291834518313408,
"learning_rate": 0.0018321432869808638,
"loss": -0.8748,
"step": 2290
},
{
"epoch": 0.22889557883213496,
"grad_norm": 0.15677815675735474,
"learning_rate": 0.0018303135302201578,
"loss": -0.8674,
"step": 2300
},
{
"epoch": 0.22989077700097033,
"grad_norm": 0.053248967975378036,
"learning_rate": 0.0018284747786989778,
"loss": -0.8293,
"step": 2310
},
{
"epoch": 0.2308859751698057,
"grad_norm": 0.021416958421468735,
"learning_rate": 0.0018266270523364608,
"loss": -0.8894,
"step": 2320
},
{
"epoch": 0.23188117333864106,
"grad_norm": 0.08533802628517151,
"learning_rate": 0.0018247703711489684,
"loss": -0.8688,
"step": 2330
},
{
"epoch": 0.23287637150747642,
"grad_norm": 0.0453949049115181,
"learning_rate": 0.0018229047552498706,
"loss": -0.848,
"step": 2340
},
{
"epoch": 0.23387156967631179,
"grad_norm": 0.20563171803951263,
"learning_rate": 0.0018210302248493253,
"loss": -0.7981,
"step": 2350
},
{
"epoch": 0.23486676784514718,
"grad_norm": 0.061649810522794724,
"learning_rate": 0.0018191468002540616,
"loss": -0.9015,
"step": 2360
},
{
"epoch": 0.23586196601398254,
"grad_norm": 0.050693582743406296,
"learning_rate": 0.0018172545018671595,
"loss": -0.8103,
"step": 2370
},
{
"epoch": 0.2368571641828179,
"grad_norm": 0.11253263801336288,
"learning_rate": 0.0018153533501878284,
"loss": -0.8513,
"step": 2380
},
{
"epoch": 0.23785236235165327,
"grad_norm": 0.05799407884478569,
"learning_rate": 0.0018134433658111844,
"loss": -0.8725,
"step": 2390
},
{
"epoch": 0.23884756052048864,
"grad_norm": 0.12612490355968475,
"learning_rate": 0.0018115245694280287,
"loss": -0.8313,
"step": 2400
},
{
"epoch": 0.239842758689324,
"grad_norm": 0.052735164761543274,
"learning_rate": 0.0018095969818246224,
"loss": -0.8749,
"step": 2410
},
{
"epoch": 0.24083795685815937,
"grad_norm": 0.062358558177948,
"learning_rate": 0.0018076606238824615,
"loss": -0.8253,
"step": 2420
},
{
"epoch": 0.24183315502699476,
"grad_norm": 0.05003003403544426,
"learning_rate": 0.0018057155165780512,
"loss": -0.869,
"step": 2430
},
{
"epoch": 0.24282835319583013,
"grad_norm": 0.038384635001420975,
"learning_rate": 0.001803761680982678,
"loss": -0.8525,
"step": 2440
},
{
"epoch": 0.2438235513646655,
"grad_norm": 0.05786055326461792,
"learning_rate": 0.0018017991382621814,
"loss": -0.8248,
"step": 2450
},
{
"epoch": 0.24481874953350086,
"grad_norm": 0.05332957208156586,
"learning_rate": 0.0017998279096767256,
"loss": -0.8151,
"step": 2460
},
{
"epoch": 0.24581394770233622,
"grad_norm": 0.05446227639913559,
"learning_rate": 0.0017978480165805682,
"loss": -0.8425,
"step": 2470
},
{
"epoch": 0.2468091458711716,
"grad_norm": 0.061572350561618805,
"learning_rate": 0.001795859480421829,
"loss": -0.8527,
"step": 2480
},
{
"epoch": 0.24780434404000698,
"grad_norm": 0.07180485129356384,
"learning_rate": 0.0017938623227422576,
"loss": -0.8539,
"step": 2490
},
{
"epoch": 0.24879954220884234,
"grad_norm": 0.040774550288915634,
"learning_rate": 0.0017918565651770012,
"loss": -0.845,
"step": 2500
},
{
"epoch": 0.2497947403776777,
"grad_norm": 0.08509187400341034,
"learning_rate": 0.001789842229454368,
"loss": -0.8622,
"step": 2510
},
{
"epoch": 0.25078993854651305,
"grad_norm": 0.03797999024391174,
"learning_rate": 0.0017878193373955947,
"loss": -0.8216,
"step": 2520
},
{
"epoch": 0.25178513671534847,
"grad_norm": 0.06903784722089767,
"learning_rate": 0.0017857879109146068,
"loss": -0.8296,
"step": 2530
},
{
"epoch": 0.25278033488418383,
"grad_norm": 0.05143648013472557,
"learning_rate": 0.0017837479720177844,
"loss": -0.8783,
"step": 2540
},
{
"epoch": 0.2537755330530192,
"grad_norm": 0.06422683596611023,
"learning_rate": 0.0017816995428037213,
"loss": -0.8332,
"step": 2550
},
{
"epoch": 0.25477073122185456,
"grad_norm": 0.12899571657180786,
"learning_rate": 0.001779642645462987,
"loss": -0.8527,
"step": 2560
},
{
"epoch": 0.25576592939068993,
"grad_norm": 0.06423981487751007,
"learning_rate": 0.0017775773022778863,
"loss": -0.8072,
"step": 2570
},
{
"epoch": 0.2567611275595253,
"grad_norm": 0.06049381569027901,
"learning_rate": 0.0017755035356222173,
"loss": -0.8617,
"step": 2580
},
{
"epoch": 0.25775632572836066,
"grad_norm": 0.05178122967481613,
"learning_rate": 0.0017734213679610287,
"loss": -0.8668,
"step": 2590
},
{
"epoch": 0.258751523897196,
"grad_norm": 0.07370316982269287,
"learning_rate": 0.001771330821850378,
"loss": -0.8563,
"step": 2600
},
{
"epoch": 0.2597467220660314,
"grad_norm": 0.055624667555093765,
"learning_rate": 0.0017692319199370857,
"loss": -0.8197,
"step": 2610
},
{
"epoch": 0.26074192023486675,
"grad_norm": 0.0338563434779644,
"learning_rate": 0.0017671246849584902,
"loss": -0.8392,
"step": 2620
},
{
"epoch": 0.2617371184037021,
"grad_norm": 0.056284110993146896,
"learning_rate": 0.0017650091397422027,
"loss": -0.8636,
"step": 2630
},
{
"epoch": 0.2627323165725375,
"grad_norm": 0.08218339830636978,
"learning_rate": 0.001762885307205858,
"loss": -0.8284,
"step": 2640
},
{
"epoch": 0.26372751474137285,
"grad_norm": 0.04653181508183479,
"learning_rate": 0.0017607532103568672,
"loss": -0.8151,
"step": 2650
},
{
"epoch": 0.26472271291020827,
"grad_norm": 0.05832862854003906,
"learning_rate": 0.001758612872292169,
"loss": -0.893,
"step": 2660
},
{
"epoch": 0.26571791107904363,
"grad_norm": 0.12119904160499573,
"learning_rate": 0.0017564643161979783,
"loss": -0.8758,
"step": 2670
},
{
"epoch": 0.266713109247879,
"grad_norm": 0.09644432365894318,
"learning_rate": 0.0017543075653495364,
"loss": -0.8273,
"step": 2680
},
{
"epoch": 0.26770830741671436,
"grad_norm": 0.05218861624598503,
"learning_rate": 0.0017521426431108573,
"loss": -0.8439,
"step": 2690
},
{
"epoch": 0.26870350558554973,
"grad_norm": 0.043938472867012024,
"learning_rate": 0.0017499695729344764,
"loss": -0.8499,
"step": 2700
},
{
"epoch": 0.2696987037543851,
"grad_norm": 0.05423975735902786,
"learning_rate": 0.0017477883783611943,
"loss": -0.8484,
"step": 2710
},
{
"epoch": 0.27069390192322046,
"grad_norm": 0.05785686895251274,
"learning_rate": 0.001745599083019824,
"loss": -0.9024,
"step": 2720
},
{
"epoch": 0.2716891000920558,
"grad_norm": 0.06736781448125839,
"learning_rate": 0.0017434017106269326,
"loss": -0.8415,
"step": 2730
},
{
"epoch": 0.2726842982608912,
"grad_norm": 0.04341396689414978,
"learning_rate": 0.0017411962849865873,
"loss": -0.849,
"step": 2740
},
{
"epoch": 0.27367949642972655,
"grad_norm": 0.060457851737737656,
"learning_rate": 0.0017389828299900947,
"loss": -0.8814,
"step": 2750
},
{
"epoch": 0.2746746945985619,
"grad_norm": 0.06649858504533768,
"learning_rate": 0.0017367613696157435,
"loss": -0.8329,
"step": 2760
},
{
"epoch": 0.2756698927673973,
"grad_norm": 0.20052272081375122,
"learning_rate": 0.0017345319279285438,
"loss": -0.8024,
"step": 2770
},
{
"epoch": 0.2766650909362327,
"grad_norm": 0.07833337783813477,
"learning_rate": 0.0017322945290799687,
"loss": -0.8234,
"step": 2780
},
{
"epoch": 0.27766028910506807,
"grad_norm": 0.07275154441595078,
"learning_rate": 0.001730049197307689,
"loss": -0.8805,
"step": 2790
},
{
"epoch": 0.27865548727390343,
"grad_norm": 0.042492885142564774,
"learning_rate": 0.0017277959569353138,
"loss": -0.8511,
"step": 2800
},
{
"epoch": 0.2796506854427388,
"grad_norm": 0.04260088875889778,
"learning_rate": 0.0017255348323721256,
"loss": -0.8304,
"step": 2810
},
{
"epoch": 0.28064588361157417,
"grad_norm": 0.03392525017261505,
"learning_rate": 0.0017232658481128157,
"loss": -0.8563,
"step": 2820
},
{
"epoch": 0.28164108178040953,
"grad_norm": 0.05141417682170868,
"learning_rate": 0.0017209890287372199,
"loss": -0.844,
"step": 2830
},
{
"epoch": 0.2826362799492449,
"grad_norm": 0.11153557151556015,
"learning_rate": 0.0017187043989100508,
"loss": -0.8143,
"step": 2840
},
{
"epoch": 0.28363147811808026,
"grad_norm": 0.07110270857810974,
"learning_rate": 0.001716411983380632,
"loss": -0.8752,
"step": 2850
},
{
"epoch": 0.2846266762869156,
"grad_norm": 0.023993652313947678,
"learning_rate": 0.001714111806982629,
"loss": -0.8588,
"step": 2860
},
{
"epoch": 0.285621874455751,
"grad_norm": 0.052228838205337524,
"learning_rate": 0.0017118038946337811,
"loss": -0.8507,
"step": 2870
},
{
"epoch": 0.28661707262458636,
"grad_norm": 0.022853029891848564,
"learning_rate": 0.00170948827133563,
"loss": -0.8648,
"step": 2880
},
{
"epoch": 0.2876122707934217,
"grad_norm": 0.044159069657325745,
"learning_rate": 0.0017071649621732507,
"loss": -0.8385,
"step": 2890
},
{
"epoch": 0.2886074689622571,
"grad_norm": 0.05615956708788872,
"learning_rate": 0.0017048339923149789,
"loss": -0.8399,
"step": 2900
},
{
"epoch": 0.2896026671310925,
"grad_norm": 0.1265154331922531,
"learning_rate": 0.0017024953870121374,
"loss": -0.819,
"step": 2910
},
{
"epoch": 0.29059786529992787,
"grad_norm": 0.03834633529186249,
"learning_rate": 0.0017001491715987643,
"loss": -0.859,
"step": 2920
},
{
"epoch": 0.29159306346876324,
"grad_norm": 0.07074081897735596,
"learning_rate": 0.0016977953714913383,
"loss": -0.8078,
"step": 2930
},
{
"epoch": 0.2925882616375986,
"grad_norm": 0.06447435915470123,
"learning_rate": 0.001695434012188502,
"loss": -0.8533,
"step": 2940
},
{
"epoch": 0.29358345980643397,
"grad_norm": 0.040345288813114166,
"learning_rate": 0.0016930651192707865,
"loss": -0.8452,
"step": 2950
},
{
"epoch": 0.29457865797526933,
"grad_norm": 0.04355823993682861,
"learning_rate": 0.0016906887184003345,
"loss": -0.8232,
"step": 2960
},
{
"epoch": 0.2955738561441047,
"grad_norm": 0.1037873849272728,
"learning_rate": 0.0016883048353206228,
"loss": -0.8356,
"step": 2970
},
{
"epoch": 0.29656905431294006,
"grad_norm": 0.1285988837480545,
"learning_rate": 0.001685913495856182,
"loss": -0.8347,
"step": 2980
},
{
"epoch": 0.2975642524817754,
"grad_norm": 0.06592582166194916,
"learning_rate": 0.0016835147259123176,
"loss": -0.8578,
"step": 2990
},
{
"epoch": 0.2985594506506108,
"grad_norm": 0.0572790652513504,
"learning_rate": 0.001681108551474829,
"loss": -0.8624,
"step": 3000
},
{
"epoch": 0.29955464881944616,
"grad_norm": 0.02885902300477028,
"learning_rate": 0.001678694998609729,
"loss": -0.8771,
"step": 3010
},
{
"epoch": 0.3005498469882815,
"grad_norm": 0.081394262611866,
"learning_rate": 0.00167627409346296,
"loss": -0.8258,
"step": 3020
},
{
"epoch": 0.3015450451571169,
"grad_norm": 0.09440901130437851,
"learning_rate": 0.0016738458622601114,
"loss": -0.8845,
"step": 3030
},
{
"epoch": 0.3025402433259523,
"grad_norm": 0.0748928114771843,
"learning_rate": 0.001671410331306136,
"loss": -0.7864,
"step": 3040
},
{
"epoch": 0.30353544149478767,
"grad_norm": 0.05985994637012482,
"learning_rate": 0.0016689675269850645,
"loss": -0.8587,
"step": 3050
},
{
"epoch": 0.30453063966362304,
"grad_norm": 0.05486709251999855,
"learning_rate": 0.0016665174757597196,
"loss": -0.838,
"step": 3060
},
{
"epoch": 0.3055258378324584,
"grad_norm": 0.02882876992225647,
"learning_rate": 0.0016640602041714293,
"loss": -0.8639,
"step": 3070
},
{
"epoch": 0.30652103600129377,
"grad_norm": 0.0584944412112236,
"learning_rate": 0.00166159573883974,
"loss": -0.8264,
"step": 3080
},
{
"epoch": 0.30751623417012913,
"grad_norm": 0.04569467529654503,
"learning_rate": 0.0016591241064621272,
"loss": -0.8153,
"step": 3090
},
{
"epoch": 0.3085114323389645,
"grad_norm": 0.04677337408065796,
"learning_rate": 0.0016566453338137071,
"loss": -0.8432,
"step": 3100
},
{
"epoch": 0.30950663050779986,
"grad_norm": 0.04505476728081703,
"learning_rate": 0.0016541594477469466,
"loss": -0.8812,
"step": 3110
},
{
"epoch": 0.3105018286766352,
"grad_norm": 0.04246704652905464,
"learning_rate": 0.0016516664751913713,
"loss": -0.8448,
"step": 3120
},
{
"epoch": 0.3114970268454706,
"grad_norm": 0.06890695542097092,
"learning_rate": 0.0016491664431532746,
"loss": -0.8286,
"step": 3130
},
{
"epoch": 0.31249222501430596,
"grad_norm": 0.04187687113881111,
"learning_rate": 0.001646659378715426,
"loss": -0.8733,
"step": 3140
},
{
"epoch": 0.3134874231831413,
"grad_norm": 0.04906761646270752,
"learning_rate": 0.0016441453090367752,
"loss": -0.8545,
"step": 3150
},
{
"epoch": 0.3144826213519767,
"grad_norm": 0.02713639661669731,
"learning_rate": 0.001641624261352161,
"loss": -0.8881,
"step": 3160
},
{
"epoch": 0.3154778195208121,
"grad_norm": 0.053854767233133316,
"learning_rate": 0.0016390962629720138,
"loss": -0.8535,
"step": 3170
},
{
"epoch": 0.3164730176896475,
"grad_norm": 0.04315832257270813,
"learning_rate": 0.001636561341282061,
"loss": -0.837,
"step": 3180
},
{
"epoch": 0.31746821585848284,
"grad_norm": 0.037244752049446106,
"learning_rate": 0.0016340195237430292,
"loss": -0.8767,
"step": 3190
},
{
"epoch": 0.3184634140273182,
"grad_norm": 0.03590194508433342,
"learning_rate": 0.0016314708378903492,
"loss": -0.8656,
"step": 3200
},
{
"epoch": 0.31945861219615357,
"grad_norm": 0.04543634504079819,
"learning_rate": 0.0016289153113338544,
"loss": -0.8195,
"step": 3210
},
{
"epoch": 0.32045381036498893,
"grad_norm": 0.08021537959575653,
"learning_rate": 0.0016263529717574841,
"loss": -0.8524,
"step": 3220
},
{
"epoch": 0.3214490085338243,
"grad_norm": 0.03517700731754303,
"learning_rate": 0.001623783846918983,
"loss": -0.8523,
"step": 3230
},
{
"epoch": 0.32244420670265966,
"grad_norm": 0.08983421325683594,
"learning_rate": 0.0016212079646495995,
"loss": -0.8836,
"step": 3240
},
{
"epoch": 0.32343940487149503,
"grad_norm": 0.03937257453799248,
"learning_rate": 0.001618625352853786,
"loss": -0.8303,
"step": 3250
},
{
"epoch": 0.3244346030403304,
"grad_norm": 0.05866921320557594,
"learning_rate": 0.001616036039508895,
"loss": -0.8363,
"step": 3260
},
{
"epoch": 0.32542980120916576,
"grad_norm": 0.036655690521001816,
"learning_rate": 0.001613440052664877,
"loss": -0.8672,
"step": 3270
},
{
"epoch": 0.3264249993780011,
"grad_norm": 0.04297897219657898,
"learning_rate": 0.0016108374204439767,
"loss": -0.819,
"step": 3280
},
{
"epoch": 0.3274201975468365,
"grad_norm": 0.13241665065288544,
"learning_rate": 0.0016082281710404264,
"loss": -0.842,
"step": 3290
},
{
"epoch": 0.3284153957156719,
"grad_norm": 0.08025432378053665,
"learning_rate": 0.0016056123327201437,
"loss": -0.8578,
"step": 3300
},
{
"epoch": 0.3294105938845073,
"grad_norm": 0.037778500467538834,
"learning_rate": 0.0016029899338204233,
"loss": -0.8423,
"step": 3310
},
{
"epoch": 0.33040579205334264,
"grad_norm": 0.051869478076696396,
"learning_rate": 0.0016003610027496304,
"loss": -0.8651,
"step": 3320
},
{
"epoch": 0.331400990222178,
"grad_norm": 0.03212331607937813,
"learning_rate": 0.0015977255679868931,
"loss": -0.8604,
"step": 3330
},
{
"epoch": 0.33239618839101337,
"grad_norm": 0.03444599732756615,
"learning_rate": 0.0015950836580817928,
"loss": -0.8549,
"step": 3340
},
{
"epoch": 0.33339138655984873,
"grad_norm": 0.07950103282928467,
"learning_rate": 0.001592435301654058,
"loss": -0.8384,
"step": 3350
},
{
"epoch": 0.3343865847286841,
"grad_norm": 0.06033974885940552,
"learning_rate": 0.0015897805273932502,
"loss": -0.8699,
"step": 3360
},
{
"epoch": 0.33538178289751946,
"grad_norm": 0.041036177426576614,
"learning_rate": 0.0015871193640584562,
"loss": -0.8253,
"step": 3370
},
{
"epoch": 0.33637698106635483,
"grad_norm": 0.08517392724752426,
"learning_rate": 0.001584451840477975,
"loss": -0.8145,
"step": 3380
},
{
"epoch": 0.3373721792351902,
"grad_norm": 0.053120095282793045,
"learning_rate": 0.0015817779855490054,
"loss": -0.8427,
"step": 3390
},
{
"epoch": 0.33836737740402556,
"grad_norm": 0.056728117167949677,
"learning_rate": 0.0015790978282373348,
"loss": -0.8381,
"step": 3400
},
{
"epoch": 0.3393625755728609,
"grad_norm": 0.050754986703395844,
"learning_rate": 0.0015764113975770236,
"loss": -0.8605,
"step": 3410
},
{
"epoch": 0.3403577737416963,
"grad_norm": 0.02987166866660118,
"learning_rate": 0.0015737187226700904,
"loss": -0.8775,
"step": 3420
},
{
"epoch": 0.3413529719105317,
"grad_norm": 0.041901689022779465,
"learning_rate": 0.001571019832686199,
"loss": -0.8808,
"step": 3430
},
{
"epoch": 0.3423481700793671,
"grad_norm": 0.03326866030693054,
"learning_rate": 0.001568314756862339,
"loss": -0.8301,
"step": 3440
},
{
"epoch": 0.34334336824820244,
"grad_norm": 0.08149438351392746,
"learning_rate": 0.0015656035245025129,
"loss": -0.867,
"step": 3450
},
{
"epoch": 0.3443385664170378,
"grad_norm": 0.0481393039226532,
"learning_rate": 0.0015628861649774155,
"loss": -0.8777,
"step": 3460
},
{
"epoch": 0.34533376458587317,
"grad_norm": 0.06255054473876953,
"learning_rate": 0.0015601627077241182,
"loss": -0.8579,
"step": 3470
},
{
"epoch": 0.34632896275470854,
"grad_norm": 0.05797385796904564,
"learning_rate": 0.0015574331822457471,
"loss": -0.877,
"step": 3480
},
{
"epoch": 0.3473241609235439,
"grad_norm": 0.05263502523303032,
"learning_rate": 0.0015546976181111671,
"loss": -0.8364,
"step": 3490
},
{
"epoch": 0.34831935909237927,
"grad_norm": 0.06181139498949051,
"learning_rate": 0.0015519560449546584,
"loss": -0.8426,
"step": 3500
},
{
"epoch": 0.34931455726121463,
"grad_norm": 0.0733242854475975,
"learning_rate": 0.0015492084924755972,
"loss": -0.8277,
"step": 3510
},
{
"epoch": 0.35030975543005,
"grad_norm": 0.041065763682127,
"learning_rate": 0.0015464549904381335,
"loss": -0.8914,
"step": 3520
},
{
"epoch": 0.35130495359888536,
"grad_norm": 0.0405518040060997,
"learning_rate": 0.0015436955686708687,
"loss": -0.8374,
"step": 3530
},
{
"epoch": 0.3523001517677207,
"grad_norm": 0.05705835297703743,
"learning_rate": 0.0015409302570665324,
"loss": -0.848,
"step": 3540
},
{
"epoch": 0.3532953499365561,
"grad_norm": 0.049008939415216446,
"learning_rate": 0.0015381590855816586,
"loss": -0.839,
"step": 3550
},
{
"epoch": 0.3542905481053915,
"grad_norm": 0.23253494501113892,
"learning_rate": 0.0015353820842362613,
"loss": -0.844,
"step": 3560
},
{
"epoch": 0.3552857462742269,
"grad_norm": 0.06854347884654999,
"learning_rate": 0.001532599283113509,
"loss": -0.8313,
"step": 3570
},
{
"epoch": 0.35628094444306224,
"grad_norm": 0.04343127831816673,
"learning_rate": 0.001529810712359399,
"loss": -0.8745,
"step": 3580
},
{
"epoch": 0.3572761426118976,
"grad_norm": 0.03824308142066002,
"learning_rate": 0.0015270164021824314,
"loss": -0.8711,
"step": 3590
},
{
"epoch": 0.35827134078073297,
"grad_norm": 0.03023524396121502,
"learning_rate": 0.0015242163828532804,
"loss": -0.8434,
"step": 3600
},
{
"epoch": 0.35926653894956834,
"grad_norm": 0.041595038026571274,
"learning_rate": 0.001521410684704468,
"loss": -0.851,
"step": 3610
},
{
"epoch": 0.3602617371184037,
"grad_norm": 0.044001661241054535,
"learning_rate": 0.0015185993381300346,
"loss": -0.8513,
"step": 3620
},
{
"epoch": 0.36125693528723907,
"grad_norm": 0.022997932508587837,
"learning_rate": 0.0015157823735852088,
"loss": -0.86,
"step": 3630
},
{
"epoch": 0.36225213345607443,
"grad_norm": 0.0806240439414978,
"learning_rate": 0.0015129598215860803,
"loss": -0.8273,
"step": 3640
},
{
"epoch": 0.3632473316249098,
"grad_norm": 0.039777714759111404,
"learning_rate": 0.0015101317127092658,
"loss": -0.8552,
"step": 3650
},
{
"epoch": 0.36424252979374516,
"grad_norm": 0.04692668467760086,
"learning_rate": 0.0015072980775915805,
"loss": -0.7963,
"step": 3660
},
{
"epoch": 0.3652377279625805,
"grad_norm": 0.03576252609491348,
"learning_rate": 0.001504458946929705,
"loss": -0.85,
"step": 3670
},
{
"epoch": 0.3662329261314159,
"grad_norm": 0.039703838527202606,
"learning_rate": 0.001501614351479853,
"loss": -0.8753,
"step": 3680
},
{
"epoch": 0.3672281243002513,
"grad_norm": 0.054135072976350784,
"learning_rate": 0.0014987643220574372,
"loss": -0.8365,
"step": 3690
},
{
"epoch": 0.3682233224690867,
"grad_norm": 0.05153687298297882,
"learning_rate": 0.001495908889536738,
"loss": -0.8924,
"step": 3700
},
{
"epoch": 0.36921852063792204,
"grad_norm": 0.03708389773964882,
"learning_rate": 0.0014930480848505657,
"loss": -0.8845,
"step": 3710
},
{
"epoch": 0.3702137188067574,
"grad_norm": 0.051574934273958206,
"learning_rate": 0.0014901819389899283,
"loss": -0.8512,
"step": 3720
},
{
"epoch": 0.3712089169755928,
"grad_norm": 0.13312560319900513,
"learning_rate": 0.0014873104830036943,
"loss": -0.8762,
"step": 3730
},
{
"epoch": 0.37220411514442814,
"grad_norm": 0.08459248393774033,
"learning_rate": 0.0014844337479982563,
"loss": -0.8661,
"step": 3740
},
{
"epoch": 0.3731993133132635,
"grad_norm": 0.029227307066321373,
"learning_rate": 0.0014815517651371945,
"loss": -0.8333,
"step": 3750
},
{
"epoch": 0.37419451148209887,
"grad_norm": 0.04313547909259796,
"learning_rate": 0.0014786645656409392,
"loss": -0.8348,
"step": 3760
},
{
"epoch": 0.37518970965093423,
"grad_norm": 0.08348735421895981,
"learning_rate": 0.0014757721807864318,
"loss": -0.8379,
"step": 3770
},
{
"epoch": 0.3761849078197696,
"grad_norm": 0.02443796768784523,
"learning_rate": 0.0014728746419067872,
"loss": -0.8608,
"step": 3780
},
{
"epoch": 0.37718010598860496,
"grad_norm": 0.040778059512376785,
"learning_rate": 0.001469971980390953,
"loss": -0.8305,
"step": 3790
},
{
"epoch": 0.37817530415744033,
"grad_norm": 0.04360215365886688,
"learning_rate": 0.001467064227683371,
"loss": -0.8496,
"step": 3800
},
{
"epoch": 0.3791705023262757,
"grad_norm": 0.040289781987667084,
"learning_rate": 0.0014641514152836349,
"loss": -0.8688,
"step": 3810
},
{
"epoch": 0.3801657004951111,
"grad_norm": 0.03933040797710419,
"learning_rate": 0.00146123357474615,
"loss": -0.8777,
"step": 3820
},
{
"epoch": 0.3811608986639465,
"grad_norm": 0.09043987840414047,
"learning_rate": 0.001458310737679792,
"loss": -0.857,
"step": 3830
},
{
"epoch": 0.38215609683278184,
"grad_norm": 0.034497786313295364,
"learning_rate": 0.001455382935747563,
"loss": -0.8712,
"step": 3840
},
{
"epoch": 0.3831512950016172,
"grad_norm": 0.03928445652127266,
"learning_rate": 0.0014524502006662498,
"loss": -0.8774,
"step": 3850
},
{
"epoch": 0.3841464931704526,
"grad_norm": 0.04413870349526405,
"learning_rate": 0.0014495125642060797,
"loss": -0.8739,
"step": 3860
},
{
"epoch": 0.38514169133928794,
"grad_norm": 0.023376120254397392,
"learning_rate": 0.0014465700581903764,
"loss": -0.9013,
"step": 3870
},
{
"epoch": 0.3861368895081233,
"grad_norm": 0.04416632652282715,
"learning_rate": 0.0014436227144952155,
"loss": -0.8906,
"step": 3880
},
{
"epoch": 0.38713208767695867,
"grad_norm": 0.0480741448700428,
"learning_rate": 0.0014406705650490787,
"loss": -0.8795,
"step": 3890
},
{
"epoch": 0.38812728584579403,
"grad_norm": 0.12349933385848999,
"learning_rate": 0.001437713641832509,
"loss": -0.8592,
"step": 3900
},
{
"epoch": 0.3891224840146294,
"grad_norm": 0.06916704773902893,
"learning_rate": 0.0014347519768777627,
"loss": -0.8575,
"step": 3910
},
{
"epoch": 0.39011768218346476,
"grad_norm": 0.05390172451734543,
"learning_rate": 0.0014317856022684637,
"loss": -0.8743,
"step": 3920
},
{
"epoch": 0.39111288035230013,
"grad_norm": 0.0792585238814354,
"learning_rate": 0.0014288145501392552,
"loss": -0.8422,
"step": 3930
},
{
"epoch": 0.3921080785211355,
"grad_norm": 0.06161206215620041,
"learning_rate": 0.0014258388526754517,
"loss": -0.8521,
"step": 3940
},
{
"epoch": 0.3931032766899709,
"grad_norm": 0.1214885413646698,
"learning_rate": 0.001422858542112691,
"loss": -0.7868,
"step": 3950
},
{
"epoch": 0.3940984748588063,
"grad_norm": 0.022072777152061462,
"learning_rate": 0.0014198736507365834,
"loss": -0.858,
"step": 3960
},
{
"epoch": 0.39509367302764165,
"grad_norm": 0.038208458572626114,
"learning_rate": 0.0014168842108823645,
"loss": -0.8562,
"step": 3970
},
{
"epoch": 0.396088871196477,
"grad_norm": 0.0507245697081089,
"learning_rate": 0.0014138902549345428,
"loss": -0.8513,
"step": 3980
},
{
"epoch": 0.3970840693653124,
"grad_norm": 0.045909419655799866,
"learning_rate": 0.0014108918153265485,
"loss": -0.852,
"step": 3990
},
{
"epoch": 0.39807926753414774,
"grad_norm": 0.09012339264154434,
"learning_rate": 0.0014078889245403843,
"loss": -0.8763,
"step": 4000
},
{
"epoch": 0.3990744657029831,
"grad_norm": 0.08021102845668793,
"learning_rate": 0.001404881615106272,
"loss": -0.8464,
"step": 4010
},
{
"epoch": 0.40006966387181847,
"grad_norm": 0.07724355161190033,
"learning_rate": 0.001401869919602301,
"loss": -0.8451,
"step": 4020
},
{
"epoch": 0.40106486204065384,
"grad_norm": 0.03839779272675514,
"learning_rate": 0.001398853870654074,
"loss": -0.8635,
"step": 4030
},
{
"epoch": 0.4020600602094892,
"grad_norm": 0.059427373111248016,
"learning_rate": 0.0013958335009343547,
"loss": -0.8255,
"step": 4040
},
{
"epoch": 0.40305525837832457,
"grad_norm": 0.050591662526130676,
"learning_rate": 0.0013928088431627145,
"loss": -0.8643,
"step": 4050
},
{
"epoch": 0.40405045654715993,
"grad_norm": 0.0813014954328537,
"learning_rate": 0.001389779930105175,
"loss": -0.8737,
"step": 4060
},
{
"epoch": 0.4050456547159953,
"grad_norm": 0.04178832843899727,
"learning_rate": 0.0013867467945738576,
"loss": -0.8632,
"step": 4070
},
{
"epoch": 0.4060408528848307,
"grad_norm": 0.08438771218061447,
"learning_rate": 0.0013837094694266244,
"loss": -0.8227,
"step": 4080
},
{
"epoch": 0.4070360510536661,
"grad_norm": 0.047368500381708145,
"learning_rate": 0.0013806679875667228,
"loss": -0.8522,
"step": 4090
},
{
"epoch": 0.40803124922250145,
"grad_norm": 0.04647338017821312,
"learning_rate": 0.0013776223819424317,
"loss": -0.8457,
"step": 4100
},
{
"epoch": 0.4090264473913368,
"grad_norm": 0.2708975672721863,
"learning_rate": 0.0013745726855467005,
"loss": -0.8376,
"step": 4110
},
{
"epoch": 0.4100216455601722,
"grad_norm": 0.06989341974258423,
"learning_rate": 0.0013715189314167954,
"loss": -0.8457,
"step": 4120
},
{
"epoch": 0.41101684372900754,
"grad_norm": 0.047301627695560455,
"learning_rate": 0.0013684611526339392,
"loss": -0.8713,
"step": 4130
},
{
"epoch": 0.4120120418978429,
"grad_norm": 0.039312686771154404,
"learning_rate": 0.0013653993823229535,
"loss": -0.8742,
"step": 4140
},
{
"epoch": 0.41300724006667827,
"grad_norm": 0.055356428027153015,
"learning_rate": 0.0013623336536519,
"loss": -0.8134,
"step": 4150
},
{
"epoch": 0.41400243823551364,
"grad_norm": 0.10555850714445114,
"learning_rate": 0.001359263999831722,
"loss": -0.8181,
"step": 4160
},
{
"epoch": 0.414997636404349,
"grad_norm": 0.10872016102075577,
"learning_rate": 0.0013561904541158827,
"loss": -0.8514,
"step": 4170
},
{
"epoch": 0.41599283457318437,
"grad_norm": 0.031025927513837814,
"learning_rate": 0.001353113049800007,
"loss": -0.8503,
"step": 4180
},
{
"epoch": 0.41698803274201973,
"grad_norm": 0.040020719170570374,
"learning_rate": 0.0013500318202215192,
"loss": -0.842,
"step": 4190
},
{
"epoch": 0.4179832309108551,
"grad_norm": 0.16235248744487762,
"learning_rate": 0.0013469467987592828,
"loss": -0.855,
"step": 4200
},
{
"epoch": 0.4189784290796905,
"grad_norm": 0.04261363670229912,
"learning_rate": 0.001343858018833239,
"loss": -0.8704,
"step": 4210
},
{
"epoch": 0.4199736272485259,
"grad_norm": 0.05656075477600098,
"learning_rate": 0.0013407655139040435,
"loss": -0.8526,
"step": 4220
},
{
"epoch": 0.42096882541736125,
"grad_norm": 0.04471505433320999,
"learning_rate": 0.0013376693174727064,
"loss": -0.8372,
"step": 4230
},
{
"epoch": 0.4219640235861966,
"grad_norm": 0.05171617493033409,
"learning_rate": 0.001334569463080226,
"loss": -0.8628,
"step": 4240
},
{
"epoch": 0.422959221755032,
"grad_norm": 0.06456071138381958,
"learning_rate": 0.0013314659843072273,
"loss": -0.8115,
"step": 4250
},
{
"epoch": 0.42395441992386734,
"grad_norm": 0.03985525295138359,
"learning_rate": 0.0013283589147735995,
"loss": -0.8998,
"step": 4260
},
{
"epoch": 0.4249496180927027,
"grad_norm": 0.05016870051622391,
"learning_rate": 0.0013252482881381297,
"loss": -0.8782,
"step": 4270
},
{
"epoch": 0.4259448162615381,
"grad_norm": 0.12391635030508041,
"learning_rate": 0.001322134138098138,
"loss": -0.8462,
"step": 4280
},
{
"epoch": 0.42694001443037344,
"grad_norm": 0.038156382739543915,
"learning_rate": 0.0013190164983891148,
"loss": -0.8127,
"step": 4290
},
{
"epoch": 0.4279352125992088,
"grad_norm": 0.05774056166410446,
"learning_rate": 0.0013158954027843537,
"loss": -0.8537,
"step": 4300
},
{
"epoch": 0.42893041076804417,
"grad_norm": 0.044757694005966187,
"learning_rate": 0.0013127708850945855,
"loss": -0.8843,
"step": 4310
},
{
"epoch": 0.42992560893687953,
"grad_norm": 0.035451311618089676,
"learning_rate": 0.0013096429791676122,
"loss": -0.8726,
"step": 4320
},
{
"epoch": 0.4309208071057149,
"grad_norm": 0.043845247477293015,
"learning_rate": 0.0013065117188879407,
"loss": -0.8458,
"step": 4330
},
{
"epoch": 0.4319160052745503,
"grad_norm": 0.0807977095246315,
"learning_rate": 0.001303377138176416,
"loss": -0.8799,
"step": 4340
},
{
"epoch": 0.4329112034433857,
"grad_norm": 0.030945105478167534,
"learning_rate": 0.0013002392709898526,
"loss": -0.8306,
"step": 4350
},
{
"epoch": 0.43390640161222105,
"grad_norm": 0.10755793750286102,
"learning_rate": 0.0012970981513206676,
"loss": -0.8119,
"step": 4360
},
{
"epoch": 0.4349015997810564,
"grad_norm": 0.04102904722094536,
"learning_rate": 0.0012939538131965121,
"loss": -0.8274,
"step": 4370
},
{
"epoch": 0.4358967979498918,
"grad_norm": 0.02935400977730751,
"learning_rate": 0.0012908062906799018,
"loss": -0.876,
"step": 4380
},
{
"epoch": 0.43689199611872714,
"grad_norm": 0.09526260197162628,
"learning_rate": 0.0012876556178678504,
"loss": -0.8462,
"step": 4390
},
{
"epoch": 0.4378871942875625,
"grad_norm": 0.03269312158226967,
"learning_rate": 0.0012845018288914977,
"loss": -0.8404,
"step": 4400
},
{
"epoch": 0.4388823924563979,
"grad_norm": 0.029963362962007523,
"learning_rate": 0.0012813449579157405,
"loss": -0.8987,
"step": 4410
},
{
"epoch": 0.43987759062523324,
"grad_norm": 0.043722447007894516,
"learning_rate": 0.0012781850391388638,
"loss": -0.8708,
"step": 4420
},
{
"epoch": 0.4408727887940686,
"grad_norm": 0.06401796638965607,
"learning_rate": 0.0012750221067921686,
"loss": -0.8667,
"step": 4430
},
{
"epoch": 0.44186798696290397,
"grad_norm": 0.0654074028134346,
"learning_rate": 0.0012718561951396016,
"loss": -0.8699,
"step": 4440
},
{
"epoch": 0.44286318513173933,
"grad_norm": 0.049930647015571594,
"learning_rate": 0.0012686873384773847,
"loss": -0.8465,
"step": 4450
},
{
"epoch": 0.4438583833005747,
"grad_norm": 0.04100380092859268,
"learning_rate": 0.001265515571133643,
"loss": -0.8787,
"step": 4460
},
{
"epoch": 0.4448535814694101,
"grad_norm": 0.04323037341237068,
"learning_rate": 0.0012623409274680334,
"loss": -0.8522,
"step": 4470
},
{
"epoch": 0.4458487796382455,
"grad_norm": 0.051976267248392105,
"learning_rate": 0.0012591634418713706,
"loss": -0.8574,
"step": 4480
},
{
"epoch": 0.44684397780708085,
"grad_norm": 0.06411243230104446,
"learning_rate": 0.0012559831487652566,
"loss": -0.8257,
"step": 4490
},
{
"epoch": 0.4478391759759162,
"grad_norm": 0.05125259980559349,
"learning_rate": 0.0012528000826017075,
"loss": -0.8826,
"step": 4500
},
{
"epoch": 0.4488343741447516,
"grad_norm": 0.12121732532978058,
"learning_rate": 0.001249614277862779,
"loss": -0.8667,
"step": 4510
},
{
"epoch": 0.44982957231358695,
"grad_norm": 0.24887067079544067,
"learning_rate": 0.0012464257690601938,
"loss": -0.855,
"step": 4520
},
{
"epoch": 0.4508247704824223,
"grad_norm": 0.041807617992162704,
"learning_rate": 0.001243234590734969,
"loss": -0.89,
"step": 4530
},
{
"epoch": 0.4518199686512577,
"grad_norm": 0.07936426997184753,
"learning_rate": 0.0012400407774570384,
"loss": -0.8489,
"step": 4540
},
{
"epoch": 0.45281516682009304,
"grad_norm": 0.07328188419342041,
"learning_rate": 0.0012368443638248819,
"loss": -0.8469,
"step": 4550
},
{
"epoch": 0.4538103649889284,
"grad_norm": 0.037623438984155655,
"learning_rate": 0.0012336453844651479,
"loss": -0.8402,
"step": 4560
},
{
"epoch": 0.45480556315776377,
"grad_norm": 0.06323853880167007,
"learning_rate": 0.0012304438740322794,
"loss": -0.8408,
"step": 4570
},
{
"epoch": 0.45580076132659914,
"grad_norm": 0.050275370478630066,
"learning_rate": 0.0012272398672081395,
"loss": -0.8539,
"step": 4580
},
{
"epoch": 0.45679595949543456,
"grad_norm": 0.045320775359869,
"learning_rate": 0.0012240333987016331,
"loss": -0.8633,
"step": 4590
},
{
"epoch": 0.4577911576642699,
"grad_norm": 0.1679801642894745,
"learning_rate": 0.0012208245032483335,
"loss": -0.8422,
"step": 4600
},
{
"epoch": 0.4587863558331053,
"grad_norm": 0.06080428883433342,
"learning_rate": 0.0012176132156101046,
"loss": -0.888,
"step": 4610
},
{
"epoch": 0.45978155400194065,
"grad_norm": 0.09289630502462387,
"learning_rate": 0.0012143995705747245,
"loss": -0.8749,
"step": 4620
},
{
"epoch": 0.460776752170776,
"grad_norm": 0.03888930380344391,
"learning_rate": 0.0012111836029555097,
"loss": -0.8391,
"step": 4630
},
{
"epoch": 0.4617719503396114,
"grad_norm": 0.05981600657105446,
"learning_rate": 0.0012079653475909366,
"loss": -0.8282,
"step": 4640
},
{
"epoch": 0.46276714850844675,
"grad_norm": 0.0493403784930706,
"learning_rate": 0.0012047448393442647,
"loss": -0.8631,
"step": 4650
},
{
"epoch": 0.4637623466772821,
"grad_norm": 0.028433311730623245,
"learning_rate": 0.001201522113103159,
"loss": -0.8989,
"step": 4660
},
{
"epoch": 0.4647575448461175,
"grad_norm": 0.03943129628896713,
"learning_rate": 0.0011982972037793123,
"loss": -0.8246,
"step": 4670
},
{
"epoch": 0.46575274301495284,
"grad_norm": 0.08593238145112991,
"learning_rate": 0.0011950701463080656,
"loss": -0.8393,
"step": 4680
},
{
"epoch": 0.4667479411837882,
"grad_norm": 0.10303488373756409,
"learning_rate": 0.001191840975648032,
"loss": -0.8671,
"step": 4690
},
{
"epoch": 0.46774313935262357,
"grad_norm": 0.03931227698922157,
"learning_rate": 0.0011886097267807159,
"loss": -0.861,
"step": 4700
},
{
"epoch": 0.46873833752145894,
"grad_norm": 0.06996316462755203,
"learning_rate": 0.0011853764347101354,
"loss": -0.8654,
"step": 4710
},
{
"epoch": 0.46973353569029436,
"grad_norm": 0.05362046882510185,
"learning_rate": 0.001182141134462442,
"loss": -0.8735,
"step": 4720
},
{
"epoch": 0.4707287338591297,
"grad_norm": 0.04977956414222717,
"learning_rate": 0.0011789038610855425,
"loss": -0.8828,
"step": 4730
},
{
"epoch": 0.4717239320279651,
"grad_norm": 0.05062364414334297,
"learning_rate": 0.001175664649648717,
"loss": -0.8668,
"step": 4740
},
{
"epoch": 0.47271913019680045,
"grad_norm": 0.0940314307808876,
"learning_rate": 0.0011724235352422426,
"loss": -0.8503,
"step": 4750
},
{
"epoch": 0.4737143283656358,
"grad_norm": 0.10042206943035126,
"learning_rate": 0.0011691805529770094,
"loss": -0.8796,
"step": 4760
},
{
"epoch": 0.4747095265344712,
"grad_norm": 0.07659121602773666,
"learning_rate": 0.0011659357379841433,
"loss": -0.8085,
"step": 4770
},
{
"epoch": 0.47570472470330655,
"grad_norm": 0.04234703257679939,
"learning_rate": 0.0011626891254146235,
"loss": -0.9062,
"step": 4780
},
{
"epoch": 0.4766999228721419,
"grad_norm": 0.041499655693769455,
"learning_rate": 0.0011594407504389016,
"loss": -0.8454,
"step": 4790
},
{
"epoch": 0.4776951210409773,
"grad_norm": 0.05934852361679077,
"learning_rate": 0.001156190648246523,
"loss": -0.8612,
"step": 4800
},
{
"epoch": 0.47869031920981264,
"grad_norm": 0.035424910485744476,
"learning_rate": 0.0011529388540457422,
"loss": -0.9154,
"step": 4810
},
{
"epoch": 0.479685517378648,
"grad_norm": 0.09620529413223267,
"learning_rate": 0.0011496854030631444,
"loss": -0.8585,
"step": 4820
},
{
"epoch": 0.4806807155474834,
"grad_norm": 0.09317132830619812,
"learning_rate": 0.001146430330543262,
"loss": -0.8507,
"step": 4830
},
{
"epoch": 0.48167591371631874,
"grad_norm": 0.03553410992026329,
"learning_rate": 0.0011431736717481935,
"loss": -0.8593,
"step": 4840
},
{
"epoch": 0.48267111188515416,
"grad_norm": 0.23797239363193512,
"learning_rate": 0.0011399154619572225,
"loss": -0.8728,
"step": 4850
},
{
"epoch": 0.4836663100539895,
"grad_norm": 0.11758790910243988,
"learning_rate": 0.0011366557364664326,
"loss": -0.8411,
"step": 4860
},
{
"epoch": 0.4846615082228249,
"grad_norm": 0.028982490301132202,
"learning_rate": 0.001133394530588329,
"loss": -0.8588,
"step": 4870
},
{
"epoch": 0.48565670639166025,
"grad_norm": 0.030277423560619354,
"learning_rate": 0.0011301318796514519,
"loss": -0.8613,
"step": 4880
},
{
"epoch": 0.4866519045604956,
"grad_norm": 0.0474855974316597,
"learning_rate": 0.001126867818999997,
"loss": -0.8964,
"step": 4890
},
{
"epoch": 0.487647102729331,
"grad_norm": 0.034340258687734604,
"learning_rate": 0.0011236023839934315,
"loss": -0.8219,
"step": 4900
},
{
"epoch": 0.48864230089816635,
"grad_norm": 0.08748650550842285,
"learning_rate": 0.0011203356100061104,
"loss": -0.837,
"step": 4910
},
{
"epoch": 0.4896374990670017,
"grad_norm": 0.05180235579609871,
"learning_rate": 0.0011170675324268943,
"loss": -0.8452,
"step": 4920
},
{
"epoch": 0.4906326972358371,
"grad_norm": 0.05069277063012123,
"learning_rate": 0.0011137981866587644,
"loss": -0.8689,
"step": 4930
},
{
"epoch": 0.49162789540467244,
"grad_norm": 0.0969507172703743,
"learning_rate": 0.0011105276081184417,
"loss": -0.8616,
"step": 4940
},
{
"epoch": 0.4926230935735078,
"grad_norm": 0.03841566666960716,
"learning_rate": 0.0011072558322360014,
"loss": -0.8737,
"step": 4950
},
{
"epoch": 0.4936182917423432,
"grad_norm": 0.03460973501205444,
"learning_rate": 0.0011039828944544884,
"loss": -0.8892,
"step": 4960
},
{
"epoch": 0.49461348991117854,
"grad_norm": 0.052395064383745193,
"learning_rate": 0.0011007088302295359,
"loss": -0.8513,
"step": 4970
},
{
"epoch": 0.49560868808001396,
"grad_norm": 0.09648430347442627,
"learning_rate": 0.0010974336750289788,
"loss": -0.8646,
"step": 4980
},
{
"epoch": 0.4966038862488493,
"grad_norm": 0.05110828951001167,
"learning_rate": 0.001094157464332471,
"loss": -0.8282,
"step": 4990
},
{
"epoch": 0.4975990844176847,
"grad_norm": 0.046441659331321716,
"learning_rate": 0.0010908802336311002,
"loss": -0.8459,
"step": 5000
},
{
"epoch": 0.49859428258652005,
"grad_norm": 0.08815504610538483,
"learning_rate": 0.0010876020184270039,
"loss": -0.8547,
"step": 5010
},
{
"epoch": 0.4995894807553554,
"grad_norm": 0.054612450301647186,
"learning_rate": 0.0010843228542329849,
"loss": -0.8494,
"step": 5020
},
{
"epoch": 0.5005846789241908,
"grad_norm": 0.0314970538020134,
"learning_rate": 0.0010810427765721266,
"loss": -0.867,
"step": 5030
},
{
"epoch": 0.5015798770930261,
"grad_norm": 0.05496123805642128,
"learning_rate": 0.0010777618209774071,
"loss": -0.8536,
"step": 5040
},
{
"epoch": 0.5025750752618615,
"grad_norm": 0.08668066561222076,
"learning_rate": 0.001074480022991316,
"loss": -0.8833,
"step": 5050
},
{
"epoch": 0.5035702734306969,
"grad_norm": 0.04756620153784752,
"learning_rate": 0.0010711974181654676,
"loss": -0.8271,
"step": 5060
},
{
"epoch": 0.5045654715995322,
"grad_norm": 0.055564671754837036,
"learning_rate": 0.0010679140420602176,
"loss": -0.8774,
"step": 5070
},
{
"epoch": 0.5055606697683677,
"grad_norm": 0.044413670897483826,
"learning_rate": 0.001064629930244276,
"loss": -0.7932,
"step": 5080
},
{
"epoch": 0.506555867937203,
"grad_norm": 0.035696543753147125,
"learning_rate": 0.0010613451182943244,
"loss": -0.8406,
"step": 5090
},
{
"epoch": 0.5075510661060384,
"grad_norm": 0.042886171489953995,
"learning_rate": 0.001058059641794627,
"loss": -0.8608,
"step": 5100
},
{
"epoch": 0.5085462642748737,
"grad_norm": 0.04136540740728378,
"learning_rate": 0.001054773536336648,
"loss": -0.8654,
"step": 5110
},
{
"epoch": 0.5095414624437091,
"grad_norm": 0.07801708579063416,
"learning_rate": 0.0010514868375186646,
"loss": -0.8573,
"step": 5120
},
{
"epoch": 0.5105366606125444,
"grad_norm": 0.02773255482316017,
"learning_rate": 0.0010481995809453826,
"loss": -0.8535,
"step": 5130
},
{
"epoch": 0.5115318587813799,
"grad_norm": 0.03874874860048294,
"learning_rate": 0.0010449118022275495,
"loss": -0.8643,
"step": 5140
},
{
"epoch": 0.5125270569502152,
"grad_norm": 0.06789931654930115,
"learning_rate": 0.0010416235369815692,
"loss": -0.8693,
"step": 5150
},
{
"epoch": 0.5135222551190506,
"grad_norm": 0.09328795224428177,
"learning_rate": 0.001038334820829116,
"loss": -0.851,
"step": 5160
},
{
"epoch": 0.5145174532878859,
"grad_norm": 0.05135517194867134,
"learning_rate": 0.001035045689396749,
"loss": -0.9017,
"step": 5170
},
{
"epoch": 0.5155126514567213,
"grad_norm": 0.0431673601269722,
"learning_rate": 0.0010317561783155262,
"loss": -0.8748,
"step": 5180
},
{
"epoch": 0.5165078496255567,
"grad_norm": 0.03427030146121979,
"learning_rate": 0.0010284663232206178,
"loss": -0.8548,
"step": 5190
},
{
"epoch": 0.517503047794392,
"grad_norm": 0.07007991522550583,
"learning_rate": 0.0010251761597509215,
"loss": -0.881,
"step": 5200
},
{
"epoch": 0.5184982459632275,
"grad_norm": 0.08876392245292664,
"learning_rate": 0.001021885723548675,
"loss": -0.8547,
"step": 5210
},
{
"epoch": 0.5194934441320628,
"grad_norm": 0.11546041816473007,
"learning_rate": 0.0010185950502590703,
"loss": -0.8441,
"step": 5220
},
{
"epoch": 0.5204886423008982,
"grad_norm": 0.03411949425935745,
"learning_rate": 0.0010153041755298687,
"loss": -0.8651,
"step": 5230
},
{
"epoch": 0.5214838404697335,
"grad_norm": 0.043606266379356384,
"learning_rate": 0.0010120131350110125,
"loss": -0.9115,
"step": 5240
},
{
"epoch": 0.5224790386385689,
"grad_norm": 0.03440025821328163,
"learning_rate": 0.001008721964354241,
"loss": -0.9109,
"step": 5250
},
{
"epoch": 0.5234742368074042,
"grad_norm": 0.029122570529580116,
"learning_rate": 0.0010054306992127028,
"loss": -0.8272,
"step": 5260
},
{
"epoch": 0.5244694349762397,
"grad_norm": 0.06181350350379944,
"learning_rate": 0.0010021393752405697,
"loss": -0.8695,
"step": 5270
},
{
"epoch": 0.525464633145075,
"grad_norm": 0.13611923158168793,
"learning_rate": 0.0009988480280926522,
"loss": -0.8621,
"step": 5280
},
{
"epoch": 0.5264598313139104,
"grad_norm": 0.04990173131227493,
"learning_rate": 0.00099555669342401,
"loss": -0.8884,
"step": 5290
},
{
"epoch": 0.5274550294827457,
"grad_norm": 0.030578065663576126,
"learning_rate": 0.000992265406889569,
"loss": -0.8843,
"step": 5300
},
{
"epoch": 0.5284502276515811,
"grad_norm": 0.05405525118112564,
"learning_rate": 0.0009889742041437322,
"loss": -0.8543,
"step": 5310
},
{
"epoch": 0.5294454258204165,
"grad_norm": 0.025917502120137215,
"learning_rate": 0.0009856831208399975,
"loss": -0.8611,
"step": 5320
},
{
"epoch": 0.5304406239892518,
"grad_norm": 0.058636005967855453,
"learning_rate": 0.0009823921926305661,
"loss": -0.8683,
"step": 5330
},
{
"epoch": 0.5314358221580873,
"grad_norm": 0.08390301465988159,
"learning_rate": 0.000979101455165961,
"loss": -0.8605,
"step": 5340
},
{
"epoch": 0.5324310203269226,
"grad_norm": 0.06718181073665619,
"learning_rate": 0.0009758109440946377,
"loss": -0.8469,
"step": 5350
},
{
"epoch": 0.533426218495758,
"grad_norm": 0.04684724658727646,
"learning_rate": 0.000972520695062599,
"loss": -0.8649,
"step": 5360
},
{
"epoch": 0.5344214166645933,
"grad_norm": 0.04522204399108887,
"learning_rate": 0.0009692307437130106,
"loss": -0.8793,
"step": 5370
},
{
"epoch": 0.5354166148334287,
"grad_norm": 0.02280861884355545,
"learning_rate": 0.0009659411256858122,
"loss": -0.8456,
"step": 5380
},
{
"epoch": 0.536411813002264,
"grad_norm": 0.022606611251831055,
"learning_rate": 0.000962651876617333,
"loss": -0.8819,
"step": 5390
},
{
"epoch": 0.5374070111710995,
"grad_norm": 0.04157610610127449,
"learning_rate": 0.0009593630321399049,
"loss": -0.8498,
"step": 5400
},
{
"epoch": 0.5384022093399348,
"grad_norm": 0.04567135497927666,
"learning_rate": 0.0009560746278814769,
"loss": -0.8388,
"step": 5410
},
{
"epoch": 0.5393974075087702,
"grad_norm": 0.04893554747104645,
"learning_rate": 0.0009527866994652299,
"loss": -0.877,
"step": 5420
},
{
"epoch": 0.5403926056776056,
"grad_norm": 0.03215763717889786,
"learning_rate": 0.0009494992825091892,
"loss": -0.8619,
"step": 5430
},
{
"epoch": 0.5413878038464409,
"grad_norm": 0.04611560329794884,
"learning_rate": 0.0009462124126258401,
"loss": -0.882,
"step": 5440
},
{
"epoch": 0.5423830020152763,
"grad_norm": 0.03594022989273071,
"learning_rate": 0.0009429261254217408,
"loss": -0.8399,
"step": 5450
},
{
"epoch": 0.5433782001841116,
"grad_norm": 0.047222770750522614,
"learning_rate": 0.0009396404564971374,
"loss": -0.8705,
"step": 5460
},
{
"epoch": 0.5443733983529471,
"grad_norm": 0.04804938659071922,
"learning_rate": 0.0009363554414455789,
"loss": -0.8645,
"step": 5470
},
{
"epoch": 0.5453685965217824,
"grad_norm": 0.07348743081092834,
"learning_rate": 0.0009330711158535307,
"loss": -0.88,
"step": 5480
},
{
"epoch": 0.5463637946906178,
"grad_norm": 0.03578435257077217,
"learning_rate": 0.0009297875152999887,
"loss": -0.8764,
"step": 5490
},
{
"epoch": 0.5473589928594531,
"grad_norm": 0.03122727759182453,
"learning_rate": 0.000926504675356095,
"loss": -0.8684,
"step": 5500
},
{
"epoch": 0.5483541910282885,
"grad_norm": 0.04529644921422005,
"learning_rate": 0.000923222631584752,
"loss": -0.8612,
"step": 5510
},
{
"epoch": 0.5493493891971238,
"grad_norm": 0.21876747906208038,
"learning_rate": 0.0009199414195402367,
"loss": -0.8319,
"step": 5520
},
{
"epoch": 0.5503445873659593,
"grad_norm": 0.039174988865852356,
"learning_rate": 0.000916661074767817,
"loss": -0.8831,
"step": 5530
},
{
"epoch": 0.5513397855347946,
"grad_norm": 0.032812558114528656,
"learning_rate": 0.0009133816328033649,
"loss": -0.8915,
"step": 5540
},
{
"epoch": 0.55233498370363,
"grad_norm": 0.04397067800164223,
"learning_rate": 0.0009101031291729726,
"loss": -0.8962,
"step": 5550
},
{
"epoch": 0.5533301818724654,
"grad_norm": 0.09248298406600952,
"learning_rate": 0.000906825599392567,
"loss": -0.8603,
"step": 5560
},
{
"epoch": 0.5543253800413007,
"grad_norm": 0.13714151084423065,
"learning_rate": 0.0009035490789675257,
"loss": -0.8565,
"step": 5570
},
{
"epoch": 0.5553205782101361,
"grad_norm": 0.06470950692892075,
"learning_rate": 0.000900273603392292,
"loss": -0.8365,
"step": 5580
},
{
"epoch": 0.5563157763789714,
"grad_norm": 0.02791593410074711,
"learning_rate": 0.0008969992081499903,
"loss": -0.8642,
"step": 5590
},
{
"epoch": 0.5573109745478069,
"grad_norm": 0.12671250104904175,
"learning_rate": 0.0008937259287120415,
"loss": -0.7883,
"step": 5600
},
{
"epoch": 0.5583061727166422,
"grad_norm": 0.034118831157684326,
"learning_rate": 0.0008904538005377793,
"loss": -0.8028,
"step": 5610
},
{
"epoch": 0.5593013708854776,
"grad_norm": 0.04268977791070938,
"learning_rate": 0.000887182859074066,
"loss": -0.8865,
"step": 5620
},
{
"epoch": 0.5602965690543129,
"grad_norm": 0.05735842511057854,
"learning_rate": 0.0008839131397549074,
"loss": -0.8794,
"step": 5630
},
{
"epoch": 0.5612917672231483,
"grad_norm": 0.03807393088936806,
"learning_rate": 0.0008806446780010716,
"loss": -0.8765,
"step": 5640
},
{
"epoch": 0.5622869653919836,
"grad_norm": 0.05110938474535942,
"learning_rate": 0.0008773775092197017,
"loss": -0.8541,
"step": 5650
},
{
"epoch": 0.5632821635608191,
"grad_norm": 0.05651829019188881,
"learning_rate": 0.0008741116688039349,
"loss": -0.877,
"step": 5660
},
{
"epoch": 0.5642773617296544,
"grad_norm": 0.16070760786533356,
"learning_rate": 0.000870847192132518,
"loss": -0.897,
"step": 5670
},
{
"epoch": 0.5652725598984898,
"grad_norm": 0.17324046790599823,
"learning_rate": 0.0008675841145694246,
"loss": -0.8577,
"step": 5680
},
{
"epoch": 0.5662677580673252,
"grad_norm": 0.0440760962665081,
"learning_rate": 0.0008643224714634723,
"loss": -0.8694,
"step": 5690
},
{
"epoch": 0.5672629562361605,
"grad_norm": 0.03526591882109642,
"learning_rate": 0.0008610622981479383,
"loss": -0.8741,
"step": 5700
},
{
"epoch": 0.5682581544049959,
"grad_norm": 0.04858911782503128,
"learning_rate": 0.0008578036299401784,
"loss": -0.8604,
"step": 5710
},
{
"epoch": 0.5692533525738313,
"grad_norm": 0.042971156537532806,
"learning_rate": 0.0008545465021412428,
"loss": -0.8235,
"step": 5720
},
{
"epoch": 0.5702485507426667,
"grad_norm": 0.03934862092137337,
"learning_rate": 0.0008512909500354946,
"loss": -0.8501,
"step": 5730
},
{
"epoch": 0.571243748911502,
"grad_norm": 0.12695066630840302,
"learning_rate": 0.000848037008890229,
"loss": -0.849,
"step": 5740
},
{
"epoch": 0.5722389470803374,
"grad_norm": 0.06290268152952194,
"learning_rate": 0.0008447847139552878,
"loss": -0.8564,
"step": 5750
},
{
"epoch": 0.5732341452491727,
"grad_norm": 0.06408194452524185,
"learning_rate": 0.0008415341004626802,
"loss": -0.894,
"step": 5760
},
{
"epoch": 0.5742293434180081,
"grad_norm": 0.03536716476082802,
"learning_rate": 0.0008382852036262007,
"loss": -0.8723,
"step": 5770
},
{
"epoch": 0.5752245415868434,
"grad_norm": 0.10467568039894104,
"learning_rate": 0.0008350380586410468,
"loss": -0.8447,
"step": 5780
},
{
"epoch": 0.5762197397556789,
"grad_norm": 0.059401463717222214,
"learning_rate": 0.000831792700683438,
"loss": -0.847,
"step": 5790
},
{
"epoch": 0.5772149379245142,
"grad_norm": 0.028337785974144936,
"learning_rate": 0.0008285491649102361,
"loss": -0.8471,
"step": 5800
},
{
"epoch": 0.5782101360933496,
"grad_norm": 0.0379803366959095,
"learning_rate": 0.0008253074864585625,
"loss": -0.8737,
"step": 5810
},
{
"epoch": 0.579205334262185,
"grad_norm": 0.0755845308303833,
"learning_rate": 0.0008220677004454181,
"loss": -0.8041,
"step": 5820
},
{
"epoch": 0.5802005324310203,
"grad_norm": 0.07279976457357407,
"learning_rate": 0.0008188298419673036,
"loss": -0.8035,
"step": 5830
},
{
"epoch": 0.5811957305998557,
"grad_norm": 0.049286697059869766,
"learning_rate": 0.0008155939460998381,
"loss": -0.8583,
"step": 5840
},
{
"epoch": 0.582190928768691,
"grad_norm": 0.045156385749578476,
"learning_rate": 0.0008123600478973808,
"loss": -0.8901,
"step": 5850
},
{
"epoch": 0.5831861269375265,
"grad_norm": 0.047281067818403244,
"learning_rate": 0.000809128182392649,
"loss": -0.8595,
"step": 5860
},
{
"epoch": 0.5841813251063618,
"grad_norm": 0.08644957095384598,
"learning_rate": 0.0008058983845963412,
"loss": -0.8523,
"step": 5870
},
{
"epoch": 0.5851765232751972,
"grad_norm": 0.05607840046286583,
"learning_rate": 0.0008026706894967554,
"loss": -0.8779,
"step": 5880
},
{
"epoch": 0.5861717214440325,
"grad_norm": 0.15865112841129303,
"learning_rate": 0.0007994451320594113,
"loss": -0.8798,
"step": 5890
},
{
"epoch": 0.5871669196128679,
"grad_norm": 0.05027535930275917,
"learning_rate": 0.0007962217472266723,
"loss": -0.8804,
"step": 5900
},
{
"epoch": 0.5881621177817032,
"grad_norm": 0.08624923229217529,
"learning_rate": 0.0007930005699173649,
"loss": -0.8769,
"step": 5910
},
{
"epoch": 0.5891573159505387,
"grad_norm": 0.0529584065079689,
"learning_rate": 0.0007897816350264023,
"loss": -0.8858,
"step": 5920
},
{
"epoch": 0.590152514119374,
"grad_norm": 0.08956887573003769,
"learning_rate": 0.0007865649774244049,
"loss": -0.8535,
"step": 5930
},
{
"epoch": 0.5911477122882094,
"grad_norm": 0.07602567970752716,
"learning_rate": 0.0007833506319573244,
"loss": -0.8598,
"step": 5940
},
{
"epoch": 0.5921429104570448,
"grad_norm": 0.034962136298418045,
"learning_rate": 0.0007801386334460638,
"loss": -0.8663,
"step": 5950
},
{
"epoch": 0.5931381086258801,
"grad_norm": 0.05293145775794983,
"learning_rate": 0.0007769290166861033,
"loss": -0.8277,
"step": 5960
},
{
"epoch": 0.5941333067947155,
"grad_norm": 0.04431833326816559,
"learning_rate": 0.0007737218164471205,
"loss": -0.8222,
"step": 5970
},
{
"epoch": 0.5951285049635509,
"grad_norm": 0.12767989933490753,
"learning_rate": 0.0007705170674726148,
"loss": -0.8855,
"step": 5980
},
{
"epoch": 0.5961237031323863,
"grad_norm": 0.03889622166752815,
"learning_rate": 0.0007673148044795319,
"loss": -0.8203,
"step": 5990
},
{
"epoch": 0.5971189013012216,
"grad_norm": 0.04737401381134987,
"learning_rate": 0.000764115062157886,
"loss": -0.8795,
"step": 6000
},
{
"epoch": 0.598114099470057,
"grad_norm": 0.03833894431591034,
"learning_rate": 0.0007609178751703861,
"loss": -0.8269,
"step": 6010
},
{
"epoch": 0.5991092976388923,
"grad_norm": 0.14908933639526367,
"learning_rate": 0.0007577232781520585,
"loss": -0.8262,
"step": 6020
},
{
"epoch": 0.6001044958077277,
"grad_norm": 0.05108148232102394,
"learning_rate": 0.0007545313057098726,
"loss": -0.8171,
"step": 6030
},
{
"epoch": 0.601099693976563,
"grad_norm": 0.05908135324716568,
"learning_rate": 0.0007513419924223661,
"loss": -0.8625,
"step": 6040
},
{
"epoch": 0.6020948921453985,
"grad_norm": 0.027432410046458244,
"learning_rate": 0.0007481553728392692,
"loss": -0.8343,
"step": 6050
},
{
"epoch": 0.6030900903142338,
"grad_norm": 0.04073173180222511,
"learning_rate": 0.0007449714814811333,
"loss": -0.8918,
"step": 6060
},
{
"epoch": 0.6040852884830692,
"grad_norm": 0.05039272829890251,
"learning_rate": 0.0007417903528389534,
"loss": -0.8388,
"step": 6070
},
{
"epoch": 0.6050804866519046,
"grad_norm": 0.04605141282081604,
"learning_rate": 0.0007386120213737961,
"loss": -0.8493,
"step": 6080
},
{
"epoch": 0.6060756848207399,
"grad_norm": 0.07342466711997986,
"learning_rate": 0.0007354365215164267,
"loss": -0.8343,
"step": 6090
},
{
"epoch": 0.6070708829895753,
"grad_norm": 0.03411560505628586,
"learning_rate": 0.000732263887666936,
"loss": -0.8551,
"step": 6100
},
{
"epoch": 0.6080660811584107,
"grad_norm": 0.1048220545053482,
"learning_rate": 0.0007290941541943664,
"loss": -0.8344,
"step": 6110
},
{
"epoch": 0.6090612793272461,
"grad_norm": 0.15538884699344635,
"learning_rate": 0.0007259273554363426,
"loss": -0.8448,
"step": 6120
},
{
"epoch": 0.6100564774960814,
"grad_norm": 0.03414257988333702,
"learning_rate": 0.0007227635256986955,
"loss": -0.8815,
"step": 6130
},
{
"epoch": 0.6110516756649168,
"grad_norm": 0.05820440128445625,
"learning_rate": 0.0007196026992550941,
"loss": -0.8684,
"step": 6140
},
{
"epoch": 0.6120468738337521,
"grad_norm": 0.03307825326919556,
"learning_rate": 0.000716444910346672,
"loss": -0.8694,
"step": 6150
},
{
"epoch": 0.6130420720025875,
"grad_norm": 0.050421956926584244,
"learning_rate": 0.0007132901931816571,
"loss": -0.8455,
"step": 6160
},
{
"epoch": 0.6140372701714228,
"grad_norm": 0.11055120080709457,
"learning_rate": 0.0007101385819350025,
"loss": -0.8937,
"step": 6170
},
{
"epoch": 0.6150324683402583,
"grad_norm": 0.041438162326812744,
"learning_rate": 0.0007069901107480138,
"loss": -0.8734,
"step": 6180
},
{
"epoch": 0.6160276665090936,
"grad_norm": 0.03807257115840912,
"learning_rate": 0.000703844813727981,
"loss": -0.8929,
"step": 6190
},
{
"epoch": 0.617022864677929,
"grad_norm": 0.06824404746294022,
"learning_rate": 0.0007007027249478077,
"loss": -0.8985,
"step": 6200
},
{
"epoch": 0.6180180628467644,
"grad_norm": 0.07732049375772476,
"learning_rate": 0.0006975638784456437,
"loss": -0.8537,
"step": 6210
},
{
"epoch": 0.6190132610155997,
"grad_norm": 0.14929652214050293,
"learning_rate": 0.0006944283082245149,
"loss": -0.8419,
"step": 6220
},
{
"epoch": 0.6200084591844351,
"grad_norm": 0.14729063212871552,
"learning_rate": 0.0006912960482519553,
"loss": -0.8594,
"step": 6230
},
{
"epoch": 0.6210036573532705,
"grad_norm": 0.023850033059716225,
"learning_rate": 0.0006881671324596388,
"loss": -0.8411,
"step": 6240
},
{
"epoch": 0.6219988555221059,
"grad_norm": 0.024202531203627586,
"learning_rate": 0.0006850415947430126,
"loss": -0.8503,
"step": 6250
},
{
"epoch": 0.6229940536909412,
"grad_norm": 0.06503470242023468,
"learning_rate": 0.0006819194689609287,
"loss": -0.8522,
"step": 6260
},
{
"epoch": 0.6239892518597766,
"grad_norm": 0.06802047044038773,
"learning_rate": 0.0006788007889352777,
"loss": -0.855,
"step": 6270
},
{
"epoch": 0.6249844500286119,
"grad_norm": 0.052513618022203445,
"learning_rate": 0.0006756855884506234,
"loss": -0.8805,
"step": 6280
},
{
"epoch": 0.6259796481974473,
"grad_norm": 0.03414776176214218,
"learning_rate": 0.0006725739012538345,
"loss": -0.8627,
"step": 6290
},
{
"epoch": 0.6269748463662826,
"grad_norm": 0.0709032341837883,
"learning_rate": 0.0006694657610537211,
"loss": -0.8588,
"step": 6300
},
{
"epoch": 0.6279700445351181,
"grad_norm": 0.0368841178715229,
"learning_rate": 0.0006663612015206687,
"loss": -0.8632,
"step": 6310
},
{
"epoch": 0.6289652427039534,
"grad_norm": 0.09083976596593857,
"learning_rate": 0.0006632602562862733,
"loss": -0.8653,
"step": 6320
},
{
"epoch": 0.6299604408727888,
"grad_norm": 0.037867575883865356,
"learning_rate": 0.000660162958942978,
"loss": -0.891,
"step": 6330
},
{
"epoch": 0.6309556390416242,
"grad_norm": 0.046128176152706146,
"learning_rate": 0.0006570693430437077,
"loss": -0.8694,
"step": 6340
},
{
"epoch": 0.6319508372104595,
"grad_norm": 0.078248530626297,
"learning_rate": 0.0006539794421015066,
"loss": -0.885,
"step": 6350
},
{
"epoch": 0.632946035379295,
"grad_norm": 0.047228600829839706,
"learning_rate": 0.0006508932895891747,
"loss": -0.8547,
"step": 6360
},
{
"epoch": 0.6339412335481303,
"grad_norm": 0.03179970011115074,
"learning_rate": 0.0006478109189389056,
"loss": -0.8712,
"step": 6370
},
{
"epoch": 0.6349364317169657,
"grad_norm": 0.05649185925722122,
"learning_rate": 0.000644732363541924,
"loss": -0.854,
"step": 6380
},
{
"epoch": 0.635931629885801,
"grad_norm": 0.03506692498922348,
"learning_rate": 0.0006416576567481245,
"loss": -0.8737,
"step": 6390
},
{
"epoch": 0.6369268280546364,
"grad_norm": 0.04961368441581726,
"learning_rate": 0.0006385868318657091,
"loss": -0.8685,
"step": 6400
},
{
"epoch": 0.6379220262234717,
"grad_norm": 0.11547062546014786,
"learning_rate": 0.0006355199221608277,
"loss": -0.8553,
"step": 6410
},
{
"epoch": 0.6389172243923071,
"grad_norm": 0.06517086923122406,
"learning_rate": 0.0006324569608572171,
"loss": -0.8541,
"step": 6420
},
{
"epoch": 0.6399124225611424,
"grad_norm": 0.03779355809092522,
"learning_rate": 0.0006293979811358413,
"loss": -0.8782,
"step": 6430
},
{
"epoch": 0.6409076207299779,
"grad_norm": 0.03799843415617943,
"learning_rate": 0.0006263430161345316,
"loss": -0.876,
"step": 6440
},
{
"epoch": 0.6419028188988132,
"grad_norm": 0.09262421727180481,
"learning_rate": 0.0006232920989476285,
"loss": -0.8416,
"step": 6450
},
{
"epoch": 0.6428980170676486,
"grad_norm": 0.046641264110803604,
"learning_rate": 0.0006202452626256223,
"loss": -0.8784,
"step": 6460
},
{
"epoch": 0.643893215236484,
"grad_norm": 0.12603308260440826,
"learning_rate": 0.0006172025401747955,
"loss": -0.8633,
"step": 6470
},
{
"epoch": 0.6448884134053193,
"grad_norm": 0.0811128169298172,
"learning_rate": 0.0006141639645568645,
"loss": -0.858,
"step": 6480
},
{
"epoch": 0.6458836115741547,
"grad_norm": 0.04808896780014038,
"learning_rate": 0.0006111295686886248,
"loss": -0.8712,
"step": 6490
},
{
"epoch": 0.6468788097429901,
"grad_norm": 0.07724402099847794,
"learning_rate": 0.0006080993854415916,
"loss": -0.8559,
"step": 6500
},
{
"epoch": 0.6478740079118255,
"grad_norm": 0.0562870092689991,
"learning_rate": 0.0006050734476416448,
"loss": -0.8769,
"step": 6510
},
{
"epoch": 0.6488692060806608,
"grad_norm": 0.04276958853006363,
"learning_rate": 0.0006020517880686738,
"loss": -0.8588,
"step": 6520
},
{
"epoch": 0.6498644042494962,
"grad_norm": 0.167204350233078,
"learning_rate": 0.0005990344394562226,
"loss": -0.8472,
"step": 6530
},
{
"epoch": 0.6508596024183315,
"grad_norm": 0.04978903755545616,
"learning_rate": 0.0005960214344911334,
"loss": -0.8613,
"step": 6540
},
{
"epoch": 0.6518548005871669,
"grad_norm": 0.04822786897420883,
"learning_rate": 0.0005930128058131957,
"loss": -0.8988,
"step": 6550
},
{
"epoch": 0.6528499987560022,
"grad_norm": 0.08410031348466873,
"learning_rate": 0.000590008586014789,
"loss": -0.8125,
"step": 6560
},
{
"epoch": 0.6538451969248377,
"grad_norm": 0.10245650261640549,
"learning_rate": 0.000587008807640533,
"loss": -0.8562,
"step": 6570
},
{
"epoch": 0.654840395093673,
"grad_norm": 0.052541933953762054,
"learning_rate": 0.0005840135031869322,
"loss": -0.8309,
"step": 6580
},
{
"epoch": 0.6558355932625084,
"grad_norm": 0.09064075350761414,
"learning_rate": 0.0005810227051020261,
"loss": -0.8508,
"step": 6590
},
{
"epoch": 0.6568307914313438,
"grad_norm": 0.023997120559215546,
"learning_rate": 0.0005780364457850369,
"loss": -0.8659,
"step": 6600
},
{
"epoch": 0.6578259896001791,
"grad_norm": 0.049732670187950134,
"learning_rate": 0.0005750547575860184,
"loss": -0.8599,
"step": 6610
},
{
"epoch": 0.6588211877690145,
"grad_norm": 0.04103129357099533,
"learning_rate": 0.000572077672805505,
"loss": -0.8497,
"step": 6620
},
{
"epoch": 0.6598163859378499,
"grad_norm": 0.026592710986733437,
"learning_rate": 0.0005691052236941639,
"loss": -0.8606,
"step": 6630
},
{
"epoch": 0.6608115841066853,
"grad_norm": 0.06547830998897552,
"learning_rate": 0.0005661374424524415,
"loss": -0.9132,
"step": 6640
},
{
"epoch": 0.6618067822755206,
"grad_norm": 0.03540598228573799,
"learning_rate": 0.000563174361230221,
"loss": -0.876,
"step": 6650
},
{
"epoch": 0.662801980444356,
"grad_norm": 0.037611719220876694,
"learning_rate": 0.0005602160121264677,
"loss": -0.8552,
"step": 6660
},
{
"epoch": 0.6637971786131913,
"grad_norm": 0.05235538259148598,
"learning_rate": 0.0005572624271888844,
"loss": -0.8538,
"step": 6670
},
{
"epoch": 0.6647923767820267,
"grad_norm": 0.03196645900607109,
"learning_rate": 0.0005543136384135649,
"loss": -0.886,
"step": 6680
},
{
"epoch": 0.665787574950862,
"grad_norm": 0.036912400275468826,
"learning_rate": 0.000551369677744645,
"loss": -0.8372,
"step": 6690
},
{
"epoch": 0.6667827731196975,
"grad_norm": 0.04574640095233917,
"learning_rate": 0.0005484305770739589,
"loss": -0.8618,
"step": 6700
},
{
"epoch": 0.6677779712885328,
"grad_norm": 0.09847969561815262,
"learning_rate": 0.0005454963682406921,
"loss": -0.8348,
"step": 6710
},
{
"epoch": 0.6687731694573682,
"grad_norm": 0.05522662028670311,
"learning_rate": 0.0005425670830310372,
"loss": -0.8791,
"step": 6720
},
{
"epoch": 0.6697683676262036,
"grad_norm": 0.03674080967903137,
"learning_rate": 0.0005396427531778492,
"loss": -0.8899,
"step": 6730
},
{
"epoch": 0.6707635657950389,
"grad_norm": 0.06172318384051323,
"learning_rate": 0.0005367234103603009,
"loss": -0.9089,
"step": 6740
},
{
"epoch": 0.6717587639638744,
"grad_norm": 0.034352660179138184,
"learning_rate": 0.0005338090862035426,
"loss": -0.86,
"step": 6750
},
{
"epoch": 0.6727539621327097,
"grad_norm": 0.05538021773099899,
"learning_rate": 0.0005308998122783561,
"loss": -0.8895,
"step": 6760
},
{
"epoch": 0.6737491603015451,
"grad_norm": 0.04365914314985275,
"learning_rate": 0.0005279956201008154,
"loss": -0.8885,
"step": 6770
},
{
"epoch": 0.6747443584703804,
"grad_norm": 0.04602828249335289,
"learning_rate": 0.0005250965411319427,
"loss": -0.8615,
"step": 6780
},
{
"epoch": 0.6757395566392158,
"grad_norm": 0.060164544731378555,
"learning_rate": 0.0005222026067773705,
"loss": -0.8694,
"step": 6790
},
{
"epoch": 0.6767347548080511,
"grad_norm": 0.09196409583091736,
"learning_rate": 0.0005193138483869979,
"loss": -0.8795,
"step": 6800
},
{
"epoch": 0.6777299529768865,
"grad_norm": 0.050159208476543427,
"learning_rate": 0.0005164302972546548,
"loss": -0.8552,
"step": 6810
},
{
"epoch": 0.6787251511457219,
"grad_norm": 0.04204307124018669,
"learning_rate": 0.0005135519846177609,
"loss": -0.8477,
"step": 6820
},
{
"epoch": 0.6797203493145573,
"grad_norm": 0.05275953561067581,
"learning_rate": 0.0005106789416569857,
"loss": -0.8263,
"step": 6830
},
{
"epoch": 0.6807155474833926,
"grad_norm": 0.04620354622602463,
"learning_rate": 0.0005078111994959145,
"loss": -0.8495,
"step": 6840
},
{
"epoch": 0.681710745652228,
"grad_norm": 0.043242305517196655,
"learning_rate": 0.0005049487892007078,
"loss": -0.8876,
"step": 6850
},
{
"epoch": 0.6827059438210634,
"grad_norm": 0.06443754583597183,
"learning_rate": 0.0005020917417797668,
"loss": -0.8523,
"step": 6860
},
{
"epoch": 0.6837011419898987,
"grad_norm": 0.054831694811582565,
"learning_rate": 0.0004992400881833973,
"loss": -0.8757,
"step": 6870
},
{
"epoch": 0.6846963401587342,
"grad_norm": 0.043946340680122375,
"learning_rate": 0.0004963938593034726,
"loss": -0.8559,
"step": 6880
},
{
"epoch": 0.6856915383275695,
"grad_norm": 0.04879409447312355,
"learning_rate": 0.0004935530859731018,
"loss": -0.8686,
"step": 6890
},
{
"epoch": 0.6866867364964049,
"grad_norm": 0.04872363060712814,
"learning_rate": 0.0004907177989662926,
"loss": -0.8543,
"step": 6900
},
{
"epoch": 0.6876819346652402,
"grad_norm": 0.06434311717748642,
"learning_rate": 0.00048788802899762094,
"loss": -0.8264,
"step": 6910
},
{
"epoch": 0.6886771328340756,
"grad_norm": 0.03144066780805588,
"learning_rate": 0.00048506380672189663,
"loss": -0.8666,
"step": 6920
},
{
"epoch": 0.6896723310029109,
"grad_norm": 0.050940971821546555,
"learning_rate": 0.0004822451627338302,
"loss": -0.8725,
"step": 6930
},
{
"epoch": 0.6906675291717463,
"grad_norm": 0.037656985223293304,
"learning_rate": 0.00047943212756770473,
"loss": -0.8629,
"step": 6940
},
{
"epoch": 0.6916627273405817,
"grad_norm": 0.052575841546058655,
"learning_rate": 0.0004766247316970411,
"loss": -0.8356,
"step": 6950
},
{
"epoch": 0.6926579255094171,
"grad_norm": 0.0732717514038086,
"learning_rate": 0.0004738230055342714,
"loss": -0.8525,
"step": 6960
},
{
"epoch": 0.6936531236782524,
"grad_norm": 0.06910708546638489,
"learning_rate": 0.00047102697943040775,
"loss": -0.9031,
"step": 6970
},
{
"epoch": 0.6946483218470878,
"grad_norm": 0.04597754031419754,
"learning_rate": 0.0004682366836747126,
"loss": -0.8482,
"step": 6980
},
{
"epoch": 0.6956435200159232,
"grad_norm": 0.08330798149108887,
"learning_rate": 0.00046545214849437347,
"loss": -0.8632,
"step": 6990
},
{
"epoch": 0.6966387181847585,
"grad_norm": 0.03966710716485977,
"learning_rate": 0.00046267340405417167,
"loss": -0.8473,
"step": 7000
},
{
"epoch": 0.697633916353594,
"grad_norm": 0.06126915290951729,
"learning_rate": 0.00045990048045615973,
"loss": -0.9005,
"step": 7010
},
{
"epoch": 0.6986291145224293,
"grad_norm": 0.0399840883910656,
"learning_rate": 0.0004571334077393313,
"loss": -0.8857,
"step": 7020
},
{
"epoch": 0.6996243126912647,
"grad_norm": 0.047068994492292404,
"learning_rate": 0.000454372215879299,
"loss": -0.8877,
"step": 7030
},
{
"epoch": 0.7006195108601,
"grad_norm": 0.05347800627350807,
"learning_rate": 0.00045161693478796796,
"loss": -0.8843,
"step": 7040
},
{
"epoch": 0.7016147090289354,
"grad_norm": 0.0830862745642662,
"learning_rate": 0.0004488675943132113,
"loss": -0.8706,
"step": 7050
},
{
"epoch": 0.7026099071977707,
"grad_norm": 0.04167691618204117,
"learning_rate": 0.00044612422423854917,
"loss": -0.8649,
"step": 7060
},
{
"epoch": 0.7036051053666061,
"grad_norm": 0.05179230123758316,
"learning_rate": 0.0004433868542828224,
"loss": -0.8429,
"step": 7070
},
{
"epoch": 0.7046003035354415,
"grad_norm": 0.0687342956662178,
"learning_rate": 0.0004406555140998756,
"loss": -0.8253,
"step": 7080
},
{
"epoch": 0.7055955017042769,
"grad_norm": 0.04089014232158661,
"learning_rate": 0.00043793023327823067,
"loss": -0.8639,
"step": 7090
},
{
"epoch": 0.7065906998731122,
"grad_norm": 0.05847838521003723,
"learning_rate": 0.00043521104134076904,
"loss": -0.8564,
"step": 7100
},
{
"epoch": 0.7075858980419476,
"grad_norm": 0.078822560608387,
"learning_rate": 0.00043249796774441255,
"loss": -0.8841,
"step": 7110
},
{
"epoch": 0.708581096210783,
"grad_norm": 0.037112757563591,
"learning_rate": 0.00042979104187980144,
"loss": -0.9033,
"step": 7120
},
{
"epoch": 0.7095762943796183,
"grad_norm": 0.03796043619513512,
"learning_rate": 0.00042709029307098033,
"loss": -0.8329,
"step": 7130
},
{
"epoch": 0.7105714925484538,
"grad_norm": 0.17252549529075623,
"learning_rate": 0.0004243957505750754,
"loss": -0.889,
"step": 7140
},
{
"epoch": 0.7115666907172891,
"grad_norm": 0.06463641673326492,
"learning_rate": 0.00042170744358198186,
"loss": -0.9217,
"step": 7150
},
{
"epoch": 0.7125618888861245,
"grad_norm": 0.0993466004729271,
"learning_rate": 0.00041902540121404474,
"loss": -0.8848,
"step": 7160
},
{
"epoch": 0.7135570870549598,
"grad_norm": 0.09770558774471283,
"learning_rate": 0.00041634965252574486,
"loss": -0.8752,
"step": 7170
},
{
"epoch": 0.7145522852237952,
"grad_norm": 0.03831467777490616,
"learning_rate": 0.00041368022650338423,
"loss": -0.9107,
"step": 7180
},
{
"epoch": 0.7155474833926305,
"grad_norm": 0.03482762351632118,
"learning_rate": 0.0004110171520647713,
"loss": -0.8684,
"step": 7190
},
{
"epoch": 0.7165426815614659,
"grad_norm": 0.07047673314809799,
"learning_rate": 0.00040836045805890854,
"loss": -0.8774,
"step": 7200
},
{
"epoch": 0.7175378797303013,
"grad_norm": 0.0411464087665081,
"learning_rate": 0.00040571017326567816,
"loss": -0.8507,
"step": 7210
},
{
"epoch": 0.7185330778991367,
"grad_norm": 0.04212634265422821,
"learning_rate": 0.0004030663263955332,
"loss": -0.8251,
"step": 7220
},
{
"epoch": 0.719528276067972,
"grad_norm": 0.10478588193655014,
"learning_rate": 0.000400428946089183,
"loss": -0.8466,
"step": 7230
},
{
"epoch": 0.7205234742368074,
"grad_norm": 0.10188216716051102,
"learning_rate": 0.00039779806091728656,
"loss": -0.8648,
"step": 7240
},
{
"epoch": 0.7215186724056428,
"grad_norm": 0.03693991154432297,
"learning_rate": 0.00039517369938014057,
"loss": -0.8833,
"step": 7250
},
{
"epoch": 0.7225138705744781,
"grad_norm": 0.0405866913497448,
"learning_rate": 0.000392555889907371,
"loss": -0.8494,
"step": 7260
},
{
"epoch": 0.7235090687433136,
"grad_norm": 0.03353391960263252,
"learning_rate": 0.00038994466085762636,
"loss": -0.8369,
"step": 7270
},
{
"epoch": 0.7245042669121489,
"grad_norm": 0.03976055979728699,
"learning_rate": 0.00038734004051826866,
"loss": -0.8975,
"step": 7280
},
{
"epoch": 0.7254994650809843,
"grad_norm": 0.05324774980545044,
"learning_rate": 0.0003847420571050687,
"loss": -0.8547,
"step": 7290
},
{
"epoch": 0.7264946632498196,
"grad_norm": 0.07501663267612457,
"learning_rate": 0.0003821507387618999,
"loss": -0.8501,
"step": 7300
},
{
"epoch": 0.727489861418655,
"grad_norm": 0.04217078164219856,
"learning_rate": 0.0003795661135604319,
"loss": -0.8804,
"step": 7310
},
{
"epoch": 0.7284850595874903,
"grad_norm": 0.0331064835190773,
"learning_rate": 0.00037698820949982946,
"loss": -0.8464,
"step": 7320
},
{
"epoch": 0.7294802577563257,
"grad_norm": 0.030969824641942978,
"learning_rate": 0.0003744170545064458,
"loss": -0.8597,
"step": 7330
},
{
"epoch": 0.730475455925161,
"grad_norm": 0.0441804938018322,
"learning_rate": 0.00037185267643352274,
"loss": -0.8352,
"step": 7340
},
{
"epoch": 0.7314706540939965,
"grad_norm": 0.032954998314380646,
"learning_rate": 0.00036929510306088796,
"loss": -0.8148,
"step": 7350
},
{
"epoch": 0.7324658522628318,
"grad_norm": 0.04472510516643524,
"learning_rate": 0.0003667443620946531,
"loss": -0.8487,
"step": 7360
},
{
"epoch": 0.7334610504316672,
"grad_norm": 0.03529642894864082,
"learning_rate": 0.00036420048116691584,
"loss": -0.8733,
"step": 7370
},
{
"epoch": 0.7344562486005026,
"grad_norm": 0.06249464303255081,
"learning_rate": 0.0003616634878354581,
"loss": -0.8785,
"step": 7380
},
{
"epoch": 0.7354514467693379,
"grad_norm": 0.03591424226760864,
"learning_rate": 0.00035913340958344933,
"loss": -0.8787,
"step": 7390
},
{
"epoch": 0.7364466449381734,
"grad_norm": 0.03739127516746521,
"learning_rate": 0.00035661027381914833,
"loss": -0.8771,
"step": 7400
},
{
"epoch": 0.7374418431070087,
"grad_norm": 0.040590737015008926,
"learning_rate": 0.00035409410787560537,
"loss": -0.8405,
"step": 7410
},
{
"epoch": 0.7384370412758441,
"grad_norm": 0.040956106036901474,
"learning_rate": 0.00035158493901036783,
"loss": -0.8661,
"step": 7420
},
{
"epoch": 0.7394322394446794,
"grad_norm": 0.06951002776622772,
"learning_rate": 0.00034908279440518277,
"loss": -0.8241,
"step": 7430
},
{
"epoch": 0.7404274376135148,
"grad_norm": 0.04111276939511299,
"learning_rate": 0.0003465877011657048,
"loss": -0.8855,
"step": 7440
},
{
"epoch": 0.7414226357823501,
"grad_norm": 0.06843695789575577,
"learning_rate": 0.00034409968632120126,
"loss": -0.8462,
"step": 7450
},
{
"epoch": 0.7424178339511855,
"grad_norm": 0.04960676655173302,
"learning_rate": 0.00034161877682425826,
"loss": -0.8506,
"step": 7460
},
{
"epoch": 0.7434130321200209,
"grad_norm": 0.06440480053424835,
"learning_rate": 0.00033914499955049125,
"loss": -0.8441,
"step": 7470
},
{
"epoch": 0.7444082302888563,
"grad_norm": 0.043779365718364716,
"learning_rate": 0.0003366783812982516,
"loss": -0.8786,
"step": 7480
},
{
"epoch": 0.7454034284576916,
"grad_norm": 0.042465850710868835,
"learning_rate": 0.00033421894878833805,
"loss": -0.8727,
"step": 7490
},
{
"epoch": 0.746398626626527,
"grad_norm": 0.042998846620321274,
"learning_rate": 0.00033176672866370505,
"loss": -0.8465,
"step": 7500
},
{
"epoch": 0.7473938247953624,
"grad_norm": 0.03316570073366165,
"learning_rate": 0.00032932174748917775,
"loss": -0.8432,
"step": 7510
},
{
"epoch": 0.7483890229641977,
"grad_norm": 0.05994047224521637,
"learning_rate": 0.00032688403175116,
"loss": -0.8284,
"step": 7520
},
{
"epoch": 0.7493842211330332,
"grad_norm": 0.049616336822509766,
"learning_rate": 0.0003244536078573497,
"loss": -0.8264,
"step": 7530
},
{
"epoch": 0.7503794193018685,
"grad_norm": 0.11972280591726303,
"learning_rate": 0.00032203050213645357,
"loss": -0.8796,
"step": 7540
},
{
"epoch": 0.7513746174707039,
"grad_norm": 0.0751202329993248,
"learning_rate": 0.00031961474083789886,
"loss": -0.8651,
"step": 7550
},
{
"epoch": 0.7523698156395392,
"grad_norm": 0.15553037822246552,
"learning_rate": 0.0003172063501315534,
"loss": -0.869,
"step": 7560
},
{
"epoch": 0.7533650138083746,
"grad_norm": 0.09399143606424332,
"learning_rate": 0.00031480535610743757,
"loss": -0.8175,
"step": 7570
},
{
"epoch": 0.7543602119772099,
"grad_norm": 0.045804236084222794,
"learning_rate": 0.00031241178477544473,
"loss": -0.8935,
"step": 7580
},
{
"epoch": 0.7553554101460453,
"grad_norm": 0.04258604347705841,
"learning_rate": 0.0003100256620650581,
"loss": -0.8238,
"step": 7590
},
{
"epoch": 0.7563506083148807,
"grad_norm": 0.13974538445472717,
"learning_rate": 0.00030764701382506965,
"loss": -0.843,
"step": 7600
},
{
"epoch": 0.7573458064837161,
"grad_norm": 0.05728553980588913,
"learning_rate": 0.00030527586582330247,
"loss": -0.8302,
"step": 7610
},
{
"epoch": 0.7583410046525514,
"grad_norm": 0.05504688248038292,
"learning_rate": 0.00030291224374632766,
"loss": -0.8729,
"step": 7620
},
{
"epoch": 0.7593362028213868,
"grad_norm": 0.05460618436336517,
"learning_rate": 0.0003005561731991898,
"loss": -0.8746,
"step": 7630
},
{
"epoch": 0.7603314009902222,
"grad_norm": 0.03898203745484352,
"learning_rate": 0.00029820767970512686,
"loss": -0.8818,
"step": 7640
},
{
"epoch": 0.7613265991590575,
"grad_norm": 0.043011441826820374,
"learning_rate": 0.00029586678870529583,
"loss": -0.8876,
"step": 7650
},
{
"epoch": 0.762321797327893,
"grad_norm": 0.060498643666505814,
"learning_rate": 0.000293533525558495,
"loss": -0.8379,
"step": 7660
},
{
"epoch": 0.7633169954967283,
"grad_norm": 0.044412821531295776,
"learning_rate": 0.00029120791554089134,
"loss": -0.8474,
"step": 7670
},
{
"epoch": 0.7643121936655637,
"grad_norm": 0.028071725741028786,
"learning_rate": 0.0002888899838457455,
"loss": -0.8469,
"step": 7680
},
{
"epoch": 0.765307391834399,
"grad_norm": 0.10462887585163116,
"learning_rate": 0.00028657975558313867,
"loss": -0.8564,
"step": 7690
},
{
"epoch": 0.7663025900032344,
"grad_norm": 0.041337281465530396,
"learning_rate": 0.00028427725577970155,
"loss": -0.8903,
"step": 7700
},
{
"epoch": 0.7672977881720697,
"grad_norm": 0.13814806938171387,
"learning_rate": 0.00028198250937834204,
"loss": -0.8536,
"step": 7710
},
{
"epoch": 0.7682929863409051,
"grad_norm": 0.07473795861005783,
"learning_rate": 0.00027969554123797615,
"loss": -0.8976,
"step": 7720
},
{
"epoch": 0.7692881845097405,
"grad_norm": 0.25231143832206726,
"learning_rate": 0.00027741637613325866,
"loss": -0.8385,
"step": 7730
},
{
"epoch": 0.7702833826785759,
"grad_norm": 0.08813674002885818,
"learning_rate": 0.0002751450387543131,
"loss": -0.8863,
"step": 7740
},
{
"epoch": 0.7712785808474112,
"grad_norm": 0.1306094378232956,
"learning_rate": 0.00027288155370646663,
"loss": -0.8489,
"step": 7750
},
{
"epoch": 0.7722737790162466,
"grad_norm": 0.04535212367773056,
"learning_rate": 0.00027062594550998154,
"loss": -0.8451,
"step": 7760
},
{
"epoch": 0.773268977185082,
"grad_norm": 0.043633297085762024,
"learning_rate": 0.0002683782385997909,
"loss": -0.8781,
"step": 7770
},
{
"epoch": 0.7742641753539173,
"grad_norm": 0.08653497695922852,
"learning_rate": 0.0002661384573252338,
"loss": -0.874,
"step": 7780
},
{
"epoch": 0.7752593735227528,
"grad_norm": 0.0407242476940155,
"learning_rate": 0.0002639066259497899,
"loss": -0.8756,
"step": 7790
},
{
"epoch": 0.7762545716915881,
"grad_norm": 0.03243414685130119,
"learning_rate": 0.0002616827686508192,
"loss": -0.8929,
"step": 7800
},
{
"epoch": 0.7772497698604235,
"grad_norm": 0.08098597824573517,
"learning_rate": 0.00025946690951929763,
"loss": -0.8485,
"step": 7810
},
{
"epoch": 0.7782449680292588,
"grad_norm": 0.0585702583193779,
"learning_rate": 0.00025725907255955805,
"loss": -0.8777,
"step": 7820
},
{
"epoch": 0.7792401661980942,
"grad_norm": 0.04518997669219971,
"learning_rate": 0.0002550592816890295,
"loss": -0.8682,
"step": 7830
},
{
"epoch": 0.7802353643669295,
"grad_norm": 0.04417124390602112,
"learning_rate": 0.0002528675607379769,
"loss": -0.8164,
"step": 7840
},
{
"epoch": 0.781230562535765,
"grad_norm": 0.11929408460855484,
"learning_rate": 0.00025068393344924533,
"loss": -0.8469,
"step": 7850
},
{
"epoch": 0.7822257607046003,
"grad_norm": 0.04880857467651367,
"learning_rate": 0.00024850842347800016,
"loss": -0.8689,
"step": 7860
},
{
"epoch": 0.7832209588734357,
"grad_norm": 0.04271296039223671,
"learning_rate": 0.0002463410543914734,
"loss": -0.8697,
"step": 7870
},
{
"epoch": 0.784216157042271,
"grad_norm": 0.2216760665178299,
"learning_rate": 0.0002441818496687064,
"loss": -0.8428,
"step": 7880
},
{
"epoch": 0.7852113552111064,
"grad_norm": 0.060294851660728455,
"learning_rate": 0.0002420308327002958,
"loss": -0.8743,
"step": 7890
},
{
"epoch": 0.7862065533799418,
"grad_norm": 0.02434210106730461,
"learning_rate": 0.0002398880267881419,
"loss": -0.8757,
"step": 7900
},
{
"epoch": 0.7872017515487771,
"grad_norm": 0.017280230298638344,
"learning_rate": 0.0002377534551451932,
"loss": -0.8565,
"step": 7910
},
{
"epoch": 0.7881969497176126,
"grad_norm": 0.056493718177080154,
"learning_rate": 0.0002356271408951982,
"loss": -0.8494,
"step": 7920
},
{
"epoch": 0.7891921478864479,
"grad_norm": 0.0454886369407177,
"learning_rate": 0.00023350910707245175,
"loss": -0.8915,
"step": 7930
},
{
"epoch": 0.7901873460552833,
"grad_norm": 0.07155793905258179,
"learning_rate": 0.00023139937662154897,
"loss": -0.8625,
"step": 7940
},
{
"epoch": 0.7911825442241186,
"grad_norm": 0.03907801955938339,
"learning_rate": 0.00022929797239713324,
"loss": -0.8586,
"step": 7950
},
{
"epoch": 0.792177742392954,
"grad_norm": 0.03151841461658478,
"learning_rate": 0.00022720491716365056,
"loss": -0.8832,
"step": 7960
},
{
"epoch": 0.7931729405617893,
"grad_norm": 0.06963200867176056,
"learning_rate": 0.00022512023359510302,
"loss": -0.8816,
"step": 7970
},
{
"epoch": 0.7941681387306248,
"grad_norm": 0.04865657910704613,
"learning_rate": 0.0002230439442748019,
"loss": -0.8955,
"step": 7980
},
{
"epoch": 0.7951633368994601,
"grad_norm": 0.05831298232078552,
"learning_rate": 0.00022097607169512535,
"loss": -0.8827,
"step": 7990
},
{
"epoch": 0.7961585350682955,
"grad_norm": 0.049753572791814804,
"learning_rate": 0.0002189166382572715,
"loss": -0.8189,
"step": 8000
},
{
"epoch": 0.7971537332371308,
"grad_norm": 0.09022746980190277,
"learning_rate": 0.00021686566627101888,
"loss": -0.8772,
"step": 8010
},
{
"epoch": 0.7981489314059662,
"grad_norm": 0.06951210647821426,
"learning_rate": 0.00021482317795448248,
"loss": -0.8906,
"step": 8020
},
{
"epoch": 0.7991441295748016,
"grad_norm": 0.044376324862241745,
"learning_rate": 0.00021278919543387366,
"loss": -0.8528,
"step": 8030
},
{
"epoch": 0.8001393277436369,
"grad_norm": 0.04204118996858597,
"learning_rate": 0.00021076374074326253,
"loss": -0.8485,
"step": 8040
},
{
"epoch": 0.8011345259124724,
"grad_norm": 0.04041268303990364,
"learning_rate": 0.00020874683582433563,
"loss": -0.898,
"step": 8050
},
{
"epoch": 0.8021297240813077,
"grad_norm": 0.04211512207984924,
"learning_rate": 0.0002067385025261611,
"loss": -0.8741,
"step": 8060
},
{
"epoch": 0.8031249222501431,
"grad_norm": 0.14570793509483337,
"learning_rate": 0.0002047387626049504,
"loss": -0.8216,
"step": 8070
},
{
"epoch": 0.8041201204189784,
"grad_norm": 0.04487913101911545,
"learning_rate": 0.00020274763772382386,
"loss": -0.8405,
"step": 8080
},
{
"epoch": 0.8051153185878138,
"grad_norm": 0.03393075242638588,
"learning_rate": 0.00020076514945257441,
"loss": -0.8828,
"step": 8090
},
{
"epoch": 0.8061105167566491,
"grad_norm": 0.05002270266413689,
"learning_rate": 0.00019879131926743576,
"loss": -0.885,
"step": 8100
},
{
"epoch": 0.8071057149254846,
"grad_norm": 0.03206557407975197,
"learning_rate": 0.00019682616855084878,
"loss": -0.8404,
"step": 8110
},
{
"epoch": 0.8081009130943199,
"grad_norm": 0.08117040991783142,
"learning_rate": 0.00019486971859122916,
"loss": -0.8432,
"step": 8120
},
{
"epoch": 0.8090961112631553,
"grad_norm": 0.032579779624938965,
"learning_rate": 0.0001929219905827384,
"loss": -0.875,
"step": 8130
},
{
"epoch": 0.8100913094319906,
"grad_norm": 0.03493876755237579,
"learning_rate": 0.00019098300562505265,
"loss": -0.8912,
"step": 8140
},
{
"epoch": 0.811086507600826,
"grad_norm": 0.061572328209877014,
"learning_rate": 0.00018905278472313548,
"loss": -0.8395,
"step": 8150
},
{
"epoch": 0.8120817057696614,
"grad_norm": 0.040702104568481445,
"learning_rate": 0.00018713134878700977,
"loss": -0.8944,
"step": 8160
},
{
"epoch": 0.8130769039384967,
"grad_norm": 0.03129720687866211,
"learning_rate": 0.00018521871863153017,
"loss": -0.8673,
"step": 8170
},
{
"epoch": 0.8140721021073322,
"grad_norm": 0.03429539501667023,
"learning_rate": 0.00018331491497616004,
"loss": -0.8957,
"step": 8180
},
{
"epoch": 0.8150673002761675,
"grad_norm": 0.04819618538022041,
"learning_rate": 0.00018141995844474414,
"loss": -0.8567,
"step": 8190
},
{
"epoch": 0.8160624984450029,
"grad_norm": 0.07731886953115463,
"learning_rate": 0.0001795338695652874,
"loss": -0.8782,
"step": 8200
},
{
"epoch": 0.8170576966138382,
"grad_norm": 0.11185674369335175,
"learning_rate": 0.00017765666876973197,
"loss": -0.8439,
"step": 8210
},
{
"epoch": 0.8180528947826736,
"grad_norm": 0.04613180086016655,
"learning_rate": 0.0001757883763937348,
"loss": -0.8608,
"step": 8220
},
{
"epoch": 0.8190480929515089,
"grad_norm": 0.07438188791275024,
"learning_rate": 0.0001739290126764491,
"loss": -0.862,
"step": 8230
},
{
"epoch": 0.8200432911203444,
"grad_norm": 0.03218982741236687,
"learning_rate": 0.00017207859776030332,
"loss": -0.8423,
"step": 8240
},
{
"epoch": 0.8210384892891797,
"grad_norm": 0.043732915073633194,
"learning_rate": 0.00017023715169078458,
"loss": -0.8335,
"step": 8250
},
{
"epoch": 0.8220336874580151,
"grad_norm": 0.10098463296890259,
"learning_rate": 0.00016840469441622085,
"loss": -0.8614,
"step": 8260
},
{
"epoch": 0.8230288856268504,
"grad_norm": 0.04664154723286629,
"learning_rate": 0.00016658124578756373,
"loss": -0.856,
"step": 8270
},
{
"epoch": 0.8240240837956858,
"grad_norm": 0.03871015086770058,
"learning_rate": 0.00016476682555817567,
"loss": -0.8304,
"step": 8280
},
{
"epoch": 0.8250192819645212,
"grad_norm": 0.04428160935640335,
"learning_rate": 0.0001629614533836138,
"loss": -0.8413,
"step": 8290
},
{
"epoch": 0.8260144801333565,
"grad_norm": 0.04464396834373474,
"learning_rate": 0.00016116514882141852,
"loss": -0.874,
"step": 8300
},
{
"epoch": 0.827009678302192,
"grad_norm": 0.04489503055810928,
"learning_rate": 0.00015937793133090117,
"loss": -0.8462,
"step": 8310
},
{
"epoch": 0.8280048764710273,
"grad_norm": 0.055837105959653854,
"learning_rate": 0.00015759982027293242,
"loss": -0.8428,
"step": 8320
},
{
"epoch": 0.8290000746398627,
"grad_norm": 0.031371161341667175,
"learning_rate": 0.00015583083490973404,
"loss": -0.8567,
"step": 8330
},
{
"epoch": 0.829995272808698,
"grad_norm": 0.04616473242640495,
"learning_rate": 0.00015407099440466876,
"loss": -0.8605,
"step": 8340
},
{
"epoch": 0.8309904709775334,
"grad_norm": 0.059836626052856445,
"learning_rate": 0.0001523203178220338,
"loss": -0.8875,
"step": 8350
},
{
"epoch": 0.8319856691463687,
"grad_norm": 0.037584539502859116,
"learning_rate": 0.00015057882412685387,
"loss": -0.861,
"step": 8360
},
{
"epoch": 0.8329808673152042,
"grad_norm": 0.0409194752573967,
"learning_rate": 0.0001488465321846757,
"loss": -0.8785,
"step": 8370
},
{
"epoch": 0.8339760654840395,
"grad_norm": 0.06696359813213348,
"learning_rate": 0.00014712346076136361,
"loss": -0.8812,
"step": 8380
},
{
"epoch": 0.8349712636528749,
"grad_norm": 0.07238131761550903,
"learning_rate": 0.00014540962852289607,
"loss": -0.8724,
"step": 8390
},
{
"epoch": 0.8359664618217102,
"grad_norm": 0.040291257202625275,
"learning_rate": 0.00014370505403516444,
"loss": -0.8852,
"step": 8400
},
{
"epoch": 0.8369616599905456,
"grad_norm": 0.04370676726102829,
"learning_rate": 0.00014200975576377019,
"loss": -0.8811,
"step": 8410
},
{
"epoch": 0.837956858159381,
"grad_norm": 0.05080821365118027,
"learning_rate": 0.0001403237520738273,
"loss": -0.8789,
"step": 8420
},
{
"epoch": 0.8389520563282163,
"grad_norm": 0.02113007754087448,
"learning_rate": 0.00013864706122976024,
"loss": -0.8439,
"step": 8430
},
{
"epoch": 0.8399472544970518,
"grad_norm": 0.0272366963326931,
"learning_rate": 0.00013697970139510895,
"loss": -0.8755,
"step": 8440
},
{
"epoch": 0.8409424526658871,
"grad_norm": 0.0732220858335495,
"learning_rate": 0.00013532169063233,
"loss": -0.8723,
"step": 8450
},
{
"epoch": 0.8419376508347225,
"grad_norm": 0.03853273764252663,
"learning_rate": 0.00013367304690260163,
"loss": -0.8629,
"step": 8460
},
{
"epoch": 0.8429328490035578,
"grad_norm": 0.06368320435285568,
"learning_rate": 0.0001320337880656307,
"loss": -0.8632,
"step": 8470
},
{
"epoch": 0.8439280471723932,
"grad_norm": 0.052474457770586014,
"learning_rate": 0.00013040393187945621,
"loss": -0.8442,
"step": 8480
},
{
"epoch": 0.8449232453412285,
"grad_norm": 0.06055787578225136,
"learning_rate": 0.00012878349600025952,
"loss": -0.8541,
"step": 8490
},
{
"epoch": 0.845918443510064,
"grad_norm": 0.040329884737730026,
"learning_rate": 0.00012717249798217134,
"loss": -0.8903,
"step": 8500
},
{
"epoch": 0.8469136416788993,
"grad_norm": 0.04345664381980896,
"learning_rate": 0.00012557095527708306,
"loss": -0.8855,
"step": 8510
},
{
"epoch": 0.8479088398477347,
"grad_norm": 0.035441722720861435,
"learning_rate": 0.00012397888523445688,
"loss": -0.8772,
"step": 8520
},
{
"epoch": 0.84890403801657,
"grad_norm": 0.04323457553982735,
"learning_rate": 0.00012239630510113732,
"loss": -0.8932,
"step": 8530
},
{
"epoch": 0.8498992361854054,
"grad_norm": 0.06097767502069473,
"learning_rate": 0.00012082323202116563,
"loss": -0.8563,
"step": 8540
},
{
"epoch": 0.8508944343542408,
"grad_norm": 0.07687395066022873,
"learning_rate": 0.0001192596830355931,
"loss": -0.85,
"step": 8550
},
{
"epoch": 0.8518896325230761,
"grad_norm": 0.04906463623046875,
"learning_rate": 0.00011770567508229712,
"loss": -0.8315,
"step": 8560
},
{
"epoch": 0.8528848306919116,
"grad_norm": 0.04658054560422897,
"learning_rate": 0.00011616122499579684,
"loss": -0.8413,
"step": 8570
},
{
"epoch": 0.8538800288607469,
"grad_norm": 0.08086878061294556,
"learning_rate": 0.00011462634950707185,
"loss": -0.8537,
"step": 8580
},
{
"epoch": 0.8548752270295823,
"grad_norm": 0.05222253501415253,
"learning_rate": 0.00011310106524338071,
"loss": -0.8909,
"step": 8590
},
{
"epoch": 0.8558704251984176,
"grad_norm": 0.08359741419553757,
"learning_rate": 0.00011158538872807933,
"loss": -0.8564,
"step": 8600
},
{
"epoch": 0.856865623367253,
"grad_norm": 0.10812195390462875,
"learning_rate": 0.00011007933638044454,
"loss": -0.8798,
"step": 8610
},
{
"epoch": 0.8578608215360883,
"grad_norm": 0.05695914104580879,
"learning_rate": 0.0001085829245154939,
"loss": -0.8917,
"step": 8620
},
{
"epoch": 0.8588560197049238,
"grad_norm": 0.08079014718532562,
"learning_rate": 0.00010709616934381038,
"loss": -0.8678,
"step": 8630
},
{
"epoch": 0.8598512178737591,
"grad_norm": 0.04299706593155861,
"learning_rate": 0.00010561908697136657,
"loss": -0.8425,
"step": 8640
},
{
"epoch": 0.8608464160425945,
"grad_norm": 0.04730239138007164,
"learning_rate": 0.00010415169339934894,
"loss": -0.8632,
"step": 8650
},
{
"epoch": 0.8618416142114298,
"grad_norm": 0.08874726295471191,
"learning_rate": 0.00010269400452398659,
"loss": -0.8682,
"step": 8660
},
{
"epoch": 0.8628368123802652,
"grad_norm": 0.038999781012535095,
"learning_rate": 0.00010124603613637707,
"loss": -0.8594,
"step": 8670
},
{
"epoch": 0.8638320105491006,
"grad_norm": 0.04859554022550583,
"learning_rate": 9.980780392231692e-05,
"loss": -0.8741,
"step": 8680
},
{
"epoch": 0.864827208717936,
"grad_norm": 0.04477314278483391,
"learning_rate": 9.837932346213063e-05,
"loss": -0.8545,
"step": 8690
},
{
"epoch": 0.8658224068867714,
"grad_norm": 0.033657848834991455,
"learning_rate": 9.696061023050207e-05,
"loss": -0.8219,
"step": 8700
},
{
"epoch": 0.8668176050556067,
"grad_norm": 0.07091525197029114,
"learning_rate": 9.555167959630762e-05,
"loss": -0.8416,
"step": 8710
},
{
"epoch": 0.8678128032244421,
"grad_norm": 0.04914763569831848,
"learning_rate": 9.415254682244834e-05,
"loss": -0.8523,
"step": 8720
},
{
"epoch": 0.8688080013932774,
"grad_norm": 0.08185221999883652,
"learning_rate": 9.276322706568596e-05,
"loss": -0.883,
"step": 8730
},
{
"epoch": 0.8698031995621128,
"grad_norm": 0.07200242578983307,
"learning_rate": 9.138373537647804e-05,
"loss": -0.8945,
"step": 8740
},
{
"epoch": 0.8707983977309481,
"grad_norm": 0.1187279000878334,
"learning_rate": 9.00140866988145e-05,
"loss": -0.8538,
"step": 8750
},
{
"epoch": 0.8717935958997836,
"grad_norm": 0.10631956160068512,
"learning_rate": 8.865429587005702e-05,
"loss": -0.8618,
"step": 8760
},
{
"epoch": 0.8727887940686189,
"grad_norm": 0.05690061300992966,
"learning_rate": 8.730437762077658e-05,
"loss": -0.8398,
"step": 8770
},
{
"epoch": 0.8737839922374543,
"grad_norm": 0.14262869954109192,
"learning_rate": 8.596434657459562e-05,
"loss": -0.8841,
"step": 8780
},
{
"epoch": 0.8747791904062896,
"grad_norm": 0.06665827333927155,
"learning_rate": 8.463421724802845e-05,
"loss": -0.8232,
"step": 8790
},
{
"epoch": 0.875774388575125,
"grad_norm": 0.039045125246047974,
"learning_rate": 8.331400405032452e-05,
"loss": -0.9178,
"step": 8800
},
{
"epoch": 0.8767695867439604,
"grad_norm": 0.05045272409915924,
"learning_rate": 8.200372128331202e-05,
"loss": -0.8279,
"step": 8810
},
{
"epoch": 0.8777647849127957,
"grad_norm": 0.02887490577995777,
"learning_rate": 8.070338314124282e-05,
"loss": -0.8917,
"step": 8820
},
{
"epoch": 0.8787599830816312,
"grad_norm": 0.20094476640224457,
"learning_rate": 7.941300371063954e-05,
"loss": -0.8626,
"step": 8830
},
{
"epoch": 0.8797551812504665,
"grad_norm": 0.05032164603471756,
"learning_rate": 7.813259697014219e-05,
"loss": -0.8431,
"step": 8840
},
{
"epoch": 0.8807503794193019,
"grad_norm": 0.04273492842912674,
"learning_rate": 7.686217679035712e-05,
"loss": -0.8707,
"step": 8850
},
{
"epoch": 0.8817455775881372,
"grad_norm": 0.03904595598578453,
"learning_rate": 7.560175693370575e-05,
"loss": -0.8689,
"step": 8860
},
{
"epoch": 0.8827407757569726,
"grad_norm": 0.03059651516377926,
"learning_rate": 7.43513510542776e-05,
"loss": -0.868,
"step": 8870
},
{
"epoch": 0.8837359739258079,
"grad_norm": 0.04011029377579689,
"learning_rate": 7.311097269767997e-05,
"loss": -0.8794,
"step": 8880
},
{
"epoch": 0.8847311720946434,
"grad_norm": 0.06519157439470291,
"learning_rate": 7.188063530089262e-05,
"loss": -0.8933,
"step": 8890
},
{
"epoch": 0.8857263702634787,
"grad_norm": 0.0295392032712698,
"learning_rate": 7.066035219212264e-05,
"loss": -0.8859,
"step": 8900
},
{
"epoch": 0.8867215684323141,
"grad_norm": 0.05787239223718643,
"learning_rate": 6.945013659065813e-05,
"loss": -0.8734,
"step": 8910
},
{
"epoch": 0.8877167666011494,
"grad_norm": 0.026180433109402657,
"learning_rate": 6.825000160672734e-05,
"loss": -0.8962,
"step": 8920
},
{
"epoch": 0.8887119647699848,
"grad_norm": 0.045901086181402206,
"learning_rate": 6.705996024135453e-05,
"loss": -0.8262,
"step": 8930
},
{
"epoch": 0.8897071629388202,
"grad_norm": 0.11189127713441849,
"learning_rate": 6.588002538622062e-05,
"loss": -0.8305,
"step": 8940
},
{
"epoch": 0.8907023611076555,
"grad_norm": 0.03631236031651497,
"learning_rate": 6.471020982352338e-05,
"loss": -0.8792,
"step": 8950
},
{
"epoch": 0.891697559276491,
"grad_norm": 0.017476355656981468,
"learning_rate": 6.355052622583756e-05,
"loss": -0.8748,
"step": 8960
},
{
"epoch": 0.8926927574453263,
"grad_norm": 0.10378779470920563,
"learning_rate": 6.240098715597975e-05,
"loss": -0.8606,
"step": 8970
},
{
"epoch": 0.8936879556141617,
"grad_norm": 0.029117906466126442,
"learning_rate": 6.12616050668704e-05,
"loss": -0.8741,
"step": 8980
},
{
"epoch": 0.894683153782997,
"grad_norm": 0.026523800566792488,
"learning_rate": 6.0132392301400105e-05,
"loss": -0.838,
"step": 8990
},
{
"epoch": 0.8956783519518324,
"grad_norm": 0.06945938616991043,
"learning_rate": 5.901336109229538e-05,
"loss": -0.8427,
"step": 9000
},
{
"epoch": 0.8966735501206677,
"grad_norm": 0.06329178810119629,
"learning_rate": 5.790452356198628e-05,
"loss": -0.8554,
"step": 9010
},
{
"epoch": 0.8976687482895032,
"grad_norm": 0.0641210675239563,
"learning_rate": 5.680589172247519e-05,
"loss": -0.8831,
"step": 9020
},
{
"epoch": 0.8986639464583385,
"grad_norm": 0.09026167541742325,
"learning_rate": 5.571747747520617e-05,
"loss": -0.873,
"step": 9030
},
{
"epoch": 0.8996591446271739,
"grad_norm": 0.06779684126377106,
"learning_rate": 5.463929261093692e-05,
"loss": -0.855,
"step": 9040
},
{
"epoch": 0.9006543427960093,
"grad_norm": 0.09357842057943344,
"learning_rate": 5.357134880961012e-05,
"loss": -0.8481,
"step": 9050
},
{
"epoch": 0.9016495409648446,
"grad_norm": 0.051097650080919266,
"learning_rate": 5.251365764022753e-05,
"loss": -0.8628,
"step": 9060
},
{
"epoch": 0.90264473913368,
"grad_norm": 0.0375586561858654,
"learning_rate": 5.1466230560724746e-05,
"loss": -0.8639,
"step": 9070
},
{
"epoch": 0.9036399373025154,
"grad_norm": 0.03898193687200546,
"learning_rate": 5.0429078917846204e-05,
"loss": -0.872,
"step": 9080
},
{
"epoch": 0.9046351354713508,
"grad_norm": 0.03768244758248329,
"learning_rate": 4.940221394702349e-05,
"loss": -0.8711,
"step": 9090
},
{
"epoch": 0.9056303336401861,
"grad_norm": 0.03134647756814957,
"learning_rate": 4.8385646772252324e-05,
"loss": -0.8317,
"step": 9100
},
{
"epoch": 0.9066255318090215,
"grad_norm": 0.04638220742344856,
"learning_rate": 4.7379388405973225e-05,
"loss": -0.8638,
"step": 9110
},
{
"epoch": 0.9076207299778568,
"grad_norm": 0.0377880223095417,
"learning_rate": 4.6383449748951703e-05,
"loss": -0.8896,
"step": 9120
},
{
"epoch": 0.9086159281466922,
"grad_norm": 0.03936488553881645,
"learning_rate": 4.539784159015992e-05,
"loss": -0.8538,
"step": 9130
},
{
"epoch": 0.9096111263155275,
"grad_norm": 0.06327816098928452,
"learning_rate": 4.4422574606660216e-05,
"loss": -0.8563,
"step": 9140
},
{
"epoch": 0.910606324484363,
"grad_norm": 0.08217553049325943,
"learning_rate": 4.3457659363489224e-05,
"loss": -0.8979,
"step": 9150
},
{
"epoch": 0.9116015226531983,
"grad_norm": 0.06306605041027069,
"learning_rate": 4.2503106313543705e-05,
"loss": -0.8936,
"step": 9160
},
{
"epoch": 0.9125967208220337,
"grad_norm": 0.036004096269607544,
"learning_rate": 4.15589257974669e-05,
"loss": -0.8948,
"step": 9170
},
{
"epoch": 0.9135919189908691,
"grad_norm": 0.06238359585404396,
"learning_rate": 4.062512804353669e-05,
"loss": -0.8376,
"step": 9180
},
{
"epoch": 0.9145871171597044,
"grad_norm": 0.05194539204239845,
"learning_rate": 3.9701723167555046e-05,
"loss": -0.8677,
"step": 9190
},
{
"epoch": 0.9155823153285398,
"grad_norm": 0.08999146521091461,
"learning_rate": 3.87887211727379e-05,
"loss": -0.8978,
"step": 9200
},
{
"epoch": 0.9165775134973752,
"grad_norm": 0.05085189267992973,
"learning_rate": 3.788613194960733e-05,
"loss": -0.8942,
"step": 9210
},
{
"epoch": 0.9175727116662106,
"grad_norm": 0.11529362946748734,
"learning_rate": 3.699396527588428e-05,
"loss": -0.8911,
"step": 9220
},
{
"epoch": 0.9185679098350459,
"grad_norm": 0.09689343720674515,
"learning_rate": 3.6112230816382374e-05,
"loss": -0.8627,
"step": 9230
},
{
"epoch": 0.9195631080038813,
"grad_norm": 0.05098208412528038,
"learning_rate": 3.52409381229033e-05,
"loss": -0.8221,
"step": 9240
},
{
"epoch": 0.9205583061727166,
"grad_norm": 0.030068768188357353,
"learning_rate": 3.4380096634133326e-05,
"loss": -0.8836,
"step": 9250
},
{
"epoch": 0.921553504341552,
"grad_norm": 0.036479201167821884,
"learning_rate": 3.352971567554175e-05,
"loss": -0.8772,
"step": 9260
},
{
"epoch": 0.9225487025103873,
"grad_norm": 0.06124093383550644,
"learning_rate": 3.2689804459278494e-05,
"loss": -0.8289,
"step": 9270
},
{
"epoch": 0.9235439006792228,
"grad_norm": 0.04751259461045265,
"learning_rate": 3.186037208407588e-05,
"loss": -0.8631,
"step": 9280
},
{
"epoch": 0.9245390988480581,
"grad_norm": 0.22392483055591583,
"learning_rate": 3.1041427535148495e-05,
"loss": -0.8707,
"step": 9290
},
{
"epoch": 0.9255342970168935,
"grad_norm": 0.0312654934823513,
"learning_rate": 3.0232979684097218e-05,
"loss": -0.8499,
"step": 9300
},
{
"epoch": 0.9265294951857289,
"grad_norm": 0.04291137680411339,
"learning_rate": 2.943503728881225e-05,
"loss": -0.8521,
"step": 9310
},
{
"epoch": 0.9275246933545642,
"grad_norm": 0.03710932657122612,
"learning_rate": 2.8647608993378372e-05,
"loss": -0.8544,
"step": 9320
},
{
"epoch": 0.9285198915233996,
"grad_norm": 0.039480455219745636,
"learning_rate": 2.7870703327981917e-05,
"loss": -0.831,
"step": 9330
},
{
"epoch": 0.929515089692235,
"grad_norm": 0.03444267436861992,
"learning_rate": 2.7104328708817517e-05,
"loss": -0.8909,
"step": 9340
},
{
"epoch": 0.9305102878610704,
"grad_norm": 0.038241058588027954,
"learning_rate": 2.63484934379975e-05,
"loss": -0.8829,
"step": 9350
},
{
"epoch": 0.9315054860299057,
"grad_norm": 0.031606342643499374,
"learning_rate": 2.560320570346164e-05,
"loss": -0.9075,
"step": 9360
},
{
"epoch": 0.9325006841987411,
"grad_norm": 0.04962693154811859,
"learning_rate": 2.486847357888844e-05,
"loss": -0.8927,
"step": 9370
},
{
"epoch": 0.9334958823675764,
"grad_norm": 0.04051986709237099,
"learning_rate": 2.4144305023608427e-05,
"loss": -0.8797,
"step": 9380
},
{
"epoch": 0.9344910805364118,
"grad_norm": 0.04305783286690712,
"learning_rate": 2.3430707882516555e-05,
"loss": -0.8594,
"step": 9390
},
{
"epoch": 0.9354862787052471,
"grad_norm": 0.12860926985740662,
"learning_rate": 2.2727689885988388e-05,
"loss": -0.8479,
"step": 9400
},
{
"epoch": 0.9364814768740826,
"grad_norm": 0.11391417682170868,
"learning_rate": 2.203525864979583e-05,
"loss": -0.8835,
"step": 9410
},
{
"epoch": 0.9374766750429179,
"grad_norm": 0.04358312115073204,
"learning_rate": 2.1353421675024854e-05,
"loss": -0.8827,
"step": 9420
},
{
"epoch": 0.9384718732117533,
"grad_norm": 0.0354497916996479,
"learning_rate": 2.0682186347994127e-05,
"loss": -0.8626,
"step": 9430
},
{
"epoch": 0.9394670713805887,
"grad_norm": 0.10072290152311325,
"learning_rate": 2.002155994017474e-05,
"loss": -0.8557,
"step": 9440
},
{
"epoch": 0.940462269549424,
"grad_norm": 0.10969705134630203,
"learning_rate": 1.9371549608112048e-05,
"loss": -0.8552,
"step": 9450
},
{
"epoch": 0.9414574677182594,
"grad_norm": 0.04841183125972748,
"learning_rate": 1.8732162393347518e-05,
"loss": -0.8737,
"step": 9460
},
{
"epoch": 0.9424526658870948,
"grad_norm": 0.14277507364749908,
"learning_rate": 1.8103405222342883e-05,
"loss": -0.8753,
"step": 9470
},
{
"epoch": 0.9434478640559302,
"grad_norm": 0.03108733333647251,
"learning_rate": 1.7485284906404776e-05,
"loss": -0.8666,
"step": 9480
},
{
"epoch": 0.9444430622247655,
"grad_norm": 0.07302725315093994,
"learning_rate": 1.687780814161144e-05,
"loss": -0.8559,
"step": 9490
},
{
"epoch": 0.9454382603936009,
"grad_norm": 0.03906615450978279,
"learning_rate": 1.6280981508739467e-05,
"loss": -0.9099,
"step": 9500
},
{
"epoch": 0.9464334585624362,
"grad_norm": 0.0752185583114624,
"learning_rate": 1.569481147319318e-05,
"loss": -0.8274,
"step": 9510
},
{
"epoch": 0.9474286567312716,
"grad_norm": 0.045788075774908066,
"learning_rate": 1.5119304384934252e-05,
"loss": -0.8736,
"step": 9520
},
{
"epoch": 0.9484238549001069,
"grad_norm": 0.027977997437119484,
"learning_rate": 1.4554466478412743e-05,
"loss": -0.892,
"step": 9530
},
{
"epoch": 0.9494190530689424,
"grad_norm": 0.027789343148469925,
"learning_rate": 1.4000303872500286e-05,
"loss": -0.8402,
"step": 9540
},
{
"epoch": 0.9504142512377777,
"grad_norm": 0.053874798119068146,
"learning_rate": 1.34568225704228e-05,
"loss": -0.893,
"step": 9550
},
{
"epoch": 0.9514094494066131,
"grad_norm": 0.06773625314235687,
"learning_rate": 1.2924028459696314e-05,
"loss": -0.8907,
"step": 9560
},
{
"epoch": 0.9524046475754485,
"grad_norm": 0.03913061320781708,
"learning_rate": 1.240192731206291e-05,
"loss": -0.8149,
"step": 9570
},
{
"epoch": 0.9533998457442838,
"grad_norm": 0.04107360541820526,
"learning_rate": 1.1890524783427559e-05,
"loss": -0.8805,
"step": 9580
},
{
"epoch": 0.9543950439131192,
"grad_norm": 0.03134811297059059,
"learning_rate": 1.1389826413798265e-05,
"loss": -0.8906,
"step": 9590
},
{
"epoch": 0.9553902420819546,
"grad_norm": 0.06669458001852036,
"learning_rate": 1.0899837627224685e-05,
"loss": -0.8781,
"step": 9600
},
{
"epoch": 0.95638544025079,
"grad_norm": 0.07125691324472427,
"learning_rate": 1.0420563731739829e-05,
"loss": -0.8394,
"step": 9610
},
{
"epoch": 0.9573806384196253,
"grad_norm": 0.04976421222090721,
"learning_rate": 9.952009919302896e-06,
"loss": -0.8506,
"step": 9620
},
{
"epoch": 0.9583758365884607,
"grad_norm": 0.04743211343884468,
"learning_rate": 9.494181265742641e-06,
"loss": -0.8757,
"step": 9630
},
{
"epoch": 0.959371034757296,
"grad_norm": 0.037107232958078384,
"learning_rate": 9.04708273070265e-06,
"loss": -0.8332,
"step": 9640
},
{
"epoch": 0.9603662329261314,
"grad_norm": 0.03232187032699585,
"learning_rate": 8.610719157587155e-06,
"loss": -0.8973,
"step": 9650
},
{
"epoch": 0.9613614310949667,
"grad_norm": 0.03275300934910774,
"learning_rate": 8.185095273509412e-06,
"loss": -0.8738,
"step": 9660
},
{
"epoch": 0.9623566292638022,
"grad_norm": 0.045168716460466385,
"learning_rate": 7.770215689239301e-06,
"loss": -0.8946,
"step": 9670
},
{
"epoch": 0.9633518274326375,
"grad_norm": 0.03318966180086136,
"learning_rate": 7.366084899154357e-06,
"loss": -0.8539,
"step": 9680
},
{
"epoch": 0.9643470256014729,
"grad_norm": 0.049187105149030685,
"learning_rate": 6.972707281191037e-06,
"loss": -0.8727,
"step": 9690
},
{
"epoch": 0.9653422237703083,
"grad_norm": 0.10269180685281754,
"learning_rate": 6.5900870967965375e-06,
"loss": -0.7973,
"step": 9700
},
{
"epoch": 0.9663374219391436,
"grad_norm": 0.11406390368938446,
"learning_rate": 6.218228490883493e-06,
"loss": -0.8991,
"step": 9710
},
{
"epoch": 0.967332620107979,
"grad_norm": 0.03116353042423725,
"learning_rate": 5.8571354917844596e-06,
"loss": -0.8567,
"step": 9720
},
{
"epoch": 0.9683278182768144,
"grad_norm": 0.04186422377824783,
"learning_rate": 5.5068120112086174e-06,
"loss": -0.8416,
"step": 9730
},
{
"epoch": 0.9693230164456498,
"grad_norm": 0.05916735902428627,
"learning_rate": 5.167261844199134e-06,
"loss": -0.8515,
"step": 9740
},
{
"epoch": 0.9703182146144851,
"grad_norm": 0.05601034685969353,
"learning_rate": 4.838488669092533e-06,
"loss": -0.8607,
"step": 9750
},
{
"epoch": 0.9713134127833205,
"grad_norm": 0.05893021821975708,
"learning_rate": 4.520496047478284e-06,
"loss": -0.8704,
"step": 9760
},
{
"epoch": 0.9723086109521558,
"grad_norm": 0.16313856840133667,
"learning_rate": 4.213287424160272e-06,
"loss": -0.8473,
"step": 9770
},
{
"epoch": 0.9733038091209912,
"grad_norm": 0.03145838901400566,
"learning_rate": 3.916866127120278e-06,
"loss": -0.8733,
"step": 9780
},
{
"epoch": 0.9742990072898265,
"grad_norm": 0.05181713029742241,
"learning_rate": 3.6312353674805567e-06,
"loss": -0.868,
"step": 9790
},
{
"epoch": 0.975294205458662,
"grad_norm": 0.0550004206597805,
"learning_rate": 3.3563982394704262e-06,
"loss": -0.8923,
"step": 9800
},
{
"epoch": 0.9762894036274973,
"grad_norm": 0.13421136140823364,
"learning_rate": 3.0923577203918474e-06,
"loss": -0.8603,
"step": 9810
},
{
"epoch": 0.9772846017963327,
"grad_norm": 0.04877639561891556,
"learning_rate": 2.8391166705874493e-06,
"loss": -0.8536,
"step": 9820
},
{
"epoch": 0.9782797999651681,
"grad_norm": 0.055065833032131195,
"learning_rate": 2.5966778334096662e-06,
"loss": -0.8699,
"step": 9830
},
{
"epoch": 0.9792749981340034,
"grad_norm": 0.04793205112218857,
"learning_rate": 2.36504383519065e-06,
"loss": -0.8556,
"step": 9840
},
{
"epoch": 0.9802701963028388,
"grad_norm": 0.03042910061776638,
"learning_rate": 2.1442171852144032e-06,
"loss": -0.856,
"step": 9850
},
{
"epoch": 0.9812653944716742,
"grad_norm": 0.08802367746829987,
"learning_rate": 1.9342002756891353e-06,
"loss": -0.8797,
"step": 9860
},
{
"epoch": 0.9822605926405096,
"grad_norm": 0.05212515965104103,
"learning_rate": 1.7349953817213938e-06,
"loss": -0.8264,
"step": 9870
},
{
"epoch": 0.9832557908093449,
"grad_norm": 0.1572272628545761,
"learning_rate": 1.5466046612915286e-06,
"loss": -0.894,
"step": 9880
},
{
"epoch": 0.9842509889781803,
"grad_norm": 0.05914374813437462,
"learning_rate": 1.3690301552303775e-06,
"loss": -0.8948,
"step": 9890
},
{
"epoch": 0.9852461871470156,
"grad_norm": 0.04328109323978424,
"learning_rate": 1.2022737871969502e-06,
"loss": -0.868,
"step": 9900
},
{
"epoch": 0.986241385315851,
"grad_norm": 0.037384625524282455,
"learning_rate": 1.0463373636578898e-06,
"loss": -0.874,
"step": 9910
},
{
"epoch": 0.9872365834846863,
"grad_norm": 0.038554031401872635,
"learning_rate": 9.012225738673774e-07,
"loss": -0.8484,
"step": 9920
},
{
"epoch": 0.9882317816535218,
"grad_norm": 0.092408187687397,
"learning_rate": 7.669309898495902e-07,
"loss": -0.864,
"step": 9930
},
{
"epoch": 0.9892269798223571,
"grad_norm": 0.05051958188414574,
"learning_rate": 6.434640663808278e-07,
"loss": -0.8263,
"step": 9940
},
{
"epoch": 0.9902221779911925,
"grad_norm": 0.03660441190004349,
"learning_rate": 5.308231409746345e-07,
"loss": -0.848,
"step": 9950
},
{
"epoch": 0.9912173761600279,
"grad_norm": 0.03256652131676674,
"learning_rate": 4.290094338664785e-07,
"loss": -0.8729,
"step": 9960
},
{
"epoch": 0.9922125743288632,
"grad_norm": 0.05838792771100998,
"learning_rate": 3.3802404800120646e-07,
"loss": -0.8351,
"step": 9970
},
{
"epoch": 0.9932077724976986,
"grad_norm": 0.05779346823692322,
"learning_rate": 2.578679690204977e-07,
"loss": -0.8578,
"step": 9980
},
{
"epoch": 0.994202970666534,
"grad_norm": 0.03772381693124771,
"learning_rate": 1.8854206525265039e-07,
"loss": -0.8928,
"step": 9990
},
{
"epoch": 0.9951981688353694,
"grad_norm": 0.025430290028452873,
"learning_rate": 1.3004708770314455e-07,
"loss": -0.8629,
"step": 10000
},
{
"epoch": 0.9961933670042047,
"grad_norm": 0.04903922230005264,
"learning_rate": 8.238367004609337e-08,
"loss": -0.8402,
"step": 10010
},
{
"epoch": 0.9971885651730401,
"grad_norm": 0.035914886742830276,
"learning_rate": 4.555232861802594e-08,
"loss": -0.8761,
"step": 10020
},
{
"epoch": 0.9981837633418754,
"grad_norm": 0.06849048286676407,
"learning_rate": 1.9553462411447953e-08,
"loss": -0.8929,
"step": 10030
},
{
"epoch": 0.9991789615107108,
"grad_norm": 0.03138072043657303,
"learning_rate": 4.387353071400035e-09,
"loss": -0.8618,
"step": 10040
}
],
"logging_steps": 10,
"max_steps": 10048,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2351234362159137e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}