opencodeinst_5k_sft / checkpoint-1600 /trainer_state.json
modrill's picture
Add files using upload-large-folder tool
4adcd3e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.6938947368421053,
"eval_steps": 100,
"global_step": 1600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016842105263157894,
"grad_norm": 0.21757784485816956,
"learning_rate": 5.027932960893855e-08,
"loss": 0.7252199172973632,
"step": 10
},
{
"epoch": 0.03368421052631579,
"grad_norm": 0.2456846386194229,
"learning_rate": 1.0614525139664805e-07,
"loss": 0.6507451057434082,
"step": 20
},
{
"epoch": 0.05052631578947368,
"grad_norm": 0.20819272100925446,
"learning_rate": 1.6201117318435754e-07,
"loss": 0.7381344795227051,
"step": 30
},
{
"epoch": 0.06736842105263158,
"grad_norm": 0.26373574137687683,
"learning_rate": 2.17877094972067e-07,
"loss": 0.7012194156646728,
"step": 40
},
{
"epoch": 0.08421052631578947,
"grad_norm": 0.2081507444381714,
"learning_rate": 2.7374301675977653e-07,
"loss": 0.6083873748779297,
"step": 50
},
{
"epoch": 0.10105263157894737,
"grad_norm": 0.2091236114501953,
"learning_rate": 3.29608938547486e-07,
"loss": 0.6980491638183594,
"step": 60
},
{
"epoch": 0.11789473684210526,
"grad_norm": 0.20970331132411957,
"learning_rate": 3.8547486033519547e-07,
"loss": 0.708641767501831,
"step": 70
},
{
"epoch": 0.13473684210526315,
"grad_norm": 0.18810197710990906,
"learning_rate": 4.41340782122905e-07,
"loss": 0.6742453098297119,
"step": 80
},
{
"epoch": 0.15157894736842106,
"grad_norm": 0.20251069962978363,
"learning_rate": 4.972067039106145e-07,
"loss": 0.6590609550476074,
"step": 90
},
{
"epoch": 0.16842105263157894,
"grad_norm": 0.2644217908382416,
"learning_rate": 5.53072625698324e-07,
"loss": 0.704926872253418,
"step": 100
},
{
"epoch": 0.18526315789473685,
"grad_norm": 0.23766489326953888,
"learning_rate": 6.089385474860335e-07,
"loss": 0.7445036888122558,
"step": 110
},
{
"epoch": 0.20210526315789473,
"grad_norm": 0.27427056431770325,
"learning_rate": 6.64804469273743e-07,
"loss": 0.7476531028747558,
"step": 120
},
{
"epoch": 0.21894736842105264,
"grad_norm": 0.3208928406238556,
"learning_rate": 7.206703910614524e-07,
"loss": 0.7291872501373291,
"step": 130
},
{
"epoch": 0.23578947368421052,
"grad_norm": 0.3123615086078644,
"learning_rate": 7.76536312849162e-07,
"loss": 0.721175241470337,
"step": 140
},
{
"epoch": 0.25263157894736843,
"grad_norm": 0.26158222556114197,
"learning_rate": 8.324022346368714e-07,
"loss": 0.7556095600128174,
"step": 150
},
{
"epoch": 0.2694736842105263,
"grad_norm": 0.2592650353908539,
"learning_rate": 8.88268156424581e-07,
"loss": 0.7328392505645752,
"step": 160
},
{
"epoch": 0.2863157894736842,
"grad_norm": 0.24533776938915253,
"learning_rate": 9.441340782122904e-07,
"loss": 0.6990129470825195,
"step": 170
},
{
"epoch": 0.3031578947368421,
"grad_norm": 0.23409004509449005,
"learning_rate": 1e-06,
"loss": 0.6694639205932618,
"step": 180
},
{
"epoch": 0.32,
"grad_norm": 0.3267499506473541,
"learning_rate": 9.999039806396227e-07,
"loss": 0.7123252868652343,
"step": 190
},
{
"epoch": 0.3368421052631579,
"grad_norm": 0.2115064263343811,
"learning_rate": 9.996159594373611e-07,
"loss": 0.6858412742614746,
"step": 200
},
{
"epoch": 0.35368421052631577,
"grad_norm": 0.26226580142974854,
"learning_rate": 9.991360470156615e-07,
"loss": 0.6541069507598877,
"step": 210
},
{
"epoch": 0.3705263157894737,
"grad_norm": 0.24552594125270844,
"learning_rate": 9.984644276980594e-07,
"loss": 0.6506116390228271,
"step": 220
},
{
"epoch": 0.3873684210526316,
"grad_norm": 0.25084301829338074,
"learning_rate": 9.976013594383835e-07,
"loss": 0.6540626049041748,
"step": 230
},
{
"epoch": 0.40421052631578946,
"grad_norm": 0.34244054555892944,
"learning_rate": 9.965471737216833e-07,
"loss": 0.6737770557403564,
"step": 240
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.34752583503723145,
"learning_rate": 9.953022754369114e-07,
"loss": 0.6755708217620849,
"step": 250
},
{
"epoch": 0.4378947368421053,
"grad_norm": 0.31017956137657166,
"learning_rate": 9.938671427214158e-07,
"loss": 0.6578442573547363,
"step": 260
},
{
"epoch": 0.45473684210526316,
"grad_norm": 0.21509627997875214,
"learning_rate": 9.922423267772986e-07,
"loss": 0.639409875869751,
"step": 270
},
{
"epoch": 0.47157894736842104,
"grad_norm": 0.3022947609424591,
"learning_rate": 9.904284516597102e-07,
"loss": 0.5995691776275635,
"step": 280
},
{
"epoch": 0.4884210526315789,
"grad_norm": 0.3367304801940918,
"learning_rate": 9.884262140371648e-07,
"loss": 0.5898309707641601,
"step": 290
},
{
"epoch": 0.5052631578947369,
"grad_norm": 0.294842928647995,
"learning_rate": 9.862363829239662e-07,
"loss": 0.6371779441833496,
"step": 300
},
{
"epoch": 0.5221052631578947,
"grad_norm": 0.25171560049057007,
"learning_rate": 9.838597993848456e-07,
"loss": 0.5795581817626954,
"step": 310
},
{
"epoch": 0.5389473684210526,
"grad_norm": 0.2818540036678314,
"learning_rate": 9.81297376211928e-07,
"loss": 0.5668415546417236,
"step": 320
},
{
"epoch": 0.5557894736842105,
"grad_norm": 0.32951900362968445,
"learning_rate": 9.785500975741498e-07,
"loss": 0.5933257102966308,
"step": 330
},
{
"epoch": 0.5726315789473684,
"grad_norm": 0.2763514518737793,
"learning_rate": 9.756190186392615e-07,
"loss": 0.5574678897857666,
"step": 340
},
{
"epoch": 0.5894736842105263,
"grad_norm": 0.3070182204246521,
"learning_rate": 9.725052651685612e-07,
"loss": 0.5532425880432129,
"step": 350
},
{
"epoch": 0.6063157894736843,
"grad_norm": 0.2079988420009613,
"learning_rate": 9.692100330845153e-07,
"loss": 0.5613389492034913,
"step": 360
},
{
"epoch": 0.6231578947368421,
"grad_norm": 0.282924622297287,
"learning_rate": 9.657345880114318e-07,
"loss": 0.5131485939025879,
"step": 370
},
{
"epoch": 0.64,
"grad_norm": 0.20901450514793396,
"learning_rate": 9.620802647893623e-07,
"loss": 0.6279027462005615,
"step": 380
},
{
"epoch": 0.6568421052631579,
"grad_norm": 0.2637634575366974,
"learning_rate": 9.58248466961421e-07,
"loss": 0.5403085231781006,
"step": 390
},
{
"epoch": 0.6736842105263158,
"grad_norm": 0.29078468680381775,
"learning_rate": 9.542406662347137e-07,
"loss": 0.5678809642791748,
"step": 400
},
{
"epoch": 0.6905263157894737,
"grad_norm": 0.2865101397037506,
"learning_rate": 9.500584019150895e-07,
"loss": 0.5479135036468505,
"step": 410
},
{
"epoch": 0.7073684210526315,
"grad_norm": 0.22857311367988586,
"learning_rate": 9.45703280315928e-07,
"loss": 0.5604462623596191,
"step": 420
},
{
"epoch": 0.7242105263157895,
"grad_norm": 0.23971959948539734,
"learning_rate": 9.411769741411903e-07,
"loss": 0.4704423427581787,
"step": 430
},
{
"epoch": 0.7410526315789474,
"grad_norm": 0.29793378710746765,
"learning_rate": 9.364812218429721e-07,
"loss": 0.560968017578125,
"step": 440
},
{
"epoch": 0.7578947368421053,
"grad_norm": 0.2236040234565735,
"learning_rate": 9.316178269538014e-07,
"loss": 0.5088452816009521,
"step": 450
},
{
"epoch": 0.7747368421052632,
"grad_norm": 0.22047854959964752,
"learning_rate": 9.265886573939446e-07,
"loss": 0.5030550956726074,
"step": 460
},
{
"epoch": 0.791578947368421,
"grad_norm": 0.2273361086845398,
"learning_rate": 9.213956447539792e-07,
"loss": 0.46353440284729003,
"step": 470
},
{
"epoch": 0.8084210526315789,
"grad_norm": 0.2170158326625824,
"learning_rate": 9.160407835529136e-07,
"loss": 0.49871411323547366,
"step": 480
},
{
"epoch": 0.8252631578947368,
"grad_norm": 0.19333498179912567,
"learning_rate": 9.105261304721375e-07,
"loss": 0.4416178226470947,
"step": 490
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.18490085005760193,
"learning_rate": 9.048538035654969e-07,
"loss": 0.39783194065093996,
"step": 500
},
{
"epoch": 0.8589473684210527,
"grad_norm": 0.22122648358345032,
"learning_rate": 8.990259814457977e-07,
"loss": 0.4318229198455811,
"step": 510
},
{
"epoch": 0.8757894736842106,
"grad_norm": 0.17448943853378296,
"learning_rate": 8.930449024480491e-07,
"loss": 0.42445807456970214,
"step": 520
},
{
"epoch": 0.8926315789473684,
"grad_norm": 0.18165165185928345,
"learning_rate": 8.8691286376977e-07,
"loss": 0.46429901123046874,
"step": 530
},
{
"epoch": 0.9094736842105263,
"grad_norm": 0.16785287857055664,
"learning_rate": 8.806322205886873e-07,
"loss": 0.3975703239440918,
"step": 540
},
{
"epoch": 0.9263157894736842,
"grad_norm": 0.1613738089799881,
"learning_rate": 8.74205385158165e-07,
"loss": 0.4458911418914795,
"step": 550
},
{
"epoch": 0.9431578947368421,
"grad_norm": 0.15376177430152893,
"learning_rate": 8.676348258807121e-07,
"loss": 0.45571184158325195,
"step": 560
},
{
"epoch": 0.96,
"grad_norm": 0.14966322481632233,
"learning_rate": 8.609230663599254e-07,
"loss": 0.4039600372314453,
"step": 570
},
{
"epoch": 0.9768421052631578,
"grad_norm": 0.16819055378437042,
"learning_rate": 8.540726844312294e-07,
"loss": 0.4382494926452637,
"step": 580
},
{
"epoch": 0.9936842105263158,
"grad_norm": 0.16405776143074036,
"learning_rate": 8.470863111717889e-07,
"loss": 0.4306180477142334,
"step": 590
},
{
"epoch": 1.0101052631578948,
"grad_norm": 0.18503950536251068,
"learning_rate": 8.399666298899706e-07,
"loss": 0.39806089401245115,
"step": 600
},
{
"epoch": 1.0269473684210526,
"grad_norm": 0.14375492930412292,
"learning_rate": 8.327163750947457e-07,
"loss": 0.4271697044372559,
"step": 610
},
{
"epoch": 1.0437894736842106,
"grad_norm": 0.1412728875875473,
"learning_rate": 8.253383314454263e-07,
"loss": 0.3939049243927002,
"step": 620
},
{
"epoch": 1.0606315789473684,
"grad_norm": 0.20121850073337555,
"learning_rate": 8.178353326821404e-07,
"loss": 0.43197131156921387,
"step": 630
},
{
"epoch": 1.0774736842105264,
"grad_norm": 0.17767728865146637,
"learning_rate": 8.102102605374566e-07,
"loss": 0.437807559967041,
"step": 640
},
{
"epoch": 1.0943157894736841,
"grad_norm": 0.1498359888792038,
"learning_rate": 8.024660436295759e-07,
"loss": 0.38409013748168946,
"step": 650
},
{
"epoch": 1.1111578947368421,
"grad_norm": 0.15958793461322784,
"learning_rate": 7.946056563375145e-07,
"loss": 0.4204962730407715,
"step": 660
},
{
"epoch": 1.1280000000000001,
"grad_norm": 0.157291978597641,
"learning_rate": 7.866321176587128e-07,
"loss": 0.42113161087036133,
"step": 670
},
{
"epoch": 1.1448421052631579,
"grad_norm": 0.14119838178157806,
"learning_rate": 7.785484900495065e-07,
"loss": 0.4151731491088867,
"step": 680
},
{
"epoch": 1.1616842105263159,
"grad_norm": 0.1296525001525879,
"learning_rate": 7.703578782489058e-07,
"loss": 0.38312902450561526,
"step": 690
},
{
"epoch": 1.1785263157894736,
"grad_norm": 0.13671696186065674,
"learning_rate": 7.620634280861351e-07,
"loss": 0.42612557411193847,
"step": 700
},
{
"epoch": 1.1953684210526316,
"grad_norm": 0.15196114778518677,
"learning_rate": 7.536683252723923e-07,
"loss": 0.4306772708892822,
"step": 710
},
{
"epoch": 1.2122105263157894,
"grad_norm": 0.1136903315782547,
"learning_rate": 7.451757941772868e-07,
"loss": 0.38483757972717286,
"step": 720
},
{
"epoch": 1.2290526315789474,
"grad_norm": 0.12378744781017303,
"learning_rate": 7.365890965904337e-07,
"loss": 0.4030342102050781,
"step": 730
},
{
"epoch": 1.2458947368421052,
"grad_norm": 0.1265542209148407,
"learning_rate": 7.279115304686733e-07,
"loss": 0.4091166973114014,
"step": 740
},
{
"epoch": 1.2627368421052632,
"grad_norm": 0.11647409200668335,
"learning_rate": 7.191464286694e-07,
"loss": 0.41426806449890136,
"step": 750
},
{
"epoch": 1.279578947368421,
"grad_norm": 0.11192695051431656,
"learning_rate": 7.102971576704875e-07,
"loss": 0.38181486129760744,
"step": 760
},
{
"epoch": 1.296421052631579,
"grad_norm": 0.14947861433029175,
"learning_rate": 7.013671162773003e-07,
"loss": 0.39824953079223635,
"step": 770
},
{
"epoch": 1.313263157894737,
"grad_norm": 0.11269424855709076,
"learning_rate": 6.923597343172891e-07,
"loss": 0.40348024368286134,
"step": 780
},
{
"epoch": 1.3301052631578947,
"grad_norm": 0.3742346167564392,
"learning_rate": 6.83278471322672e-07,
"loss": 0.38022048473358155,
"step": 790
},
{
"epoch": 1.3469473684210527,
"grad_norm": 0.1310902237892151,
"learning_rate": 6.741268152017057e-07,
"loss": 0.42791285514831545,
"step": 800
},
{
"epoch": 1.3637894736842107,
"grad_norm": 0.1692703813314438,
"learning_rate": 6.649082808990585e-07,
"loss": 0.4263493061065674,
"step": 810
},
{
"epoch": 1.3806315789473684,
"grad_norm": 0.1279117316007614,
"learning_rate": 6.556264090457998e-07,
"loss": 0.37379777431488037,
"step": 820
},
{
"epoch": 1.3974736842105262,
"grad_norm": 0.12949039041996002,
"learning_rate": 6.462847645995237e-07,
"loss": 0.38636391162872313,
"step": 830
},
{
"epoch": 1.4143157894736842,
"grad_norm": 0.10221126675605774,
"learning_rate": 6.368869354751284e-07,
"loss": 0.408221435546875,
"step": 840
},
{
"epoch": 1.4311578947368422,
"grad_norm": 0.11505889147520065,
"learning_rate": 6.274365311667797e-07,
"loss": 0.3951406717300415,
"step": 850
},
{
"epoch": 1.448,
"grad_norm": 0.11054962873458862,
"learning_rate": 6.179371813615859e-07,
"loss": 0.3732129096984863,
"step": 860
},
{
"epoch": 1.464842105263158,
"grad_norm": 0.10150120407342911,
"learning_rate": 6.083925345455158e-07,
"loss": 0.38601529598236084,
"step": 870
},
{
"epoch": 1.4816842105263157,
"grad_norm": 0.12239400297403336,
"learning_rate": 5.988062566020986e-07,
"loss": 0.3859985828399658,
"step": 880
},
{
"epoch": 1.4985263157894737,
"grad_norm": 0.15801067650318146,
"learning_rate": 5.891820294044408e-07,
"loss": 0.3983951807022095,
"step": 890
},
{
"epoch": 1.5153684210526315,
"grad_norm": 0.10104545950889587,
"learning_rate": 5.795235494011007e-07,
"loss": 0.41107850074768065,
"step": 900
},
{
"epoch": 1.5322105263157895,
"grad_norm": 0.1378099024295807,
"learning_rate": 5.698345261963668e-07,
"loss": 0.3708331823348999,
"step": 910
},
{
"epoch": 1.5490526315789475,
"grad_norm": 0.12936057150363922,
"learning_rate": 5.601186811254825e-07,
"loss": 0.387884521484375,
"step": 920
},
{
"epoch": 1.5658947368421052,
"grad_norm": 0.12379129230976105,
"learning_rate": 5.503797458253646e-07,
"loss": 0.43808717727661134,
"step": 930
},
{
"epoch": 1.582736842105263,
"grad_norm": 0.12017743289470673,
"learning_rate": 5.406214608013662e-07,
"loss": 0.41345391273498533,
"step": 940
},
{
"epoch": 1.5995789473684212,
"grad_norm": 0.1095535159111023,
"learning_rate": 5.308475739906328e-07,
"loss": 0.40022664070129393,
"step": 950
},
{
"epoch": 1.616421052631579,
"grad_norm": 0.13831396400928497,
"learning_rate": 5.210618393226045e-07,
"loss": 0.3909924983978271,
"step": 960
},
{
"epoch": 1.6332631578947368,
"grad_norm": 0.10449163615703583,
"learning_rate": 5.112680152772156e-07,
"loss": 0.37143146991729736,
"step": 970
},
{
"epoch": 1.6501052631578947,
"grad_norm": 0.11249610036611557,
"learning_rate": 5.01469863441348e-07,
"loss": 0.38103113174438474,
"step": 980
},
{
"epoch": 1.6669473684210527,
"grad_norm": 0.13718819618225098,
"learning_rate": 4.916711470640907e-07,
"loss": 0.4071629524230957,
"step": 990
},
{
"epoch": 1.6837894736842105,
"grad_norm": 0.10473571717739105,
"learning_rate": 4.818756296113595e-07,
"loss": 0.417419958114624,
"step": 1000
},
{
"epoch": 1.7006315789473683,
"grad_norm": 0.10846224427223206,
"learning_rate": 4.7208707332043623e-07,
"loss": 0.3998772859573364,
"step": 1010
},
{
"epoch": 1.7174736842105263,
"grad_norm": 0.10248563438653946,
"learning_rate": 4.6230923775497714e-07,
"loss": 0.38056583404541017,
"step": 1020
},
{
"epoch": 1.7343157894736843,
"grad_norm": 0.12221980094909668,
"learning_rate": 4.5254587836104964e-07,
"loss": 0.39371190071105955,
"step": 1030
},
{
"epoch": 1.751157894736842,
"grad_norm": 0.10641586035490036,
"learning_rate": 4.4280074502475017e-07,
"loss": 0.4280440330505371,
"step": 1040
},
{
"epoch": 1.768,
"grad_norm": 0.12907131016254425,
"learning_rate": 4.3307758063195796e-07,
"loss": 0.3791615962982178,
"step": 1050
},
{
"epoch": 1.784842105263158,
"grad_norm": 0.12383506447076797,
"learning_rate": 4.233801196307762e-07,
"loss": 0.347782301902771,
"step": 1060
},
{
"epoch": 1.8016842105263158,
"grad_norm": 0.12547679245471954,
"learning_rate": 4.1371208659721536e-07,
"loss": 0.38370628356933595,
"step": 1070
},
{
"epoch": 1.8185263157894735,
"grad_norm": 0.10580642521381378,
"learning_rate": 4.0407719480466736e-07,
"loss": 0.40404376983642576,
"step": 1080
},
{
"epoch": 1.8353684210526315,
"grad_norm": 0.1055402085185051,
"learning_rate": 3.944791447977213e-07,
"loss": 0.4167450428009033,
"step": 1090
},
{
"epoch": 1.8522105263157895,
"grad_norm": 0.11053823679685593,
"learning_rate": 3.849216229708671e-07,
"loss": 0.4046513080596924,
"step": 1100
},
{
"epoch": 1.8690526315789473,
"grad_norm": 0.10185246914625168,
"learning_rate": 3.7540830015263526e-07,
"loss": 0.39672977924346925,
"step": 1110
},
{
"epoch": 1.8858947368421053,
"grad_norm": 0.08342823386192322,
"learning_rate": 3.6594283019571416e-07,
"loss": 0.39356396198272703,
"step": 1120
},
{
"epoch": 1.9027368421052633,
"grad_norm": 0.11821646988391876,
"learning_rate": 3.565288485735874e-07,
"loss": 0.42082643508911133,
"step": 1130
},
{
"epoch": 1.919578947368421,
"grad_norm": 0.1106327474117279,
"learning_rate": 3.4716997098423085e-07,
"loss": 0.34105117321014405,
"step": 1140
},
{
"epoch": 1.9364210526315788,
"grad_norm": 0.11533800512552261,
"learning_rate": 3.378697919614045e-07,
"loss": 0.3924069404602051,
"step": 1150
},
{
"epoch": 1.9532631578947368,
"grad_norm": 0.1431114822626114,
"learning_rate": 3.286318834940729e-07,
"loss": 0.3922377586364746,
"step": 1160
},
{
"epoch": 1.9701052631578948,
"grad_norm": 0.16050194203853607,
"learning_rate": 3.1945979365448517e-07,
"loss": 0.3745201587677002,
"step": 1170
},
{
"epoch": 1.9869473684210526,
"grad_norm": 0.11921833455562592,
"learning_rate": 3.103570452354402e-07,
"loss": 0.40110602378845217,
"step": 1180
},
{
"epoch": 2.0033684210526315,
"grad_norm": 0.0832003727555275,
"learning_rate": 3.013271343972613e-07,
"loss": 0.3981154918670654,
"step": 1190
},
{
"epoch": 2.0202105263157897,
"grad_norm": 0.09975888580083847,
"learning_rate": 2.9237352932500046e-07,
"loss": 0.3726134061813354,
"step": 1200
},
{
"epoch": 2.0370526315789474,
"grad_norm": 0.14600081741809845,
"learning_rate": 2.8349966889638615e-07,
"loss": 0.42558698654174804,
"step": 1210
},
{
"epoch": 2.053894736842105,
"grad_norm": 0.10875770449638367,
"learning_rate": 2.747089613610278e-07,
"loss": 0.3682931184768677,
"step": 1220
},
{
"epoch": 2.070736842105263,
"grad_norm": 0.10050549358129501,
"learning_rate": 2.66004783031385e-07,
"loss": 0.3756644487380981,
"step": 1230
},
{
"epoch": 2.087578947368421,
"grad_norm": 0.08914914727210999,
"learning_rate": 2.573904769860009e-07,
"loss": 0.3804330825805664,
"step": 1240
},
{
"epoch": 2.104421052631579,
"grad_norm": 0.08296852558851242,
"learning_rate": 2.488693517855016e-07,
"loss": 0.3978404521942139,
"step": 1250
},
{
"epoch": 2.1212631578947367,
"grad_norm": 0.13885149359703064,
"learning_rate": 2.404446802018533e-07,
"loss": 0.3935218334197998,
"step": 1260
},
{
"epoch": 2.138105263157895,
"grad_norm": 0.13195137679576874,
"learning_rate": 2.3211969796136305e-07,
"loss": 0.42966952323913576,
"step": 1270
},
{
"epoch": 2.1549473684210527,
"grad_norm": 0.13367892801761627,
"learning_rate": 2.2389760250191038e-07,
"loss": 0.3679579019546509,
"step": 1280
},
{
"epoch": 2.1717894736842105,
"grad_norm": 0.1288345605134964,
"learning_rate": 2.1578155174488343e-07,
"loss": 0.41324810981750487,
"step": 1290
},
{
"epoch": 2.1886315789473683,
"grad_norm": 0.09626021236181259,
"learning_rate": 2.0777466288229205e-07,
"loss": 0.40120248794555663,
"step": 1300
},
{
"epoch": 2.2054736842105265,
"grad_norm": 0.10264381766319275,
"learning_rate": 1.9988001117952485e-07,
"loss": 0.3501007080078125,
"step": 1310
},
{
"epoch": 2.2223157894736842,
"grad_norm": 0.09031466394662857,
"learning_rate": 1.9210062879420973e-07,
"loss": 0.3839429378509521,
"step": 1320
},
{
"epoch": 2.239157894736842,
"grad_norm": 0.12686079740524292,
"learning_rate": 1.8443950361162957e-07,
"loss": 0.4338528156280518,
"step": 1330
},
{
"epoch": 2.2560000000000002,
"grad_norm": 0.12199016660451889,
"learning_rate": 1.7689957809714346e-07,
"loss": 0.39229888916015626,
"step": 1340
},
{
"epoch": 2.272842105263158,
"grad_norm": 0.12029567360877991,
"learning_rate": 1.694837481660525e-07,
"loss": 0.38006880283355715,
"step": 1350
},
{
"epoch": 2.2896842105263158,
"grad_norm": 0.08686309307813644,
"learning_rate": 1.6219486207134313e-07,
"loss": 0.3808159589767456,
"step": 1360
},
{
"epoch": 2.3065263157894735,
"grad_norm": 0.10810462385416031,
"learning_rate": 1.5503571930973785e-07,
"loss": 0.401824426651001,
"step": 1370
},
{
"epoch": 2.3233684210526317,
"grad_norm": 0.10281873494386673,
"learning_rate": 1.480090695464723e-07,
"loss": 0.40149493217468263,
"step": 1380
},
{
"epoch": 2.3402105263157895,
"grad_norm": 0.09503985196352005,
"learning_rate": 1.4111761155920975e-07,
"loss": 0.38567726612091063,
"step": 1390
},
{
"epoch": 2.3570526315789473,
"grad_norm": 0.10420782119035721,
"learning_rate": 1.3436399220150212e-07,
"loss": 0.3759742736816406,
"step": 1400
},
{
"epoch": 2.3738947368421055,
"grad_norm": 0.10681115835905075,
"learning_rate": 1.2775080538619347e-07,
"loss": 0.3913698196411133,
"step": 1410
},
{
"epoch": 2.3907368421052633,
"grad_norm": 0.10323983430862427,
"learning_rate": 1.2128059108915595e-07,
"loss": 0.39077584743499755,
"step": 1420
},
{
"epoch": 2.407578947368421,
"grad_norm": 0.09566064178943634,
"learning_rate": 1.1495583437374263e-07,
"loss": 0.39895172119140626,
"step": 1430
},
{
"epoch": 2.424421052631579,
"grad_norm": 0.13018426299095154,
"learning_rate": 1.0877896443633117e-07,
"loss": 0.38982129096984863,
"step": 1440
},
{
"epoch": 2.441263157894737,
"grad_norm": 0.10760781168937683,
"learning_rate": 1.0275235367332347e-07,
"loss": 0.3756714344024658,
"step": 1450
},
{
"epoch": 2.458105263157895,
"grad_norm": 0.11606904864311218,
"learning_rate": 9.687831676996238e-08,
"loss": 0.37858171463012696,
"step": 1460
},
{
"epoch": 2.4749473684210526,
"grad_norm": 0.12957172095775604,
"learning_rate": 9.115910981131336e-08,
"loss": 0.40050196647644043,
"step": 1470
},
{
"epoch": 2.4917894736842103,
"grad_norm": 0.11186131089925766,
"learning_rate": 8.559692941575231e-08,
"loss": 0.3684133291244507,
"step": 1480
},
{
"epoch": 2.5086315789473685,
"grad_norm": 0.13279542326927185,
"learning_rate": 8.019391189129466e-08,
"loss": 0.3452518224716187,
"step": 1490
},
{
"epoch": 2.5254736842105263,
"grad_norm": 0.09041756391525269,
"learning_rate": 7.495213241508786e-08,
"loss": 0.36301617622375487,
"step": 1500
},
{
"epoch": 2.542315789473684,
"grad_norm": 0.10033190995454788,
"learning_rate": 6.987360423638205e-08,
"loss": 0.3706004858016968,
"step": 1510
},
{
"epoch": 2.559157894736842,
"grad_norm": 0.10681814700365067,
"learning_rate": 6.49602779032865e-08,
"loss": 0.36011199951171874,
"step": 1520
},
{
"epoch": 2.576,
"grad_norm": 0.1008416935801506,
"learning_rate": 6.02140405136089e-08,
"loss": 0.37473766803741454,
"step": 1530
},
{
"epoch": 2.592842105263158,
"grad_norm": 0.11559010297060013,
"learning_rate": 5.5636714990062393e-08,
"loss": 0.39232525825500486,
"step": 1540
},
{
"epoch": 2.609684210526316,
"grad_norm": 0.10601615905761719,
"learning_rate": 5.1230059380123034e-08,
"loss": 0.34370343685150145,
"step": 1550
},
{
"epoch": 2.626526315789474,
"grad_norm": 0.11516924202442169,
"learning_rate": 4.699576618080331e-08,
"loss": 0.39509878158569334,
"step": 1560
},
{
"epoch": 2.6433684210526316,
"grad_norm": 0.11444627493619919,
"learning_rate": 4.293546168860163e-08,
"loss": 0.3881126165390015,
"step": 1570
},
{
"epoch": 2.6602105263157894,
"grad_norm": 0.09985339641571045,
"learning_rate": 3.9050705374879086e-08,
"loss": 0.34624040126800537,
"step": 1580
},
{
"epoch": 2.677052631578947,
"grad_norm": 0.10439962148666382,
"learning_rate": 3.534298928690166e-08,
"loss": 0.35141232013702395,
"step": 1590
},
{
"epoch": 2.6938947368421053,
"grad_norm": 0.12180087715387344,
"learning_rate": 3.181373747477822e-08,
"loss": 0.39980330467224123,
"step": 1600
}
],
"logging_steps": 10,
"max_steps": 1782,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.925778724822016e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}