{
  "best_global_step": 2600,
  "best_metric": 0.4455747,
  "best_model_checkpoint": "/root/ms-swift/output_1/v4-20250825-221955/checkpoint-2600",
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 3921,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007651109410864575,
      "grad_norm": 15.001960754394531,
      "learning_rate": 5.076142131979695e-07,
      "loss": 1.2726802825927734,
      "step": 1,
      "token_acc": 0.6764705777168274
    },
    {
      "epoch": 0.0038255547054322878,
      "grad_norm": 13.028708457946777,
      "learning_rate": 2.5380710659898476e-06,
      "loss": 1.495189905166626,
      "step": 5,
      "token_acc": 0.6392497420310974
    },
    {
      "epoch": 0.0076511094108645756,
      "grad_norm": 5.605969429016113,
      "learning_rate": 5.076142131979695e-06,
      "loss": 1.1087797164916993,
      "step": 10,
      "token_acc": 0.7032846808433533
    },
    {
      "epoch": 0.011476664116296864,
      "grad_norm": 4.179737091064453,
      "learning_rate": 7.614213197969544e-06,
      "loss": 0.7857523918151855,
      "step": 15,
      "token_acc": 0.7791855931282043
    },
    {
      "epoch": 0.015302218821729151,
      "grad_norm": 4.184815883636475,
      "learning_rate": 1.015228426395939e-05,
      "loss": 0.6412610054016114,
      "step": 20,
      "token_acc": 0.8024289011955261
    },
    {
      "epoch": 0.019127773527161437,
      "grad_norm": 3.188452959060669,
      "learning_rate": 1.2690355329949238e-05,
      "loss": 0.6599317073822022,
      "step": 25,
      "token_acc": 0.7991740703582764
    },
    {
      "epoch": 0.022953328232593728,
      "grad_norm": 2.735691785812378,
      "learning_rate": 1.5228426395939088e-05,
      "loss": 0.6142410278320313,
      "step": 30,
      "token_acc": 0.8127740025520325
    },
    {
      "epoch": 0.026778882938026015,
      "grad_norm": 2.9147984981536865,
      "learning_rate": 1.7766497461928935e-05,
      "loss": 0.6038710117340088,
      "step": 35,
      "token_acc": 0.813315212726593
    },
    {
      "epoch": 0.030604437643458302,
      "grad_norm": 2.701826572418213,
      "learning_rate": 2.030456852791878e-05,
      "loss": 0.5683969497680664,
      "step": 40,
      "token_acc": 0.8215563893318176
    },
    {
      "epoch": 0.03442999234889059,
      "grad_norm": 2.8082520961761475,
      "learning_rate": 2.284263959390863e-05,
      "loss": 0.6069915771484375,
      "step": 45,
      "token_acc": 0.8085312843322754
    },
    {
      "epoch": 0.03825554705432287,
      "grad_norm": 2.6436877250671387,
      "learning_rate": 2.5380710659898476e-05,
      "loss": 0.5704009056091308,
      "step": 50,
      "token_acc": 0.8219647407531738
    },
    {
      "epoch": 0.03825554705432287,
      "eval_loss": 0.5656692981719971,
      "eval_runtime": 6.1089,
      "eval_samples_per_second": 17.024,
      "eval_steps_per_second": 2.128,
      "eval_token_acc": 0.8207153677940369,
      "step": 50
    },
    {
      "epoch": 0.042081101759755164,
      "grad_norm": 2.689117670059204,
      "learning_rate": 2.7918781725888326e-05,
      "loss": 0.575815486907959,
      "step": 55,
      "token_acc": 0.8211867213249207
    },
    {
      "epoch": 0.045906656465187455,
      "grad_norm": 2.2790122032165527,
      "learning_rate": 3.0456852791878175e-05,
      "loss": 0.5862385749816894,
      "step": 60,
      "token_acc": 0.8205827474594116
    },
    {
      "epoch": 0.04973221117061974,
      "grad_norm": 2.6730895042419434,
      "learning_rate": 3.299492385786802e-05,
      "loss": 0.5797908782958985,
      "step": 65,
      "token_acc": 0.819099485874176
    },
    {
      "epoch": 0.05355776587605203,
      "grad_norm": 2.4526894092559814,
      "learning_rate": 3.553299492385787e-05,
      "loss": 0.6487821102142334,
      "step": 70,
      "token_acc": 0.7994943857192993
    },
    {
      "epoch": 0.057383320581484314,
      "grad_norm": 2.265002489089966,
      "learning_rate": 3.8071065989847716e-05,
      "loss": 0.6046820640563965,
      "step": 75,
      "token_acc": 0.8156428933143616
    },
    {
      "epoch": 0.061208875286916604,
      "grad_norm": 2.5733046531677246,
      "learning_rate": 4.060913705583756e-05,
      "loss": 0.5806538581848144,
      "step": 80,
      "token_acc": 0.8199408054351807
    },
    {
      "epoch": 0.06503442999234889,
      "grad_norm": 2.3223984241485596,
      "learning_rate": 4.3147208121827415e-05,
      "loss": 0.6687778949737548,
      "step": 85,
      "token_acc": 0.7976916432380676
    },
    {
      "epoch": 0.06885998469778118,
      "grad_norm": 1.9996718168258667,
      "learning_rate": 4.568527918781726e-05,
      "loss": 0.5714664459228516,
      "step": 90,
      "token_acc": 0.8250343203544617
    },
    {
      "epoch": 0.07268553940321347,
      "grad_norm": 2.2907140254974365,
      "learning_rate": 4.822335025380711e-05,
      "loss": 0.6378528118133545,
      "step": 95,
      "token_acc": 0.8057200312614441
    },
    {
      "epoch": 0.07651109410864575,
      "grad_norm": 1.9822206497192383,
      "learning_rate": 5.076142131979695e-05,
      "loss": 0.6435206413269043,
      "step": 100,
      "token_acc": 0.8065351843833923
    },
    {
      "epoch": 0.07651109410864575,
      "eval_loss": 0.6021918654441833,
      "eval_runtime": 6.7812,
      "eval_samples_per_second": 15.337,
      "eval_steps_per_second": 1.917,
      "eval_token_acc": 0.814389705657959,
      "step": 100
    },
    {
      "epoch": 0.08033664881407804,
      "grad_norm": 1.8460628986358643,
      "learning_rate": 5.329949238578681e-05,
      "loss": 0.6554917335510254,
      "step": 105,
      "token_acc": 0.8218502998352051
    },
    {
      "epoch": 0.08416220351951033,
      "grad_norm": 2.0430757999420166,
      "learning_rate": 5.583756345177665e-05,
      "loss": 0.7082652091979981,
      "step": 110,
      "token_acc": 0.7900523543357849
    },
    {
      "epoch": 0.08798775822494262,
      "grad_norm": 2.1763596534729004,
      "learning_rate": 5.83756345177665e-05,
      "loss": 0.6629996299743652,
      "step": 115,
      "token_acc": 0.7997561097145081
    },
    {
      "epoch": 0.09181331293037491,
      "grad_norm": 1.8452140092849731,
      "learning_rate": 6.091370558375635e-05,
      "loss": 0.6425168991088868,
      "step": 120,
      "token_acc": 0.8068760633468628
    },
    {
      "epoch": 0.09563886763580719,
      "grad_norm": 2.0671913623809814,
      "learning_rate": 6.34517766497462e-05,
      "loss": 0.6626197814941406,
      "step": 125,
      "token_acc": 0.8050779700279236
    },
    {
      "epoch": 0.09946442234123948,
      "grad_norm": 1.9707857370376587,
      "learning_rate": 6.598984771573604e-05,
      "loss": 0.6357526779174805,
      "step": 130,
      "token_acc": 0.8117111921310425
    },
    {
      "epoch": 0.10328997704667177,
      "grad_norm": 1.684924840927124,
      "learning_rate": 6.852791878172589e-05,
      "loss": 0.6633370399475098,
      "step": 135,
      "token_acc": 0.8078529834747314
    },
    {
      "epoch": 0.10711553175210406,
      "grad_norm": 1.8460227251052856,
      "learning_rate": 7.106598984771574e-05,
      "loss": 0.7214941501617431,
      "step": 140,
      "token_acc": 0.7888500690460205
    },
    {
      "epoch": 0.11094108645753634,
      "grad_norm": 1.8344098329544067,
      "learning_rate": 7.360406091370558e-05,
      "loss": 0.7153414249420166,
      "step": 145,
      "token_acc": 0.7917036414146423
    },
    {
      "epoch": 0.11476664116296863,
      "grad_norm": 2.0649237632751465,
      "learning_rate": 7.614213197969543e-05,
      "loss": 0.8018023490905761,
      "step": 150,
      "token_acc": 0.7870769500732422
    },
    {
      "epoch": 0.11476664116296863,
      "eval_loss": 0.6869359612464905,
      "eval_runtime": 7.176,
      "eval_samples_per_second": 14.493,
      "eval_steps_per_second": 1.812,
      "eval_token_acc": 0.8004150390625,
      "step": 150
    },
    {
      "epoch": 0.11859219586840092,
      "grad_norm": 2.0781986713409424,
      "learning_rate": 7.868020304568529e-05,
      "loss": 0.7426050186157227,
      "step": 155,
      "token_acc": 0.784966230392456
    },
    {
      "epoch": 0.12241775057383321,
      "grad_norm": 3.169353485107422,
      "learning_rate": 8.121827411167512e-05,
      "loss": 0.6967845916748047,
      "step": 160,
      "token_acc": 0.799592137336731
    },
    {
      "epoch": 0.1262433052792655,
      "grad_norm": 2.8000311851501465,
      "learning_rate": 8.375634517766498e-05,
      "loss": 0.6940568923950196,
      "step": 165,
      "token_acc": 0.7990803718566895
    },
    {
      "epoch": 0.13006885998469778,
      "grad_norm": 1.7199612855911255,
      "learning_rate": 8.629441624365483e-05,
      "loss": 0.6588430404663086,
      "step": 170,
      "token_acc": 0.8097391724586487
    },
    {
      "epoch": 0.13389441469013008,
      "grad_norm": 1.6225758790969849,
      "learning_rate": 8.883248730964467e-05,
      "loss": 0.7546923160552979,
      "step": 175,
      "token_acc": 0.7823401093482971
    },
    {
      "epoch": 0.13771996939556236,
      "grad_norm": 1.738344430923462,
      "learning_rate": 9.137055837563452e-05,
      "loss": 0.6869890213012695,
      "step": 180,
      "token_acc": 0.8029044270515442
    },
    {
      "epoch": 0.14154552410099464,
      "grad_norm": 1.7446883916854858,
      "learning_rate": 9.390862944162437e-05,
      "loss": 0.744170093536377,
      "step": 185,
      "token_acc": 0.7861586213111877
    },
    {
      "epoch": 0.14537107880642694,
      "grad_norm": 1.5875240564346313,
      "learning_rate": 9.644670050761421e-05,
      "loss": 0.6316198348999024,
      "step": 190,
      "token_acc": 0.8180323839187622
    },
    {
      "epoch": 0.14919663351185922,
      "grad_norm": 1.83012855052948,
      "learning_rate": 9.898477157360407e-05,
      "loss": 1.0572455406188965,
      "step": 195,
      "token_acc": 0.7630072236061096
    },
    {
      "epoch": 0.1530221882172915,
      "grad_norm": 9.883597373962402,
      "learning_rate": 9.99998398736932e-05,
      "loss": 0.703323221206665,
      "step": 200,
      "token_acc": 0.8030744194984436
    },
    {
      "epoch": 0.1530221882172915,
      "eval_loss": 0.7220072150230408,
      "eval_runtime": 7.3149,
      "eval_samples_per_second": 14.218,
      "eval_steps_per_second": 1.777,
      "eval_token_acc": 0.7940492630004883,
      "step": 200
    },
    {
      "epoch": 0.1568477429227238,
      "grad_norm": 1.4011379480361938,
      "learning_rate": 9.999886132775469e-05,
      "loss": 0.7197819232940674,
      "step": 205,
      "token_acc": 0.7953398823738098
    },
    {
      "epoch": 0.16067329762815608,
      "grad_norm": 1.5504759550094604,
      "learning_rate": 9.999699321232598e-05,
      "loss": 0.6872771263122559,
      "step": 210,
      "token_acc": 0.804167628288269
    },
    {
      "epoch": 0.16449885233358838,
      "grad_norm": 2.0014920234680176,
      "learning_rate": 9.999423556064422e-05,
      "loss": 0.6684097290039063,
      "step": 215,
      "token_acc": 0.8079100847244263
    },
    {
      "epoch": 0.16832440703902066,
      "grad_norm": 1.3064231872558594,
      "learning_rate": 9.999058842177297e-05,
      "loss": 0.747900390625,
      "step": 220,
      "token_acc": 0.7928001880645752
    },
    {
      "epoch": 0.17214996174445293,
      "grad_norm": 1.6330523490905762,
      "learning_rate": 9.998605186060137e-05,
      "loss": 0.715455961227417,
      "step": 225,
      "token_acc": 0.7988653779029846
    },
    {
      "epoch": 0.17597551644988524,
      "grad_norm": 1.6291477680206299,
      "learning_rate": 9.9980625957843e-05,
      "loss": 0.792291784286499,
      "step": 230,
      "token_acc": 0.7906692624092102
    },
    {
      "epoch": 0.17980107115531752,
      "grad_norm": 1.3224996328353882,
      "learning_rate": 9.99743108100344e-05,
      "loss": 0.6187815189361572,
      "step": 235,
      "token_acc": 0.8209345936775208
    },
    {
      "epoch": 0.18362662586074982,
      "grad_norm": 1.3888137340545654,
      "learning_rate": 9.996710652953338e-05,
      "loss": 0.7097324371337891,
      "step": 240,
      "token_acc": 0.8024294376373291
    },
    {
      "epoch": 0.1874521805661821,
      "grad_norm": 1.340208649635315,
      "learning_rate": 9.995901324451704e-05,
      "loss": 0.7415911674499511,
      "step": 245,
      "token_acc": 0.7968400716781616
    },
    {
      "epoch": 0.19127773527161437,
      "grad_norm": 1.1856446266174316,
      "learning_rate": 9.995003109897942e-05,
      "loss": 0.7001552581787109,
      "step": 250,
      "token_acc": 0.8009890913963318
    },
    {
      "epoch": 0.19127773527161437,
      "eval_loss": 0.6857067942619324,
      "eval_runtime": 7.3358,
      "eval_samples_per_second": 14.177,
      "eval_steps_per_second": 1.772,
      "eval_token_acc": 0.803743302822113,
      "step": 250
    },
    {
      "epoch": 0.19510328997704668,
      "grad_norm": 1.2998038530349731,
      "learning_rate": 9.994016025272905e-05,
      "loss": 0.6838603019714355,
      "step": 255,
      "token_acc": 0.8089724779129028
    },
    {
      "epoch": 0.19892884468247896,
      "grad_norm": 1.449840784072876,
      "learning_rate": 9.992940088138597e-05,
      "loss": 0.6695821762084961,
      "step": 260,
      "token_acc": 0.8115434646606445
    },
    {
      "epoch": 0.20275439938791126,
      "grad_norm": 2.188504219055176,
      "learning_rate": 9.991775317637873e-05,
      "loss": 0.7405529499053956,
      "step": 265,
      "token_acc": 0.7956330180168152
    },
    {
      "epoch": 0.20657995409334354,
      "grad_norm": 1.2301571369171143,
      "learning_rate": 9.99052173449409e-05,
      "loss": 0.7626109600067139,
      "step": 270,
      "token_acc": 0.7877880334854126
    },
    {
      "epoch": 0.21040550879877581,
      "grad_norm": 1.217523455619812,
      "learning_rate": 9.989179361010741e-05,
      "loss": 0.7369673728942872,
      "step": 275,
      "token_acc": 0.7953155040740967
    },
    {
      "epoch": 0.21423106350420812,
      "grad_norm": 1.3204615116119385,
      "learning_rate": 9.987748221071062e-05,
      "loss": 0.6772171497344971,
      "step": 280,
      "token_acc": 0.8045340180397034
    },
    {
      "epoch": 0.2180566182096404,
      "grad_norm": 1.3093225955963135,
      "learning_rate": 9.9862283401376e-05,
      "loss": 0.904904556274414,
      "step": 285,
      "token_acc": 0.7854760885238647
    },
    {
      "epoch": 0.22188217291507267,
      "grad_norm": 1.4255338907241821,
      "learning_rate": 9.984619745251767e-05,
      "loss": 0.669553565979004,
      "step": 290,
      "token_acc": 0.8050349354743958
    },
    {
      "epoch": 0.22570772762050498,
      "grad_norm": 1.4884202480316162,
      "learning_rate": 9.98292246503335e-05,
      "loss": 0.7445178508758545,
      "step": 295,
      "token_acc": 0.8016032576560974
    },
    {
      "epoch": 0.22953328232593725,
      "grad_norm": 1.3081945180892944,
      "learning_rate": 9.981136529680013e-05,
      "loss": 0.6435537815093995,
      "step": 300,
      "token_acc": 0.8145782947540283
    },
    {
      "epoch": 0.22953328232593725,
      "eval_loss": 0.6707971096038818,
      "eval_runtime": 7.6759,
      "eval_samples_per_second": 13.549,
      "eval_steps_per_second": 1.694,
      "eval_token_acc": 0.809266984462738,
      "step": 300
    },
    {
      "epoch": 0.23335883703136956,
      "grad_norm": 1.2945371866226196,
      "learning_rate": 9.979261970966752e-05,
      "loss": 0.671229362487793,
      "step": 305,
      "token_acc": 0.8093103170394897
    },
    {
      "epoch": 0.23718439173680184,
      "grad_norm": 1.094642996788025,
      "learning_rate": 9.97729882224533e-05,
      "loss": 0.638882064819336,
      "step": 310,
      "token_acc": 0.8210087418556213
    },
    {
      "epoch": 0.2410099464422341,
      "grad_norm": 1.2039848566055298,
      "learning_rate": 9.975247118443686e-05,
      "loss": 0.7105097770690918,
      "step": 315,
      "token_acc": 0.79979407787323
    },
    {
      "epoch": 0.24483550114766642,
      "grad_norm": 9.3181734085083,
      "learning_rate": 9.973106896065318e-05,
      "loss": 0.7334442615509034,
      "step": 320,
      "token_acc": 0.8001999855041504
    },
    {
      "epoch": 0.2486610558530987,
      "grad_norm": 1.2156879901885986,
      "learning_rate": 9.970878193188617e-05,
      "loss": 0.6516756534576416,
      "step": 325,
      "token_acc": 0.8167580366134644
    },
    {
      "epoch": 0.252486610558531,
      "grad_norm": 1.382604956626892,
      "learning_rate": 9.968561049466214e-05,
      "loss": 0.7214525222778321,
      "step": 330,
      "token_acc": 0.7979754209518433
    },
    {
      "epoch": 0.2563121652639633,
      "grad_norm": 1.0208624601364136,
      "learning_rate": 9.96615550612425e-05,
      "loss": 0.6243480205535888,
      "step": 335,
      "token_acc": 0.822067379951477
    },
    {
      "epoch": 0.26013771996939555,
      "grad_norm": 1.2273170948028564,
      "learning_rate": 9.96366160596166e-05,
      "loss": 0.7538263320922851,
      "step": 340,
      "token_acc": 0.7931398749351501
    },
    {
      "epoch": 0.26396327467482783,
      "grad_norm": 1.005936622619629,
      "learning_rate": 9.961079393349408e-05,
      "loss": 0.6441500663757325,
      "step": 345,
      "token_acc": 0.8183194398880005
    },
    {
      "epoch": 0.26778882938026016,
      "grad_norm": 1.2466620206832886,
      "learning_rate": 9.958408914229687e-05,
      "loss": 0.7031271934509278,
      "step": 350,
      "token_acc": 0.8006601929664612
    },
    {
      "epoch": 0.26778882938026016,
      "eval_loss": 0.6655329465866089,
      "eval_runtime": 8.6572,
      "eval_samples_per_second": 12.013,
      "eval_steps_per_second": 1.502,
      "eval_token_acc": 0.810479998588562,
      "step": 350
    },
    {
      "epoch": 0.27161438408569244,
      "grad_norm": 1.1055852174758911,
      "learning_rate": 9.955650216115118e-05,
      "loss": 0.7128757953643798,
      "step": 355,
      "token_acc": 0.8017191886901855
    },
    {
      "epoch": 0.2754399387911247,
      "grad_norm": 0.9971266388893127,
      "learning_rate": 9.952803348087888e-05,
      "loss": 0.6931791305541992,
      "step": 360,
      "token_acc": 0.8039373159408569
    },
    {
      "epoch": 0.279265493496557,
      "grad_norm": 1.3013373613357544,
      "learning_rate": 9.949868360798893e-05,
      "loss": 0.6467844486236572,
      "step": 365,
      "token_acc": 0.8141829371452332
    },
    {
      "epoch": 0.28309104820198927,
      "grad_norm": 1.1281312704086304,
      "learning_rate": 9.946845306466822e-05,
      "loss": 0.6698862075805664,
      "step": 370,
      "token_acc": 0.8099541664123535
    },
    {
      "epoch": 0.2869166029074216,
      "grad_norm": 1.0093694925308228,
      "learning_rate": 9.943734238877241e-05,
      "loss": 0.640196704864502,
      "step": 375,
      "token_acc": 0.8200778961181641
    },
    {
      "epoch": 0.2907421576128539,
      "grad_norm": 1.161116361618042,
      "learning_rate": 9.940535213381623e-05,
      "loss": 0.7982209682464599,
      "step": 380,
      "token_acc": 0.803227961063385
    },
    {
      "epoch": 0.29456771231828616,
      "grad_norm": 1.17842435836792,
      "learning_rate": 9.937248286896376e-05,
      "loss": 0.674342155456543,
      "step": 385,
      "token_acc": 0.8081824779510498
    },
    {
      "epoch": 0.29839326702371843,
      "grad_norm": 1.2346426248550415,
      "learning_rate": 9.933873517901825e-05,
      "loss": 0.6990632057189942,
      "step": 390,
      "token_acc": 0.8067554235458374
    },
    {
      "epoch": 0.3022188217291507,
      "grad_norm": 1.1731232404708862,
      "learning_rate": 9.930410966441164e-05,
      "loss": 0.7052478790283203,
      "step": 395,
      "token_acc": 0.8015207052230835
    },
    {
      "epoch": 0.306044376434583,
      "grad_norm": 1.1818660497665405,
      "learning_rate": 9.926860694119398e-05,
      "loss": 0.6852362632751465,
      "step": 400,
      "token_acc": 0.8096556663513184
    },
    {
      "epoch": 0.306044376434583,
      "eval_loss": 0.6521208882331848,
      "eval_runtime": 7.4215,
      "eval_samples_per_second": 14.013,
      "eval_steps_per_second": 1.752,
      "eval_token_acc": 0.8145099878311157,
      "step": 400
    },
    {
      "epoch": 0.3098699311400153,
      "grad_norm": 1.166639804840088,
      "learning_rate": 9.923222764102248e-05,
      "loss": 0.6215761661529541,
      "step": 405,
      "token_acc": 0.8188217282295227
    },
    {
      "epoch": 0.3136954858454476,
      "grad_norm": 1.0579371452331543,
      "learning_rate": 9.919497241115016e-05,
      "loss": 0.6619209289550781,
      "step": 410,
      "token_acc": 0.8130149841308594
    },
    {
      "epoch": 0.3175210405508799,
      "grad_norm": 1.025505542755127,
      "learning_rate": 9.915684191441446e-05,
      "loss": 0.681110954284668,
      "step": 415,
      "token_acc": 0.8061873316764832
    },
    {
      "epoch": 0.32134659525631215,
      "grad_norm": 1.1900734901428223,
      "learning_rate": 9.911783682922533e-05,
      "loss": 0.6414823532104492,
      "step": 420,
      "token_acc": 0.8169435262680054
    },
    {
      "epoch": 0.32517214996174443,
      "grad_norm": 1.0435925722122192,
      "learning_rate": 9.907795784955327e-05,
      "loss": 0.650167179107666,
      "step": 425,
      "token_acc": 0.8135402202606201
    },
    {
      "epoch": 0.32899770466717676,
      "grad_norm": 0.9976479411125183,
      "learning_rate": 9.90372056849169e-05,
      "loss": 0.6622737884521485,
      "step": 430,
      "token_acc": 0.8130133152008057
    },
    {
      "epoch": 0.33282325937260904,
      "grad_norm": 1.025640606880188,
      "learning_rate": 9.899558106037039e-05,
      "loss": 0.7082881927490234,
      "step": 435,
      "token_acc": 0.8012630343437195
    },
    {
      "epoch": 0.3366488140780413,
      "grad_norm": 1.1692794561386108,
      "learning_rate": 9.895308471649052e-05,
      "loss": 0.7149417877197266,
      "step": 440,
      "token_acc": 0.8121411204338074
    },
    {
      "epoch": 0.3404743687834736,
      "grad_norm": 1.0781068801879883,
      "learning_rate": 9.890971740936352e-05,
      "loss": 0.6460227012634278,
      "step": 445,
      "token_acc": 0.8171982169151306
    },
    {
      "epoch": 0.34429992348890587,
      "grad_norm": 1.7874302864074707,
      "learning_rate": 9.886547991057162e-05,
      "loss": 0.6831697463989258,
      "step": 450,
      "token_acc": 0.8117350339889526
    },
    {
      "epoch": 0.34429992348890587,
      "eval_loss": 0.6621751189231873,
      "eval_runtime": 7.4514,
      "eval_samples_per_second": 13.957,
      "eval_steps_per_second": 1.745,
      "eval_token_acc": 0.8132668733596802,
      "step": 450
    },
    {
      "epoch": 0.3481254781943382,
      "grad_norm": 1.1658034324645996,
      "learning_rate": 9.882037300717936e-05,
      "loss": 0.6283795356750488,
      "step": 455,
      "token_acc": 0.8232808709144592
    },
    {
      "epoch": 0.3519510328997705,
      "grad_norm": 0.8861122727394104,
      "learning_rate": 9.87743975017195e-05,
      "loss": 0.5845287322998047,
      "step": 460,
      "token_acc": 0.8338332176208496
    },
    {
      "epoch": 0.35577658760520275,
      "grad_norm": 1.1082383394241333,
      "learning_rate": 9.872755421217881e-05,
      "loss": 0.7373793125152588,
      "step": 465,
      "token_acc": 0.7927750945091248
    },
    {
      "epoch": 0.35960214231063503,
      "grad_norm": 0.9668710827827454,
      "learning_rate": 9.867984397198348e-05,
      "loss": 0.6381460189819336,
      "step": 470,
      "token_acc": 0.8192023038864136
    },
    {
      "epoch": 0.3634276970160673,
      "grad_norm": 1.0808384418487549,
      "learning_rate": 9.863126762998436e-05,
      "loss": 0.7160910606384278,
      "step": 475,
      "token_acc": 0.8008524179458618
    },
    {
      "epoch": 0.36725325172149964,
      "grad_norm": 1.0136635303497314,
      "learning_rate": 9.858182605044172e-05,
      "loss": 0.6220456123352051,
      "step": 480,
      "token_acc": 0.8248037099838257
    },
    {
      "epoch": 0.3710788064269319,
      "grad_norm": 1.2998031377792358,
      "learning_rate": 9.853152011301003e-05,
      "loss": 0.6555353164672851,
      "step": 485,
      "token_acc": 0.8161742687225342
    },
    {
      "epoch": 0.3749043611323642,
      "grad_norm": 1.0749304294586182,
      "learning_rate": 9.848035071272222e-05,
      "loss": 0.6211759567260742,
      "step": 490,
      "token_acc": 0.821867048740387
    },
    {
      "epoch": 0.37872991583779647,
      "grad_norm": 0.9710472226142883,
      "learning_rate": 9.842831875997375e-05,
      "loss": 0.6431370735168457,
      "step": 495,
      "token_acc": 0.8220862150192261
    },
    {
      "epoch": 0.38255547054322875,
      "grad_norm": 1.0042985677719116,
      "learning_rate": 9.837542518050649e-05,
      "loss": 0.6818212509155274,
      "step": 500,
      "token_acc": 0.8100237846374512
    },
    {
      "epoch": 0.38255547054322875,
      "eval_loss": 0.6374099254608154,
      "eval_runtime": 7.6101,
      "eval_samples_per_second": 13.666,
      "eval_steps_per_second": 1.708,
      "eval_token_acc": 0.8186302185058594,
      "step": 500
    },
    {
      "epoch": 0.3863810252486611,
      "grad_norm": 1.0197993516921997,
      "learning_rate": 9.832167091539214e-05,
      "loss": 0.6007397174835205,
      "step": 505,
      "token_acc": 0.8282684683799744
    },
    {
      "epoch": 0.39020657995409336,
      "grad_norm": 1.0835719108581543,
      "learning_rate": 9.826705692101555e-05,
      "loss": 0.7205737113952637,
      "step": 510,
      "token_acc": 0.7967984080314636
    },
    {
      "epoch": 0.39403213465952563,
      "grad_norm": 0.9672032594680786,
      "learning_rate": 9.821158416905773e-05,
      "loss": 0.6137794494628906,
      "step": 515,
      "token_acc": 0.8238478899002075
    },
    {
      "epoch": 0.3978576893649579,
      "grad_norm": 1.0274014472961426,
      "learning_rate": 9.815525364647853e-05,
      "loss": 0.6839157104492187,
      "step": 520,
      "token_acc": 0.8090466856956482
    },
    {
      "epoch": 0.4016832440703902,
      "grad_norm": 0.966098427772522,
      "learning_rate": 9.809806635549901e-05,
      "loss": 0.5641196250915528,
      "step": 525,
      "token_acc": 0.8359003663063049
    },
    {
      "epoch": 0.4055087987758225,
      "grad_norm": 1.1138949394226074,
      "learning_rate": 9.804002331358377e-05,
      "loss": 0.615296745300293,
      "step": 530,
      "token_acc": 0.8272916674613953
    },
    {
      "epoch": 0.4093343534812548,
      "grad_norm": 2.4379749298095703,
      "learning_rate": 9.798112555342268e-05,
      "loss": 0.5940766334533691,
      "step": 535,
      "token_acc": 0.8358057737350464
    },
    {
      "epoch": 0.4131599081866871,
      "grad_norm": 1.1517431735992432,
      "learning_rate": 9.792137412291265e-05,
      "loss": 0.6338438034057617,
      "step": 540,
      "token_acc": 0.8158274292945862
    },
    {
      "epoch": 0.41698546289211935,
      "grad_norm": 6.055464744567871,
      "learning_rate": 9.786077008513883e-05,
      "loss": 0.6075318336486817,
      "step": 545,
      "token_acc": 0.8209756016731262
    },
    {
      "epoch": 0.42081101759755163,
      "grad_norm": 0.9165500402450562,
      "learning_rate": 9.779931451835589e-05,
      "loss": 0.659608793258667,
      "step": 550,
      "token_acc": 0.815700113773346
    },
    {
      "epoch": 0.42081101759755163,
      "eval_loss": 0.6386705636978149,
      "eval_runtime": 8.1335,
      "eval_samples_per_second": 12.787,
      "eval_steps_per_second": 1.598,
      "eval_token_acc": 0.8192116618156433,
      "step": 550
    },
    {
      "epoch": 0.4246365723029839,
      "grad_norm": 3.8534209728240967,
      "learning_rate": 9.773700851596864e-05,
      "loss": 0.689471435546875,
      "step": 555,
      "token_acc": 0.8077275156974792
    },
    {
      "epoch": 0.42846212700841624,
      "grad_norm": 1.0717378854751587,
      "learning_rate": 9.767385318651272e-05,
      "loss": 0.6236325740814209,
      "step": 560,
      "token_acc": 0.826772928237915
    },
    {
      "epoch": 0.4322876817138485,
      "grad_norm": 0.9380275011062622,
      "learning_rate": 9.760984965363478e-05,
      "loss": 0.6055815696716309,
      "step": 565,
      "token_acc": 0.8277127146720886
    },
    {
      "epoch": 0.4361132364192808,
      "grad_norm": 0.9301455020904541,
      "learning_rate": 9.75449990560726e-05,
      "loss": 0.5975317001342774,
      "step": 570,
      "token_acc": 0.8306687474250793
    },
    {
      "epoch": 0.43993879112471307,
      "grad_norm": 0.9384899735450745,
      "learning_rate": 9.747930254763467e-05,
      "loss": 0.631765604019165,
      "step": 575,
      "token_acc": 0.8169443011283875
    },
    {
      "epoch": 0.44376434583014535,
      "grad_norm": 0.9002703428268433,
      "learning_rate": 9.74127612971798e-05,
      "loss": 0.6044256210327148,
      "step": 580,
      "token_acc": 0.8257142305374146
    },
    {
      "epoch": 0.4475899005355777,
      "grad_norm": 0.8999844193458557,
      "learning_rate": 9.73453764885963e-05,
      "loss": 0.6237145900726319,
      "step": 585,
      "token_acc": 0.8252273797988892
    },
    {
      "epoch": 0.45141545524100996,
      "grad_norm": 0.9064670205116272,
      "learning_rate": 9.727714932078088e-05,
      "loss": 0.6549233436584473,
      "step": 590,
      "token_acc": 0.8153916001319885
    },
    {
      "epoch": 0.45524100994644223,
      "grad_norm": 1.0747268199920654,
      "learning_rate": 9.720808100761729e-05,
      "loss": 0.6232728004455567,
      "step": 595,
      "token_acc": 0.8211687207221985
    },
    {
      "epoch": 0.4590665646518745,
      "grad_norm": 1.031503438949585,
      "learning_rate": 9.713817277795482e-05,
      "loss": 0.6111268043518067,
      "step": 600,
      "token_acc": 0.8248355984687805
    },
    {
      "epoch": 0.4590665646518745,
      "eval_loss": 0.634019136428833,
      "eval_runtime": 7.7263,
      "eval_samples_per_second": 13.46,
      "eval_steps_per_second": 1.683,
      "eval_token_acc": 0.8194121718406677,
      "step": 600
    },
    {
      "epoch": 0.4628921193573068,
      "grad_norm": 18.878767013549805,
      "learning_rate": 9.706742587558635e-05,
      "loss": 0.7319217681884765,
      "step": 605,
      "token_acc": 0.8135314583778381
    },
    {
      "epoch": 0.4667176740627391,
      "grad_norm": 0.9823316931724548,
      "learning_rate": 9.699584155922625e-05,
      "loss": 0.658491849899292,
      "step": 610,
      "token_acc": 0.8164398670196533
    },
    {
      "epoch": 0.4705432287681714,
      "grad_norm": 1.1845817565917969,
      "learning_rate": 9.692342110248802e-05,
      "loss": 0.6585088729858398,
      "step": 615,
      "token_acc": 0.8140710592269897
    },
    {
      "epoch": 0.4743687834736037,
      "grad_norm": 1.0284193754196167,
      "learning_rate": 9.685016579386159e-05,
      "loss": 0.6060408592224121,
      "step": 620,
      "token_acc": 0.8255147933959961
    },
    {
      "epoch": 0.47819433817903595,
      "grad_norm": 1.0485318899154663,
      "learning_rate": 9.677607693669035e-05,
      "loss": 0.6855095863342285,
      "step": 625,
      "token_acc": 0.8098092079162598
    },
    {
      "epoch": 0.4820198928844682,
      "grad_norm": 2.119432210922241,
      "learning_rate": 9.67011558491481e-05,
      "loss": 0.6514041423797607,
      "step": 630,
      "token_acc": 0.8163265585899353
    },
    {
      "epoch": 0.48584544758990056,
      "grad_norm": 0.9313147664070129,
      "learning_rate": 9.662540386421546e-05,
      "loss": 0.6687870025634766,
      "step": 635,
      "token_acc": 0.8119432330131531
    },
    {
      "epoch": 0.48967100229533284,
      "grad_norm": 0.9492276310920715,
      "learning_rate": 9.65488223296562e-05,
      "loss": 0.6563722610473632,
      "step": 640,
      "token_acc": 0.8168354034423828
    },
    {
      "epoch": 0.4934965570007651,
      "grad_norm": 1.0297837257385254,
      "learning_rate": 9.64714126079933e-05,
      "loss": 0.5913913726806641,
      "step": 645,
      "token_acc": 0.828011691570282
    },
    {
      "epoch": 0.4973221117061974,
      "grad_norm": 1.0799224376678467,
      "learning_rate": 9.639317607648463e-05,
      "loss": 0.6493720054626465,
      "step": 650,
      "token_acc": 0.8191680312156677
    },
    {
      "epoch": 0.4973221117061974,
      "eval_loss": 0.6336340308189392,
      "eval_runtime": 8.085,
      "eval_samples_per_second": 12.863,
      "eval_steps_per_second": 1.608,
      "eval_token_acc": 0.8203945755958557,
      "step": 650
    },
    {
      "epoch": 0.5011476664116297,
      "grad_norm": 0.9438362717628479,
      "learning_rate": 9.631411412709856e-05,
      "loss": 0.634061050415039,
      "step": 655,
      "token_acc": 0.8196708559989929
    },
    {
      "epoch": 0.504973221117062,
      "grad_norm": 0.9886628985404968,
      "learning_rate": 9.623422816648905e-05,
      "loss": 0.6314868450164794,
      "step": 660,
      "token_acc": 0.8192417025566101
    },
    {
      "epoch": 0.5087987758224942,
      "grad_norm": 1.053757667541504,
      "learning_rate": 9.615351961597075e-05,
      "loss": 0.6161402225494385,
      "step": 665,
      "token_acc": 0.8249170780181885
    },
    {
      "epoch": 0.5126243305279266,
      "grad_norm": 0.8857008814811707,
      "learning_rate": 9.607198991149365e-05,
      "loss": 0.6382771968841553,
      "step": 670,
      "token_acc": 0.8191618323326111
    },
    {
      "epoch": 0.5164498852333589,
      "grad_norm": 0.9176872968673706,
      "learning_rate": 9.598964050361749e-05,
      "loss": 0.6668461799621582,
      "step": 675,
      "token_acc": 0.8112070560455322
    },
    {
      "epoch": 0.5202754399387911,
      "grad_norm": 0.8668197393417358,
      "learning_rate": 9.590647285748613e-05,
      "loss": 0.6178393363952637,
      "step": 680,
      "token_acc": 0.8246564269065857
    },
    {
      "epoch": 0.5241009946442234,
      "grad_norm": 0.8694312572479248,
      "learning_rate": 9.582248845280121e-05,
      "loss": 0.6056000709533691,
      "step": 685,
      "token_acc": 0.8267983198165894
    },
    {
      "epoch": 0.5279265493496557,
      "grad_norm": 1.0597003698349,
      "learning_rate": 9.57376887837961e-05,
      "loss": 0.6181661128997803,
      "step": 690,
      "token_acc": 0.8232805728912354
    },
    {
      "epoch": 0.531752104055088,
      "grad_norm": 0.8571362495422363,
      "learning_rate": 9.565207535920906e-05,
      "loss": 0.6172348976135253,
      "step": 695,
      "token_acc": 0.8221156597137451
    },
    {
      "epoch": 0.5355776587605203,
      "grad_norm": 0.9073564410209656,
      "learning_rate": 9.556564970225666e-05,
      "loss": 0.6466682434082032,
      "step": 700,
      "token_acc": 0.8197444081306458
    },
    {
      "epoch": 0.5355776587605203,
      "eval_loss": 0.6152887344360352,
      "eval_runtime": 7.5903,
      "eval_samples_per_second": 13.702,
      "eval_steps_per_second": 1.713,
      "eval_token_acc": 0.8228907585144043,
      "step": 700
    },
    {
      "epoch": 0.5394032134659525,
      "grad_norm": 0.9663663506507874,
      "learning_rate": 9.547841335060641e-05,
      "loss": 0.6051031112670898,
      "step": 705,
      "token_acc": 0.8252653479576111
    },
    {
      "epoch": 0.5432287681713849,
      "grad_norm": 0.9873702526092529,
      "learning_rate": 9.539036785634961e-05,
      "loss": 0.6133259296417236,
      "step": 710,
      "token_acc": 0.8265376687049866
    },
    {
      "epoch": 0.5470543228768171,
      "grad_norm": 0.8775202035903931,
      "learning_rate": 9.530151478597366e-05,
      "loss": 0.6536783218383789,
      "step": 715,
      "token_acc": 0.8136675357818604
    },
    {
      "epoch": 0.5508798775822494,
      "grad_norm": 0.8767590522766113,
      "learning_rate": 9.521185572033416e-05,
      "loss": 0.5738767147064209,
      "step": 720,
      "token_acc": 0.8351121544837952
    },
    {
      "epoch": 0.5547054322876818,
      "grad_norm": 0.9340411424636841,
      "learning_rate": 9.512139225462682e-05,
      "loss": 0.60714693069458,
      "step": 725,
      "token_acc": 0.8243422508239746
    },
    {
      "epoch": 0.558530986993114,
      "grad_norm": 0.924868643283844,
      "learning_rate": 9.503012599835907e-05,
      "loss": 0.5976818084716797,
      "step": 730,
      "token_acc": 0.8307338953018188
    },
    {
      "epoch": 0.5623565416985463,
      "grad_norm": 1.1880912780761719,
      "learning_rate": 9.493805857532148e-05,
      "loss": 0.7305125236511231,
      "step": 735,
      "token_acc": 0.7984393239021301
    },
    {
      "epoch": 0.5661820964039785,
      "grad_norm": 0.8552014827728271,
      "learning_rate": 9.48451916235587e-05,
      "loss": 0.631963062286377,
      "step": 740,
      "token_acc": 0.8215923309326172
    },
    {
      "epoch": 0.5700076511094109,
      "grad_norm": 0.9064537882804871,
      "learning_rate": 9.475152679534052e-05,
      "loss": 0.5955155849456787,
      "step": 745,
      "token_acc": 0.8277559876441956
    },
    {
      "epoch": 0.5738332058148432,
      "grad_norm": 0.953490138053894,
      "learning_rate": 9.465706575713236e-05,
      "loss": 0.5581603050231934,
      "step": 750,
      "token_acc": 0.8392514586448669
    },
    {
      "epoch": 0.5738332058148432,
      "eval_loss": 0.6101195812225342,
      "eval_runtime": 7.839,
      "eval_samples_per_second": 13.267,
      "eval_steps_per_second": 1.658,
      "eval_token_acc": 0.8254771828651428,
      "step": 750
    },
    {
      "epoch": 0.5776587605202754,
      "grad_norm": 0.9111331105232239,
      "learning_rate": 9.456181018956567e-05,
      "loss": 0.5761038780212402,
      "step": 755,
      "token_acc": 0.8335671424865723
    },
    {
      "epoch": 0.5814843152257078,
      "grad_norm": 0.9279806613922119,
      "learning_rate": 9.446576178740795e-05,
      "loss": 0.6236689567565918,
      "step": 760,
      "token_acc": 0.8229003548622131
    },
    {
      "epoch": 0.58530986993114,
      "grad_norm": 0.8497107028961182,
      "learning_rate": 9.436892225953269e-05,
      "loss": 0.6130060672760009,
      "step": 765,
      "token_acc": 0.8241313099861145
    },
    {
      "epoch": 0.5891354246365723,
      "grad_norm": 0.933496356010437,
      "learning_rate": 9.427129332888891e-05,
      "loss": 0.6331747055053711,
      "step": 770,
      "token_acc": 0.8258751034736633
    },
    {
      "epoch": 0.5929609793420046,
      "grad_norm": 0.95807945728302,
      "learning_rate": 9.417287673247052e-05,
      "loss": 0.5901139259338379,
      "step": 775,
      "token_acc": 0.8336220383644104
    },
    {
      "epoch": 0.5967865340474369,
      "grad_norm": 0.9931139349937439,
      "learning_rate": 9.407367422128547e-05,
      "loss": 0.6363272666931152,
      "step": 780,
      "token_acc": 0.8183371424674988
    },
    {
      "epoch": 0.6006120887528692,
      "grad_norm": 0.8274650573730469,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.5664173603057862,
      "step": 785,
      "token_acc": 0.8378447890281677
    },
    {
      "epoch": 0.6044376434583014,
      "grad_norm": 1.017050862312317,
      "learning_rate": 9.387291852852967e-05,
      "loss": 0.6467793464660645,
      "step": 790,
      "token_acc": 0.818406343460083
    },
    {
      "epoch": 0.6082631981637338,
      "grad_norm": 0.8612256050109863,
      "learning_rate": 9.377136891876306e-05,
      "loss": 0.644353199005127,
      "step": 795,
      "token_acc": 0.8149409294128418
    },
    {
      "epoch": 0.612088752869166,
      "grad_norm": 0.9359307289123535,
      "learning_rate": 9.366904053777447e-05,
      "loss": 0.6541380882263184,
      "step": 800,
      "token_acc": 0.8136578798294067
    },
    {
      "epoch": 0.612088752869166,
      "eval_loss": 0.600931704044342,
      "eval_runtime": 7.7335,
      "eval_samples_per_second": 13.448,
      "eval_steps_per_second": 1.681,
      "eval_token_acc": 0.8262491226196289,
      "step": 800
    },
    {
      "epoch": 0.6159143075745983,
      "grad_norm": 0.8562702536582947,
      "learning_rate": 9.356593520616948e-05,
      "loss": 0.5768568038940429,
      "step": 805,
      "token_acc": 0.8369309902191162
    },
    {
      "epoch": 0.6197398622800306,
      "grad_norm": 0.8822196125984192,
      "learning_rate": 9.3462054758377e-05,
      "loss": 0.6508576393127441,
      "step": 810,
      "token_acc": 0.8174927234649658
    },
    {
      "epoch": 0.6235654169854629,
      "grad_norm": 0.8938590288162231,
      "learning_rate": 9.335740104261664e-05,
      "loss": 0.6667316436767579,
      "step": 815,
      "token_acc": 0.8100781440734863
    },
    {
      "epoch": 0.6273909716908952,
      "grad_norm": 1.007367491722107,
      "learning_rate": 9.32519759208659e-05,
      "loss": 0.72325439453125,
      "step": 820,
      "token_acc": 0.8077250123023987
    },
    {
      "epoch": 0.6312165263963274,
      "grad_norm": 1.01559579372406,
      "learning_rate": 9.314578126882691e-05,
      "loss": 0.5955130577087402,
      "step": 825,
      "token_acc": 0.8294063806533813
    },
    {
      "epoch": 0.6350420811017597,
      "grad_norm": 0.9418911933898926,
      "learning_rate": 9.303881897589315e-05,
      "loss": 0.6099714279174805,
      "step": 830,
      "token_acc": 0.8279644250869751
    },
    {
      "epoch": 0.6388676358071921,
      "grad_norm": 0.9409440755844116,
      "learning_rate": 9.29310909451158e-05,
      "loss": 0.5885293006896972,
      "step": 835,
      "token_acc": 0.8318097591400146
    },
    {
      "epoch": 0.6426931905126243,
      "grad_norm": 0.9052807688713074,
      "learning_rate": 9.28225990931699e-05,
      "loss": 0.5844202995300293,
      "step": 840,
      "token_acc": 0.8323644399642944
    },
    {
      "epoch": 0.6465187452180566,
      "grad_norm": 1.170585036277771,
      "learning_rate": 9.271334535032026e-05,
      "loss": 0.6612658500671387,
      "step": 845,
      "token_acc": 0.8123800754547119
    },
    {
      "epoch": 0.6503442999234889,
      "grad_norm": 0.89767986536026,
      "learning_rate": 9.260333166038704e-05,
      "loss": 0.6106939315795898,
      "step": 850,
      "token_acc": 0.8253637552261353
    },
    {
      "epoch": 0.6503442999234889,
      "eval_loss": 0.595952033996582,
      "eval_runtime": 7.7396,
      "eval_samples_per_second": 13.437,
      "eval_steps_per_second": 1.68,
      "eval_token_acc": 0.8275924324989319,
      "step": 850
    },
    {
      "epoch": 0.6541698546289212,
      "grad_norm": 0.8901084661483765,
      "learning_rate": 9.249255998071126e-05,
      "loss": 0.5618688106536865,
      "step": 855,
      "token_acc": 0.8380252718925476
    },
    {
      "epoch": 0.6579954093343535,
      "grad_norm": 0.8414104580879211,
      "learning_rate": 9.238103228211997e-05,
      "loss": 0.5890965461730957,
      "step": 860,
      "token_acc": 0.8292516469955444
    },
    {
      "epoch": 0.6618209640397857,
      "grad_norm": 0.8542090058326721,
      "learning_rate": 9.226875054889108e-05,
      "loss": 0.5492356300354004,
      "step": 865,
      "token_acc": 0.8417258858680725
    },
    {
      "epoch": 0.6656465187452181,
      "grad_norm": 0.928252100944519,
      "learning_rate": 9.21557167787182e-05,
      "loss": 0.6059693813323974,
      "step": 870,
      "token_acc": 0.827387273311615
    },
    {
      "epoch": 0.6694720734506503,
      "grad_norm": 0.8323174118995667,
      "learning_rate": 9.204193298267496e-05,
      "loss": 0.6152177810668945,
      "step": 875,
      "token_acc": 0.8236430287361145
    },
    {
      "epoch": 0.6732976281560826,
      "grad_norm": 0.8953769207000732,
      "learning_rate": 9.192740118517935e-05,
      "loss": 0.6013946056365966,
      "step": 880,
      "token_acc": 0.8297914862632751
    },
    {
      "epoch": 0.677123182861515,
      "grad_norm": 0.9411488771438599,
      "learning_rate": 9.181212342395764e-05,
      "loss": 0.521054458618164,
      "step": 885,
      "token_acc": 0.8486282229423523
    },
    {
      "epoch": 0.6809487375669472,
      "grad_norm": 0.9547863602638245,
      "learning_rate": 9.169610175000812e-05,
      "loss": 0.5880234718322754,
      "step": 890,
      "token_acc": 0.8322908878326416
    },
    {
      "epoch": 0.6847742922723795,
      "grad_norm": 1.0470699071884155,
      "learning_rate": 9.157933822756459e-05,
      "loss": 0.6081759452819824,
      "step": 895,
      "token_acc": 0.8250705003738403
    },
    {
      "epoch": 0.6885998469778117,
      "grad_norm": 0.9556779861450195,
      "learning_rate": 9.146183493405975e-05,
      "loss": 0.6601164817810059,
      "step": 900,
      "token_acc": 0.8116152286529541
    },
    {
      "epoch": 0.6885998469778117,
      "eval_loss": 0.5903816223144531,
      "eval_runtime": 7.6904,
      "eval_samples_per_second": 13.523,
      "eval_steps_per_second": 1.69,
      "eval_token_acc": 0.8289057016372681,
      "step": 900
    },
    {
      "epoch": 0.6924254016832441,
      "grad_norm": 1.1069297790527344,
      "learning_rate": 9.13435939600881e-05,
      "loss": 0.6385367393493653,
      "step": 905,
      "token_acc": 0.8162096738815308
    },
    {
      "epoch": 0.6962509563886764,
      "grad_norm": 0.9318839311599731,
      "learning_rate": 9.12246174093688e-05,
      "loss": 0.604517650604248,
      "step": 910,
      "token_acc": 0.82686847448349
    },
    {
      "epoch": 0.7000765110941086,
      "grad_norm": 0.8273342251777649,
      "learning_rate": 9.110490739870824e-05,
      "loss": 0.6841697216033935,
      "step": 915,
      "token_acc": 0.8044203519821167
    },
    {
      "epoch": 0.703902065799541,
      "grad_norm": 0.8293759822845459,
      "learning_rate": 9.098446605796239e-05,
      "loss": 0.5717193603515625,
      "step": 920,
      "token_acc": 0.8351298570632935
    },
    {
      "epoch": 0.7077276205049732,
      "grad_norm": 7.753383636474609,
      "learning_rate": 9.086329552999891e-05,
      "loss": 0.5882965564727783,
      "step": 925,
      "token_acc": 0.8285040259361267
    },
    {
      "epoch": 0.7115531752104055,
      "grad_norm": 0.9893306493759155,
      "learning_rate": 9.074139797065897e-05,
      "loss": 0.648917293548584,
      "step": 930,
      "token_acc": 0.8116658329963684
    },
    {
      "epoch": 0.7153787299158378,
      "grad_norm": 0.902746856212616,
      "learning_rate": 9.061877554871896e-05,
      "loss": 0.6094418525695801,
      "step": 935,
      "token_acc": 0.8259324431419373
    },
    {
      "epoch": 0.7192042846212701,
      "grad_norm": 0.9152299165725708,
      "learning_rate": 9.049543044585187e-05,
      "loss": 0.6678308486938477,
      "step": 940,
      "token_acc": 0.816949725151062
    },
    {
      "epoch": 0.7230298393267024,
      "grad_norm": 1.0613242387771606,
      "learning_rate": 9.03713648565885e-05,
      "loss": 0.6197181701660156,
      "step": 945,
      "token_acc": 0.8243659138679504
    },
    {
      "epoch": 0.7268553940321346,
      "grad_norm": 0.7965312600135803,
      "learning_rate": 9.024658098827838e-05,
      "loss": 0.6047243118286133,
      "step": 950,
      "token_acc": 0.8313871622085571
    },
    {
      "epoch": 0.7268553940321346,
      "eval_loss": 0.587164044380188,
      "eval_runtime": 7.7893,
      "eval_samples_per_second": 13.352,
      "eval_steps_per_second": 1.669,
      "eval_token_acc": 0.8293668031692505,
      "step": 950
    },
    {
      "epoch": 0.730680948737567,
      "grad_norm": 0.8924623131752014,
      "learning_rate": 9.012108106105048e-05,
      "loss": 0.5776640892028808,
      "step": 955,
      "token_acc": 0.8302121758460999
    },
    {
      "epoch": 0.7345065034429993,
      "grad_norm": 1.0438350439071655,
      "learning_rate": 8.99948673077738e-05,
      "loss": 0.5650456428527832,
      "step": 960,
      "token_acc": 0.8433432579040527
    },
    {
      "epoch": 0.7383320581484315,
      "grad_norm": 0.8841288685798645,
      "learning_rate": 8.986794197401754e-05,
      "loss": 0.5597739219665527,
      "step": 965,
      "token_acc": 0.8350304365158081
    },
    {
      "epoch": 0.7421576128538638,
      "grad_norm": 0.9303543567657471,
      "learning_rate": 8.974030731801127e-05,
      "loss": 0.6170159816741944,
      "step": 970,
      "token_acc": 0.8251381516456604
    },
    {
      "epoch": 0.7459831675592961,
      "grad_norm": 1.05469810962677,
      "learning_rate": 8.961196561060454e-05,
      "loss": 0.61129789352417,
      "step": 975,
      "token_acc": 0.8258439302444458
    },
    {
      "epoch": 0.7498087222647284,
      "grad_norm": 0.8528873920440674,
      "learning_rate": 8.948291913522677e-05,
      "loss": 0.642275619506836,
      "step": 980,
      "token_acc": 0.8284429907798767
    },
    {
      "epoch": 0.7536342769701607,
      "grad_norm": 0.7755897641181946,
      "learning_rate": 8.935317018784637e-05,
      "loss": 0.5369032859802246,
      "step": 985,
      "token_acc": 0.8431283235549927
    },
    {
      "epoch": 0.7574598316755929,
      "grad_norm": 0.8636773228645325,
      "learning_rate": 8.922272107693e-05,
      "loss": 0.5884841442108154,
      "step": 990,
      "token_acc": 0.830573558807373
    },
    {
      "epoch": 0.7612853863810253,
      "grad_norm": 0.8464745283126831,
      "learning_rate": 8.90915741234015e-05,
      "loss": 0.5174911022186279,
      "step": 995,
      "token_acc": 0.8450327515602112
    },
    {
      "epoch": 0.7651109410864575,
      "grad_norm": 0.8121261596679688,
      "learning_rate": 8.895973166060058e-05,
      "loss": 0.5794853687286377,
      "step": 1000,
      "token_acc": 0.8360881209373474
    },
    {
      "epoch": 0.7651109410864575,
      "eval_loss": 0.5729076862335205,
      "eval_runtime": 7.9584,
      "eval_samples_per_second": 13.068,
      "eval_steps_per_second": 1.634,
      "eval_token_acc": 0.8330559730529785,
      "step": 1000
    },
    {
      "epoch": 0.7689364957918898,
      "grad_norm": 0.8082830309867859,
      "learning_rate": 8.882719603424133e-05,
      "loss": 0.6191754341125488,
      "step": 1005,
      "token_acc": 0.8256863355636597
    },
    {
      "epoch": 0.7727620504973222,
      "grad_norm": 0.8163895010948181,
      "learning_rate": 8.86939696023704e-05,
      "loss": 0.5695658683776855,
      "step": 1010,
      "token_acc": 0.8331784605979919
    },
    {
      "epoch": 0.7765876052027544,
      "grad_norm": 0.8397212624549866,
      "learning_rate": 8.856005473532519e-05,
      "loss": 0.5332405090332031,
      "step": 1015,
      "token_acc": 0.8452962636947632
    },
    {
      "epoch": 0.7804131599081867,
      "grad_norm": 0.8272839188575745,
      "learning_rate": 8.842545381569155e-05,
      "loss": 0.5343279838562012,
      "step": 1020,
      "token_acc": 0.8402997255325317
    },
    {
      "epoch": 0.7842387146136189,
      "grad_norm": 0.8609519004821777,
      "learning_rate": 8.829016923826144e-05,
      "loss": 0.5459603309631348,
      "step": 1025,
      "token_acc": 0.8402543067932129
    },
    {
      "epoch": 0.7880642693190513,
      "grad_norm": 0.8439111113548279,
      "learning_rate": 8.815420340999033e-05,
      "loss": 0.5824572563171386,
      "step": 1030,
      "token_acc": 0.8306134343147278
    },
    {
      "epoch": 0.7918898240244836,
      "grad_norm": 0.8207530975341797,
      "learning_rate": 8.801755874995437e-05,
      "loss": 0.5932113647460937,
      "step": 1035,
      "token_acc": 0.8294033408164978
    },
    {
      "epoch": 0.7957153787299158,
      "grad_norm": 0.9178765416145325,
      "learning_rate": 8.788023768930732e-05,
      "loss": 0.5900128364562989,
      "step": 1040,
      "token_acc": 0.8334224820137024
    },
    {
      "epoch": 0.7995409334353482,
      "grad_norm": 0.7986139059066772,
      "learning_rate": 8.774224267123734e-05,
      "loss": 0.6000078678131103,
      "step": 1045,
      "token_acc": 0.8272825479507446
    },
    {
      "epoch": 0.8033664881407804,
      "grad_norm": 0.8349852561950684,
      "learning_rate": 8.760357615092351e-05,
      "loss": 0.5280231475830078,
      "step": 1050,
      "token_acc": 0.8440104722976685
    },
    {
      "epoch": 0.8033664881407804,
      "eval_loss": 0.574630856513977,
      "eval_runtime": 7.7226,
      "eval_samples_per_second": 13.467,
      "eval_steps_per_second": 1.683,
      "eval_token_acc": 0.833396852016449,
      "step": 1050
    },
    {
      "epoch": 0.8071920428462127,
      "grad_norm": 0.7667945027351379,
      "learning_rate": 8.746424059549213e-05,
      "loss": 0.5487701416015625,
      "step": 1055,
      "token_acc": 0.8400689959526062
    },
    {
      "epoch": 0.811017597551645,
      "grad_norm": 0.9147979617118835,
      "learning_rate": 8.732423848397284e-05,
      "loss": 0.5697606563568115,
      "step": 1060,
      "token_acc": 0.8328049182891846
    },
    {
      "epoch": 0.8148431522570773,
      "grad_norm": 0.8798291087150574,
      "learning_rate": 8.718357230725449e-05,
      "loss": 0.5843188285827636,
      "step": 1065,
      "token_acc": 0.8351316452026367
    },
    {
      "epoch": 0.8186687069625096,
      "grad_norm": 0.9299157857894897,
      "learning_rate": 8.704224456804087e-05,
      "loss": 0.6090686798095704,
      "step": 1070,
      "token_acc": 0.8255612850189209
    },
    {
      "epoch": 0.8224942616679418,
      "grad_norm": 0.8285570740699768,
      "learning_rate": 8.690025778080613e-05,
      "loss": 0.5678855419158936,
      "step": 1075,
      "token_acc": 0.834744930267334
    },
    {
      "epoch": 0.8263198163733741,
      "grad_norm": 1.0449912548065186,
      "learning_rate": 8.67576144717501e-05,
      "loss": 0.5510326385498047,
      "step": 1080,
      "token_acc": 0.8414307832717896
    },
    {
      "epoch": 0.8301453710788065,
      "grad_norm": 0.7922863364219666,
      "learning_rate": 8.661431717875328e-05,
      "loss": 0.5484563827514648,
      "step": 1085,
      "token_acc": 0.8401945233345032
    },
    {
      "epoch": 0.8339709257842387,
      "grad_norm": 1.0209932327270508,
      "learning_rate": 8.647036845133172e-05,
      "loss": 0.5764856338500977,
      "step": 1090,
      "token_acc": 0.8333871960639954
    },
    {
      "epoch": 0.837796480489671,
      "grad_norm": 0.8326112627983093,
      "learning_rate": 8.632577085059168e-05,
      "loss": 0.6004890441894531,
      "step": 1095,
      "token_acc": 0.827037513256073
    },
    {
      "epoch": 0.8416220351951033,
      "grad_norm": 0.7816240787506104,
      "learning_rate": 8.618052694918399e-05,
      "loss": 0.5333565711975098,
      "step": 1100,
      "token_acc": 0.8430129885673523
    },
    {
      "epoch": 0.8416220351951033,
      "eval_loss": 0.5720469951629639,
      "eval_runtime": 7.8984,
      "eval_samples_per_second": 13.167,
      "eval_steps_per_second": 1.646,
      "eval_token_acc": 0.8314921259880066,
      "step": 1100
    },
    {
      "epoch": 0.8454475899005356,
      "grad_norm": 0.9619238376617432,
      "learning_rate": 8.603463933125842e-05,
      "loss": 0.5509546756744385,
      "step": 1105,
      "token_acc": 0.8384957313537598
    },
    {
      "epoch": 0.8492731446059678,
      "grad_norm": 0.9528924822807312,
      "learning_rate": 8.588811059241755e-05,
      "loss": 0.6007543563842773,
      "step": 1110,
      "token_acc": 0.8273714780807495
    },
    {
      "epoch": 0.8530986993114001,
      "grad_norm": 0.812016487121582,
      "learning_rate": 8.574094333967064e-05,
      "loss": 0.5877734661102295,
      "step": 1115,
      "token_acc": 0.8291584253311157
    },
    {
      "epoch": 0.8569242540168325,
      "grad_norm": 1.103339433670044,
      "learning_rate": 8.559314019138727e-05,
      "loss": 0.6196231842041016,
      "step": 1120,
      "token_acc": 0.8281660676002502
    },
    {
      "epoch": 0.8607498087222647,
      "grad_norm": 0.9961858987808228,
      "learning_rate": 8.544470377725078e-05,
      "loss": 0.571223258972168,
      "step": 1125,
      "token_acc": 0.8321356177330017
    },
    {
      "epoch": 0.864575363427697,
      "grad_norm": 0.8015458583831787,
      "learning_rate": 8.529563673821141e-05,
      "loss": 0.538951301574707,
      "step": 1130,
      "token_acc": 0.8429505228996277
    },
    {
      "epoch": 0.8684009181331293,
      "grad_norm": 0.8478720784187317,
      "learning_rate": 8.514594172643934e-05,
      "loss": 0.5572677612304687,
      "step": 1135,
      "token_acc": 0.8356977105140686
    },
    {
      "epoch": 0.8722264728385616,
      "grad_norm": 0.814361572265625,
      "learning_rate": 8.499562140527754e-05,
      "loss": 0.5883401870727539,
      "step": 1140,
      "token_acc": 0.8291968107223511
    },
    {
      "epoch": 0.8760520275439939,
      "grad_norm": 0.8049572706222534,
      "learning_rate": 8.484467844919437e-05,
      "loss": 0.5637226104736328,
      "step": 1145,
      "token_acc": 0.8390661478042603
    },
    {
      "epoch": 0.8798775822494261,
      "grad_norm": 0.749894917011261,
      "learning_rate": 8.469311554373594e-05,
      "loss": 0.4973104000091553,
      "step": 1150,
      "token_acc": 0.8528492450714111
    },
    {
      "epoch": 0.8798775822494261,
      "eval_loss": 0.564576268196106,
      "eval_runtime": 7.7633,
      "eval_samples_per_second": 13.396,
      "eval_steps_per_second": 1.675,
      "eval_token_acc": 0.835151195526123,
      "step": 1150
    },
    {
      "epoch": 0.8837031369548585,
      "grad_norm": 0.9036749005317688,
      "learning_rate": 8.454093538547838e-05,
      "loss": 0.5535676956176758,
      "step": 1155,
      "token_acc": 0.8383986949920654
    },
    {
      "epoch": 0.8875286916602907,
      "grad_norm": 0.7430348992347717,
      "learning_rate": 8.438814068197988e-05,
      "loss": 0.557097339630127,
      "step": 1160,
      "token_acc": 0.8430325388908386
    },
    {
      "epoch": 0.891354246365723,
      "grad_norm": 0.9356522560119629,
      "learning_rate": 8.423473415173247e-05,
      "loss": 0.5787965774536132,
      "step": 1165,
      "token_acc": 0.8321569561958313
    },
    {
      "epoch": 0.8951798010711554,
      "grad_norm": 0.7668983340263367,
      "learning_rate": 8.40807185241137e-05,
      "loss": 0.5303655624389648,
      "step": 1170,
      "token_acc": 0.8440219163894653
    },
    {
      "epoch": 0.8990053557765876,
      "grad_norm": 0.7720690965652466,
      "learning_rate": 8.392609653933803e-05,
      "loss": 0.5396030426025391,
      "step": 1175,
      "token_acc": 0.8430536985397339
    },
    {
      "epoch": 0.9028309104820199,
      "grad_norm": 0.7427228689193726,
      "learning_rate": 8.377087094840813e-05,
      "loss": 0.5650552749633789,
      "step": 1180,
      "token_acc": 0.8388790488243103
    },
    {
      "epoch": 0.9066564651874521,
      "grad_norm": 0.8698520660400391,
      "learning_rate": 8.361504451306585e-05,
| "loss": 0.5175793647766114, | |
| "step": 1185, | |
| "token_acc": 0.8486889004707336 | |
| }, | |
| { | |
| "epoch": 0.9104820198928845, | |
| "grad_norm": 0.838016927242279, | |
| "learning_rate": 8.345862000574321e-05, | |
| "loss": 0.5568198204040528, | |
| "step": 1190, | |
| "token_acc": 0.8362753391265869 | |
| }, | |
| { | |
| "epoch": 0.9143075745983168, | |
| "grad_norm": 0.7980285286903381, | |
| "learning_rate": 8.330160020951299e-05, | |
| "loss": 0.5795284748077393, | |
| "step": 1195, | |
| "token_acc": 0.8336633443832397 | |
| }, | |
| { | |
| "epoch": 0.918133129303749, | |
| "grad_norm": 0.7379786968231201, | |
| "learning_rate": 8.314398791803916e-05, | |
| "loss": 0.5594221115112304, | |
| "step": 1200, | |
| "token_acc": 0.8377372026443481 | |
| }, | |
| { | |
| "epoch": 0.918133129303749, | |
| "eval_loss": 0.5564058423042297, | |
| "eval_runtime": 7.7456, | |
| "eval_samples_per_second": 13.427, | |
| "eval_steps_per_second": 1.678, | |
| "eval_token_acc": 0.8375070095062256, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9219586840091814, | |
| "grad_norm": 0.8150419592857361, | |
| "learning_rate": 8.298578593552737e-05, | |
| "loss": 0.5221155166625977, | |
| "step": 1205, | |
| "token_acc": 0.8457277417182922 | |
| }, | |
| { | |
| "epoch": 0.9257842387146136, | |
| "grad_norm": 0.9086570739746094, | |
| "learning_rate": 8.28269970766748e-05, | |
| "loss": 0.574681568145752, | |
| "step": 1210, | |
| "token_acc": 0.8327599763870239 | |
| }, | |
| { | |
| "epoch": 0.9296097934200459, | |
| "grad_norm": 0.8389135599136353, | |
| "learning_rate": 8.26676241666203e-05, | |
| "loss": 0.5882039070129395, | |
| "step": 1215, | |
| "token_acc": 0.8281732797622681 | |
| }, | |
| { | |
| "epoch": 0.9334353481254782, | |
| "grad_norm": 1.0141870975494385, | |
| "learning_rate": 8.250767004089399e-05, | |
| "loss": 0.5588771820068359, | |
| "step": 1220, | |
| "token_acc": 0.8358601331710815 | |
| }, | |
| { | |
| "epoch": 0.9372609028309105, | |
| "grad_norm": 0.8374904990196228, | |
| "learning_rate": 8.23471375453669e-05, | |
| "loss": 0.5152300834655762, | |
| "step": 1225, | |
| "token_acc": 0.8489376902580261 | |
| }, | |
| { | |
| "epoch": 0.9410864575363428, | |
| "grad_norm": 0.8244453072547913, | |
| "learning_rate": 8.21860295362003e-05, | |
| "loss": 0.500080680847168, | |
| "step": 1230, | |
| "token_acc": 0.8521796464920044 | |
| }, | |
| { | |
| "epoch": 0.944912012241775, | |
| "grad_norm": 0.9917334318161011, | |
| "learning_rate": 8.20243488797948e-05, | |
| "loss": 0.5609046459197998, | |
| "step": 1235, | |
| "token_acc": 0.8397351503372192 | |
| }, | |
| { | |
| "epoch": 0.9487375669472073, | |
| "grad_norm": 1.4528796672821045, | |
| "learning_rate": 8.186209845273954e-05, | |
| "loss": 0.6106361389160156, | |
| "step": 1240, | |
| "token_acc": 0.8287570476531982 | |
| }, | |
| { | |
| "epoch": 0.9525631216526397, | |
| "grad_norm": 0.8477284908294678, | |
| "learning_rate": 8.169928114176084e-05, | |
| "loss": 0.534299659729004, | |
| "step": 1245, | |
| "token_acc": 0.8459932208061218 | |
| }, | |
| { | |
| "epoch": 0.9563886763580719, | |
| "grad_norm": 0.9785248041152954, | |
| "learning_rate": 8.153589984367091e-05, | |
| "loss": 0.5453691959381104, | |
| "step": 1250, | |
| "token_acc": 0.8423656821250916 | |
| }, | |
| { | |
| "epoch": 0.9563886763580719, | |
| "eval_loss": 0.5528830885887146, | |
| "eval_runtime": 8.7628, | |
| "eval_samples_per_second": 11.868, | |
| "eval_steps_per_second": 1.484, | |
| "eval_token_acc": 0.8377977609634399, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9602142310635042, | |
| "grad_norm": 0.7743374705314636, | |
| "learning_rate": 8.137195746531635e-05, | |
| "loss": 0.5649035453796387, | |
| "step": 1255, | |
| "token_acc": 0.83652263879776 | |
| }, | |
| { | |
| "epoch": 0.9640397857689365, | |
| "grad_norm": 0.9111794829368591, | |
| "learning_rate": 8.120745692352627e-05, | |
| "loss": 0.5429101943969726, | |
| "step": 1260, | |
| "token_acc": 0.8409203886985779 | |
| }, | |
| { | |
| "epoch": 0.9678653404743688, | |
| "grad_norm": 0.8705430030822754, | |
| "learning_rate": 8.104240114506065e-05, | |
| "loss": 0.5348100185394287, | |
| "step": 1265, | |
| "token_acc": 0.8432644009590149 | |
| }, | |
| { | |
| "epoch": 0.9716908951798011, | |
| "grad_norm": 0.7576097249984741, | |
| "learning_rate": 8.087679306655804e-05, | |
| "loss": 0.5683703422546387, | |
| "step": 1270, | |
| "token_acc": 0.836378276348114 | |
| }, | |
| { | |
| "epoch": 0.9755164498852333, | |
| "grad_norm": 1.1635630130767822, | |
| "learning_rate": 8.07106356344834e-05, | |
| "loss": 0.6346898078918457, | |
| "step": 1275, | |
| "token_acc": 0.8285390734672546 | |
| }, | |
| { | |
| "epoch": 0.9793420045906657, | |
| "grad_norm": 0.827690601348877, | |
| "learning_rate": 8.054393180507572e-05, | |
| "loss": 0.5661238193511963, | |
| "step": 1280, | |
| "token_acc": 0.8387032747268677 | |
| }, | |
| { | |
| "epoch": 0.9831675592960979, | |
| "grad_norm": 0.888037383556366, | |
| "learning_rate": 8.037668454429534e-05, | |
| "loss": 0.5784870624542237, | |
| "step": 1285, | |
| "token_acc": 0.8306419849395752 | |
| }, | |
| { | |
| "epoch": 0.9869931140015302, | |
| "grad_norm": 0.7650582790374756, | |
| "learning_rate": 8.020889682777127e-05, | |
| "loss": 0.5594500064849853, | |
| "step": 1290, | |
| "token_acc": 0.8358885645866394 | |
| }, | |
| { | |
| "epoch": 0.9908186687069626, | |
| "grad_norm": 0.8132854104042053, | |
| "learning_rate": 8.004057164074814e-05, | |
| "loss": 0.5590912818908691, | |
| "step": 1295, | |
| "token_acc": 0.8387227654457092 | |
| }, | |
| { | |
| "epoch": 0.9946442234123948, | |
| "grad_norm": 0.8819040656089783, | |
| "learning_rate": 7.987171197803315e-05, | |
| "loss": 0.5425111770629882, | |
| "step": 1300, | |
| "token_acc": 0.8366984128952026 | |
| }, | |
| { | |
| "epoch": 0.9946442234123948, | |
| "eval_loss": 0.5410341024398804, | |
| "eval_runtime": 7.8851, | |
| "eval_samples_per_second": 13.189, | |
| "eval_steps_per_second": 1.649, | |
| "eval_token_acc": 0.8402237296104431, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9984697781178271, | |
| "grad_norm": 0.7759367227554321, | |
| "learning_rate": 7.970232084394282e-05, | |
| "loss": 0.4794795989990234, | |
| "step": 1305, | |
| "token_acc": 0.8576377034187317 | |
| }, | |
| { | |
| "epoch": 1.0022953328232593, | |
| "grad_norm": 0.7615346908569336, | |
| "learning_rate": 7.953240125224948e-05, | |
| "loss": 0.4416775703430176, | |
| "step": 1310, | |
| "token_acc": 0.8654638528823853 | |
| }, | |
| { | |
| "epoch": 1.0061208875286916, | |
| "grad_norm": 0.7918492555618286, | |
| "learning_rate": 7.936195622612767e-05, | |
| "loss": 0.37592229843139646, | |
| "step": 1315, | |
| "token_acc": 0.8773406147956848 | |
| }, | |
| { | |
| "epoch": 1.009946442234124, | |
| "grad_norm": 0.717467725276947, | |
| "learning_rate": 7.919098879810036e-05, | |
| "loss": 0.4267716407775879, | |
| "step": 1320, | |
| "token_acc": 0.8671300411224365 | |
| }, | |
| { | |
| "epoch": 1.0137719969395562, | |
| "grad_norm": 0.7892487645149231, | |
| "learning_rate": 7.901950200998493e-05, | |
| "loss": 0.382064151763916, | |
| "step": 1325, | |
| "token_acc": 0.8785242438316345 | |
| }, | |
| { | |
| "epoch": 1.0175975516449884, | |
| "grad_norm": 0.7296363711357117, | |
| "learning_rate": 7.884749891283922e-05, | |
| "loss": 0.36800203323364256, | |
| "step": 1330, | |
| "token_acc": 0.881615400314331 | |
| }, | |
| { | |
| "epoch": 1.0214231063504209, | |
| "grad_norm": 1.087638258934021, | |
| "learning_rate": 7.867498256690704e-05, | |
| "loss": 0.37799820899963377, | |
| "step": 1335, | |
| "token_acc": 0.877220094203949 | |
| }, | |
| { | |
| "epoch": 1.025248661055853, | |
| "grad_norm": 0.7339928150177002, | |
| "learning_rate": 7.850195604156385e-05, | |
| "loss": 0.37110204696655275, | |
| "step": 1340, | |
| "token_acc": 0.884996771812439 | |
| }, | |
| { | |
| "epoch": 1.0290742157612853, | |
| "grad_norm": 0.8464434742927551, | |
| "learning_rate": 7.832842241526212e-05, | |
| "loss": 0.3805660009384155, | |
| "step": 1345, | |
| "token_acc": 0.879789412021637 | |
| }, | |
| { | |
| "epoch": 1.0328997704667178, | |
| "grad_norm": 0.689896821975708, | |
| "learning_rate": 7.815438477547655e-05, | |
| "loss": 0.3583992481231689, | |
| "step": 1350, | |
| "token_acc": 0.8869645595550537 | |
| }, | |
| { | |
| "epoch": 1.0328997704667178, | |
| "eval_loss": 0.5596266984939575, | |
| "eval_runtime": 7.7953, | |
| "eval_samples_per_second": 13.341, | |
| "eval_steps_per_second": 1.668, | |
| "eval_token_acc": 0.8413565754890442, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.03672532517215, | |
| "grad_norm": 0.6902993321418762, | |
| "learning_rate": 7.797984621864916e-05, | |
| "loss": 0.42625932693481444, | |
| "step": 1355, | |
| "token_acc": 0.8614287376403809 | |
| }, | |
| { | |
| "epoch": 1.0405508798775822, | |
| "grad_norm": 0.7562316060066223, | |
| "learning_rate": 7.780480985013413e-05, | |
| "loss": 0.3689578533172607, | |
| "step": 1360, | |
| "token_acc": 0.8820473551750183 | |
| }, | |
| { | |
| "epoch": 1.0443764345830144, | |
| "grad_norm": 0.6517492532730103, | |
| "learning_rate": 7.762927878414267e-05, | |
| "loss": 0.32921748161315917, | |
| "step": 1365, | |
| "token_acc": 0.8926072120666504 | |
| }, | |
| { | |
| "epoch": 1.0482019892884469, | |
| "grad_norm": 0.8397619724273682, | |
| "learning_rate": 7.745325614368755e-05, | |
| "loss": 0.3830822229385376, | |
| "step": 1370, | |
| "token_acc": 0.8756515383720398 | |
| }, | |
| { | |
| "epoch": 1.052027543993879, | |
| "grad_norm": 0.7649819254875183, | |
| "learning_rate": 7.727674506052743e-05, | |
| "loss": 0.37806334495544436, | |
| "step": 1375, | |
| "token_acc": 0.8804787993431091 | |
| }, | |
| { | |
| "epoch": 1.0558530986993113, | |
| "grad_norm": 0.7365129590034485, | |
| "learning_rate": 7.709974867511138e-05, | |
| "loss": 0.3349802017211914, | |
| "step": 1380, | |
| "token_acc": 0.8934342861175537 | |
| }, | |
| { | |
| "epoch": 1.0596786534047438, | |
| "grad_norm": 0.884164571762085, | |
| "learning_rate": 7.692227013652278e-05, | |
| "loss": 0.36524980068206786, | |
| "step": 1385, | |
| "token_acc": 0.8806947469711304 | |
| }, | |
| { | |
| "epoch": 1.063504208110176, | |
| "grad_norm": 0.6860577464103699, | |
| "learning_rate": 7.674431260242338e-05, | |
| "loss": 0.367877721786499, | |
| "step": 1390, | |
| "token_acc": 0.8842624425888062 | |
| }, | |
| { | |
| "epoch": 1.0673297628156082, | |
| "grad_norm": 0.7009398937225342, | |
| "learning_rate": 7.656587923899718e-05, | |
| "loss": 0.3564207315444946, | |
| "step": 1395, | |
| "token_acc": 0.8837472200393677 | |
| }, | |
| { | |
| "epoch": 1.0711553175210407, | |
| "grad_norm": 0.7540706396102905, | |
| "learning_rate": 7.638697322089398e-05, | |
| "loss": 0.3640351057052612, | |
| "step": 1400, | |
| "token_acc": 0.8847005367279053 | |
| }, | |
| { | |
| "epoch": 1.0711553175210407, | |
| "eval_loss": 0.5507253408432007, | |
| "eval_runtime": 7.7117, | |
| "eval_samples_per_second": 13.486, | |
| "eval_steps_per_second": 1.686, | |
| "eval_token_acc": 0.8438527584075928, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0749808722264729, | |
| "grad_norm": 0.6863798499107361, | |
| "learning_rate": 7.620759773117299e-05, | |
| "loss": 0.3779132604598999, | |
| "step": 1405, | |
| "token_acc": 0.8826145529747009 | |
| }, | |
| { | |
| "epoch": 1.078806426931905, | |
| "grad_norm": 0.7733192443847656, | |
| "learning_rate": 7.602775596124611e-05, | |
| "loss": 0.3633275032043457, | |
| "step": 1410, | |
| "token_acc": 0.886398196220398 | |
| }, | |
| { | |
| "epoch": 1.0826319816373373, | |
| "grad_norm": 0.7949317693710327, | |
| "learning_rate": 7.584745111082127e-05, | |
| "loss": 0.3376323699951172, | |
| "step": 1415, | |
| "token_acc": 0.8887669444084167 | |
| }, | |
| { | |
| "epoch": 1.0864575363427698, | |
| "grad_norm": 0.6832326650619507, | |
| "learning_rate": 7.566668638784542e-05, | |
| "loss": 0.33144965171813967, | |
| "step": 1420, | |
| "token_acc": 0.8916584849357605 | |
| }, | |
| { | |
| "epoch": 1.090283091048202, | |
| "grad_norm": 0.8551044464111328, | |
| "learning_rate": 7.548546500844742e-05, | |
| "loss": 0.3287867546081543, | |
| "step": 1425, | |
| "token_acc": 0.8930348753929138 | |
| }, | |
| { | |
| "epoch": 1.0941086457536342, | |
| "grad_norm": 0.7423316240310669, | |
| "learning_rate": 7.530379019688092e-05, | |
| "loss": 0.3902039289474487, | |
| "step": 1430, | |
| "token_acc": 0.8757656812667847 | |
| }, | |
| { | |
| "epoch": 1.0979342004590666, | |
| "grad_norm": 0.8404172658920288, | |
| "learning_rate": 7.51216651854669e-05, | |
| "loss": 0.390373969078064, | |
| "step": 1435, | |
| "token_acc": 0.8776587843894958 | |
| }, | |
| { | |
| "epoch": 1.1017597551644989, | |
| "grad_norm": 0.8963853120803833, | |
| "learning_rate": 7.493909321453625e-05, | |
| "loss": 0.4068464279174805, | |
| "step": 1440, | |
| "token_acc": 0.8700478076934814 | |
| }, | |
| { | |
| "epoch": 1.105585309869931, | |
| "grad_norm": 0.7311558723449707, | |
| "learning_rate": 7.475607753237202e-05, | |
| "loss": 0.3884909629821777, | |
| "step": 1445, | |
| "token_acc": 0.8745863437652588 | |
| }, | |
| { | |
| "epoch": 1.1094108645753635, | |
| "grad_norm": 0.7590047121047974, | |
| "learning_rate": 7.457262139515171e-05, | |
| "loss": 0.3895232677459717, | |
| "step": 1450, | |
| "token_acc": 0.8725248575210571 | |
| }, | |
| { | |
| "epoch": 1.1094108645753635, | |
| "eval_loss": 0.5504098534584045, | |
| "eval_runtime": 7.7559, | |
| "eval_samples_per_second": 13.409, | |
| "eval_steps_per_second": 1.676, | |
| "eval_token_acc": 0.8443038463592529, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.1132364192807958, | |
| "grad_norm": 0.882554292678833, | |
| "learning_rate": 7.438872806688934e-05, | |
| "loss": 0.40759758949279784, | |
| "step": 1455, | |
| "token_acc": 0.8712476491928101 | |
| }, | |
| { | |
| "epoch": 1.117061973986228, | |
| "grad_norm": 0.6808732748031616, | |
| "learning_rate": 7.420440081937728e-05, | |
| "loss": 0.3652071237564087, | |
| "step": 1460, | |
| "token_acc": 0.8835034370422363 | |
| }, | |
| { | |
| "epoch": 1.1208875286916602, | |
| "grad_norm": 0.837759256362915, | |
| "learning_rate": 7.401964293212809e-05, | |
| "loss": 0.409121036529541, | |
| "step": 1465, | |
| "token_acc": 0.8712127208709717 | |
| }, | |
| { | |
| "epoch": 1.1247130833970926, | |
| "grad_norm": 0.6652865409851074, | |
| "learning_rate": 7.383445769231627e-05, | |
| "loss": 0.3703787803649902, | |
| "step": 1470, | |
| "token_acc": 0.8831153512001038 | |
| }, | |
| { | |
| "epoch": 1.1285386381025249, | |
| "grad_norm": 0.8179388642311096, | |
| "learning_rate": 7.364884839471964e-05, | |
| "loss": 0.39147076606750486, | |
| "step": 1475, | |
| "token_acc": 0.8752105236053467 | |
| }, | |
| { | |
| "epoch": 1.132364192807957, | |
| "grad_norm": 0.719514012336731, | |
| "learning_rate": 7.346281834166075e-05, | |
| "loss": 0.37967238426208494, | |
| "step": 1480, | |
| "token_acc": 0.8796840906143188 | |
| }, | |
| { | |
| "epoch": 1.1361897475133895, | |
| "grad_norm": 0.9179552793502808, | |
| "learning_rate": 7.327637084294817e-05, | |
| "loss": 0.3995789051055908, | |
| "step": 1485, | |
| "token_acc": 0.8751766085624695 | |
| }, | |
| { | |
| "epoch": 1.1400153022188217, | |
| "grad_norm": 0.7656182050704956, | |
| "learning_rate": 7.308950921581756e-05, | |
| "loss": 0.34888639450073244, | |
| "step": 1490, | |
| "token_acc": 0.89056795835495 | |
| }, | |
| { | |
| "epoch": 1.143840856924254, | |
| "grad_norm": 0.7309355735778809, | |
| "learning_rate": 7.290223678487272e-05, | |
| "loss": 0.39315025806427, | |
| "step": 1495, | |
| "token_acc": 0.876833438873291 | |
| }, | |
| { | |
| "epoch": 1.1476664116296864, | |
| "grad_norm": 0.7618235349655151, | |
| "learning_rate": 7.27145568820263e-05, | |
| "loss": 0.35439176559448243, | |
| "step": 1500, | |
| "token_acc": 0.8836838006973267 | |
| }, | |
| { | |
| "epoch": 1.1476664116296864, | |
| "eval_loss": 0.5430108904838562, | |
| "eval_runtime": 7.6873, | |
| "eval_samples_per_second": 13.529, | |
| "eval_steps_per_second": 1.691, | |
| "eval_token_acc": 0.8448953628540039, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.1514919663351186, | |
| "grad_norm": 0.8058356046676636, | |
| "learning_rate": 7.25264728464407e-05, | |
| "loss": 0.3466159820556641, | |
| "step": 1505, | |
| "token_acc": 0.8869272470474243 | |
| }, | |
| { | |
| "epoch": 1.1553175210405509, | |
| "grad_norm": 0.7806113362312317, | |
| "learning_rate": 7.233798802446847e-05, | |
| "loss": 0.40935721397399905, | |
| "step": 1510, | |
| "token_acc": 0.8709314465522766 | |
| }, | |
| { | |
| "epoch": 1.159143075745983, | |
| "grad_norm": 0.8264714479446411, | |
| "learning_rate": 7.214910576959297e-05, | |
| "loss": 0.38201849460601806, | |
| "step": 1515, | |
| "token_acc": 0.8780457973480225 | |
| }, | |
| { | |
| "epoch": 1.1629686304514155, | |
| "grad_norm": 0.6713389158248901, | |
| "learning_rate": 7.195982944236851e-05, | |
| "loss": 0.3252051115036011, | |
| "step": 1520, | |
| "token_acc": 0.892856240272522 | |
| }, | |
| { | |
| "epoch": 1.1667941851568477, | |
| "grad_norm": 0.7945072650909424, | |
| "learning_rate": 7.177016241036075e-05, | |
| "loss": 0.35387892723083497, | |
| "step": 1525, | |
| "token_acc": 0.8838560581207275 | |
| }, | |
| { | |
| "epoch": 1.17061973986228, | |
| "grad_norm": 0.8310626745223999, | |
| "learning_rate": 7.15801080480866e-05, | |
| "loss": 0.3746853590011597, | |
| "step": 1530, | |
| "token_acc": 0.8799676299095154 | |
| }, | |
| { | |
| "epoch": 1.1744452945677124, | |
| "grad_norm": 0.9108403325080872, | |
| "learning_rate": 7.138966973695431e-05, | |
| "loss": 0.36667909622192385, | |
| "step": 1535, | |
| "token_acc": 0.8820632100105286 | |
| }, | |
| { | |
| "epoch": 1.1782708492731446, | |
| "grad_norm": 0.7420673966407776, | |
| "learning_rate": 7.119885086520329e-05, | |
| "loss": 0.36235547065734863, | |
| "step": 1540, | |
| "token_acc": 0.8849785923957825 | |
| }, | |
| { | |
| "epoch": 1.1820964039785768, | |
| "grad_norm": 0.6693369150161743, | |
| "learning_rate": 7.100765482784377e-05, | |
| "loss": 0.3710158824920654, | |
| "step": 1545, | |
| "token_acc": 0.8811267614364624 | |
| }, | |
| { | |
| "epoch": 1.185921958684009, | |
| "grad_norm": 0.7249651551246643, | |
| "learning_rate": 7.081608502659646e-05, | |
| "loss": 0.3993852615356445, | |
| "step": 1550, | |
| "token_acc": 0.8718493580818176 | |
| }, | |
| { | |
| "epoch": 1.185921958684009, | |
| "eval_loss": 0.5383990406990051, | |
| "eval_runtime": 7.5793, | |
| "eval_samples_per_second": 13.722, | |
| "eval_steps_per_second": 1.715, | |
| "eval_token_acc": 0.8461685180664062, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.1897475133894415, | |
| "grad_norm": 0.9157434105873108, | |
| "learning_rate": 7.062414486983197e-05, | |
| "loss": 0.3987370491027832, | |
| "step": 1555, | |
| "token_acc": 0.8729732036590576 | |
| }, | |
| { | |
| "epoch": 1.1935730680948737, | |
| "grad_norm": 0.6402376890182495, | |
| "learning_rate": 7.043183777251024e-05, | |
| "loss": 0.2903183698654175, | |
| "step": 1560, | |
| "token_acc": 0.9057296514511108 | |
| }, | |
| { | |
| "epoch": 1.197398622800306, | |
| "grad_norm": 0.7679566144943237, | |
| "learning_rate": 7.023916715611969e-05, | |
| "loss": 0.4904749870300293, | |
| "step": 1565, | |
| "token_acc": 0.8663699626922607 | |
| }, | |
| { | |
| "epoch": 1.2012241775057384, | |
| "grad_norm": 0.8699092864990234, | |
| "learning_rate": 7.004613644861647e-05, | |
| "loss": 0.4231747627258301, | |
| "step": 1570, | |
| "token_acc": 0.8677194714546204 | |
| }, | |
| { | |
| "epoch": 1.2050497322111706, | |
| "grad_norm": 0.6792256832122803, | |
| "learning_rate": 6.985274908436333e-05, | |
| "loss": 0.44817123413085935, | |
| "step": 1575, | |
| "token_acc": 0.8659628629684448 | |
| }, | |
| { | |
| "epoch": 1.2088752869166028, | |
| "grad_norm": 0.7418417930603027, | |
| "learning_rate": 6.965900850406859e-05, | |
| "loss": 0.33240585327148436, | |
| "step": 1580, | |
| "token_acc": 0.8937970399856567 | |
| }, | |
| { | |
| "epoch": 1.2127008416220353, | |
| "grad_norm": 0.8835020065307617, | |
| "learning_rate": 6.946491815472496e-05, | |
| "loss": 0.3884410381317139, | |
| "step": 1585, | |
| "token_acc": 0.876690149307251 | |
| }, | |
| { | |
| "epoch": 1.2165263963274675, | |
| "grad_norm": 0.9086595177650452, | |
| "learning_rate": 6.927048148954812e-05, | |
| "loss": 0.410748291015625, | |
| "step": 1590, | |
| "token_acc": 0.8735622763633728 | |
| }, | |
| { | |
| "epoch": 1.2203519510328997, | |
| "grad_norm": 0.7838605642318726, | |
| "learning_rate": 6.907570196791538e-05, | |
| "loss": 0.3603389739990234, | |
| "step": 1595, | |
| "token_acc": 0.8829374313354492 | |
| }, | |
| { | |
| "epoch": 1.2241775057383322, | |
| "grad_norm": 0.7454732060432434, | |
| "learning_rate": 6.888058305530406e-05, | |
| "loss": 0.37654249668121337, | |
| "step": 1600, | |
| "token_acc": 0.8782923817634583 | |
| }, | |
| { | |
| "epoch": 1.2241775057383322, | |
| "eval_loss": 0.5343810319900513, | |
| "eval_runtime": 7.6236, | |
| "eval_samples_per_second": 13.642, | |
| "eval_steps_per_second": 1.705, | |
| "eval_token_acc": 0.8475719690322876, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2280030604437644, | |
| "grad_norm": 0.7611352801322937, | |
| "learning_rate": 6.868512822322981e-05, | |
| "loss": 0.38566131591796876, | |
| "step": 1605, | |
| "token_acc": 0.8766804337501526 | |
| }, | |
| { | |
| "epoch": 1.2318286151491966, | |
| "grad_norm": 0.8874756693840027, | |
| "learning_rate": 6.848934094918498e-05, | |
| "loss": 0.38291475772857664, | |
| "step": 1610, | |
| "token_acc": 0.87657630443573 | |
| }, | |
| { | |
| "epoch": 1.2356541698546288, | |
| "grad_norm": 0.7193310260772705, | |
| "learning_rate": 6.829322471657658e-05, | |
| "loss": 0.3452467441558838, | |
| "step": 1615, | |
| "token_acc": 0.8881570100784302 | |
| }, | |
| { | |
| "epoch": 1.2394797245600613, | |
| "grad_norm": 0.661790668964386, | |
| "learning_rate": 6.809678301466443e-05, | |
| "loss": 0.3452208757400513, | |
| "step": 1620, | |
| "token_acc": 0.8885095715522766 | |
| }, | |
| { | |
| "epoch": 1.2433052792654935, | |
| "grad_norm": 0.8313160538673401, | |
| "learning_rate": 6.790001933849899e-05, | |
| "loss": 0.39090492725372317, | |
| "step": 1625, | |
| "token_acc": 0.8772667646408081 | |
| }, | |
| { | |
| "epoch": 1.2471308339709257, | |
| "grad_norm": 0.7543197870254517, | |
| "learning_rate": 6.770293718885928e-05, | |
| "loss": 0.37844099998474123, | |
| "step": 1630, | |
| "token_acc": 0.8773866295814514 | |
| }, | |
| { | |
| "epoch": 1.2509563886763582, | |
| "grad_norm": 0.7187685370445251, | |
| "learning_rate": 6.750554007219047e-05, | |
| "loss": 0.37274966239929197, | |
| "step": 1635, | |
| "token_acc": 0.8813634514808655 | |
| }, | |
| { | |
| "epoch": 1.2547819433817904, | |
| "grad_norm": 0.7216220498085022, | |
| "learning_rate": 6.730783150054164e-05, | |
| "loss": 0.40465946197509767, | |
| "step": 1640, | |
| "token_acc": 0.8722350597381592 | |
| }, | |
| { | |
| "epoch": 1.2586074980872226, | |
| "grad_norm": 0.808250367641449, | |
| "learning_rate": 6.71098149915031e-05, | |
| "loss": 0.39015932083129884, | |
| "step": 1645, | |
| "token_acc": 0.8755351901054382 | |
| }, | |
| { | |
| "epoch": 1.2624330527926548, | |
| "grad_norm": 0.6570851802825928, | |
| "learning_rate": 6.691149406814403e-05, | |
| "loss": 0.33088486194610595, | |
| "step": 1650, | |
| "token_acc": 0.8907855153083801 | |
| }, | |
| { | |
| "epoch": 1.2624330527926548, | |
| "eval_loss": 0.5374127626419067, | |
| "eval_runtime": 7.8026, | |
| "eval_samples_per_second": 13.329, | |
| "eval_steps_per_second": 1.666, | |
| "eval_token_acc": 0.8472611904144287, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.2662586074980873, | |
| "grad_norm": 0.6985551714897156, | |
| "learning_rate": 6.67128722589496e-05, | |
| "loss": 0.3755918502807617, | |
| "step": 1655, | |
| "token_acc": 0.8816916346549988 | |
| }, | |
| { | |
| "epoch": 1.2700841622035195, | |
| "grad_norm": 0.7275698781013489, | |
| "learning_rate": 6.651395309775837e-05, | |
| "loss": 0.3765554428100586, | |
| "step": 1660, | |
| "token_acc": 0.8811103701591492 | |
| }, | |
| { | |
| "epoch": 1.2739097169089517, | |
| "grad_norm": 0.729633092880249, | |
| "learning_rate": 6.631474012369921e-05, | |
| "loss": 0.3696659803390503, | |
| "step": 1665, | |
| "token_acc": 0.8816789984703064 | |
| }, | |
| { | |
| "epoch": 1.2777352716143842, | |
| "grad_norm": 0.7620216012001038, | |
| "learning_rate": 6.611523688112858e-05, | |
| "loss": 0.35426578521728513, | |
| "step": 1670, | |
| "token_acc": 0.8883428573608398 | |
| }, | |
| { | |
| "epoch": 1.2815608263198164, | |
| "grad_norm": 0.8159366846084595, | |
| "learning_rate": 6.591544691956723e-05, | |
| "loss": 0.38610110282897947, | |
| "step": 1675, | |
| "token_acc": 0.8776164054870605 | |
| }, | |
| { | |
| "epoch": 1.2853863810252486, | |
| "grad_norm": 0.8567126989364624, | |
| "learning_rate": 6.571537379363719e-05, | |
| "loss": 0.4222766399383545, | |
| "step": 1680, | |
| "token_acc": 0.8723132610321045 | |
| }, | |
| { | |
| "epoch": 1.2892119357306808, | |
| "grad_norm": 0.8297275304794312, | |
| "learning_rate": 6.551502106299851e-05, | |
| "loss": 0.37399892807006835, | |
| "step": 1685, | |
| "token_acc": 0.8821731209754944 | |
| }, | |
| { | |
| "epoch": 1.2930374904361133, | |
| "grad_norm": 0.6843409538269043, | |
| "learning_rate": 6.531439229228591e-05, | |
| "loss": 0.3343992233276367, | |
| "step": 1690, | |
| "token_acc": 0.892397403717041 | |
| }, | |
| { | |
| "epoch": 1.2968630451415455, | |
| "grad_norm": 0.7213367819786072, | |
| "learning_rate": 6.511349105104534e-05, | |
| "loss": 0.38822097778320314, | |
| "step": 1695, | |
| "token_acc": 0.8769423365592957 | |
| }, | |
| { | |
| "epoch": 1.300688599846978, | |
| "grad_norm": 0.700702428817749, | |
| "learning_rate": 6.491232091367049e-05, | |
| "loss": 0.35975372791290283, | |
| "step": 1700, | |
| "token_acc": 0.8861437439918518 | |
| }, | |
| { | |
| "epoch": 1.300688599846978, | |
| "eval_loss": 0.526591956615448, | |
| "eval_runtime": 7.7916, | |
| "eval_samples_per_second": 13.348, | |
| "eval_steps_per_second": 1.668, | |
| "eval_token_acc": 0.8482837677001953, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.3045141545524102, | |
| "grad_norm": 0.7598251104354858, | |
| "learning_rate": 6.471088545933921e-05, | |
| "loss": 0.3564164638519287, | |
| "step": 1705, | |
| "token_acc": 0.8872470855712891 | |
| }, | |
| { | |
| "epoch": 1.3083397092578424, | |
| "grad_norm": 0.7174568176269531, | |
| "learning_rate": 6.450918827194978e-05, | |
| "loss": 0.3287261962890625, | |
| "step": 1710, | |
| "token_acc": 0.894193708896637 | |
| }, | |
| { | |
| "epoch": 1.3121652639632746, | |
| "grad_norm": 0.7934249043464661, | |
| "learning_rate": 6.430723294005726e-05, | |
| "loss": 0.3405998468399048, | |
| "step": 1715, | |
| "token_acc": 0.8878347277641296 | |
| }, | |
| { | |
| "epoch": 1.315990818668707, | |
| "grad_norm": 0.8109247088432312, | |
| "learning_rate": 6.410502305680946e-05, | |
| "loss": 0.3818791389465332, | |
| "step": 1720, | |
| "token_acc": 0.8762706518173218 | |
| }, | |
| { | |
| "epoch": 1.3198163733741393, | |
| "grad_norm": 0.7905654311180115, | |
| "learning_rate": 6.390256221988318e-05, | |
| "loss": 0.3510235548019409, | |
| "step": 1725, | |
| "token_acc": 0.8884668946266174 | |
| }, | |
| { | |
| "epoch": 1.3236419280795715, | |
| "grad_norm": 0.7302840352058411, | |
| "learning_rate": 6.369985403142014e-05, | |
| "loss": 0.3860185146331787, | |
| "step": 1730, | |
| "token_acc": 0.8776938915252686 | |
| }, | |
| { | |
| "epoch": 1.327467482785004, | |
| "grad_norm": 0.7890005111694336, | |
| "learning_rate": 6.349690209796285e-05, | |
| "loss": 0.4002682685852051, | |
| "step": 1735, | |
| "token_acc": 0.8717520236968994 | |
| }, | |
| { | |
| "epoch": 1.3312930374904361, | |
| "grad_norm": 0.6541386842727661, | |
| "learning_rate": 6.329371003039051e-05, | |
| "loss": 0.3814365863800049, | |
| "step": 1740, | |
| "token_acc": 0.8806993365287781 | |
| }, | |
| { | |
| "epoch": 1.3351185921958684, | |
| "grad_norm": 0.7147980332374573, | |
| "learning_rate": 6.309028144385472e-05, | |
| "loss": 0.3602738380432129, | |
| "step": 1745, | |
| "token_acc": 0.8850005269050598 | |
| }, | |
| { | |
| "epoch": 1.3389441469013006, | |
| "grad_norm": 0.6951248049736023, | |
| "learning_rate": 6.288661995771522e-05, | |
| "loss": 0.35432114601135256, | |
| "step": 1750, | |
| "token_acc": 0.8871864080429077 | |
| }, | |
| { | |
| "epoch": 1.3389441469013006, | |
| "eval_loss": 0.5236285924911499, | |
| "eval_runtime": 7.7868, | |
| "eval_samples_per_second": 13.356, | |
| "eval_steps_per_second": 1.669, | |
| "eval_token_acc": 0.8510806560516357, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.342769701606733, | |
| "grad_norm": 0.7138703465461731, | |
| "learning_rate": 6.268272919547537e-05, | |
| "loss": 0.3437394857406616, | |
| "step": 1755, | |
| "token_acc": 0.8870205283164978 | |
| }, | |
| { | |
| "epoch": 1.3465952563121653, | |
| "grad_norm": 0.7315565943717957, | |
| "learning_rate": 6.247861278471785e-05, | |
| "loss": 0.3766175270080566, | |
| "step": 1760, | |
| "token_acc": 0.883225679397583 | |
| }, | |
| { | |
| "epoch": 1.3504208110175975, | |
| "grad_norm": 0.7530694603919983, | |
| "learning_rate": 6.227427435703997e-05, | |
| "loss": 0.3583348035812378, | |
| "step": 1765, | |
| "token_acc": 0.8860324025154114 | |
| }, | |
| { | |
| "epoch": 1.35424636572303, | |
| "grad_norm": 0.7517703175544739, | |
| "learning_rate": 6.206971754798913e-05, | |
| "loss": 0.3681065559387207, | |
| "step": 1770, | |
| "token_acc": 0.8821339011192322 | |
| }, | |
| { | |
| "epoch": 1.3580719204284621, | |
| "grad_norm": 1.0113003253936768, | |
| "learning_rate": 6.186494599699819e-05, | |
| "loss": 0.34742186069488523, | |
| "step": 1775, | |
| "token_acc": 0.8917561173439026 | |
| }, | |
| { | |
| "epoch": 1.3618974751338944, | |
| "grad_norm": 0.9447914361953735, | |
| "learning_rate": 6.165996334732055e-05, | |
| "loss": 0.3852540969848633, | |
| "step": 1780, | |
| "token_acc": 0.8768330216407776 | |
| }, | |
| { | |
| "epoch": 1.3657230298393266, | |
| "grad_norm": 1.0187249183654785, | |
| "learning_rate": 6.145477324596552e-05, | |
| "loss": 0.41319589614868163, | |
| "step": 1785, | |
| "token_acc": 0.8700772523880005 | |
| }, | |
| { | |
| "epoch": 1.369548584544759, | |
| "grad_norm": 0.716583251953125, | |
| "learning_rate": 6.124937934363331e-05, | |
| "loss": 0.33546440601348876, | |
| "step": 1790, | |
| "token_acc": 0.8909059166908264 | |
| }, | |
| { | |
| "epoch": 1.3733741392501913, | |
| "grad_norm": 0.7670001983642578, | |
| "learning_rate": 6.104378529465009e-05, | |
| "loss": 0.35624008178710936, | |
| "step": 1795, | |
| "token_acc": 0.8858749270439148 | |
| }, | |
| { | |
| "epoch": 1.3771996939556237, | |
| "grad_norm": 0.7541671991348267, | |
| "learning_rate": 6.083799475690309e-05, | |
| "loss": 0.38024513721466063, | |
| "step": 1800, | |
| "token_acc": 0.8788754343986511 | |
| }, | |
| { | |
| "epoch": 1.3771996939556237, | |
| "eval_loss": 0.5222176909446716, | |
| "eval_runtime": 7.9549, | |
| "eval_samples_per_second": 13.074, | |
| "eval_steps_per_second": 1.634, | |
| "eval_token_acc": 0.8502886891365051, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.381025248661056, | |
| "grad_norm": 0.7164918184280396, | |
| "learning_rate": 6.0632011391775325e-05, | |
| "loss": 0.3274393081665039, | |
| "step": 1805, | |
| "token_acc": 0.8930581212043762 | |
| }, | |
| { | |
| "epoch": 1.3848508033664881, | |
| "grad_norm": 0.7994803786277771, | |
| "learning_rate": 6.0425838864080594e-05, | |
| "loss": 0.37533011436462405, | |
| "step": 1810, | |
| "token_acc": 0.8814812898635864 | |
| }, | |
| { | |
| "epoch": 1.3886763580719204, | |
| "grad_norm": 0.610385000705719, | |
| "learning_rate": 6.0219480841998265e-05, | |
| "loss": 0.3626489877700806, | |
| "step": 1815, | |
| "token_acc": 0.8824625611305237 | |
| }, | |
| { | |
| "epoch": 1.3925019127773526, | |
| "grad_norm": 0.8779500126838684, | |
| "learning_rate": 6.001294099700795e-05, | |
| "loss": 0.3818621873855591, | |
| "step": 1820, | |
| "token_acc": 0.8814284205436707 | |
| }, | |
| { | |
| "epoch": 1.396327467482785, | |
| "grad_norm": 0.9023825526237488, | |
| "learning_rate": 5.980622300382424e-05, | |
| "loss": 0.34031038284301757, | |
| "step": 1825, | |
| "token_acc": 0.8901993036270142 | |
| }, | |
| { | |
| "epoch": 1.4001530221882172, | |
| "grad_norm": 0.7254869937896729, | |
| "learning_rate": 5.959933054033125e-05, | |
| "loss": 0.33964922428131106, | |
| "step": 1830, | |
| "token_acc": 0.8894827365875244 | |
| }, | |
| { | |
| "epoch": 1.4039785768936497, | |
| "grad_norm": 0.7711949944496155, | |
| "learning_rate": 5.9392267287517325e-05, | |
| "loss": 0.37581453323364256, | |
| "step": 1835, | |
| "token_acc": 0.8802526593208313 | |
| }, | |
| { | |
| "epoch": 1.407804131599082, | |
| "grad_norm": 0.8236564993858337, | |
| "learning_rate": 5.918503692940936e-05, | |
| "loss": 0.3631006717681885, | |
| "step": 1840, | |
| "token_acc": 0.8837177753448486 | |
| }, | |
| { | |
| "epoch": 1.4116296863045141, | |
| "grad_norm": 0.729147732257843, | |
| "learning_rate": 5.8977643153007436e-05, | |
| "loss": 0.39508538246154784, | |
| "step": 1845, | |
| "token_acc": 0.8759874701499939 | |
| }, | |
| { | |
| "epoch": 1.4154552410099464, | |
| "grad_norm": 0.7146396636962891, | |
| "learning_rate": 5.8770089648219086e-05, | |
| "loss": 0.38811707496643066, | |
| "step": 1850, | |
| "token_acc": 0.8768134713172913 | |
| }, | |
| { | |
| "epoch": 1.4154552410099464, | |
| "eval_loss": 0.508669912815094, | |
| "eval_runtime": 7.7202, | |
| "eval_samples_per_second": 13.471, | |
| "eval_steps_per_second": 1.684, | |
| "eval_token_acc": 0.8521132469177246, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.4192807957153788, | |
| "grad_norm": 0.729438066482544, | |
| "learning_rate": 5.8562380107793723e-05, | |
| "loss": 0.39258522987365724, | |
| "step": 1855, | |
| "token_acc": 0.8775860071182251 | |
| }, | |
| { | |
| "epoch": 1.423106350420811, | |
| "grad_norm": 0.6797559857368469, | |
| "learning_rate": 5.835451822725691e-05, | |
| "loss": 0.3752496957778931, | |
| "step": 1860, | |
| "token_acc": 0.8779392242431641 | |
| }, | |
| { | |
| "epoch": 1.4269319051262432, | |
| "grad_norm": 0.49813270568847656, | |
| "learning_rate": 5.814650770484461e-05, | |
| "loss": 0.36016933917999266, | |
| "step": 1865, | |
| "token_acc": 0.885236382484436 | |
| }, | |
| { | |
| "epoch": 1.4307574598316757, | |
| "grad_norm": 0.7051418423652649, | |
| "learning_rate": 5.7938352241437366e-05, | |
| "loss": 0.3023838996887207, | |
| "step": 1870, | |
| "token_acc": 0.9016345143318176 | |
| }, | |
| { | |
| "epoch": 1.434583014537108, | |
| "grad_norm": 0.7764083743095398, | |
| "learning_rate": 5.773005554049455e-05, | |
| "loss": 0.3270875453948975, | |
| "step": 1875, | |
| "token_acc": 0.8943535685539246 | |
| }, | |
| { | |
| "epoch": 1.4384085692425401, | |
| "grad_norm": 0.6883430480957031, | |
| "learning_rate": 5.752162130798833e-05, | |
| "loss": 0.3316964864730835, | |
| "step": 1880, | |
| "token_acc": 0.8921953439712524 | |
| }, | |
| { | |
| "epoch": 1.4422341239479723, | |
| "grad_norm": 0.7114600539207458, | |
| "learning_rate": 5.7313053252337854e-05, | |
| "loss": 0.31533355712890626, | |
| "step": 1885, | |
| "token_acc": 0.8978268504142761 | |
| }, | |
| { | |
| "epoch": 1.4460596786534048, | |
| "grad_norm": 0.8558183908462524, | |
| "learning_rate": 5.7104355084343196e-05, | |
| "loss": 0.3653078556060791, | |
| "step": 1890, | |
| "token_acc": 0.885123074054718 | |
| }, | |
| { | |
| "epoch": 1.449885233358837, | |
| "grad_norm": 0.7565247416496277, | |
| "learning_rate": 5.689553051711939e-05, | |
| "loss": 0.3589335441589355, | |
| "step": 1895, | |
| "token_acc": 0.8860511779785156 | |
| }, | |
| { | |
| "epoch": 1.4537107880642695, | |
| "grad_norm": 0.830723762512207, | |
| "learning_rate": 5.668658326603032e-05, | |
| "loss": 0.32294435501098634, | |
| "step": 1900, | |
| "token_acc": 0.8950970768928528 | |
| }, | |
| { | |
| "epoch": 1.4537107880642695, | |
| "eval_loss": 0.5095290541648865, | |
| "eval_runtime": 7.9225, | |
| "eval_samples_per_second": 13.127, | |
| "eval_steps_per_second": 1.641, | |
| "eval_token_acc": 0.8521934151649475, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.4575363427697017, | |
| "grad_norm": 0.707747220993042, | |
| "learning_rate": 5.647751704862263e-05, | |
| "loss": 0.3198162794113159, | |
| "step": 1905, | |
| "token_acc": 0.8932924270629883 | |
| }, | |
| { | |
| "epoch": 1.461361897475134, | |
| "grad_norm": 0.8484877347946167, | |
| "learning_rate": 5.626833558455961e-05, | |
| "loss": 0.34911117553710935, | |
| "step": 1910, | |
| "token_acc": 0.8880250453948975 | |
| }, | |
| { | |
| "epoch": 1.4651874521805661, | |
| "grad_norm": 0.6321529150009155, | |
| "learning_rate": 5.605904259555496e-05, | |
| "loss": 0.3261146306991577, | |
| "step": 1915, | |
| "token_acc": 0.8926582932472229 | |
| }, | |
| { | |
| "epoch": 1.4690130068859983, | |
| "grad_norm": 0.888900101184845, | |
| "learning_rate": 5.5849641805306654e-05, | |
| "loss": 0.34900679588317873, | |
| "step": 1920, | |
| "token_acc": 0.8897786736488342 | |
| }, | |
| { | |
| "epoch": 1.4728385615914308, | |
| "grad_norm": 0.687582790851593, | |
| "learning_rate": 5.564013693943062e-05, | |
| "loss": 0.34392595291137695, | |
| "step": 1925, | |
| "token_acc": 0.8870816826820374 | |
| }, | |
| { | |
| "epoch": 1.476664116296863, | |
| "grad_norm": 0.7888776659965515, | |
| "learning_rate": 5.5430531725394485e-05, | |
| "loss": 0.40218586921691896, | |
| "step": 1930, | |
| "token_acc": 0.8707258105278015 | |
| }, | |
| { | |
| "epoch": 1.4804896710022954, | |
| "grad_norm": 0.7543318867683411, | |
| "learning_rate": 5.522082989245122e-05, | |
| "loss": 0.3061817646026611, | |
| "step": 1935, | |
| "token_acc": 0.9006242156028748 | |
| }, | |
| { | |
| "epoch": 1.4843152257077277, | |
| "grad_norm": 0.8193092942237854, | |
| "learning_rate": 5.501103517157288e-05, | |
| "loss": 0.36248459815979006, | |
| "step": 1940, | |
| "token_acc": 0.8838417530059814 | |
| }, | |
| { | |
| "epoch": 1.48814078041316, | |
| "grad_norm": 0.7776079177856445, | |
| "learning_rate": 5.480115129538409e-05, | |
| "loss": 0.3319098949432373, | |
| "step": 1945, | |
| "token_acc": 0.8905050754547119 | |
| }, | |
| { | |
| "epoch": 1.4919663351185921, | |
| "grad_norm": 0.6906784176826477, | |
| "learning_rate": 5.459118199809577e-05, | |
| "loss": 0.30999135971069336, | |
| "step": 1950, | |
| "token_acc": 0.9011686444282532 | |
| }, | |
| { | |
| "epoch": 1.4919663351185921, | |
| "eval_loss": 0.5100167989730835, | |
| "eval_runtime": 8.6421, | |
| "eval_samples_per_second": 12.034, | |
| "eval_steps_per_second": 1.504, | |
| "eval_token_acc": 0.8534665703773499, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.4957918898240246, | |
| "grad_norm": 0.60188227891922, | |
| "learning_rate": 5.438113101543861e-05, | |
| "loss": 0.3165478467941284, | |
| "step": 1955, | |
| "token_acc": 0.8978914022445679 | |
| }, | |
| { | |
| "epoch": 1.4996174445294568, | |
| "grad_norm": 0.7757999300956726, | |
| "learning_rate": 5.417100208459662e-05, | |
| "loss": 0.33252928256988523, | |
| "step": 1960, | |
| "token_acc": 0.8919309377670288 | |
| }, | |
| { | |
| "epoch": 1.5034429992348892, | |
| "grad_norm": 0.8450996279716492, | |
| "learning_rate": 5.396079894414067e-05, | |
| "loss": 0.3332216739654541, | |
| "step": 1965, | |
| "token_acc": 0.8910924196243286 | |
| }, | |
| { | |
| "epoch": 1.5072685539403214, | |
| "grad_norm": 0.7125052809715271, | |
| "learning_rate": 5.375052533396191e-05, | |
| "loss": 0.32312803268432616, | |
| "step": 1970, | |
| "token_acc": 0.8956630229949951 | |
| }, | |
| { | |
| "epoch": 1.5110941086457537, | |
| "grad_norm": 0.728113055229187, | |
| "learning_rate": 5.354018499520536e-05, | |
| "loss": 0.3401800155639648, | |
| "step": 1975, | |
| "token_acc": 0.8904479742050171 | |
| }, | |
| { | |
| "epoch": 1.5149196633511859, | |
| "grad_norm": 0.5629063844680786, | |
| "learning_rate": 5.332978167020314e-05, | |
| "loss": 0.33483114242553713, | |
| "step": 1980, | |
| "token_acc": 0.8900842666625977 | |
| }, | |
| { | |
| "epoch": 1.518745218056618, | |
| "grad_norm": 0.7541650533676147, | |
| "learning_rate": 5.31193191024081e-05, | |
| "loss": 0.3606285095214844, | |
| "step": 1985, | |
| "token_acc": 0.8818128705024719 | |
| }, | |
| { | |
| "epoch": 1.5225707727620506, | |
| "grad_norm": 0.7752453684806824, | |
| "learning_rate": 5.2908801036327115e-05, | |
| "loss": 0.3571962356567383, | |
| "step": 1990, | |
| "token_acc": 0.8851061463356018 | |
| }, | |
| { | |
| "epoch": 1.5263963274674828, | |
| "grad_norm": 0.7320619225502014, | |
| "learning_rate": 5.269823121745443e-05, | |
| "loss": 0.34485607147216796, | |
| "step": 1995, | |
| "token_acc": 0.8938528895378113 | |
| }, | |
| { | |
| "epoch": 1.5302218821729152, | |
| "grad_norm": 0.7084663510322571, | |
| "learning_rate": 5.248761339220511e-05, | |
| "loss": 0.3630984306335449, | |
| "step": 2000, | |
| "token_acc": 0.8881708979606628 | |
| }, | |
| { | |
| "epoch": 1.5302218821729152, | |
| "eval_loss": 0.501686155796051, | |
| "eval_runtime": 8.3476, | |
| "eval_samples_per_second": 12.459, | |
| "eval_steps_per_second": 1.557, | |
| "eval_token_acc": 0.8552410006523132, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.5340474368783474, | |
| "grad_norm": 0.7146458029747009, | |
| "learning_rate": 5.227695130784833e-05, | |
| "loss": 0.3331026554107666, | |
| "step": 2005, | |
| "token_acc": 0.8917819261550903 | |
| }, | |
| { | |
| "epoch": 1.5378729915837797, | |
| "grad_norm": 0.8245148062705994, | |
| "learning_rate": 5.2066248712440656e-05, | |
| "loss": 0.37367663383483884, | |
| "step": 2010, | |
| "token_acc": 0.879398763179779 | |
| }, | |
| { | |
| "epoch": 1.5416985462892119, | |
| "grad_norm": 0.7592694163322449, | |
| "learning_rate": 5.185550935475953e-05, | |
| "loss": 0.30876760482788085, | |
| "step": 2015, | |
| "token_acc": 0.8983100056648254 | |
| }, | |
| { | |
| "epoch": 1.545524100994644, | |
| "grad_norm": 0.9255443215370178, | |
| "learning_rate": 5.164473698423636e-05, | |
| "loss": 0.3594630241394043, | |
| "step": 2020, | |
| "token_acc": 0.8848262429237366 | |
| }, | |
| { | |
| "epoch": 1.5493496557000765, | |
| "grad_norm": 0.7179040908813477, | |
| "learning_rate": 5.143393535088998e-05, | |
| "loss": 0.3523809194564819, | |
| "step": 2025, | |
| "token_acc": 0.8905043601989746 | |
| }, | |
| { | |
| "epoch": 1.5531752104055088, | |
| "grad_norm": 0.7476411461830139, | |
| "learning_rate": 5.122310820525981e-05, | |
| "loss": 0.3416067361831665, | |
| "step": 2030, | |
| "token_acc": 0.8892166018486023 | |
| }, | |
| { | |
| "epoch": 1.5570007651109412, | |
| "grad_norm": 0.7161547541618347, | |
| "learning_rate": 5.101225929833921e-05, | |
| "loss": 0.30915536880493166, | |
| "step": 2035, | |
| "token_acc": 0.8991933465003967 | |
| }, | |
| { | |
| "epoch": 1.5608263198163734, | |
| "grad_norm": 0.935799777507782, | |
| "learning_rate": 5.08013923815087e-05, | |
| "loss": 0.31090846061706545, | |
| "step": 2040, | |
| "token_acc": 0.8967577815055847 | |
| }, | |
| { | |
| "epoch": 1.5646518745218057, | |
| "grad_norm": 0.7758647799491882, | |
| "learning_rate": 5.059051120646924e-05, | |
| "loss": 0.3375053882598877, | |
| "step": 2045, | |
| "token_acc": 0.8911775350570679 | |
| }, | |
| { | |
| "epoch": 1.5684774292272379, | |
| "grad_norm": 0.6921541094779968, | |
| "learning_rate": 5.0379619525175437e-05, | |
| "loss": 0.3175233840942383, | |
| "step": 2050, | |
| "token_acc": 0.897928774356842 | |
| }, | |
| { | |
| "epoch": 1.5684774292272379, | |
| "eval_loss": 0.49708712100982666, | |
| "eval_runtime": 8.4069, | |
| "eval_samples_per_second": 12.371, | |
| "eval_steps_per_second": 1.546, | |
| "eval_token_acc": 0.8562836050987244, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.57230298393267, | |
| "grad_norm": 0.8368853330612183, | |
| "learning_rate": 5.016872108976889e-05, | |
| "loss": 0.3685647964477539, | |
| "step": 2055, | |
| "token_acc": 0.8830959796905518 | |
| }, | |
| { | |
| "epoch": 1.5761285386381025, | |
| "grad_norm": 0.7727574706077576, | |
| "learning_rate": 4.99578196525113e-05, | |
| "loss": 0.326021146774292, | |
| "step": 2060, | |
| "token_acc": 0.8955893516540527 | |
| }, | |
| { | |
| "epoch": 1.5799540933435348, | |
| "grad_norm": 0.7962800860404968, | |
| "learning_rate": 4.974691896571781e-05, | |
| "loss": 0.36289157867431643, | |
| "step": 2065, | |
| "token_acc": 0.8842934370040894 | |
| }, | |
| { | |
| "epoch": 1.5837796480489672, | |
| "grad_norm": 0.7509872317314148, | |
| "learning_rate": 4.9536022781690185e-05, | |
| "loss": 0.31728103160858157, | |
| "step": 2070, | |
| "token_acc": 0.8965554237365723 | |
| }, | |
| { | |
| "epoch": 1.5876052027543994, | |
| "grad_norm": 0.6993099451065063, | |
| "learning_rate": 4.9325134852650124e-05, | |
| "loss": 0.36268980503082277, | |
| "step": 2075, | |
| "token_acc": 0.8835968375205994 | |
| }, | |
| { | |
| "epoch": 1.5914307574598316, | |
| "grad_norm": 0.7634088397026062, | |
| "learning_rate": 4.911425893067239e-05, | |
| "loss": 0.368328332901001, | |
| "step": 2080, | |
| "token_acc": 0.8840143084526062 | |
| }, | |
| { | |
| "epoch": 1.5952563121652639, | |
| "grad_norm": 0.734311580657959, | |
| "learning_rate": 4.8903398767618165e-05, | |
| "loss": 0.3379722833633423, | |
| "step": 2085, | |
| "token_acc": 0.8937978148460388 | |
| }, | |
| { | |
| "epoch": 1.599081866870696, | |
| "grad_norm": 1.7793625593185425, | |
| "learning_rate": 4.8692558115068254e-05, | |
| "loss": 0.33839640617370603, | |
| "step": 2090, | |
| "token_acc": 0.8909159302711487 | |
| }, | |
| { | |
| "epoch": 1.6029074215761285, | |
| "grad_norm": 0.6846344470977783, | |
| "learning_rate": 4.8481740724256324e-05, | |
| "loss": 0.36859283447265623, | |
| "step": 2095, | |
| "token_acc": 0.8814284801483154 | |
| }, | |
| { | |
| "epoch": 1.606732976281561, | |
| "grad_norm": 0.7191367149353027, | |
| "learning_rate": 4.827095034600215e-05, | |
| "loss": 0.32262775897979734, | |
| "step": 2100, | |
| "token_acc": 0.8948466777801514 | |
| }, | |
| { | |
| "epoch": 1.606732976281561, | |
| "eval_loss": 0.49555426836013794, | |
| "eval_runtime": 8.4995, | |
| "eval_samples_per_second": 12.236, | |
| "eval_steps_per_second": 1.53, | |
| "eval_token_acc": 0.8567647933959961, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.6105585309869932, | |
| "grad_norm": 0.7318239808082581, | |
| "learning_rate": 4.806019073064493e-05, | |
| "loss": 0.28886990547180175, | |
| "step": 2105, | |
| "token_acc": 0.9057518243789673 | |
| }, | |
| { | |
| "epoch": 1.6143840856924254, | |
| "grad_norm": 0.7161886096000671, | |
| "learning_rate": 4.7849465627976574e-05, | |
| "loss": 0.3786638259887695, | |
| "step": 2110, | |
| "token_acc": 0.877372682094574 | |
| }, | |
| { | |
| "epoch": 1.6182096403978576, | |
| "grad_norm": 0.7079288959503174, | |
| "learning_rate": 4.763877878717484e-05, | |
| "loss": 0.3339807987213135, | |
| "step": 2115, | |
| "token_acc": 0.892234742641449 | |
| }, | |
| { | |
| "epoch": 1.6220351951032899, | |
| "grad_norm": 0.7738683819770813, | |
| "learning_rate": 4.742813395673684e-05, | |
| "loss": 0.3155964851379395, | |
| "step": 2120, | |
| "token_acc": 0.8984229564666748 | |
| }, | |
| { | |
| "epoch": 1.6258607498087223, | |
| "grad_norm": 0.7651445269584656, | |
| "learning_rate": 4.721753488441222e-05, | |
| "loss": 0.34331388473510743, | |
| "step": 2125, | |
| "token_acc": 0.8891043663024902 | |
| }, | |
| { | |
| "epoch": 1.6296863045141545, | |
| "grad_norm": 0.7328031063079834, | |
| "learning_rate": 4.700698531713648e-05, | |
| "loss": 0.3365816354751587, | |
| "step": 2130, | |
| "token_acc": 0.8924189805984497 | |
| }, | |
| { | |
| "epoch": 1.633511859219587, | |
| "grad_norm": 0.7824881672859192, | |
| "learning_rate": 4.679648900096436e-05, | |
| "loss": 0.3375370502471924, | |
| "step": 2135, | |
| "token_acc": 0.8933680653572083 | |
| }, | |
| { | |
| "epoch": 1.6373374139250192, | |
| "grad_norm": 0.7239261269569397, | |
| "learning_rate": 4.658604968100318e-05, | |
| "loss": 0.44536380767822265, | |
| "step": 2140, | |
| "token_acc": 0.8609479665756226 | |
| }, | |
| { | |
| "epoch": 1.6411629686304514, | |
| "grad_norm": 0.8158916234970093, | |
| "learning_rate": 4.6375671101346135e-05, | |
| "loss": 0.31634106636047366, | |
| "step": 2145, | |
| "token_acc": 0.8972258567810059 | |
| }, | |
| { | |
| "epoch": 1.6449885233358836, | |
| "grad_norm": 0.6787914633750916, | |
| "learning_rate": 4.616535700500583e-05, | |
| "loss": 0.3428164005279541, | |
| "step": 2150, | |
| "token_acc": 0.8936346769332886 | |
| }, | |
| { | |
| "epoch": 1.6449885233358836, | |
| "eval_loss": 0.4892226755619049, | |
| "eval_runtime": 8.5201, | |
| "eval_samples_per_second": 12.206, | |
| "eval_steps_per_second": 1.526, | |
| "eval_token_acc": 0.8588098287582397, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.6488140780413159, | |
| "grad_norm": 0.7179057002067566, | |
| "learning_rate": 4.5955111133847516e-05, | |
| "loss": 0.3500206470489502, | |
| "step": 2155, | |
| "token_acc": 0.8879844546318054 | |
| }, | |
| { | |
| "epoch": 1.6526396327467483, | |
| "grad_norm": 0.9363833665847778, | |
| "learning_rate": 4.574493722852266e-05, | |
| "loss": 0.33152313232421876, | |
| "step": 2160, | |
| "token_acc": 0.8924428820610046 | |
| }, | |
| { | |
| "epoch": 1.6564651874521805, | |
| "grad_norm": 0.8011144995689392, | |
| "learning_rate": 4.553483902840227e-05, | |
| "loss": 0.33824012279510496, | |
| "step": 2165, | |
| "token_acc": 0.888818621635437 | |
| }, | |
| { | |
| "epoch": 1.660290742157613, | |
| "grad_norm": 0.754247784614563, | |
| "learning_rate": 4.5324820271510446e-05, | |
| "loss": 0.3261884689331055, | |
| "step": 2170, | |
| "token_acc": 0.8930807709693909 | |
| }, | |
| { | |
| "epoch": 1.6641162968630452, | |
| "grad_norm": 0.8901833891868591, | |
| "learning_rate": 4.5114884694457906e-05, | |
| "loss": 0.3530290603637695, | |
| "step": 2175, | |
| "token_acc": 0.8864350914955139 | |
| }, | |
| { | |
| "epoch": 1.6679418515684774, | |
| "grad_norm": 0.7795696258544922, | |
| "learning_rate": 4.490503603237532e-05, | |
| "loss": 0.28058276176452634, | |
| "step": 2180, | |
| "token_acc": 0.9070743322372437 | |
| }, | |
| { | |
| "epoch": 1.6717674062739096, | |
| "grad_norm": 0.7988150119781494, | |
| "learning_rate": 4.4695278018847105e-05, | |
| "loss": 0.3197885036468506, | |
| "step": 2185, | |
| "token_acc": 0.8948556780815125 | |
| }, | |
| { | |
| "epoch": 1.6755929609793418, | |
| "grad_norm": 0.7500495910644531, | |
| "learning_rate": 4.448561438584484e-05, | |
| "loss": 0.30902011394500734, | |
| "step": 2190, | |
| "token_acc": 0.8987115621566772 | |
| }, | |
| { | |
| "epoch": 1.6794185156847743, | |
| "grad_norm": 0.8123504519462585, | |
| "learning_rate": 4.4276048863660874e-05, | |
| "loss": 0.34034423828125, | |
| "step": 2195, | |
| "token_acc": 0.8910139203071594 | |
| }, | |
| { | |
| "epoch": 1.6832440703902067, | |
| "grad_norm": 1.9124935865402222, | |
| "learning_rate": 4.406658518084201e-05, | |
| "loss": 0.27848803997039795, | |
| "step": 2200, | |
| "token_acc": 0.9100915789604187 | |
| }, | |
| { | |
| "epoch": 1.6832440703902067, | |
| "eval_loss": 0.48997873067855835, | |
| "eval_runtime": 7.7857, | |
| "eval_samples_per_second": 13.358, | |
| "eval_steps_per_second": 1.67, | |
| "eval_token_acc": 0.8590003252029419, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.687069625095639, | |
| "grad_norm": 0.7550795674324036, | |
| "learning_rate": 4.3857227064123184e-05, | |
| "loss": 0.3289813995361328, | |
| "step": 2205, | |
| "token_acc": 0.8935672044754028 | |
| }, | |
| { | |
| "epoch": 1.6908951798010712, | |
| "grad_norm": 0.6573622822761536, | |
| "learning_rate": 4.364797823836108e-05, | |
| "loss": 0.3325567483901978, | |
| "step": 2210, | |
| "token_acc": 0.8916365504264832 | |
| }, | |
| { | |
| "epoch": 1.6947207345065034, | |
| "grad_norm": 0.7994371056556702, | |
| "learning_rate": 4.3438842426467885e-05, | |
| "loss": 0.3089787483215332, | |
| "step": 2215, | |
| "token_acc": 0.8987955451011658 | |
| }, | |
| { | |
| "epoch": 1.6985462892119356, | |
| "grad_norm": 0.7001591920852661, | |
| "learning_rate": 4.322982334934509e-05, | |
| "loss": 0.3258508682250977, | |
| "step": 2220, | |
| "token_acc": 0.89599609375 | |
| }, | |
| { | |
| "epoch": 1.702371843917368, | |
| "grad_norm": 0.7623443603515625, | |
| "learning_rate": 4.302092472581729e-05, | |
| "loss": 0.29424998760223386, | |
| "step": 2225, | |
| "token_acc": 0.9034655094146729 | |
| }, | |
| { | |
| "epoch": 1.7061973986228003, | |
| "grad_norm": 0.8438885807991028, | |
| "learning_rate": 4.281215027256592e-05, | |
| "loss": 0.30596625804901123, | |
| "step": 2230, | |
| "token_acc": 0.8992859125137329 | |
| }, | |
| { | |
| "epoch": 1.7100229533282327, | |
| "grad_norm": 0.7240939736366272, | |
| "learning_rate": 4.260350370406329e-05, | |
| "loss": 0.30459909439086913, | |
| "step": 2235, | |
| "token_acc": 0.8981994986534119 | |
| }, | |
| { | |
| "epoch": 1.713848508033665, | |
| "grad_norm": 0.630903422832489, | |
| "learning_rate": 4.239498873250637e-05, | |
| "loss": 0.2987601041793823, | |
| "step": 2240, | |
| "token_acc": 0.9012813568115234 | |
| }, | |
| { | |
| "epoch": 1.7176740627390972, | |
| "grad_norm": 0.6413953304290771, | |
| "learning_rate": 4.218660906775076e-05, | |
| "loss": 0.27812976837158204, | |
| "step": 2245, | |
| "token_acc": 0.9085516929626465 | |
| }, | |
| { | |
| "epoch": 1.7214996174445294, | |
| "grad_norm": 0.8842605948448181, | |
| "learning_rate": 4.1978368417244754e-05, | |
| "loss": 0.3460667610168457, | |
| "step": 2250, | |
| "token_acc": 0.8905196785926819 | |
| }, | |
| { | |
| "epoch": 1.7214996174445294, | |
| "eval_loss": 0.48436981439590454, | |
| "eval_runtime": 6.1279, | |
| "eval_samples_per_second": 16.972, | |
| "eval_steps_per_second": 2.121, | |
| "eval_token_acc": 0.860263466835022, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.7253251721499616, | |
| "grad_norm": 0.6968632340431213, | |
| "learning_rate": 4.17702704859633e-05, | |
| "loss": 0.29213814735412597, | |
| "step": 2255, | |
| "token_acc": 0.9040796160697937 | |
| }, | |
| { | |
| "epoch": 1.729150726855394, | |
| "grad_norm": 0.7017317414283752, | |
| "learning_rate": 4.1562318976342165e-05, | |
| "loss": 0.3319288730621338, | |
| "step": 2260, | |
| "token_acc": 0.8922781944274902 | |
| }, | |
| { | |
| "epoch": 1.7329762815608263, | |
| "grad_norm": 0.7793192267417908, | |
| "learning_rate": 4.135451758821191e-05, | |
| "loss": 0.3711602210998535, | |
| "step": 2265, | |
| "token_acc": 0.8815440535545349 | |
| }, | |
| { | |
| "epoch": 1.7368018362662587, | |
| "grad_norm": 0.870146632194519, | |
| "learning_rate": 4.114687001873228e-05, | |
| "loss": 0.3280991554260254, | |
| "step": 2270, | |
| "token_acc": 0.8962957262992859 | |
| }, | |
| { | |
| "epoch": 1.740627390971691, | |
| "grad_norm": 0.6839405298233032, | |
| "learning_rate": 4.093937996232625e-05, | |
| "loss": 0.31872236728668213, | |
| "step": 2275, | |
| "token_acc": 0.8943005204200745 | |
| }, | |
| { | |
| "epoch": 1.7444529456771232, | |
| "grad_norm": 0.7605020999908447, | |
| "learning_rate": 4.073205111061436e-05, | |
| "loss": 0.31961095333099365, | |
| "step": 2280, | |
| "token_acc": 0.8964794278144836 | |
| }, | |
| { | |
| "epoch": 1.7482785003825554, | |
| "grad_norm": 0.6984594464302063, | |
| "learning_rate": 4.052488715234902e-05, | |
| "loss": 0.31977455615997313, | |
| "step": 2285, | |
| "token_acc": 0.8969309329986572 | |
| }, | |
| { | |
| "epoch": 1.7521040550879876, | |
| "grad_norm": 0.7754748463630676, | |
| "learning_rate": 4.0317891773348946e-05, | |
| "loss": 0.31035671234130857, | |
| "step": 2290, | |
| "token_acc": 0.8990971446037292 | |
| }, | |
| { | |
| "epoch": 1.75592960979342, | |
| "grad_norm": 0.8007567524909973, | |
| "learning_rate": 4.0111068656433426e-05, | |
| "loss": 0.34440956115722654, | |
| "step": 2295, | |
| "token_acc": 0.8881877660751343 | |
| }, | |
| { | |
| "epoch": 1.7597551644988525, | |
| "grad_norm": 0.9330772161483765, | |
| "learning_rate": 3.9904421481357e-05, | |
| "loss": 0.3286851406097412, | |
| "step": 2300, | |
| "token_acc": 0.8939043283462524 | |
| }, | |
| { | |
| "epoch": 1.7597551644988525, | |
| "eval_loss": 0.4778790771961212, | |
| "eval_runtime": 7.806, | |
| "eval_samples_per_second": 13.323, | |
| "eval_steps_per_second": 1.665, | |
| "eval_token_acc": 0.8623987436294556, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.7635807192042847, | |
| "grad_norm": 0.5906277894973755, | |
| "learning_rate": 3.969795392474383e-05, | |
| "loss": 0.34573922157287595, | |
| "step": 2305, | |
| "token_acc": 0.8903287053108215 | |
| }, | |
| { | |
| "epoch": 1.767406273909717, | |
| "grad_norm": 0.7397768497467041, | |
| "learning_rate": 3.9491669660022345e-05, | |
| "loss": 0.35153021812438967, | |
| "step": 2310, | |
| "token_acc": 0.8872886896133423 | |
| }, | |
| { | |
| "epoch": 1.7712318286151492, | |
| "grad_norm": 0.7996999025344849, | |
| "learning_rate": 3.928557235735989e-05, | |
| "loss": 0.31516518592834475, | |
| "step": 2315, | |
| "token_acc": 0.8970757722854614 | |
| }, | |
| { | |
| "epoch": 1.7750573833205814, | |
| "grad_norm": 0.6419305205345154, | |
| "learning_rate": 3.907966568359742e-05, | |
| "loss": 0.3054972171783447, | |
| "step": 2320, | |
| "token_acc": 0.8993676900863647 | |
| }, | |
| { | |
| "epoch": 1.7788829380260138, | |
| "grad_norm": 0.6739971041679382, | |
| "learning_rate": 3.887395330218429e-05, | |
| "loss": 0.3448510646820068, | |
| "step": 2325, | |
| "token_acc": 0.8888943195343018 | |
| }, | |
| { | |
| "epoch": 1.782708492731446, | |
| "grad_norm": 0.7799039483070374, | |
| "learning_rate": 3.866843887311297e-05, | |
| "loss": 0.31788105964660646, | |
| "step": 2330, | |
| "token_acc": 0.8954451680183411 | |
| }, | |
| { | |
| "epoch": 1.7865340474368785, | |
| "grad_norm": 0.7341748476028442, | |
| "learning_rate": 3.846312605285408e-05, | |
| "loss": 0.34601006507873533, | |
| "step": 2335, | |
| "token_acc": 0.8898206353187561 | |
| }, | |
| { | |
| "epoch": 1.7903596021423107, | |
| "grad_norm": 0.7024774551391602, | |
| "learning_rate": 3.8258018494291234e-05, | |
| "loss": 0.32241551876068114, | |
| "step": 2340, | |
| "token_acc": 0.89708012342453 | |
| }, | |
| { | |
| "epoch": 1.794185156847743, | |
| "grad_norm": 0.7515860795974731, | |
| "learning_rate": 3.8053119846656026e-05, | |
| "loss": 0.30928614139556887, | |
| "step": 2345, | |
| "token_acc": 0.8996888995170593 | |
| }, | |
| { | |
| "epoch": 1.7980107115531752, | |
| "grad_norm": 0.8652954697608948, | |
| "learning_rate": 3.78484337554632e-05, | |
| "loss": 0.30088629722595217, | |
| "step": 2350, | |
| "token_acc": 0.9041286110877991 | |
| }, | |
| { | |
| "epoch": 1.7980107115531752, | |
| "eval_loss": 0.47428014874458313, | |
| "eval_runtime": 7.8145, | |
| "eval_samples_per_second": 13.309, | |
| "eval_steps_per_second": 1.664, | |
| "eval_token_acc": 0.8631907105445862, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.8018362662586074, | |
| "grad_norm": 0.9508410692214966, | |
| "learning_rate": 3.764396386244577e-05, | |
| "loss": 0.34288840293884276, | |
| "step": 2355, | |
| "token_acc": 0.8890052437782288 | |
| }, | |
| { | |
| "epoch": 1.8056618209640398, | |
| "grad_norm": 0.775829017162323, | |
| "learning_rate": 3.743971380549008e-05, | |
| "loss": 0.30949153900146487, | |
| "step": 2360, | |
| "token_acc": 0.8984510898590088 | |
| }, | |
| { | |
| "epoch": 1.809487375669472, | |
| "grad_norm": 0.6938086152076721, | |
| "learning_rate": 3.723568721857133e-05, | |
| "loss": 0.28354833126068113, | |
| "step": 2365, | |
| "token_acc": 0.9054216146469116 | |
| }, | |
| { | |
| "epoch": 1.8133129303749045, | |
| "grad_norm": 0.6911359429359436, | |
| "learning_rate": 3.703188773168869e-05, | |
| "loss": 0.2959973096847534, | |
| "step": 2370, | |
| "token_acc": 0.9038095474243164 | |
| }, | |
| { | |
| "epoch": 1.8171384850803367, | |
| "grad_norm": 60.64387130737305, | |
| "learning_rate": 3.682831897080087e-05, | |
| "loss": 0.40934906005859373, | |
| "step": 2375, | |
| "token_acc": 0.8823349475860596 | |
| }, | |
| { | |
| "epoch": 1.820964039785769, | |
| "grad_norm": 0.7439799308776855, | |
| "learning_rate": 3.6624984557761504e-05, | |
| "loss": 0.2931365489959717, | |
| "step": 2380, | |
| "token_acc": 0.9051112532615662 | |
| }, | |
| { | |
| "epoch": 1.8247895944912012, | |
| "grad_norm": 0.6623691320419312, | |
| "learning_rate": 3.642188811025481e-05, | |
| "loss": 0.3292604207992554, | |
| "step": 2385, | |
| "token_acc": 0.8928682208061218 | |
| }, | |
| { | |
| "epoch": 1.8286151491966334, | |
| "grad_norm": 0.6264249086380005, | |
| "learning_rate": 3.621903324173114e-05, | |
| "loss": 0.265956974029541, | |
| "step": 2390, | |
| "token_acc": 0.9118374586105347 | |
| }, | |
| { | |
| "epoch": 1.8324407039020658, | |
| "grad_norm": 0.8278756737709045, | |
| "learning_rate": 3.6016423561342706e-05, | |
| "loss": 0.29644384384155276, | |
| "step": 2395, | |
| "token_acc": 0.9024685025215149 | |
| }, | |
| { | |
| "epoch": 1.836266258607498, | |
| "grad_norm": 0.810718297958374, | |
| "learning_rate": 3.581406267387941e-05, | |
| "loss": 0.281774640083313, | |
| "step": 2400, | |
| "token_acc": 0.9071557521820068 | |
| }, | |
| { | |
| "epoch": 1.836266258607498, | |
| "eval_loss": 0.47047871351242065, | |
| "eval_runtime": 7.868, | |
| "eval_samples_per_second": 13.218, | |
| "eval_steps_per_second": 1.652, | |
| "eval_token_acc": 0.8643736243247986, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.8400918133129305, | |
| "grad_norm": 0.7788925170898438, | |
| "learning_rate": 3.56119541797047e-05, | |
| "loss": 0.3004364013671875, | |
| "step": 2405, | |
| "token_acc": 0.8989213705062866 | |
| }, | |
| { | |
| "epoch": 1.8439173680183627, | |
| "grad_norm": 0.7350240349769592, | |
| "learning_rate": 3.5410101674691434e-05, | |
| "loss": 0.3446574449539185, | |
| "step": 2410, | |
| "token_acc": 0.8929014801979065 | |
| }, | |
| { | |
| "epoch": 1.847742922723795, | |
| "grad_norm": 0.7535839080810547, | |
| "learning_rate": 3.520850875015801e-05, | |
| "loss": 0.31823389530181884, | |
| "step": 2415, | |
| "token_acc": 0.896795928478241 | |
| }, | |
| { | |
| "epoch": 1.8515684774292271, | |
| "grad_norm": 0.8284432291984558, | |
| "learning_rate": 3.5007178992804416e-05, | |
| "loss": 0.30584444999694826, | |
| "step": 2420, | |
| "token_acc": 0.9038248658180237 | |
| }, | |
| { | |
| "epoch": 1.8553940321346594, | |
| "grad_norm": 0.8060945272445679, | |
| "learning_rate": 3.480611598464844e-05, | |
| "loss": 0.2657127857208252, | |
| "step": 2425, | |
| "token_acc": 0.9115975499153137 | |
| }, | |
| { | |
| "epoch": 1.8592195868400918, | |
| "grad_norm": 0.6967042684555054, | |
| "learning_rate": 3.4605323302961854e-05, | |
| "loss": 0.30145883560180664, | |
| "step": 2430, | |
| "token_acc": 0.9007070064544678 | |
| }, | |
| { | |
| "epoch": 1.8630451415455243, | |
| "grad_norm": 0.827389657497406, | |
| "learning_rate": 3.4404804520206915e-05, | |
| "loss": 0.3457145929336548, | |
| "step": 2435, | |
| "token_acc": 0.889440655708313 | |
| }, | |
| { | |
| "epoch": 1.8668706962509565, | |
| "grad_norm": 0.7290979027748108, | |
| "learning_rate": 3.42045632039727e-05, | |
| "loss": 0.29812381267547605, | |
| "step": 2440, | |
| "token_acc": 0.9029287695884705 | |
| }, | |
| { | |
| "epoch": 1.8706962509563887, | |
| "grad_norm": 0.8037905693054199, | |
| "learning_rate": 3.400460291691164e-05, | |
| "loss": 0.32248711585998535, | |
| "step": 2445, | |
| "token_acc": 0.8946207165718079 | |
| }, | |
| { | |
| "epoch": 1.874521805661821, | |
| "grad_norm": 0.7474591732025146, | |
| "learning_rate": 3.380492721667618e-05, | |
| "loss": 0.3022623062133789, | |
| "step": 2450, | |
| "token_acc": 0.9007507562637329 | |
| }, | |
| { | |
| "epoch": 1.874521805661821, | |
| "eval_loss": 0.46530866622924805, | |
| "eval_runtime": 7.8984, | |
| "eval_samples_per_second": 13.167, | |
| "eval_steps_per_second": 1.646, | |
| "eval_token_acc": 0.8647946715354919, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.8783473603672531, | |
| "grad_norm": 0.71452397108078, | |
| "learning_rate": 3.3605539655855445e-05, | |
| "loss": 0.28342552185058595, | |
| "step": 2455, | |
| "token_acc": 0.9065305590629578 | |
| }, | |
| { | |
| "epoch": 1.8821729150726856, | |
| "grad_norm": 0.7897852659225464, | |
| "learning_rate": 3.3406443781912014e-05, | |
| "loss": 0.2861522912979126, | |
| "step": 2460, | |
| "token_acc": 0.9051787257194519 | |
| }, | |
| { | |
| "epoch": 1.8859984697781178, | |
| "grad_norm": 0.7614904642105103, | |
| "learning_rate": 3.3207643137118874e-05, | |
| "loss": 0.2704183578491211, | |
| "step": 2465, | |
| "token_acc": 0.911378026008606 | |
| }, | |
| { | |
| "epoch": 1.8898240244835502, | |
| "grad_norm": 0.6754797697067261, | |
| "learning_rate": 3.3009141258496344e-05, | |
| "loss": 0.31130855083465575, | |
| "step": 2470, | |
| "token_acc": 0.8980752229690552 | |
| }, | |
| { | |
| "epoch": 1.8936495791889825, | |
| "grad_norm": 0.7454941272735596, | |
| "learning_rate": 3.2810941677749164e-05, | |
| "loss": 0.34280953407287595, | |
| "step": 2475, | |
| "token_acc": 0.8920162320137024 | |
| }, | |
| { | |
| "epoch": 1.8974751338944147, | |
| "grad_norm": 0.7202689051628113, | |
| "learning_rate": 3.261304792120361e-05, | |
| "loss": 0.2786979675292969, | |
| "step": 2480, | |
| "token_acc": 0.907993495464325 | |
| }, | |
| { | |
| "epoch": 1.901300688599847, | |
| "grad_norm": 0.7289252281188965, | |
| "learning_rate": 3.2415463509744855e-05, | |
| "loss": 0.28704142570495605, | |
| "step": 2485, | |
| "token_acc": 0.9051684141159058 | |
| }, | |
| { | |
| "epoch": 1.9051262433052791, | |
| "grad_norm": 0.7389020919799805, | |
| "learning_rate": 3.2218191958754226e-05, | |
| "loss": 0.3317502498626709, | |
| "step": 2490, | |
| "token_acc": 0.8912999629974365 | |
| }, | |
| { | |
| "epoch": 1.9089517980107116, | |
| "grad_norm": 0.7187902331352234, | |
| "learning_rate": 3.202123677804672e-05, | |
| "loss": 0.32085230350494387, | |
| "step": 2495, | |
| "token_acc": 0.8973221182823181 | |
| }, | |
| { | |
| "epoch": 1.9127773527161438, | |
| "grad_norm": 0.780617892742157, | |
| "learning_rate": 3.18246014718085e-05, | |
| "loss": 0.2799449682235718, | |
| "step": 2500, | |
| "token_acc": 0.9089812636375427 | |
| }, | |
| { | |
| "epoch": 1.9127773527161438, | |
| "eval_loss": 0.4558640122413635, | |
| "eval_runtime": 7.6268, | |
| "eval_samples_per_second": 13.636, | |
| "eval_steps_per_second": 1.705, | |
| "eval_token_acc": 0.8680527806282043, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.9166029074215762, | |
| "grad_norm": 0.7578943967819214, | |
| "learning_rate": 3.162828953853469e-05, | |
| "loss": 0.283012843132019, | |
| "step": 2505, | |
| "token_acc": 0.908361554145813 | |
| }, | |
| { | |
| "epoch": 1.9204284621270085, | |
| "grad_norm": 0.7080029249191284, | |
| "learning_rate": 3.14323044709669e-05, | |
| "loss": 0.26364171504974365, | |
| "step": 2510, | |
| "token_acc": 0.9134095311164856 | |
| }, | |
| { | |
| "epoch": 1.9242540168324407, | |
| "grad_norm": 0.7052859663963318, | |
| "learning_rate": 3.12366497560313e-05, | |
| "loss": 0.28186535835266113, | |
| "step": 2515, | |
| "token_acc": 0.9079092741012573 | |
| }, | |
| { | |
| "epoch": 1.928079571537873, | |
| "grad_norm": 0.722137451171875, | |
| "learning_rate": 3.104132887477647e-05, | |
| "loss": 0.2929178953170776, | |
| "step": 2520, | |
| "token_acc": 0.9022585153579712 | |
| }, | |
| { | |
| "epoch": 1.9319051262433051, | |
| "grad_norm": 0.6590465903282166, | |
| "learning_rate": 3.084634530231145e-05, | |
| "loss": 0.29388132095336916, | |
| "step": 2525, | |
| "token_acc": 0.9019988179206848 | |
| }, | |
| { | |
| "epoch": 1.9357306809487376, | |
| "grad_norm": 0.7757251858711243, | |
| "learning_rate": 3.065170250774401e-05, | |
| "loss": 0.3049909591674805, | |
| "step": 2530, | |
| "token_acc": 0.8986476063728333 | |
| }, | |
| { | |
| "epoch": 1.93955623565417, | |
| "grad_norm": 0.7149041295051575, | |
| "learning_rate": 3.0457403954118856e-05, | |
| "loss": 0.2536777019500732, | |
| "step": 2535, | |
| "token_acc": 0.9141318202018738 | |
| }, | |
| { | |
| "epoch": 1.9433817903596022, | |
| "grad_norm": 0.6480096578598022, | |
| "learning_rate": 3.026345309835602e-05, | |
| "loss": 0.3146909952163696, | |
| "step": 2540, | |
| "token_acc": 0.8978093266487122 | |
| }, | |
| { | |
| "epoch": 1.9472073450650345, | |
| "grad_norm": 0.7162771224975586, | |
| "learning_rate": 3.0069853391189352e-05, | |
| "loss": 0.29620161056518557, | |
| "step": 2545, | |
| "token_acc": 0.9032965302467346 | |
| }, | |
| { | |
| "epoch": 1.9510328997704667, | |
| "grad_norm": 0.6839264631271362, | |
| "learning_rate": 2.9876608277105145e-05, | |
| "loss": 0.3268457889556885, | |
| "step": 2550, | |
| "token_acc": 0.8923251032829285 | |
| }, | |
| { | |
| "epoch": 1.9510328997704667, | |
| "eval_loss": 0.4463500678539276, | |
| "eval_runtime": 7.6674, | |
| "eval_samples_per_second": 13.564, | |
| "eval_steps_per_second": 1.695, | |
| "eval_token_acc": 0.8691655397415161, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.954858454475899, | |
| "grad_norm": 0.7191382050514221, | |
| "learning_rate": 2.9683721194280877e-05, | |
| "loss": 0.2873558044433594, | |
| "step": 2555, | |
| "token_acc": 0.9027012586593628 | |
| }, | |
| { | |
| "epoch": 1.9586840091813313, | |
| "grad_norm": 0.7788121700286865, | |
| "learning_rate": 2.9491195574523945e-05, | |
| "loss": 0.29071290493011476, | |
| "step": 2560, | |
| "token_acc": 0.9054592251777649 | |
| }, | |
| { | |
| "epoch": 1.9625095638867636, | |
| "grad_norm": 0.6830841302871704, | |
| "learning_rate": 2.9299034843210726e-05, | |
| "loss": 0.2975457668304443, | |
| "step": 2565, | |
| "token_acc": 0.9023709297180176 | |
| }, | |
| { | |
| "epoch": 1.966335118592196, | |
| "grad_norm": 0.8139908909797668, | |
| "learning_rate": 2.9107242419225577e-05, | |
| "loss": 0.2521679401397705, | |
| "step": 2570, | |
| "token_acc": 0.9153000116348267 | |
| }, | |
| { | |
| "epoch": 1.9701606732976282, | |
| "grad_norm": 0.6574170589447021, | |
| "learning_rate": 2.8915821714899917e-05, | |
| "loss": 0.268428373336792, | |
| "step": 2575, | |
| "token_acc": 0.9112088680267334 | |
| }, | |
| { | |
| "epoch": 1.9739862280030605, | |
| "grad_norm": 0.7240482568740845, | |
| "learning_rate": 2.8724776135951747e-05, | |
| "loss": 0.2789809226989746, | |
| "step": 2580, | |
| "token_acc": 0.9081910848617554 | |
| }, | |
| { | |
| "epoch": 1.9778117827084927, | |
| "grad_norm": 0.675998330116272, | |
| "learning_rate": 2.85341090814248e-05, | |
| "loss": 0.300505256652832, | |
| "step": 2585, | |
| "token_acc": 0.9032467007637024 | |
| }, | |
| { | |
| "epoch": 1.981637337413925, | |
| "grad_norm": 0.7282765507698059, | |
| "learning_rate": 2.8343823943628257e-05, | |
| "loss": 0.2605840444564819, | |
| "step": 2590, | |
| "token_acc": 0.9125819206237793 | |
| }, | |
| { | |
| "epoch": 1.9854628921193573, | |
| "grad_norm": 0.8446104526519775, | |
| "learning_rate": 2.8153924108076234e-05, | |
| "loss": 0.3036641120910645, | |
| "step": 2595, | |
| "token_acc": 0.9020313024520874 | |
| }, | |
| { | |
| "epoch": 1.9892884468247896, | |
| "grad_norm": 0.8688914179801941, | |
| "learning_rate": 2.7964412953427667e-05, | |
| "loss": 0.301717472076416, | |
| "step": 2600, | |
| "token_acc": 0.90234375 | |
| }, | |
| { | |
| "epoch": 1.9892884468247896, | |
| "eval_loss": 0.44557470083236694, | |
| "eval_runtime": 7.7719, | |
| "eval_samples_per_second": 13.382, | |
| "eval_steps_per_second": 1.673, | |
| "eval_token_acc": 0.869877278804779, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.993114001530222, | |
| "grad_norm": 0.6388227343559265, | |
| "learning_rate": 2.7775293851426232e-05, | |
| "loss": 0.28205983638763427, | |
| "step": 2605, | |
| "token_acc": 0.9057275056838989 | |
| }, | |
| { | |
| "epoch": 1.9969395562356542, | |
| "grad_norm": 0.6498620510101318, | |
| "learning_rate": 2.7586570166840153e-05, | |
| "loss": 0.28784162998199464, | |
| "step": 2610, | |
| "token_acc": 0.9042630791664124 | |
| }, | |
| { | |
| "epoch": 2.0007651109410864, | |
| "grad_norm": 0.46216583251953125, | |
| "learning_rate": 2.7398245257402567e-05, | |
| "loss": 0.24226248264312744, | |
| "step": 2615, | |
| "token_acc": 0.9181912541389465 | |
| }, | |
| { | |
| "epoch": 2.0045906656465187, | |
| "grad_norm": 0.4526701867580414, | |
| "learning_rate": 2.721032247375165e-05, | |
| "loss": 0.13410005569458008, | |
| "step": 2620, | |
| "token_acc": 0.9554323554039001 | |
| }, | |
| { | |
| "epoch": 2.008416220351951, | |
| "grad_norm": 0.5027770400047302, | |
| "learning_rate": 2.7022805159371023e-05, | |
| "loss": 0.14986848831176758, | |
| "step": 2625, | |
| "token_acc": 0.9495237469673157 | |
| }, | |
| { | |
| "epoch": 2.012241775057383, | |
| "grad_norm": 0.6318019032478333, | |
| "learning_rate": 2.683569665053033e-05, | |
| "loss": 0.13008542060852052, | |
| "step": 2630, | |
| "token_acc": 0.9539133906364441 | |
| }, | |
| { | |
| "epoch": 2.0160673297628158, | |
| "grad_norm": 0.738571286201477, | |
| "learning_rate": 2.664900027622577e-05, | |
| "loss": 0.15502784252166749, | |
| "step": 2635, | |
| "token_acc": 0.9469853043556213 | |
| }, | |
| { | |
| "epoch": 2.019892884468248, | |
| "grad_norm": 0.6892253160476685, | |
| "learning_rate": 2.646271935812098e-05, | |
| "loss": 0.13881022930145265, | |
| "step": 2640, | |
| "token_acc": 0.9516469240188599 | |
| }, | |
| { | |
| "epoch": 2.02371843917368, | |
| "grad_norm": 0.6470181941986084, | |
| "learning_rate": 2.6276857210487858e-05, | |
| "loss": 0.1207735538482666, | |
| "step": 2645, | |
| "token_acc": 0.9576534032821655 | |
| }, | |
| { | |
| "epoch": 2.0275439938791124, | |
| "grad_norm": 0.6596648097038269, | |
| "learning_rate": 2.6091417140147634e-05, | |
| "loss": 0.11292877197265624, | |
| "step": 2650, | |
| "token_acc": 0.9626390337944031 | |
| }, | |
| { | |
| "epoch": 2.0275439938791124, | |
| "eval_loss": 0.5168122053146362, | |
| "eval_runtime": 8.1996, | |
| "eval_samples_per_second": 12.684, | |
| "eval_steps_per_second": 1.585, | |
| "eval_token_acc": 0.8695364594459534, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.0313695485845447, | |
| "grad_norm": 0.577893853187561, | |
| "learning_rate": 2.5906402446412027e-05, | |
| "loss": 0.14242198467254638, | |
| "step": 2655, | |
| "token_acc": 0.9518451690673828 | |
| }, | |
| { | |
| "epoch": 2.035195103289977, | |
| "grad_norm": 0.6954317688941956, | |
| "learning_rate": 2.5721816421024515e-05, | |
| "loss": 0.12017552852630616, | |
| "step": 2660, | |
| "token_acc": 0.9579612016677856 | |
| }, | |
| { | |
| "epoch": 2.0390206579954095, | |
| "grad_norm": 0.5604422688484192, | |
| "learning_rate": 2.553766234810181e-05, | |
| "loss": 0.12801860570907592, | |
| "step": 2665, | |
| "token_acc": 0.9555345773696899 | |
| }, | |
| { | |
| "epoch": 2.0428462127008418, | |
| "grad_norm": 0.6638826727867126, | |
| "learning_rate": 2.535394350407548e-05, | |
| "loss": 0.1116684079170227, | |
| "step": 2670, | |
| "token_acc": 0.960515022277832 | |
| }, | |
| { | |
| "epoch": 2.046671767406274, | |
| "grad_norm": 0.5910780429840088, | |
| "learning_rate": 2.5170663157633477e-05, | |
| "loss": 0.13454906940460204, | |
| "step": 2675, | |
| "token_acc": 0.9548289775848389 | |
| }, | |
| { | |
| "epoch": 2.050497322111706, | |
| "grad_norm": 0.6535590291023254, | |
| "learning_rate": 2.4987824569662167e-05, | |
| "loss": 0.12083430290222168, | |
| "step": 2680, | |
| "token_acc": 0.9585192799568176 | |
| }, | |
| { | |
| "epoch": 2.0543228768171384, | |
| "grad_norm": 0.5576914548873901, | |
| "learning_rate": 2.4805430993188228e-05, | |
| "loss": 0.12852833271026612, | |
| "step": 2685, | |
| "token_acc": 0.9565430879592896 | |
| }, | |
| { | |
| "epoch": 2.0581484315225707, | |
| "grad_norm": 0.57133549451828, | |
| "learning_rate": 2.4623485673320772e-05, | |
| "loss": 0.13395898342132567, | |
| "step": 2690, | |
| "token_acc": 0.9541014432907104 | |
| }, | |
| { | |
| "epoch": 2.061973986228003, | |
| "grad_norm": 0.824409008026123, | |
| "learning_rate": 2.4441991847193636e-05, | |
| "loss": 0.1304774522781372, | |
| "step": 2695, | |
| "token_acc": 0.9565969109535217 | |
| }, | |
| { | |
| "epoch": 2.0657995409334355, | |
| "grad_norm": 0.6546271443367004, | |
| "learning_rate": 2.4260952743907756e-05, | |
| "loss": 0.13317997455596925, | |
| "step": 2700, | |
| "token_acc": 0.9548870325088501 | |
| }, | |
| { | |
| "epoch": 2.0657995409334355, | |
| "eval_loss": 0.5218855142593384, | |
| "eval_runtime": 8.6536, | |
| "eval_samples_per_second": 12.018, | |
| "eval_steps_per_second": 1.502, | |
| "eval_token_acc": 0.8702181577682495, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.0696250956388678, | |
| "grad_norm": 0.49882644414901733, | |
| "learning_rate": 2.4080371584473748e-05, | |
| "loss": 0.10250062942504883, | |
| "step": 2705, | |
| "token_acc": 0.9647969007492065 | |
| }, | |
| { | |
| "epoch": 2.0734506503443, | |
| "grad_norm": 0.6716576814651489, | |
| "learning_rate": 2.390025158175458e-05, | |
| "loss": 0.12553690671920775, | |
| "step": 2710, | |
| "token_acc": 0.9559978246688843 | |
| }, | |
| { | |
| "epoch": 2.077276205049732, | |
| "grad_norm": 0.630893349647522, | |
| "learning_rate": 2.3720595940408413e-05, | |
| "loss": 0.1133840560913086, | |
| "step": 2715, | |
| "token_acc": 0.960378885269165 | |
| }, | |
| { | |
| "epoch": 2.0811017597551644, | |
| "grad_norm": 0.6294081211090088, | |
| "learning_rate": 2.3541407856831598e-05, | |
| "loss": 0.11989744901657104, | |
| "step": 2720, | |
| "token_acc": 0.9581653475761414 | |
| }, | |
| { | |
| "epoch": 2.0849273144605966, | |
| "grad_norm": 0.6295720934867859, | |
| "learning_rate": 2.3362690519101728e-05, | |
| "loss": 0.10788016319274903, | |
| "step": 2725, | |
| "token_acc": 0.9615026116371155 | |
| }, | |
| { | |
| "epoch": 2.088752869166029, | |
| "grad_norm": 0.6127709150314331, | |
| "learning_rate": 2.318444710692109e-05, | |
| "loss": 0.18858987092971802, | |
| "step": 2730, | |
| "token_acc": 0.9527615308761597 | |
| }, | |
| { | |
| "epoch": 2.0925784238714615, | |
| "grad_norm": 0.6840873956680298, | |
| "learning_rate": 2.3006680791559943e-05, | |
| "loss": 0.13058118820190429, | |
| "step": 2735, | |
| "token_acc": 0.9559764862060547 | |
| }, | |
| { | |
| "epoch": 2.0964039785768938, | |
| "grad_norm": 0.6548556089401245, | |
| "learning_rate": 2.2829394735800075e-05, | |
| "loss": 0.12637789249420167, | |
| "step": 2740, | |
| "token_acc": 0.9560421705245972 | |
| }, | |
| { | |
| "epoch": 2.100229533282326, | |
| "grad_norm": 0.6251739263534546, | |
| "learning_rate": 2.2652592093878666e-05, | |
| "loss": 0.1079249382019043, | |
| "step": 2745, | |
| "token_acc": 0.9616904854774475 | |
| }, | |
| { | |
| "epoch": 2.104055087987758, | |
| "grad_norm": 0.5070903301239014, | |
| "learning_rate": 2.2476276011432056e-05, | |
| "loss": 0.10909421443939209, | |
| "step": 2750, | |
| "token_acc": 0.9607372879981995 | |
| }, | |
| { | |
| "epoch": 2.104055087987758, | |
| "eval_loss": 0.5240176916122437, | |
| "eval_runtime": 7.9542, | |
| "eval_samples_per_second": 13.075, | |
| "eval_steps_per_second": 1.634, | |
| "eval_token_acc": 0.8699575066566467, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.1078806426931904, | |
| "grad_norm": 0.5303053259849548, | |
| "learning_rate": 2.230044962543989e-05, | |
| "loss": 0.10541150569915772, | |
| "step": 2755, | |
| "token_acc": 0.9636523723602295 | |
| }, | |
| { | |
| "epoch": 2.1117061973986226, | |
| "grad_norm": 0.6467751264572144, | |
| "learning_rate": 2.2125116064169125e-05, | |
| "loss": 0.11249511241912842, | |
| "step": 2760, | |
| "token_acc": 0.9602897763252258 | |
| }, | |
| { | |
| "epoch": 2.1155317521040553, | |
| "grad_norm": 0.6789493560791016, | |
| "learning_rate": 2.195027844711856e-05, | |
| "loss": 0.13851575851440429, | |
| "step": 2765, | |
| "token_acc": 0.9524257183074951 | |
| }, | |
| { | |
| "epoch": 2.1193573068094875, | |
| "grad_norm": 0.5706949234008789, | |
| "learning_rate": 2.177593988496323e-05, | |
| "loss": 0.0956031322479248, | |
| "step": 2770, | |
| "token_acc": 0.9663383960723877 | |
| }, | |
| { | |
| "epoch": 2.1231828615149198, | |
| "grad_norm": 0.5609292984008789, | |
| "learning_rate": 2.1602103479499093e-05, | |
| "loss": 0.11319952011108399, | |
| "step": 2775, | |
| "token_acc": 0.9608060717582703 | |
| }, | |
| { | |
| "epoch": 2.127008416220352, | |
| "grad_norm": 0.639937162399292, | |
| "learning_rate": 2.1428772323587827e-05, | |
| "loss": 0.13543224334716797, | |
| "step": 2780, | |
| "token_acc": 0.9520896077156067 | |
| }, | |
| { | |
| "epoch": 2.130833970925784, | |
| "grad_norm": 0.6833350658416748, | |
| "learning_rate": 2.1255949501101847e-05, | |
| "loss": 0.14142370223999023, | |
| "step": 2785, | |
| "token_acc": 0.9528786540031433 | |
| }, | |
| { | |
| "epoch": 2.1346595256312164, | |
| "grad_norm": 0.5408839583396912, | |
| "learning_rate": 2.1083638086869327e-05, | |
| "loss": 0.12588857412338256, | |
| "step": 2790, | |
| "token_acc": 0.9563543200492859 | |
| }, | |
| { | |
| "epoch": 2.1384850803366486, | |
| "grad_norm": 0.5438815355300903, | |
| "learning_rate": 2.0911841146619676e-05, | |
| "loss": 0.12137541770935059, | |
| "step": 2795, | |
| "token_acc": 0.958185613155365 | |
| }, | |
| { | |
| "epoch": 2.1423106350420813, | |
| "grad_norm": 0.6048544645309448, | |
| "learning_rate": 2.074056173692881e-05, | |
| "loss": 0.10157194137573242, | |
| "step": 2800, | |
| "token_acc": 0.9674689769744873 | |
| }, | |
| { | |
| "epoch": 2.1423106350420813, | |
| "eval_loss": 0.5312597751617432, | |
| "eval_runtime": 9.0822, | |
| "eval_samples_per_second": 11.451, | |
| "eval_steps_per_second": 1.431, | |
| "eval_token_acc": 0.8708697557449341, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.1461361897475135, | |
| "grad_norm": 0.689985990524292, | |
| "learning_rate": 2.05698029051649e-05, | |
| "loss": 0.12691206932067872, | |
| "step": 2805, | |
| "token_acc": 0.9552291035652161 | |
| }, | |
| { | |
| "epoch": 2.1499617444529457, | |
| "grad_norm": 0.628235936164856, | |
| "learning_rate": 2.0399567689434007e-05, | |
| "loss": 0.12962342500686647, | |
| "step": 2810, | |
| "token_acc": 0.9563965201377869 | |
| }, | |
| { | |
| "epoch": 2.153787299158378, | |
| "grad_norm": 0.583711564540863, | |
| "learning_rate": 2.0229859118526244e-05, | |
| "loss": 0.11104552745819092, | |
| "step": 2815, | |
| "token_acc": 0.9605592489242554 | |
| }, | |
| { | |
| "epoch": 2.15761285386381, | |
| "grad_norm": 0.749139666557312, | |
| "learning_rate": 2.0060680211861722e-05, | |
| "loss": 0.11064702272415161, | |
| "step": 2820, | |
| "token_acc": 0.9618842601776123 | |
| }, | |
| { | |
| "epoch": 2.1614384085692424, | |
| "grad_norm": 0.6225452423095703, | |
| "learning_rate": 1.989203397943682e-05, | |
| "loss": 0.1368303894996643, | |
| "step": 2825, | |
| "token_acc": 0.9523999691009521 | |
| }, | |
| { | |
| "epoch": 2.1652639632746746, | |
| "grad_norm": 0.7548052072525024, | |
| "learning_rate": 1.9723923421770744e-05, | |
| "loss": 0.12567458152770997, | |
| "step": 2830, | |
| "token_acc": 0.9570740461349487 | |
| }, | |
| { | |
| "epoch": 2.1690895179801073, | |
| "grad_norm": 0.6393832564353943, | |
| "learning_rate": 1.9556351529852086e-05, | |
| "loss": 0.12716997861862184, | |
| "step": 2835, | |
| "token_acc": 0.9550226926803589 | |
| }, | |
| { | |
| "epoch": 2.1729150726855395, | |
| "grad_norm": 0.5963457822799683, | |
| "learning_rate": 1.9389321285085572e-05, | |
| "loss": 0.12617888450622558, | |
| "step": 2840, | |
| "token_acc": 0.9543135166168213 | |
| }, | |
| { | |
| "epoch": 2.1767406273909717, | |
| "grad_norm": 0.7114848494529724, | |
| "learning_rate": 1.9222835659239086e-05, | |
| "loss": 0.12233096361160278, | |
| "step": 2845, | |
| "token_acc": 0.9570853114128113 | |
| }, | |
| { | |
| "epoch": 2.180566182096404, | |
| "grad_norm": 0.6505621671676636, | |
| "learning_rate": 1.905689761439075e-05, | |
| "loss": 0.13814208507537842, | |
| "step": 2850, | |
| "token_acc": 0.9528710246086121 | |
| }, | |
| { | |
| "epoch": 2.180566182096404, | |
| "eval_loss": 0.5228633284568787, | |
| "eval_runtime": 7.9764, | |
| "eval_samples_per_second": 13.038, | |
| "eval_steps_per_second": 1.63, | |
| "eval_token_acc": 0.8718922734260559, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.184391736801836, | |
| "grad_norm": 0.6201128959655762, | |
| "learning_rate": 1.8891510102876235e-05, | |
| "loss": 0.12893006801605225, | |
| "step": 2855, | |
| "token_acc": 0.9550007581710815 | |
| }, | |
| { | |
| "epoch": 2.1882172915072684, | |
| "grad_norm": 0.6673233509063721, | |
| "learning_rate": 1.8726676067236245e-05, | |
| "loss": 0.10436077117919922, | |
| "step": 2860, | |
| "token_acc": 0.9645984172821045 | |
| }, | |
| { | |
| "epoch": 2.1920428462127006, | |
| "grad_norm": 0.7207808494567871, | |
| "learning_rate": 1.8562398440164135e-05, | |
| "loss": 0.14118155241012573, | |
| "step": 2865, | |
| "token_acc": 0.9522634148597717 | |
| }, | |
| { | |
| "epoch": 2.1958684009181333, | |
| "grad_norm": 0.7116675972938538, | |
| "learning_rate": 1.8398680144453794e-05, | |
| "loss": 0.11731832027435303, | |
| "step": 2870, | |
| "token_acc": 0.9581528902053833 | |
| }, | |
| { | |
| "epoch": 2.1996939556235655, | |
| "grad_norm": 0.5616986155509949, | |
| "learning_rate": 1.823552409294752e-05, | |
| "loss": 0.10328438282012939, | |
| "step": 2875, | |
| "token_acc": 0.9635567665100098 | |
| }, | |
| { | |
| "epoch": 2.2035195103289977, | |
| "grad_norm": 0.7303850650787354, | |
| "learning_rate": 1.8072933188484385e-05, | |
| "loss": 0.12835383415222168, | |
| "step": 2880, | |
| "token_acc": 0.9546709060668945 | |
| }, | |
| { | |
| "epoch": 2.20734506503443, | |
| "grad_norm": 0.684688925743103, | |
| "learning_rate": 1.7910910323848433e-05, | |
| "loss": 0.12336525917053223, | |
| "step": 2885, | |
| "token_acc": 0.9571567177772522 | |
| }, | |
| { | |
| "epoch": 2.211170619739862, | |
| "grad_norm": 0.5825948119163513, | |
| "learning_rate": 1.774945838171721e-05, | |
| "loss": 0.12321670055389404, | |
| "step": 2890, | |
| "token_acc": 0.9568530321121216 | |
| }, | |
| { | |
| "epoch": 2.2149961744452944, | |
| "grad_norm": 0.5380724668502808, | |
| "learning_rate": 1.758858023461059e-05, | |
| "loss": 0.1462591528892517, | |
| "step": 2895, | |
| "token_acc": 0.9520248770713806 | |
| }, | |
| { | |
| "epoch": 2.218821729150727, | |
| "grad_norm": 0.7477222084999084, | |
| "learning_rate": 1.742827874483958e-05, | |
| "loss": 0.1159374475479126, | |
| "step": 2900, | |
| "token_acc": 0.9597063660621643 | |
| }, | |
| { | |
| "epoch": 2.218821729150727, | |
| "eval_loss": 0.5222508311271667, | |
| "eval_runtime": 8.103, | |
| "eval_samples_per_second": 12.835, | |
| "eval_steps_per_second": 1.604, | |
| "eval_token_acc": 0.872032642364502, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.2226472838561593, | |
| "grad_norm": 0.578953206539154, | |
| "learning_rate": 1.7268556764455433e-05, | |
| "loss": 0.1094053030014038, | |
| "step": 2905, | |
| "token_acc": 0.9612045884132385 | |
| }, | |
| { | |
| "epoch": 2.2264728385615915, | |
| "grad_norm": 0.6454194188117981, | |
| "learning_rate": 1.7109417135198875e-05, | |
| "loss": 0.09978902339935303, | |
| "step": 2910, | |
| "token_acc": 0.9648175239562988 | |
| }, | |
| { | |
| "epoch": 2.2302983932670237, | |
| "grad_norm": 0.6507310271263123, | |
| "learning_rate": 1.6950862688449555e-05, | |
| "loss": 0.12494430541992188, | |
| "step": 2915, | |
| "token_acc": 0.9561623930931091 | |
| }, | |
| { | |
| "epoch": 2.234123947972456, | |
| "grad_norm": 0.5561665296554565, | |
| "learning_rate": 1.6792896245175695e-05, | |
| "loss": 0.12519459724426268, | |
| "step": 2920, | |
| "token_acc": 0.957149863243103 | |
| }, | |
| { | |
| "epoch": 2.237949502677888, | |
| "grad_norm": 0.6335827708244324, | |
| "learning_rate": 1.6635520615883854e-05, | |
| "loss": 0.12490168809890748, | |
| "step": 2925, | |
| "token_acc": 0.956473171710968 | |
| }, | |
| { | |
| "epoch": 2.2417750573833204, | |
| "grad_norm": 0.518527090549469, | |
| "learning_rate": 1.6478738600568978e-05, | |
| "loss": 0.11815754175186158, | |
| "step": 2930, | |
| "token_acc": 0.9581723809242249 | |
| }, | |
| { | |
| "epoch": 2.245600612088753, | |
| "grad_norm": 0.7105391025543213, | |
| "learning_rate": 1.6322552988664548e-05, | |
| "loss": 0.1265929937362671, | |
| "step": 2935, | |
| "token_acc": 0.9559991359710693 | |
| }, | |
| { | |
| "epoch": 2.2494261667941853, | |
| "grad_norm": 0.6597128510475159, | |
| "learning_rate": 1.616696655899291e-05, | |
| "loss": 0.10472848415374755, | |
| "step": 2940, | |
| "token_acc": 0.9618938565254211 | |
| }, | |
| { | |
| "epoch": 2.2532517214996175, | |
| "grad_norm": 0.5978385806083679, | |
| "learning_rate": 1.601198207971596e-05, | |
| "loss": 0.11347222328186035, | |
| "step": 2945, | |
| "token_acc": 0.9598453640937805 | |
| }, | |
| { | |
| "epoch": 2.2570772762050497, | |
| "grad_norm": 0.5900003910064697, | |
| "learning_rate": 1.585760230828579e-05, | |
| "loss": 0.1062214732170105, | |
| "step": 2950, | |
| "token_acc": 0.9621166586875916 | |
| }, | |
| { | |
| "epoch": 2.2570772762050497, | |
| "eval_loss": 0.529563307762146, | |
| "eval_runtime": 7.925, | |
| "eval_samples_per_second": 13.123, | |
| "eval_steps_per_second": 1.64, | |
| "eval_token_acc": 0.8730752468109131, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.260902830910482, | |
| "grad_norm": 0.6690232753753662, | |
| "learning_rate": 1.57038299913956e-05, | |
| "loss": 0.12313377857208252, | |
| "step": 2955, | |
| "token_acc": 0.9577500820159912 | |
| }, | |
| { | |
| "epoch": 2.264728385615914, | |
| "grad_norm": 0.6129235625267029, | |
| "learning_rate": 1.555066786493094e-05, | |
| "loss": 0.11549534797668456, | |
| "step": 2960, | |
| "token_acc": 0.9599046111106873 | |
| }, | |
| { | |
| "epoch": 2.268553940321347, | |
| "grad_norm": 0.7165189385414124, | |
| "learning_rate": 1.5398118653920986e-05, | |
| "loss": 0.10570051670074462, | |
| "step": 2965, | |
| "token_acc": 0.9616792798042297 | |
| }, | |
| { | |
| "epoch": 2.272379495026779, | |
| "grad_norm": 0.7057157754898071, | |
| "learning_rate": 1.5246185072490027e-05, | |
| "loss": 0.11799094676971436, | |
| "step": 2970, | |
| "token_acc": 0.9599979519844055 | |
| }, | |
| { | |
| "epoch": 2.2762050497322113, | |
| "grad_norm": 0.6109249591827393, | |
| "learning_rate": 1.5094869823809166e-05, | |
| "loss": 0.12232885360717774, | |
| "step": 2975, | |
| "token_acc": 0.9563071727752686 | |
| }, | |
| { | |
| "epoch": 2.2800306044376435, | |
| "grad_norm": 0.6849731206893921, | |
| "learning_rate": 1.4944175600048294e-05, | |
| "loss": 0.12355262041091919, | |
| "step": 2980, | |
| "token_acc": 0.9571903944015503 | |
| }, | |
| { | |
| "epoch": 2.2838561591430757, | |
| "grad_norm": 0.551438570022583, | |
| "learning_rate": 1.4794105082328158e-05, | |
| "loss": 0.10952677726745605, | |
| "step": 2985, | |
| "token_acc": 0.963117241859436 | |
| }, | |
| { | |
| "epoch": 2.287681713848508, | |
| "grad_norm": 0.7222511172294617, | |
| "learning_rate": 1.4644660940672627e-05, | |
| "loss": 0.1401592493057251, | |
| "step": 2990, | |
| "token_acc": 0.9511399865150452 | |
| }, | |
| { | |
| "epoch": 2.29150726855394, | |
| "grad_norm": 0.7186452150344849, | |
| "learning_rate": 1.449584583396124e-05, | |
| "loss": 0.1436525344848633, | |
| "step": 2995, | |
| "token_acc": 0.9500516653060913 | |
| }, | |
| { | |
| "epoch": 2.295332823259373, | |
| "grad_norm": 0.7001931071281433, | |
| "learning_rate": 1.4347662409881868e-05, | |
| "loss": 0.12311695814132691, | |
| "step": 3000, | |
| "token_acc": 0.9562889337539673 | |
| }, | |
| { | |
| "epoch": 2.295332823259373, | |
| "eval_loss": 0.5203014612197876, | |
| "eval_runtime": 9.1094, | |
| "eval_samples_per_second": 11.417, | |
| "eval_steps_per_second": 1.427, | |
| "eval_token_acc": 0.8741077780723572, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.299158377964805, | |
| "grad_norm": 0.6098562479019165, | |
| "learning_rate": 1.4200113304883611e-05, | |
| "loss": 0.13382203578948976, | |
| "step": 3005, | |
| "token_acc": 0.9541038274765015 | |
| }, | |
| { | |
| "epoch": 2.3029839326702373, | |
| "grad_norm": 0.6261680126190186, | |
| "learning_rate": 1.405320114412989e-05, | |
| "loss": 0.0949715256690979, | |
| "step": 3010, | |
| "token_acc": 0.96717369556427 | |
| }, | |
| { | |
| "epoch": 2.3068094873756695, | |
| "grad_norm": 0.5904762744903564, | |
| "learning_rate": 1.3906928541451775e-05, | |
| "loss": 0.10795230865478515, | |
| "step": 3015, | |
| "token_acc": 0.9621407985687256 | |
| }, | |
| { | |
| "epoch": 2.3106350420811017, | |
| "grad_norm": 0.6883955001831055, | |
| "learning_rate": 1.3761298099301378e-05, | |
| "loss": 0.12801848649978637, | |
| "step": 3020, | |
| "token_acc": 0.9559524059295654 | |
| }, | |
| { | |
| "epoch": 2.314460596786534, | |
| "grad_norm": 0.6712023615837097, | |
| "learning_rate": 1.3616312408705689e-05, | |
| "loss": 0.12017567157745361, | |
| "step": 3025, | |
| "token_acc": 0.9589926600456238 | |
| }, | |
| { | |
| "epoch": 2.318286151491966, | |
| "grad_norm": 0.5586845874786377, | |
| "learning_rate": 1.3471974049220403e-05, | |
| "loss": 0.09736464023590088, | |
| "step": 3030, | |
| "token_acc": 0.9669448733329773 | |
| }, | |
| { | |
| "epoch": 2.322111706197399, | |
| "grad_norm": 0.7812525033950806, | |
| "learning_rate": 1.3328285588884032e-05, | |
| "loss": 0.11876866817474366, | |
| "step": 3035, | |
| "token_acc": 0.9586123824119568 | |
| }, | |
| { | |
| "epoch": 2.325937260902831, | |
| "grad_norm": 0.5611070394515991, | |
| "learning_rate": 1.3185249584172172e-05, | |
| "loss": 0.09341703653335572, | |
| "step": 3040, | |
| "token_acc": 0.9679653644561768 | |
| }, | |
| { | |
| "epoch": 2.3297628156082633, | |
| "grad_norm": 0.7015408873558044, | |
| "learning_rate": 1.304286857995209e-05, | |
| "loss": 0.10733482837677003, | |
| "step": 3045, | |
| "token_acc": 0.9623789191246033 | |
| }, | |
| { | |
| "epoch": 2.3335883703136955, | |
| "grad_norm": 0.6591479778289795, | |
| "learning_rate": 1.2901145109437474e-05, | |
| "loss": 0.11940803527832031, | |
| "step": 3050, | |
| "token_acc": 0.9576820135116577 | |
| }, | |
| { | |
| "epoch": 2.3335883703136955, | |
| "eval_loss": 0.5162126421928406, | |
| "eval_runtime": 7.6579, | |
| "eval_samples_per_second": 13.581, | |
| "eval_steps_per_second": 1.698, | |
| "eval_token_acc": 0.8742882609367371, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.3374139250191277, | |
| "grad_norm": 0.5746079087257385, | |
| "learning_rate": 1.27600816941432e-05, | |
| "loss": 0.12224366664886474, | |
| "step": 3055, | |
| "token_acc": 0.95743727684021 | |
| }, | |
| { | |
| "epoch": 2.34123947972456, | |
| "grad_norm": 0.6104121208190918, | |
| "learning_rate": 1.2619680843840659e-05, | |
| "loss": 0.12069646120071412, | |
| "step": 3060, | |
| "token_acc": 0.9580378532409668 | |
| }, | |
| { | |
| "epoch": 2.345065034429992, | |
| "grad_norm": 0.6610199213027954, | |
| "learning_rate": 1.2479945056512993e-05, | |
| "loss": 0.10805834531784057, | |
| "step": 3065, | |
| "token_acc": 0.9605792760848999 | |
| }, | |
| { | |
| "epoch": 2.348890589135425, | |
| "grad_norm": 0.6179318428039551, | |
| "learning_rate": 1.2340876818310682e-05, | |
| "loss": 0.1121566653251648, | |
| "step": 3070, | |
| "token_acc": 0.9616247415542603 | |
| }, | |
| { | |
| "epoch": 2.352716143840857, | |
| "grad_norm": 0.6470217108726501, | |
| "learning_rate": 1.22024786035073e-05, | |
| "loss": 0.09998181462287903, | |
| "step": 3075, | |
| "token_acc": 0.9644249081611633 | |
| }, | |
| { | |
| "epoch": 2.3565416985462893, | |
| "grad_norm": 0.6415740847587585, | |
| "learning_rate": 1.206475287445552e-05, | |
| "loss": 0.10013750791549683, | |
| "step": 3080, | |
| "token_acc": 0.9655629396438599 | |
| }, | |
| { | |
| "epoch": 2.3603672532517215, | |
| "grad_norm": 0.5981183648109436, | |
| "learning_rate": 1.1927702081543279e-05, | |
| "loss": 0.10144208669662476, | |
| "step": 3085, | |
| "token_acc": 0.965247631072998 | |
| }, | |
| { | |
| "epoch": 2.3641928079571537, | |
| "grad_norm": 0.4865865409374237, | |
| "learning_rate": 1.179132866315018e-05, | |
| "loss": 0.10601496696472168, | |
| "step": 3090, | |
| "token_acc": 0.9624915719032288 | |
| }, | |
| { | |
| "epoch": 2.368018362662586, | |
| "grad_norm": 0.5336887240409851, | |
| "learning_rate": 1.165563504560413e-05, | |
| "loss": 0.11365892887115478, | |
| "step": 3095, | |
| "token_acc": 0.9594626426696777 | |
| }, | |
| { | |
| "epoch": 2.371843917368018, | |
| "grad_norm": 0.4895932376384735, | |
| "learning_rate": 1.1520623643138162e-05, | |
| "loss": 0.11079982519149781, | |
| "step": 3100, | |
| "token_acc": 0.9616596102714539 | |
| }, | |
| { | |
| "epoch": 2.371843917368018, | |
| "eval_loss": 0.5221489667892456, | |
| "eval_runtime": 8.0254, | |
| "eval_samples_per_second": 12.959, | |
| "eval_steps_per_second": 1.62, | |
| "eval_token_acc": 0.8750301003456116, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.375669472073451, | |
| "grad_norm": 0.6662837266921997, | |
| "learning_rate": 1.1386296857847444e-05, | |
| "loss": 0.09341274499893189, | |
| "step": 3105, | |
| "token_acc": 0.9671337008476257 | |
| }, | |
| { | |
| "epoch": 2.379495026778883, | |
| "grad_norm": 0.5832562446594238, | |
| "learning_rate": 1.12526570796466e-05, | |
| "loss": 0.11719496250152588, | |
| "step": 3110, | |
| "token_acc": 0.9592087864875793 | |
| }, | |
| { | |
| "epoch": 2.3833205814843152, | |
| "grad_norm": 0.5843919515609741, | |
| "learning_rate": 1.1119706686227211e-05, | |
| "loss": 0.10511226654052734, | |
| "step": 3115, | |
| "token_acc": 0.9644036889076233 | |
| }, | |
| { | |
| "epoch": 2.3871461361897475, | |
| "grad_norm": 0.49912717938423157, | |
| "learning_rate": 1.0987448043015374e-05, | |
| "loss": 0.09345480799674988, | |
| "step": 3120, | |
| "token_acc": 0.9667991399765015 | |
| }, | |
| { | |
| "epoch": 2.3909716908951797, | |
| "grad_norm": 0.7507015466690063, | |
| "learning_rate": 1.0855883503129772e-05, | |
| "loss": 0.11863377094268798, | |
| "step": 3125, | |
| "token_acc": 0.9587963819503784 | |
| }, | |
| { | |
| "epoch": 2.394797245600612, | |
| "grad_norm": 0.7630432844161987, | |
| "learning_rate": 1.0725015407339717e-05, | |
| "loss": 0.1126257300376892, | |
| "step": 3130, | |
| "token_acc": 0.9607234597206116 | |
| }, | |
| { | |
| "epoch": 2.398622800306044, | |
| "grad_norm": 0.6372060179710388, | |
| "learning_rate": 1.0594846084023547e-05, | |
| "loss": 0.10468795299530029, | |
| "step": 3135, | |
| "token_acc": 0.9627901315689087 | |
| }, | |
| { | |
| "epoch": 2.402448355011477, | |
| "grad_norm": 0.6120291352272034, | |
| "learning_rate": 1.0465377849127172e-05, | |
| "loss": 0.09292224049568176, | |
| "step": 3140, | |
| "token_acc": 0.9677795171737671 | |
| }, | |
| { | |
| "epoch": 2.406273909716909, | |
| "grad_norm": 0.5614500045776367, | |
| "learning_rate": 1.0336613006122892e-05, | |
| "loss": 0.09670157432556152, | |
| "step": 3145, | |
| "token_acc": 0.9674481153488159 | |
| }, | |
| { | |
| "epoch": 2.4100994644223412, | |
| "grad_norm": 0.5987251996994019, | |
| "learning_rate": 1.0208553845968383e-05, | |
| "loss": 0.13896613121032714, | |
| "step": 3150, | |
| "token_acc": 0.9524605870246887 | |
| }, | |
| { | |
| "epoch": 2.4100994644223412, | |
| "eval_loss": 0.5215019583702087, | |
| "eval_runtime": 7.8548, | |
| "eval_samples_per_second": 13.24, | |
| "eval_steps_per_second": 1.655, | |
| "eval_token_acc": 0.8747493624687195, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.4139250191277735, | |
| "grad_norm": 0.5754761695861816, | |
| "learning_rate": 1.008120264706598e-05, | |
| "loss": 0.10798046588897706, | |
| "step": 3155, | |
| "token_acc": 0.9625075459480286 | |
| }, | |
| { | |
| "epoch": 2.4177505738332057, | |
| "grad_norm": 0.5995942950248718, | |
| "learning_rate": 9.95456167522209e-06, | |
| "loss": 0.11118266582489014, | |
| "step": 3160, | |
| "token_acc": 0.9624667167663574 | |
| }, | |
| { | |
| "epoch": 2.4215761285386384, | |
| "grad_norm": 0.6560847759246826, | |
| "learning_rate": 9.82863318360695e-06, | |
| "loss": 0.11946277618408203, | |
| "step": 3165, | |
| "token_acc": 0.9585193991661072 | |
| }, | |
| { | |
| "epoch": 2.4254016832440706, | |
| "grad_norm": 0.5231161713600159, | |
| "learning_rate": 9.703419412714431e-06, | |
| "loss": 0.1082839012145996, | |
| "step": 3170, | |
| "token_acc": 0.9630952477455139 | |
| }, | |
| { | |
| "epoch": 2.429227237949503, | |
| "grad_norm": 0.6471136808395386, | |
| "learning_rate": 9.578922590322276e-06, | |
| "loss": 0.10554378032684326, | |
| "step": 3175, | |
| "token_acc": 0.9643285870552063 | |
| }, | |
| { | |
| "epoch": 2.433052792654935, | |
| "grad_norm": 0.6062421202659607, | |
| "learning_rate": 9.45514493145246e-06, | |
| "loss": 0.11804389953613281, | |
| "step": 3180, | |
| "token_acc": 0.9601839780807495 | |
| }, | |
| { | |
| "epoch": 2.4368783473603672, | |
| "grad_norm": 0.6130327582359314, | |
| "learning_rate": 9.332088638331682e-06, | |
| "loss": 0.12830252647399903, | |
| "step": 3185, | |
| "token_acc": 0.955107569694519 | |
| }, | |
| { | |
| "epoch": 2.4407039020657995, | |
| "grad_norm": 0.5650054812431335, | |
| "learning_rate": 9.209755900352285e-06, | |
| "loss": 0.08745735883712769, | |
| "step": 3190, | |
| "token_acc": 0.9680666327476501 | |
| }, | |
| { | |
| "epoch": 2.4445294567712317, | |
| "grad_norm": 0.6417719125747681, | |
| "learning_rate": 9.088148894033255e-06, | |
| "loss": 0.10346298217773438, | |
| "step": 3195, | |
| "token_acc": 0.9632440209388733 | |
| }, | |
| { | |
| "epoch": 2.4483550114766643, | |
| "grad_norm": 0.549809992313385, | |
| "learning_rate": 8.967269782981557e-06, | |
| "loss": 0.10478920936584472, | |
| "step": 3200, | |
| "token_acc": 0.964032769203186 | |
| }, | |
| { | |
| "epoch": 2.4483550114766643, | |
| "eval_loss": 0.524568498134613, | |
| "eval_runtime": 7.9187, | |
| "eval_samples_per_second": 13.133, | |
| "eval_steps_per_second": 1.642, | |
| "eval_token_acc": 0.8750100135803223, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.4521805661820966, | |
| "grad_norm": 0.5881340503692627, | |
| "learning_rate": 8.847120717853513e-06, | |
| "loss": 0.09231488704681397, | |
| "step": 3205, | |
| "token_acc": 0.967642068862915 | |
| }, | |
| { | |
| "epoch": 2.456006120887529, | |
| "grad_norm": 0.49171632528305054, | |
| "learning_rate": 8.727703836316664e-06, | |
| "loss": 0.08269585371017456, | |
| "step": 3210, | |
| "token_acc": 0.9714418053627014 | |
| }, | |
| { | |
| "epoch": 2.459831675592961, | |
| "grad_norm": 0.5847451090812683, | |
| "learning_rate": 8.609021263011696e-06, | |
| "loss": 0.09583220481872559, | |
| "step": 3215, | |
| "token_acc": 0.967701256275177 | |
| }, | |
| { | |
| "epoch": 2.4636572302983932, | |
| "grad_norm": 0.6022827625274658, | |
| "learning_rate": 8.491075109514612e-06, | |
| "loss": 0.0968513011932373, | |
| "step": 3220, | |
| "token_acc": 0.965691328048706 | |
| }, | |
| { | |
| "epoch": 2.4674827850038255, | |
| "grad_norm": 0.6396250128746033, | |
| "learning_rate": 8.373867474299197e-06, | |
| "loss": 0.09366763830184936, | |
| "step": 3225, | |
| "token_acc": 0.967291533946991 | |
| }, | |
| { | |
| "epoch": 2.4713083397092577, | |
| "grad_norm": 0.6564737558364868, | |
| "learning_rate": 8.257400442699681e-06, | |
| "loss": 0.09510574340820313, | |
| "step": 3230, | |
| "token_acc": 0.9668706059455872 | |
| }, | |
| { | |
| "epoch": 2.4751338944146903, | |
| "grad_norm": 0.5506086945533752, | |
| "learning_rate": 8.141676086873572e-06, | |
| "loss": 0.09186252355575561, | |
| "step": 3235, | |
| "token_acc": 0.9672021865844727 | |
| }, | |
| { | |
| "epoch": 2.4789594491201226, | |
| "grad_norm": 0.5937402844429016, | |
| "learning_rate": 8.026696465764922e-06, | |
| "loss": 0.09575964212417602, | |
| "step": 3240, | |
| "token_acc": 0.9655571579933167 | |
| }, | |
| { | |
| "epoch": 2.482785003825555, | |
| "grad_norm": 0.5168645977973938, | |
| "learning_rate": 7.912463625067568e-06, | |
| "loss": 0.11513475179672242, | |
| "step": 3245, | |
| "token_acc": 0.9584820866584778 | |
| }, | |
| { | |
| "epoch": 2.486610558530987, | |
| "grad_norm": 12.089369773864746, | |
| "learning_rate": 7.7989795971888e-06, | |
| "loss": 0.29053955078125, | |
| "step": 3250, | |
| "token_acc": 0.9437501430511475 | |
| }, | |
| { | |
| "epoch": 2.486610558530987, | |
| "eval_loss": 0.5287056565284729, | |
| "eval_runtime": 7.9028, | |
| "eval_samples_per_second": 13.16, | |
| "eval_steps_per_second": 1.645, | |
| "eval_token_acc": 0.8761628866195679, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.4904361132364192, | |
| "grad_norm": 0.6238409876823425, | |
| "learning_rate": 7.68624640121316e-06, | |
| "loss": 0.1205405831336975, | |
| "step": 3255, | |
| "token_acc": 0.9586801528930664 | |
| }, | |
| { | |
| "epoch": 2.4942616679418514, | |
| "grad_norm": 0.6099902391433716, | |
| "learning_rate": 7.574266042866546e-06, | |
| "loss": 0.09387488961219788, | |
| "step": 3260, | |
| "token_acc": 0.9670175909996033 | |
| }, | |
| { | |
| "epoch": 2.4980872226472837, | |
| "grad_norm": 0.6190466284751892, | |
| "learning_rate": 7.463040514480579e-06, | |
| "loss": 0.11645488739013672, | |
| "step": 3265, | |
| "token_acc": 0.9598995447158813 | |
| }, | |
| { | |
| "epoch": 2.5019127773527163, | |
| "grad_norm": 0.6443151235580444, | |
| "learning_rate": 7.352571794957025e-06, | |
| "loss": 0.08591481447219848, | |
| "step": 3270, | |
| "token_acc": 0.9710960388183594 | |
| }, | |
| { | |
| "epoch": 2.5057383320581486, | |
| "grad_norm": 0.6558806896209717, | |
| "learning_rate": 7.242861849732696e-06, | |
| "loss": 0.1108025312423706, | |
| "step": 3275, | |
| "token_acc": 0.9633561968803406 | |
| }, | |
| { | |
| "epoch": 2.5095638867635808, | |
| "grad_norm": 0.6043168306350708, | |
| "learning_rate": 7.133912630744455e-06, | |
| "loss": 0.08010676503181458, | |
| "step": 3280, | |
| "token_acc": 0.9711145162582397 | |
| }, | |
| { | |
| "epoch": 2.513389441469013, | |
| "grad_norm": 0.671475887298584, | |
| "learning_rate": 7.025726076394462e-06, | |
| "loss": 0.1144939661026001, | |
| "step": 3285, | |
| "token_acc": 0.9594224691390991 | |
| }, | |
| { | |
| "epoch": 2.517214996174445, | |
| "grad_norm": 0.5959923267364502, | |
| "learning_rate": 6.9183041115157165e-06, | |
| "loss": 0.08532092571258545, | |
| "step": 3290, | |
| "token_acc": 0.9698848724365234 | |
| }, | |
| { | |
| "epoch": 2.5210405508798774, | |
| "grad_norm": 0.552179217338562, | |
| "learning_rate": 6.8116486473377985e-06, | |
| "loss": 0.09567714929580688, | |
| "step": 3295, | |
| "token_acc": 0.966461718082428 | |
| }, | |
| { | |
| "epoch": 2.5248661055853097, | |
| "grad_norm": 0.8035470843315125, | |
| "learning_rate": 6.7057615814528514e-06, | |
| "loss": 0.11172772645950317, | |
| "step": 3300, | |
| "token_acc": 0.9609107375144958 | |
| }, | |
| { | |
| "epoch": 2.5248661055853097, | |
| "eval_loss": 0.5269036889076233, | |
| "eval_runtime": 8.3826, | |
| "eval_samples_per_second": 12.407, | |
| "eval_steps_per_second": 1.551, | |
| "eval_token_acc": 0.8761628866195679, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.5286916602907423, | |
| "grad_norm": 0.5826445817947388, | |
| "learning_rate": 6.600644797781847e-06, | |
| "loss": 0.09061547517776489, | |
| "step": 3305, | |
| "token_acc": 0.9684428572654724 | |
| }, | |
| { | |
| "epoch": 2.5325172149961745, | |
| "grad_norm": 0.6639491319656372, | |
| "learning_rate": 6.496300166541052e-06, | |
| "loss": 0.1045493245124817, | |
| "step": 3310, | |
| "token_acc": 0.9641888737678528 | |
| }, | |
| { | |
| "epoch": 2.5363427697016068, | |
| "grad_norm": 0.5682926177978516, | |
| "learning_rate": 6.392729544208758e-06, | |
| "loss": 0.10315026044845581, | |
| "step": 3315, | |
| "token_acc": 0.963904619216919 | |
| }, | |
| { | |
| "epoch": 2.540168324407039, | |
| "grad_norm": 0.6878834962844849, | |
| "learning_rate": 6.289934773492223e-06, | |
| "loss": 0.10737843513488769, | |
| "step": 3320, | |
| "token_acc": 0.963394284248352 | |
| }, | |
| { | |
| "epoch": 2.543993879112471, | |
| "grad_norm": 0.5965612530708313, | |
| "learning_rate": 6.1879176832949525e-06, | |
| "loss": 0.11070966720581055, | |
| "step": 3325, | |
| "token_acc": 0.9651868939399719 | |
| }, | |
| { | |
| "epoch": 2.5478194338179034, | |
| "grad_norm": 0.6844844818115234, | |
| "learning_rate": 6.086680088684105e-06, | |
| "loss": 0.10959099531173706, | |
| "step": 3330, | |
| "token_acc": 0.9614537358283997 | |
| }, | |
| { | |
| "epoch": 2.5516449885233357, | |
| "grad_norm": 0.5353488922119141, | |
| "learning_rate": 5.986223790858186e-06, | |
| "loss": 0.09058489799499511, | |
| "step": 3335, | |
| "token_acc": 0.9692246317863464 | |
| }, | |
| { | |
| "epoch": 2.5554705432287683, | |
| "grad_norm": 0.6746286749839783, | |
| "learning_rate": 5.886550577115069e-06, | |
| "loss": 0.1055182695388794, | |
| "step": 3340, | |
| "token_acc": 0.9636992812156677 | |
| }, | |
| { | |
| "epoch": 2.5592960979342005, | |
| "grad_norm": 0.5335373282432556, | |
| "learning_rate": 5.787662220820134e-06, | |
| "loss": 0.1255274772644043, | |
| "step": 3345, | |
| "token_acc": 0.9566043615341187 | |
| }, | |
| { | |
| "epoch": 2.5631216526396328, | |
| "grad_norm": 0.6528668403625488, | |
| "learning_rate": 5.689560481374734e-06, | |
| "loss": 0.10252002477645875, | |
| "step": 3350, | |
| "token_acc": 0.9639867544174194 | |
| }, | |
| { | |
| "epoch": 2.5631216526396328, | |
| "eval_loss": 0.5217230319976807, | |
| "eval_runtime": 8.1191, | |
| "eval_samples_per_second": 12.809, | |
| "eval_steps_per_second": 1.601, | |
| "eval_token_acc": 0.8769047260284424, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.566947207345065, | |
| "grad_norm": 0.49694639444351196, | |
| "learning_rate": 5.592247104184917e-06, | |
| "loss": 0.08688923120498657, | |
| "step": 3355, | |
| "token_acc": 0.9706814289093018 | |
| }, | |
| { | |
| "epoch": 2.570772762050497, | |
| "grad_norm": 0.5503761172294617, | |
| "learning_rate": 5.495723820630333e-06, | |
| "loss": 0.12382068634033203, | |
| "step": 3360, | |
| "token_acc": 0.9561320543289185 | |
| }, | |
| { | |
| "epoch": 2.57459831675593, | |
| "grad_norm": 0.6813068985939026, | |
| "learning_rate": 5.399992348033461e-06, | |
| "loss": 0.12225714921951295, | |
| "step": 3365, | |
| "token_acc": 0.9570099711418152 | |
| }, | |
| { | |
| "epoch": 2.5784238714613616, | |
| "grad_norm": 0.5871702432632446, | |
| "learning_rate": 5.305054389629022e-06, | |
| "loss": 0.07900494337081909, | |
| "step": 3370, | |
| "token_acc": 0.9720001220703125 | |
| }, | |
| { | |
| "epoch": 2.5822494261667943, | |
| "grad_norm": 0.7074242830276489, | |
| "learning_rate": 5.210911634533721e-06, | |
| "loss": 0.11348228454589844, | |
| "step": 3375, | |
| "token_acc": 0.9611703157424927 | |
| }, | |
| { | |
| "epoch": 2.5860749808722265, | |
| "grad_norm": 0.6286773085594177, | |
| "learning_rate": 5.117565757716158e-06, | |
| "loss": 0.11759569644927978, | |
| "step": 3380, | |
| "token_acc": 0.9579370617866516 | |
| }, | |
| { | |
| "epoch": 2.5899005355776588, | |
| "grad_norm": 0.6363070607185364, | |
| "learning_rate": 5.025018419967009e-06, | |
| "loss": 0.11911303997039795, | |
| "step": 3385, | |
| "token_acc": 0.9589115977287292 | |
| }, | |
| { | |
| "epoch": 2.593726090283091, | |
| "grad_norm": 0.6866349577903748, | |
| "learning_rate": 4.933271267869566e-06, | |
| "loss": 0.11872742176055909, | |
| "step": 3390, | |
| "token_acc": 0.9597334265708923 | |
| }, | |
| { | |
| "epoch": 2.597551644988523, | |
| "grad_norm": 0.5686379075050354, | |
| "learning_rate": 4.842325933770342e-06, | |
| "loss": 0.10091429948806763, | |
| "step": 3395, | |
| "token_acc": 0.9646428227424622 | |
| }, | |
| { | |
| "epoch": 2.601377199693956, | |
| "grad_norm": 0.5744697451591492, | |
| "learning_rate": 4.752184035750068e-06, | |
| "loss": 0.1112870454788208, | |
| "step": 3400, | |
| "token_acc": 0.9629582166671753 | |
| }, | |
| { | |
| "epoch": 2.601377199693956, | |
| "eval_loss": 0.5221067667007446, | |
| "eval_runtime": 7.949, | |
| "eval_samples_per_second": 13.083, | |
| "eval_steps_per_second": 1.635, | |
| "eval_token_acc": 0.8777067065238953, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.6052027543993876, | |
| "grad_norm": 0.5436497926712036, | |
| "learning_rate": 4.662847177594909e-06, | |
| "loss": 0.09204695224761963, | |
| "step": 3405, | |
| "token_acc": 0.9677549004554749 | |
| }, | |
| { | |
| "epoch": 2.6090283091048203, | |
| "grad_norm": 0.5940696001052856, | |
| "learning_rate": 4.5743169487679316e-06, | |
| "loss": 0.09365889430046082, | |
| "step": 3410, | |
| "token_acc": 0.9672086834907532 | |
| }, | |
| { | |
| "epoch": 2.6128538638102525, | |
| "grad_norm": 0.5806345343589783, | |
| "learning_rate": 4.486594924380838e-06, | |
| "loss": 0.07467930316925049, | |
| "step": 3415, | |
| "token_acc": 0.9740605354309082 | |
| }, | |
| { | |
| "epoch": 2.6166794185156848, | |
| "grad_norm": 0.6086448431015015, | |
| "learning_rate": 4.3996826651658775e-06, | |
| "loss": 0.09224212169647217, | |
| "step": 3420, | |
| "token_acc": 0.9681790471076965 | |
| }, | |
| { | |
| "epoch": 2.620504973221117, | |
| "grad_norm": 0.4966646432876587, | |
| "learning_rate": 4.313581717448156e-06, | |
| "loss": 0.08799538612365723, | |
| "step": 3425, | |
| "token_acc": 0.9687092304229736 | |
| }, | |
| { | |
| "epoch": 2.624330527926549, | |
| "grad_norm": 0.7006512880325317, | |
| "learning_rate": 4.228293613118089e-06, | |
| "loss": 0.10830029249191284, | |
| "step": 3430, | |
| "token_acc": 0.962169885635376 | |
| }, | |
| { | |
| "epoch": 2.628156082631982, | |
| "grad_norm": 0.7951710820198059, | |
| "learning_rate": 4.143819869604132e-06, | |
| "loss": 0.09951411485671997, | |
| "step": 3435, | |
| "token_acc": 0.9649299383163452 | |
| }, | |
| { | |
| "epoch": 2.631981637337414, | |
| "grad_norm": 0.6713584661483765, | |
| "learning_rate": 4.060161989845818e-06, | |
| "loss": 0.09943540692329407, | |
| "step": 3440, | |
| "token_acc": 0.9660786390304565 | |
| }, | |
| { | |
| "epoch": 2.6358071920428463, | |
| "grad_norm": 0.8555734753608704, | |
| "learning_rate": 3.977321462266998e-06, | |
| "loss": 0.12329368591308594, | |
| "step": 3445, | |
| "token_acc": 0.9588665962219238 | |
| }, | |
| { | |
| "epoch": 2.6396327467482785, | |
| "grad_norm": 0.7402066588401794, | |
| "learning_rate": 3.8952997607493325e-06, | |
| "loss": 0.1296180248260498, | |
| "step": 3450, | |
| "token_acc": 0.9544374942779541 | |
| }, | |
| { | |
| "epoch": 2.6396327467482785, | |
| "eval_loss": 0.5221165418624878, | |
| "eval_runtime": 7.8424, | |
| "eval_samples_per_second": 13.261, | |
| "eval_steps_per_second": 1.658, | |
| "eval_token_acc": 0.8774861693382263, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.6434583014537107, | |
| "grad_norm": 0.5311779975891113, | |
| "learning_rate": 3.814098344606143e-06, | |
| "loss": 0.08472838401794433, | |
| "step": 3455, | |
| "token_acc": 0.9710620045661926 | |
| }, | |
| { | |
| "epoch": 2.647283856159143, | |
| "grad_norm": 0.572284460067749, | |
| "learning_rate": 3.7337186585563732e-06, | |
| "loss": 0.08200944662094116, | |
| "step": 3460, | |
| "token_acc": 0.9718431234359741 | |
| }, | |
| { | |
| "epoch": 2.651109410864575, | |
| "grad_norm": 0.4984256327152252, | |
| "learning_rate": 3.654162132698918e-06, | |
| "loss": 0.10278162956237794, | |
| "step": 3465, | |
| "token_acc": 0.965274453163147 | |
| }, | |
| { | |
| "epoch": 2.654934965570008, | |
| "grad_norm": 0.5390318036079407, | |
| "learning_rate": 3.5754301824871605e-06, | |
| "loss": 0.10632505416870117, | |
| "step": 3470, | |
| "token_acc": 0.9644556641578674 | |
| }, | |
| { | |
| "epoch": 2.65876052027544, | |
| "grad_norm": 0.5882481336593628, | |
| "learning_rate": 3.497524208703834e-06, | |
| "loss": 0.10900474786758423, | |
| "step": 3475, | |
| "token_acc": 0.9621248841285706 | |
| }, | |
| { | |
| "epoch": 2.6625860749808723, | |
| "grad_norm": 0.6717934608459473, | |
| "learning_rate": 3.420445597436056e-06, | |
| "loss": 0.0886709749698639, | |
| "step": 3480, | |
| "token_acc": 0.9691559672355652 | |
| }, | |
| { | |
| "epoch": 2.6664116296863045, | |
| "grad_norm": 0.5694244503974915, | |
| "learning_rate": 3.344195720050658e-06, | |
| "loss": 0.09270554780960083, | |
| "step": 3485, | |
| "token_acc": 0.9656193852424622 | |
| }, | |
| { | |
| "epoch": 2.6702371843917367, | |
| "grad_norm": 0.7296086549758911, | |
| "learning_rate": 3.2687759331698375e-06, | |
| "loss": 0.10218125581741333, | |
| "step": 3490, | |
| "token_acc": 0.9648373126983643 | |
| }, | |
| { | |
| "epoch": 2.674062739097169, | |
| "grad_norm": 0.4986768662929535, | |
| "learning_rate": 3.194187578646979e-06, | |
| "loss": 0.09201115369796753, | |
| "step": 3495, | |
| "token_acc": 0.9665822982788086 | |
| }, | |
| { | |
| "epoch": 2.677888293802601, | |
| "grad_norm": 0.6790587306022644, | |
| "learning_rate": 3.120431983542793e-06, | |
| "loss": 0.10237842798233032, | |
| "step": 3500, | |
| "token_acc": 0.9661151170730591 | |
| }, | |
| { | |
| "epoch": 2.677888293802601, | |
| "eval_loss": 0.5228468179702759, | |
| "eval_runtime": 7.9645, | |
| "eval_samples_per_second": 13.058, | |
| "eval_steps_per_second": 1.632, | |
| "eval_token_acc": 0.8785387873649597, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.681713848508034, | |
| "grad_norm": 0.6572025418281555, | |
| "learning_rate": 3.047510460101705e-06, | |
| "loss": 0.13050510883331298, | |
| "step": 3505, | |
| "token_acc": 0.9555116295814514 | |
| }, | |
| { | |
| "epoch": 2.685539403213466, | |
| "grad_norm": 0.8115324378013611, | |
| "learning_rate": 2.9754243057285134e-06, | |
| "loss": 0.1264261245727539, | |
| "step": 3510, | |
| "token_acc": 0.956243634223938 | |
| }, | |
| { | |
| "epoch": 2.6893649579188983, | |
| "grad_norm": 0.5161707401275635, | |
| "learning_rate": 2.9041748029652927e-06, | |
| "loss": 0.08881696462631225, | |
| "step": 3515, | |
| "token_acc": 0.9682623147964478 | |
| }, | |
| { | |
| "epoch": 2.6931905126243305, | |
| "grad_norm": 0.5522788763046265, | |
| "learning_rate": 2.8337632194685993e-06, | |
| "loss": 0.08286306858062745, | |
| "step": 3520, | |
| "token_acc": 0.9708802700042725 | |
| }, | |
| { | |
| "epoch": 2.6970160673297627, | |
| "grad_norm": 0.5946321487426758, | |
| "learning_rate": 2.7641908079868827e-06, | |
| "loss": 0.10248844623565674, | |
| "step": 3525, | |
| "token_acc": 0.9636382460594177 | |
| }, | |
| { | |
| "epoch": 2.700841622035195, | |
| "grad_norm": 0.6317991018295288, | |
| "learning_rate": 2.69545880633823e-06, | |
| "loss": 0.10524777173995972, | |
| "step": 3530, | |
| "token_acc": 0.9621507525444031 | |
| }, | |
| { | |
| "epoch": 2.704667176740627, | |
| "grad_norm": 0.41846737265586853, | |
| "learning_rate": 2.627568437388306e-06, | |
| "loss": 0.08343310356140136, | |
| "step": 3535, | |
| "token_acc": 0.970815122127533 | |
| }, | |
| { | |
| "epoch": 2.70849273144606, | |
| "grad_norm": 0.592873752117157, | |
| "learning_rate": 2.560520909028663e-06, | |
| "loss": 0.08635797500610351, | |
| "step": 3540, | |
| "token_acc": 0.9700879454612732 | |
| }, | |
| { | |
| "epoch": 2.712318286151492, | |
| "grad_norm": 0.5590534210205078, | |
| "learning_rate": 2.4943174141551674e-06, | |
| "loss": 0.10181926488876343, | |
| "step": 3545, | |
| "token_acc": 0.9652162194252014 | |
| }, | |
| { | |
| "epoch": 2.7161438408569243, | |
| "grad_norm": 0.5901391506195068, | |
| "learning_rate": 2.428959130646824e-06, | |
| "loss": 0.09749918580055236, | |
| "step": 3550, | |
| "token_acc": 0.9646121263504028 | |
| }, | |
| { | |
| "epoch": 2.7161438408569243, | |
| "eval_loss": 0.5235512256622314, | |
| "eval_runtime": 7.8855, | |
| "eval_samples_per_second": 13.189, | |
| "eval_steps_per_second": 1.649, | |
| "eval_token_acc": 0.8786590695381165, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.7199693955623565, | |
| "grad_norm": 0.5816419720649719, | |
| "learning_rate": 2.364447221344812e-06, | |
| "loss": 0.12211033105850219, | |
| "step": 3555, | |
| "token_acc": 0.9581829905509949 | |
| }, | |
| { | |
| "epoch": 2.7237949502677887, | |
| "grad_norm": 0.6168470978736877, | |
| "learning_rate": 2.3007828340318114e-06, | |
| "loss": 0.09811439514160156, | |
| "step": 3560, | |
| "token_acc": 0.9663928151130676 | |
| }, | |
| { | |
| "epoch": 2.7276205049732214, | |
| "grad_norm": 0.599656343460083, | |
| "learning_rate": 2.237967101411531e-06, | |
| "loss": 0.12740142345428468, | |
| "step": 3565, | |
| "token_acc": 0.9561182260513306 | |
| }, | |
| { | |
| "epoch": 2.731446059678653, | |
| "grad_norm": 0.6238080263137817, | |
| "learning_rate": 2.1760011410886126e-06, | |
| "loss": 0.09838619828224182, | |
| "step": 3570, | |
| "token_acc": 0.9653590321540833 | |
| }, | |
| { | |
| "epoch": 2.735271614384086, | |
| "grad_norm": 0.5564831495285034, | |
| "learning_rate": 2.1148860555487204e-06, | |
| "loss": 0.09222927689552307, | |
| "step": 3575, | |
| "token_acc": 0.9685646891593933 | |
| }, | |
| { | |
| "epoch": 2.739097169089518, | |
| "grad_norm": 0.6360819935798645, | |
| "learning_rate": 2.0546229321389278e-06, | |
| "loss": 0.09308220148086548, | |
| "step": 3580, | |
| "token_acc": 0.9680613279342651 | |
| }, | |
| { | |
| "epoch": 2.7429227237949503, | |
| "grad_norm": 0.5651523470878601, | |
| "learning_rate": 1.995212843048372e-06, | |
| "loss": 0.09616876244544983, | |
| "step": 3585, | |
| "token_acc": 0.9660496115684509 | |
| }, | |
| { | |
| "epoch": 2.7467482785003825, | |
| "grad_norm": 0.6321117877960205, | |
| "learning_rate": 1.93665684528917e-06, | |
| "loss": 0.09454690217971802, | |
| "step": 3590, | |
| "token_acc": 0.9675334692001343 | |
| }, | |
| { | |
| "epoch": 2.7505738332058147, | |
| "grad_norm": 0.5536521077156067, | |
| "learning_rate": 1.878955980677638e-06, | |
| "loss": 0.07992898225784302, | |
| "step": 3595, | |
| "token_acc": 0.9721735119819641 | |
| }, | |
| { | |
| "epoch": 2.7543993879112474, | |
| "grad_norm": 0.688173770904541, | |
| "learning_rate": 1.82211127581573e-06, | |
| "loss": 0.09609293937683105, | |
| "step": 3600, | |
| "token_acc": 0.9671096205711365 | |
| }, | |
| { | |
| "epoch": 2.7543993879112474, | |
| "eval_loss": 0.5215653777122498, | |
| "eval_runtime": 8.0108, | |
| "eval_samples_per_second": 12.982, | |
| "eval_steps_per_second": 1.623, | |
| "eval_token_acc": 0.8788695931434631, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.758224942616679, | |
| "grad_norm": 0.6505938768386841, | |
| "learning_rate": 1.7661237420727784e-06, | |
| "loss": 0.1013750433921814, | |
| "step": 3605, | |
| "token_acc": 0.9644123315811157 | |
| }, | |
| { | |
| "epoch": 2.762050497322112, | |
| "grad_norm": 0.5934735536575317, | |
| "learning_rate": 1.710994375567504e-06, | |
| "loss": 0.0851688802242279, | |
| "step": 3610, | |
| "token_acc": 0.9705018997192383 | |
| }, | |
| { | |
| "epoch": 2.765876052027544, | |
| "grad_norm": 0.6007834076881409, | |
| "learning_rate": 1.6567241571502912e-06, | |
| "loss": 0.07638438940048217, | |
| "step": 3615, | |
| "token_acc": 0.9712318778038025 | |
| }, | |
| { | |
| "epoch": 2.7697016067329763, | |
| "grad_norm": 0.5481213927268982, | |
| "learning_rate": 1.6033140523857404e-06, | |
| "loss": 0.09145662784576417, | |
| "step": 3620, | |
| "token_acc": 0.9675630927085876 | |
| }, | |
| { | |
| "epoch": 2.7735271614384085, | |
| "grad_norm": 0.6200750470161438, | |
| "learning_rate": 1.5507650115354877e-06, | |
| "loss": 0.10738480091094971, | |
| "step": 3625, | |
| "token_acc": 0.9640287756919861 | |
| }, | |
| { | |
| "epoch": 2.7773527161438407, | |
| "grad_norm": 0.6538658142089844, | |
| "learning_rate": 1.499077969541307e-06, | |
| "loss": 0.10229132175445557, | |
| "step": 3630, | |
| "token_acc": 0.9641778469085693 | |
| }, | |
| { | |
| "epoch": 2.7811782708492734, | |
| "grad_norm": 1.8193166255950928, | |
| "learning_rate": 1.4482538460084293e-06, | |
| "loss": 0.13732895851135254, | |
| "step": 3635, | |
| "token_acc": 0.958136796951294 | |
| }, | |
| { | |
| "epoch": 2.785003825554705, | |
| "grad_norm": 0.5257523655891418, | |
| "learning_rate": 1.3982935451892498e-06, | |
| "loss": 0.08640526533126831, | |
| "step": 3640, | |
| "token_acc": 0.971260130405426 | |
| }, | |
| { | |
| "epoch": 2.788829380260138, | |
| "grad_norm": 0.568705141544342, | |
| "learning_rate": 1.3491979559672075e-06, | |
| "loss": 0.08791974782943726, | |
| "step": 3645, | |
| "token_acc": 0.9699133038520813 | |
| }, | |
| { | |
| "epoch": 2.79265493496557, | |
| "grad_norm": 0.5045759081840515, | |
| "learning_rate": 1.3009679518409479e-06, | |
| "loss": 0.07553626298904419, | |
| "step": 3650, | |
| "token_acc": 0.9740194082260132 | |
| }, | |
| { | |
| "epoch": 2.79265493496557, | |
| "eval_loss": 0.5219829678535461, | |
| "eval_runtime": 8.0288, | |
| "eval_samples_per_second": 12.953, | |
| "eval_steps_per_second": 1.619, | |
| "eval_token_acc": 0.8788595795631409, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.7964804896710023, | |
| "grad_norm": 0.610518217086792, | |
| "learning_rate": 1.2536043909088191e-06, | |
| "loss": 0.10455150604248047, | |
| "step": 3655, | |
| "token_acc": 0.9636396765708923 | |
| }, | |
| { | |
| "epoch": 2.8003060443764345, | |
| "grad_norm": 0.5319099426269531, | |
| "learning_rate": 1.2071081158535947e-06, | |
| "loss": 0.08882582187652588, | |
| "step": 3660, | |
| "token_acc": 0.968651294708252 | |
| }, | |
| { | |
| "epoch": 2.8041315990818667, | |
| "grad_norm": 0.6065900325775146, | |
| "learning_rate": 1.1614799539274634e-06, | |
| "loss": 0.08307374119758607, | |
| "step": 3665, | |
| "token_acc": 0.9706868529319763 | |
| }, | |
| { | |
| "epoch": 2.8079571537872994, | |
| "grad_norm": 0.6401634812355042, | |
| "learning_rate": 1.1167207169373195e-06, | |
| "loss": 0.09725141525268555, | |
| "step": 3670, | |
| "token_acc": 0.9657084941864014 | |
| }, | |
| { | |
| "epoch": 2.8117827084927316, | |
| "grad_norm": 0.524497389793396, | |
| "learning_rate": 1.0728312012303454e-06, | |
| "loss": 0.11780104637145997, | |
| "step": 3675, | |
| "token_acc": 0.960728108882904 | |
| }, | |
| { | |
| "epoch": 2.815608263198164, | |
| "grad_norm": 0.7346832156181335, | |
| "learning_rate": 1.0298121876797962e-06, | |
| "loss": 0.11407887935638428, | |
| "step": 3680, | |
| "token_acc": 0.9612630605697632 | |
| }, | |
| { | |
| "epoch": 2.819433817903596, | |
| "grad_norm": 0.6890755295753479, | |
| "learning_rate": 9.876644416711488e-07, | |
| "loss": 0.11829521656036376, | |
| "step": 3685, | |
| "token_acc": 0.9585215449333191 | |
| }, | |
| { | |
| "epoch": 2.8232593726090283, | |
| "grad_norm": 0.5342867970466614, | |
| "learning_rate": 9.46388713088453e-07, | |
| "loss": 0.09410252571105956, | |
| "step": 3690, | |
| "token_acc": 0.9661674499511719 | |
| }, | |
| { | |
| "epoch": 2.8270849273144605, | |
| "grad_norm": 0.4889836311340332, | |
| "learning_rate": 9.059857363010183e-07, | |
| "loss": 0.09603096842765808, | |
| "step": 3695, | |
| "token_acc": 0.965887188911438 | |
| }, | |
| { | |
| "epoch": 2.8309104820198927, | |
| "grad_norm": 0.5685746073722839, | |
| "learning_rate": 8.664562301503143e-07, | |
| "loss": 0.08459590077400207, | |
| "step": 3700, | |
| "token_acc": 0.9699506163597107 | |
| }, | |
| { | |
| "epoch": 2.8309104820198927, | |
| "eval_loss": 0.5205320119857788, | |
| "eval_runtime": 7.8427, | |
| "eval_samples_per_second": 13.261, | |
| "eval_steps_per_second": 1.658, | |
| "eval_token_acc": 0.8790299892425537, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.8347360367253254, | |
| "grad_norm": 0.5299521684646606, | |
| "learning_rate": 8.278008979372087e-07, | |
| "loss": 0.09127166271209716, | |
| "step": 3705, | |
| "token_acc": 0.9684864282608032 | |
| }, | |
| { | |
| "epoch": 2.8385615914307576, | |
| "grad_norm": 0.4766036868095398, | |
| "learning_rate": 7.900204274094602e-07, | |
| "loss": 0.09881120324134826, | |
| "step": 3710, | |
| "token_acc": 0.9655190706253052 | |
| }, | |
| { | |
| "epoch": 2.84238714613619, | |
| "grad_norm": 8.799799919128418, | |
| "learning_rate": 7.531154907494397e-07, | |
| "loss": 0.13544522523880004, | |
| "step": 3715, | |
| "token_acc": 0.9555306434631348 | |
| }, | |
| { | |
| "epoch": 2.846212700841622, | |
| "grad_norm": 0.563325822353363, | |
| "learning_rate": 7.170867445622287e-07, | |
| "loss": 0.10241570472717285, | |
| "step": 3720, | |
| "token_acc": 0.9647788405418396 | |
| }, | |
| { | |
| "epoch": 2.8500382555470543, | |
| "grad_norm": 0.6075456142425537, | |
| "learning_rate": 6.819348298638839e-07, | |
| "loss": 0.12761690616607665, | |
| "step": 3725, | |
| "token_acc": 0.9584816098213196 | |
| }, | |
| { | |
| "epoch": 2.8538638102524865, | |
| "grad_norm": 0.6337462663650513, | |
| "learning_rate": 6.476603720700636e-07, | |
| "loss": 0.09158645272254944, | |
| "step": 3730, | |
| "token_acc": 0.9687730669975281 | |
| }, | |
| { | |
| "epoch": 2.8576893649579187, | |
| "grad_norm": 0.5899404287338257, | |
| "learning_rate": 6.142639809849027e-07, | |
| "loss": 0.09597094655036927, | |
| "step": 3735, | |
| "token_acc": 0.9665765762329102 | |
| }, | |
| { | |
| "epoch": 2.8615149196633514, | |
| "grad_norm": 0.5653353929519653, | |
| "learning_rate": 5.817462507901383e-07, | |
| "loss": 0.10877490043640137, | |
| "step": 3740, | |
| "token_acc": 0.9619103074073792 | |
| }, | |
| { | |
| "epoch": 2.8653404743687836, | |
| "grad_norm": 0.49452540278434753, | |
| "learning_rate": 5.501077600345572e-07, | |
| "loss": 0.08857889175415039, | |
| "step": 3745, | |
| "token_acc": 0.9700949192047119 | |
| }, | |
| { | |
| "epoch": 2.869166029074216, | |
| "grad_norm": 0.731597900390625, | |
| "learning_rate": 5.193490716237037e-07, | |
| "loss": 0.12281218767166138, | |
| "step": 3750, | |
| "token_acc": 0.9560735821723938 | |
| }, | |
| { | |
| "epoch": 2.869166029074216, | |
| "eval_loss": 0.5206364989280701, | |
| "eval_runtime": 9.2942, | |
| "eval_samples_per_second": 11.19, | |
| "eval_steps_per_second": 1.399, | |
| "eval_token_acc": 0.879270613193512, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.872991583779648, | |
| "grad_norm": 0.6116617321968079, | |
| "learning_rate": 4.894707328098602e-07, | |
| "loss": 0.11083317995071411, | |
| "step": 3755, | |
| "token_acc": 0.9610885977745056 | |
| }, | |
| { | |
| "epoch": 2.8768171384850802, | |
| "grad_norm": 0.5174733400344849, | |
| "learning_rate": 4.6047327518230485e-07, | |
| "loss": 0.08961974382400513, | |
| "step": 3760, | |
| "token_acc": 0.9690099954605103 | |
| }, | |
| { | |
| "epoch": 2.8806426931905125, | |
| "grad_norm": 0.5262379050254822, | |
| "learning_rate": 4.3235721465784697e-07, | |
| "loss": 0.09585506916046142, | |
| "step": 3765, | |
| "token_acc": 0.9667736887931824 | |
| }, | |
| { | |
| "epoch": 2.8844682478959447, | |
| "grad_norm": 0.5788334012031555, | |
| "learning_rate": 4.0512305147167863e-07, | |
| "loss": 0.08268014192581177, | |
| "step": 3770, | |
| "token_acc": 0.9712512493133545 | |
| }, | |
| { | |
| "epoch": 2.8882938026013774, | |
| "grad_norm": 0.687783420085907, | |
| "learning_rate": 3.787712701684598e-07, | |
| "loss": 0.08984529376029968, | |
| "step": 3775, | |
| "token_acc": 0.9686997532844543 | |
| }, | |
| { | |
| "epoch": 2.8921193573068096, | |
| "grad_norm": 0.6016952395439148, | |
| "learning_rate": 3.5330233959365853e-07, | |
| "loss": 0.09222807884216308, | |
| "step": 3780, | |
| "token_acc": 0.9685728549957275 | |
| }, | |
| { | |
| "epoch": 2.895944912012242, | |
| "grad_norm": 0.5089208483695984, | |
| "learning_rate": 3.2871671288528525e-07, | |
| "loss": 0.09786663055419922, | |
| "step": 3785, | |
| "token_acc": 0.9665623903274536 | |
| }, | |
| { | |
| "epoch": 2.899770466717674, | |
| "grad_norm": 1.769921898841858, | |
| "learning_rate": 3.050148274657549e-07, | |
| "loss": 0.12438170909881592, | |
| "step": 3790, | |
| "token_acc": 0.9624179601669312 | |
| }, | |
| { | |
| "epoch": 2.9035960214231062, | |
| "grad_norm": 0.5424771904945374, | |
| "learning_rate": 2.821971050341654e-07, | |
| "loss": 0.0890495777130127, | |
| "step": 3795, | |
| "token_acc": 0.9703425765037537 | |
| }, | |
| { | |
| "epoch": 2.907421576128539, | |
| "grad_norm": 0.5487825274467468, | |
| "learning_rate": 2.6026395155874795e-07, | |
| "loss": 0.10370445251464844, | |
| "step": 3800, | |
| "token_acc": 0.9638125896453857 | |
| }, | |
| { | |
| "epoch": 2.907421576128539, | |
| "eval_loss": 0.5206490159034729, | |
| "eval_runtime": 8.3112, | |
| "eval_samples_per_second": 12.513, | |
| "eval_steps_per_second": 1.564, | |
| "eval_token_acc": 0.8794209361076355, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.9112471308339707, | |
| "grad_norm": 0.5681285262107849, | |
| "learning_rate": 2.3921575726967846e-07, | |
| "loss": 0.09305150508880615, | |
| "step": 3805, | |
| "token_acc": 0.9688363075256348 | |
| }, | |
| { | |
| "epoch": 2.9150726855394034, | |
| "grad_norm": 0.4438033103942871, | |
| "learning_rate": 2.1905289665211104e-07, | |
| "loss": 0.08973047733306885, | |
| "step": 3810, | |
| "token_acc": 0.9688341021537781 | |
| }, | |
| { | |
| "epoch": 2.9188982402448356, | |
| "grad_norm": 0.5287227630615234, | |
| "learning_rate": 1.9977572843953296e-07, | |
| "loss": 0.07862873077392578, | |
| "step": 3815, | |
| "token_acc": 0.9715408086776733 | |
| }, | |
| { | |
| "epoch": 2.922723794950268, | |
| "grad_norm": 0.5739708542823792, | |
| "learning_rate": 1.8138459560735899e-07, | |
| "loss": 0.08315033316612244, | |
| "step": 3820, | |
| "token_acc": 0.9718932509422302 | |
| }, | |
| { | |
| "epoch": 2.9265493496557, | |
| "grad_norm": 0.6123870611190796, | |
| "learning_rate": 1.638798253668694e-07, | |
| "loss": 0.125601065158844, | |
| "step": 3825, | |
| "token_acc": 0.9556345343589783 | |
| }, | |
| { | |
| "epoch": 2.9303749043611322, | |
| "grad_norm": 0.6285126209259033, | |
| "learning_rate": 1.4726172915933146e-07, | |
| "loss": 0.09772306680679321, | |
| "step": 3830, | |
| "token_acc": 0.9654306769371033 | |
| }, | |
| { | |
| "epoch": 2.934200459066565, | |
| "grad_norm": 0.4770904779434204, | |
| "learning_rate": 1.315306026505092e-07, | |
| "loss": 0.0937896728515625, | |
| "step": 3835, | |
| "token_acc": 0.9662994146347046 | |
| }, | |
| { | |
| "epoch": 2.9380260137719967, | |
| "grad_norm": 0.4980320632457733, | |
| "learning_rate": 1.1668672572539008e-07, | |
| "loss": 0.08644679784774781, | |
| "step": 3840, | |
| "token_acc": 0.969020664691925 | |
| }, | |
| { | |
| "epoch": 2.9418515684774293, | |
| "grad_norm": 0.5362405180931091, | |
| "learning_rate": 1.0273036248318324e-07, | |
| "loss": 0.08760695457458496, | |
| "step": 3845, | |
| "token_acc": 0.9707760214805603 | |
| }, | |
| { | |
| "epoch": 2.9456771231828616, | |
| "grad_norm": 0.4886132776737213, | |
| "learning_rate": 8.966176123264003e-08, | |
| "loss": 0.06749528646469116, | |
| "step": 3850, | |
| "token_acc": 0.9768878221511841 | |
| }, | |
| { | |
| "epoch": 2.9456771231828616, | |
| "eval_loss": 0.5208922028541565, | |
| "eval_runtime": 8.161, | |
| "eval_samples_per_second": 12.743, | |
| "eval_steps_per_second": 1.593, | |
| "eval_token_acc": 0.8793407678604126, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.949502677888294, | |
| "grad_norm": 0.5290758013725281, | |
| "learning_rate": 7.748115448763526e-08, | |
| "loss": 0.07928290963172913, | |
| "step": 3855, | |
| "token_acc": 0.971563458442688 | |
| }, | |
| { | |
| "epoch": 2.953328232593726, | |
| "grad_norm": 0.6795271039009094, | |
| "learning_rate": 6.618875896303167e-08, | |
| "loss": 0.10474317073822022, | |
| "step": 3860, | |
| "token_acc": 0.9640142321586609 | |
| }, | |
| { | |
| "epoch": 2.9571537872991582, | |
| "grad_norm": 0.6599166989326477, | |
| "learning_rate": 5.578477557081074e-08, | |
| "loss": 0.10668476819992065, | |
| "step": 3865, | |
| "token_acc": 0.9629032015800476 | |
| }, | |
| { | |
| "epoch": 2.960979342004591, | |
| "grad_norm": 0.6517552733421326, | |
| "learning_rate": 4.6269389416514486e-08, | |
| "loss": 0.08918753862380982, | |
| "step": 3870, | |
| "token_acc": 0.9688775539398193 | |
| }, | |
| { | |
| "epoch": 2.964804896710023, | |
| "grad_norm": 0.6627753376960754, | |
| "learning_rate": 3.764276979593695e-08, | |
| "loss": 0.08152820467948914, | |
| "step": 3875, | |
| "token_acc": 0.9715802669525146 | |
| }, | |
| { | |
| "epoch": 2.9686304514154553, | |
| "grad_norm": 0.5488728284835815, | |
| "learning_rate": 2.990507019213218e-08, | |
| "loss": 0.08794408440589904, | |
| "step": 3880, | |
| "token_acc": 0.9700236916542053 | |
| }, | |
| { | |
| "epoch": 2.9724560061208876, | |
| "grad_norm": 0.5994005799293518, | |
| "learning_rate": 2.305642827266641e-08, | |
| "loss": 0.10513956546783447, | |
| "step": 3885, | |
| "token_acc": 0.9652788639068604 | |
| }, | |
| { | |
| "epoch": 2.97628156082632, | |
| "grad_norm": 0.5402779579162598, | |
| "learning_rate": 1.7096965887164475e-08, | |
| "loss": 0.10320125818252564, | |
| "step": 3890, | |
| "token_acc": 0.964747428894043 | |
| }, | |
| { | |
| "epoch": 2.980107115531752, | |
| "grad_norm": 0.5638807415962219, | |
| "learning_rate": 1.2026789065167077e-08, | |
| "loss": 0.09008901119232178, | |
| "step": 3895, | |
| "token_acc": 0.9677461385726929 | |
| }, | |
| { | |
| "epoch": 2.9839326702371842, | |
| "grad_norm": 0.6424400806427002, | |
| "learning_rate": 7.845988014215655e-09, | |
| "loss": 0.09886548519134522, | |
| "step": 3900, | |
| "token_acc": 0.9671627879142761 | |
| }, | |
| { | |
| "epoch": 2.9839326702371842, | |
| "eval_loss": 0.5208696126937866, | |
| "eval_runtime": 8.2236, | |
| "eval_samples_per_second": 12.647, | |
| "eval_steps_per_second": 1.581, | |
| "eval_token_acc": 0.8792405128479004, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.987758224942617, | |
| "grad_norm": 0.6108574867248535, | |
| "learning_rate": 4.554637118270311e-09, | |
| "loss": 0.10293105840682984, | |
| "step": 3905, | |
| "token_acc": 0.9645171165466309 | |
| }, | |
| { | |
| "epoch": 2.991583779648049, | |
| "grad_norm": 0.5026504993438721, | |
| "learning_rate": 2.1527949363664425e-09, | |
| "loss": 0.1074068307876587, | |
| "step": 3910, | |
| "token_acc": 0.9619331359863281 | |
| }, | |
| { | |
| "epoch": 2.9954093343534813, | |
| "grad_norm": 0.6875292658805847, | |
| "learning_rate": 6.405042015877882e-10, | |
| "loss": 0.11073212623596192, | |
| "step": 3915, | |
| "token_acc": 0.9605428576469421 | |
| }, | |
| { | |
| "epoch": 2.9992348890589136, | |
| "grad_norm": 0.6482424139976501, | |
| "learning_rate": 1.7791820305923523e-11, | |
| "loss": 0.11924041509628296, | |
| "step": 3920, | |
| "token_acc": 0.9589547514915466 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.5209956765174866, | |
| "eval_runtime": 8.1308, | |
| "eval_samples_per_second": 12.791, | |
| "eval_steps_per_second": 1.599, | |
| "eval_token_acc": 0.8794109225273132, | |
| "step": 3921 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3921, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.411019928798757e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |