| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 30.0, | |
| "global_step": 2982, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000335401643468053, | |
| "grad_norm": 12.375, | |
| "learning_rate": 9.99999722524632e-07, | |
| "loss": 1.274832010269165, | |
| "step": 1, | |
| "token_acc": 0.6722809020839281 | |
| }, | |
| { | |
| "epoch": 0.001677008217340265, | |
| "grad_norm": 11.5, | |
| "learning_rate": 9.999930631312047e-07, | |
| "loss": 1.330723524093628, | |
| "step": 5, | |
| "token_acc": 0.6504451038575668 | |
| }, | |
| { | |
| "epoch": 0.00335401643468053, | |
| "grad_norm": 11.4375, | |
| "learning_rate": 9.999722527172996e-07, | |
| "loss": 1.3872632026672362, | |
| "step": 10, | |
| "token_acc": 0.6347948773605383 | |
| }, | |
| { | |
| "epoch": 0.005031024652020795, | |
| "grad_norm": 10.125, | |
| "learning_rate": 9.999375693357208e-07, | |
| "loss": 1.28599214553833, | |
| "step": 15, | |
| "token_acc": 0.6515521064301553 | |
| }, | |
| { | |
| "epoch": 0.00670803286936106, | |
| "grad_norm": 10.75, | |
| "learning_rate": 9.99889013948845e-07, | |
| "loss": 1.2960277557373048, | |
| "step": 20, | |
| "token_acc": 0.6534720269890357 | |
| }, | |
| { | |
| "epoch": 0.008385041086701324, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 9.998265879039611e-07, | |
| "loss": 1.2671630859375, | |
| "step": 25, | |
| "token_acc": 0.655209801582999 | |
| }, | |
| { | |
| "epoch": 0.01006204930404159, | |
| "grad_norm": 9.625, | |
| "learning_rate": 9.997502929332347e-07, | |
| "loss": 1.261711597442627, | |
| "step": 30, | |
| "token_acc": 0.6588392961193335 | |
| }, | |
| { | |
| "epoch": 0.011739057521381855, | |
| "grad_norm": 9.125, | |
| "learning_rate": 9.996601311536586e-07, | |
| "loss": 1.2757044792175294, | |
| "step": 35, | |
| "token_acc": 0.6571720381298229 | |
| }, | |
| { | |
| "epoch": 0.01341606573872212, | |
| "grad_norm": 9.25, | |
| "learning_rate": 9.99556105066994e-07, | |
| "loss": 1.2386951446533203, | |
| "step": 40, | |
| "token_acc": 0.6721679125934445 | |
| }, | |
| { | |
| "epoch": 0.015093073956062384, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 9.994382175597028e-07, | |
| "loss": 1.2164941787719727, | |
| "step": 45, | |
| "token_acc": 0.6682510594433627 | |
| }, | |
| { | |
| "epoch": 0.01677008217340265, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 9.993064719028653e-07, | |
| "loss": 1.2428162574768067, | |
| "step": 50, | |
| "token_acc": 0.6631278538812785 | |
| }, | |
| { | |
| "epoch": 0.018447090390742913, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 9.991608717520907e-07, | |
| "loss": 1.1938260078430176, | |
| "step": 55, | |
| "token_acc": 0.6717246907924874 | |
| }, | |
| { | |
| "epoch": 0.02012409860808318, | |
| "grad_norm": 7.15625, | |
| "learning_rate": 9.99001421147416e-07, | |
| "loss": 1.22207612991333, | |
| "step": 60, | |
| "token_acc": 0.6641735228122663 | |
| }, | |
| { | |
| "epoch": 0.021801106825423446, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 9.988281245131927e-07, | |
| "loss": 1.2258419036865233, | |
| "step": 65, | |
| "token_acc": 0.6678375645324391 | |
| }, | |
| { | |
| "epoch": 0.02347811504276371, | |
| "grad_norm": 7.65625, | |
| "learning_rate": 9.98640986657965e-07, | |
| "loss": 1.2073140144348145, | |
| "step": 70, | |
| "token_acc": 0.6601010387693705 | |
| }, | |
| { | |
| "epoch": 0.025155123260103975, | |
| "grad_norm": 8.125, | |
| "learning_rate": 9.984400127743356e-07, | |
| "loss": 1.1937344551086426, | |
| "step": 75, | |
| "token_acc": 0.672787979966611 | |
| }, | |
| { | |
| "epoch": 0.02683213147744424, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 9.982252084388226e-07, | |
| "loss": 1.219521713256836, | |
| "step": 80, | |
| "token_acc": 0.6679824317562684 | |
| }, | |
| { | |
| "epoch": 0.028509139694784504, | |
| "grad_norm": 7.3125, | |
| "learning_rate": 9.979965796117037e-07, | |
| "loss": 1.1813300132751465, | |
| "step": 85, | |
| "token_acc": 0.6724050059459765 | |
| }, | |
| { | |
| "epoch": 0.030186147912124768, | |
| "grad_norm": 7.21875, | |
| "learning_rate": 9.977541326368517e-07, | |
| "loss": 1.2407192230224608, | |
| "step": 90, | |
| "token_acc": 0.6603100321731501 | |
| }, | |
| { | |
| "epoch": 0.03186315612946503, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 9.974978742415584e-07, | |
| "loss": 1.228166675567627, | |
| "step": 95, | |
| "token_acc": 0.6610423922180857 | |
| }, | |
| { | |
| "epoch": 0.0335401643468053, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 9.97227811536347e-07, | |
| "loss": 1.204833984375, | |
| "step": 100, | |
| "token_acc": 0.6694168490767245 | |
| }, | |
| { | |
| "epoch": 0.03521717256414556, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 9.969439520147753e-07, | |
| "loss": 1.1833898544311523, | |
| "step": 105, | |
| "token_acc": 0.6732022471910112 | |
| }, | |
| { | |
| "epoch": 0.036894180781485826, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 9.966463035532288e-07, | |
| "loss": 1.1810292243957519, | |
| "step": 110, | |
| "token_acc": 0.673891419297419 | |
| }, | |
| { | |
| "epoch": 0.0385711889988261, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 9.963348744107008e-07, | |
| "loss": 1.1781543731689452, | |
| "step": 115, | |
| "token_acc": 0.674726171055698 | |
| }, | |
| { | |
| "epoch": 0.04024819721616636, | |
| "grad_norm": 7.09375, | |
| "learning_rate": 9.960096732285637e-07, | |
| "loss": 1.1450292587280273, | |
| "step": 120, | |
| "token_acc": 0.681571495988674 | |
| }, | |
| { | |
| "epoch": 0.04192520543350663, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 9.956707090303289e-07, | |
| "loss": 1.20210599899292, | |
| "step": 125, | |
| "token_acc": 0.6689448241373562 | |
| }, | |
| { | |
| "epoch": 0.04360221365084689, | |
| "grad_norm": 6.5, | |
| "learning_rate": 9.953179912213974e-07, | |
| "loss": 1.2348913192749023, | |
| "step": 130, | |
| "token_acc": 0.656823718139148 | |
| }, | |
| { | |
| "epoch": 0.045279221868187156, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 9.949515295887978e-07, | |
| "loss": 1.1797042846679688, | |
| "step": 135, | |
| "token_acc": 0.671729097348932 | |
| }, | |
| { | |
| "epoch": 0.04695623008552742, | |
| "grad_norm": 7.1875, | |
| "learning_rate": 9.945713343009152e-07, | |
| "loss": 1.1191802978515626, | |
| "step": 140, | |
| "token_acc": 0.6833774573311553 | |
| }, | |
| { | |
| "epoch": 0.048633238302867685, | |
| "grad_norm": 6.875, | |
| "learning_rate": 9.941774159072088e-07, | |
| "loss": 1.1308756828308106, | |
| "step": 145, | |
| "token_acc": 0.6832326456172347 | |
| }, | |
| { | |
| "epoch": 0.05031024652020795, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 9.937697853379192e-07, | |
| "loss": 1.2008363723754882, | |
| "step": 150, | |
| "token_acc": 0.665910345250506 | |
| }, | |
| { | |
| "epoch": 0.051987254737548214, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 9.93348453903766e-07, | |
| "loss": 1.1969077110290527, | |
| "step": 155, | |
| "token_acc": 0.6697475202885482 | |
| }, | |
| { | |
| "epoch": 0.05366426295488848, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 9.929134332956327e-07, | |
| "loss": 1.1653069496154784, | |
| "step": 160, | |
| "token_acc": 0.6718004091839054 | |
| }, | |
| { | |
| "epoch": 0.05534127117222874, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 9.924647355842421e-07, | |
| "loss": 1.123036003112793, | |
| "step": 165, | |
| "token_acc": 0.6868122371377542 | |
| }, | |
| { | |
| "epoch": 0.05701827938956901, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 9.920023732198237e-07, | |
| "loss": 1.1395354270935059, | |
| "step": 170, | |
| "token_acc": 0.6813573228623337 | |
| }, | |
| { | |
| "epoch": 0.05869528760690927, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 9.915263590317654e-07, | |
| "loss": 1.1792012214660645, | |
| "step": 175, | |
| "token_acc": 0.6715845846896986 | |
| }, | |
| { | |
| "epoch": 0.060372295824249536, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 9.91036706228259e-07, | |
| "loss": 1.1243346214294434, | |
| "step": 180, | |
| "token_acc": 0.6848594635657103 | |
| }, | |
| { | |
| "epoch": 0.0620493040415898, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 9.905334283959333e-07, | |
| "loss": 1.143388843536377, | |
| "step": 185, | |
| "token_acc": 0.6788610770756753 | |
| }, | |
| { | |
| "epoch": 0.06372631225893007, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 9.90016539499478e-07, | |
| "loss": 1.1932525634765625, | |
| "step": 190, | |
| "token_acc": 0.6702515177797052 | |
| }, | |
| { | |
| "epoch": 0.06540332047627033, | |
| "grad_norm": 6.5, | |
| "learning_rate": 9.894860538812545e-07, | |
| "loss": 1.143165111541748, | |
| "step": 195, | |
| "token_acc": 0.6809917355371901 | |
| }, | |
| { | |
| "epoch": 0.0670803286936106, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 9.889419862608995e-07, | |
| "loss": 1.1876848220825196, | |
| "step": 200, | |
| "token_acc": 0.6701633023361306 | |
| }, | |
| { | |
| "epoch": 0.06875733691095086, | |
| "grad_norm": 6.625, | |
| "learning_rate": 9.883843517349157e-07, | |
| "loss": 1.144710636138916, | |
| "step": 205, | |
| "token_acc": 0.6796767632345665 | |
| }, | |
| { | |
| "epoch": 0.07043434512829112, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 9.878131657762535e-07, | |
| "loss": 1.1471405982971192, | |
| "step": 210, | |
| "token_acc": 0.6775461840900523 | |
| }, | |
| { | |
| "epoch": 0.07211135334563139, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 9.872284442338807e-07, | |
| "loss": 1.1314339637756348, | |
| "step": 215, | |
| "token_acc": 0.683370644624304 | |
| }, | |
| { | |
| "epoch": 0.07378836156297165, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 9.86630203332344e-07, | |
| "loss": 1.1659558296203614, | |
| "step": 220, | |
| "token_acc": 0.67846951255023 | |
| }, | |
| { | |
| "epoch": 0.07546536978031192, | |
| "grad_norm": 6.25, | |
| "learning_rate": 9.860184596713182e-07, | |
| "loss": 1.164224624633789, | |
| "step": 225, | |
| "token_acc": 0.6794126284875184 | |
| }, | |
| { | |
| "epoch": 0.0771423779976522, | |
| "grad_norm": 6.625, | |
| "learning_rate": 9.853932302251449e-07, | |
| "loss": 1.1158638000488281, | |
| "step": 230, | |
| "token_acc": 0.6871271225332721 | |
| }, | |
| { | |
| "epoch": 0.07881938621499246, | |
| "grad_norm": 7.0, | |
| "learning_rate": 9.847545323423632e-07, | |
| "loss": 1.1727291107177735, | |
| "step": 235, | |
| "token_acc": 0.6663918982571833 | |
| }, | |
| { | |
| "epoch": 0.08049639443233272, | |
| "grad_norm": 6.125, | |
| "learning_rate": 9.84102383745226e-07, | |
| "loss": 1.105608081817627, | |
| "step": 240, | |
| "token_acc": 0.6856490759296371 | |
| }, | |
| { | |
| "epoch": 0.08217340264967299, | |
| "grad_norm": 7.03125, | |
| "learning_rate": 9.834368025292112e-07, | |
| "loss": 1.123321533203125, | |
| "step": 245, | |
| "token_acc": 0.6824586230234418 | |
| }, | |
| { | |
| "epoch": 0.08385041086701325, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 9.827578071625163e-07, | |
| "loss": 1.1252297401428222, | |
| "step": 250, | |
| "token_acc": 0.6880288131717357 | |
| }, | |
| { | |
| "epoch": 0.08552741908435352, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 9.82065416485549e-07, | |
| "loss": 1.144364833831787, | |
| "step": 255, | |
| "token_acc": 0.6834830684174154 | |
| }, | |
| { | |
| "epoch": 0.08720442730169378, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 9.813596497104018e-07, | |
| "loss": 1.1258564949035645, | |
| "step": 260, | |
| "token_acc": 0.6776172457287009 | |
| }, | |
| { | |
| "epoch": 0.08888143551903405, | |
| "grad_norm": 6.375, | |
| "learning_rate": 9.806405264203213e-07, | |
| "loss": 1.1180498123168945, | |
| "step": 265, | |
| "token_acc": 0.6824193364496637 | |
| }, | |
| { | |
| "epoch": 0.09055844373637431, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 9.79908066569163e-07, | |
| "loss": 1.1487442016601563, | |
| "step": 270, | |
| "token_acc": 0.6782154722354058 | |
| }, | |
| { | |
| "epoch": 0.09223545195371458, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 9.79162290480838e-07, | |
| "loss": 1.1168856620788574, | |
| "step": 275, | |
| "token_acc": 0.6839214769806029 | |
| }, | |
| { | |
| "epoch": 0.09391246017105484, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 9.784032188487506e-07, | |
| "loss": 1.1488576889038087, | |
| "step": 280, | |
| "token_acc": 0.6758741258741259 | |
| }, | |
| { | |
| "epoch": 0.0955894683883951, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 9.776308727352214e-07, | |
| "loss": 1.105551528930664, | |
| "step": 285, | |
| "token_acc": 0.6866073519082182 | |
| }, | |
| { | |
| "epoch": 0.09726647660573537, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 9.768452735709054e-07, | |
| "loss": 1.1006428718566894, | |
| "step": 290, | |
| "token_acc": 0.6875408084525435 | |
| }, | |
| { | |
| "epoch": 0.09894348482307563, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 9.760464431541956e-07, | |
| "loss": 1.1159579277038574, | |
| "step": 295, | |
| "token_acc": 0.6872186685619521 | |
| }, | |
| { | |
| "epoch": 0.1006204930404159, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.752344036506197e-07, | |
| "loss": 1.0885606765747071, | |
| "step": 300, | |
| "token_acc": 0.6948132222520828 | |
| }, | |
| { | |
| "epoch": 0.10229750125775616, | |
| "grad_norm": 6.25, | |
| "learning_rate": 9.74409177592223e-07, | |
| "loss": 1.0900307655334474, | |
| "step": 305, | |
| "token_acc": 0.6930264477643456 | |
| }, | |
| { | |
| "epoch": 0.10397450947509643, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 9.735707878769456e-07, | |
| "loss": 1.1054121017456056, | |
| "step": 310, | |
| "token_acc": 0.6892293353527116 | |
| }, | |
| { | |
| "epoch": 0.10565151769243669, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 9.72719257767985e-07, | |
| "loss": 1.0906902313232423, | |
| "step": 315, | |
| "token_acc": 0.6898504082484057 | |
| }, | |
| { | |
| "epoch": 0.10732852590977696, | |
| "grad_norm": 6.0, | |
| "learning_rate": 9.71854610893152e-07, | |
| "loss": 1.1318517684936524, | |
| "step": 320, | |
| "token_acc": 0.6846991214013373 | |
| }, | |
| { | |
| "epoch": 0.10900553412711722, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 9.709768712442142e-07, | |
| "loss": 1.111149215698242, | |
| "step": 325, | |
| "token_acc": 0.6831088755476229 | |
| }, | |
| { | |
| "epoch": 0.11068254234445749, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 9.700860631762307e-07, | |
| "loss": 1.0418154716491699, | |
| "step": 330, | |
| "token_acc": 0.6975177882787464 | |
| }, | |
| { | |
| "epoch": 0.11235955056179775, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 9.69182211406876e-07, | |
| "loss": 1.0776394844055175, | |
| "step": 335, | |
| "token_acc": 0.6928902718252803 | |
| }, | |
| { | |
| "epoch": 0.11403655877913801, | |
| "grad_norm": 6.875, | |
| "learning_rate": 9.68265341015755e-07, | |
| "loss": 1.1534889221191407, | |
| "step": 340, | |
| "token_acc": 0.6755254892486108 | |
| }, | |
| { | |
| "epoch": 0.11571356699647828, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.673354774437062e-07, | |
| "loss": 1.109882926940918, | |
| "step": 345, | |
| "token_acc": 0.684797277474623 | |
| }, | |
| { | |
| "epoch": 0.11739057521381854, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 9.663926464920956e-07, | |
| "loss": 1.060962200164795, | |
| "step": 350, | |
| "token_acc": 0.6972844314616466 | |
| }, | |
| { | |
| "epoch": 0.11906758343115881, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 9.65436874322102e-07, | |
| "loss": 1.134366798400879, | |
| "step": 355, | |
| "token_acc": 0.6782759180141691 | |
| }, | |
| { | |
| "epoch": 0.12074459164849907, | |
| "grad_norm": 6.0, | |
| "learning_rate": 9.6446818745399e-07, | |
| "loss": 1.0987051010131836, | |
| "step": 360, | |
| "token_acc": 0.6879944960440316 | |
| }, | |
| { | |
| "epoch": 0.12242159986583934, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 9.634866127663737e-07, | |
| "loss": 1.111135196685791, | |
| "step": 365, | |
| "token_acc": 0.6866663123771058 | |
| }, | |
| { | |
| "epoch": 0.1240986080831796, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 9.624921774954732e-07, | |
| "loss": 1.0923819541931152, | |
| "step": 370, | |
| "token_acc": 0.6919647711210177 | |
| }, | |
| { | |
| "epoch": 0.12577561630051987, | |
| "grad_norm": 6.625, | |
| "learning_rate": 9.614849092343563e-07, | |
| "loss": 1.0786520004272462, | |
| "step": 375, | |
| "token_acc": 0.6894919168591224 | |
| }, | |
| { | |
| "epoch": 0.12745262451786013, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 9.60464835932174e-07, | |
| "loss": 1.086344051361084, | |
| "step": 380, | |
| "token_acc": 0.6883529734173747 | |
| }, | |
| { | |
| "epoch": 0.1291296327352004, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 9.594319858933847e-07, | |
| "loss": 1.1158102989196776, | |
| "step": 385, | |
| "token_acc": 0.6838648035351725 | |
| }, | |
| { | |
| "epoch": 0.13080664095254066, | |
| "grad_norm": 6.375, | |
| "learning_rate": 9.583863877769696e-07, | |
| "loss": 1.085710620880127, | |
| "step": 390, | |
| "token_acc": 0.6940853163450128 | |
| }, | |
| { | |
| "epoch": 0.13248364916988092, | |
| "grad_norm": 6.125, | |
| "learning_rate": 9.573280705956364e-07, | |
| "loss": 1.0663482666015625, | |
| "step": 395, | |
| "token_acc": 0.695432995258957 | |
| }, | |
| { | |
| "epoch": 0.1341606573872212, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 9.562570637150144e-07, | |
| "loss": 1.1113270759582519, | |
| "step": 400, | |
| "token_acc": 0.6843401825205756 | |
| }, | |
| { | |
| "epoch": 0.13583766560456145, | |
| "grad_norm": 6.0, | |
| "learning_rate": 9.55173396852841e-07, | |
| "loss": 1.0923892974853515, | |
| "step": 405, | |
| "token_acc": 0.6873383560103452 | |
| }, | |
| { | |
| "epoch": 0.13751467382190172, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.540771000781357e-07, | |
| "loss": 1.1199935913085937, | |
| "step": 410, | |
| "token_acc": 0.6863687150837989 | |
| }, | |
| { | |
| "epoch": 0.13919168203924198, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 9.529682038103653e-07, | |
| "loss": 1.1222180366516112, | |
| "step": 415, | |
| "token_acc": 0.6797772775737472 | |
| }, | |
| { | |
| "epoch": 0.14086869025658225, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.518467388186019e-07, | |
| "loss": 1.1468374252319335, | |
| "step": 420, | |
| "token_acc": 0.6787428571428571 | |
| }, | |
| { | |
| "epoch": 0.1425456984739225, | |
| "grad_norm": 5.75, | |
| "learning_rate": 9.507127362206675e-07, | |
| "loss": 1.1019716262817383, | |
| "step": 425, | |
| "token_acc": 0.688788173087299 | |
| }, | |
| { | |
| "epoch": 0.14422270669126278, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 9.495662274822711e-07, | |
| "loss": 1.0751092910766602, | |
| "step": 430, | |
| "token_acc": 0.6973642825690382 | |
| }, | |
| { | |
| "epoch": 0.14589971490860304, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 9.484072444161354e-07, | |
| "loss": 1.108968734741211, | |
| "step": 435, | |
| "token_acc": 0.6854776790610947 | |
| }, | |
| { | |
| "epoch": 0.1475767231259433, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 9.472358191811143e-07, | |
| "loss": 1.0823619842529297, | |
| "step": 440, | |
| "token_acc": 0.6868514020755133 | |
| }, | |
| { | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 9.460519842813003e-07, | |
| "loss": 1.1193353652954101, | |
| "step": 445, | |
| "token_acc": 0.685983750923243 | |
| }, | |
| { | |
| "epoch": 0.15093073956062383, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 9.448557725651229e-07, | |
| "loss": 1.10097017288208, | |
| "step": 450, | |
| "token_acc": 0.6858677638722213 | |
| }, | |
| { | |
| "epoch": 0.15260774777796413, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 9.436472172244373e-07, | |
| "loss": 1.0627543449401855, | |
| "step": 455, | |
| "token_acc": 0.6981000522921388 | |
| }, | |
| { | |
| "epoch": 0.1542847559953044, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 9.424263517936026e-07, | |
| "loss": 1.0785947799682618, | |
| "step": 460, | |
| "token_acc": 0.6919910406198923 | |
| }, | |
| { | |
| "epoch": 0.15596176421264465, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 9.41193210148552e-07, | |
| "loss": 1.0722208976745606, | |
| "step": 465, | |
| "token_acc": 0.690248354444383 | |
| }, | |
| { | |
| "epoch": 0.15763877242998492, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 9.399478265058528e-07, | |
| "loss": 1.0784868240356444, | |
| "step": 470, | |
| "token_acc": 0.6923379837983799 | |
| }, | |
| { | |
| "epoch": 0.15931578064732518, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 9.386902354217565e-07, | |
| "loss": 1.085107421875, | |
| "step": 475, | |
| "token_acc": 0.6940270167634135 | |
| }, | |
| { | |
| "epoch": 0.16099278886466545, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 9.374204717912408e-07, | |
| "loss": 1.0912228584289552, | |
| "step": 480, | |
| "token_acc": 0.6895368782161235 | |
| }, | |
| { | |
| "epoch": 0.1626697970820057, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 9.361385708470405e-07, | |
| "loss": 1.0886618614196777, | |
| "step": 485, | |
| "token_acc": 0.6871300524268561 | |
| }, | |
| { | |
| "epoch": 0.16434680529934598, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 9.3484456815867e-07, | |
| "loss": 1.0357705116271974, | |
| "step": 490, | |
| "token_acc": 0.7033262723993817 | |
| }, | |
| { | |
| "epoch": 0.16602381351668624, | |
| "grad_norm": 6.5, | |
| "learning_rate": 9.335384996314371e-07, | |
| "loss": 1.0864218711853026, | |
| "step": 495, | |
| "token_acc": 0.6869781488719133 | |
| }, | |
| { | |
| "epoch": 0.1677008217340265, | |
| "grad_norm": 5.625, | |
| "learning_rate": 9.322204015054454e-07, | |
| "loss": 1.064340591430664, | |
| "step": 500, | |
| "token_acc": 0.6971830985915493 | |
| }, | |
| { | |
| "epoch": 0.16937782995136677, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 9.308903103545902e-07, | |
| "loss": 1.10338134765625, | |
| "step": 505, | |
| "token_acc": 0.690793249913902 | |
| }, | |
| { | |
| "epoch": 0.17105483816870704, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 9.295482630855427e-07, | |
| "loss": 1.0584440231323242, | |
| "step": 510, | |
| "token_acc": 0.6965631929046563 | |
| }, | |
| { | |
| "epoch": 0.1727318463860473, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 9.281942969367262e-07, | |
| "loss": 1.101362133026123, | |
| "step": 515, | |
| "token_acc": 0.6852114339711788 | |
| }, | |
| { | |
| "epoch": 0.17440885460338756, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 9.268284494772829e-07, | |
| "loss": 1.0686640739440918, | |
| "step": 520, | |
| "token_acc": 0.6929240558534142 | |
| }, | |
| { | |
| "epoch": 0.17608586282072783, | |
| "grad_norm": 5.875, | |
| "learning_rate": 9.25450758606031e-07, | |
| "loss": 1.068457794189453, | |
| "step": 525, | |
| "token_acc": 0.6926385165326184 | |
| }, | |
| { | |
| "epoch": 0.1777628710380681, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 9.24061262550414e-07, | |
| "loss": 1.0507415771484374, | |
| "step": 530, | |
| "token_acc": 0.6969854770583811 | |
| }, | |
| { | |
| "epoch": 0.17943987925540836, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 9.226599998654391e-07, | |
| "loss": 1.0741724967956543, | |
| "step": 535, | |
| "token_acc": 0.6954720153735287 | |
| }, | |
| { | |
| "epoch": 0.18111688747274862, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 9.212470094326079e-07, | |
| "loss": 1.0903028488159179, | |
| "step": 540, | |
| "token_acc": 0.684026406364611 | |
| }, | |
| { | |
| "epoch": 0.1827938956900889, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 9.198223304588374e-07, | |
| "loss": 1.0722553253173828, | |
| "step": 545, | |
| "token_acc": 0.691402934400388 | |
| }, | |
| { | |
| "epoch": 0.18447090390742915, | |
| "grad_norm": 5.625, | |
| "learning_rate": 9.18386002475372e-07, | |
| "loss": 1.0605772972106933, | |
| "step": 550, | |
| "token_acc": 0.6977486740989285 | |
| }, | |
| { | |
| "epoch": 0.18614791212476942, | |
| "grad_norm": 6.625, | |
| "learning_rate": 9.169380653366869e-07, | |
| "loss": 1.077211570739746, | |
| "step": 555, | |
| "token_acc": 0.6934850863422292 | |
| }, | |
| { | |
| "epoch": 0.18782492034210968, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.154785592193819e-07, | |
| "loss": 1.0502148628234864, | |
| "step": 560, | |
| "token_acc": 0.6995120856934552 | |
| }, | |
| { | |
| "epoch": 0.18950192855944994, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 9.140075246210665e-07, | |
| "loss": 1.0825450897216797, | |
| "step": 565, | |
| "token_acc": 0.6873532947844507 | |
| }, | |
| { | |
| "epoch": 0.1911789367767902, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 9.125250023592371e-07, | |
| "loss": 1.0839460372924805, | |
| "step": 570, | |
| "token_acc": 0.6894161870081053 | |
| }, | |
| { | |
| "epoch": 0.19285594499413047, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 9.11031033570143e-07, | |
| "loss": 1.0788863182067872, | |
| "step": 575, | |
| "token_acc": 0.692129246064623 | |
| }, | |
| { | |
| "epoch": 0.19453295321147074, | |
| "grad_norm": 6.625, | |
| "learning_rate": 9.095256597076464e-07, | |
| "loss": 1.0854562759399413, | |
| "step": 580, | |
| "token_acc": 0.6882365102336159 | |
| }, | |
| { | |
| "epoch": 0.196209961428811, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 9.08008922542071e-07, | |
| "loss": 1.0595365524291993, | |
| "step": 585, | |
| "token_acc": 0.6933667083854819 | |
| }, | |
| { | |
| "epoch": 0.19788696964615127, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 9.064808641590438e-07, | |
| "loss": 1.0630743026733398, | |
| "step": 590, | |
| "token_acc": 0.6950098315353138 | |
| }, | |
| { | |
| "epoch": 0.19956397786349153, | |
| "grad_norm": 5.875, | |
| "learning_rate": 9.049415269583267e-07, | |
| "loss": 1.0720837593078614, | |
| "step": 595, | |
| "token_acc": 0.6954955895401639 | |
| }, | |
| { | |
| "epoch": 0.2012409860808318, | |
| "grad_norm": 6.125, | |
| "learning_rate": 9.033909536526405e-07, | |
| "loss": 1.1050517082214355, | |
| "step": 600, | |
| "token_acc": 0.6858130348913759 | |
| }, | |
| { | |
| "epoch": 0.20291799429817206, | |
| "grad_norm": 6.125, | |
| "learning_rate": 9.018291872664796e-07, | |
| "loss": 1.088867473602295, | |
| "step": 605, | |
| "token_acc": 0.6960822450680745 | |
| }, | |
| { | |
| "epoch": 0.20459500251551233, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 9.00256271134918e-07, | |
| "loss": 1.059322452545166, | |
| "step": 610, | |
| "token_acc": 0.6967294969417052 | |
| }, | |
| { | |
| "epoch": 0.2062720107328526, | |
| "grad_norm": 5.875, | |
| "learning_rate": 8.98672248902407e-07, | |
| "loss": 1.0721155166625977, | |
| "step": 615, | |
| "token_acc": 0.6950203894089944 | |
| }, | |
| { | |
| "epoch": 0.20794901895019285, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 8.970771645215643e-07, | |
| "loss": 1.0299058914184571, | |
| "step": 620, | |
| "token_acc": 0.6998539161703562 | |
| }, | |
| { | |
| "epoch": 0.20962602716753312, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 8.95471062251954e-07, | |
| "loss": 1.0335143089294434, | |
| "step": 625, | |
| "token_acc": 0.7026568991909031 | |
| }, | |
| { | |
| "epoch": 0.21130303538487338, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 8.938539866588592e-07, | |
| "loss": 1.0810824394226075, | |
| "step": 630, | |
| "token_acc": 0.6905426180037579 | |
| }, | |
| { | |
| "epoch": 0.21298004360221365, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 8.922259826120444e-07, | |
| "loss": 1.0932263374328612, | |
| "step": 635, | |
| "token_acc": 0.6908346311357213 | |
| }, | |
| { | |
| "epoch": 0.2146570518195539, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 8.905870952845118e-07, | |
| "loss": 1.0775763511657714, | |
| "step": 640, | |
| "token_acc": 0.6908716005349977 | |
| }, | |
| { | |
| "epoch": 0.21633406003689418, | |
| "grad_norm": 6.5, | |
| "learning_rate": 8.889373701512468e-07, | |
| "loss": 1.0795653343200684, | |
| "step": 645, | |
| "token_acc": 0.6852801061327866 | |
| }, | |
| { | |
| "epoch": 0.21801106825423444, | |
| "grad_norm": 6.25, | |
| "learning_rate": 8.872768529879564e-07, | |
| "loss": 1.021756649017334, | |
| "step": 650, | |
| "token_acc": 0.70451876436048 | |
| }, | |
| { | |
| "epoch": 0.2196880764715747, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 8.856055898697997e-07, | |
| "loss": 1.1029382705688477, | |
| "step": 655, | |
| "token_acc": 0.6815459687257402 | |
| }, | |
| { | |
| "epoch": 0.22136508468891497, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 8.839236271701082e-07, | |
| "loss": 1.0495551109313965, | |
| "step": 660, | |
| "token_acc": 0.6949602122015915 | |
| }, | |
| { | |
| "epoch": 0.22304209290625523, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 8.822310115591007e-07, | |
| "loss": 1.0997918128967286, | |
| "step": 665, | |
| "token_acc": 0.6854550609583215 | |
| }, | |
| { | |
| "epoch": 0.2247191011235955, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 8.805277900025863e-07, | |
| "loss": 1.0630650520324707, | |
| "step": 670, | |
| "token_acc": 0.6942992874109264 | |
| }, | |
| { | |
| "epoch": 0.22639610934093576, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 8.788140097606631e-07, | |
| "loss": 1.0519957542419434, | |
| "step": 675, | |
| "token_acc": 0.6950943822764779 | |
| }, | |
| { | |
| "epoch": 0.22807311755827603, | |
| "grad_norm": 6.0, | |
| "learning_rate": 8.770897183864059e-07, | |
| "loss": 1.1048961639404298, | |
| "step": 680, | |
| "token_acc": 0.6844065077910174 | |
| }, | |
| { | |
| "epoch": 0.2297501257756163, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 8.753549637245467e-07, | |
| "loss": 1.0654611587524414, | |
| "step": 685, | |
| "token_acc": 0.6934993924665857 | |
| }, | |
| { | |
| "epoch": 0.23142713399295656, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 8.736097939101476e-07, | |
| "loss": 1.0599603652954102, | |
| "step": 690, | |
| "token_acc": 0.6970522268503685 | |
| }, | |
| { | |
| "epoch": 0.23310414221029682, | |
| "grad_norm": 6.875, | |
| "learning_rate": 8.718542573672644e-07, | |
| "loss": 1.1132248878479003, | |
| "step": 695, | |
| "token_acc": 0.6831187846989202 | |
| }, | |
| { | |
| "epoch": 0.2347811504276371, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 8.700884028076041e-07, | |
| "loss": 1.0341250419616699, | |
| "step": 700, | |
| "token_acc": 0.6999466413707239 | |
| }, | |
| { | |
| "epoch": 0.23645815864497735, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 8.683122792291719e-07, | |
| "loss": 1.0390054702758789, | |
| "step": 705, | |
| "token_acc": 0.6990116801437556 | |
| }, | |
| { | |
| "epoch": 0.23813516686231762, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 8.66525935914913e-07, | |
| "loss": 1.0598421096801758, | |
| "step": 710, | |
| "token_acc": 0.6982547993019197 | |
| }, | |
| { | |
| "epoch": 0.23981217507965788, | |
| "grad_norm": 5.625, | |
| "learning_rate": 8.647294224313442e-07, | |
| "loss": 1.0474308967590331, | |
| "step": 715, | |
| "token_acc": 0.6987998238273508 | |
| }, | |
| { | |
| "epoch": 0.24148918329699814, | |
| "grad_norm": 6.125, | |
| "learning_rate": 8.629227886271786e-07, | |
| "loss": 1.0467673301696778, | |
| "step": 720, | |
| "token_acc": 0.6986263736263736 | |
| }, | |
| { | |
| "epoch": 0.2431661915143384, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 8.611060846319431e-07, | |
| "loss": 1.1083699226379395, | |
| "step": 725, | |
| "token_acc": 0.6849483810417645 | |
| }, | |
| { | |
| "epoch": 0.24484319973167867, | |
| "grad_norm": 5.625, | |
| "learning_rate": 8.592793608545863e-07, | |
| "loss": 1.0226441383361817, | |
| "step": 730, | |
| "token_acc": 0.6998483025579327 | |
| }, | |
| { | |
| "epoch": 0.24652020794901894, | |
| "grad_norm": 6.375, | |
| "learning_rate": 8.574426679820813e-07, | |
| "loss": 1.0406004905700683, | |
| "step": 735, | |
| "token_acc": 0.6985557481224726 | |
| }, | |
| { | |
| "epoch": 0.2481972161663592, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 8.555960569780176e-07, | |
| "loss": 1.056182861328125, | |
| "step": 740, | |
| "token_acc": 0.6949429037520392 | |
| }, | |
| { | |
| "epoch": 0.24987422438369947, | |
| "grad_norm": 6.25, | |
| "learning_rate": 8.537395790811885e-07, | |
| "loss": 1.0577526092529297, | |
| "step": 745, | |
| "token_acc": 0.6974981448107707 | |
| }, | |
| { | |
| "epoch": 0.25155123260103973, | |
| "grad_norm": 5.625, | |
| "learning_rate": 8.518732858041684e-07, | |
| "loss": 1.031444263458252, | |
| "step": 750, | |
| "token_acc": 0.7030166435506241 | |
| }, | |
| { | |
| "epoch": 0.25322824081838, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 8.499972289318835e-07, | |
| "loss": 1.0735219955444335, | |
| "step": 755, | |
| "token_acc": 0.6897931192136244 | |
| }, | |
| { | |
| "epoch": 0.25490524903572026, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 8.481114605201754e-07, | |
| "loss": 1.0631572723388671, | |
| "step": 760, | |
| "token_acc": 0.6946527350952674 | |
| }, | |
| { | |
| "epoch": 0.25658225725306055, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 8.462160328943563e-07, | |
| "loss": 1.0816566467285156, | |
| "step": 765, | |
| "token_acc": 0.6897627597574616 | |
| }, | |
| { | |
| "epoch": 0.2582592654704008, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 8.443109986477572e-07, | |
| "loss": 1.067441463470459, | |
| "step": 770, | |
| "token_acc": 0.6974284888760474 | |
| }, | |
| { | |
| "epoch": 0.2599362736877411, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 8.423964106402686e-07, | |
| "loss": 1.0774710655212403, | |
| "step": 775, | |
| "token_acc": 0.6951841988459831 | |
| }, | |
| { | |
| "epoch": 0.2616132819050813, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 8.404723219968735e-07, | |
| "loss": 1.040436363220215, | |
| "step": 780, | |
| "token_acc": 0.7035983263598327 | |
| }, | |
| { | |
| "epoch": 0.2632902901224216, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 8.385387861061741e-07, | |
| "loss": 1.0534331321716308, | |
| "step": 785, | |
| "token_acc": 0.69294556654623 | |
| }, | |
| { | |
| "epoch": 0.26496729833976185, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 8.365958566189093e-07, | |
| "loss": 1.0408141136169433, | |
| "step": 790, | |
| "token_acc": 0.6980718161875072 | |
| }, | |
| { | |
| "epoch": 0.26664430655710214, | |
| "grad_norm": 6.125, | |
| "learning_rate": 8.346435874464669e-07, | |
| "loss": 1.0549689292907716, | |
| "step": 795, | |
| "token_acc": 0.697240352685217 | |
| }, | |
| { | |
| "epoch": 0.2683213147744424, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 8.326820327593874e-07, | |
| "loss": 1.0545565605163574, | |
| "step": 800, | |
| "token_acc": 0.6958881578947368 | |
| }, | |
| { | |
| "epoch": 0.26999832299178267, | |
| "grad_norm": 6.125, | |
| "learning_rate": 8.307112469858608e-07, | |
| "loss": 1.0242762565612793, | |
| "step": 805, | |
| "token_acc": 0.7026535164964189 | |
| }, | |
| { | |
| "epoch": 0.2716753312091229, | |
| "grad_norm": 5.28125, | |
| "learning_rate": 8.287312848102162e-07, | |
| "loss": 1.0034321784973144, | |
| "step": 810, | |
| "token_acc": 0.7086971121558092 | |
| }, | |
| { | |
| "epoch": 0.2733523394264632, | |
| "grad_norm": 6.75, | |
| "learning_rate": 8.267422011714052e-07, | |
| "loss": 1.06221284866333, | |
| "step": 815, | |
| "token_acc": 0.6891465953018139 | |
| }, | |
| { | |
| "epoch": 0.27502934764380343, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 8.247440512614767e-07, | |
| "loss": 1.0829678535461427, | |
| "step": 820, | |
| "token_acc": 0.6906236178681999 | |
| }, | |
| { | |
| "epoch": 0.2767063558611437, | |
| "grad_norm": 6.0, | |
| "learning_rate": 8.227368905240455e-07, | |
| "loss": 1.0397522926330567, | |
| "step": 825, | |
| "token_acc": 0.6986963959181266 | |
| }, | |
| { | |
| "epoch": 0.27838336407848396, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 8.207207746527545e-07, | |
| "loss": 1.062535858154297, | |
| "step": 830, | |
| "token_acc": 0.6948282453170226 | |
| }, | |
| { | |
| "epoch": 0.28006037229582426, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 8.186957595897287e-07, | |
| "loss": 1.0378836631774901, | |
| "step": 835, | |
| "token_acc": 0.7008086253369272 | |
| }, | |
| { | |
| "epoch": 0.2817373805131645, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 8.166619015240235e-07, | |
| "loss": 1.0530453681945802, | |
| "step": 840, | |
| "token_acc": 0.6956012711155711 | |
| }, | |
| { | |
| "epoch": 0.2834143887305048, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 8.146192568900649e-07, | |
| "loss": 1.1169618606567382, | |
| "step": 845, | |
| "token_acc": 0.6829694849837172 | |
| }, | |
| { | |
| "epoch": 0.285091396947845, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 8.125678823660842e-07, | |
| "loss": 1.0521310806274413, | |
| "step": 850, | |
| "token_acc": 0.6975178316690442 | |
| }, | |
| { | |
| "epoch": 0.2867684051651853, | |
| "grad_norm": 5.75, | |
| "learning_rate": 8.105078348725454e-07, | |
| "loss": 1.0460372924804688, | |
| "step": 855, | |
| "token_acc": 0.6969783842669571 | |
| }, | |
| { | |
| "epoch": 0.28844541338252555, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 8.084391715705647e-07, | |
| "loss": 1.0443012237548828, | |
| "step": 860, | |
| "token_acc": 0.697248736664795 | |
| }, | |
| { | |
| "epoch": 0.29012242159986584, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 8.06361949860326e-07, | |
| "loss": 1.0336613655090332, | |
| "step": 865, | |
| "token_acc": 0.6974640522875817 | |
| }, | |
| { | |
| "epoch": 0.2917994298172061, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 8.042762273794872e-07, | |
| "loss": 1.067410945892334, | |
| "step": 870, | |
| "token_acc": 0.691785183612821 | |
| }, | |
| { | |
| "epoch": 0.29347643803454637, | |
| "grad_norm": 5.875, | |
| "learning_rate": 8.021820620015812e-07, | |
| "loss": 1.1038573265075684, | |
| "step": 875, | |
| "token_acc": 0.6870427572752398 | |
| }, | |
| { | |
| "epoch": 0.2951534462518866, | |
| "grad_norm": 5.875, | |
| "learning_rate": 8.000795118344093e-07, | |
| "loss": 1.0380253791809082, | |
| "step": 880, | |
| "token_acc": 0.7000665041010863 | |
| }, | |
| { | |
| "epoch": 0.2968304544692269, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 7.979686352184306e-07, | |
| "loss": 1.0550785064697266, | |
| "step": 885, | |
| "token_acc": 0.6930515759312321 | |
| }, | |
| { | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 7.958494907251414e-07, | |
| "loss": 1.074232292175293, | |
| "step": 890, | |
| "token_acc": 0.6983671171171171 | |
| }, | |
| { | |
| "epoch": 0.30018447090390743, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 7.937221371554512e-07, | |
| "loss": 1.0676633834838867, | |
| "step": 895, | |
| "token_acc": 0.6919191919191919 | |
| }, | |
| { | |
| "epoch": 0.30186147912124767, | |
| "grad_norm": 6.125, | |
| "learning_rate": 7.915866335380499e-07, | |
| "loss": 1.0565213203430175, | |
| "step": 900, | |
| "token_acc": 0.6963085336675402 | |
| }, | |
| { | |
| "epoch": 0.30353848733858796, | |
| "grad_norm": 6.5, | |
| "learning_rate": 7.894430391277713e-07, | |
| "loss": 1.0706295013427733, | |
| "step": 905, | |
| "token_acc": 0.689251808318264 | |
| }, | |
| { | |
| "epoch": 0.30521549555592825, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 7.872914134039484e-07, | |
| "loss": 1.0703039169311523, | |
| "step": 910, | |
| "token_acc": 0.6952719476416159 | |
| }, | |
| { | |
| "epoch": 0.3068925037732685, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 7.851318160687624e-07, | |
| "loss": 1.015502643585205, | |
| "step": 915, | |
| "token_acc": 0.704557514297506 | |
| }, | |
| { | |
| "epoch": 0.3085695119906088, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 7.829643070455864e-07, | |
| "loss": 1.0821684837341308, | |
| "step": 920, | |
| "token_acc": 0.6846540956769539 | |
| }, | |
| { | |
| "epoch": 0.310246520207949, | |
| "grad_norm": 6.25, | |
| "learning_rate": 7.807889464773237e-07, | |
| "loss": 1.041159725189209, | |
| "step": 925, | |
| "token_acc": 0.6964295508144573 | |
| }, | |
| { | |
| "epoch": 0.3119235284252893, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 7.786057947247375e-07, | |
| "loss": 1.0455968856811524, | |
| "step": 930, | |
| "token_acc": 0.6932714357626265 | |
| }, | |
| { | |
| "epoch": 0.31360053664262955, | |
| "grad_norm": 5.75, | |
| "learning_rate": 7.764149123647769e-07, | |
| "loss": 1.0698083877563476, | |
| "step": 935, | |
| "token_acc": 0.6874304163883322 | |
| }, | |
| { | |
| "epoch": 0.31527754485996984, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 7.742163601888958e-07, | |
| "loss": 1.0722060203552246, | |
| "step": 940, | |
| "token_acc": 0.6909824258138865 | |
| }, | |
| { | |
| "epoch": 0.3169545530773101, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 7.720101992013661e-07, | |
| "loss": 1.0373089790344239, | |
| "step": 945, | |
| "token_acc": 0.6987164794865918 | |
| }, | |
| { | |
| "epoch": 0.31863156129465037, | |
| "grad_norm": 6.0, | |
| "learning_rate": 7.69796490617585e-07, | |
| "loss": 1.0665985107421876, | |
| "step": 950, | |
| "token_acc": 0.6956569970602917 | |
| }, | |
| { | |
| "epoch": 0.3203085695119906, | |
| "grad_norm": 6.125, | |
| "learning_rate": 7.675752958623767e-07, | |
| "loss": 1.085744857788086, | |
| "step": 955, | |
| "token_acc": 0.6910532531068998 | |
| }, | |
| { | |
| "epoch": 0.3219855777293309, | |
| "grad_norm": 6.375, | |
| "learning_rate": 7.653466765682872e-07, | |
| "loss": 1.0077353477478028, | |
| "step": 960, | |
| "token_acc": 0.7070019610104972 | |
| }, | |
| { | |
| "epoch": 0.32366258594667113, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 7.631106945738754e-07, | |
| "loss": 1.0229363441467285, | |
| "step": 965, | |
| "token_acc": 0.7041835357624832 | |
| }, | |
| { | |
| "epoch": 0.3253395941640114, | |
| "grad_norm": 6.0, | |
| "learning_rate": 7.60867411921996e-07, | |
| "loss": 1.047335720062256, | |
| "step": 970, | |
| "token_acc": 0.6972093800479029 | |
| }, | |
| { | |
| "epoch": 0.32701660238135166, | |
| "grad_norm": 6.0, | |
| "learning_rate": 7.586168908580789e-07, | |
| "loss": 1.0534196853637696, | |
| "step": 975, | |
| "token_acc": 0.6946582691859109 | |
| }, | |
| { | |
| "epoch": 0.32869361059869195, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 7.56359193828401e-07, | |
| "loss": 1.0379526138305664, | |
| "step": 980, | |
| "token_acc": 0.6913040802510924 | |
| }, | |
| { | |
| "epoch": 0.3303706188160322, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 7.54094383478355e-07, | |
| "loss": 1.0085951805114746, | |
| "step": 985, | |
| "token_acc": 0.7015451501086892 | |
| }, | |
| { | |
| "epoch": 0.3320476270333725, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 7.5182252265071e-07, | |
| "loss": 1.0620606422424317, | |
| "step": 990, | |
| "token_acc": 0.6930627550457704 | |
| }, | |
| { | |
| "epoch": 0.3337246352507127, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 7.495436743838677e-07, | |
| "loss": 1.0689016342163087, | |
| "step": 995, | |
| "token_acc": 0.6921185150486006 | |
| }, | |
| { | |
| "epoch": 0.335401643468053, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 7.472579019101136e-07, | |
| "loss": 1.023653221130371, | |
| "step": 1000, | |
| "token_acc": 0.7040666891968007 | |
| }, | |
| { | |
| "epoch": 0.33707865168539325, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 7.449652686538632e-07, | |
| "loss": 1.0605965614318849, | |
| "step": 1005, | |
| "token_acc": 0.6947503671071953 | |
| }, | |
| { | |
| "epoch": 0.33875565990273354, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 7.426658382299005e-07, | |
| "loss": 1.0809215545654296, | |
| "step": 1010, | |
| "token_acc": 0.6894533536228491 | |
| }, | |
| { | |
| "epoch": 0.3404326681200738, | |
| "grad_norm": 6.0, | |
| "learning_rate": 7.40359674441614e-07, | |
| "loss": 1.0782832145690917, | |
| "step": 1015, | |
| "token_acc": 0.688503381358424 | |
| }, | |
| { | |
| "epoch": 0.34210967633741407, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 7.380468412792267e-07, | |
| "loss": 1.0309969902038574, | |
| "step": 1020, | |
| "token_acc": 0.699964174826845 | |
| }, | |
| { | |
| "epoch": 0.3437866845547543, | |
| "grad_norm": 5.875, | |
| "learning_rate": 7.357274029180191e-07, | |
| "loss": 1.0572206497192382, | |
| "step": 1025, | |
| "token_acc": 0.6972423339011925 | |
| }, | |
| { | |
| "epoch": 0.3454636927720946, | |
| "grad_norm": 6.25, | |
| "learning_rate": 7.334014237165493e-07, | |
| "loss": 1.0772000312805177, | |
| "step": 1030, | |
| "token_acc": 0.6925978312116926 | |
| }, | |
| { | |
| "epoch": 0.34714070098943484, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 7.310689682148679e-07, | |
| "loss": 1.0255406379699707, | |
| "step": 1035, | |
| "token_acc": 0.7015232377140316 | |
| }, | |
| { | |
| "epoch": 0.34881770920677513, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 7.287301011327257e-07, | |
| "loss": 1.0338263511657715, | |
| "step": 1040, | |
| "token_acc": 0.6993613405988164 | |
| }, | |
| { | |
| "epoch": 0.35049471742411537, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 7.263848873677793e-07, | |
| "loss": 1.0368549346923828, | |
| "step": 1045, | |
| "token_acc": 0.7006442705936493 | |
| }, | |
| { | |
| "epoch": 0.35217172564145566, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 7.240333919937892e-07, | |
| "loss": 1.0463068962097168, | |
| "step": 1050, | |
| "token_acc": 0.6975750577367206 | |
| }, | |
| { | |
| "epoch": 0.3538487338587959, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 7.216756802588151e-07, | |
| "loss": 1.0312828063964843, | |
| "step": 1055, | |
| "token_acc": 0.7036994812745825 | |
| }, | |
| { | |
| "epoch": 0.3555257420761362, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 7.193118175834047e-07, | |
| "loss": 1.0586414337158203, | |
| "step": 1060, | |
| "token_acc": 0.6946736263119493 | |
| }, | |
| { | |
| "epoch": 0.3572027502934764, | |
| "grad_norm": 5.875, | |
| "learning_rate": 7.16941869558779e-07, | |
| "loss": 1.0383371353149413, | |
| "step": 1065, | |
| "token_acc": 0.6991913746630728 | |
| }, | |
| { | |
| "epoch": 0.3588797585108167, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 7.145659019450121e-07, | |
| "loss": 1.0529624938964843, | |
| "step": 1070, | |
| "token_acc": 0.6956057007125891 | |
| }, | |
| { | |
| "epoch": 0.36055676672815695, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 7.121839806692062e-07, | |
| "loss": 1.0467044830322265, | |
| "step": 1075, | |
| "token_acc": 0.6972133195719485 | |
| }, | |
| { | |
| "epoch": 0.36223377494549724, | |
| "grad_norm": 11.0, | |
| "learning_rate": 7.097961718236628e-07, | |
| "loss": 1.0414490699768066, | |
| "step": 1080, | |
| "token_acc": 0.6999028071579669 | |
| }, | |
| { | |
| "epoch": 0.3639107831628375, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 7.074025416640487e-07, | |
| "loss": 1.0076875686645508, | |
| "step": 1085, | |
| "token_acc": 0.7048360846314811 | |
| }, | |
| { | |
| "epoch": 0.3655877913801778, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 7.050031566075573e-07, | |
| "loss": 1.0238298416137694, | |
| "step": 1090, | |
| "token_acc": 0.7038746377589353 | |
| }, | |
| { | |
| "epoch": 0.367264799597518, | |
| "grad_norm": 5.5, | |
| "learning_rate": 7.025980832310658e-07, | |
| "loss": 1.03941650390625, | |
| "step": 1095, | |
| "token_acc": 0.7002581015199312 | |
| }, | |
| { | |
| "epoch": 0.3689418078148583, | |
| "grad_norm": 6.125, | |
| "learning_rate": 7.001873882692883e-07, | |
| "loss": 1.0348270416259766, | |
| "step": 1100, | |
| "token_acc": 0.6984748930013864 | |
| }, | |
| { | |
| "epoch": 0.37061881603219854, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 6.977711386129232e-07, | |
| "loss": 1.0247929573059082, | |
| "step": 1105, | |
| "token_acc": 0.7004997620180866 | |
| }, | |
| { | |
| "epoch": 0.37229582424953883, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 6.953494013067978e-07, | |
| "loss": 1.0276754379272461, | |
| "step": 1110, | |
| "token_acc": 0.6988610216290843 | |
| }, | |
| { | |
| "epoch": 0.37397283246687907, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 6.929222435480082e-07, | |
| "loss": 1.0176216125488282, | |
| "step": 1115, | |
| "token_acc": 0.7015344002639828 | |
| }, | |
| { | |
| "epoch": 0.37564984068421936, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 6.904897326840537e-07, | |
| "loss": 1.0373910903930663, | |
| "step": 1120, | |
| "token_acc": 0.7015279241306639 | |
| }, | |
| { | |
| "epoch": 0.3773268489015596, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 6.880519362109694e-07, | |
| "loss": 1.021230125427246, | |
| "step": 1125, | |
| "token_acc": 0.7006535947712418 | |
| }, | |
| { | |
| "epoch": 0.3790038571188999, | |
| "grad_norm": 6.125, | |
| "learning_rate": 6.856089217714521e-07, | |
| "loss": 1.0548656463623047, | |
| "step": 1130, | |
| "token_acc": 0.6955671821997356 | |
| }, | |
| { | |
| "epoch": 0.3806808653362401, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 6.831607571529849e-07, | |
| "loss": 1.0167530059814454, | |
| "step": 1135, | |
| "token_acc": 0.7032388210243192 | |
| }, | |
| { | |
| "epoch": 0.3823578735535804, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 6.807075102859542e-07, | |
| "loss": 1.0101150512695312, | |
| "step": 1140, | |
| "token_acc": 0.6998482017748715 | |
| }, | |
| { | |
| "epoch": 0.38403488177092066, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 6.78249249241767e-07, | |
| "loss": 1.026358699798584, | |
| "step": 1145, | |
| "token_acc": 0.7042928742645457 | |
| }, | |
| { | |
| "epoch": 0.38571188998826095, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 6.757860422309603e-07, | |
| "loss": 1.0511194229125977, | |
| "step": 1150, | |
| "token_acc": 0.6920869208692086 | |
| }, | |
| { | |
| "epoch": 0.3873888982056012, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 6.733179576013097e-07, | |
| "loss": 1.036262321472168, | |
| "step": 1155, | |
| "token_acc": 0.6977180902520913 | |
| }, | |
| { | |
| "epoch": 0.3890659064229415, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 6.70845063835932e-07, | |
| "loss": 1.0658405303955079, | |
| "step": 1160, | |
| "token_acc": 0.6915908343383401 | |
| }, | |
| { | |
| "epoch": 0.3907429146402817, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 6.683674295513858e-07, | |
| "loss": 1.0385713577270508, | |
| "step": 1165, | |
| "token_acc": 0.6958403046212427 | |
| }, | |
| { | |
| "epoch": 0.392419922857622, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 6.658851234957669e-07, | |
| "loss": 1.002643871307373, | |
| "step": 1170, | |
| "token_acc": 0.7057808455565142 | |
| }, | |
| { | |
| "epoch": 0.39409693107496224, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 6.633982145468008e-07, | |
| "loss": 1.0385595321655274, | |
| "step": 1175, | |
| "token_acc": 0.7010718539102818 | |
| }, | |
| { | |
| "epoch": 0.39577393929230253, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 6.609067717099319e-07, | |
| "loss": 1.0319636344909668, | |
| "step": 1180, | |
| "token_acc": 0.7033253524873636 | |
| }, | |
| { | |
| "epoch": 0.39745094750964277, | |
| "grad_norm": 5.75, | |
| "learning_rate": 6.584108641164086e-07, | |
| "loss": 1.0643960952758789, | |
| "step": 1185, | |
| "token_acc": 0.6947075840943899 | |
| }, | |
| { | |
| "epoch": 0.39912795572698306, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 6.559105610213648e-07, | |
| "loss": 0.9819327354431152, | |
| "step": 1190, | |
| "token_acc": 0.7112614578786556 | |
| }, | |
| { | |
| "epoch": 0.4008049639443233, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 6.534059318018988e-07, | |
| "loss": 0.9903202056884766, | |
| "step": 1195, | |
| "token_acc": 0.7077391904323828 | |
| }, | |
| { | |
| "epoch": 0.4024819721616636, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 6.50897045955147e-07, | |
| "loss": 1.0295546531677247, | |
| "step": 1200, | |
| "token_acc": 0.7006684562345387 | |
| }, | |
| { | |
| "epoch": 0.4041589803790039, | |
| "grad_norm": 5.75, | |
| "learning_rate": 6.48383973096358e-07, | |
| "loss": 1.0131707191467285, | |
| "step": 1205, | |
| "token_acc": 0.7029982575256865 | |
| }, | |
| { | |
| "epoch": 0.4058359885963441, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 6.458667829569582e-07, | |
| "loss": 1.034630012512207, | |
| "step": 1210, | |
| "token_acc": 0.697005394010788 | |
| }, | |
| { | |
| "epoch": 0.4075129968136844, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 6.433455453826186e-07, | |
| "loss": 1.011972713470459, | |
| "step": 1215, | |
| "token_acc": 0.7073240730111353 | |
| }, | |
| { | |
| "epoch": 0.40919000503102465, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 6.408203303313161e-07, | |
| "loss": 1.0427475929260255, | |
| "step": 1220, | |
| "token_acc": 0.6974992467610727 | |
| }, | |
| { | |
| "epoch": 0.41086701324836494, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 6.382912078713929e-07, | |
| "loss": 1.030358123779297, | |
| "step": 1225, | |
| "token_acc": 0.7000523560209424 | |
| }, | |
| { | |
| "epoch": 0.4125440214657052, | |
| "grad_norm": 5.75, | |
| "learning_rate": 6.357582481796113e-07, | |
| "loss": 1.0345232009887695, | |
| "step": 1230, | |
| "token_acc": 0.7003829691371931 | |
| }, | |
| { | |
| "epoch": 0.41422102968304547, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 6.332215215392079e-07, | |
| "loss": 1.0514300346374512, | |
| "step": 1235, | |
| "token_acc": 0.6924103419516263 | |
| }, | |
| { | |
| "epoch": 0.4158980379003857, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 6.306810983379418e-07, | |
| "loss": 1.0526814460754395, | |
| "step": 1240, | |
| "token_acc": 0.699214806505889 | |
| }, | |
| { | |
| "epoch": 0.417575046117726, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 6.281370490661432e-07, | |
| "loss": 1.024818229675293, | |
| "step": 1245, | |
| "token_acc": 0.7030024647098364 | |
| }, | |
| { | |
| "epoch": 0.41925205433506624, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 6.255894443147557e-07, | |
| "loss": 1.075798797607422, | |
| "step": 1250, | |
| "token_acc": 0.6899942163100058 | |
| }, | |
| { | |
| "epoch": 0.42092906255240653, | |
| "grad_norm": 5.875, | |
| "learning_rate": 6.230383547733792e-07, | |
| "loss": 1.0811284065246582, | |
| "step": 1255, | |
| "token_acc": 0.6883433394757178 | |
| }, | |
| { | |
| "epoch": 0.42260607076974677, | |
| "grad_norm": 6.375, | |
| "learning_rate": 6.204838512283071e-07, | |
| "loss": 1.0544434547424317, | |
| "step": 1260, | |
| "token_acc": 0.6923693399736677 | |
| }, | |
| { | |
| "epoch": 0.42428307898708706, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 6.179260045605637e-07, | |
| "loss": 1.0577333450317383, | |
| "step": 1265, | |
| "token_acc": 0.6935493235789352 | |
| }, | |
| { | |
| "epoch": 0.4259600872044273, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 6.153648857439352e-07, | |
| "loss": 1.040918731689453, | |
| "step": 1270, | |
| "token_acc": 0.6961138989380324 | |
| }, | |
| { | |
| "epoch": 0.4276370954217676, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 6.128005658430028e-07, | |
| "loss": 1.0624547958374024, | |
| "step": 1275, | |
| "token_acc": 0.6919805920514638 | |
| }, | |
| { | |
| "epoch": 0.4293141036391078, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 6.102331160111692e-07, | |
| "loss": 1.0875247955322265, | |
| "step": 1280, | |
| "token_acc": 0.6913573152511205 | |
| }, | |
| { | |
| "epoch": 0.4309911118564481, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 6.076626074886853e-07, | |
| "loss": 1.016362476348877, | |
| "step": 1285, | |
| "token_acc": 0.7094132926760405 | |
| }, | |
| { | |
| "epoch": 0.43266812007378835, | |
| "grad_norm": 5.75, | |
| "learning_rate": 6.050891116006718e-07, | |
| "loss": 1.0268930435180663, | |
| "step": 1290, | |
| "token_acc": 0.6988262395787627 | |
| }, | |
| { | |
| "epoch": 0.43434512829112865, | |
| "grad_norm": 6.0, | |
| "learning_rate": 6.025126997551426e-07, | |
| "loss": 1.0284778594970703, | |
| "step": 1295, | |
| "token_acc": 0.6979727193092229 | |
| }, | |
| { | |
| "epoch": 0.4360221365084689, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 5.99933443441021e-07, | |
| "loss": 1.0141260147094726, | |
| "step": 1300, | |
| "token_acc": 0.7011908851482881 | |
| }, | |
| { | |
| "epoch": 0.4376991447258092, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 5.973514142261579e-07, | |
| "loss": 1.0410663604736328, | |
| "step": 1305, | |
| "token_acc": 0.694919168591224 | |
| }, | |
| { | |
| "epoch": 0.4393761529431494, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 5.947666837553448e-07, | |
| "loss": 1.0731523513793946, | |
| "step": 1310, | |
| "token_acc": 0.6926234606294315 | |
| }, | |
| { | |
| "epoch": 0.4410531611604897, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 5.921793237483262e-07, | |
| "loss": 1.0419529914855956, | |
| "step": 1315, | |
| "token_acc": 0.6948214091846705 | |
| }, | |
| { | |
| "epoch": 0.44273016937782994, | |
| "grad_norm": 6.125, | |
| "learning_rate": 5.895894059978095e-07, | |
| "loss": 1.0798610687255858, | |
| "step": 1320, | |
| "token_acc": 0.6885527672739773 | |
| }, | |
| { | |
| "epoch": 0.44440717759517023, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 5.869970023674735e-07, | |
| "loss": 1.01141300201416, | |
| "step": 1325, | |
| "token_acc": 0.7033395392134048 | |
| }, | |
| { | |
| "epoch": 0.44608418581251047, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.844021847899734e-07, | |
| "loss": 1.0301786422729493, | |
| "step": 1330, | |
| "token_acc": 0.6992315482218383 | |
| }, | |
| { | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 5.818050252649458e-07, | |
| "loss": 1.0755172729492188, | |
| "step": 1335, | |
| "token_acc": 0.690394101734472 | |
| }, | |
| { | |
| "epoch": 0.449438202247191, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 5.792055958570098e-07, | |
| "loss": 1.0729934692382812, | |
| "step": 1340, | |
| "token_acc": 0.6900311526479751 | |
| }, | |
| { | |
| "epoch": 0.4511152104645313, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.766039686937687e-07, | |
| "loss": 1.0664525985717774, | |
| "step": 1345, | |
| "token_acc": 0.6915431560592851 | |
| }, | |
| { | |
| "epoch": 0.45279221868187153, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 5.740002159638072e-07, | |
| "loss": 1.018766498565674, | |
| "step": 1350, | |
| "token_acc": 0.7020927601809954 | |
| }, | |
| { | |
| "epoch": 0.4544692268992118, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 5.713944099146901e-07, | |
| "loss": 1.0389814376831055, | |
| "step": 1355, | |
| "token_acc": 0.6960518301124093 | |
| }, | |
| { | |
| "epoch": 0.45614623511655206, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 5.687866228509558e-07, | |
| "loss": 1.0340880393981933, | |
| "step": 1360, | |
| "token_acc": 0.7008528076198914 | |
| }, | |
| { | |
| "epoch": 0.45782324333389235, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 5.661769271321113e-07, | |
| "loss": 1.0076488494873046, | |
| "step": 1365, | |
| "token_acc": 0.7054647515271587 | |
| }, | |
| { | |
| "epoch": 0.4595002515512326, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 5.635653951706234e-07, | |
| "loss": 1.0600255012512207, | |
| "step": 1370, | |
| "token_acc": 0.6946650892907442 | |
| }, | |
| { | |
| "epoch": 0.4611772597685729, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 5.609520994299108e-07, | |
| "loss": 1.0570174217224122, | |
| "step": 1375, | |
| "token_acc": 0.6946865165819415 | |
| }, | |
| { | |
| "epoch": 0.4628542679859131, | |
| "grad_norm": 6.125, | |
| "learning_rate": 5.58337112422332e-07, | |
| "loss": 1.0049464225769043, | |
| "step": 1380, | |
| "token_acc": 0.7048176149219137 | |
| }, | |
| { | |
| "epoch": 0.4645312762032534, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 5.557205067071739e-07, | |
| "loss": 1.0521051406860351, | |
| "step": 1385, | |
| "token_acc": 0.694855340439417 | |
| }, | |
| { | |
| "epoch": 0.46620828442059364, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 5.531023548886391e-07, | |
| "loss": 1.0039688110351563, | |
| "step": 1390, | |
| "token_acc": 0.708057521113901 | |
| }, | |
| { | |
| "epoch": 0.46788529263793394, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.5048272961383e-07, | |
| "loss": 1.0771536827087402, | |
| "step": 1395, | |
| "token_acc": 0.6882698610012431 | |
| }, | |
| { | |
| "epoch": 0.4695623008552742, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 5.478617035707337e-07, | |
| "loss": 1.0812073707580567, | |
| "step": 1400, | |
| "token_acc": 0.6879506798040403 | |
| }, | |
| { | |
| "epoch": 0.47123930907261447, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 5.452393494862057e-07, | |
| "loss": 1.0342639923095702, | |
| "step": 1405, | |
| "token_acc": 0.7012715977480101 | |
| }, | |
| { | |
| "epoch": 0.4729163172899547, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 5.426157401239504e-07, | |
| "loss": 1.036158561706543, | |
| "step": 1410, | |
| "token_acc": 0.6999719495091165 | |
| }, | |
| { | |
| "epoch": 0.474593325507295, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 5.399909482825038e-07, | |
| "loss": 1.0552077293395996, | |
| "step": 1415, | |
| "token_acc": 0.6963589076723017 | |
| }, | |
| { | |
| "epoch": 0.47627033372463523, | |
| "grad_norm": 5.75, | |
| "learning_rate": 5.373650467932121e-07, | |
| "loss": 1.005418586730957, | |
| "step": 1420, | |
| "token_acc": 0.709333626325331 | |
| }, | |
| { | |
| "epoch": 0.4779473419419755, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 5.34738108518212e-07, | |
| "loss": 1.0156227111816407, | |
| "step": 1425, | |
| "token_acc": 0.7042805679169315 | |
| }, | |
| { | |
| "epoch": 0.47962435015931576, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 5.321102063484079e-07, | |
| "loss": 1.0417983055114746, | |
| "step": 1430, | |
| "token_acc": 0.695285768597136 | |
| }, | |
| { | |
| "epoch": 0.48130135837665605, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 5.294814132014503e-07, | |
| "loss": 1.0348029136657715, | |
| "step": 1435, | |
| "token_acc": 0.7010616188282873 | |
| }, | |
| { | |
| "epoch": 0.4829783665939963, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 5.268518020197113e-07, | |
| "loss": 1.020607852935791, | |
| "step": 1440, | |
| "token_acc": 0.7056887635166902 | |
| }, | |
| { | |
| "epoch": 0.4846553748113366, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 5.242214457682623e-07, | |
| "loss": 0.9982949256896972, | |
| "step": 1445, | |
| "token_acc": 0.708599000516974 | |
| }, | |
| { | |
| "epoch": 0.4863323830286768, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 5.21590417432848e-07, | |
| "loss": 1.0266719818115235, | |
| "step": 1450, | |
| "token_acc": 0.7015999140985719 | |
| }, | |
| { | |
| "epoch": 0.4880093912460171, | |
| "grad_norm": 6.125, | |
| "learning_rate": 5.18958790017862e-07, | |
| "loss": 1.0599438667297363, | |
| "step": 1455, | |
| "token_acc": 0.6961610486891385 | |
| }, | |
| { | |
| "epoch": 0.48968639946335735, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 5.163266365443202e-07, | |
| "loss": 1.0109405517578125, | |
| "step": 1460, | |
| "token_acc": 0.7010133211886599 | |
| }, | |
| { | |
| "epoch": 0.49136340768069764, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 5.136940300478363e-07, | |
| "loss": 1.052570915222168, | |
| "step": 1465, | |
| "token_acc": 0.6954032957502169 | |
| }, | |
| { | |
| "epoch": 0.4930404158980379, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 5.110610435765934e-07, | |
| "loss": 1.0087080955505372, | |
| "step": 1470, | |
| "token_acc": 0.7043160955584697 | |
| }, | |
| { | |
| "epoch": 0.49471742411537817, | |
| "grad_norm": 6.25, | |
| "learning_rate": 5.084277501893186e-07, | |
| "loss": 1.0556130409240723, | |
| "step": 1475, | |
| "token_acc": 0.6968740328071804 | |
| }, | |
| { | |
| "epoch": 0.4963944323327184, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 5.057942229532544e-07, | |
| "loss": 1.051294708251953, | |
| "step": 1480, | |
| "token_acc": 0.6979548409977538 | |
| }, | |
| { | |
| "epoch": 0.4980714405500587, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 5.031605349421327e-07, | |
| "loss": 1.0535630226135253, | |
| "step": 1485, | |
| "token_acc": 0.6958853013171699 | |
| }, | |
| { | |
| "epoch": 0.49974844876739893, | |
| "grad_norm": 16.25, | |
| "learning_rate": 5.00526759234146e-07, | |
| "loss": 1.072017765045166, | |
| "step": 1490, | |
| "token_acc": 0.6943414284005264 | |
| }, | |
| { | |
| "epoch": 0.5014254569847392, | |
| "grad_norm": 6.125, | |
| "learning_rate": 4.978929689099206e-07, | |
| "loss": 1.0403889656066894, | |
| "step": 1495, | |
| "token_acc": 0.7005302818866871 | |
| }, | |
| { | |
| "epoch": 0.5031024652020795, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.952592370504881e-07, | |
| "loss": 1.0573260307312011, | |
| "step": 1500, | |
| "token_acc": 0.6974754631816512 | |
| }, | |
| { | |
| "epoch": 0.5047794734194198, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 4.926256367352579e-07, | |
| "loss": 1.0464170455932618, | |
| "step": 1505, | |
| "token_acc": 0.6966352336048007 | |
| }, | |
| { | |
| "epoch": 0.50645648163676, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 4.899922410399895e-07, | |
| "loss": 1.0218128204345702, | |
| "step": 1510, | |
| "token_acc": 0.7029736237889903 | |
| }, | |
| { | |
| "epoch": 0.5081334898541003, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 4.873591230347641e-07, | |
| "loss": 1.066216278076172, | |
| "step": 1515, | |
| "token_acc": 0.696458297457092 | |
| }, | |
| { | |
| "epoch": 0.5098104980714405, | |
| "grad_norm": 6.125, | |
| "learning_rate": 4.847263557819587e-07, | |
| "loss": 0.9884692192077636, | |
| "step": 1520, | |
| "token_acc": 0.7126423559185552 | |
| }, | |
| { | |
| "epoch": 0.5114875062887808, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.820940123342173e-07, | |
| "loss": 1.0081668853759767, | |
| "step": 1525, | |
| "token_acc": 0.7077791438470996 | |
| }, | |
| { | |
| "epoch": 0.5131645145061211, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 4.794621657324241e-07, | |
| "loss": 1.0023490905761718, | |
| "step": 1530, | |
| "token_acc": 0.7051523545706371 | |
| }, | |
| { | |
| "epoch": 0.5148415227234614, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 4.7683088900367767e-07, | |
| "loss": 1.0025731086730958, | |
| "step": 1535, | |
| "token_acc": 0.7035923141186299 | |
| }, | |
| { | |
| "epoch": 0.5165185309408016, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.7420025515926345e-07, | |
| "loss": 1.0486156463623046, | |
| "step": 1540, | |
| "token_acc": 0.6984244776801005 | |
| }, | |
| { | |
| "epoch": 0.5181955391581419, | |
| "grad_norm": 5.75, | |
| "learning_rate": 4.7157033719262894e-07, | |
| "loss": 1.0556530952453613, | |
| "step": 1545, | |
| "token_acc": 0.6891177739430544 | |
| }, | |
| { | |
| "epoch": 0.5198725473754822, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 4.6894120807735756e-07, | |
| "loss": 1.0409958839416504, | |
| "step": 1550, | |
| "token_acc": 0.6963211082093789 | |
| }, | |
| { | |
| "epoch": 0.5215495555928225, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 4.6631294076514426e-07, | |
| "loss": 1.0312573432922363, | |
| "step": 1555, | |
| "token_acc": 0.7006417736289382 | |
| }, | |
| { | |
| "epoch": 0.5232265638101626, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 4.636856081837709e-07, | |
| "loss": 1.0601593017578126, | |
| "step": 1560, | |
| "token_acc": 0.6912083152880034 | |
| }, | |
| { | |
| "epoch": 0.5249035720275029, | |
| "grad_norm": 6.25, | |
| "learning_rate": 4.610592832350832e-07, | |
| "loss": 1.0264972686767577, | |
| "step": 1565, | |
| "token_acc": 0.6995477185549894 | |
| }, | |
| { | |
| "epoch": 0.5265805802448432, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 4.5843403879296757e-07, | |
| "loss": 1.0082528114318847, | |
| "step": 1570, | |
| "token_acc": 0.7059257159058691 | |
| }, | |
| { | |
| "epoch": 0.5282575884621835, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 4.558099477013288e-07, | |
| "loss": 1.0326814651489258, | |
| "step": 1575, | |
| "token_acc": 0.7001332267519318 | |
| }, | |
| { | |
| "epoch": 0.5299345966795237, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 4.531870827720695e-07, | |
| "loss": 1.0291913986206054, | |
| "step": 1580, | |
| "token_acc": 0.7006176652254478 | |
| }, | |
| { | |
| "epoch": 0.531611604896864, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.5056551678306907e-07, | |
| "loss": 1.098677635192871, | |
| "step": 1585, | |
| "token_acc": 0.6850571099235041 | |
| }, | |
| { | |
| "epoch": 0.5332886131142043, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 4.4794532247616466e-07, | |
| "loss": 1.047648811340332, | |
| "step": 1590, | |
| "token_acc": 0.6972982177318652 | |
| }, | |
| { | |
| "epoch": 0.5349656213315446, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 4.4532657255513315e-07, | |
| "loss": 1.051576805114746, | |
| "step": 1595, | |
| "token_acc": 0.6918334234721472 | |
| }, | |
| { | |
| "epoch": 0.5366426295488848, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 4.4270933968367265e-07, | |
| "loss": 1.045903778076172, | |
| "step": 1600, | |
| "token_acc": 0.7030181086519115 | |
| }, | |
| { | |
| "epoch": 0.538319637766225, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 4.400936964833874e-07, | |
| "loss": 1.0394445419311524, | |
| "step": 1605, | |
| "token_acc": 0.6985150224473351 | |
| }, | |
| { | |
| "epoch": 0.5399966459835653, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 4.374797155317721e-07, | |
| "loss": 1.0475746154785157, | |
| "step": 1610, | |
| "token_acc": 0.6994824888674931 | |
| }, | |
| { | |
| "epoch": 0.5416736542009056, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.348674693601985e-07, | |
| "loss": 1.03849515914917, | |
| "step": 1615, | |
| "token_acc": 0.7009401404260384 | |
| }, | |
| { | |
| "epoch": 0.5433506624182458, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 4.322570304519022e-07, | |
| "loss": 1.0555237770080566, | |
| "step": 1620, | |
| "token_acc": 0.6955041993742109 | |
| }, | |
| { | |
| "epoch": 0.5450276706355861, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 4.296484712399722e-07, | |
| "loss": 1.013861083984375, | |
| "step": 1625, | |
| "token_acc": 0.6988321728164921 | |
| }, | |
| { | |
| "epoch": 0.5467046788529264, | |
| "grad_norm": 6.25, | |
| "learning_rate": 4.270418641053404e-07, | |
| "loss": 1.030404281616211, | |
| "step": 1630, | |
| "token_acc": 0.7007250418293363 | |
| }, | |
| { | |
| "epoch": 0.5483816870702667, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 4.2443728137477353e-07, | |
| "loss": 1.0251054763793945, | |
| "step": 1635, | |
| "token_acc": 0.7007932573128408 | |
| }, | |
| { | |
| "epoch": 0.5500586952876069, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 4.2183479531886644e-07, | |
| "loss": 1.0058277130126954, | |
| "step": 1640, | |
| "token_acc": 0.7088387174252432 | |
| }, | |
| { | |
| "epoch": 0.5517357035049472, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 4.1923447815003613e-07, | |
| "loss": 1.0381638526916503, | |
| "step": 1645, | |
| "token_acc": 0.6994563662374821 | |
| }, | |
| { | |
| "epoch": 0.5534127117222875, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 4.16636402020519e-07, | |
| "loss": 0.9976962089538575, | |
| "step": 1650, | |
| "token_acc": 0.7139129469242728 | |
| }, | |
| { | |
| "epoch": 0.5550897199396277, | |
| "grad_norm": 6.375, | |
| "learning_rate": 4.1404063902036766e-07, | |
| "loss": 1.0349790573120117, | |
| "step": 1655, | |
| "token_acc": 0.6979560097756055 | |
| }, | |
| { | |
| "epoch": 0.5567667281569679, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.114472611754518e-07, | |
| "loss": 0.9997722625732421, | |
| "step": 1660, | |
| "token_acc": 0.7006929187088051 | |
| }, | |
| { | |
| "epoch": 0.5584437363743082, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.0885634044545847e-07, | |
| "loss": 0.9896345138549805, | |
| "step": 1665, | |
| "token_acc": 0.710621704745167 | |
| }, | |
| { | |
| "epoch": 0.5601207445916485, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.062679487218966e-07, | |
| "loss": 1.0382169723510741, | |
| "step": 1670, | |
| "token_acc": 0.6991681815677849 | |
| }, | |
| { | |
| "epoch": 0.5617977528089888, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 4.0368215782610145e-07, | |
| "loss": 1.021392250061035, | |
| "step": 1675, | |
| "token_acc": 0.704168223301489 | |
| }, | |
| { | |
| "epoch": 0.563474761026329, | |
| "grad_norm": 5.75, | |
| "learning_rate": 4.010990395072413e-07, | |
| "loss": 1.0188769340515136, | |
| "step": 1680, | |
| "token_acc": 0.7017091454272864 | |
| }, | |
| { | |
| "epoch": 0.5651517692436693, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 3.98518665440328e-07, | |
| "loss": 1.045937442779541, | |
| "step": 1685, | |
| "token_acc": 0.6954898185425054 | |
| }, | |
| { | |
| "epoch": 0.5668287774610096, | |
| "grad_norm": 5.875, | |
| "learning_rate": 3.959411072242266e-07, | |
| "loss": 1.0623149871826172, | |
| "step": 1690, | |
| "token_acc": 0.6891484551620196 | |
| }, | |
| { | |
| "epoch": 0.5685057856783499, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 3.9336643637966984e-07, | |
| "loss": 1.0626046180725097, | |
| "step": 1695, | |
| "token_acc": 0.6948011185299321 | |
| }, | |
| { | |
| "epoch": 0.57018279389569, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 3.9079472434727324e-07, | |
| "loss": 1.0445612907409667, | |
| "step": 1700, | |
| "token_acc": 0.696189917936694 | |
| }, | |
| { | |
| "epoch": 0.5718598021130303, | |
| "grad_norm": 6.5, | |
| "learning_rate": 3.882260424855523e-07, | |
| "loss": 1.0410688400268555, | |
| "step": 1705, | |
| "token_acc": 0.698505122893455 | |
| }, | |
| { | |
| "epoch": 0.5735368103303706, | |
| "grad_norm": 6.75, | |
| "learning_rate": 3.856604620689435e-07, | |
| "loss": 1.0453211784362793, | |
| "step": 1710, | |
| "token_acc": 0.701270692469107 | |
| }, | |
| { | |
| "epoch": 0.5752138185477109, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 3.8309805428582557e-07, | |
| "loss": 1.041317081451416, | |
| "step": 1715, | |
| "token_acc": 0.695624676594032 | |
| }, | |
| { | |
| "epoch": 0.5768908267650511, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 3.8053889023654506e-07, | |
| "loss": 1.0477853775024415, | |
| "step": 1720, | |
| "token_acc": 0.700368759975783 | |
| }, | |
| { | |
| "epoch": 0.5785678349823914, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.779830409314427e-07, | |
| "loss": 1.025911235809326, | |
| "step": 1725, | |
| "token_acc": 0.6981572062336481 | |
| }, | |
| { | |
| "epoch": 0.5802448431997317, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 3.7543057728888387e-07, | |
| "loss": 1.0449981689453125, | |
| "step": 1730, | |
| "token_acc": 0.6980824153406773 | |
| }, | |
| { | |
| "epoch": 0.581921851417072, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.7288157013328986e-07, | |
| "loss": 1.038572120666504, | |
| "step": 1735, | |
| "token_acc": 0.7015234262719172 | |
| }, | |
| { | |
| "epoch": 0.5835988596344122, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 3.7033609019317367e-07, | |
| "loss": 1.0610506057739257, | |
| "step": 1740, | |
| "token_acc": 0.6917713434106877 | |
| }, | |
| { | |
| "epoch": 0.5852758678517525, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 3.6779420809917687e-07, | |
| "loss": 0.9886129379272461, | |
| "step": 1745, | |
| "token_acc": 0.7089907067214177 | |
| }, | |
| { | |
| "epoch": 0.5869528760690927, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 3.6525599438210954e-07, | |
| "loss": 1.0512758255004884, | |
| "step": 1750, | |
| "token_acc": 0.6951606390955759 | |
| }, | |
| { | |
| "epoch": 0.588629884286433, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 3.6272151947099395e-07, | |
| "loss": 1.054044246673584, | |
| "step": 1755, | |
| "token_acc": 0.6979292209926592 | |
| }, | |
| { | |
| "epoch": 0.5903068925037732, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 3.6019085369110966e-07, | |
| "loss": 1.051740550994873, | |
| "step": 1760, | |
| "token_acc": 0.6992283605457392 | |
| }, | |
| { | |
| "epoch": 0.5919839007211135, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 3.576640672620427e-07, | |
| "loss": 1.059780216217041, | |
| "step": 1765, | |
| "token_acc": 0.6915394973070018 | |
| }, | |
| { | |
| "epoch": 0.5936609089384538, | |
| "grad_norm": 6.375, | |
| "learning_rate": 3.5514123029573674e-07, | |
| "loss": 1.0393645286560058, | |
| "step": 1770, | |
| "token_acc": 0.695433964322732 | |
| }, | |
| { | |
| "epoch": 0.5953379171557941, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 3.526224127945478e-07, | |
| "loss": 1.0533989906311034, | |
| "step": 1775, | |
| "token_acc": 0.6924574479919902 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 3.5010768464930234e-07, | |
| "loss": 1.0086621284484862, | |
| "step": 1780, | |
| "token_acc": 0.7095280105866785 | |
| }, | |
| { | |
| "epoch": 0.5986919335904746, | |
| "grad_norm": 6.625, | |
| "learning_rate": 3.475971156373567e-07, | |
| "loss": 1.0667131423950196, | |
| "step": 1785, | |
| "token_acc": 0.6917589316288095 | |
| }, | |
| { | |
| "epoch": 0.6003689418078149, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 3.4509077542066254e-07, | |
| "loss": 1.0151333808898926, | |
| "step": 1790, | |
| "token_acc": 0.7043865991805254 | |
| }, | |
| { | |
| "epoch": 0.6020459500251552, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 3.4258873354383264e-07, | |
| "loss": 1.0384534835815429, | |
| "step": 1795, | |
| "token_acc": 0.6970242507677645 | |
| }, | |
| { | |
| "epoch": 0.6037229582424953, | |
| "grad_norm": 6.125, | |
| "learning_rate": 3.4009105943221206e-07, | |
| "loss": 1.0263296127319337, | |
| "step": 1800, | |
| "token_acc": 0.7001357689459393 | |
| }, | |
| { | |
| "epoch": 0.6053999664598356, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 3.3759782238995093e-07, | |
| "loss": 1.049347496032715, | |
| "step": 1805, | |
| "token_acc": 0.6946134277181728 | |
| }, | |
| { | |
| "epoch": 0.6070769746771759, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 3.3510909159808237e-07, | |
| "loss": 1.0234293937683105, | |
| "step": 1810, | |
| "token_acc": 0.7010961955565977 | |
| }, | |
| { | |
| "epoch": 0.6087539828945162, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 3.326249361126024e-07, | |
| "loss": 1.032447624206543, | |
| "step": 1815, | |
| "token_acc": 0.7008217918510431 | |
| }, | |
| { | |
| "epoch": 0.6104309911118565, | |
| "grad_norm": 7.3125, | |
| "learning_rate": 3.301454248625536e-07, | |
| "loss": 1.0159520149230956, | |
| "step": 1820, | |
| "token_acc": 0.7034609635506497 | |
| }, | |
| { | |
| "epoch": 0.6121079993291967, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.276706266481128e-07, | |
| "loss": 1.0407513618469237, | |
| "step": 1825, | |
| "token_acc": 0.7003651425145785 | |
| }, | |
| { | |
| "epoch": 0.613785007546537, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 3.252006101386819e-07, | |
| "loss": 1.0305519104003906, | |
| "step": 1830, | |
| "token_acc": 0.6989974352996037 | |
| }, | |
| { | |
| "epoch": 0.6154620157638773, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 3.2273544387098294e-07, | |
| "loss": 1.0071770668029785, | |
| "step": 1835, | |
| "token_acc": 0.7019307211811471 | |
| }, | |
| { | |
| "epoch": 0.6171390239812176, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 3.2027519624715574e-07, | |
| "loss": 1.041695499420166, | |
| "step": 1840, | |
| "token_acc": 0.6940718062900083 | |
| }, | |
| { | |
| "epoch": 0.6188160321985577, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 3.1781993553286e-07, | |
| "loss": 1.0573740959167481, | |
| "step": 1845, | |
| "token_acc": 0.6961400118108123 | |
| }, | |
| { | |
| "epoch": 0.620493040415898, | |
| "grad_norm": 5.875, | |
| "learning_rate": 3.1536972985538164e-07, | |
| "loss": 1.0022772789001464, | |
| "step": 1850, | |
| "token_acc": 0.7091309896739761 | |
| }, | |
| { | |
| "epoch": 0.6221700486332383, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 3.129246472017416e-07, | |
| "loss": 0.9961446762084961, | |
| "step": 1855, | |
| "token_acc": 0.7071025555924328 | |
| }, | |
| { | |
| "epoch": 0.6238470568505786, | |
| "grad_norm": 5.75, | |
| "learning_rate": 3.104847554168105e-07, | |
| "loss": 1.0580769538879395, | |
| "step": 1860, | |
| "token_acc": 0.6970332150919059 | |
| }, | |
| { | |
| "epoch": 0.6255240650679188, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.080501222014248e-07, | |
| "loss": 1.074977207183838, | |
| "step": 1865, | |
| "token_acc": 0.6930246189917937 | |
| }, | |
| { | |
| "epoch": 0.6272010732852591, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 3.056208151105094e-07, | |
| "loss": 1.022191333770752, | |
| "step": 1870, | |
| "token_acc": 0.6983393254579695 | |
| }, | |
| { | |
| "epoch": 0.6288780815025994, | |
| "grad_norm": 5.5, | |
| "learning_rate": 3.0319690155120235e-07, | |
| "loss": 1.051521396636963, | |
| "step": 1875, | |
| "token_acc": 0.6924612810965409 | |
| }, | |
| { | |
| "epoch": 0.6305550897199397, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3.007784487809852e-07, | |
| "loss": 1.0417262077331544, | |
| "step": 1880, | |
| "token_acc": 0.7002479152580573 | |
| }, | |
| { | |
| "epoch": 0.6322320979372799, | |
| "grad_norm": 6.5, | |
| "learning_rate": 2.9836552390581577e-07, | |
| "loss": 1.0322657585144044, | |
| "step": 1885, | |
| "token_acc": 0.7021973676777947 | |
| }, | |
| { | |
| "epoch": 0.6339091061546201, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 2.9595819387826747e-07, | |
| "loss": 1.04501953125, | |
| "step": 1890, | |
| "token_acc": 0.6964275668073137 | |
| }, | |
| { | |
| "epoch": 0.6355861143719604, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 2.935565254956705e-07, | |
| "loss": 1.0145910263061524, | |
| "step": 1895, | |
| "token_acc": 0.7037352406902816 | |
| }, | |
| { | |
| "epoch": 0.6372631225893007, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 2.911605853982586e-07, | |
| "loss": 1.047500228881836, | |
| "step": 1900, | |
| "token_acc": 0.6984356197352587 | |
| }, | |
| { | |
| "epoch": 0.6389401308066409, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 2.8877044006732034e-07, | |
| "loss": 1.026676845550537, | |
| "step": 1905, | |
| "token_acc": 0.6990015950847758 | |
| }, | |
| { | |
| "epoch": 0.6406171390239812, | |
| "grad_norm": 6.25, | |
| "learning_rate": 2.8638615582335376e-07, | |
| "loss": 1.0263890266418456, | |
| "step": 1910, | |
| "token_acc": 0.7036444444444444 | |
| }, | |
| { | |
| "epoch": 0.6422941472413215, | |
| "grad_norm": 5.875, | |
| "learning_rate": 2.8400779882422676e-07, | |
| "loss": 1.0457491874694824, | |
| "step": 1915, | |
| "token_acc": 0.6944520660703507 | |
| }, | |
| { | |
| "epoch": 0.6439711554586618, | |
| "grad_norm": 6.125, | |
| "learning_rate": 2.816354350633411e-07, | |
| "loss": 0.9950202941894531, | |
| "step": 1920, | |
| "token_acc": 0.7036231884057971 | |
| }, | |
| { | |
| "epoch": 0.645648163676002, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 2.792691303678015e-07, | |
| "loss": 1.0635858535766602, | |
| "step": 1925, | |
| "token_acc": 0.6984164085782738 | |
| }, | |
| { | |
| "epoch": 0.6473251718933423, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 2.7690895039658883e-07, | |
| "loss": 1.074039363861084, | |
| "step": 1930, | |
| "token_acc": 0.6902784014369107 | |
| }, | |
| { | |
| "epoch": 0.6490021801106826, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.745549606387381e-07, | |
| "loss": 0.9852777481079101, | |
| "step": 1935, | |
| "token_acc": 0.7147304804346499 | |
| }, | |
| { | |
| "epoch": 0.6506791883280229, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 2.7220722641152156e-07, | |
| "loss": 1.0335915565490723, | |
| "step": 1940, | |
| "token_acc": 0.7019733972961186 | |
| }, | |
| { | |
| "epoch": 0.652356196545363, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 2.6986581285863674e-07, | |
| "loss": 1.0393771171569823, | |
| "step": 1945, | |
| "token_acc": 0.7026867275658248 | |
| }, | |
| { | |
| "epoch": 0.6540332047627033, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 2.6753078494839796e-07, | |
| "loss": 1.0720300674438477, | |
| "step": 1950, | |
| "token_acc": 0.687844706391262 | |
| }, | |
| { | |
| "epoch": 0.6557102129800436, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.6520220747193423e-07, | |
| "loss": 1.0267016410827636, | |
| "step": 1955, | |
| "token_acc": 0.701956017239474 | |
| }, | |
| { | |
| "epoch": 0.6573872211973839, | |
| "grad_norm": 5.625, | |
| "learning_rate": 2.62880145041391e-07, | |
| "loss": 1.083882713317871, | |
| "step": 1960, | |
| "token_acc": 0.6892801378344909 | |
| }, | |
| { | |
| "epoch": 0.6590642294147241, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 2.6056466208813814e-07, | |
| "loss": 1.0755278587341308, | |
| "step": 1965, | |
| "token_acc": 0.6931045645840078 | |
| }, | |
| { | |
| "epoch": 0.6607412376320644, | |
| "grad_norm": 6.125, | |
| "learning_rate": 2.582558228609817e-07, | |
| "loss": 1.0210668563842773, | |
| "step": 1970, | |
| "token_acc": 0.7000920904532897 | |
| }, | |
| { | |
| "epoch": 0.6624182458494047, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.5595369142438056e-07, | |
| "loss": 1.00156831741333, | |
| "step": 1975, | |
| "token_acc": 0.709395738735592 | |
| }, | |
| { | |
| "epoch": 0.664095254066745, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 2.5365833165666943e-07, | |
| "loss": 1.0243375778198243, | |
| "step": 1980, | |
| "token_acc": 0.7054905490549055 | |
| }, | |
| { | |
| "epoch": 0.6657722622840851, | |
| "grad_norm": 6.0, | |
| "learning_rate": 2.5136980724828695e-07, | |
| "loss": 1.0490416526794433, | |
| "step": 1985, | |
| "token_acc": 0.6968990958007434 | |
| }, | |
| { | |
| "epoch": 0.6674492705014254, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.490881817000071e-07, | |
| "loss": 1.0592771530151368, | |
| "step": 1990, | |
| "token_acc": 0.6985099735640471 | |
| }, | |
| { | |
| "epoch": 0.6691262787187657, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 2.4681351832117814e-07, | |
| "loss": 1.0456266403198242, | |
| "step": 1995, | |
| "token_acc": 0.6956661488187711 | |
| }, | |
| { | |
| "epoch": 0.670803286936106, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 2.4454588022796556e-07, | |
| "loss": 1.0303558349609374, | |
| "step": 2000, | |
| "token_acc": 0.6989892984542212 | |
| }, | |
| { | |
| "epoch": 0.6724802951534462, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 2.422853303416015e-07, | |
| "loss": 1.0573721885681153, | |
| "step": 2005, | |
| "token_acc": 0.69331641285956 | |
| }, | |
| { | |
| "epoch": 0.6741573033707865, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 2.4003193138663754e-07, | |
| "loss": 1.0548274040222168, | |
| "step": 2010, | |
| "token_acc": 0.6974822112753147 | |
| }, | |
| { | |
| "epoch": 0.6758343115881268, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 2.3778574588920525e-07, | |
| "loss": 1.0316340446472168, | |
| "step": 2015, | |
| "token_acc": 0.6966531008929113 | |
| }, | |
| { | |
| "epoch": 0.6775113198054671, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 2.3554683617528087e-07, | |
| "loss": 1.0404158592224122, | |
| "step": 2020, | |
| "token_acc": 0.6936044193860699 | |
| }, | |
| { | |
| "epoch": 0.6791883280228073, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 2.3331526436895643e-07, | |
| "loss": 1.0208246231079101, | |
| "step": 2025, | |
| "token_acc": 0.6978810663021189 | |
| }, | |
| { | |
| "epoch": 0.6808653362401476, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.310910923907149e-07, | |
| "loss": 1.0479655265808105, | |
| "step": 2030, | |
| "token_acc": 0.6983508932661475 | |
| }, | |
| { | |
| "epoch": 0.6825423444574878, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 2.288743819557134e-07, | |
| "loss": 1.0503520011901855, | |
| "step": 2035, | |
| "token_acc": 0.698051948051948 | |
| }, | |
| { | |
| "epoch": 0.6842193526748281, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 2.266651945720694e-07, | |
| "loss": 1.0153435707092284, | |
| "step": 2040, | |
| "token_acc": 0.7011975694649283 | |
| }, | |
| { | |
| "epoch": 0.6858963608921683, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 2.2446359153915523e-07, | |
| "loss": 1.0074621200561524, | |
| "step": 2045, | |
| "token_acc": 0.7071179799738789 | |
| }, | |
| { | |
| "epoch": 0.6875733691095086, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.2226963394589637e-07, | |
| "loss": 1.0132587432861329, | |
| "step": 2050, | |
| "token_acc": 0.7069939843227806 | |
| }, | |
| { | |
| "epoch": 0.6892503773268489, | |
| "grad_norm": 6.625, | |
| "learning_rate": 2.200833826690766e-07, | |
| "loss": 1.0336087226867676, | |
| "step": 2055, | |
| "token_acc": 0.698604598610248 | |
| }, | |
| { | |
| "epoch": 0.6909273855441892, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 2.1790489837164877e-07, | |
| "loss": 1.0673924446105958, | |
| "step": 2060, | |
| "token_acc": 0.6897658185093447 | |
| }, | |
| { | |
| "epoch": 0.6926043937615294, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 2.157342415010523e-07, | |
| "loss": 1.0450064659118652, | |
| "step": 2065, | |
| "token_acc": 0.7005138516532619 | |
| }, | |
| { | |
| "epoch": 0.6942814019788697, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 2.135714722875346e-07, | |
| "loss": 1.0480844497680664, | |
| "step": 2070, | |
| "token_acc": 0.6980156217014989 | |
| }, | |
| { | |
| "epoch": 0.69595841019621, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 2.1141665074248067e-07, | |
| "loss": 1.0112756729125976, | |
| "step": 2075, | |
| "token_acc": 0.7010742643624475 | |
| }, | |
| { | |
| "epoch": 0.6976354184135503, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 2.092698366567478e-07, | |
| "loss": 1.0765247344970703, | |
| "step": 2080, | |
| "token_acc": 0.6889489327208862 | |
| }, | |
| { | |
| "epoch": 0.6993124266308904, | |
| "grad_norm": 5.5, | |
| "learning_rate": 2.0713108959900689e-07, | |
| "loss": 1.0020230293273926, | |
| "step": 2085, | |
| "token_acc": 0.7083483545346861 | |
| }, | |
| { | |
| "epoch": 0.7009894348482307, | |
| "grad_norm": 6.125, | |
| "learning_rate": 2.0500046891408857e-07, | |
| "loss": 1.0372941970825196, | |
| "step": 2090, | |
| "token_acc": 0.6972194225410565 | |
| }, | |
| { | |
| "epoch": 0.702666443065571, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.0287803372133756e-07, | |
| "loss": 0.9844575881958008, | |
| "step": 2095, | |
| "token_acc": 0.7124336406817546 | |
| }, | |
| { | |
| "epoch": 0.7043434512829113, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 2.0076384291297133e-07, | |
| "loss": 1.0537097930908204, | |
| "step": 2100, | |
| "token_acc": 0.6906941049400261 | |
| }, | |
| { | |
| "epoch": 0.7060204595002515, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 1.9865795515244722e-07, | |
| "loss": 1.0500137329101562, | |
| "step": 2105, | |
| "token_acc": 0.6935847509735602 | |
| }, | |
| { | |
| "epoch": 0.7076974677175918, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.965604288728337e-07, | |
| "loss": 1.040913963317871, | |
| "step": 2110, | |
| "token_acc": 0.697758455197335 | |
| }, | |
| { | |
| "epoch": 0.7093744759349321, | |
| "grad_norm": 7.125, | |
| "learning_rate": 1.9447132227518893e-07, | |
| "loss": 1.0521238327026368, | |
| "step": 2115, | |
| "token_acc": 0.6945375878853435 | |
| }, | |
| { | |
| "epoch": 0.7110514841522724, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 1.923906933269463e-07, | |
| "loss": 1.0393115997314453, | |
| "step": 2120, | |
| "token_acc": 0.6945876988219967 | |
| }, | |
| { | |
| "epoch": 0.7127284923696127, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.9031859976030617e-07, | |
| "loss": 1.0050291061401366, | |
| "step": 2125, | |
| "token_acc": 0.7060748959778086 | |
| }, | |
| { | |
| "epoch": 0.7144055005869528, | |
| "grad_norm": 6.25, | |
| "learning_rate": 1.8825509907063326e-07, | |
| "loss": 1.0308636665344237, | |
| "step": 2130, | |
| "token_acc": 0.6999337602119673 | |
| }, | |
| { | |
| "epoch": 0.7160825088042931, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 1.862002485148617e-07, | |
| "loss": 1.0438469886779784, | |
| "step": 2135, | |
| "token_acc": 0.6971307120085016 | |
| }, | |
| { | |
| "epoch": 0.7177595170216334, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.8415410510990608e-07, | |
| "loss": 1.0166708946228027, | |
| "step": 2140, | |
| "token_acc": 0.7032047017076957 | |
| }, | |
| { | |
| "epoch": 0.7194365252389737, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.8211672563108023e-07, | |
| "loss": 1.0595402717590332, | |
| "step": 2145, | |
| "token_acc": 0.6895378967825524 | |
| }, | |
| { | |
| "epoch": 0.7211135334563139, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.800881666105203e-07, | |
| "loss": 1.0093088150024414, | |
| "step": 2150, | |
| "token_acc": 0.7006111142840825 | |
| }, | |
| { | |
| "epoch": 0.7227905416736542, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 1.780684843356175e-07, | |
| "loss": 1.0222167015075683, | |
| "step": 2155, | |
| "token_acc": 0.695811209439528 | |
| }, | |
| { | |
| "epoch": 0.7244675498909945, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.7605773484745545e-07, | |
| "loss": 1.0139375686645509, | |
| "step": 2160, | |
| "token_acc": 0.6973555868646364 | |
| }, | |
| { | |
| "epoch": 0.7261445581083348, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.7405597393925598e-07, | |
| "loss": 1.029660987854004, | |
| "step": 2165, | |
| "token_acc": 0.6958884158637688 | |
| }, | |
| { | |
| "epoch": 0.727821566325675, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.7206325715483e-07, | |
| "loss": 1.035785961151123, | |
| "step": 2170, | |
| "token_acc": 0.700274709217371 | |
| }, | |
| { | |
| "epoch": 0.7294985745430153, | |
| "grad_norm": 6.5, | |
| "learning_rate": 1.7007963978703693e-07, | |
| "loss": 1.0544404983520508, | |
| "step": 2175, | |
| "token_acc": 0.6898428674129609 | |
| }, | |
| { | |
| "epoch": 0.7311755827603555, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 1.6810517687625065e-07, | |
| "loss": 1.064098072052002, | |
| "step": 2180, | |
| "token_acc": 0.6946430421717956 | |
| }, | |
| { | |
| "epoch": 0.7328525909776958, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.661399232088318e-07, | |
| "loss": 1.0491707801818848, | |
| "step": 2185, | |
| "token_acc": 0.6936832838120391 | |
| }, | |
| { | |
| "epoch": 0.734529599195036, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 1.641839333156077e-07, | |
| "loss": 1.0354165077209472, | |
| "step": 2190, | |
| "token_acc": 0.7039037992331822 | |
| }, | |
| { | |
| "epoch": 0.7362066074123763, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.6223726147035927e-07, | |
| "loss": 1.011677360534668, | |
| "step": 2195, | |
| "token_acc": 0.7069517493711411 | |
| }, | |
| { | |
| "epoch": 0.7378836156297166, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.6029996168831516e-07, | |
| "loss": 1.018637466430664, | |
| "step": 2200, | |
| "token_acc": 0.7055218238057052 | |
| }, | |
| { | |
| "epoch": 0.7395606238470569, | |
| "grad_norm": 6.375, | |
| "learning_rate": 1.5837208772465326e-07, | |
| "loss": 0.9837164878845215, | |
| "step": 2205, | |
| "token_acc": 0.7087787891746594 | |
| }, | |
| { | |
| "epoch": 0.7412376320643971, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 1.5645369307300837e-07, | |
| "loss": 0.9903836250305176, | |
| "step": 2210, | |
| "token_acc": 0.7101433174091768 | |
| }, | |
| { | |
| "epoch": 0.7429146402817374, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 1.5454483096398845e-07, | |
| "loss": 1.002861785888672, | |
| "step": 2215, | |
| "token_acc": 0.7046771644324767 | |
| }, | |
| { | |
| "epoch": 0.7445916484990777, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 1.5264555436369742e-07, | |
| "loss": 1.0327083587646484, | |
| "step": 2220, | |
| "token_acc": 0.7040962133841391 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 1.5075591597226583e-07, | |
| "loss": 1.026947021484375, | |
| "step": 2225, | |
| "token_acc": 0.7050882658359294 | |
| }, | |
| { | |
| "epoch": 0.7479456649337581, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 1.488759682223879e-07, | |
| "loss": 1.0323354721069335, | |
| "step": 2230, | |
| "token_acc": 0.6986567495559503 | |
| }, | |
| { | |
| "epoch": 0.7496226731510984, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 1.4700576327786723e-07, | |
| "loss": 1.0541604042053223, | |
| "step": 2235, | |
| "token_acc": 0.6925109120147025 | |
| }, | |
| { | |
| "epoch": 0.7512996813684387, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.451453530321689e-07, | |
| "loss": 1.0624011039733887, | |
| "step": 2240, | |
| "token_acc": 0.6876182829615186 | |
| }, | |
| { | |
| "epoch": 0.752976689585779, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 1.4329478910698033e-07, | |
| "loss": 1.0590134620666505, | |
| "step": 2245, | |
| "token_acc": 0.6944770201597141 | |
| }, | |
| { | |
| "epoch": 0.7546536978031192, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.41454122850778e-07, | |
| "loss": 1.033609676361084, | |
| "step": 2250, | |
| "token_acc": 0.701410444916408 | |
| }, | |
| { | |
| "epoch": 0.7563307060204595, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 1.3962340533740297e-07, | |
| "loss": 1.0673054695129394, | |
| "step": 2255, | |
| "token_acc": 0.6927898114721219 | |
| }, | |
| { | |
| "epoch": 0.7580077142377998, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.3780268736464417e-07, | |
| "loss": 1.0278964996337892, | |
| "step": 2260, | |
| "token_acc": 0.7006198105484739 | |
| }, | |
| { | |
| "epoch": 0.7596847224551401, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 1.359920194528285e-07, | |
| "loss": 1.0502543449401855, | |
| "step": 2265, | |
| "token_acc": 0.6947629198594227 | |
| }, | |
| { | |
| "epoch": 0.7613617306724803, | |
| "grad_norm": 5.875, | |
| "learning_rate": 1.341914518434188e-07, | |
| "loss": 1.0808907508850099, | |
| "step": 2270, | |
| "token_acc": 0.6939768693461582 | |
| }, | |
| { | |
| "epoch": 0.7630387388898205, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 1.3240103449762e-07, | |
| "loss": 1.1043811798095704, | |
| "step": 2275, | |
| "token_acc": 0.6843830610490111 | |
| }, | |
| { | |
| "epoch": 0.7647157471071608, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.30620817094993e-07, | |
| "loss": 1.0409873962402343, | |
| "step": 2280, | |
| "token_acc": 0.6948018528049408 | |
| }, | |
| { | |
| "epoch": 0.7663927553245011, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.288508490320762e-07, | |
| "loss": 1.0083087921142577, | |
| "step": 2285, | |
| "token_acc": 0.7059962385219604 | |
| }, | |
| { | |
| "epoch": 0.7680697635418413, | |
| "grad_norm": 6.0, | |
| "learning_rate": 1.2709117942101434e-07, | |
| "loss": 1.0333211898803711, | |
| "step": 2290, | |
| "token_acc": 0.7001386642015253 | |
| }, | |
| { | |
| "epoch": 0.7697467717591816, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 1.2534185708819622e-07, | |
| "loss": 0.9901119232177734, | |
| "step": 2295, | |
| "token_acc": 0.7122798806528988 | |
| }, | |
| { | |
| "epoch": 0.7714237799765219, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 1.2360293057289988e-07, | |
| "loss": 1.0226807594299316, | |
| "step": 2300, | |
| "token_acc": 0.6997545351134526 | |
| }, | |
| { | |
| "epoch": 0.7731007881938622, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.2187444812594576e-07, | |
| "loss": 1.0424675941467285, | |
| "step": 2305, | |
| "token_acc": 0.6932173913043478 | |
| }, | |
| { | |
| "epoch": 0.7747777964112024, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.2015645770835764e-07, | |
| "loss": 0.9836078643798828, | |
| "step": 2310, | |
| "token_acc": 0.7098943857698722 | |
| }, | |
| { | |
| "epoch": 0.7764548046285427, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 1.1844900699003174e-07, | |
| "loss": 1.010260009765625, | |
| "step": 2315, | |
| "token_acc": 0.7048983752023502 | |
| }, | |
| { | |
| "epoch": 0.778131812845883, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 1.1675214334841488e-07, | |
| "loss": 1.0726960182189942, | |
| "step": 2320, | |
| "token_acc": 0.6901127289807422 | |
| }, | |
| { | |
| "epoch": 0.7798088210632232, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 1.1506591386718861e-07, | |
| "loss": 1.014423942565918, | |
| "step": 2325, | |
| "token_acc": 0.7041590518912333 | |
| }, | |
| { | |
| "epoch": 0.7814858292805634, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 1.1339036533496355e-07, | |
| "loss": 1.0075566291809082, | |
| "step": 2330, | |
| "token_acc": 0.7022232734153264 | |
| }, | |
| { | |
| "epoch": 0.7831628374979037, | |
| "grad_norm": 6.375, | |
| "learning_rate": 1.1172554424398123e-07, | |
| "loss": 1.038377857208252, | |
| "step": 2335, | |
| "token_acc": 0.6992858736889087 | |
| }, | |
| { | |
| "epoch": 0.784839845715244, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.1007149678882327e-07, | |
| "loss": 1.0416951179504395, | |
| "step": 2340, | |
| "token_acc": 0.7018931901667138 | |
| }, | |
| { | |
| "epoch": 0.7865168539325843, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 1.0842826886513074e-07, | |
| "loss": 1.054603385925293, | |
| "step": 2345, | |
| "token_acc": 0.6900633743871817 | |
| }, | |
| { | |
| "epoch": 0.7881938621499245, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.0679590606832945e-07, | |
| "loss": 1.0523086547851563, | |
| "step": 2350, | |
| "token_acc": 0.6997717563059636 | |
| }, | |
| { | |
| "epoch": 0.7898708703672648, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 1.051744536923656e-07, | |
| "loss": 1.043616485595703, | |
| "step": 2355, | |
| "token_acc": 0.6997246637721063 | |
| }, | |
| { | |
| "epoch": 0.7915478785846051, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.0356395672844864e-07, | |
| "loss": 1.0373201370239258, | |
| "step": 2360, | |
| "token_acc": 0.699506331830865 | |
| }, | |
| { | |
| "epoch": 0.7932248868019454, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 1.0196445986380336e-07, | |
| "loss": 1.0343366622924806, | |
| "step": 2365, | |
| "token_acc": 0.6987698944790999 | |
| }, | |
| { | |
| "epoch": 0.7949018950192855, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.0037600748042918e-07, | |
| "loss": 1.025636863708496, | |
| "step": 2370, | |
| "token_acc": 0.7044354165507263 | |
| }, | |
| { | |
| "epoch": 0.7965789032366258, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 9.879864365386908e-08, | |
| "loss": 1.0659350395202636, | |
| "step": 2375, | |
| "token_acc": 0.6936859085148058 | |
| }, | |
| { | |
| "epoch": 0.7982559114539661, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 9.723241215198691e-08, | |
| "loss": 1.0253265380859375, | |
| "step": 2380, | |
| "token_acc": 0.6990683229813665 | |
| }, | |
| { | |
| "epoch": 0.7999329196713064, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 9.56773564337523e-08, | |
| "loss": 0.991847038269043, | |
| "step": 2385, | |
| "token_acc": 0.7079682205865171 | |
| }, | |
| { | |
| "epoch": 0.8016099278886466, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 9.413351964803517e-08, | |
| "loss": 1.0407160758972167, | |
| "step": 2390, | |
| "token_acc": 0.699185631414547 | |
| }, | |
| { | |
| "epoch": 0.8032869361059869, | |
| "grad_norm": 6.125, | |
| "learning_rate": 9.26009446324083e-08, | |
| "loss": 1.0785944938659668, | |
| "step": 2395, | |
| "token_acc": 0.6866031454482838 | |
| }, | |
| { | |
| "epoch": 0.8049639443233272, | |
| "grad_norm": 6.5, | |
| "learning_rate": 9.107967391195903e-08, | |
| "loss": 1.062040138244629, | |
| "step": 2400, | |
| "token_acc": 0.6932661267485983 | |
| }, | |
| { | |
| "epoch": 0.8066409525406675, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 8.956974969810905e-08, | |
| "loss": 1.0173826217651367, | |
| "step": 2405, | |
| "token_acc": 0.7024507192328183 | |
| }, | |
| { | |
| "epoch": 0.8083179607580078, | |
| "grad_norm": 5.75, | |
| "learning_rate": 8.807121388744288e-08, | |
| "loss": 1.0436044692993165, | |
| "step": 2410, | |
| "token_acc": 0.69497507142457 | |
| }, | |
| { | |
| "epoch": 0.809994968975348, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 8.658410806054567e-08, | |
| "loss": 1.017934799194336, | |
| "step": 2415, | |
| "token_acc": 0.7072147651006712 | |
| }, | |
| { | |
| "epoch": 0.8116719771926882, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 8.510847348084943e-08, | |
| "loss": 1.018608283996582, | |
| "step": 2420, | |
| "token_acc": 0.708192842354333 | |
| }, | |
| { | |
| "epoch": 0.8133489854100285, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 8.364435109348822e-08, | |
| "loss": 0.9961603164672852, | |
| "step": 2425, | |
| "token_acc": 0.7076452599388379 | |
| }, | |
| { | |
| "epoch": 0.8150259936273688, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 8.219178152416156e-08, | |
| "loss": 1.0228870391845704, | |
| "step": 2430, | |
| "token_acc": 0.7013418833867712 | |
| }, | |
| { | |
| "epoch": 0.816703001844709, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 8.075080507800747e-08, | |
| "loss": 1.087087059020996, | |
| "step": 2435, | |
| "token_acc": 0.6874622107403947 | |
| }, | |
| { | |
| "epoch": 0.8183800100620493, | |
| "grad_norm": 5.625, | |
| "learning_rate": 7.932146173848402e-08, | |
| "loss": 1.0141497611999513, | |
| "step": 2440, | |
| "token_acc": 0.7045632042842798 | |
| }, | |
| { | |
| "epoch": 0.8200570182793896, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 7.790379116626028e-08, | |
| "loss": 1.03942813873291, | |
| "step": 2445, | |
| "token_acc": 0.6949412052915238 | |
| }, | |
| { | |
| "epoch": 0.8217340264967299, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 7.649783269811521e-08, | |
| "loss": 1.0532546043395996, | |
| "step": 2450, | |
| "token_acc": 0.6938823668220164 | |
| }, | |
| { | |
| "epoch": 0.8234110347140701, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 7.510362534584636e-08, | |
| "loss": 1.0119309425354004, | |
| "step": 2455, | |
| "token_acc": 0.7017226277372263 | |
| }, | |
| { | |
| "epoch": 0.8250880429314104, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 7.372120779518787e-08, | |
| "loss": 1.074321174621582, | |
| "step": 2460, | |
| "token_acc": 0.6915051342595134 | |
| }, | |
| { | |
| "epoch": 0.8267650511487507, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 7.235061840473622e-08, | |
| "loss": 0.995145034790039, | |
| "step": 2465, | |
| "token_acc": 0.7089050765147665 | |
| }, | |
| { | |
| "epoch": 0.8284420593660909, | |
| "grad_norm": 6.5, | |
| "learning_rate": 7.099189520488664e-08, | |
| "loss": 1.0098725318908692, | |
| "step": 2470, | |
| "token_acc": 0.7019166817824536 | |
| }, | |
| { | |
| "epoch": 0.8301190675834311, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 6.96450758967772e-08, | |
| "loss": 1.0157196044921875, | |
| "step": 2475, | |
| "token_acc": 0.7020709325396826 | |
| }, | |
| { | |
| "epoch": 0.8317960758007714, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 6.831019785124337e-08, | |
| "loss": 1.037847900390625, | |
| "step": 2480, | |
| "token_acc": 0.6992954337653956 | |
| }, | |
| { | |
| "epoch": 0.8334730840181117, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 6.698729810778064e-08, | |
| "loss": 1.0353034019470215, | |
| "step": 2485, | |
| "token_acc": 0.7004344048653345 | |
| }, | |
| { | |
| "epoch": 0.835150092235452, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 6.567641337351681e-08, | |
| "loss": 1.0612793922424317, | |
| "step": 2490, | |
| "token_acc": 0.6943042912873862 | |
| }, | |
| { | |
| "epoch": 0.8368271004527922, | |
| "grad_norm": 6.25, | |
| "learning_rate": 6.43775800221934e-08, | |
| "loss": 1.040201473236084, | |
| "step": 2495, | |
| "token_acc": 0.7003977538605521 | |
| }, | |
| { | |
| "epoch": 0.8385041086701325, | |
| "grad_norm": 6.125, | |
| "learning_rate": 6.309083409315652e-08, | |
| "loss": 1.0146426200866698, | |
| "step": 2500, | |
| "token_acc": 0.7067005321722303 | |
| }, | |
| { | |
| "epoch": 0.8401811168874728, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 6.181621129035714e-08, | |
| "loss": 1.0435810089111328, | |
| "step": 2505, | |
| "token_acc": 0.6991748352439497 | |
| }, | |
| { | |
| "epoch": 0.8418581251048131, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 6.055374698135973e-08, | |
| "loss": 1.0399096488952637, | |
| "step": 2510, | |
| "token_acc": 0.6975013881177123 | |
| }, | |
| { | |
| "epoch": 0.8435351333221532, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 5.930347619636123e-08, | |
| "loss": 1.0619498252868653, | |
| "step": 2515, | |
| "token_acc": 0.6908962597035991 | |
| }, | |
| { | |
| "epoch": 0.8452121415394935, | |
| "grad_norm": 5.875, | |
| "learning_rate": 5.806543362721944e-08, | |
| "loss": 1.0390033721923828, | |
| "step": 2520, | |
| "token_acc": 0.6962721181927504 | |
| }, | |
| { | |
| "epoch": 0.8468891497568338, | |
| "grad_norm": 6.53125, | |
| "learning_rate": 5.683965362648974e-08, | |
| "loss": 1.0012418746948242, | |
| "step": 2525, | |
| "token_acc": 0.7041342967349378 | |
| }, | |
| { | |
| "epoch": 0.8485661579741741, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 5.5626170206472314e-08, | |
| "loss": 1.054752731323242, | |
| "step": 2530, | |
| "token_acc": 0.6903323262839879 | |
| }, | |
| { | |
| "epoch": 0.8502431661915143, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 5.442501703826802e-08, | |
| "loss": 1.042811965942383, | |
| "step": 2535, | |
| "token_acc": 0.697495183044316 | |
| }, | |
| { | |
| "epoch": 0.8519201744088546, | |
| "grad_norm": 6.125, | |
| "learning_rate": 5.3236227450844884e-08, | |
| "loss": 1.0447772026062012, | |
| "step": 2540, | |
| "token_acc": 0.6942773672452311 | |
| }, | |
| { | |
| "epoch": 0.8535971826261949, | |
| "grad_norm": 6.25, | |
| "learning_rate": 5.2059834430112357e-08, | |
| "loss": 1.0507349967956543, | |
| "step": 2545, | |
| "token_acc": 0.6938989408649603 | |
| }, | |
| { | |
| "epoch": 0.8552741908435352, | |
| "grad_norm": 6.0, | |
| "learning_rate": 5.089587061800643e-08, | |
| "loss": 1.0506488800048828, | |
| "step": 2550, | |
| "token_acc": 0.693307040946526 | |
| }, | |
| { | |
| "epoch": 0.8569511990608754, | |
| "grad_norm": 5.75, | |
| "learning_rate": 4.974436831158441e-08, | |
| "loss": 1.0228717803955079, | |
| "step": 2555, | |
| "token_acc": 0.702984989441242 | |
| }, | |
| { | |
| "epoch": 0.8586282072782156, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 4.8605359462127626e-08, | |
| "loss": 1.0637690544128418, | |
| "step": 2560, | |
| "token_acc": 0.6955604883462819 | |
| }, | |
| { | |
| "epoch": 0.8603052154955559, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 4.747887567425618e-08, | |
| "loss": 1.0389795303344727, | |
| "step": 2565, | |
| "token_acc": 0.7000414716511641 | |
| }, | |
| { | |
| "epoch": 0.8619822237128962, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 4.636494820505082e-08, | |
| "loss": 1.0297457695007324, | |
| "step": 2570, | |
| "token_acc": 0.7029873942324296 | |
| }, | |
| { | |
| "epoch": 0.8636592319302364, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 4.526360796318629e-08, | |
| "loss": 0.9971097946166992, | |
| "step": 2575, | |
| "token_acc": 0.7125974658869396 | |
| }, | |
| { | |
| "epoch": 0.8653362401475767, | |
| "grad_norm": 5.625, | |
| "learning_rate": 4.417488550807386e-08, | |
| "loss": 0.9800214767456055, | |
| "step": 2580, | |
| "token_acc": 0.715742511153601 | |
| }, | |
| { | |
| "epoch": 0.867013248364917, | |
| "grad_norm": 6.375, | |
| "learning_rate": 4.309881104901264e-08, | |
| "loss": 1.0177087783813477, | |
| "step": 2585, | |
| "token_acc": 0.7067709258930158 | |
| }, | |
| { | |
| "epoch": 0.8686902565822573, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 4.20354144443521e-08, | |
| "loss": 1.0427945137023926, | |
| "step": 2590, | |
| "token_acc": 0.6946957601561115 | |
| }, | |
| { | |
| "epoch": 0.8703672647995975, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 4.098472520066293e-08, | |
| "loss": 1.0284164428710938, | |
| "step": 2595, | |
| "token_acc": 0.6990140392240918 | |
| }, | |
| { | |
| "epoch": 0.8720442730169378, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 3.994677247191908e-08, | |
| "loss": 1.0570375442504882, | |
| "step": 2600, | |
| "token_acc": 0.6961351809624042 | |
| }, | |
| { | |
| "epoch": 0.8737212812342781, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.892158505868798e-08, | |
| "loss": 1.0632140159606933, | |
| "step": 2605, | |
| "token_acc": 0.6917707567964732 | |
| }, | |
| { | |
| "epoch": 0.8753982894516183, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 3.7909191407332066e-08, | |
| "loss": 1.0276185035705567, | |
| "step": 2610, | |
| "token_acc": 0.7006794751640113 | |
| }, | |
| { | |
| "epoch": 0.8770752976689585, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 3.690961960921879e-08, | |
| "loss": 1.032079315185547, | |
| "step": 2615, | |
| "token_acc": 0.7047474528506394 | |
| }, | |
| { | |
| "epoch": 0.8787523058862988, | |
| "grad_norm": 5.875, | |
| "learning_rate": 3.5922897399942144e-08, | |
| "loss": 1.0215091705322266, | |
| "step": 2620, | |
| "token_acc": 0.7023193629018194 | |
| }, | |
| { | |
| "epoch": 0.8804293141036391, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 3.494905215855187e-08, | |
| "loss": 1.0204978942871095, | |
| "step": 2625, | |
| "token_acc": 0.7062290472585704 | |
| }, | |
| { | |
| "epoch": 0.8821063223209794, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 3.3988110906794875e-08, | |
| "loss": 1.0444301605224608, | |
| "step": 2630, | |
| "token_acc": 0.7006415252712992 | |
| }, | |
| { | |
| "epoch": 0.8837833305383196, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 3.304010030836452e-08, | |
| "loss": 1.070432472229004, | |
| "step": 2635, | |
| "token_acc": 0.6982341693847711 | |
| }, | |
| { | |
| "epoch": 0.8854603387556599, | |
| "grad_norm": 6.75, | |
| "learning_rate": 3.210504666816133e-08, | |
| "loss": 1.0351852416992187, | |
| "step": 2640, | |
| "token_acc": 0.7003612281757977 | |
| }, | |
| { | |
| "epoch": 0.8871373469730002, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 3.118297593156316e-08, | |
| "loss": 1.0338337898254395, | |
| "step": 2645, | |
| "token_acc": 0.6987579354126414 | |
| }, | |
| { | |
| "epoch": 0.8888143551903405, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 3.0273913683704745e-08, | |
| "loss": 1.0125846862792969, | |
| "step": 2650, | |
| "token_acc": 0.7042808634489449 | |
| }, | |
| { | |
| "epoch": 0.8904913634076806, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 2.9377885148768268e-08, | |
| "loss": 1.0237887382507325, | |
| "step": 2655, | |
| "token_acc": 0.7018055399798139 | |
| }, | |
| { | |
| "epoch": 0.8921683716250209, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 2.849491518928332e-08, | |
| "loss": 1.0422614097595215, | |
| "step": 2660, | |
| "token_acc": 0.6977418455533873 | |
| }, | |
| { | |
| "epoch": 0.8938453798423612, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.7625028305436838e-08, | |
| "loss": 1.0634222030639648, | |
| "step": 2665, | |
| "token_acc": 0.6951148758412625 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 6.25, | |
| "learning_rate": 2.67682486343937e-08, | |
| "loss": 1.062954807281494, | |
| "step": 2670, | |
| "token_acc": 0.6916413286673169 | |
| }, | |
| { | |
| "epoch": 0.8971993962770417, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 2.5924599949626312e-08, | |
| "loss": 1.0950417518615723, | |
| "step": 2675, | |
| "token_acc": 0.684926074888121 | |
| }, | |
| { | |
| "epoch": 0.898876404494382, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 2.5094105660255883e-08, | |
| "loss": 1.0323663711547852, | |
| "step": 2680, | |
| "token_acc": 0.6994619523443505 | |
| }, | |
| { | |
| "epoch": 0.9005534127117223, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 2.427678881040196e-08, | |
| "loss": 1.0677906036376954, | |
| "step": 2685, | |
| "token_acc": 0.6956816640289396 | |
| }, | |
| { | |
| "epoch": 0.9022304209290626, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 2.3472672078543588e-08, | |
| "loss": 1.0511894226074219, | |
| "step": 2690, | |
| "token_acc": 0.6961982540129541 | |
| }, | |
| { | |
| "epoch": 0.9039074291464028, | |
| "grad_norm": 6.375, | |
| "learning_rate": 2.268177777688973e-08, | |
| "loss": 1.0632619857788086, | |
| "step": 2695, | |
| "token_acc": 0.6953334427485092 | |
| }, | |
| { | |
| "epoch": 0.9055844373637431, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 2.1904127850760458e-08, | |
| "loss": 1.0489700317382813, | |
| "step": 2700, | |
| "token_acc": 0.6962432915921288 | |
| }, | |
| { | |
| "epoch": 0.9072614455810833, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 2.11397438779779e-08, | |
| "loss": 1.056574249267578, | |
| "step": 2705, | |
| "token_acc": 0.6962843295638126 | |
| }, | |
| { | |
| "epoch": 0.9089384537984236, | |
| "grad_norm": 6.0, | |
| "learning_rate": 2.038864706826726e-08, | |
| "loss": 1.018214511871338, | |
| "step": 2710, | |
| "token_acc": 0.7036482909425711 | |
| }, | |
| { | |
| "epoch": 0.9106154620157639, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 1.9650858262668602e-08, | |
| "loss": 1.0284092903137207, | |
| "step": 2715, | |
| "token_acc": 0.7034700315457413 | |
| }, | |
| { | |
| "epoch": 0.9122924702331041, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.892639793295858e-08, | |
| "loss": 1.033327293395996, | |
| "step": 2720, | |
| "token_acc": 0.7013865593201386 | |
| }, | |
| { | |
| "epoch": 0.9139694784504444, | |
| "grad_norm": 6.375, | |
| "learning_rate": 1.8215286181082144e-08, | |
| "loss": 1.0109923362731934, | |
| "step": 2725, | |
| "token_acc": 0.7045320429599906 | |
| }, | |
| { | |
| "epoch": 0.9156464866677847, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.751754273859507e-08, | |
| "loss": 0.9927311897277832, | |
| "step": 2730, | |
| "token_acc": 0.7082637489202419 | |
| }, | |
| { | |
| "epoch": 0.917323494885125, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 1.6833186966116074e-08, | |
| "loss": 1.0338494300842285, | |
| "step": 2735, | |
| "token_acc": 0.7046143482654093 | |
| }, | |
| { | |
| "epoch": 0.9190005031024652, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.6162237852790083e-08, | |
| "loss": 1.0125389099121094, | |
| "step": 2740, | |
| "token_acc": 0.7063389391979301 | |
| }, | |
| { | |
| "epoch": 0.9206775113198055, | |
| "grad_norm": 6.625, | |
| "learning_rate": 1.550471401576092e-08, | |
| "loss": 1.014925193786621, | |
| "step": 2745, | |
| "token_acc": 0.7055999106694211 | |
| }, | |
| { | |
| "epoch": 0.9223545195371458, | |
| "grad_norm": 6.25, | |
| "learning_rate": 1.4860633699654957e-08, | |
| "loss": 0.9907565116882324, | |
| "step": 2750, | |
| "token_acc": 0.7117064647641235 | |
| }, | |
| { | |
| "epoch": 0.924031527754486, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 1.4230014776074662e-08, | |
| "loss": 1.0230751037597656, | |
| "step": 2755, | |
| "token_acc": 0.7046689586336919 | |
| }, | |
| { | |
| "epoch": 0.9257085359718262, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 1.3612874743103186e-08, | |
| "loss": 1.012996292114258, | |
| "step": 2760, | |
| "token_acc": 0.7042463088016482 | |
| }, | |
| { | |
| "epoch": 0.9273855441891665, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 1.3009230724818132e-08, | |
| "loss": 1.0435994148254395, | |
| "step": 2765, | |
| "token_acc": 0.6978461885430762 | |
| }, | |
| { | |
| "epoch": 0.9290625524065068, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 1.2419099470816873e-08, | |
| "loss": 1.0455306053161622, | |
| "step": 2770, | |
| "token_acc": 0.6964831804281345 | |
| }, | |
| { | |
| "epoch": 0.9307395606238471, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 1.1842497355751824e-08, | |
| "loss": 1.0471959114074707, | |
| "step": 2775, | |
| "token_acc": 0.697155162643092 | |
| }, | |
| { | |
| "epoch": 0.9324165688411873, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.1279440378875904e-08, | |
| "loss": 1.0397340774536132, | |
| "step": 2780, | |
| "token_acc": 0.6997011952191236 | |
| }, | |
| { | |
| "epoch": 0.9340935770585276, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 1.0729944163598514e-08, | |
| "loss": 1.0325641632080078, | |
| "step": 2785, | |
| "token_acc": 0.6986331569664903 | |
| }, | |
| { | |
| "epoch": 0.9357705852758679, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 1.0194023957052268e-08, | |
| "loss": 1.0409153938293456, | |
| "step": 2790, | |
| "token_acc": 0.6998793311498017 | |
| }, | |
| { | |
| "epoch": 0.9374475934932082, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 9.671694629669768e-09, | |
| "loss": 1.012611484527588, | |
| "step": 2795, | |
| "token_acc": 0.7074814896673666 | |
| }, | |
| { | |
| "epoch": 0.9391246017105483, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 9.162970674771176e-09, | |
| "loss": 1.0336393356323241, | |
| "step": 2800, | |
| "token_acc": 0.7030631260191008 | |
| }, | |
| { | |
| "epoch": 0.9408016099278886, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 8.667866208161678e-09, | |
| "loss": 1.0824786186218263, | |
| "step": 2805, | |
| "token_acc": 0.6870084390607957 | |
| }, | |
| { | |
| "epoch": 0.9424786181452289, | |
| "grad_norm": 5.625, | |
| "learning_rate": 8.186394967740207e-09, | |
| "loss": 1.0423837661743165, | |
| "step": 2810, | |
| "token_acc": 0.6977280590473994 | |
| }, | |
| { | |
| "epoch": 0.9441556263625692, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 7.718570313118067e-09, | |
| "loss": 1.0381958961486817, | |
| "step": 2815, | |
| "token_acc": 0.6993798542958637 | |
| }, | |
| { | |
| "epoch": 0.9458326345799094, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 7.2644052252482934e-09, | |
| "loss": 1.0570508003234864, | |
| "step": 2820, | |
| "token_acc": 0.695105855023494 | |
| }, | |
| { | |
| "epoch": 0.9475096427972497, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 6.823912306065327e-09, | |
| "loss": 1.0608396530151367, | |
| "step": 2825, | |
| "token_acc": 0.6950763644131155 | |
| }, | |
| { | |
| "epoch": 0.94918665101459, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 6.397103778135571e-09, | |
| "loss": 1.0604951858520508, | |
| "step": 2830, | |
| "token_acc": 0.6971572810866038 | |
| }, | |
| { | |
| "epoch": 0.9508636592319303, | |
| "grad_norm": 6.25, | |
| "learning_rate": 5.983991484317996e-09, | |
| "loss": 1.0228150367736817, | |
| "step": 2835, | |
| "token_acc": 0.7006679165690181 | |
| }, | |
| { | |
| "epoch": 0.9525406674492705, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 5.5845868874357385e-09, | |
| "loss": 1.0492300033569335, | |
| "step": 2840, | |
| "token_acc": 0.6925122867880891 | |
| }, | |
| { | |
| "epoch": 0.9542176756666108, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 5.198901069957961e-09, | |
| "loss": 1.023094081878662, | |
| "step": 2845, | |
| "token_acc": 0.7031940482183812 | |
| }, | |
| { | |
| "epoch": 0.955894683883951, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 4.826944733692328e-09, | |
| "loss": 1.06231689453125, | |
| "step": 2850, | |
| "token_acc": 0.6977053349517369 | |
| }, | |
| { | |
| "epoch": 0.9575716921012913, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.468728199487959e-09, | |
| "loss": 1.0265810012817382, | |
| "step": 2855, | |
| "token_acc": 0.7019859813084112 | |
| }, | |
| { | |
| "epoch": 0.9592487003186315, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 4.1242614069493255e-09, | |
| "loss": 1.0546887397766114, | |
| "step": 2860, | |
| "token_acc": 0.6936714670950714 | |
| }, | |
| { | |
| "epoch": 0.9609257085359718, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 3.793553914160253e-09, | |
| "loss": 1.0320470809936524, | |
| "step": 2865, | |
| "token_acc": 0.7007982541779131 | |
| }, | |
| { | |
| "epoch": 0.9626027167533121, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 3.4766148974185728e-09, | |
| "loss": 1.0567503929138184, | |
| "step": 2870, | |
| "token_acc": 0.6973000742051487 | |
| }, | |
| { | |
| "epoch": 0.9642797249706524, | |
| "grad_norm": 6.0, | |
| "learning_rate": 3.173453150981831e-09, | |
| "loss": 1.027595043182373, | |
| "step": 2875, | |
| "token_acc": 0.7055168843122591 | |
| }, | |
| { | |
| "epoch": 0.9659567331879926, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 2.884077086823089e-09, | |
| "loss": 1.0573992729187012, | |
| "step": 2880, | |
| "token_acc": 0.6938729623383924 | |
| }, | |
| { | |
| "epoch": 0.9676337414053329, | |
| "grad_norm": 6.0, | |
| "learning_rate": 2.608494734397504e-09, | |
| "loss": 0.9933361053466797, | |
| "step": 2885, | |
| "token_acc": 0.7056563259309487 | |
| }, | |
| { | |
| "epoch": 0.9693107496226732, | |
| "grad_norm": 6.28125, | |
| "learning_rate": 2.3467137404195036e-09, | |
| "loss": 1.061786937713623, | |
| "step": 2890, | |
| "token_acc": 0.6947505226874612 | |
| }, | |
| { | |
| "epoch": 0.9709877578400135, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.098741368650736e-09, | |
| "loss": 1.0342968940734862, | |
| "step": 2895, | |
| "token_acc": 0.6952941849162331 | |
| }, | |
| { | |
| "epoch": 0.9726647660573536, | |
| "grad_norm": 6.25, | |
| "learning_rate": 1.864584499698507e-09, | |
| "loss": 1.0258048057556153, | |
| "step": 2900, | |
| "token_acc": 0.705042560068958 | |
| }, | |
| { | |
| "epoch": 0.9743417742746939, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 1.6442496308246567e-09, | |
| "loss": 1.0686445236206055, | |
| "step": 2905, | |
| "token_acc": 0.6945458630567882 | |
| }, | |
| { | |
| "epoch": 0.9760187824920342, | |
| "grad_norm": 5.875, | |
| "learning_rate": 1.4377428757655353e-09, | |
| "loss": 1.0087648391723634, | |
| "step": 2910, | |
| "token_acc": 0.7034811903425042 | |
| }, | |
| { | |
| "epoch": 0.9776957907093745, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 1.2450699645621399e-09, | |
| "loss": 1.0127264976501464, | |
| "step": 2915, | |
| "token_acc": 0.7020366250213931 | |
| }, | |
| { | |
| "epoch": 0.9793727989267147, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 1.0662362434013529e-09, | |
| "loss": 1.0034564971923827, | |
| "step": 2920, | |
| "token_acc": 0.7072002837550249 | |
| }, | |
| { | |
| "epoch": 0.981049807144055, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 9.012466744673375e-10, | |
| "loss": 1.0292555809020996, | |
| "step": 2925, | |
| "token_acc": 0.6997058823529412 | |
| }, | |
| { | |
| "epoch": 0.9827268153613953, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 7.50105835804149e-10, | |
| "loss": 1.0281611442565919, | |
| "step": 2930, | |
| "token_acc": 0.700266082329003 | |
| }, | |
| { | |
| "epoch": 0.9844038235787356, | |
| "grad_norm": 6.0, | |
| "learning_rate": 6.128179211884466e-10, | |
| "loss": 1.0283337593078614, | |
| "step": 2935, | |
| "token_acc": 0.7049981796432101 | |
| }, | |
| { | |
| "epoch": 0.9860808317960758, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 4.893867400131979e-10, | |
| "loss": 1.0271111488342286, | |
| "step": 2940, | |
| "token_acc": 0.7001903908612387 | |
| }, | |
| { | |
| "epoch": 0.987757840013416, | |
| "grad_norm": 6.5, | |
| "learning_rate": 3.7981571718204153e-10, | |
| "loss": 0.976413345336914, | |
| "step": 2945, | |
| "token_acc": 0.7121462264150943 | |
| }, | |
| { | |
| "epoch": 0.9894348482307563, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 2.8410789301425155e-10, | |
| "loss": 1.028111457824707, | |
| "step": 2950, | |
| "token_acc": 0.7010195164579085 | |
| }, | |
| { | |
| "epoch": 0.9911118564480966, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 2.022659231602497e-10, | |
| "loss": 1.074041748046875, | |
| "step": 2955, | |
| "token_acc": 0.6909885131855686 | |
| }, | |
| { | |
| "epoch": 0.9927888646654368, | |
| "grad_norm": 6.09375, | |
| "learning_rate": 1.3429207852805324e-10, | |
| "loss": 1.050593662261963, | |
| "step": 2960, | |
| "token_acc": 0.6958233107369342 | |
| }, | |
| { | |
| "epoch": 0.9944658728827771, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 8.018824522032507e-11, | |
| "loss": 1.0603754043579101, | |
| "step": 2965, | |
| "token_acc": 0.6945898778359512 | |
| }, | |
| { | |
| "epoch": 0.9961428811001174, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 3.995592448174934e-11, | |
| "loss": 1.0456673622131347, | |
| "step": 2970, | |
| "token_acc": 0.697628927089508 | |
| }, | |
| { | |
| "epoch": 0.9978198893174577, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 1.359623265767551e-11, | |
| "loss": 1.055088996887207, | |
| "step": 2975, | |
| "token_acc": 0.6971778149708547 | |
| }, | |
| { | |
| "epoch": 0.9994968975347979, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 1.109901163032223e-12, | |
| "loss": 1.0523943901062012, | |
| "step": 2980, | |
| "token_acc": 0.7027149321266968 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2982, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 80, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.11716299871276e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |