| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9995543009953944, |
| "eval_steps": 100, |
| "global_step": 841, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0011885306789481504, |
| "grad_norm": 548.1797235189983, |
| "learning_rate": 1.1627906976744187e-07, |
| "loss": 8.022117614746094, |
| "memory(GiB)": 46.35, |
| "step": 1, |
| "token_acc": 0.16666666666666666, |
| "train_speed(iter/s)": 0.035854 |
| }, |
| { |
| "epoch": 0.005942653394740751, |
| "grad_norm": 517.1879841831781, |
| "learning_rate": 5.813953488372094e-07, |
| "loss": 7.886555194854736, |
| "memory(GiB)": 50.11, |
| "step": 5, |
| "token_acc": 0.14788732394366197, |
| "train_speed(iter/s)": 0.064567 |
| }, |
| { |
| "epoch": 0.011885306789481503, |
| "grad_norm": 295.78882511572357, |
| "learning_rate": 1.1627906976744188e-06, |
| "loss": 6.243068695068359, |
| "memory(GiB)": 50.11, |
| "step": 10, |
| "token_acc": 0.2388888888888889, |
| "train_speed(iter/s)": 0.071958 |
| }, |
| { |
| "epoch": 0.017827960184222256, |
| "grad_norm": 12.276341768278998, |
| "learning_rate": 1.7441860465116282e-06, |
| "loss": 1.707086181640625, |
| "memory(GiB)": 50.11, |
| "step": 15, |
| "token_acc": 0.5085714285714286, |
| "train_speed(iter/s)": 0.074777 |
| }, |
| { |
| "epoch": 0.023770613578963005, |
| "grad_norm": 14.766060462541946, |
| "learning_rate": 2.3255813953488376e-06, |
| "loss": 0.6782948493957519, |
| "memory(GiB)": 50.11, |
| "step": 20, |
| "token_acc": 0.7597765363128491, |
| "train_speed(iter/s)": 0.076407 |
| }, |
| { |
| "epoch": 0.02971326697370376, |
| "grad_norm": 14.972149762033396, |
| "learning_rate": 2.9069767441860468e-06, |
| "loss": 0.5007028579711914, |
| "memory(GiB)": 50.11, |
| "step": 25, |
| "token_acc": 0.7833333333333333, |
| "train_speed(iter/s)": 0.077351 |
| }, |
| { |
| "epoch": 0.03565592036844451, |
| "grad_norm": 14.305194685298874, |
| "learning_rate": 3.4883720930232564e-06, |
| "loss": 0.47702178955078123, |
| "memory(GiB)": 50.11, |
| "step": 30, |
| "token_acc": 0.8111111111111111, |
| "train_speed(iter/s)": 0.078111 |
| }, |
| { |
| "epoch": 0.041598573763185265, |
| "grad_norm": 6.111868397462935, |
| "learning_rate": 4.0697674418604655e-06, |
| "loss": 0.42873945236206057, |
| "memory(GiB)": 50.11, |
| "step": 35, |
| "token_acc": 0.8225806451612904, |
| "train_speed(iter/s)": 0.078557 |
| }, |
| { |
| "epoch": 0.04754122715792601, |
| "grad_norm": 8.991713613954735, |
| "learning_rate": 4.651162790697675e-06, |
| "loss": 0.403080415725708, |
| "memory(GiB)": 50.11, |
| "step": 40, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.078949 |
| }, |
| { |
| "epoch": 0.053483880552666764, |
| "grad_norm": 4.917565148203119, |
| "learning_rate": 4.999922507133964e-06, |
| "loss": 0.37815165519714355, |
| "memory(GiB)": 50.11, |
| "step": 45, |
| "token_acc": 0.8789473684210526, |
| "train_speed(iter/s)": 0.07932 |
| }, |
| { |
| "epoch": 0.05942653394740752, |
| "grad_norm": 11.989491241774688, |
| "learning_rate": 4.999050767562379e-06, |
| "loss": 0.3381024360656738, |
| "memory(GiB)": 50.11, |
| "step": 50, |
| "token_acc": 0.8735632183908046, |
| "train_speed(iter/s)": 0.079547 |
| }, |
| { |
| "epoch": 0.06536918734214826, |
| "grad_norm": 6.777839360520193, |
| "learning_rate": 4.99721076122146e-06, |
| "loss": 0.3407719135284424, |
| "memory(GiB)": 50.11, |
| "step": 55, |
| "token_acc": 0.8579234972677595, |
| "train_speed(iter/s)": 0.079765 |
| }, |
| { |
| "epoch": 0.07131184073688902, |
| "grad_norm": 6.139296584815925, |
| "learning_rate": 4.994403201028695e-06, |
| "loss": 0.33808121681213377, |
| "memory(GiB)": 50.11, |
| "step": 60, |
| "token_acc": 0.8743455497382199, |
| "train_speed(iter/s)": 0.079935 |
| }, |
| { |
| "epoch": 0.07725449413162977, |
| "grad_norm": 5.890787284633843, |
| "learning_rate": 4.990629174784009e-06, |
| "loss": 0.3293055295944214, |
| "memory(GiB)": 50.11, |
| "step": 65, |
| "token_acc": 0.898936170212766, |
| "train_speed(iter/s)": 0.080067 |
| }, |
| { |
| "epoch": 0.08319714752637053, |
| "grad_norm": 10.931767446162842, |
| "learning_rate": 4.9858901447482924e-06, |
| "loss": 0.3187825679779053, |
| "memory(GiB)": 50.11, |
| "step": 70, |
| "token_acc": 0.8478260869565217, |
| "train_speed(iter/s)": 0.080169 |
| }, |
| { |
| "epoch": 0.08913980092111128, |
| "grad_norm": 10.824836091859028, |
| "learning_rate": 4.980187947076841e-06, |
| "loss": 0.34889607429504393, |
| "memory(GiB)": 50.11, |
| "step": 75, |
| "token_acc": 0.8495145631067961, |
| "train_speed(iter/s)": 0.08027 |
| }, |
| { |
| "epoch": 0.09508245431585202, |
| "grad_norm": 6.620646988752802, |
| "learning_rate": 4.973524791107931e-06, |
| "loss": 0.3007768154144287, |
| "memory(GiB)": 50.11, |
| "step": 80, |
| "token_acc": 0.8418079096045198, |
| "train_speed(iter/s)": 0.080366 |
| }, |
| { |
| "epoch": 0.10102510771059278, |
| "grad_norm": 9.519794919805669, |
| "learning_rate": 4.965903258506806e-06, |
| "loss": 0.34135324954986573, |
| "memory(GiB)": 50.11, |
| "step": 85, |
| "token_acc": 0.8839779005524862, |
| "train_speed(iter/s)": 0.080443 |
| }, |
| { |
| "epoch": 0.10696776110533353, |
| "grad_norm": 8.719690644097742, |
| "learning_rate": 4.957326302265395e-06, |
| "loss": 0.31029839515686036, |
| "memory(GiB)": 50.11, |
| "step": 90, |
| "token_acc": 0.8924731182795699, |
| "train_speed(iter/s)": 0.080511 |
| }, |
| { |
| "epoch": 0.11291041450007429, |
| "grad_norm": 7.358059720333543, |
| "learning_rate": 4.947797245558168e-06, |
| "loss": 0.3377894639968872, |
| "memory(GiB)": 50.11, |
| "step": 95, |
| "token_acc": 0.8789473684210526, |
| "train_speed(iter/s)": 0.080597 |
| }, |
| { |
| "epoch": 0.11885306789481503, |
| "grad_norm": 4.938431225494223, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.31557579040527345, |
| "memory(GiB)": 50.11, |
| "step": 100, |
| "token_acc": 0.8663101604278075, |
| "train_speed(iter/s)": 0.080572 |
| }, |
| { |
| "epoch": 0.12479572128955578, |
| "grad_norm": 7.518639654198759, |
| "learning_rate": 4.9258979664884595e-06, |
| "loss": 0.3270684242248535, |
| "memory(GiB)": 50.11, |
| "step": 105, |
| "token_acc": 0.8390804597701149, |
| "train_speed(iter/s)": 0.077371 |
| }, |
| { |
| "epoch": 0.13073837468429653, |
| "grad_norm": 5.275542865372311, |
| "learning_rate": 4.9135362290853365e-06, |
| "loss": 0.3212160348892212, |
| "memory(GiB)": 50.11, |
| "step": 110, |
| "token_acc": 0.8793103448275862, |
| "train_speed(iter/s)": 0.077562 |
| }, |
| { |
| "epoch": 0.1366810280790373, |
| "grad_norm": 5.256523302626162, |
| "learning_rate": 4.900239357847582e-06, |
| "loss": 0.32684850692749023, |
| "memory(GiB)": 50.11, |
| "step": 115, |
| "token_acc": 0.8659217877094972, |
| "train_speed(iter/s)": 0.077745 |
| }, |
| { |
| "epoch": 0.14262368147377805, |
| "grad_norm": 6.9956864362318925, |
| "learning_rate": 4.886012504698769e-06, |
| "loss": 0.3396021842956543, |
| "memory(GiB)": 50.11, |
| "step": 120, |
| "token_acc": 0.8314606741573034, |
| "train_speed(iter/s)": 0.077907 |
| }, |
| { |
| "epoch": 0.1485663348685188, |
| "grad_norm": 8.220091525088554, |
| "learning_rate": 4.870861181887514e-06, |
| "loss": 0.2878549575805664, |
| "memory(GiB)": 50.11, |
| "step": 125, |
| "token_acc": 0.867816091954023, |
| "train_speed(iter/s)": 0.078077 |
| }, |
| { |
| "epoch": 0.15450898826325954, |
| "grad_norm": 9.001408898309402, |
| "learning_rate": 4.854791259851735e-06, |
| "loss": 0.28747098445892333, |
| "memory(GiB)": 50.11, |
| "step": 130, |
| "token_acc": 0.9010416666666666, |
| "train_speed(iter/s)": 0.078215 |
| }, |
| { |
| "epoch": 0.1604516416580003, |
| "grad_norm": 9.115366763265595, |
| "learning_rate": 4.8378089649441355e-06, |
| "loss": 0.37898375988006594, |
| "memory(GiB)": 50.11, |
| "step": 135, |
| "token_acc": 0.8135593220338984, |
| "train_speed(iter/s)": 0.078346 |
| }, |
| { |
| "epoch": 0.16639429505274106, |
| "grad_norm": 6.196127668973169, |
| "learning_rate": 4.819920877019767e-06, |
| "loss": 0.3501492977142334, |
| "memory(GiB)": 50.11, |
| "step": 140, |
| "token_acc": 0.824468085106383, |
| "train_speed(iter/s)": 0.078454 |
| }, |
| { |
| "epoch": 0.1723369484474818, |
| "grad_norm": 6.007338381318857, |
| "learning_rate": 4.8011339268866505e-06, |
| "loss": 0.322367000579834, |
| "memory(GiB)": 50.11, |
| "step": 145, |
| "token_acc": 0.8932584269662921, |
| "train_speed(iter/s)": 0.07857 |
| }, |
| { |
| "epoch": 0.17827960184222255, |
| "grad_norm": 4.125624729653197, |
| "learning_rate": 4.781455393620407e-06, |
| "loss": 0.3071049690246582, |
| "memory(GiB)": 50.11, |
| "step": 150, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.078684 |
| }, |
| { |
| "epoch": 0.1842222552369633, |
| "grad_norm": 6.3546672576711085, |
| "learning_rate": 4.760892901743944e-06, |
| "loss": 0.28942744731903075, |
| "memory(GiB)": 50.11, |
| "step": 155, |
| "token_acc": 0.868020304568528, |
| "train_speed(iter/s)": 0.078783 |
| }, |
| { |
| "epoch": 0.19016490863170404, |
| "grad_norm": 6.332293006494747, |
| "learning_rate": 4.739454418273314e-06, |
| "loss": 0.2908078908920288, |
| "memory(GiB)": 50.11, |
| "step": 160, |
| "token_acc": 0.8160919540229885, |
| "train_speed(iter/s)": 0.07888 |
| }, |
| { |
| "epoch": 0.1961075620264448, |
| "grad_norm": 7.334991420256453, |
| "learning_rate": 4.717148249630859e-06, |
| "loss": 0.30244882106781007, |
| "memory(GiB)": 50.11, |
| "step": 165, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.078938 |
| }, |
| { |
| "epoch": 0.20205021542118556, |
| "grad_norm": 5.66996373194292, |
| "learning_rate": 4.693983038426857e-06, |
| "loss": 0.26120476722717284, |
| "memory(GiB)": 50.11, |
| "step": 170, |
| "token_acc": 0.9022988505747126, |
| "train_speed(iter/s)": 0.079018 |
| }, |
| { |
| "epoch": 0.20799286881592632, |
| "grad_norm": 5.485146565073312, |
| "learning_rate": 4.669967760110908e-06, |
| "loss": 0.3136150360107422, |
| "memory(GiB)": 50.11, |
| "step": 175, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.079091 |
| }, |
| { |
| "epoch": 0.21393552221066706, |
| "grad_norm": 4.790229015417923, |
| "learning_rate": 4.645111719494363e-06, |
| "loss": 0.31197218894958495, |
| "memory(GiB)": 50.11, |
| "step": 180, |
| "token_acc": 0.8707865168539326, |
| "train_speed(iter/s)": 0.079157 |
| }, |
| { |
| "epoch": 0.21987817560540782, |
| "grad_norm": 3.9788813029239924, |
| "learning_rate": 4.6194245471451395e-06, |
| "loss": 0.29698636531829836, |
| "memory(GiB)": 50.11, |
| "step": 185, |
| "token_acc": 0.8829787234042553, |
| "train_speed(iter/s)": 0.079217 |
| }, |
| { |
| "epoch": 0.22582082900014858, |
| "grad_norm": 5.38043307053944, |
| "learning_rate": 4.592916195656322e-06, |
| "loss": 0.3022609233856201, |
| "memory(GiB)": 50.11, |
| "step": 190, |
| "token_acc": 0.8870967741935484, |
| "train_speed(iter/s)": 0.079283 |
| }, |
| { |
| "epoch": 0.2317634823948893, |
| "grad_norm": 5.706060180746966, |
| "learning_rate": 4.565596935789987e-06, |
| "loss": 0.28669142723083496, |
| "memory(GiB)": 50.11, |
| "step": 195, |
| "token_acc": 0.8837209302325582, |
| "train_speed(iter/s)": 0.079339 |
| }, |
| { |
| "epoch": 0.23770613578963007, |
| "grad_norm": 4.399286357383267, |
| "learning_rate": 4.537477352497766e-06, |
| "loss": 0.27329254150390625, |
| "memory(GiB)": 50.11, |
| "step": 200, |
| "token_acc": 0.9301075268817204, |
| "train_speed(iter/s)": 0.079333 |
| }, |
| { |
| "epoch": 0.24364878918437083, |
| "grad_norm": 2.7396674582431135, |
| "learning_rate": 4.508568340819654e-06, |
| "loss": 0.30466504096984864, |
| "memory(GiB)": 50.11, |
| "step": 205, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.077513 |
| }, |
| { |
| "epoch": 0.24959144257911156, |
| "grad_norm": 6.688114488293933, |
| "learning_rate": 4.478881101662694e-06, |
| "loss": 0.2621138095855713, |
| "memory(GiB)": 50.11, |
| "step": 210, |
| "token_acc": 0.8810810810810811, |
| "train_speed(iter/s)": 0.077595 |
| }, |
| { |
| "epoch": 0.25553409597385235, |
| "grad_norm": 5.255888806481665, |
| "learning_rate": 4.44842713746114e-06, |
| "loss": 0.28530049324035645, |
| "memory(GiB)": 50.11, |
| "step": 215, |
| "token_acc": 0.9065934065934066, |
| "train_speed(iter/s)": 0.07769 |
| }, |
| { |
| "epoch": 0.26147674936859305, |
| "grad_norm": 3.91726058342407, |
| "learning_rate": 4.417218247719794e-06, |
| "loss": 0.31370389461517334, |
| "memory(GiB)": 50.11, |
| "step": 220, |
| "token_acc": 0.9130434782608695, |
| "train_speed(iter/s)": 0.077784 |
| }, |
| { |
| "epoch": 0.2674194027633338, |
| "grad_norm": 5.422963885256581, |
| "learning_rate": 4.385266524442241e-06, |
| "loss": 0.2774806499481201, |
| "memory(GiB)": 50.11, |
| "step": 225, |
| "token_acc": 0.8681318681318682, |
| "train_speed(iter/s)": 0.077873 |
| }, |
| { |
| "epoch": 0.2733620561580746, |
| "grad_norm": 6.722879151062824, |
| "learning_rate": 4.352584347445761e-06, |
| "loss": 0.2985124111175537, |
| "memory(GiB)": 50.11, |
| "step": 230, |
| "token_acc": 0.8736263736263736, |
| "train_speed(iter/s)": 0.077946 |
| }, |
| { |
| "epoch": 0.27930470955281533, |
| "grad_norm": 6.4224195510449285, |
| "learning_rate": 4.319184379564716e-06, |
| "loss": 0.28224008083343505, |
| "memory(GiB)": 50.11, |
| "step": 235, |
| "token_acc": 0.8839779005524862, |
| "train_speed(iter/s)": 0.077999 |
| }, |
| { |
| "epoch": 0.2852473629475561, |
| "grad_norm": 10.728017343540358, |
| "learning_rate": 4.285079561744292e-06, |
| "loss": 0.315199875831604, |
| "memory(GiB)": 50.11, |
| "step": 240, |
| "token_acc": 0.86, |
| "train_speed(iter/s)": 0.07806 |
| }, |
| { |
| "epoch": 0.29119001634229685, |
| "grad_norm": 3.5104593820561916, |
| "learning_rate": 4.250283108026474e-06, |
| "loss": 0.28578615188598633, |
| "memory(GiB)": 50.11, |
| "step": 245, |
| "token_acc": 0.8715083798882681, |
| "train_speed(iter/s)": 0.078121 |
| }, |
| { |
| "epoch": 0.2971326697370376, |
| "grad_norm": 12.24592817826425, |
| "learning_rate": 4.2148085004302205e-06, |
| "loss": 0.3042537927627563, |
| "memory(GiB)": 50.11, |
| "step": 250, |
| "token_acc": 0.8556149732620321, |
| "train_speed(iter/s)": 0.078192 |
| }, |
| { |
| "epoch": 0.3030753231317783, |
| "grad_norm": 5.395962132144766, |
| "learning_rate": 4.178669483727803e-06, |
| "loss": 0.32353897094726564, |
| "memory(GiB)": 50.11, |
| "step": 255, |
| "token_acc": 0.9037433155080213, |
| "train_speed(iter/s)": 0.078259 |
| }, |
| { |
| "epoch": 0.3090179765265191, |
| "grad_norm": 10.569254917118544, |
| "learning_rate": 4.141880060119336e-06, |
| "loss": 0.2870334148406982, |
| "memory(GiB)": 50.11, |
| "step": 260, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.078334 |
| }, |
| { |
| "epoch": 0.31496062992125984, |
| "grad_norm": 8.563532881846635, |
| "learning_rate": 4.104454483807579e-06, |
| "loss": 0.2994786262512207, |
| "memory(GiB)": 50.11, |
| "step": 265, |
| "token_acc": 0.9144385026737968, |
| "train_speed(iter/s)": 0.078387 |
| }, |
| { |
| "epoch": 0.3209032833160006, |
| "grad_norm": 6.750118253503608, |
| "learning_rate": 4.066407255475086e-06, |
| "loss": 0.2697244644165039, |
| "memory(GiB)": 50.11, |
| "step": 270, |
| "token_acc": 0.8956043956043956, |
| "train_speed(iter/s)": 0.078439 |
| }, |
| { |
| "epoch": 0.32684593671074136, |
| "grad_norm": 3.934344456524906, |
| "learning_rate": 4.027753116665859e-06, |
| "loss": 0.2338794946670532, |
| "memory(GiB)": 50.11, |
| "step": 275, |
| "token_acc": 0.9405405405405406, |
| "train_speed(iter/s)": 0.078498 |
| }, |
| { |
| "epoch": 0.3327885901054821, |
| "grad_norm": 5.970848924895414, |
| "learning_rate": 3.988507044073687e-06, |
| "loss": 0.28338768482208254, |
| "memory(GiB)": 50.11, |
| "step": 280, |
| "token_acc": 0.8804347826086957, |
| "train_speed(iter/s)": 0.078559 |
| }, |
| { |
| "epoch": 0.3387312435002228, |
| "grad_norm": 3.9146270439627364, |
| "learning_rate": 3.948684243739366e-06, |
| "loss": 0.2838298797607422, |
| "memory(GiB)": 50.11, |
| "step": 285, |
| "token_acc": 0.8797814207650273, |
| "train_speed(iter/s)": 0.078615 |
| }, |
| { |
| "epoch": 0.3446738968949636, |
| "grad_norm": 9.327297524134098, |
| "learning_rate": 3.908300145159055e-06, |
| "loss": 0.2870032787322998, |
| "memory(GiB)": 50.11, |
| "step": 290, |
| "token_acc": 0.8707865168539326, |
| "train_speed(iter/s)": 0.078663 |
| }, |
| { |
| "epoch": 0.35061655028970434, |
| "grad_norm": 6.335507218560851, |
| "learning_rate": 3.8673703953060685e-06, |
| "loss": 0.278945779800415, |
| "memory(GiB)": 50.11, |
| "step": 295, |
| "token_acc": 0.8994413407821229, |
| "train_speed(iter/s)": 0.078706 |
| }, |
| { |
| "epoch": 0.3565592036844451, |
| "grad_norm": 3.6244644464198563, |
| "learning_rate": 3.8259108525683854e-06, |
| "loss": 0.2880126953125, |
| "memory(GiB)": 50.11, |
| "step": 300, |
| "token_acc": 0.8770949720670391, |
| "train_speed(iter/s)": 0.07875 |
| }, |
| { |
| "epoch": 0.36250185707918586, |
| "grad_norm": 9.73665571895264, |
| "learning_rate": 3.7839375806042672e-06, |
| "loss": 0.3021416187286377, |
| "memory(GiB)": 50.11, |
| "step": 305, |
| "token_acc": 0.8829787234042553, |
| "train_speed(iter/s)": 0.077565 |
| }, |
| { |
| "epoch": 0.3684445104739266, |
| "grad_norm": 4.111562074991708, |
| "learning_rate": 3.741466842118327e-06, |
| "loss": 0.24168176651000978, |
| "memory(GiB)": 50.11, |
| "step": 310, |
| "token_acc": 0.9135135135135135, |
| "train_speed(iter/s)": 0.077626 |
| }, |
| { |
| "epoch": 0.3743871638686674, |
| "grad_norm": 5.628251208959277, |
| "learning_rate": 3.698515092560481e-06, |
| "loss": 0.28665938377380373, |
| "memory(GiB)": 50.11, |
| "step": 315, |
| "token_acc": 0.8978494623655914, |
| "train_speed(iter/s)": 0.07769 |
| }, |
| { |
| "epoch": 0.3803298172634081, |
| "grad_norm": 3.2364463841026265, |
| "learning_rate": 3.655098973750223e-06, |
| "loss": 0.2696410894393921, |
| "memory(GiB)": 50.11, |
| "step": 320, |
| "token_acc": 0.9034090909090909, |
| "train_speed(iter/s)": 0.077754 |
| }, |
| { |
| "epoch": 0.38627247065814885, |
| "grad_norm": 6.612958655073718, |
| "learning_rate": 3.61123530742869e-06, |
| "loss": 0.26622281074523924, |
| "memory(GiB)": 50.11, |
| "step": 325, |
| "token_acc": 0.9080459770114943, |
| "train_speed(iter/s)": 0.077814 |
| }, |
| { |
| "epoch": 0.3922151240528896, |
| "grad_norm": 3.3735858832830448, |
| "learning_rate": 3.5669410887410095e-06, |
| "loss": 0.28887372016906737, |
| "memory(GiB)": 50.11, |
| "step": 330, |
| "token_acc": 0.9055555555555556, |
| "train_speed(iter/s)": 0.077876 |
| }, |
| { |
| "epoch": 0.39815777744763037, |
| "grad_norm": 2.3741384793834066, |
| "learning_rate": 3.5222334796514724e-06, |
| "loss": 0.27968392372131345, |
| "memory(GiB)": 50.11, |
| "step": 335, |
| "token_acc": 0.88268156424581, |
| "train_speed(iter/s)": 0.077927 |
| }, |
| { |
| "epoch": 0.40410043084237113, |
| "grad_norm": 4.628193650632556, |
| "learning_rate": 3.477129802294057e-06, |
| "loss": 0.3233078956604004, |
| "memory(GiB)": 50.11, |
| "step": 340, |
| "token_acc": 0.8111111111111111, |
| "train_speed(iter/s)": 0.077985 |
| }, |
| { |
| "epoch": 0.4100430842371119, |
| "grad_norm": 5.86802322326311, |
| "learning_rate": 3.431647532260908e-06, |
| "loss": 0.2708756923675537, |
| "memory(GiB)": 50.11, |
| "step": 345, |
| "token_acc": 0.9037433155080213, |
| "train_speed(iter/s)": 0.078049 |
| }, |
| { |
| "epoch": 0.41598573763185265, |
| "grad_norm": 3.7680564123531686, |
| "learning_rate": 3.385804291831347e-06, |
| "loss": 0.28063795566558836, |
| "memory(GiB)": 50.11, |
| "step": 350, |
| "token_acc": 0.8846153846153846, |
| "train_speed(iter/s)": 0.078093 |
| }, |
| { |
| "epoch": 0.42192839102659335, |
| "grad_norm": 3.5932738811910006, |
| "learning_rate": 3.3396178431440572e-06, |
| "loss": 0.25298926830291746, |
| "memory(GiB)": 50.11, |
| "step": 355, |
| "token_acc": 0.9010989010989011, |
| "train_speed(iter/s)": 0.078147 |
| }, |
| { |
| "epoch": 0.4278710444213341, |
| "grad_norm": 8.287812705069305, |
| "learning_rate": 3.2931060813150685e-06, |
| "loss": 0.27823238372802733, |
| "memory(GiB)": 50.11, |
| "step": 360, |
| "token_acc": 0.8804347826086957, |
| "train_speed(iter/s)": 0.0782 |
| }, |
| { |
| "epoch": 0.4338136978160749, |
| "grad_norm": 5.939778148959477, |
| "learning_rate": 3.246287027504237e-06, |
| "loss": 0.23206405639648436, |
| "memory(GiB)": 50.11, |
| "step": 365, |
| "token_acc": 0.90625, |
| "train_speed(iter/s)": 0.07825 |
| }, |
| { |
| "epoch": 0.43975635121081563, |
| "grad_norm": 3.9285461219783153, |
| "learning_rate": 3.1991788219328657e-06, |
| "loss": 0.24766221046447753, |
| "memory(GiB)": 50.11, |
| "step": 370, |
| "token_acc": 0.9069767441860465, |
| "train_speed(iter/s)": 0.078298 |
| }, |
| { |
| "epoch": 0.4456990046055564, |
| "grad_norm": 3.484393370769258, |
| "learning_rate": 3.151799716855215e-06, |
| "loss": 0.2592806339263916, |
| "memory(GiB)": 50.11, |
| "step": 375, |
| "token_acc": 0.8716577540106952, |
| "train_speed(iter/s)": 0.078342 |
| }, |
| { |
| "epoch": 0.45164165800029715, |
| "grad_norm": 3.7696986787730387, |
| "learning_rate": 3.1041680694865937e-06, |
| "loss": 0.24771764278411865, |
| "memory(GiB)": 50.11, |
| "step": 380, |
| "token_acc": 0.8651685393258427, |
| "train_speed(iter/s)": 0.078379 |
| }, |
| { |
| "epoch": 0.45758431139503786, |
| "grad_norm": 4.392389040330689, |
| "learning_rate": 3.056302334890786e-06, |
| "loss": 0.2508379936218262, |
| "memory(GiB)": 50.11, |
| "step": 385, |
| "token_acc": 0.9011627906976745, |
| "train_speed(iter/s)": 0.078418 |
| }, |
| { |
| "epoch": 0.4635269647897786, |
| "grad_norm": 5.433260978821099, |
| "learning_rate": 3.0082210588295673e-06, |
| "loss": 0.25749917030334474, |
| "memory(GiB)": 50.11, |
| "step": 390, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.078455 |
| }, |
| { |
| "epoch": 0.4694696181845194, |
| "grad_norm": 9.046890762181722, |
| "learning_rate": 2.9599428705770773e-06, |
| "loss": 0.24419078826904297, |
| "memory(GiB)": 50.11, |
| "step": 395, |
| "token_acc": 0.918918918918919, |
| "train_speed(iter/s)": 0.078496 |
| }, |
| { |
| "epoch": 0.47541227157926014, |
| "grad_norm": 5.91365665819666, |
| "learning_rate": 2.911486475701835e-06, |
| "loss": 0.2059952735900879, |
| "memory(GiB)": 50.11, |
| "step": 400, |
| "token_acc": 0.8932584269662921, |
| "train_speed(iter/s)": 0.078535 |
| }, |
| { |
| "epoch": 0.4813549249740009, |
| "grad_norm": 5.755994095586612, |
| "learning_rate": 2.8628706488191994e-06, |
| "loss": 0.24112114906311036, |
| "memory(GiB)": 50.11, |
| "step": 405, |
| "token_acc": 0.9028571428571428, |
| "train_speed(iter/s)": 0.077697 |
| }, |
| { |
| "epoch": 0.48729757836874166, |
| "grad_norm": 5.029325178235288, |
| "learning_rate": 2.814114226317066e-06, |
| "loss": 0.2753992795944214, |
| "memory(GiB)": 50.11, |
| "step": 410, |
| "token_acc": 0.8693181818181818, |
| "train_speed(iter/s)": 0.077745 |
| }, |
| { |
| "epoch": 0.4932402317634824, |
| "grad_norm": 4.672471465609193, |
| "learning_rate": 2.7652360990576457e-06, |
| "loss": 0.25846428871154786, |
| "memory(GiB)": 50.11, |
| "step": 415, |
| "token_acc": 0.9080459770114943, |
| "train_speed(iter/s)": 0.07779 |
| }, |
| { |
| "epoch": 0.4991828851582231, |
| "grad_norm": 3.7971655368107315, |
| "learning_rate": 2.7162552050581172e-06, |
| "loss": 0.2129420280456543, |
| "memory(GiB)": 50.11, |
| "step": 420, |
| "token_acc": 0.9447513812154696, |
| "train_speed(iter/s)": 0.077839 |
| }, |
| { |
| "epoch": 0.5051255385529639, |
| "grad_norm": 4.868089805000437, |
| "learning_rate": 2.6671905221530286e-06, |
| "loss": 0.2723516941070557, |
| "memory(GiB)": 50.11, |
| "step": 425, |
| "token_acc": 0.8895027624309392, |
| "train_speed(iter/s)": 0.077893 |
| }, |
| { |
| "epoch": 0.5110681919477047, |
| "grad_norm": 5.948903426193995, |
| "learning_rate": 2.6180610606412587e-06, |
| "loss": 0.28262810707092284, |
| "memory(GiB)": 50.11, |
| "step": 430, |
| "token_acc": 0.8497109826589595, |
| "train_speed(iter/s)": 0.077943 |
| }, |
| { |
| "epoch": 0.5170108453424453, |
| "grad_norm": 5.296623912717331, |
| "learning_rate": 2.5688858559204056e-06, |
| "loss": 0.24843716621398926, |
| "memory(GiB)": 50.11, |
| "step": 435, |
| "token_acc": 0.9096045197740112, |
| "train_speed(iter/s)": 0.077992 |
| }, |
| { |
| "epoch": 0.5229534987371861, |
| "grad_norm": 2.8799847075968446, |
| "learning_rate": 2.519683961111447e-06, |
| "loss": 0.2587742805480957, |
| "memory(GiB)": 50.11, |
| "step": 440, |
| "token_acc": 0.9010989010989011, |
| "train_speed(iter/s)": 0.078034 |
| }, |
| { |
| "epoch": 0.5288961521319269, |
| "grad_norm": 4.4925409006318615, |
| "learning_rate": 2.470474439676539e-06, |
| "loss": 0.24898104667663573, |
| "memory(GiB)": 50.11, |
| "step": 445, |
| "token_acc": 0.8641304347826086, |
| "train_speed(iter/s)": 0.078071 |
| }, |
| { |
| "epoch": 0.5348388055266676, |
| "grad_norm": 3.120530995149119, |
| "learning_rate": 2.4212763580328026e-06, |
| "loss": 0.27778019905090334, |
| "memory(GiB)": 50.11, |
| "step": 450, |
| "token_acc": 0.9180327868852459, |
| "train_speed(iter/s)": 0.078105 |
| }, |
| { |
| "epoch": 0.5407814589214084, |
| "grad_norm": 8.148649805708308, |
| "learning_rate": 2.3721087781649677e-06, |
| "loss": 0.26514854431152346, |
| "memory(GiB)": 50.11, |
| "step": 455, |
| "token_acc": 0.8941176470588236, |
| "train_speed(iter/s)": 0.078143 |
| }, |
| { |
| "epoch": 0.5467241123161491, |
| "grad_norm": 12.288778203336234, |
| "learning_rate": 2.322990750239733e-06, |
| "loss": 0.2430635690689087, |
| "memory(GiB)": 50.11, |
| "step": 460, |
| "token_acc": 0.9080459770114943, |
| "train_speed(iter/s)": 0.078177 |
| }, |
| { |
| "epoch": 0.5526667657108899, |
| "grad_norm": 4.975433909870037, |
| "learning_rate": 2.2739413052247112e-06, |
| "loss": 0.23356072902679442, |
| "memory(GiB)": 50.11, |
| "step": 465, |
| "token_acc": 0.9028571428571428, |
| "train_speed(iter/s)": 0.078208 |
| }, |
| { |
| "epoch": 0.5586094191056307, |
| "grad_norm": 4.123700733275386, |
| "learning_rate": 2.224979447514802e-06, |
| "loss": 0.2492293357849121, |
| "memory(GiB)": 50.11, |
| "step": 470, |
| "token_acc": 0.8983050847457628, |
| "train_speed(iter/s)": 0.078242 |
| }, |
| { |
| "epoch": 0.5645520725003714, |
| "grad_norm": 8.285141746199841, |
| "learning_rate": 2.1761241475688697e-06, |
| "loss": 0.23113720417022704, |
| "memory(GiB)": 50.11, |
| "step": 475, |
| "token_acc": 0.9135135135135135, |
| "train_speed(iter/s)": 0.078271 |
| }, |
| { |
| "epoch": 0.5704947258951122, |
| "grad_norm": 3.60546896428184, |
| "learning_rate": 2.1273943345595637e-06, |
| "loss": 0.247955584526062, |
| "memory(GiB)": 50.11, |
| "step": 480, |
| "token_acc": 0.872093023255814, |
| "train_speed(iter/s)": 0.078303 |
| }, |
| { |
| "epoch": 0.576437379289853, |
| "grad_norm": 7.008401141324143, |
| "learning_rate": 2.078808889039145e-06, |
| "loss": 0.2634110927581787, |
| "memory(GiB)": 50.11, |
| "step": 485, |
| "token_acc": 0.898936170212766, |
| "train_speed(iter/s)": 0.07833 |
| }, |
| { |
| "epoch": 0.5823800326845937, |
| "grad_norm": 3.131382846950792, |
| "learning_rate": 2.030386635624135e-06, |
| "loss": 0.23566818237304688, |
| "memory(GiB)": 50.11, |
| "step": 490, |
| "token_acc": 0.9047619047619048, |
| "train_speed(iter/s)": 0.078358 |
| }, |
| { |
| "epoch": 0.5883226860793345, |
| "grad_norm": 4.264889192430325, |
| "learning_rate": 1.9821463357016517e-06, |
| "loss": 0.23806171417236327, |
| "memory(GiB)": 50.11, |
| "step": 495, |
| "token_acc": 0.896551724137931, |
| "train_speed(iter/s)": 0.078392 |
| }, |
| { |
| "epoch": 0.5942653394740752, |
| "grad_norm": 3.2931924737171885, |
| "learning_rate": 1.934106680160237e-06, |
| "loss": 0.21435232162475587, |
| "memory(GiB)": 50.11, |
| "step": 500, |
| "token_acc": 0.8956043956043956, |
| "train_speed(iter/s)": 0.078424 |
| }, |
| { |
| "epoch": 0.6002079928688159, |
| "grad_norm": 8.47554455420946, |
| "learning_rate": 1.8862862821480023e-06, |
| "loss": 0.23268427848815917, |
| "memory(GiB)": 50.11, |
| "step": 505, |
| "token_acc": 0.8950276243093923, |
| "train_speed(iter/s)": 0.077754 |
| }, |
| { |
| "epoch": 0.6061506462635566, |
| "grad_norm": 4.327104476328467, |
| "learning_rate": 1.8387036698608893e-06, |
| "loss": 0.2465203285217285, |
| "memory(GiB)": 50.11, |
| "step": 510, |
| "token_acc": 0.8944444444444445, |
| "train_speed(iter/s)": 0.077791 |
| }, |
| { |
| "epoch": 0.6120932996582974, |
| "grad_norm": 4.784491787188988, |
| "learning_rate": 1.7913772793638517e-06, |
| "loss": 0.22468171119689942, |
| "memory(GiB)": 50.11, |
| "step": 515, |
| "token_acc": 0.8900523560209425, |
| "train_speed(iter/s)": 0.077832 |
| }, |
| { |
| "epoch": 0.6180359530530382, |
| "grad_norm": 7.681035665569222, |
| "learning_rate": 1.7443254474477328e-06, |
| "loss": 0.25678319931030275, |
| "memory(GiB)": 50.11, |
| "step": 520, |
| "token_acc": 0.9044943820224719, |
| "train_speed(iter/s)": 0.077861 |
| }, |
| { |
| "epoch": 0.6239786064477789, |
| "grad_norm": 4.916037468988723, |
| "learning_rate": 1.697566404524606e-06, |
| "loss": 0.23521194458007813, |
| "memory(GiB)": 50.11, |
| "step": 525, |
| "token_acc": 0.9209039548022598, |
| "train_speed(iter/s)": 0.077894 |
| }, |
| { |
| "epoch": 0.6299212598425197, |
| "grad_norm": 4.319245744348972, |
| "learning_rate": 1.6511182675643273e-06, |
| "loss": 0.2768810272216797, |
| "memory(GiB)": 50.11, |
| "step": 530, |
| "token_acc": 0.9010989010989011, |
| "train_speed(iter/s)": 0.077931 |
| }, |
| { |
| "epoch": 0.6358639132372604, |
| "grad_norm": 4.03213626434002, |
| "learning_rate": 1.6049990330750508e-06, |
| "loss": 0.24452097415924073, |
| "memory(GiB)": 50.11, |
| "step": 535, |
| "token_acc": 0.8924731182795699, |
| "train_speed(iter/s)": 0.077966 |
| }, |
| { |
| "epoch": 0.6418065666320012, |
| "grad_norm": 4.953556314587903, |
| "learning_rate": 1.5592265701304116e-06, |
| "loss": 0.23774099349975586, |
| "memory(GiB)": 50.11, |
| "step": 540, |
| "token_acc": 0.8789473684210526, |
| "train_speed(iter/s)": 0.077993 |
| }, |
| { |
| "epoch": 0.647749220026742, |
| "grad_norm": 3.899829991625053, |
| "learning_rate": 1.5138186134460847e-06, |
| "loss": 0.22564327716827393, |
| "memory(GiB)": 50.11, |
| "step": 545, |
| "token_acc": 0.925531914893617, |
| "train_speed(iter/s)": 0.07802 |
| }, |
| { |
| "epoch": 0.6536918734214827, |
| "grad_norm": 3.0465339812240715, |
| "learning_rate": 1.4687927565084023e-06, |
| "loss": 0.22194738388061525, |
| "memory(GiB)": 50.11, |
| "step": 550, |
| "token_acc": 0.9021739130434783, |
| "train_speed(iter/s)": 0.078047 |
| }, |
| { |
| "epoch": 0.6596345268162235, |
| "grad_norm": 6.803712947943455, |
| "learning_rate": 1.4241664447576876e-06, |
| "loss": 0.23829455375671388, |
| "memory(GiB)": 50.11, |
| "step": 555, |
| "token_acc": 0.8791208791208791, |
| "train_speed(iter/s)": 0.078089 |
| }, |
| { |
| "epoch": 0.6655771802109642, |
| "grad_norm": 5.769102045042221, |
| "learning_rate": 1.379956968828956e-06, |
| "loss": 0.24107139110565184, |
| "memory(GiB)": 50.11, |
| "step": 560, |
| "token_acc": 0.898876404494382, |
| "train_speed(iter/s)": 0.078129 |
| }, |
| { |
| "epoch": 0.671519833605705, |
| "grad_norm": 3.871858784721118, |
| "learning_rate": 1.3361814578525922e-06, |
| "loss": 0.21992707252502441, |
| "memory(GiB)": 50.11, |
| "step": 565, |
| "token_acc": 0.8913043478260869, |
| "train_speed(iter/s)": 0.078168 |
| }, |
| { |
| "epoch": 0.6774624870004456, |
| "grad_norm": 3.7175877166428197, |
| "learning_rate": 1.2928568728175985e-06, |
| "loss": 0.22011113166809082, |
| "memory(GiB)": 50.11, |
| "step": 570, |
| "token_acc": 0.907103825136612, |
| "train_speed(iter/s)": 0.078212 |
| }, |
| { |
| "epoch": 0.6834051403951864, |
| "grad_norm": 2.6462218742223254, |
| "learning_rate": 1.2500000000000007e-06, |
| "loss": 0.26825509071350095, |
| "memory(GiB)": 50.11, |
| "step": 575, |
| "token_acc": 0.8522727272727273, |
| "train_speed(iter/s)": 0.078245 |
| }, |
| { |
| "epoch": 0.6893477937899272, |
| "grad_norm": 5.726663283210285, |
| "learning_rate": 1.2076274444589361e-06, |
| "loss": 0.2344191551208496, |
| "memory(GiB)": 50.11, |
| "step": 580, |
| "token_acc": 0.8913043478260869, |
| "train_speed(iter/s)": 0.078285 |
| }, |
| { |
| "epoch": 0.6952904471846679, |
| "grad_norm": 3.3648860171072914, |
| "learning_rate": 1.1657556236029665e-06, |
| "loss": 0.22296814918518065, |
| "memory(GiB)": 50.11, |
| "step": 585, |
| "token_acc": 0.9162011173184358, |
| "train_speed(iter/s)": 0.078321 |
| }, |
| { |
| "epoch": 0.7012331005794087, |
| "grad_norm": 3.3716796433764746, |
| "learning_rate": 1.1244007608290835e-06, |
| "loss": 0.2307145118713379, |
| "memory(GiB)": 50.11, |
| "step": 590, |
| "token_acc": 0.8944444444444445, |
| "train_speed(iter/s)": 0.078355 |
| }, |
| { |
| "epoch": 0.7071757539741494, |
| "grad_norm": 5.163940745117661, |
| "learning_rate": 1.083578879236895e-06, |
| "loss": 0.18101364374160767, |
| "memory(GiB)": 50.11, |
| "step": 595, |
| "token_acc": 0.907608695652174, |
| "train_speed(iter/s)": 0.078393 |
| }, |
| { |
| "epoch": 0.7131184073688902, |
| "grad_norm": 5.0920246897520975, |
| "learning_rate": 1.043305795420413e-06, |
| "loss": 0.2275557041168213, |
| "memory(GiB)": 50.11, |
| "step": 600, |
| "token_acc": 0.907608695652174, |
| "train_speed(iter/s)": 0.078425 |
| }, |
| { |
| "epoch": 0.719061060763631, |
| "grad_norm": 5.49044908007764, |
| "learning_rate": 1.003597113339855e-06, |
| "loss": 0.19089471101760863, |
| "memory(GiB)": 50.11, |
| "step": 605, |
| "token_acc": 0.9273743016759777, |
| "train_speed(iter/s)": 0.077897 |
| }, |
| { |
| "epoch": 0.7250037141583717, |
| "grad_norm": 4.608743000415324, |
| "learning_rate": 9.644682182758305e-07, |
| "loss": 0.2407398223876953, |
| "memory(GiB)": 50.11, |
| "step": 610, |
| "token_acc": 0.8837209302325582, |
| "train_speed(iter/s)": 0.077939 |
| }, |
| { |
| "epoch": 0.7309463675531125, |
| "grad_norm": 5.486991473096443, |
| "learning_rate": 9.259342708682515e-07, |
| "loss": 0.21678531169891357, |
| "memory(GiB)": 50.11, |
| "step": 615, |
| "token_acc": 0.9444444444444444, |
| "train_speed(iter/s)": 0.077974 |
| }, |
| { |
| "epoch": 0.7368890209478532, |
| "grad_norm": 6.981253763857014, |
| "learning_rate": 8.880102012422873e-07, |
| "loss": 0.2674489736557007, |
| "memory(GiB)": 50.11, |
| "step": 620, |
| "token_acc": 0.9186046511627907, |
| "train_speed(iter/s)": 0.078013 |
| }, |
| { |
| "epoch": 0.742831674342594, |
| "grad_norm": 4.912562813187418, |
| "learning_rate": 8.507107032236323e-07, |
| "loss": 0.21210527420043945, |
| "memory(GiB)": 50.11, |
| "step": 625, |
| "token_acc": 0.9230769230769231, |
| "train_speed(iter/s)": 0.078051 |
| }, |
| { |
| "epoch": 0.7487743277373348, |
| "grad_norm": 2.4710747552066668, |
| "learning_rate": 8.140502286453231e-07, |
| "loss": 0.24000158309936523, |
| "memory(GiB)": 50.11, |
| "step": 630, |
| "token_acc": 0.9226519337016574, |
| "train_speed(iter/s)": 0.078084 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 5.740745699538898, |
| "learning_rate": 7.780429817483229e-07, |
| "loss": 0.23018264770507812, |
| "memory(GiB)": 50.11, |
| "step": 635, |
| "token_acc": 0.9152542372881356, |
| "train_speed(iter/s)": 0.078117 |
| }, |
| { |
| "epoch": 0.7606596345268162, |
| "grad_norm": 3.1530564071349634, |
| "learning_rate": 7.427029136780333e-07, |
| "loss": 0.19342280626296998, |
| "memory(GiB)": 50.11, |
| "step": 640, |
| "token_acc": 0.9308510638297872, |
| "train_speed(iter/s)": 0.07815 |
| }, |
| { |
| "epoch": 0.7666022879215569, |
| "grad_norm": 5.113637773400957, |
| "learning_rate": 7.080437170788723e-07, |
| "loss": 0.22134556770324706, |
| "memory(GiB)": 50.11, |
| "step": 645, |
| "token_acc": 0.925531914893617, |
| "train_speed(iter/s)": 0.078183 |
| }, |
| { |
| "epoch": 0.7725449413162977, |
| "grad_norm": 6.481778521945802, |
| "learning_rate": 6.740788207890017e-07, |
| "loss": 0.21804354190826417, |
| "memory(GiB)": 50.11, |
| "step": 650, |
| "token_acc": 0.9050279329608939, |
| "train_speed(iter/s)": 0.078218 |
| }, |
| { |
| "epoch": 0.7784875947110385, |
| "grad_norm": 8.619444299529343, |
| "learning_rate": 6.40821384637276e-07, |
| "loss": 0.24983735084533693, |
| "memory(GiB)": 50.11, |
| "step": 655, |
| "token_acc": 0.8736842105263158, |
| "train_speed(iter/s)": 0.078254 |
| }, |
| { |
| "epoch": 0.7844302481057792, |
| "grad_norm": 5.377134356716376, |
| "learning_rate": 6.082842943444173e-07, |
| "loss": 0.21551246643066407, |
| "memory(GiB)": 50.11, |
| "step": 660, |
| "token_acc": 0.9148936170212766, |
| "train_speed(iter/s)": 0.078283 |
| }, |
| { |
| "epoch": 0.79037290150052, |
| "grad_norm": 5.763600489660546, |
| "learning_rate": 5.764801565303918e-07, |
| "loss": 0.20307836532592774, |
| "memory(GiB)": 50.11, |
| "step": 665, |
| "token_acc": 0.8932584269662921, |
| "train_speed(iter/s)": 0.078309 |
| }, |
| { |
| "epoch": 0.7963155548952607, |
| "grad_norm": 4.388976996043623, |
| "learning_rate": 5.454212938299256e-07, |
| "loss": 0.23792681694030762, |
| "memory(GiB)": 50.11, |
| "step": 670, |
| "token_acc": 0.9044943820224719, |
| "train_speed(iter/s)": 0.07834 |
| }, |
| { |
| "epoch": 0.8022582082900015, |
| "grad_norm": 5.711136883849875, |
| "learning_rate": 5.151197401180552e-07, |
| "loss": 0.2023566722869873, |
| "memory(GiB)": 50.11, |
| "step": 675, |
| "token_acc": 0.8814432989690721, |
| "train_speed(iter/s)": 0.078375 |
| }, |
| { |
| "epoch": 0.8082008616847423, |
| "grad_norm": 5.454661743233463, |
| "learning_rate": 4.855872358475546e-07, |
| "loss": 0.2369149684906006, |
| "memory(GiB)": 50.11, |
| "step": 680, |
| "token_acc": 0.945054945054945, |
| "train_speed(iter/s)": 0.078406 |
| }, |
| { |
| "epoch": 0.814143515079483, |
| "grad_norm": 4.447210422496218, |
| "learning_rate": 4.5683522350005505e-07, |
| "loss": 0.21783523559570311, |
| "memory(GiB)": 50.11, |
| "step": 685, |
| "token_acc": 0.9197860962566845, |
| "train_speed(iter/s)": 0.078436 |
| }, |
| { |
| "epoch": 0.8200861684742238, |
| "grad_norm": 6.434342650205402, |
| "learning_rate": 4.288748431526082e-07, |
| "loss": 0.23307533264160157, |
| "memory(GiB)": 50.11, |
| "step": 690, |
| "token_acc": 0.8961748633879781, |
| "train_speed(iter/s)": 0.078471 |
| }, |
| { |
| "epoch": 0.8260288218689645, |
| "grad_norm": 4.153284895963464, |
| "learning_rate": 4.017169281614225e-07, |
| "loss": 0.22777295112609863, |
| "memory(GiB)": 50.11, |
| "step": 695, |
| "token_acc": 0.9244186046511628, |
| "train_speed(iter/s)": 0.078498 |
| }, |
| { |
| "epoch": 0.8319714752637053, |
| "grad_norm": 5.4901230399454555, |
| "learning_rate": 3.753720009644371e-07, |
| "loss": 0.21194028854370117, |
| "memory(GiB)": 50.11, |
| "step": 700, |
| "token_acc": 0.9285714285714286, |
| "train_speed(iter/s)": 0.078524 |
| }, |
| { |
| "epoch": 0.837914128658446, |
| "grad_norm": 5.520932610215849, |
| "learning_rate": 3.498502690043651e-07, |
| "loss": 0.19888880252838134, |
| "memory(GiB)": 50.11, |
| "step": 705, |
| "token_acc": 0.9222222222222223, |
| "train_speed(iter/s)": 0.078056 |
| }, |
| { |
| "epoch": 0.8438567820531867, |
| "grad_norm": 5.181898234052906, |
| "learning_rate": 3.2516162077377956e-07, |
| "loss": 0.20560116767883302, |
| "memory(GiB)": 50.11, |
| "step": 710, |
| "token_acc": 0.9310344827586207, |
| "train_speed(iter/s)": 0.078083 |
| }, |
| { |
| "epoch": 0.8497994354479275, |
| "grad_norm": 10.349451389076972, |
| "learning_rate": 3.0131562198377763e-07, |
| "loss": 0.23746159076690673, |
| "memory(GiB)": 50.11, |
| "step": 715, |
| "token_acc": 0.9162303664921466, |
| "train_speed(iter/s)": 0.078112 |
| }, |
| { |
| "epoch": 0.8557420888426682, |
| "grad_norm": 5.618481765930095, |
| "learning_rate": 2.7832151185771096e-07, |
| "loss": 0.19666438102722167, |
| "memory(GiB)": 50.11, |
| "step": 720, |
| "token_acc": 0.91005291005291, |
| "train_speed(iter/s)": 0.078144 |
| }, |
| { |
| "epoch": 0.861684742237409, |
| "grad_norm": 5.312926998681132, |
| "learning_rate": 2.5618819955141453e-07, |
| "loss": 0.23160531520843505, |
| "memory(GiB)": 50.11, |
| "step": 725, |
| "token_acc": 0.9388888888888889, |
| "train_speed(iter/s)": 0.078174 |
| }, |
| { |
| "epoch": 0.8676273956321497, |
| "grad_norm": 6.483313259346484, |
| "learning_rate": 2.3492426070131746e-07, |
| "loss": 0.22388756275177002, |
| "memory(GiB)": 50.11, |
| "step": 730, |
| "token_acc": 0.9441340782122905, |
| "train_speed(iter/s)": 0.078204 |
| }, |
| { |
| "epoch": 0.8735700490268905, |
| "grad_norm": 7.509137819610738, |
| "learning_rate": 2.1453793410178169e-07, |
| "loss": 0.2553365468978882, |
| "memory(GiB)": 50.11, |
| "step": 735, |
| "token_acc": 0.9347826086956522, |
| "train_speed(iter/s)": 0.078234 |
| }, |
| { |
| "epoch": 0.8795127024216313, |
| "grad_norm": 3.277852306532476, |
| "learning_rate": 1.950371185129485e-07, |
| "loss": 0.2170994758605957, |
| "memory(GiB)": 50.11, |
| "step": 740, |
| "token_acc": 0.9153439153439153, |
| "train_speed(iter/s)": 0.078263 |
| }, |
| { |
| "epoch": 0.885455355816372, |
| "grad_norm": 5.884609932336502, |
| "learning_rate": 1.764293696003358e-07, |
| "loss": 0.1915382981300354, |
| "memory(GiB)": 50.11, |
| "step": 745, |
| "token_acc": 0.9508196721311475, |
| "train_speed(iter/s)": 0.078289 |
| }, |
| { |
| "epoch": 0.8913980092111128, |
| "grad_norm": 4.022239658633687, |
| "learning_rate": 1.587218970073634e-07, |
| "loss": 0.22730159759521484, |
| "memory(GiB)": 50.11, |
| "step": 750, |
| "token_acc": 0.893048128342246, |
| "train_speed(iter/s)": 0.078317 |
| }, |
| { |
| "epoch": 0.8973406626058535, |
| "grad_norm": 5.092959105324688, |
| "learning_rate": 1.4192156156195153e-07, |
| "loss": 0.24943215847015382, |
| "memory(GiB)": 50.11, |
| "step": 755, |
| "token_acc": 0.90625, |
| "train_speed(iter/s)": 0.078342 |
| }, |
| { |
| "epoch": 0.9032833160005943, |
| "grad_norm": 5.108519787962356, |
| "learning_rate": 1.2603487261826726e-07, |
| "loss": 0.20730853080749512, |
| "memory(GiB)": 50.11, |
| "step": 760, |
| "token_acc": 0.9209039548022598, |
| "train_speed(iter/s)": 0.07837 |
| }, |
| { |
| "epoch": 0.9092259693953351, |
| "grad_norm": 6.6659075199664635, |
| "learning_rate": 1.1106798553464804e-07, |
| "loss": 0.2543288230895996, |
| "memory(GiB)": 50.11, |
| "step": 765, |
| "token_acc": 0.9047619047619048, |
| "train_speed(iter/s)": 0.078395 |
| }, |
| { |
| "epoch": 0.9151686227900757, |
| "grad_norm": 3.9596865112538637, |
| "learning_rate": 9.702669928868674e-08, |
| "loss": 0.20190849304199218, |
| "memory(GiB)": 50.11, |
| "step": 770, |
| "token_acc": 0.9020618556701031, |
| "train_speed(iter/s)": 0.07842 |
| }, |
| { |
| "epoch": 0.9211112761848165, |
| "grad_norm": 4.119428617367879, |
| "learning_rate": 8.391645423039357e-08, |
| "loss": 0.2413175582885742, |
| "memory(GiB)": 50.11, |
| "step": 775, |
| "token_acc": 0.9411764705882353, |
| "train_speed(iter/s)": 0.078442 |
| }, |
| { |
| "epoch": 0.9270539295795572, |
| "grad_norm": 3.657107164063937, |
| "learning_rate": 7.174232997431391e-08, |
| "loss": 0.2084414005279541, |
| "memory(GiB)": 50.11, |
| "step": 780, |
| "token_acc": 0.9438202247191011, |
| "train_speed(iter/s)": 0.078465 |
| }, |
| { |
| "epoch": 0.932996582974298, |
| "grad_norm": 4.011455719615575, |
| "learning_rate": 6.050904343141095e-08, |
| "loss": 0.20162324905395507, |
| "memory(GiB)": 50.11, |
| "step": 785, |
| "token_acc": 0.9293478260869565, |
| "train_speed(iter/s)": 0.078489 |
| }, |
| { |
| "epoch": 0.9389392363690388, |
| "grad_norm": 6.727744613539731, |
| "learning_rate": 5.022094698148072e-08, |
| "loss": 0.24507064819335939, |
| "memory(GiB)": 50.11, |
| "step": 790, |
| "token_acc": 0.9358288770053476, |
| "train_speed(iter/s)": 0.078512 |
| }, |
| { |
| "epoch": 0.9448818897637795, |
| "grad_norm": 3.2723118161208093, |
| "learning_rate": 4.088202678680597e-08, |
| "loss": 0.23910284042358398, |
| "memory(GiB)": 50.11, |
| "step": 795, |
| "token_acc": 0.861878453038674, |
| "train_speed(iter/s)": 0.078537 |
| }, |
| { |
| "epoch": 0.9508245431585203, |
| "grad_norm": 2.6190372074419326, |
| "learning_rate": 3.249590124770191e-08, |
| "loss": 0.19168223142623902, |
| "memory(GiB)": 50.11, |
| "step": 800, |
| "token_acc": 0.9213483146067416, |
| "train_speed(iter/s)": 0.078553 |
| }, |
| { |
| "epoch": 0.956767196553261, |
| "grad_norm": 4.68738255184625, |
| "learning_rate": 2.506581960055432e-08, |
| "loss": 0.21967520713806152, |
| "memory(GiB)": 50.11, |
| "step": 805, |
| "token_acc": 0.9047619047619048, |
| "train_speed(iter/s)": 0.078158 |
| }, |
| { |
| "epoch": 0.9627098499480018, |
| "grad_norm": 5.503922565128282, |
| "learning_rate": 1.8594660658889095e-08, |
| "loss": 0.22110648155212403, |
| "memory(GiB)": 50.11, |
| "step": 810, |
| "token_acc": 0.9021739130434783, |
| "train_speed(iter/s)": 0.078184 |
| }, |
| { |
| "epoch": 0.9686525033427426, |
| "grad_norm": 4.402575819342294, |
| "learning_rate": 1.3084931697966152e-08, |
| "loss": 0.21621761322021485, |
| "memory(GiB)": 50.11, |
| "step": 815, |
| "token_acc": 0.9090909090909091, |
| "train_speed(iter/s)": 0.07821 |
| }, |
| { |
| "epoch": 0.9745951567374833, |
| "grad_norm": 4.840434765416083, |
| "learning_rate": 8.538767483325384e-09, |
| "loss": 0.2232905387878418, |
| "memory(GiB)": 50.11, |
| "step": 820, |
| "token_acc": 0.8882978723404256, |
| "train_speed(iter/s)": 0.078233 |
| }, |
| { |
| "epoch": 0.9805378101322241, |
| "grad_norm": 4.635090700558647, |
| "learning_rate": 4.9579294436635784e-09, |
| "loss": 0.2567557096481323, |
| "memory(GiB)": 50.11, |
| "step": 825, |
| "token_acc": 0.8911917098445595, |
| "train_speed(iter/s)": 0.078256 |
| }, |
| { |
| "epoch": 0.9864804635269648, |
| "grad_norm": 5.44883977227373, |
| "learning_rate": 2.3438049883625635e-09, |
| "loss": 0.1815792441368103, |
| "memory(GiB)": 50.11, |
| "step": 830, |
| "token_acc": 0.93048128342246, |
| "train_speed(iter/s)": 0.07828 |
| }, |
| { |
| "epoch": 0.9924231169217056, |
| "grad_norm": 6.50498346714347, |
| "learning_rate": 6.974069699314246e-10, |
| "loss": 0.20119824409484863, |
| "memory(GiB)": 50.11, |
| "step": 835, |
| "token_acc": 0.9209039548022598, |
| "train_speed(iter/s)": 0.078303 |
| }, |
| { |
| "epoch": 0.9983657703164462, |
| "grad_norm": 3.2168471908087506, |
| "learning_rate": 1.9373291574031893e-11, |
| "loss": 0.1927746295928955, |
| "memory(GiB)": 50.11, |
| "step": 840, |
| "token_acc": 0.8863636363636364, |
| "train_speed(iter/s)": 0.078328 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 841, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 101605807005696.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|