| { |
| "best_global_step": 1237, |
| "best_metric": 0.36159474, |
| "best_model_checkpoint": "/workspace/checkpoint/gui_exp/sft_amex/v0-20260413_084132/checkpoint-1237", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1237, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008088978766430738, |
| "grad_norm": 64.78370666503906, |
| "learning_rate": 1.6129032258064518e-07, |
| "loss": 1.7486257553100586, |
| "memory(GiB)": 60.95, |
| "step": 1, |
| "token_acc": 0.6546184738955824, |
| "train_speed(iter/s)": 0.017239 |
| }, |
| { |
| "epoch": 0.0016177957532861476, |
| "grad_norm": 68.59134674072266, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 1.777339220046997, |
| "memory(GiB)": 71.88, |
| "step": 2, |
| "token_acc": 0.6341463414634146, |
| "train_speed(iter/s)": 0.019622 |
| }, |
| { |
| "epoch": 0.0024266936299292214, |
| "grad_norm": 64.73936462402344, |
| "learning_rate": 4.838709677419355e-07, |
| "loss": 1.8061851263046265, |
| "memory(GiB)": 71.9, |
| "step": 3, |
| "token_acc": 0.6195652173913043, |
| "train_speed(iter/s)": 0.020613 |
| }, |
| { |
| "epoch": 0.0032355915065722953, |
| "grad_norm": 65.572998046875, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 1.807295560836792, |
| "memory(GiB)": 72.45, |
| "step": 4, |
| "token_acc": 0.5654205607476636, |
| "train_speed(iter/s)": 0.021122 |
| }, |
| { |
| "epoch": 0.004044489383215369, |
| "grad_norm": 65.35359191894531, |
| "learning_rate": 8.064516129032258e-07, |
| "loss": 1.5166772603988647, |
| "memory(GiB)": 72.45, |
| "step": 5, |
| "token_acc": 0.6327433628318584, |
| "train_speed(iter/s)": 0.021426 |
| }, |
| { |
| "epoch": 0.004853387259858443, |
| "grad_norm": 57.624046325683594, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 1.5725659132003784, |
| "memory(GiB)": 72.45, |
| "step": 6, |
| "token_acc": 0.6591928251121076, |
| "train_speed(iter/s)": 0.021622 |
| }, |
| { |
| "epoch": 0.005662285136501517, |
| "grad_norm": 41.641319274902344, |
| "learning_rate": 1.1290322580645162e-06, |
| "loss": 1.6281558275222778, |
| "memory(GiB)": 72.45, |
| "step": 7, |
| "token_acc": 0.6067415730337079, |
| "train_speed(iter/s)": 0.02179 |
| }, |
| { |
| "epoch": 0.006471183013144591, |
| "grad_norm": 40.06605529785156, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 1.6883149147033691, |
| "memory(GiB)": 72.45, |
| "step": 8, |
| "token_acc": 0.6423841059602649, |
| "train_speed(iter/s)": 0.021906 |
| }, |
| { |
| "epoch": 0.007280080889787664, |
| "grad_norm": 23.832304000854492, |
| "learning_rate": 1.4516129032258066e-06, |
| "loss": 1.4600856304168701, |
| "memory(GiB)": 73.8, |
| "step": 9, |
| "token_acc": 0.6683417085427136, |
| "train_speed(iter/s)": 0.022001 |
| }, |
| { |
| "epoch": 0.008088978766430738, |
| "grad_norm": 19.52027702331543, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 1.178048014640808, |
| "memory(GiB)": 73.8, |
| "step": 10, |
| "token_acc": 0.6995515695067265, |
| "train_speed(iter/s)": 0.022088 |
| }, |
| { |
| "epoch": 0.008897876643073812, |
| "grad_norm": 22.565189361572266, |
| "learning_rate": 1.774193548387097e-06, |
| "loss": 1.225492000579834, |
| "memory(GiB)": 73.8, |
| "step": 11, |
| "token_acc": 0.680327868852459, |
| "train_speed(iter/s)": 0.022154 |
| }, |
| { |
| "epoch": 0.009706774519716885, |
| "grad_norm": 15.462038040161133, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 1.0573687553405762, |
| "memory(GiB)": 73.8, |
| "step": 12, |
| "token_acc": 0.7576923076923077, |
| "train_speed(iter/s)": 0.02221 |
| }, |
| { |
| "epoch": 0.010515672396359959, |
| "grad_norm": 14.245152473449707, |
| "learning_rate": 2.096774193548387e-06, |
| "loss": 1.0721827745437622, |
| "memory(GiB)": 73.8, |
| "step": 13, |
| "token_acc": 0.7405857740585774, |
| "train_speed(iter/s)": 0.022257 |
| }, |
| { |
| "epoch": 0.011324570273003034, |
| "grad_norm": 8.204596519470215, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 0.8753397464752197, |
| "memory(GiB)": 73.8, |
| "step": 14, |
| "token_acc": 0.7516778523489933, |
| "train_speed(iter/s)": 0.022297 |
| }, |
| { |
| "epoch": 0.012133468149646108, |
| "grad_norm": 11.066507339477539, |
| "learning_rate": 2.4193548387096776e-06, |
| "loss": 0.9424616098403931, |
| "memory(GiB)": 73.8, |
| "step": 15, |
| "token_acc": 0.7411003236245954, |
| "train_speed(iter/s)": 0.022325 |
| }, |
| { |
| "epoch": 0.012942366026289181, |
| "grad_norm": 8.134406089782715, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 0.9165105819702148, |
| "memory(GiB)": 73.8, |
| "step": 16, |
| "token_acc": 0.7902439024390244, |
| "train_speed(iter/s)": 0.022352 |
| }, |
| { |
| "epoch": 0.013751263902932255, |
| "grad_norm": 14.990755081176758, |
| "learning_rate": 2.7419354838709676e-06, |
| "loss": 0.8677236437797546, |
| "memory(GiB)": 73.8, |
| "step": 17, |
| "token_acc": 0.7635658914728682, |
| "train_speed(iter/s)": 0.02238 |
| }, |
| { |
| "epoch": 0.014560161779575328, |
| "grad_norm": 5.65842342376709, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 0.7795729637145996, |
| "memory(GiB)": 73.8, |
| "step": 18, |
| "token_acc": 0.7739938080495357, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.015369059656218402, |
| "grad_norm": 5.559131145477295, |
| "learning_rate": 3.0645161290322584e-06, |
| "loss": 0.8590961694717407, |
| "memory(GiB)": 73.8, |
| "step": 19, |
| "token_acc": 0.75, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.016177957532861477, |
| "grad_norm": 4.871716499328613, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 0.7650733590126038, |
| "memory(GiB)": 73.8, |
| "step": 20, |
| "token_acc": 0.7865612648221344, |
| "train_speed(iter/s)": 0.022441 |
| }, |
| { |
| "epoch": 0.01698685540950455, |
| "grad_norm": 5.387275218963623, |
| "learning_rate": 3.3870967741935484e-06, |
| "loss": 0.7404652833938599, |
| "memory(GiB)": 73.8, |
| "step": 21, |
| "token_acc": 0.7907801418439716, |
| "train_speed(iter/s)": 0.022456 |
| }, |
| { |
| "epoch": 0.017795753286147624, |
| "grad_norm": 6.131480693817139, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 0.8067750334739685, |
| "memory(GiB)": 73.8, |
| "step": 22, |
| "token_acc": 0.7986111111111112, |
| "train_speed(iter/s)": 0.022476 |
| }, |
| { |
| "epoch": 0.018604651162790697, |
| "grad_norm": 5.183681488037109, |
| "learning_rate": 3.7096774193548392e-06, |
| "loss": 0.8132314682006836, |
| "memory(GiB)": 73.8, |
| "step": 23, |
| "token_acc": 0.7714285714285715, |
| "train_speed(iter/s)": 0.022492 |
| }, |
| { |
| "epoch": 0.01941354903943377, |
| "grad_norm": 5.063383102416992, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 0.7204439640045166, |
| "memory(GiB)": 73.8, |
| "step": 24, |
| "token_acc": 0.7905982905982906, |
| "train_speed(iter/s)": 0.022506 |
| }, |
| { |
| "epoch": 0.020222446916076844, |
| "grad_norm": 4.753130912780762, |
| "learning_rate": 4.032258064516129e-06, |
| "loss": 0.7673914432525635, |
| "memory(GiB)": 73.8, |
| "step": 25, |
| "token_acc": 0.7453416149068323, |
| "train_speed(iter/s)": 0.022518 |
| }, |
| { |
| "epoch": 0.021031344792719918, |
| "grad_norm": 4.112824440002441, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 0.6755634546279907, |
| "memory(GiB)": 73.8, |
| "step": 26, |
| "token_acc": 0.7714285714285715, |
| "train_speed(iter/s)": 0.02253 |
| }, |
| { |
| "epoch": 0.02184024266936299, |
| "grad_norm": 3.704129695892334, |
| "learning_rate": 4.35483870967742e-06, |
| "loss": 0.685713529586792, |
| "memory(GiB)": 73.8, |
| "step": 27, |
| "token_acc": 0.8447488584474886, |
| "train_speed(iter/s)": 0.022541 |
| }, |
| { |
| "epoch": 0.02264914054600607, |
| "grad_norm": 4.385001182556152, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 0.7436140179634094, |
| "memory(GiB)": 73.8, |
| "step": 28, |
| "token_acc": 0.749003984063745, |
| "train_speed(iter/s)": 0.022549 |
| }, |
| { |
| "epoch": 0.023458038422649142, |
| "grad_norm": 5.385667324066162, |
| "learning_rate": 4.67741935483871e-06, |
| "loss": 0.7293410301208496, |
| "memory(GiB)": 73.8, |
| "step": 29, |
| "token_acc": 0.8248175182481752, |
| "train_speed(iter/s)": 0.022558 |
| }, |
| { |
| "epoch": 0.024266936299292215, |
| "grad_norm": 5.816902160644531, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 0.6676285266876221, |
| "memory(GiB)": 73.8, |
| "step": 30, |
| "token_acc": 0.7844827586206896, |
| "train_speed(iter/s)": 0.022568 |
| }, |
| { |
| "epoch": 0.02507583417593529, |
| "grad_norm": 3.9358129501342773, |
| "learning_rate": 5e-06, |
| "loss": 0.6832848787307739, |
| "memory(GiB)": 73.8, |
| "step": 31, |
| "token_acc": 0.8340807174887892, |
| "train_speed(iter/s)": 0.022574 |
| }, |
| { |
| "epoch": 0.025884732052578362, |
| "grad_norm": 3.9400582313537598, |
| "learning_rate": 5.161290322580646e-06, |
| "loss": 0.6794041395187378, |
| "memory(GiB)": 73.8, |
| "step": 32, |
| "token_acc": 0.7857142857142857, |
| "train_speed(iter/s)": 0.022581 |
| }, |
| { |
| "epoch": 0.026693629929221436, |
| "grad_norm": 6.0499725341796875, |
| "learning_rate": 5.322580645161291e-06, |
| "loss": 0.6280096769332886, |
| "memory(GiB)": 73.8, |
| "step": 33, |
| "token_acc": 0.8277511961722488, |
| "train_speed(iter/s)": 0.022589 |
| }, |
| { |
| "epoch": 0.02750252780586451, |
| "grad_norm": 4.963372230529785, |
| "learning_rate": 5.483870967741935e-06, |
| "loss": 0.7461614012718201, |
| "memory(GiB)": 73.8, |
| "step": 34, |
| "token_acc": 0.7442922374429224, |
| "train_speed(iter/s)": 0.022594 |
| }, |
| { |
| "epoch": 0.028311425682507583, |
| "grad_norm": 4.874055862426758, |
| "learning_rate": 5.645161290322582e-06, |
| "loss": 0.6325216889381409, |
| "memory(GiB)": 73.8, |
| "step": 35, |
| "token_acc": 0.8239700374531835, |
| "train_speed(iter/s)": 0.022602 |
| }, |
| { |
| "epoch": 0.029120323559150656, |
| "grad_norm": 4.295459747314453, |
| "learning_rate": 5.806451612903226e-06, |
| "loss": 0.6098757982254028, |
| "memory(GiB)": 73.8, |
| "step": 36, |
| "token_acc": 0.85, |
| "train_speed(iter/s)": 0.022606 |
| }, |
| { |
| "epoch": 0.02992922143579373, |
| "grad_norm": 4.486640453338623, |
| "learning_rate": 5.967741935483872e-06, |
| "loss": 0.6720225811004639, |
| "memory(GiB)": 73.8, |
| "step": 37, |
| "token_acc": 0.7675276752767528, |
| "train_speed(iter/s)": 0.022613 |
| }, |
| { |
| "epoch": 0.030738119312436803, |
| "grad_norm": 3.9755430221557617, |
| "learning_rate": 6.129032258064517e-06, |
| "loss": 0.7007983326911926, |
| "memory(GiB)": 73.8, |
| "step": 38, |
| "token_acc": 0.7446808510638298, |
| "train_speed(iter/s)": 0.022618 |
| }, |
| { |
| "epoch": 0.03154701718907988, |
| "grad_norm": 3.85732102394104, |
| "learning_rate": 6.290322580645162e-06, |
| "loss": 0.6228176355361938, |
| "memory(GiB)": 73.8, |
| "step": 39, |
| "token_acc": 0.8116591928251121, |
| "train_speed(iter/s)": 0.022625 |
| }, |
| { |
| "epoch": 0.032355915065722954, |
| "grad_norm": 3.556612491607666, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 0.6283481121063232, |
| "memory(GiB)": 73.8, |
| "step": 40, |
| "token_acc": 0.8035087719298246, |
| "train_speed(iter/s)": 0.02263 |
| }, |
| { |
| "epoch": 0.03316481294236603, |
| "grad_norm": 5.600265979766846, |
| "learning_rate": 6.612903225806452e-06, |
| "loss": 0.6793509721755981, |
| "memory(GiB)": 73.8, |
| "step": 41, |
| "token_acc": 0.8174904942965779, |
| "train_speed(iter/s)": 0.022635 |
| }, |
| { |
| "epoch": 0.0339737108190091, |
| "grad_norm": 3.7283554077148438, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 0.6385987997055054, |
| "memory(GiB)": 73.8, |
| "step": 42, |
| "token_acc": 0.8125, |
| "train_speed(iter/s)": 0.022639 |
| }, |
| { |
| "epoch": 0.034782608695652174, |
| "grad_norm": 3.8624303340911865, |
| "learning_rate": 6.935483870967743e-06, |
| "loss": 0.6532889604568481, |
| "memory(GiB)": 73.8, |
| "step": 43, |
| "token_acc": 0.8297872340425532, |
| "train_speed(iter/s)": 0.022644 |
| }, |
| { |
| "epoch": 0.03559150657229525, |
| "grad_norm": 3.6706488132476807, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 0.579014241695404, |
| "memory(GiB)": 73.8, |
| "step": 44, |
| "token_acc": 0.8345070422535211, |
| "train_speed(iter/s)": 0.022648 |
| }, |
| { |
| "epoch": 0.03640040444893832, |
| "grad_norm": 3.9184775352478027, |
| "learning_rate": 7.258064516129033e-06, |
| "loss": 0.5859895348548889, |
| "memory(GiB)": 73.8, |
| "step": 45, |
| "token_acc": 0.8291925465838509, |
| "train_speed(iter/s)": 0.022651 |
| }, |
| { |
| "epoch": 0.037209302325581395, |
| "grad_norm": 3.94393253326416, |
| "learning_rate": 7.4193548387096784e-06, |
| "loss": 0.5704982280731201, |
| "memory(GiB)": 73.8, |
| "step": 46, |
| "token_acc": 0.8542713567839196, |
| "train_speed(iter/s)": 0.022655 |
| }, |
| { |
| "epoch": 0.03801820020222447, |
| "grad_norm": 4.142230987548828, |
| "learning_rate": 7.580645161290323e-06, |
| "loss": 0.623918354511261, |
| "memory(GiB)": 73.8, |
| "step": 47, |
| "token_acc": 0.7984790874524715, |
| "train_speed(iter/s)": 0.022657 |
| }, |
| { |
| "epoch": 0.03882709807886754, |
| "grad_norm": 4.207951545715332, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 0.5815058946609497, |
| "memory(GiB)": 73.8, |
| "step": 48, |
| "token_acc": 0.8186528497409327, |
| "train_speed(iter/s)": 0.022662 |
| }, |
| { |
| "epoch": 0.039635995955510615, |
| "grad_norm": 4.375429153442383, |
| "learning_rate": 7.903225806451613e-06, |
| "loss": 0.6511105895042419, |
| "memory(GiB)": 73.8, |
| "step": 49, |
| "token_acc": 0.809375, |
| "train_speed(iter/s)": 0.022666 |
| }, |
| { |
| "epoch": 0.04044489383215369, |
| "grad_norm": 4.1379499435424805, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 0.6755905747413635, |
| "memory(GiB)": 73.8, |
| "step": 50, |
| "token_acc": 0.8034934497816594, |
| "train_speed(iter/s)": 0.02267 |
| }, |
| { |
| "epoch": 0.04125379170879676, |
| "grad_norm": 4.107391357421875, |
| "learning_rate": 8.225806451612904e-06, |
| "loss": 0.558114230632782, |
| "memory(GiB)": 73.8, |
| "step": 51, |
| "token_acc": 0.8186528497409327, |
| "train_speed(iter/s)": 0.022672 |
| }, |
| { |
| "epoch": 0.042062689585439836, |
| "grad_norm": 3.2282044887542725, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 0.5646804571151733, |
| "memory(GiB)": 73.8, |
| "step": 52, |
| "token_acc": 0.7943262411347518, |
| "train_speed(iter/s)": 0.022674 |
| }, |
| { |
| "epoch": 0.04287158746208291, |
| "grad_norm": 3.679171085357666, |
| "learning_rate": 8.548387096774194e-06, |
| "loss": 0.5988277196884155, |
| "memory(GiB)": 73.8, |
| "step": 53, |
| "token_acc": 0.8022922636103151, |
| "train_speed(iter/s)": 0.022675 |
| }, |
| { |
| "epoch": 0.04368048533872598, |
| "grad_norm": 4.386334419250488, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 0.6635404825210571, |
| "memory(GiB)": 73.8, |
| "step": 54, |
| "token_acc": 0.7681159420289855, |
| "train_speed(iter/s)": 0.022674 |
| }, |
| { |
| "epoch": 0.044489383215369056, |
| "grad_norm": 5.1664557456970215, |
| "learning_rate": 8.870967741935484e-06, |
| "loss": 0.5942538976669312, |
| "memory(GiB)": 73.8, |
| "step": 55, |
| "token_acc": 0.86328125, |
| "train_speed(iter/s)": 0.022677 |
| }, |
| { |
| "epoch": 0.04529828109201214, |
| "grad_norm": 5.156553268432617, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 0.5873563885688782, |
| "memory(GiB)": 74.11, |
| "step": 56, |
| "token_acc": 0.7923875432525952, |
| "train_speed(iter/s)": 0.022677 |
| }, |
| { |
| "epoch": 0.04610717896865521, |
| "grad_norm": 3.327913999557495, |
| "learning_rate": 9.193548387096775e-06, |
| "loss": 0.5179651975631714, |
| "memory(GiB)": 74.11, |
| "step": 57, |
| "token_acc": 0.8286713286713286, |
| "train_speed(iter/s)": 0.022681 |
| }, |
| { |
| "epoch": 0.046916076845298284, |
| "grad_norm": 3.147554397583008, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 0.6654713153839111, |
| "memory(GiB)": 74.11, |
| "step": 58, |
| "token_acc": 0.8122866894197952, |
| "train_speed(iter/s)": 0.022683 |
| }, |
| { |
| "epoch": 0.04772497472194136, |
| "grad_norm": 3.951767921447754, |
| "learning_rate": 9.516129032258065e-06, |
| "loss": 0.5465582013130188, |
| "memory(GiB)": 74.11, |
| "step": 59, |
| "token_acc": 0.828125, |
| "train_speed(iter/s)": 0.022686 |
| }, |
| { |
| "epoch": 0.04853387259858443, |
| "grad_norm": 3.6060750484466553, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 0.6206121444702148, |
| "memory(GiB)": 74.11, |
| "step": 60, |
| "token_acc": 0.8258928571428571, |
| "train_speed(iter/s)": 0.022687 |
| }, |
| { |
| "epoch": 0.049342770475227504, |
| "grad_norm": 4.130661487579346, |
| "learning_rate": 9.838709677419356e-06, |
| "loss": 0.6245087385177612, |
| "memory(GiB)": 74.11, |
| "step": 61, |
| "token_acc": 0.8050847457627118, |
| "train_speed(iter/s)": 0.02269 |
| }, |
| { |
| "epoch": 0.05015166835187058, |
| "grad_norm": 4.408290386199951, |
| "learning_rate": 1e-05, |
| "loss": 0.6183744668960571, |
| "memory(GiB)": 74.11, |
| "step": 62, |
| "token_acc": 0.8229665071770335, |
| "train_speed(iter/s)": 0.022694 |
| }, |
| { |
| "epoch": 0.05096056622851365, |
| "grad_norm": 3.7502522468566895, |
| "learning_rate": 9.999982128386562e-06, |
| "loss": 0.5600206851959229, |
| "memory(GiB)": 74.11, |
| "step": 63, |
| "token_acc": 0.8364312267657993, |
| "train_speed(iter/s)": 0.022695 |
| }, |
| { |
| "epoch": 0.051769464105156725, |
| "grad_norm": 4.595156669616699, |
| "learning_rate": 9.999928513674004e-06, |
| "loss": 0.5526872873306274, |
| "memory(GiB)": 74.11, |
| "step": 64, |
| "token_acc": 0.8165467625899281, |
| "train_speed(iter/s)": 0.022697 |
| }, |
| { |
| "epoch": 0.0525783619817998, |
| "grad_norm": 4.10991907119751, |
| "learning_rate": 9.999839156245597e-06, |
| "loss": 0.4983682632446289, |
| "memory(GiB)": 74.11, |
| "step": 65, |
| "token_acc": 0.8742857142857143, |
| "train_speed(iter/s)": 0.022698 |
| }, |
| { |
| "epoch": 0.05338725985844287, |
| "grad_norm": 4.291178226470947, |
| "learning_rate": 9.99971405674013e-06, |
| "loss": 0.6258913278579712, |
| "memory(GiB)": 74.11, |
| "step": 66, |
| "token_acc": 0.8235294117647058, |
| "train_speed(iter/s)": 0.0227 |
| }, |
| { |
| "epoch": 0.054196157735085945, |
| "grad_norm": 4.950540065765381, |
| "learning_rate": 9.999553216051892e-06, |
| "loss": 0.6055471897125244, |
| "memory(GiB)": 74.11, |
| "step": 67, |
| "token_acc": 0.75, |
| "train_speed(iter/s)": 0.0227 |
| }, |
| { |
| "epoch": 0.05500505561172902, |
| "grad_norm": 4.7848076820373535, |
| "learning_rate": 9.999356635330675e-06, |
| "loss": 0.5771285891532898, |
| "memory(GiB)": 74.11, |
| "step": 68, |
| "token_acc": 0.8007518796992481, |
| "train_speed(iter/s)": 0.022702 |
| }, |
| { |
| "epoch": 0.05581395348837209, |
| "grad_norm": 4.7233567237854, |
| "learning_rate": 9.999124315981766e-06, |
| "loss": 0.5602097511291504, |
| "memory(GiB)": 74.11, |
| "step": 69, |
| "token_acc": 0.85, |
| "train_speed(iter/s)": 0.022704 |
| }, |
| { |
| "epoch": 0.056622851365015166, |
| "grad_norm": 3.280118227005005, |
| "learning_rate": 9.998856259665936e-06, |
| "loss": 0.5948894023895264, |
| "memory(GiB)": 74.11, |
| "step": 70, |
| "token_acc": 0.8597285067873304, |
| "train_speed(iter/s)": 0.022706 |
| }, |
| { |
| "epoch": 0.05743174924165824, |
| "grad_norm": 3.6923129558563232, |
| "learning_rate": 9.99855246829942e-06, |
| "loss": 0.615454912185669, |
| "memory(GiB)": 74.11, |
| "step": 71, |
| "token_acc": 0.7639484978540773, |
| "train_speed(iter/s)": 0.022708 |
| }, |
| { |
| "epoch": 0.05824064711830131, |
| "grad_norm": 3.9682765007019043, |
| "learning_rate": 9.99821294405392e-06, |
| "loss": 0.6003280878067017, |
| "memory(GiB)": 74.11, |
| "step": 72, |
| "token_acc": 0.8415492957746479, |
| "train_speed(iter/s)": 0.02271 |
| }, |
| { |
| "epoch": 0.059049544994944386, |
| "grad_norm": 3.5200328826904297, |
| "learning_rate": 9.99783768935657e-06, |
| "loss": 0.5450583100318909, |
| "memory(GiB)": 74.11, |
| "step": 73, |
| "token_acc": 0.8100358422939068, |
| "train_speed(iter/s)": 0.022712 |
| }, |
| { |
| "epoch": 0.05985844287158746, |
| "grad_norm": 4.187544345855713, |
| "learning_rate": 9.997426706889935e-06, |
| "loss": 0.5230978727340698, |
| "memory(GiB)": 74.11, |
| "step": 74, |
| "token_acc": 0.8472222222222222, |
| "train_speed(iter/s)": 0.022714 |
| }, |
| { |
| "epoch": 0.06066734074823053, |
| "grad_norm": 3.5596694946289062, |
| "learning_rate": 9.996979999591982e-06, |
| "loss": 0.5269993543624878, |
| "memory(GiB)": 74.11, |
| "step": 75, |
| "token_acc": 0.8168316831683168, |
| "train_speed(iter/s)": 0.022715 |
| }, |
| { |
| "epoch": 0.06147623862487361, |
| "grad_norm": 3.213773012161255, |
| "learning_rate": 9.996497570656063e-06, |
| "loss": 0.5459144711494446, |
| "memory(GiB)": 74.11, |
| "step": 76, |
| "token_acc": 0.7665198237885462, |
| "train_speed(iter/s)": 0.022716 |
| }, |
| { |
| "epoch": 0.06228513650151668, |
| "grad_norm": 3.1109633445739746, |
| "learning_rate": 9.995979423530893e-06, |
| "loss": 0.5678860545158386, |
| "memory(GiB)": 74.11, |
| "step": 77, |
| "token_acc": 0.8123076923076923, |
| "train_speed(iter/s)": 0.022717 |
| }, |
| { |
| "epoch": 0.06309403437815976, |
| "grad_norm": 3.668972969055176, |
| "learning_rate": 9.99542556192052e-06, |
| "loss": 0.5075556039810181, |
| "memory(GiB)": 74.11, |
| "step": 78, |
| "token_acc": 0.84, |
| "train_speed(iter/s)": 0.022718 |
| }, |
| { |
| "epoch": 0.06390293225480283, |
| "grad_norm": 4.338983535766602, |
| "learning_rate": 9.994835989784305e-06, |
| "loss": 0.5242471098899841, |
| "memory(GiB)": 74.11, |
| "step": 79, |
| "token_acc": 0.865, |
| "train_speed(iter/s)": 0.022721 |
| }, |
| { |
| "epoch": 0.06471183013144591, |
| "grad_norm": 4.064675807952881, |
| "learning_rate": 9.99421071133689e-06, |
| "loss": 0.6131962537765503, |
| "memory(GiB)": 74.11, |
| "step": 80, |
| "token_acc": 0.7767857142857143, |
| "train_speed(iter/s)": 0.022722 |
| }, |
| { |
| "epoch": 0.06552072800808897, |
| "grad_norm": 3.6171154975891113, |
| "learning_rate": 9.993549731048171e-06, |
| "loss": 0.5887628197669983, |
| "memory(GiB)": 74.11, |
| "step": 81, |
| "token_acc": 0.7992125984251969, |
| "train_speed(iter/s)": 0.022724 |
| }, |
| { |
| "epoch": 0.06632962588473205, |
| "grad_norm": 3.9707374572753906, |
| "learning_rate": 9.992853053643257e-06, |
| "loss": 0.5989000201225281, |
| "memory(GiB)": 74.11, |
| "step": 82, |
| "token_acc": 0.8346456692913385, |
| "train_speed(iter/s)": 0.022726 |
| }, |
| { |
| "epoch": 0.06713852376137512, |
| "grad_norm": 4.361082077026367, |
| "learning_rate": 9.992120684102453e-06, |
| "loss": 0.6060096025466919, |
| "memory(GiB)": 74.11, |
| "step": 83, |
| "token_acc": 0.8148148148148148, |
| "train_speed(iter/s)": 0.022728 |
| }, |
| { |
| "epoch": 0.0679474216380182, |
| "grad_norm": 3.9677209854125977, |
| "learning_rate": 9.991352627661205e-06, |
| "loss": 0.5200193524360657, |
| "memory(GiB)": 74.11, |
| "step": 84, |
| "token_acc": 0.8506787330316742, |
| "train_speed(iter/s)": 0.022729 |
| }, |
| { |
| "epoch": 0.06875631951466127, |
| "grad_norm": 3.435011863708496, |
| "learning_rate": 9.990548889810078e-06, |
| "loss": 0.6048153638839722, |
| "memory(GiB)": 74.11, |
| "step": 85, |
| "token_acc": 0.8391608391608392, |
| "train_speed(iter/s)": 0.022731 |
| }, |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 3.5457801818847656, |
| "learning_rate": 9.989709476294708e-06, |
| "loss": 0.5572282671928406, |
| "memory(GiB)": 74.11, |
| "step": 86, |
| "token_acc": 0.8181818181818182, |
| "train_speed(iter/s)": 0.022732 |
| }, |
| { |
| "epoch": 0.07037411526794742, |
| "grad_norm": 3.885216474533081, |
| "learning_rate": 9.988834393115768e-06, |
| "loss": 0.5753508806228638, |
| "memory(GiB)": 74.11, |
| "step": 87, |
| "token_acc": 0.8823529411764706, |
| "train_speed(iter/s)": 0.022732 |
| }, |
| { |
| "epoch": 0.0711830131445905, |
| "grad_norm": 3.5327308177948, |
| "learning_rate": 9.987923646528911e-06, |
| "loss": 0.5835089683532715, |
| "memory(GiB)": 74.11, |
| "step": 88, |
| "token_acc": 0.8226221079691517, |
| "train_speed(iter/s)": 0.022731 |
| }, |
| { |
| "epoch": 0.07199191102123358, |
| "grad_norm": 3.0550527572631836, |
| "learning_rate": 9.986977243044747e-06, |
| "loss": 0.5215576887130737, |
| "memory(GiB)": 74.11, |
| "step": 89, |
| "token_acc": 0.8870292887029289, |
| "train_speed(iter/s)": 0.022732 |
| }, |
| { |
| "epoch": 0.07280080889787664, |
| "grad_norm": 3.0193593502044678, |
| "learning_rate": 9.985995189428775e-06, |
| "loss": 0.4884870648384094, |
| "memory(GiB)": 74.11, |
| "step": 90, |
| "token_acc": 0.8713235294117647, |
| "train_speed(iter/s)": 0.022732 |
| }, |
| { |
| "epoch": 0.07360970677451972, |
| "grad_norm": 3.2098543643951416, |
| "learning_rate": 9.984977492701351e-06, |
| "loss": 0.5010548233985901, |
| "memory(GiB)": 74.11, |
| "step": 91, |
| "token_acc": 0.8104575163398693, |
| "train_speed(iter/s)": 0.022734 |
| }, |
| { |
| "epoch": 0.07441860465116279, |
| "grad_norm": 3.6859188079833984, |
| "learning_rate": 9.983924160137627e-06, |
| "loss": 0.5493002533912659, |
| "memory(GiB)": 74.11, |
| "step": 92, |
| "token_acc": 0.7937743190661478, |
| "train_speed(iter/s)": 0.022734 |
| }, |
| { |
| "epoch": 0.07522750252780587, |
| "grad_norm": 3.2814273834228516, |
| "learning_rate": 9.982835199267502e-06, |
| "loss": 0.6033581495285034, |
| "memory(GiB)": 74.11, |
| "step": 93, |
| "token_acc": 0.8416666666666667, |
| "train_speed(iter/s)": 0.022734 |
| }, |
| { |
| "epoch": 0.07603640040444894, |
| "grad_norm": 3.5553441047668457, |
| "learning_rate": 9.981710617875575e-06, |
| "loss": 0.6103281378746033, |
| "memory(GiB)": 74.11, |
| "step": 94, |
| "token_acc": 0.7589285714285714, |
| "train_speed(iter/s)": 0.022736 |
| }, |
| { |
| "epoch": 0.07684529828109202, |
| "grad_norm": 3.5121068954467773, |
| "learning_rate": 9.980550424001077e-06, |
| "loss": 0.5484324097633362, |
| "memory(GiB)": 74.11, |
| "step": 95, |
| "token_acc": 0.8211678832116789, |
| "train_speed(iter/s)": 0.022736 |
| }, |
| { |
| "epoch": 0.07765419615773508, |
| "grad_norm": 2.6635591983795166, |
| "learning_rate": 9.979354625937821e-06, |
| "loss": 0.509511411190033, |
| "memory(GiB)": 74.11, |
| "step": 96, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022736 |
| }, |
| { |
| "epoch": 0.07846309403437816, |
| "grad_norm": 3.5615248680114746, |
| "learning_rate": 9.978123232234147e-06, |
| "loss": 0.5271934270858765, |
| "memory(GiB)": 74.11, |
| "step": 97, |
| "token_acc": 0.815625, |
| "train_speed(iter/s)": 0.022737 |
| }, |
| { |
| "epoch": 0.07927199191102123, |
| "grad_norm": 4.439089775085449, |
| "learning_rate": 9.976856251692851e-06, |
| "loss": 0.5473837852478027, |
| "memory(GiB)": 74.11, |
| "step": 98, |
| "token_acc": 0.843558282208589, |
| "train_speed(iter/s)": 0.022738 |
| }, |
| { |
| "epoch": 0.08008088978766431, |
| "grad_norm": 3.3765029907226562, |
| "learning_rate": 9.975553693371124e-06, |
| "loss": 0.572515070438385, |
| "memory(GiB)": 74.11, |
| "step": 99, |
| "token_acc": 0.8262411347517731, |
| "train_speed(iter/s)": 0.022739 |
| }, |
| { |
| "epoch": 0.08088978766430738, |
| "grad_norm": 3.8845911026000977, |
| "learning_rate": 9.974215566580499e-06, |
| "loss": 0.5989265441894531, |
| "memory(GiB)": 74.11, |
| "step": 100, |
| "token_acc": 0.8562091503267973, |
| "train_speed(iter/s)": 0.022741 |
| }, |
| { |
| "epoch": 0.08169868554095046, |
| "grad_norm": 3.336557626724243, |
| "learning_rate": 9.972841880886766e-06, |
| "loss": 0.5662233829498291, |
| "memory(GiB)": 74.11, |
| "step": 101, |
| "token_acc": 0.8298755186721992, |
| "train_speed(iter/s)": 0.022741 |
| }, |
| { |
| "epoch": 0.08250758341759352, |
| "grad_norm": 2.8836798667907715, |
| "learning_rate": 9.971432646109919e-06, |
| "loss": 0.44332605600357056, |
| "memory(GiB)": 74.11, |
| "step": 102, |
| "token_acc": 0.8586572438162544, |
| "train_speed(iter/s)": 0.022742 |
| }, |
| { |
| "epoch": 0.0833164812942366, |
| "grad_norm": 4.133236885070801, |
| "learning_rate": 9.969987872324076e-06, |
| "loss": 0.5478776693344116, |
| "memory(GiB)": 74.11, |
| "step": 103, |
| "token_acc": 0.8424908424908425, |
| "train_speed(iter/s)": 0.022743 |
| }, |
| { |
| "epoch": 0.08412537917087967, |
| "grad_norm": 4.5403828620910645, |
| "learning_rate": 9.968507569857413e-06, |
| "loss": 0.5256601572036743, |
| "memory(GiB)": 74.11, |
| "step": 104, |
| "token_acc": 0.7886178861788617, |
| "train_speed(iter/s)": 0.022744 |
| }, |
| { |
| "epoch": 0.08493427704752275, |
| "grad_norm": 3.083695888519287, |
| "learning_rate": 9.966991749292088e-06, |
| "loss": 0.560812771320343, |
| "memory(GiB)": 74.11, |
| "step": 105, |
| "token_acc": 0.8056537102473498, |
| "train_speed(iter/s)": 0.022745 |
| }, |
| { |
| "epoch": 0.08574317492416582, |
| "grad_norm": 2.619795083999634, |
| "learning_rate": 9.965440421464163e-06, |
| "loss": 0.5007873773574829, |
| "memory(GiB)": 74.11, |
| "step": 106, |
| "token_acc": 0.8132295719844358, |
| "train_speed(iter/s)": 0.022745 |
| }, |
| { |
| "epoch": 0.0865520728008089, |
| "grad_norm": 3.6254372596740723, |
| "learning_rate": 9.963853597463533e-06, |
| "loss": 0.49696582555770874, |
| "memory(GiB)": 74.11, |
| "step": 107, |
| "token_acc": 0.846441947565543, |
| "train_speed(iter/s)": 0.022747 |
| }, |
| { |
| "epoch": 0.08736097067745197, |
| "grad_norm": 3.388469934463501, |
| "learning_rate": 9.962231288633838e-06, |
| "loss": 0.4739895462989807, |
| "memory(GiB)": 74.11, |
| "step": 108, |
| "token_acc": 0.84, |
| "train_speed(iter/s)": 0.022748 |
| }, |
| { |
| "epoch": 0.08816986855409505, |
| "grad_norm": 2.8459818363189697, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.46099379658699036, |
| "memory(GiB)": 74.11, |
| "step": 109, |
| "token_acc": 0.821875, |
| "train_speed(iter/s)": 0.022748 |
| }, |
| { |
| "epoch": 0.08897876643073811, |
| "grad_norm": 3.143099546432495, |
| "learning_rate": 9.958880263130084e-06, |
| "loss": 0.48788702487945557, |
| "memory(GiB)": 74.11, |
| "step": 110, |
| "token_acc": 0.8125, |
| "train_speed(iter/s)": 0.022748 |
| }, |
| { |
| "epoch": 0.0897876643073812, |
| "grad_norm": 3.5926871299743652, |
| "learning_rate": 9.957151570411317e-06, |
| "loss": 0.5500156879425049, |
| "memory(GiB)": 74.11, |
| "step": 111, |
| "token_acc": 0.8222222222222222, |
| "train_speed(iter/s)": 0.022748 |
| }, |
| { |
| "epoch": 0.09059656218402427, |
| "grad_norm": 5.149491310119629, |
| "learning_rate": 9.955387440773902e-06, |
| "loss": 0.5181611776351929, |
| "memory(GiB)": 74.11, |
| "step": 112, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022749 |
| }, |
| { |
| "epoch": 0.09140546006066734, |
| "grad_norm": 4.696843147277832, |
| "learning_rate": 9.953587886828973e-06, |
| "loss": 0.5575085282325745, |
| "memory(GiB)": 74.11, |
| "step": 113, |
| "token_acc": 0.7924528301886793, |
| "train_speed(iter/s)": 0.02275 |
| }, |
| { |
| "epoch": 0.09221435793731042, |
| "grad_norm": 4.4397053718566895, |
| "learning_rate": 9.951752921440904e-06, |
| "loss": 0.5986132621765137, |
| "memory(GiB)": 74.11, |
| "step": 114, |
| "token_acc": 0.8097560975609757, |
| "train_speed(iter/s)": 0.022749 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 3.5311803817749023, |
| "learning_rate": 9.949882557727215e-06, |
| "loss": 0.47439733147621155, |
| "memory(GiB)": 74.11, |
| "step": 115, |
| "token_acc": 0.8576779026217228, |
| "train_speed(iter/s)": 0.022749 |
| }, |
| { |
| "epoch": 0.09383215369059657, |
| "grad_norm": 4.034605503082275, |
| "learning_rate": 9.947976809058468e-06, |
| "loss": 0.52587890625, |
| "memory(GiB)": 74.11, |
| "step": 116, |
| "token_acc": 0.8388888888888889, |
| "train_speed(iter/s)": 0.02275 |
| }, |
| { |
| "epoch": 0.09464105156723963, |
| "grad_norm": 2.4622230529785156, |
| "learning_rate": 9.946035689058189e-06, |
| "loss": 0.5111696720123291, |
| "memory(GiB)": 74.11, |
| "step": 117, |
| "token_acc": 0.8653846153846154, |
| "train_speed(iter/s)": 0.02275 |
| }, |
| { |
| "epoch": 0.09544994944388271, |
| "grad_norm": 4.2029523849487305, |
| "learning_rate": 9.944059211602752e-06, |
| "loss": 0.644461452960968, |
| "memory(GiB)": 74.11, |
| "step": 118, |
| "token_acc": 0.8391304347826087, |
| "train_speed(iter/s)": 0.02275 |
| }, |
| { |
| "epoch": 0.09625884732052578, |
| "grad_norm": 3.6433732509613037, |
| "learning_rate": 9.942047390821296e-06, |
| "loss": 0.529866099357605, |
| "memory(GiB)": 74.11, |
| "step": 119, |
| "token_acc": 0.8287671232876712, |
| "train_speed(iter/s)": 0.02275 |
| }, |
| { |
| "epoch": 0.09706774519716886, |
| "grad_norm": 2.930225133895874, |
| "learning_rate": 9.940000241095616e-06, |
| "loss": 0.53721022605896, |
| "memory(GiB)": 74.11, |
| "step": 120, |
| "token_acc": 0.8637873754152824, |
| "train_speed(iter/s)": 0.022751 |
| }, |
| { |
| "epoch": 0.09787664307381193, |
| "grad_norm": 3.059379816055298, |
| "learning_rate": 9.937917777060057e-06, |
| "loss": 0.5285677909851074, |
| "memory(GiB)": 74.11, |
| "step": 121, |
| "token_acc": 0.7914691943127962, |
| "train_speed(iter/s)": 0.022752 |
| }, |
| { |
| "epoch": 0.09868554095045501, |
| "grad_norm": 3.1179027557373047, |
| "learning_rate": 9.935800013601415e-06, |
| "loss": 0.543626606464386, |
| "memory(GiB)": 74.11, |
| "step": 122, |
| "token_acc": 0.8638132295719845, |
| "train_speed(iter/s)": 0.022751 |
| }, |
| { |
| "epoch": 0.09949443882709808, |
| "grad_norm": 2.9850940704345703, |
| "learning_rate": 9.933646965858832e-06, |
| "loss": 0.5759721994400024, |
| "memory(GiB)": 74.11, |
| "step": 123, |
| "token_acc": 0.8392857142857143, |
| "train_speed(iter/s)": 0.022752 |
| }, |
| { |
| "epoch": 0.10030333670374116, |
| "grad_norm": 3.2056992053985596, |
| "learning_rate": 9.931458649223683e-06, |
| "loss": 0.5128383636474609, |
| "memory(GiB)": 74.11, |
| "step": 124, |
| "token_acc": 0.8404255319148937, |
| "train_speed(iter/s)": 0.022753 |
| }, |
| { |
| "epoch": 0.10111223458038422, |
| "grad_norm": 3.4550704956054688, |
| "learning_rate": 9.929235079339466e-06, |
| "loss": 0.4931023418903351, |
| "memory(GiB)": 74.11, |
| "step": 125, |
| "token_acc": 0.7634069400630915, |
| "train_speed(iter/s)": 0.022754 |
| }, |
| { |
| "epoch": 0.1019211324570273, |
| "grad_norm": 4.975637912750244, |
| "learning_rate": 9.926976272101693e-06, |
| "loss": 0.5036507844924927, |
| "memory(GiB)": 74.11, |
| "step": 126, |
| "token_acc": 0.8422818791946308, |
| "train_speed(iter/s)": 0.022754 |
| }, |
| { |
| "epoch": 0.10273003033367037, |
| "grad_norm": 3.2330217361450195, |
| "learning_rate": 9.92468224365778e-06, |
| "loss": 0.4464947581291199, |
| "memory(GiB)": 74.11, |
| "step": 127, |
| "token_acc": 0.8804347826086957, |
| "train_speed(iter/s)": 0.022754 |
| }, |
| { |
| "epoch": 0.10353892821031345, |
| "grad_norm": 2.581622362136841, |
| "learning_rate": 9.922353010406918e-06, |
| "loss": 0.5149933695793152, |
| "memory(GiB)": 74.11, |
| "step": 128, |
| "token_acc": 0.8318181818181818, |
| "train_speed(iter/s)": 0.022755 |
| }, |
| { |
| "epoch": 0.10434782608695652, |
| "grad_norm": 2.6486399173736572, |
| "learning_rate": 9.919988588999971e-06, |
| "loss": 0.5142784118652344, |
| "memory(GiB)": 74.11, |
| "step": 129, |
| "token_acc": 0.8621908127208481, |
| "train_speed(iter/s)": 0.022756 |
| }, |
| { |
| "epoch": 0.1051567239635996, |
| "grad_norm": 3.3094420433044434, |
| "learning_rate": 9.917588996339352e-06, |
| "loss": 0.5297855734825134, |
| "memory(GiB)": 74.11, |
| "step": 130, |
| "token_acc": 0.8177339901477833, |
| "train_speed(iter/s)": 0.022757 |
| }, |
| { |
| "epoch": 0.10596562184024266, |
| "grad_norm": 2.769592046737671, |
| "learning_rate": 9.915154249578894e-06, |
| "loss": 0.5081691145896912, |
| "memory(GiB)": 74.11, |
| "step": 131, |
| "token_acc": 0.8755364806866953, |
| "train_speed(iter/s)": 0.022758 |
| }, |
| { |
| "epoch": 0.10677451971688574, |
| "grad_norm": 2.8748629093170166, |
| "learning_rate": 9.91268436612374e-06, |
| "loss": 0.5512316823005676, |
| "memory(GiB)": 74.11, |
| "step": 132, |
| "token_acc": 0.8618181818181818, |
| "train_speed(iter/s)": 0.022757 |
| }, |
| { |
| "epoch": 0.10758341759352881, |
| "grad_norm": 3.3325603008270264, |
| "learning_rate": 9.91017936363021e-06, |
| "loss": 0.48270368576049805, |
| "memory(GiB)": 74.11, |
| "step": 133, |
| "token_acc": 0.8526315789473684, |
| "train_speed(iter/s)": 0.022757 |
| }, |
| { |
| "epoch": 0.10839231547017189, |
| "grad_norm": 4.002824783325195, |
| "learning_rate": 9.907639260005682e-06, |
| "loss": 0.48671406507492065, |
| "memory(GiB)": 74.11, |
| "step": 134, |
| "token_acc": 0.8547717842323651, |
| "train_speed(iter/s)": 0.022758 |
| }, |
| { |
| "epoch": 0.10920121334681497, |
| "grad_norm": 3.655064344406128, |
| "learning_rate": 9.90506407340845e-06, |
| "loss": 0.5502010583877563, |
| "memory(GiB)": 74.11, |
| "step": 135, |
| "token_acc": 0.7976190476190477, |
| "train_speed(iter/s)": 0.022758 |
| }, |
| { |
| "epoch": 0.11001011122345804, |
| "grad_norm": 3.198472023010254, |
| "learning_rate": 9.902453822247615e-06, |
| "loss": 0.47892680764198303, |
| "memory(GiB)": 74.11, |
| "step": 136, |
| "token_acc": 0.8318965517241379, |
| "train_speed(iter/s)": 0.022759 |
| }, |
| { |
| "epoch": 0.11081900910010112, |
| "grad_norm": 2.7282052040100098, |
| "learning_rate": 9.899808525182935e-06, |
| "loss": 0.49719753861427307, |
| "memory(GiB)": 74.11, |
| "step": 137, |
| "token_acc": 0.8417508417508418, |
| "train_speed(iter/s)": 0.022759 |
| }, |
| { |
| "epoch": 0.11162790697674418, |
| "grad_norm": 3.089430093765259, |
| "learning_rate": 9.897128201124699e-06, |
| "loss": 0.532843291759491, |
| "memory(GiB)": 74.11, |
| "step": 138, |
| "token_acc": 0.8152173913043478, |
| "train_speed(iter/s)": 0.022759 |
| }, |
| { |
| "epoch": 0.11243680485338726, |
| "grad_norm": 6.901391983032227, |
| "learning_rate": 9.894412869233597e-06, |
| "loss": 0.5238447189331055, |
| "memory(GiB)": 74.11, |
| "step": 139, |
| "token_acc": 0.8558558558558559, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11324570273003033, |
| "grad_norm": 3.125302791595459, |
| "learning_rate": 9.89166254892057e-06, |
| "loss": 0.5573660135269165, |
| "memory(GiB)": 74.11, |
| "step": 140, |
| "token_acc": 0.8068181818181818, |
| "train_speed(iter/s)": 0.022761 |
| }, |
| { |
| "epoch": 0.11405460060667341, |
| "grad_norm": 3.38075852394104, |
| "learning_rate": 9.888877259846686e-06, |
| "loss": 0.524215817451477, |
| "memory(GiB)": 74.33, |
| "step": 141, |
| "token_acc": 0.8505338078291815, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11486349848331648, |
| "grad_norm": 3.413461446762085, |
| "learning_rate": 9.886057021922984e-06, |
| "loss": 0.49190688133239746, |
| "memory(GiB)": 74.33, |
| "step": 142, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11567239635995956, |
| "grad_norm": 4.181169509887695, |
| "learning_rate": 9.88320185531035e-06, |
| "loss": 0.542352557182312, |
| "memory(GiB)": 74.33, |
| "step": 143, |
| "token_acc": 0.8503649635036497, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11648129423660263, |
| "grad_norm": 2.688110828399658, |
| "learning_rate": 9.880311780419353e-06, |
| "loss": 0.5551398992538452, |
| "memory(GiB)": 74.33, |
| "step": 144, |
| "token_acc": 0.8007246376811594, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.1172901921132457, |
| "grad_norm": 3.9851884841918945, |
| "learning_rate": 9.877386817910118e-06, |
| "loss": 0.49384480714797974, |
| "memory(GiB)": 74.33, |
| "step": 145, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11809908998988877, |
| "grad_norm": 2.6871986389160156, |
| "learning_rate": 9.874426988692163e-06, |
| "loss": 0.5515081286430359, |
| "memory(GiB)": 74.33, |
| "step": 146, |
| "token_acc": 0.8006644518272426, |
| "train_speed(iter/s)": 0.02276 |
| }, |
| { |
| "epoch": 0.11890798786653185, |
| "grad_norm": 2.288706064224243, |
| "learning_rate": 9.871432313924255e-06, |
| "loss": 0.4420849084854126, |
| "memory(GiB)": 74.33, |
| "step": 147, |
| "token_acc": 0.8091872791519434, |
| "train_speed(iter/s)": 0.022761 |
| }, |
| { |
| "epoch": 0.11971688574317492, |
| "grad_norm": 2.6680195331573486, |
| "learning_rate": 9.868402815014266e-06, |
| "loss": 0.4678765833377838, |
| "memory(GiB)": 74.33, |
| "step": 148, |
| "token_acc": 0.8604651162790697, |
| "train_speed(iter/s)": 0.022761 |
| }, |
| { |
| "epoch": 0.120525783619818, |
| "grad_norm": 2.3895063400268555, |
| "learning_rate": 9.865338513619005e-06, |
| "loss": 0.4832306504249573, |
| "memory(GiB)": 74.33, |
| "step": 149, |
| "token_acc": 0.8480565371024735, |
| "train_speed(iter/s)": 0.022761 |
| }, |
| { |
| "epoch": 0.12133468149646107, |
| "grad_norm": 2.4143781661987305, |
| "learning_rate": 9.86223943164408e-06, |
| "loss": 0.49357208609580994, |
| "memory(GiB)": 74.33, |
| "step": 150, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022762 |
| }, |
| { |
| "epoch": 0.12214357937310415, |
| "grad_norm": 3.0790457725524902, |
| "learning_rate": 9.859105591243728e-06, |
| "loss": 0.4809868633747101, |
| "memory(GiB)": 74.33, |
| "step": 151, |
| "token_acc": 0.8617021276595744, |
| "train_speed(iter/s)": 0.022762 |
| }, |
| { |
| "epoch": 0.12295247724974721, |
| "grad_norm": 3.636885643005371, |
| "learning_rate": 9.85593701482066e-06, |
| "loss": 0.5743482112884521, |
| "memory(GiB)": 74.33, |
| "step": 152, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022763 |
| }, |
| { |
| "epoch": 0.1237613751263903, |
| "grad_norm": 2.7628660202026367, |
| "learning_rate": 9.85273372502591e-06, |
| "loss": 0.46740931272506714, |
| "memory(GiB)": 74.33, |
| "step": 153, |
| "token_acc": 0.8658008658008658, |
| "train_speed(iter/s)": 0.022763 |
| }, |
| { |
| "epoch": 0.12457027300303336, |
| "grad_norm": 3.155374765396118, |
| "learning_rate": 9.849495744758654e-06, |
| "loss": 0.5438951253890991, |
| "memory(GiB)": 74.33, |
| "step": 154, |
| "token_acc": 0.8550185873605948, |
| "train_speed(iter/s)": 0.022763 |
| }, |
| { |
| "epoch": 0.12537917087967643, |
| "grad_norm": 2.9564826488494873, |
| "learning_rate": 9.846223097166072e-06, |
| "loss": 0.537287175655365, |
| "memory(GiB)": 74.33, |
| "step": 155, |
| "token_acc": 0.8456140350877193, |
| "train_speed(iter/s)": 0.022764 |
| }, |
| { |
| "epoch": 0.12618806875631952, |
| "grad_norm": 2.997941017150879, |
| "learning_rate": 9.842915805643156e-06, |
| "loss": 0.4728841781616211, |
| "memory(GiB)": 74.33, |
| "step": 156, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.022764 |
| }, |
| { |
| "epoch": 0.1269969666329626, |
| "grad_norm": 4.7811431884765625, |
| "learning_rate": 9.839573893832564e-06, |
| "loss": 0.48365718126296997, |
| "memory(GiB)": 74.33, |
| "step": 157, |
| "token_acc": 0.8501742160278746, |
| "train_speed(iter/s)": 0.022764 |
| }, |
| { |
| "epoch": 0.12780586450960565, |
| "grad_norm": 2.611847400665283, |
| "learning_rate": 9.836197385624434e-06, |
| "loss": 0.4837043285369873, |
| "memory(GiB)": 74.33, |
| "step": 158, |
| "token_acc": 0.8952879581151832, |
| "train_speed(iter/s)": 0.022765 |
| }, |
| { |
| "epoch": 0.12861476238624875, |
| "grad_norm": 3.331645965576172, |
| "learning_rate": 9.83278630515623e-06, |
| "loss": 0.5694408416748047, |
| "memory(GiB)": 74.33, |
| "step": 159, |
| "token_acc": 0.8177966101694916, |
| "train_speed(iter/s)": 0.022765 |
| }, |
| { |
| "epoch": 0.12942366026289182, |
| "grad_norm": 3.4143426418304443, |
| "learning_rate": 9.829340676812553e-06, |
| "loss": 0.5614443421363831, |
| "memory(GiB)": 74.33, |
| "step": 160, |
| "token_acc": 0.8487972508591065, |
| "train_speed(iter/s)": 0.022765 |
| }, |
| { |
| "epoch": 0.13023255813953488, |
| "grad_norm": 2.541956901550293, |
| "learning_rate": 9.825860525224982e-06, |
| "loss": 0.48274075984954834, |
| "memory(GiB)": 74.33, |
| "step": 161, |
| "token_acc": 0.8207885304659498, |
| "train_speed(iter/s)": 0.022765 |
| }, |
| { |
| "epoch": 0.13104145601617795, |
| "grad_norm": 2.933729410171509, |
| "learning_rate": 9.822345875271884e-06, |
| "loss": 0.47431913018226624, |
| "memory(GiB)": 74.33, |
| "step": 162, |
| "token_acc": 0.8713450292397661, |
| "train_speed(iter/s)": 0.022766 |
| }, |
| { |
| "epoch": 0.13185035389282104, |
| "grad_norm": 2.8055856227874756, |
| "learning_rate": 9.818796752078246e-06, |
| "loss": 0.5554227232933044, |
| "memory(GiB)": 74.33, |
| "step": 163, |
| "token_acc": 0.8627450980392157, |
| "train_speed(iter/s)": 0.022766 |
| }, |
| { |
| "epoch": 0.1326592517694641, |
| "grad_norm": 2.662719488143921, |
| "learning_rate": 9.815213181015489e-06, |
| "loss": 0.4458203911781311, |
| "memory(GiB)": 74.33, |
| "step": 164, |
| "token_acc": 0.8825622775800712, |
| "train_speed(iter/s)": 0.022766 |
| }, |
| { |
| "epoch": 0.13346814964610718, |
| "grad_norm": 5.495974540710449, |
| "learning_rate": 9.811595187701296e-06, |
| "loss": 0.4638062119483948, |
| "memory(GiB)": 74.33, |
| "step": 165, |
| "token_acc": 0.8227848101265823, |
| "train_speed(iter/s)": 0.022766 |
| }, |
| { |
| "epoch": 0.13427704752275024, |
| "grad_norm": 84.01348114013672, |
| "learning_rate": 9.807942797999412e-06, |
| "loss": 0.6657401323318481, |
| "memory(GiB)": 74.33, |
| "step": 166, |
| "token_acc": 0.8483606557377049, |
| "train_speed(iter/s)": 0.022767 |
| }, |
| { |
| "epoch": 0.13508594539939334, |
| "grad_norm": 138.69554138183594, |
| "learning_rate": 9.804256038019482e-06, |
| "loss": 0.6723936796188354, |
| "memory(GiB)": 74.33, |
| "step": 167, |
| "token_acc": 0.8143712574850299, |
| "train_speed(iter/s)": 0.022767 |
| }, |
| { |
| "epoch": 0.1358948432760364, |
| "grad_norm": 11.966114044189453, |
| "learning_rate": 9.800534934116843e-06, |
| "loss": 0.5228875875473022, |
| "memory(GiB)": 74.33, |
| "step": 168, |
| "token_acc": 0.8411552346570397, |
| "train_speed(iter/s)": 0.022768 |
| }, |
| { |
| "epoch": 0.13670374115267947, |
| "grad_norm": 3.311744451522827, |
| "learning_rate": 9.796779512892346e-06, |
| "loss": 0.5082340240478516, |
| "memory(GiB)": 74.33, |
| "step": 169, |
| "token_acc": 0.8514056224899599, |
| "train_speed(iter/s)": 0.022768 |
| }, |
| { |
| "epoch": 0.13751263902932254, |
| "grad_norm": 2.891026735305786, |
| "learning_rate": 9.792989801192167e-06, |
| "loss": 0.4903358519077301, |
| "memory(GiB)": 74.33, |
| "step": 170, |
| "token_acc": 0.8439490445859873, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.13832153690596563, |
| "grad_norm": 2.643505096435547, |
| "learning_rate": 9.789165826107612e-06, |
| "loss": 0.514635443687439, |
| "memory(GiB)": 74.33, |
| "step": 171, |
| "token_acc": 0.8709677419354839, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 2.8423476219177246, |
| "learning_rate": 9.785307614974922e-06, |
| "loss": 0.5150923728942871, |
| "memory(GiB)": 74.33, |
| "step": 172, |
| "token_acc": 0.796875, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.13993933265925176, |
| "grad_norm": 2.4324862957000732, |
| "learning_rate": 9.781415195375078e-06, |
| "loss": 0.4808637797832489, |
| "memory(GiB)": 74.33, |
| "step": 173, |
| "token_acc": 0.8296529968454258, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.14074823053589483, |
| "grad_norm": 2.2403547763824463, |
| "learning_rate": 9.77748859513361e-06, |
| "loss": 0.4378691017627716, |
| "memory(GiB)": 74.33, |
| "step": 174, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.14155712841253792, |
| "grad_norm": 2.552274703979492, |
| "learning_rate": 9.77352784232039e-06, |
| "loss": 0.4910467565059662, |
| "memory(GiB)": 74.33, |
| "step": 175, |
| "token_acc": 0.8369565217391305, |
| "train_speed(iter/s)": 0.022769 |
| }, |
| { |
| "epoch": 0.142366026289181, |
| "grad_norm": 2.844341278076172, |
| "learning_rate": 9.769532965249435e-06, |
| "loss": 0.5578226447105408, |
| "memory(GiB)": 74.33, |
| "step": 176, |
| "token_acc": 0.8274509803921568, |
| "train_speed(iter/s)": 0.02277 |
| }, |
| { |
| "epoch": 0.14317492416582406, |
| "grad_norm": 2.700742483139038, |
| "learning_rate": 9.765503992478704e-06, |
| "loss": 0.4441274404525757, |
| "memory(GiB)": 74.33, |
| "step": 177, |
| "token_acc": 0.8543689320388349, |
| "train_speed(iter/s)": 0.02277 |
| }, |
| { |
| "epoch": 0.14398382204246715, |
| "grad_norm": 2.824364185333252, |
| "learning_rate": 9.761440952809897e-06, |
| "loss": 0.5075165033340454, |
| "memory(GiB)": 74.33, |
| "step": 178, |
| "token_acc": 0.8222222222222222, |
| "train_speed(iter/s)": 0.022771 |
| }, |
| { |
| "epoch": 0.14479271991911022, |
| "grad_norm": 3.220512628555298, |
| "learning_rate": 9.757343875288242e-06, |
| "loss": 0.47000789642333984, |
| "memory(GiB)": 74.33, |
| "step": 179, |
| "token_acc": 0.828125, |
| "train_speed(iter/s)": 0.022772 |
| }, |
| { |
| "epoch": 0.14560161779575329, |
| "grad_norm": 2.345557689666748, |
| "learning_rate": 9.75321278920229e-06, |
| "loss": 0.5143015384674072, |
| "memory(GiB)": 74.33, |
| "step": 180, |
| "token_acc": 0.8530465949820788, |
| "train_speed(iter/s)": 0.022771 |
| }, |
| { |
| "epoch": 0.14641051567239635, |
| "grad_norm": 3.0752451419830322, |
| "learning_rate": 9.749047724083717e-06, |
| "loss": 0.5505862236022949, |
| "memory(GiB)": 74.33, |
| "step": 181, |
| "token_acc": 0.8475609756097561, |
| "train_speed(iter/s)": 0.022772 |
| }, |
| { |
| "epoch": 0.14721941354903945, |
| "grad_norm": 2.662064552307129, |
| "learning_rate": 9.74484870970709e-06, |
| "loss": 0.5013206601142883, |
| "memory(GiB)": 74.33, |
| "step": 182, |
| "token_acc": 0.873015873015873, |
| "train_speed(iter/s)": 0.022772 |
| }, |
| { |
| "epoch": 0.1480283114256825, |
| "grad_norm": 3.027050256729126, |
| "learning_rate": 9.74061577608968e-06, |
| "loss": 0.554660439491272, |
| "memory(GiB)": 74.33, |
| "step": 183, |
| "token_acc": 0.8388278388278388, |
| "train_speed(iter/s)": 0.022772 |
| }, |
| { |
| "epoch": 0.14883720930232558, |
| "grad_norm": 3.55436635017395, |
| "learning_rate": 9.736348953491224e-06, |
| "loss": 0.5106396675109863, |
| "memory(GiB)": 74.33, |
| "step": 184, |
| "token_acc": 0.797979797979798, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.14964610717896865, |
| "grad_norm": 3.821077585220337, |
| "learning_rate": 9.732048272413725e-06, |
| "loss": 0.5329099297523499, |
| "memory(GiB)": 74.33, |
| "step": 185, |
| "token_acc": 0.8278388278388278, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.15045500505561174, |
| "grad_norm": 2.861586332321167, |
| "learning_rate": 9.727713763601226e-06, |
| "loss": 0.48308447003364563, |
| "memory(GiB)": 74.33, |
| "step": 186, |
| "token_acc": 0.8556701030927835, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.1512639029322548, |
| "grad_norm": 3.025512456893921, |
| "learning_rate": 9.723345458039595e-06, |
| "loss": 0.4873977601528168, |
| "memory(GiB)": 74.33, |
| "step": 187, |
| "token_acc": 0.8426573426573427, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.15207280080889787, |
| "grad_norm": 2.5745112895965576, |
| "learning_rate": 9.718943386956298e-06, |
| "loss": 0.538512110710144, |
| "memory(GiB)": 74.33, |
| "step": 188, |
| "token_acc": 0.8155339805825242, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.15288169868554094, |
| "grad_norm": 2.985320806503296, |
| "learning_rate": 9.714507581820181e-06, |
| "loss": 0.5343044400215149, |
| "memory(GiB)": 74.33, |
| "step": 189, |
| "token_acc": 0.7977099236641222, |
| "train_speed(iter/s)": 0.022773 |
| }, |
| { |
| "epoch": 0.15369059656218403, |
| "grad_norm": 3.339107036590576, |
| "learning_rate": 9.71003807434124e-06, |
| "loss": 0.5087035298347473, |
| "memory(GiB)": 74.33, |
| "step": 190, |
| "token_acc": 0.8478260869565217, |
| "train_speed(iter/s)": 0.022774 |
| }, |
| { |
| "epoch": 0.1544994944388271, |
| "grad_norm": 2.712999105453491, |
| "learning_rate": 9.705534896470401e-06, |
| "loss": 0.4998268783092499, |
| "memory(GiB)": 74.33, |
| "step": 191, |
| "token_acc": 0.8514056224899599, |
| "train_speed(iter/s)": 0.022774 |
| }, |
| { |
| "epoch": 0.15530839231547017, |
| "grad_norm": 3.6283011436462402, |
| "learning_rate": 9.700998080399287e-06, |
| "loss": 0.4922446608543396, |
| "memory(GiB)": 74.33, |
| "step": 192, |
| "token_acc": 0.810126582278481, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.15611729019211323, |
| "grad_norm": 2.546504020690918, |
| "learning_rate": 9.696427658559983e-06, |
| "loss": 0.5213550925254822, |
| "memory(GiB)": 74.33, |
| "step": 193, |
| "token_acc": 0.8381294964028777, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.15692618806875633, |
| "grad_norm": 3.0982861518859863, |
| "learning_rate": 9.691823663624817e-06, |
| "loss": 0.5097714066505432, |
| "memory(GiB)": 74.33, |
| "step": 194, |
| "token_acc": 0.8066914498141264, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1577350859453994, |
| "grad_norm": 2.8496217727661133, |
| "learning_rate": 9.687186128506116e-06, |
| "loss": 0.5594595074653625, |
| "memory(GiB)": 74.33, |
| "step": 195, |
| "token_acc": 0.8622222222222222, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.15854398382204246, |
| "grad_norm": 2.693981647491455, |
| "learning_rate": 9.682515086355973e-06, |
| "loss": 0.5774262547492981, |
| "memory(GiB)": 74.33, |
| "step": 196, |
| "token_acc": 0.7975708502024291, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.15935288169868553, |
| "grad_norm": 3.6492180824279785, |
| "learning_rate": 9.677810570566011e-06, |
| "loss": 0.5103310346603394, |
| "memory(GiB)": 74.33, |
| "step": 197, |
| "token_acc": 0.8129032258064516, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16016177957532862, |
| "grad_norm": 2.6552608013153076, |
| "learning_rate": 9.673072614767147e-06, |
| "loss": 0.4744953513145447, |
| "memory(GiB)": 74.33, |
| "step": 198, |
| "token_acc": 0.8699186991869918, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1609706774519717, |
| "grad_norm": 2.7724416255950928, |
| "learning_rate": 9.668301252829344e-06, |
| "loss": 0.4586220979690552, |
| "memory(GiB)": 74.33, |
| "step": 199, |
| "token_acc": 0.8583690987124464, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16177957532861476, |
| "grad_norm": 3.1484899520874023, |
| "learning_rate": 9.663496518861381e-06, |
| "loss": 0.6070712208747864, |
| "memory(GiB)": 74.33, |
| "step": 200, |
| "token_acc": 0.8131313131313131, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.16258847320525785, |
| "grad_norm": 4.5751142501831055, |
| "learning_rate": 9.658658447210595e-06, |
| "loss": 0.5579652786254883, |
| "memory(GiB)": 74.33, |
| "step": 201, |
| "token_acc": 0.8524590163934426, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.16339737108190092, |
| "grad_norm": 2.3848133087158203, |
| "learning_rate": 9.653787072462644e-06, |
| "loss": 0.47080251574516296, |
| "memory(GiB)": 74.33, |
| "step": 202, |
| "token_acc": 0.9058823529411765, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16420626895854398, |
| "grad_norm": 2.686843156814575, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.46535661816596985, |
| "memory(GiB)": 74.33, |
| "step": 203, |
| "token_acc": 0.8138528138528138, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16501516683518705, |
| "grad_norm": 3.4251608848571777, |
| "learning_rate": 9.643944553207992e-06, |
| "loss": 0.42402440309524536, |
| "memory(GiB)": 74.33, |
| "step": 204, |
| "token_acc": 0.870722433460076, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16582406471183014, |
| "grad_norm": 3.019339084625244, |
| "learning_rate": 9.63897347906197e-06, |
| "loss": 0.5313763618469238, |
| "memory(GiB)": 74.33, |
| "step": 205, |
| "token_acc": 0.8062283737024222, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1666329625884732, |
| "grad_norm": 2.4439475536346436, |
| "learning_rate": 9.633969242539643e-06, |
| "loss": 0.47857385873794556, |
| "memory(GiB)": 74.33, |
| "step": 206, |
| "token_acc": 0.8204334365325078, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16744186046511628, |
| "grad_norm": 2.991232395172119, |
| "learning_rate": 9.628931879414519e-06, |
| "loss": 0.5055133104324341, |
| "memory(GiB)": 74.33, |
| "step": 207, |
| "token_acc": 0.8614864864864865, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.16825075834175934, |
| "grad_norm": 2.8914828300476074, |
| "learning_rate": 9.623861425696919e-06, |
| "loss": 0.48094457387924194, |
| "memory(GiB)": 74.33, |
| "step": 208, |
| "token_acc": 0.8517110266159695, |
| "train_speed(iter/s)": 0.022774 |
| }, |
| { |
| "epoch": 0.16905965621840244, |
| "grad_norm": 3.07913875579834, |
| "learning_rate": 9.618757917633724e-06, |
| "loss": 0.4644262492656708, |
| "memory(GiB)": 74.33, |
| "step": 209, |
| "token_acc": 0.8459016393442623, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1698685540950455, |
| "grad_norm": 3.3538849353790283, |
| "learning_rate": 9.6136213917081e-06, |
| "loss": 0.49402916431427, |
| "memory(GiB)": 74.33, |
| "step": 210, |
| "token_acc": 0.8023255813953488, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17067745197168857, |
| "grad_norm": 2.8253116607666016, |
| "learning_rate": 9.608451884639249e-06, |
| "loss": 0.5242215394973755, |
| "memory(GiB)": 74.33, |
| "step": 211, |
| "token_acc": 0.8426966292134831, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17148634984833164, |
| "grad_norm": 3.1118881702423096, |
| "learning_rate": 9.603249433382145e-06, |
| "loss": 0.4387696385383606, |
| "memory(GiB)": 74.33, |
| "step": 212, |
| "token_acc": 0.8384279475982532, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.17229524772497473, |
| "grad_norm": 3.0564656257629395, |
| "learning_rate": 9.598014075127267e-06, |
| "loss": 0.4570474922657013, |
| "memory(GiB)": 74.33, |
| "step": 213, |
| "token_acc": 0.8423423423423423, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1731041456016178, |
| "grad_norm": 2.173403024673462, |
| "learning_rate": 9.592745847300334e-06, |
| "loss": 0.4705919027328491, |
| "memory(GiB)": 74.33, |
| "step": 214, |
| "token_acc": 0.8900343642611683, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 2.676457405090332, |
| "learning_rate": 9.587444787562038e-06, |
| "loss": 0.4593808650970459, |
| "memory(GiB)": 74.33, |
| "step": 215, |
| "token_acc": 0.8425655976676385, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.17472194135490393, |
| "grad_norm": 2.6276440620422363, |
| "learning_rate": 9.582110933807778e-06, |
| "loss": 0.5120923519134521, |
| "memory(GiB)": 74.33, |
| "step": 216, |
| "token_acc": 0.8402555910543131, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17553083923154703, |
| "grad_norm": 2.9223127365112305, |
| "learning_rate": 9.57674432416738e-06, |
| "loss": 0.5409821271896362, |
| "memory(GiB)": 74.33, |
| "step": 217, |
| "token_acc": 0.8786885245901639, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1763397371081901, |
| "grad_norm": 2.7943737506866455, |
| "learning_rate": 9.571344997004833e-06, |
| "loss": 0.5195801854133606, |
| "memory(GiB)": 74.33, |
| "step": 218, |
| "token_acc": 0.8904761904761904, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17714863498483316, |
| "grad_norm": 3.1022114753723145, |
| "learning_rate": 9.565912990918014e-06, |
| "loss": 0.5200923085212708, |
| "memory(GiB)": 74.33, |
| "step": 219, |
| "token_acc": 0.8181818181818182, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17795753286147623, |
| "grad_norm": 2.570176124572754, |
| "learning_rate": 9.560448344738409e-06, |
| "loss": 0.5091375112533569, |
| "memory(GiB)": 74.33, |
| "step": 220, |
| "token_acc": 0.823045267489712, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.17876643073811932, |
| "grad_norm": 3.0033743381500244, |
| "learning_rate": 9.554951097530833e-06, |
| "loss": 0.4781090021133423, |
| "memory(GiB)": 74.33, |
| "step": 221, |
| "token_acc": 0.8544061302681992, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1795753286147624, |
| "grad_norm": 2.6318182945251465, |
| "learning_rate": 9.549421288593157e-06, |
| "loss": 0.4314906597137451, |
| "memory(GiB)": 74.33, |
| "step": 222, |
| "token_acc": 0.8851063829787233, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.18038422649140545, |
| "grad_norm": 2.8283627033233643, |
| "learning_rate": 9.543858957456027e-06, |
| "loss": 0.5246187448501587, |
| "memory(GiB)": 74.33, |
| "step": 223, |
| "token_acc": 0.8140495867768595, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.18119312436804855, |
| "grad_norm": 2.760436773300171, |
| "learning_rate": 9.538264143882573e-06, |
| "loss": 0.583112359046936, |
| "memory(GiB)": 74.33, |
| "step": 224, |
| "token_acc": 0.8316831683168316, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.1820020222446916, |
| "grad_norm": 2.844444513320923, |
| "learning_rate": 9.532636887868132e-06, |
| "loss": 0.5270188450813293, |
| "memory(GiB)": 74.33, |
| "step": 225, |
| "token_acc": 0.8197424892703863, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.18281092012133468, |
| "grad_norm": 3.431413173675537, |
| "learning_rate": 9.526977229639967e-06, |
| "loss": 0.6098812818527222, |
| "memory(GiB)": 74.33, |
| "step": 226, |
| "token_acc": 0.8528138528138528, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.18361981799797775, |
| "grad_norm": 3.651771068572998, |
| "learning_rate": 9.521285209656964e-06, |
| "loss": 0.5220578908920288, |
| "memory(GiB)": 74.33, |
| "step": 227, |
| "token_acc": 0.8111888111888111, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.18442871587462084, |
| "grad_norm": 2.586838960647583, |
| "learning_rate": 9.515560868609353e-06, |
| "loss": 0.5361062288284302, |
| "memory(GiB)": 74.33, |
| "step": 228, |
| "token_acc": 0.8318181818181818, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.1852376137512639, |
| "grad_norm": 3.409284830093384, |
| "learning_rate": 9.509804247418421e-06, |
| "loss": 0.5047948360443115, |
| "memory(GiB)": 74.33, |
| "step": 229, |
| "token_acc": 0.83, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 2.8747854232788086, |
| "learning_rate": 9.504015387236215e-06, |
| "loss": 0.4199560880661011, |
| "memory(GiB)": 74.33, |
| "step": 230, |
| "token_acc": 0.8304347826086956, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.18685540950455004, |
| "grad_norm": 3.537949800491333, |
| "learning_rate": 9.498194329445235e-06, |
| "loss": 0.48431631922721863, |
| "memory(GiB)": 74.33, |
| "step": 231, |
| "token_acc": 0.8588957055214724, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.18766430738119314, |
| "grad_norm": 2.270864486694336, |
| "learning_rate": 9.492341115658167e-06, |
| "loss": 0.43944597244262695, |
| "memory(GiB)": 74.33, |
| "step": 232, |
| "token_acc": 0.8387096774193549, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1884732052578362, |
| "grad_norm": 2.3423984050750732, |
| "learning_rate": 9.486455787717556e-06, |
| "loss": 0.4949726462364197, |
| "memory(GiB)": 74.33, |
| "step": 233, |
| "token_acc": 0.8244274809160306, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.18928210313447927, |
| "grad_norm": 2.186225175857544, |
| "learning_rate": 9.480538387695526e-06, |
| "loss": 0.5247252583503723, |
| "memory(GiB)": 74.33, |
| "step": 234, |
| "token_acc": 0.8256578947368421, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.19009100101112233, |
| "grad_norm": 6.916714191436768, |
| "learning_rate": 9.474588957893471e-06, |
| "loss": 0.5562118291854858, |
| "memory(GiB)": 74.33, |
| "step": 235, |
| "token_acc": 0.815668202764977, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.19089989888776543, |
| "grad_norm": 2.669564962387085, |
| "learning_rate": 9.468607540841755e-06, |
| "loss": 0.4648740589618683, |
| "memory(GiB)": 74.33, |
| "step": 236, |
| "token_acc": 0.8404255319148937, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.1917087967644085, |
| "grad_norm": 2.7446367740631104, |
| "learning_rate": 9.462594179299408e-06, |
| "loss": 0.47179466485977173, |
| "memory(GiB)": 74.33, |
| "step": 237, |
| "token_acc": 0.8296943231441049, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.19251769464105156, |
| "grad_norm": 2.733185052871704, |
| "learning_rate": 9.456548916253816e-06, |
| "loss": 0.43457281589508057, |
| "memory(GiB)": 74.33, |
| "step": 238, |
| "token_acc": 0.8382838283828383, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19332659251769463, |
| "grad_norm": 2.792586326599121, |
| "learning_rate": 9.450471794920425e-06, |
| "loss": 0.5208027362823486, |
| "memory(GiB)": 74.33, |
| "step": 239, |
| "token_acc": 0.8494623655913979, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19413549039433772, |
| "grad_norm": 3.106788158416748, |
| "learning_rate": 9.444362858742417e-06, |
| "loss": 0.5069155693054199, |
| "memory(GiB)": 74.33, |
| "step": 240, |
| "token_acc": 0.821917808219178, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.1949443882709808, |
| "grad_norm": 2.545304298400879, |
| "learning_rate": 9.438222151390413e-06, |
| "loss": 0.48083266615867615, |
| "memory(GiB)": 74.33, |
| "step": 241, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19575328614762386, |
| "grad_norm": 2.3545124530792236, |
| "learning_rate": 9.432049716762151e-06, |
| "loss": 0.45232367515563965, |
| "memory(GiB)": 74.33, |
| "step": 242, |
| "token_acc": 0.8584070796460177, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19656218402426692, |
| "grad_norm": 2.424670934677124, |
| "learning_rate": 9.425845598982178e-06, |
| "loss": 0.46154850721359253, |
| "memory(GiB)": 74.33, |
| "step": 243, |
| "token_acc": 0.8481848184818482, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19737108190091002, |
| "grad_norm": 3.0621895790100098, |
| "learning_rate": 9.419609842401529e-06, |
| "loss": 0.5216662883758545, |
| "memory(GiB)": 74.33, |
| "step": 244, |
| "token_acc": 0.8381742738589212, |
| "train_speed(iter/s)": 0.022775 |
| }, |
| { |
| "epoch": 0.19817997977755308, |
| "grad_norm": 3.4800291061401367, |
| "learning_rate": 9.41334249159742e-06, |
| "loss": 0.578390896320343, |
| "memory(GiB)": 74.33, |
| "step": 245, |
| "token_acc": 0.8411214953271028, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19898887765419615, |
| "grad_norm": 2.887791633605957, |
| "learning_rate": 9.407043591372917e-06, |
| "loss": 0.45752766728401184, |
| "memory(GiB)": 74.33, |
| "step": 246, |
| "token_acc": 0.8452830188679246, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.19979777553083924, |
| "grad_norm": 2.991569995880127, |
| "learning_rate": 9.400713186756625e-06, |
| "loss": 0.47424283623695374, |
| "memory(GiB)": 74.33, |
| "step": 247, |
| "token_acc": 0.8492063492063492, |
| "train_speed(iter/s)": 0.022776 |
| }, |
| { |
| "epoch": 0.2006066734074823, |
| "grad_norm": 2.222763776779175, |
| "learning_rate": 9.394351323002362e-06, |
| "loss": 0.4558030366897583, |
| "memory(GiB)": 74.33, |
| "step": 248, |
| "token_acc": 0.8471615720524017, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20141557128412538, |
| "grad_norm": 2.18121075630188, |
| "learning_rate": 9.387958045588837e-06, |
| "loss": 0.47976818680763245, |
| "memory(GiB)": 74.33, |
| "step": 249, |
| "token_acc": 0.8878048780487805, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20222446916076844, |
| "grad_norm": 2.4463536739349365, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.42482298612594604, |
| "memory(GiB)": 74.33, |
| "step": 250, |
| "token_acc": 0.8661971830985915, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20303336703741154, |
| "grad_norm": 2.2221012115478516, |
| "learning_rate": 9.375077432821322e-06, |
| "loss": 0.4842270016670227, |
| "memory(GiB)": 74.33, |
| "step": 251, |
| "token_acc": 0.8290909090909091, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.2038422649140546, |
| "grad_norm": 2.4321460723876953, |
| "learning_rate": 9.368590189546268e-06, |
| "loss": 0.49549242854118347, |
| "memory(GiB)": 74.33, |
| "step": 252, |
| "token_acc": 0.8470948012232415, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20465116279069767, |
| "grad_norm": 2.9055986404418945, |
| "learning_rate": 9.362071716769158e-06, |
| "loss": 0.604824423789978, |
| "memory(GiB)": 74.33, |
| "step": 253, |
| "token_acc": 0.8354430379746836, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20546006066734074, |
| "grad_norm": 2.3008358478546143, |
| "learning_rate": 9.355522061088242e-06, |
| "loss": 0.43147778511047363, |
| "memory(GiB)": 74.33, |
| "step": 254, |
| "token_acc": 0.8907563025210085, |
| "train_speed(iter/s)": 0.022777 |
| }, |
| { |
| "epoch": 0.20626895854398383, |
| "grad_norm": 2.770148515701294, |
| "learning_rate": 9.348941269324686e-06, |
| "loss": 0.4882833957672119, |
| "memory(GiB)": 74.33, |
| "step": 255, |
| "token_acc": 0.8423423423423423, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.2070778564206269, |
| "grad_norm": 3.3866539001464844, |
| "learning_rate": 9.342329388522239e-06, |
| "loss": 0.5174039006233215, |
| "memory(GiB)": 74.33, |
| "step": 256, |
| "token_acc": 0.825503355704698, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.20788675429726997, |
| "grad_norm": 3.170250654220581, |
| "learning_rate": 9.335686465946888e-06, |
| "loss": 0.5126312971115112, |
| "memory(GiB)": 74.33, |
| "step": 257, |
| "token_acc": 0.8125, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 2.1758675575256348, |
| "learning_rate": 9.32901254908653e-06, |
| "loss": 0.3875027298927307, |
| "memory(GiB)": 74.33, |
| "step": 258, |
| "token_acc": 0.7954545454545454, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.20950455005055613, |
| "grad_norm": 2.4927093982696533, |
| "learning_rate": 9.322307685650638e-06, |
| "loss": 0.4708499312400818, |
| "memory(GiB)": 74.33, |
| "step": 259, |
| "token_acc": 0.8743718592964824, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.2103134479271992, |
| "grad_norm": 3.2660865783691406, |
| "learning_rate": 9.315571923569892e-06, |
| "loss": 0.48012182116508484, |
| "memory(GiB)": 74.33, |
| "step": 260, |
| "token_acc": 0.8479087452471483, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.21112234580384226, |
| "grad_norm": 2.607844829559326, |
| "learning_rate": 9.308805310995877e-06, |
| "loss": 0.4679752588272095, |
| "memory(GiB)": 74.33, |
| "step": 261, |
| "token_acc": 0.813953488372093, |
| "train_speed(iter/s)": 0.022778 |
| }, |
| { |
| "epoch": 0.21193124368048533, |
| "grad_norm": 2.9813013076782227, |
| "learning_rate": 9.302007896300697e-06, |
| "loss": 0.47132837772369385, |
| "memory(GiB)": 74.33, |
| "step": 262, |
| "token_acc": 0.8687258687258688, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.21274014155712842, |
| "grad_norm": 2.997264862060547, |
| "learning_rate": 9.295179728076666e-06, |
| "loss": 0.47330912947654724, |
| "memory(GiB)": 74.33, |
| "step": 263, |
| "token_acc": 0.8465608465608465, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.2135490394337715, |
| "grad_norm": 2.7569003105163574, |
| "learning_rate": 9.288320855135936e-06, |
| "loss": 0.5202451348304749, |
| "memory(GiB)": 74.33, |
| "step": 264, |
| "token_acc": 0.8395061728395061, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.21435793731041455, |
| "grad_norm": 3.455897569656372, |
| "learning_rate": 9.281431326510153e-06, |
| "loss": 0.5138571262359619, |
| "memory(GiB)": 74.33, |
| "step": 265, |
| "token_acc": 0.8263888888888888, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.21516683518705762, |
| "grad_norm": 2.402111291885376, |
| "learning_rate": 9.27451119145012e-06, |
| "loss": 0.4587266147136688, |
| "memory(GiB)": 74.33, |
| "step": 266, |
| "token_acc": 0.8116591928251121, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.21597573306370071, |
| "grad_norm": 2.7626912593841553, |
| "learning_rate": 9.267560499425425e-06, |
| "loss": 0.5164949893951416, |
| "memory(GiB)": 74.33, |
| "step": 267, |
| "token_acc": 0.845771144278607, |
| "train_speed(iter/s)": 0.022779 |
| }, |
| { |
| "epoch": 0.21678463094034378, |
| "grad_norm": 2.1381757259368896, |
| "learning_rate": 9.2605793001241e-06, |
| "loss": 0.47523602843284607, |
| "memory(GiB)": 74.33, |
| "step": 268, |
| "token_acc": 0.8202247191011236, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.21759352881698685, |
| "grad_norm": 3.386496067047119, |
| "learning_rate": 9.253567643452263e-06, |
| "loss": 0.5109878778457642, |
| "memory(GiB)": 74.33, |
| "step": 269, |
| "token_acc": 0.8279569892473119, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.21840242669362994, |
| "grad_norm": 3.036259889602661, |
| "learning_rate": 9.246525579533765e-06, |
| "loss": 0.47165533900260925, |
| "memory(GiB)": 74.33, |
| "step": 270, |
| "token_acc": 0.8557046979865772, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.219211324570273, |
| "grad_norm": 2.2953364849090576, |
| "learning_rate": 9.239453158709822e-06, |
| "loss": 0.452242374420166, |
| "memory(GiB)": 74.33, |
| "step": 271, |
| "token_acc": 0.9050445103857567, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.22002022244691607, |
| "grad_norm": 3.2290663719177246, |
| "learning_rate": 9.232350431538656e-06, |
| "loss": 0.5369592905044556, |
| "memory(GiB)": 74.33, |
| "step": 272, |
| "token_acc": 0.8627450980392157, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.22082912032355914, |
| "grad_norm": 2.628915786743164, |
| "learning_rate": 9.225217448795155e-06, |
| "loss": 0.46493035554885864, |
| "memory(GiB)": 74.33, |
| "step": 273, |
| "token_acc": 0.8185483870967742, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.22163801820020224, |
| "grad_norm": 2.308983325958252, |
| "learning_rate": 9.218054261470477e-06, |
| "loss": 0.462538480758667, |
| "memory(GiB)": 74.33, |
| "step": 274, |
| "token_acc": 0.8456375838926175, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.2224469160768453, |
| "grad_norm": 3.000230550765991, |
| "learning_rate": 9.210860920771706e-06, |
| "loss": 0.43489784002304077, |
| "memory(GiB)": 74.33, |
| "step": 275, |
| "token_acc": 0.842741935483871, |
| "train_speed(iter/s)": 0.02278 |
| }, |
| { |
| "epoch": 0.22325581395348837, |
| "grad_norm": 2.6025278568267822, |
| "learning_rate": 9.203637478121492e-06, |
| "loss": 0.46363720297813416, |
| "memory(GiB)": 74.33, |
| "step": 276, |
| "token_acc": 0.8724489795918368, |
| "train_speed(iter/s)": 0.022781 |
| }, |
| { |
| "epoch": 0.22406471183013144, |
| "grad_norm": 3.2257838249206543, |
| "learning_rate": 9.196383985157657e-06, |
| "loss": 0.46590667963027954, |
| "memory(GiB)": 74.33, |
| "step": 277, |
| "token_acc": 0.8736842105263158, |
| "train_speed(iter/s)": 0.022781 |
| }, |
| { |
| "epoch": 0.22487360970677453, |
| "grad_norm": 2.476445436477661, |
| "learning_rate": 9.189100493732852e-06, |
| "loss": 0.4720000624656677, |
| "memory(GiB)": 74.33, |
| "step": 278, |
| "token_acc": 0.8990825688073395, |
| "train_speed(iter/s)": 0.022781 |
| }, |
| { |
| "epoch": 0.2256825075834176, |
| "grad_norm": 1.9399663209915161, |
| "learning_rate": 9.181787055914175e-06, |
| "loss": 0.43296879529953003, |
| "memory(GiB)": 74.33, |
| "step": 279, |
| "token_acc": 0.8297872340425532, |
| "train_speed(iter/s)": 0.022782 |
| }, |
| { |
| "epoch": 0.22649140546006066, |
| "grad_norm": 2.530008554458618, |
| "learning_rate": 9.1744437239828e-06, |
| "loss": 0.43587636947631836, |
| "memory(GiB)": 74.33, |
| "step": 280, |
| "token_acc": 0.8951612903225806, |
| "train_speed(iter/s)": 0.022782 |
| }, |
| { |
| "epoch": 0.22730030333670373, |
| "grad_norm": 2.7868869304656982, |
| "learning_rate": 9.167070550433604e-06, |
| "loss": 0.3868146538734436, |
| "memory(GiB)": 74.33, |
| "step": 281, |
| "token_acc": 0.8425925925925926, |
| "train_speed(iter/s)": 0.022783 |
| }, |
| { |
| "epoch": 0.22810920121334682, |
| "grad_norm": 2.6715898513793945, |
| "learning_rate": 9.159667587974786e-06, |
| "loss": 0.40206730365753174, |
| "memory(GiB)": 74.33, |
| "step": 282, |
| "token_acc": 0.8894736842105263, |
| "train_speed(iter/s)": 0.022783 |
| }, |
| { |
| "epoch": 0.2289180990899899, |
| "grad_norm": 2.36309552192688, |
| "learning_rate": 9.1522348895275e-06, |
| "loss": 0.5806437730789185, |
| "memory(GiB)": 74.33, |
| "step": 283, |
| "token_acc": 0.7923728813559322, |
| "train_speed(iter/s)": 0.022783 |
| }, |
| { |
| "epoch": 0.22972699696663296, |
| "grad_norm": 2.1452529430389404, |
| "learning_rate": 9.144772508225477e-06, |
| "loss": 0.4016059339046478, |
| "memory(GiB)": 74.33, |
| "step": 284, |
| "token_acc": 0.872093023255814, |
| "train_speed(iter/s)": 0.022783 |
| }, |
| { |
| "epoch": 0.23053589484327602, |
| "grad_norm": 2.564225196838379, |
| "learning_rate": 9.137280497414628e-06, |
| "loss": 0.3909257650375366, |
| "memory(GiB)": 74.33, |
| "step": 285, |
| "token_acc": 0.8805460750853242, |
| "train_speed(iter/s)": 0.022783 |
| }, |
| { |
| "epoch": 0.23134479271991912, |
| "grad_norm": 2.211818218231201, |
| "learning_rate": 9.129758910652684e-06, |
| "loss": 0.4310418963432312, |
| "memory(GiB)": 74.33, |
| "step": 286, |
| "token_acc": 0.8644859813084113, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.23215369059656218, |
| "grad_norm": 3.1847712993621826, |
| "learning_rate": 9.122207801708802e-06, |
| "loss": 0.43590471148490906, |
| "memory(GiB)": 74.33, |
| "step": 287, |
| "token_acc": 0.864, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.23296258847320525, |
| "grad_norm": 2.477933406829834, |
| "learning_rate": 9.114627224563182e-06, |
| "loss": 0.4442121386528015, |
| "memory(GiB)": 74.33, |
| "step": 288, |
| "token_acc": 0.884, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.23377148634984835, |
| "grad_norm": 3.274622678756714, |
| "learning_rate": 9.10701723340668e-06, |
| "loss": 0.47166556119918823, |
| "memory(GiB)": 74.33, |
| "step": 289, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.2345803842264914, |
| "grad_norm": 3.145052671432495, |
| "learning_rate": 9.099377882640425e-06, |
| "loss": 0.46739423274993896, |
| "memory(GiB)": 74.33, |
| "step": 290, |
| "token_acc": 0.8502202643171806, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.23538928210313448, |
| "grad_norm": 2.3364012241363525, |
| "learning_rate": 9.09170922687543e-06, |
| "loss": 0.4193730354309082, |
| "memory(GiB)": 74.33, |
| "step": 291, |
| "token_acc": 0.8828451882845189, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.23619817997977754, |
| "grad_norm": 2.827242612838745, |
| "learning_rate": 9.08401132093219e-06, |
| "loss": 0.5026365518569946, |
| "memory(GiB)": 74.33, |
| "step": 292, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.23700707785642064, |
| "grad_norm": 3.1282265186309814, |
| "learning_rate": 9.076284219840306e-06, |
| "loss": 0.46792399883270264, |
| "memory(GiB)": 74.33, |
| "step": 293, |
| "token_acc": 0.8814814814814815, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.2378159757330637, |
| "grad_norm": 2.6595497131347656, |
| "learning_rate": 9.068527978838086e-06, |
| "loss": 0.48813870549201965, |
| "memory(GiB)": 74.33, |
| "step": 294, |
| "token_acc": 0.8664122137404581, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.23862487360970677, |
| "grad_norm": 2.2860071659088135, |
| "learning_rate": 9.060742653372143e-06, |
| "loss": 0.4249404966831207, |
| "memory(GiB)": 74.33, |
| "step": 295, |
| "token_acc": 0.815068493150685, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.23943377148634984, |
| "grad_norm": 2.8490703105926514, |
| "learning_rate": 9.052928299097013e-06, |
| "loss": 0.5840834975242615, |
| "memory(GiB)": 74.33, |
| "step": 296, |
| "token_acc": 0.8630705394190872, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24024266936299293, |
| "grad_norm": 2.5748631954193115, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.4933628439903259, |
| "memory(GiB)": 74.33, |
| "step": 297, |
| "token_acc": 0.8488372093023255, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.241051567239636, |
| "grad_norm": 2.2127761840820312, |
| "learning_rate": 9.037212727774486e-06, |
| "loss": 0.47793740034103394, |
| "memory(GiB)": 74.33, |
| "step": 298, |
| "token_acc": 0.8963730569948186, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.24186046511627907, |
| "grad_norm": 2.8014166355133057, |
| "learning_rate": 9.029311623072137e-06, |
| "loss": 0.4578291177749634, |
| "memory(GiB)": 74.33, |
| "step": 299, |
| "token_acc": 0.8131868131868132, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.24266936299292213, |
| "grad_norm": 2.5986998081207275, |
| "learning_rate": 9.021381714249888e-06, |
| "loss": 0.5257298350334167, |
| "memory(GiB)": 74.33, |
| "step": 300, |
| "token_acc": 0.8229166666666666, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.24347826086956523, |
| "grad_norm": 2.7166779041290283, |
| "learning_rate": 9.013423057995845e-06, |
| "loss": 0.5010583400726318, |
| "memory(GiB)": 74.33, |
| "step": 301, |
| "token_acc": 0.8590308370044053, |
| "train_speed(iter/s)": 0.022784 |
| }, |
| { |
| "epoch": 0.2442871587462083, |
| "grad_norm": 2.9347927570343018, |
| "learning_rate": 9.005435711203619e-06, |
| "loss": 0.4537706971168518, |
| "memory(GiB)": 74.33, |
| "step": 302, |
| "token_acc": 0.8659003831417624, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24509605662285136, |
| "grad_norm": 2.4154651165008545, |
| "learning_rate": 8.997419730971917e-06, |
| "loss": 0.39763540029525757, |
| "memory(GiB)": 74.33, |
| "step": 303, |
| "token_acc": 0.8690476190476191, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24590495449949443, |
| "grad_norm": 2.5024564266204834, |
| "learning_rate": 8.989375174604142e-06, |
| "loss": 0.5160707235336304, |
| "memory(GiB)": 74.33, |
| "step": 304, |
| "token_acc": 0.8614718614718615, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24671385237613752, |
| "grad_norm": 2.6469497680664062, |
| "learning_rate": 8.981302099607973e-06, |
| "loss": 0.4616546332836151, |
| "memory(GiB)": 74.33, |
| "step": 305, |
| "token_acc": 0.8442028985507246, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.2475227502527806, |
| "grad_norm": 2.6130266189575195, |
| "learning_rate": 8.973200563694964e-06, |
| "loss": 0.42548537254333496, |
| "memory(GiB)": 74.33, |
| "step": 306, |
| "token_acc": 0.852589641434263, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24833164812942365, |
| "grad_norm": 2.578451156616211, |
| "learning_rate": 8.965070624780117e-06, |
| "loss": 0.48335641622543335, |
| "memory(GiB)": 74.33, |
| "step": 307, |
| "token_acc": 0.846441947565543, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24914054600606672, |
| "grad_norm": 2.4299726486206055, |
| "learning_rate": 8.956912340981485e-06, |
| "loss": 0.4736361801624298, |
| "memory(GiB)": 74.33, |
| "step": 308, |
| "token_acc": 0.8448979591836735, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.24994944388270982, |
| "grad_norm": 2.816293239593506, |
| "learning_rate": 8.948725770619745e-06, |
| "loss": 0.503253698348999, |
| "memory(GiB)": 74.33, |
| "step": 309, |
| "token_acc": 0.8466453674121406, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.25075834175935285, |
| "grad_norm": 2.6718838214874268, |
| "learning_rate": 8.940510972217785e-06, |
| "loss": 0.43048620223999023, |
| "memory(GiB)": 74.33, |
| "step": 310, |
| "token_acc": 0.8262295081967214, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.251567239635996, |
| "grad_norm": 2.4307098388671875, |
| "learning_rate": 8.932268004500288e-06, |
| "loss": 0.51353919506073, |
| "memory(GiB)": 74.33, |
| "step": 311, |
| "token_acc": 0.8412017167381974, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.25237613751263904, |
| "grad_norm": 2.6662516593933105, |
| "learning_rate": 8.923996926393306e-06, |
| "loss": 0.4586646556854248, |
| "memory(GiB)": 74.33, |
| "step": 312, |
| "token_acc": 0.8692579505300353, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2531850353892821, |
| "grad_norm": 3.027970790863037, |
| "learning_rate": 8.915697797023841e-06, |
| "loss": 0.5299907326698303, |
| "memory(GiB)": 74.33, |
| "step": 313, |
| "token_acc": 0.8582089552238806, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2539939332659252, |
| "grad_norm": 2.6045422554016113, |
| "learning_rate": 8.907370675719428e-06, |
| "loss": 0.5199022889137268, |
| "memory(GiB)": 74.33, |
| "step": 314, |
| "token_acc": 0.8116883116883117, |
| "train_speed(iter/s)": 0.022785 |
| }, |
| { |
| "epoch": 0.25480283114256824, |
| "grad_norm": 2.7272956371307373, |
| "learning_rate": 8.899015622007703e-06, |
| "loss": 0.45891785621643066, |
| "memory(GiB)": 74.33, |
| "step": 315, |
| "token_acc": 0.8243243243243243, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2556117290192113, |
| "grad_norm": 2.200077533721924, |
| "learning_rate": 8.890632695615984e-06, |
| "loss": 0.39891767501831055, |
| "memory(GiB)": 74.33, |
| "step": 316, |
| "token_acc": 0.8440677966101695, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2564206268958544, |
| "grad_norm": 2.301032543182373, |
| "learning_rate": 8.882221956470838e-06, |
| "loss": 0.4599316716194153, |
| "memory(GiB)": 74.33, |
| "step": 317, |
| "token_acc": 0.8325358851674641, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2572295247724975, |
| "grad_norm": 2.614656448364258, |
| "learning_rate": 8.873783464697653e-06, |
| "loss": 0.459076464176178, |
| "memory(GiB)": 74.33, |
| "step": 318, |
| "token_acc": 0.8393939393939394, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.25803842264914056, |
| "grad_norm": 2.1406943798065186, |
| "learning_rate": 8.865317280620221e-06, |
| "loss": 0.39890217781066895, |
| "memory(GiB)": 74.33, |
| "step": 319, |
| "token_acc": 0.8304347826086956, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.25884732052578363, |
| "grad_norm": 2.5298852920532227, |
| "learning_rate": 8.856823464760284e-06, |
| "loss": 0.4256265163421631, |
| "memory(GiB)": 74.33, |
| "step": 320, |
| "token_acc": 0.8717948717948718, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2596562184024267, |
| "grad_norm": 2.3466522693634033, |
| "learning_rate": 8.84830207783712e-06, |
| "loss": 0.395018070936203, |
| "memory(GiB)": 74.33, |
| "step": 321, |
| "token_acc": 0.8884462151394422, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.26046511627906976, |
| "grad_norm": 2.6752617359161377, |
| "learning_rate": 8.839753180767108e-06, |
| "loss": 0.4618658423423767, |
| "memory(GiB)": 74.33, |
| "step": 322, |
| "token_acc": 0.8387096774193549, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.26127401415571283, |
| "grad_norm": 2.248332977294922, |
| "learning_rate": 8.831176834663275e-06, |
| "loss": 0.4209662675857544, |
| "memory(GiB)": 74.33, |
| "step": 323, |
| "token_acc": 0.8830645161290323, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2620829120323559, |
| "grad_norm": 2.6968088150024414, |
| "learning_rate": 8.82257310083488e-06, |
| "loss": 0.4762377440929413, |
| "memory(GiB)": 74.33, |
| "step": 324, |
| "token_acc": 0.8810572687224669, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.26289180990899896, |
| "grad_norm": 3.221013307571411, |
| "learning_rate": 8.813942040786964e-06, |
| "loss": 0.5154784917831421, |
| "memory(GiB)": 74.33, |
| "step": 325, |
| "token_acc": 0.8494208494208494, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2637007077856421, |
| "grad_norm": 1.9791827201843262, |
| "learning_rate": 8.805283716219917e-06, |
| "loss": 0.47922518849372864, |
| "memory(GiB)": 74.33, |
| "step": 326, |
| "token_acc": 0.8412698412698413, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.26450960566228515, |
| "grad_norm": 1.939926266670227, |
| "learning_rate": 8.79659818902903e-06, |
| "loss": 0.4087769389152527, |
| "memory(GiB)": 74.33, |
| "step": 327, |
| "token_acc": 0.8360655737704918, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2653185035389282, |
| "grad_norm": 2.3445236682891846, |
| "learning_rate": 8.787885521304056e-06, |
| "loss": 0.49197518825531006, |
| "memory(GiB)": 74.33, |
| "step": 328, |
| "token_acc": 0.8293413173652695, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2661274014155713, |
| "grad_norm": 2.549042224884033, |
| "learning_rate": 8.779145775328766e-06, |
| "loss": 0.4610610604286194, |
| "memory(GiB)": 74.33, |
| "step": 329, |
| "token_acc": 0.8407960199004975, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.26693629929221435, |
| "grad_norm": 7.023351192474365, |
| "learning_rate": 8.770379013580507e-06, |
| "loss": 0.5349440574645996, |
| "memory(GiB)": 74.33, |
| "step": 330, |
| "token_acc": 0.8619246861924686, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2677451971688574, |
| "grad_norm": 3.5521559715270996, |
| "learning_rate": 8.761585298729748e-06, |
| "loss": 0.46497541666030884, |
| "memory(GiB)": 74.33, |
| "step": 331, |
| "token_acc": 0.8870292887029289, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2685540950455005, |
| "grad_norm": 2.684696674346924, |
| "learning_rate": 8.75276469363964e-06, |
| "loss": 0.4779859781265259, |
| "memory(GiB)": 74.33, |
| "step": 332, |
| "token_acc": 0.837696335078534, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2693629929221436, |
| "grad_norm": 2.123192310333252, |
| "learning_rate": 8.743917261365557e-06, |
| "loss": 0.43780291080474854, |
| "memory(GiB)": 74.33, |
| "step": 333, |
| "token_acc": 0.8692307692307693, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2701718907987867, |
| "grad_norm": 2.416212558746338, |
| "learning_rate": 8.73504306515466e-06, |
| "loss": 0.43149372935295105, |
| "memory(GiB)": 74.33, |
| "step": 334, |
| "token_acc": 0.85, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.27098078867542974, |
| "grad_norm": 2.407726764678955, |
| "learning_rate": 8.726142168445427e-06, |
| "loss": 0.46393710374832153, |
| "memory(GiB)": 74.33, |
| "step": 335, |
| "token_acc": 0.8478260869565217, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2717896865520728, |
| "grad_norm": 2.2603883743286133, |
| "learning_rate": 8.717214634867213e-06, |
| "loss": 0.4834635555744171, |
| "memory(GiB)": 74.33, |
| "step": 336, |
| "token_acc": 0.8544303797468354, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.2725985844287159, |
| "grad_norm": 2.377035140991211, |
| "learning_rate": 8.708260528239788e-06, |
| "loss": 0.4176112711429596, |
| "memory(GiB)": 74.33, |
| "step": 337, |
| "token_acc": 0.8802083333333334, |
| "train_speed(iter/s)": 0.022786 |
| }, |
| { |
| "epoch": 0.27340748230535894, |
| "grad_norm": 2.855900526046753, |
| "learning_rate": 8.699279912572888e-06, |
| "loss": 0.4877198338508606, |
| "memory(GiB)": 74.33, |
| "step": 338, |
| "token_acc": 0.8592964824120602, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.274216380182002, |
| "grad_norm": 3.3495020866394043, |
| "learning_rate": 8.690272852065748e-06, |
| "loss": 0.44448497891426086, |
| "memory(GiB)": 74.33, |
| "step": 339, |
| "token_acc": 0.8760683760683761, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2750252780586451, |
| "grad_norm": 2.204909563064575, |
| "learning_rate": 8.68123941110665e-06, |
| "loss": 0.47281521558761597, |
| "memory(GiB)": 74.33, |
| "step": 340, |
| "token_acc": 0.8225108225108225, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2758341759352882, |
| "grad_norm": 2.295105218887329, |
| "learning_rate": 8.67217965427246e-06, |
| "loss": 0.42724931240081787, |
| "memory(GiB)": 74.33, |
| "step": 341, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.27664307381193126, |
| "grad_norm": 3.001664876937866, |
| "learning_rate": 8.663093646328166e-06, |
| "loss": 0.5214186310768127, |
| "memory(GiB)": 74.33, |
| "step": 342, |
| "token_acc": 0.8205128205128205, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.27745197168857433, |
| "grad_norm": 2.665395736694336, |
| "learning_rate": 8.653981452226418e-06, |
| "loss": 0.43387383222579956, |
| "memory(GiB)": 74.33, |
| "step": 343, |
| "token_acc": 0.908256880733945, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 2.3217623233795166, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.5246144533157349, |
| "memory(GiB)": 74.33, |
| "step": 344, |
| "token_acc": 0.825925925925926, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.27906976744186046, |
| "grad_norm": 2.4558563232421875, |
| "learning_rate": 8.635678766296663e-06, |
| "loss": 0.48798543214797974, |
| "memory(GiB)": 74.33, |
| "step": 345, |
| "token_acc": 0.848780487804878, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.27987866531850353, |
| "grad_norm": 2.1867096424102783, |
| "learning_rate": 8.626488405308067e-06, |
| "loss": 0.5087660551071167, |
| "memory(GiB)": 74.33, |
| "step": 346, |
| "token_acc": 0.8311688311688312, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2806875631951466, |
| "grad_norm": 2.2217187881469727, |
| "learning_rate": 8.617272119839903e-06, |
| "loss": 0.43445640802383423, |
| "memory(GiB)": 74.33, |
| "step": 347, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.28149646107178966, |
| "grad_norm": 2.6297953128814697, |
| "learning_rate": 8.608029975776128e-06, |
| "loss": 0.4504978656768799, |
| "memory(GiB)": 74.33, |
| "step": 348, |
| "token_acc": 0.8523809523809524, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.2823053589484328, |
| "grad_norm": 3.717496156692505, |
| "learning_rate": 8.598762039185553e-06, |
| "loss": 0.45087772607803345, |
| "memory(GiB)": 74.33, |
| "step": 349, |
| "token_acc": 0.8565400843881856, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.28311425682507585, |
| "grad_norm": 2.353040933609009, |
| "learning_rate": 8.589468376321369e-06, |
| "loss": 0.4105454683303833, |
| "memory(GiB)": 74.33, |
| "step": 350, |
| "token_acc": 0.8566775244299675, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.2839231547017189, |
| "grad_norm": 2.3427672386169434, |
| "learning_rate": 8.580149053620674e-06, |
| "loss": 0.5255011320114136, |
| "memory(GiB)": 74.33, |
| "step": 351, |
| "token_acc": 0.8346456692913385, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.284732052578362, |
| "grad_norm": 2.3275554180145264, |
| "learning_rate": 8.570804137704005e-06, |
| "loss": 0.443267822265625, |
| "memory(GiB)": 74.33, |
| "step": 352, |
| "token_acc": 0.8314176245210728, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.28554095045500505, |
| "grad_norm": 2.162351608276367, |
| "learning_rate": 8.561433695374848e-06, |
| "loss": 0.4688035249710083, |
| "memory(GiB)": 74.33, |
| "step": 353, |
| "token_acc": 0.8375451263537906, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2863498483316481, |
| "grad_norm": 2.127072811126709, |
| "learning_rate": 8.552037793619177e-06, |
| "loss": 0.488004207611084, |
| "memory(GiB)": 74.33, |
| "step": 354, |
| "token_acc": 0.8119266055045872, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2871587462082912, |
| "grad_norm": 2.731759786605835, |
| "learning_rate": 8.542616499604958e-06, |
| "loss": 0.4488160312175751, |
| "memory(GiB)": 74.33, |
| "step": 355, |
| "token_acc": 0.8196078431372549, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.2879676440849343, |
| "grad_norm": 2.025136709213257, |
| "learning_rate": 8.533169880681682e-06, |
| "loss": 0.3923991024494171, |
| "memory(GiB)": 74.33, |
| "step": 356, |
| "token_acc": 0.8362989323843416, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.28877654196157737, |
| "grad_norm": 2.501194477081299, |
| "learning_rate": 8.523698004379878e-06, |
| "loss": 0.46766936779022217, |
| "memory(GiB)": 74.33, |
| "step": 357, |
| "token_acc": 0.8260869565217391, |
| "train_speed(iter/s)": 0.022787 |
| }, |
| { |
| "epoch": 0.28958543983822044, |
| "grad_norm": 2.192864179611206, |
| "learning_rate": 8.514200938410628e-06, |
| "loss": 0.48559021949768066, |
| "memory(GiB)": 74.33, |
| "step": 358, |
| "token_acc": 0.86328125, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.2903943377148635, |
| "grad_norm": 2.9228947162628174, |
| "learning_rate": 8.504678750665094e-06, |
| "loss": 0.5047175288200378, |
| "memory(GiB)": 74.33, |
| "step": 359, |
| "token_acc": 0.8647540983606558, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.29120323559150657, |
| "grad_norm": 2.388331174850464, |
| "learning_rate": 8.495131509214015e-06, |
| "loss": 0.4464142620563507, |
| "memory(GiB)": 74.33, |
| "step": 360, |
| "token_acc": 0.8411552346570397, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.29201213346814964, |
| "grad_norm": 3.4440038204193115, |
| "learning_rate": 8.485559282307237e-06, |
| "loss": 0.44610536098480225, |
| "memory(GiB)": 74.33, |
| "step": 361, |
| "token_acc": 0.892018779342723, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.2928210313447927, |
| "grad_norm": 2.4162344932556152, |
| "learning_rate": 8.475962138373212e-06, |
| "loss": 0.43880611658096313, |
| "memory(GiB)": 74.33, |
| "step": 362, |
| "token_acc": 0.8632478632478633, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.29362992922143577, |
| "grad_norm": 2.4398529529571533, |
| "learning_rate": 8.466340146018522e-06, |
| "loss": 0.4168269634246826, |
| "memory(GiB)": 74.33, |
| "step": 363, |
| "token_acc": 0.8543307086614174, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.2944388270980789, |
| "grad_norm": 2.5178182125091553, |
| "learning_rate": 8.456693374027378e-06, |
| "loss": 0.4725669026374817, |
| "memory(GiB)": 74.33, |
| "step": 364, |
| "token_acc": 0.8543689320388349, |
| "train_speed(iter/s)": 0.022788 |
| }, |
| { |
| "epoch": 0.29524772497472196, |
| "grad_norm": 2.5267229080200195, |
| "learning_rate": 8.44702189136113e-06, |
| "loss": 0.5213101506233215, |
| "memory(GiB)": 74.33, |
| "step": 365, |
| "token_acc": 0.839344262295082, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.296056622851365, |
| "grad_norm": 2.3971071243286133, |
| "learning_rate": 8.43732576715778e-06, |
| "loss": 0.4878075122833252, |
| "memory(GiB)": 74.33, |
| "step": 366, |
| "token_acc": 0.8620689655172413, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.2968655207280081, |
| "grad_norm": 3.86580753326416, |
| "learning_rate": 8.427605070731482e-06, |
| "loss": 0.38472825288772583, |
| "memory(GiB)": 74.33, |
| "step": 367, |
| "token_acc": 0.8538461538461538, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.29767441860465116, |
| "grad_norm": 2.5940558910369873, |
| "learning_rate": 8.417859871572045e-06, |
| "loss": 0.5018994808197021, |
| "memory(GiB)": 74.33, |
| "step": 368, |
| "token_acc": 0.8375796178343949, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.2984833164812942, |
| "grad_norm": 2.456550359725952, |
| "learning_rate": 8.408090239344442e-06, |
| "loss": 0.4518444240093231, |
| "memory(GiB)": 74.33, |
| "step": 369, |
| "token_acc": 0.8458149779735683, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.2992922143579373, |
| "grad_norm": 3.4539546966552734, |
| "learning_rate": 8.39829624388831e-06, |
| "loss": 0.4444255828857422, |
| "memory(GiB)": 74.33, |
| "step": 370, |
| "token_acc": 0.8786407766990292, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.30010111223458036, |
| "grad_norm": 2.5049355030059814, |
| "learning_rate": 8.38847795521745e-06, |
| "loss": 0.4359434247016907, |
| "memory(GiB)": 74.33, |
| "step": 371, |
| "token_acc": 0.8415841584158416, |
| "train_speed(iter/s)": 0.022789 |
| }, |
| { |
| "epoch": 0.3009100101112235, |
| "grad_norm": 2.7211098670959473, |
| "learning_rate": 8.378635443519327e-06, |
| "loss": 0.4071110785007477, |
| "memory(GiB)": 74.33, |
| "step": 372, |
| "token_acc": 0.8516949152542372, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.30171890798786655, |
| "grad_norm": 2.0721325874328613, |
| "learning_rate": 8.368768779154564e-06, |
| "loss": 0.449047714471817, |
| "memory(GiB)": 74.33, |
| "step": 373, |
| "token_acc": 0.8604651162790697, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.3025278058645096, |
| "grad_norm": 2.6694495677948, |
| "learning_rate": 8.358878032656446e-06, |
| "loss": 0.436679869890213, |
| "memory(GiB)": 74.33, |
| "step": 374, |
| "token_acc": 0.8672566371681416, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.3033367037411527, |
| "grad_norm": 2.6044750213623047, |
| "learning_rate": 8.348963274730413e-06, |
| "loss": 0.4522557556629181, |
| "memory(GiB)": 74.33, |
| "step": 375, |
| "token_acc": 0.8481675392670157, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.30414560161779575, |
| "grad_norm": 2.2683019638061523, |
| "learning_rate": 8.339024576253555e-06, |
| "loss": 0.3990349769592285, |
| "memory(GiB)": 74.33, |
| "step": 376, |
| "token_acc": 0.8393574297188755, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.3049544994944388, |
| "grad_norm": 2.6098105907440186, |
| "learning_rate": 8.3290620082741e-06, |
| "loss": 0.47003981471061707, |
| "memory(GiB)": 74.33, |
| "step": 377, |
| "token_acc": 0.8828125, |
| "train_speed(iter/s)": 0.02279 |
| }, |
| { |
| "epoch": 0.3057633973710819, |
| "grad_norm": 2.756648540496826, |
| "learning_rate": 8.319075642010914e-06, |
| "loss": 0.46801501512527466, |
| "memory(GiB)": 74.33, |
| "step": 378, |
| "token_acc": 0.8024691358024691, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.306572295247725, |
| "grad_norm": 2.435135841369629, |
| "learning_rate": 8.30906554885299e-06, |
| "loss": 0.45518428087234497, |
| "memory(GiB)": 74.33, |
| "step": 379, |
| "token_acc": 0.86328125, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.30738119312436807, |
| "grad_norm": 2.305549144744873, |
| "learning_rate": 8.299031800358933e-06, |
| "loss": 0.40630266070365906, |
| "memory(GiB)": 74.33, |
| "step": 380, |
| "token_acc": 0.8652173913043478, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.30819009100101113, |
| "grad_norm": 2.8813188076019287, |
| "learning_rate": 8.288974468256453e-06, |
| "loss": 0.5275822877883911, |
| "memory(GiB)": 74.33, |
| "step": 381, |
| "token_acc": 0.8652849740932642, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.3089989888776542, |
| "grad_norm": 2.2883760929107666, |
| "learning_rate": 8.278893624441849e-06, |
| "loss": 0.4657808542251587, |
| "memory(GiB)": 74.33, |
| "step": 382, |
| "token_acc": 0.8081632653061225, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.30980788675429727, |
| "grad_norm": 2.4337222576141357, |
| "learning_rate": 8.268789340979499e-06, |
| "loss": 0.4899158179759979, |
| "memory(GiB)": 74.33, |
| "step": 383, |
| "token_acc": 0.8776371308016878, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.31061678463094033, |
| "grad_norm": 2.359471082687378, |
| "learning_rate": 8.258661690101347e-06, |
| "loss": 0.4913978576660156, |
| "memory(GiB)": 74.33, |
| "step": 384, |
| "token_acc": 0.8454935622317596, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.3114256825075834, |
| "grad_norm": 2.946106433868408, |
| "learning_rate": 8.24851074420637e-06, |
| "loss": 0.3954363167285919, |
| "memory(GiB)": 74.33, |
| "step": 385, |
| "token_acc": 0.876984126984127, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.31223458038422647, |
| "grad_norm": 2.676274299621582, |
| "learning_rate": 8.238336575860085e-06, |
| "loss": 0.4366721212863922, |
| "memory(GiB)": 74.33, |
| "step": 386, |
| "token_acc": 0.8426229508196721, |
| "train_speed(iter/s)": 0.022791 |
| }, |
| { |
| "epoch": 0.3130434782608696, |
| "grad_norm": 2.2800793647766113, |
| "learning_rate": 8.228139257794012e-06, |
| "loss": 0.4242827892303467, |
| "memory(GiB)": 74.33, |
| "step": 387, |
| "token_acc": 0.8724137931034482, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.31385237613751266, |
| "grad_norm": 2.1262009143829346, |
| "learning_rate": 8.217918862905163e-06, |
| "loss": 0.44696488976478577, |
| "memory(GiB)": 74.33, |
| "step": 388, |
| "token_acc": 0.8759398496240601, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3146612740141557, |
| "grad_norm": 2.389130115509033, |
| "learning_rate": 8.207675464255519e-06, |
| "loss": 0.4506322741508484, |
| "memory(GiB)": 74.33, |
| "step": 389, |
| "token_acc": 0.8823529411764706, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3154701718907988, |
| "grad_norm": 2.2962496280670166, |
| "learning_rate": 8.197409135071497e-06, |
| "loss": 0.416850209236145, |
| "memory(GiB)": 74.33, |
| "step": 390, |
| "token_acc": 0.8865248226950354, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.31627906976744186, |
| "grad_norm": 2.0682525634765625, |
| "learning_rate": 8.18711994874345e-06, |
| "loss": 0.423944354057312, |
| "memory(GiB)": 74.33, |
| "step": 391, |
| "token_acc": 0.8411552346570397, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3170879676440849, |
| "grad_norm": 2.43737530708313, |
| "learning_rate": 8.17680797882512e-06, |
| "loss": 0.44383469223976135, |
| "memory(GiB)": 74.33, |
| "step": 392, |
| "token_acc": 0.8977777777777778, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.317896865520728, |
| "grad_norm": 3.0157485008239746, |
| "learning_rate": 8.166473299033122e-06, |
| "loss": 0.4669773280620575, |
| "memory(GiB)": 74.33, |
| "step": 393, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.31870576339737106, |
| "grad_norm": 2.434302568435669, |
| "learning_rate": 8.15611598324642e-06, |
| "loss": 0.46818387508392334, |
| "memory(GiB)": 74.33, |
| "step": 394, |
| "token_acc": 0.7833333333333333, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3195146612740142, |
| "grad_norm": 2.063925266265869, |
| "learning_rate": 8.145736105505788e-06, |
| "loss": 0.45939022302627563, |
| "memory(GiB)": 74.33, |
| "step": 395, |
| "token_acc": 0.8424908424908425, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.32032355915065724, |
| "grad_norm": 2.5207791328430176, |
| "learning_rate": 8.135333740013294e-06, |
| "loss": 0.5139025449752808, |
| "memory(GiB)": 74.33, |
| "step": 396, |
| "token_acc": 0.8441176470588235, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3211324570273003, |
| "grad_norm": 2.687681198120117, |
| "learning_rate": 8.124908961131759e-06, |
| "loss": 0.4349074959754944, |
| "memory(GiB)": 74.33, |
| "step": 397, |
| "token_acc": 0.852017937219731, |
| "train_speed(iter/s)": 0.022792 |
| }, |
| { |
| "epoch": 0.3219413549039434, |
| "grad_norm": 2.1986069679260254, |
| "learning_rate": 8.114461843384229e-06, |
| "loss": 0.4546552300453186, |
| "memory(GiB)": 74.33, |
| "step": 398, |
| "token_acc": 0.8714859437751004, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.32275025278058644, |
| "grad_norm": 2.6796491146087646, |
| "learning_rate": 8.103992461453447e-06, |
| "loss": 0.5386300086975098, |
| "memory(GiB)": 74.33, |
| "step": 399, |
| "token_acc": 0.8553191489361702, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.3235591506572295, |
| "grad_norm": 2.465752363204956, |
| "learning_rate": 8.093500890181307e-06, |
| "loss": 0.4470570683479309, |
| "memory(GiB)": 74.33, |
| "step": 400, |
| "token_acc": 0.8025889967637541, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.3243680485338726, |
| "grad_norm": 2.695773124694824, |
| "learning_rate": 8.082987204568336e-06, |
| "loss": 0.4630998373031616, |
| "memory(GiB)": 74.33, |
| "step": 401, |
| "token_acc": 0.8252788104089219, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.3251769464105157, |
| "grad_norm": 2.6388256549835205, |
| "learning_rate": 8.072451479773143e-06, |
| "loss": 0.47690147161483765, |
| "memory(GiB)": 74.33, |
| "step": 402, |
| "token_acc": 0.8565400843881856, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.32598584428715877, |
| "grad_norm": 2.6586854457855225, |
| "learning_rate": 8.061893791111887e-06, |
| "loss": 0.5046311020851135, |
| "memory(GiB)": 74.33, |
| "step": 403, |
| "token_acc": 0.825, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.32679474216380183, |
| "grad_norm": 2.575148820877075, |
| "learning_rate": 8.05131421405774e-06, |
| "loss": 0.45166927576065063, |
| "memory(GiB)": 74.33, |
| "step": 404, |
| "token_acc": 0.8725490196078431, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3276036400404449, |
| "grad_norm": 2.7520835399627686, |
| "learning_rate": 8.040712824240348e-06, |
| "loss": 0.47704529762268066, |
| "memory(GiB)": 74.33, |
| "step": 405, |
| "token_acc": 0.8539682539682539, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.32841253791708797, |
| "grad_norm": 2.6821768283843994, |
| "learning_rate": 8.030089697445287e-06, |
| "loss": 0.44387978315353394, |
| "memory(GiB)": 74.33, |
| "step": 406, |
| "token_acc": 0.8506224066390041, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.32922143579373103, |
| "grad_norm": 2.5903446674346924, |
| "learning_rate": 8.019444909613524e-06, |
| "loss": 0.47109007835388184, |
| "memory(GiB)": 74.33, |
| "step": 407, |
| "token_acc": 0.8415300546448088, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3300303336703741, |
| "grad_norm": 1.9421981573104858, |
| "learning_rate": 8.00877853684087e-06, |
| "loss": 0.4276235103607178, |
| "memory(GiB)": 74.33, |
| "step": 408, |
| "token_acc": 0.8691588785046729, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.33083923154701717, |
| "grad_norm": 1.9274567365646362, |
| "learning_rate": 7.998090655377441e-06, |
| "loss": 0.4399895668029785, |
| "memory(GiB)": 74.33, |
| "step": 409, |
| "token_acc": 0.8153846153846154, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3316481294236603, |
| "grad_norm": 2.349695920944214, |
| "learning_rate": 7.987381341627116e-06, |
| "loss": 0.4371504485607147, |
| "memory(GiB)": 74.33, |
| "step": 410, |
| "token_acc": 0.8447488584474886, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.33245702730030335, |
| "grad_norm": 2.508023738861084, |
| "learning_rate": 7.976650672146977e-06, |
| "loss": 0.4392384886741638, |
| "memory(GiB)": 74.33, |
| "step": 411, |
| "token_acc": 0.845360824742268, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3332659251769464, |
| "grad_norm": 2.007159948348999, |
| "learning_rate": 7.965898723646777e-06, |
| "loss": 0.42986416816711426, |
| "memory(GiB)": 74.33, |
| "step": 412, |
| "token_acc": 0.8504273504273504, |
| "train_speed(iter/s)": 0.022793 |
| }, |
| { |
| "epoch": 0.3340748230535895, |
| "grad_norm": 2.3318965435028076, |
| "learning_rate": 7.955125572988381e-06, |
| "loss": 0.45020729303359985, |
| "memory(GiB)": 74.33, |
| "step": 413, |
| "token_acc": 0.8546099290780141, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.33488372093023255, |
| "grad_norm": 2.5200366973876953, |
| "learning_rate": 7.944331297185224e-06, |
| "loss": 0.4530584216117859, |
| "memory(GiB)": 74.33, |
| "step": 414, |
| "token_acc": 0.8896103896103896, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3356926188068756, |
| "grad_norm": 2.353825569152832, |
| "learning_rate": 7.933515973401756e-06, |
| "loss": 0.44627559185028076, |
| "memory(GiB)": 74.33, |
| "step": 415, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3365015166835187, |
| "grad_norm": 2.2710440158843994, |
| "learning_rate": 7.92267967895289e-06, |
| "loss": 0.4454203248023987, |
| "memory(GiB)": 74.33, |
| "step": 416, |
| "token_acc": 0.8151260504201681, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.33731041456016175, |
| "grad_norm": 2.4699690341949463, |
| "learning_rate": 7.911822491303453e-06, |
| "loss": 0.4395456910133362, |
| "memory(GiB)": 74.33, |
| "step": 417, |
| "token_acc": 0.8617021276595744, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.3381193124368049, |
| "grad_norm": 2.3089406490325928, |
| "learning_rate": 7.90094448806763e-06, |
| "loss": 0.4436686038970947, |
| "memory(GiB)": 74.33, |
| "step": 418, |
| "token_acc": 0.8864468864468864, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.33892821031344794, |
| "grad_norm": 2.105353593826294, |
| "learning_rate": 7.890045747008406e-06, |
| "loss": 0.48908939957618713, |
| "memory(GiB)": 74.33, |
| "step": 419, |
| "token_acc": 0.8593155893536122, |
| "train_speed(iter/s)": 0.022794 |
| }, |
| { |
| "epoch": 0.339737108190091, |
| "grad_norm": 2.435878276824951, |
| "learning_rate": 7.879126346037018e-06, |
| "loss": 0.4750370979309082, |
| "memory(GiB)": 74.33, |
| "step": 420, |
| "token_acc": 0.8844444444444445, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3405460060667341, |
| "grad_norm": 2.587909698486328, |
| "learning_rate": 7.868186363212392e-06, |
| "loss": 0.4756377339363098, |
| "memory(GiB)": 74.33, |
| "step": 421, |
| "token_acc": 0.8487084870848709, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.34135490394337714, |
| "grad_norm": 2.2281887531280518, |
| "learning_rate": 7.857225876740585e-06, |
| "loss": 0.4277176558971405, |
| "memory(GiB)": 74.62, |
| "step": 422, |
| "token_acc": 0.8493150684931506, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3421638018200202, |
| "grad_norm": 2.5752649307250977, |
| "learning_rate": 7.846244964974226e-06, |
| "loss": 0.48055747151374817, |
| "memory(GiB)": 74.62, |
| "step": 423, |
| "token_acc": 0.8837209302325582, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3429726996966633, |
| "grad_norm": 2.586489200592041, |
| "learning_rate": 7.835243706411961e-06, |
| "loss": 0.4750707745552063, |
| "memory(GiB)": 74.62, |
| "step": 424, |
| "token_acc": 0.8576642335766423, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3437815975733064, |
| "grad_norm": 2.450918674468994, |
| "learning_rate": 7.824222179697884e-06, |
| "loss": 0.5177239179611206, |
| "memory(GiB)": 74.62, |
| "step": 425, |
| "token_acc": 0.852589641434263, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.34459049544994946, |
| "grad_norm": 2.3722708225250244, |
| "learning_rate": 7.813180463620987e-06, |
| "loss": 0.46518608927726746, |
| "memory(GiB)": 74.62, |
| "step": 426, |
| "token_acc": 0.8423645320197044, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.34539939332659253, |
| "grad_norm": 2.5841665267944336, |
| "learning_rate": 7.802118637114575e-06, |
| "loss": 0.4838918149471283, |
| "memory(GiB)": 74.62, |
| "step": 427, |
| "token_acc": 0.8434782608695652, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3462082912032356, |
| "grad_norm": 2.3192875385284424, |
| "learning_rate": 7.791036779255726e-06, |
| "loss": 0.42157137393951416, |
| "memory(GiB)": 74.62, |
| "step": 428, |
| "token_acc": 0.8404669260700389, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.34701718907987866, |
| "grad_norm": 2.49680495262146, |
| "learning_rate": 7.779934969264714e-06, |
| "loss": 0.4023537039756775, |
| "memory(GiB)": 74.62, |
| "step": 429, |
| "token_acc": 0.8734939759036144, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 2.0230259895324707, |
| "learning_rate": 7.768813286504439e-06, |
| "loss": 0.37253955006599426, |
| "memory(GiB)": 74.62, |
| "step": 430, |
| "token_acc": 0.9224806201550387, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3486349848331648, |
| "grad_norm": 2.3140506744384766, |
| "learning_rate": 7.757671810479865e-06, |
| "loss": 0.4874904751777649, |
| "memory(GiB)": 74.62, |
| "step": 431, |
| "token_acc": 0.8592057761732852, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.34944388270980786, |
| "grad_norm": 2.2125346660614014, |
| "learning_rate": 7.74651062083746e-06, |
| "loss": 0.37930744886398315, |
| "memory(GiB)": 74.62, |
| "step": 432, |
| "token_acc": 0.8764940239043825, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.350252780586451, |
| "grad_norm": 2.240590810775757, |
| "learning_rate": 7.735329797364605e-06, |
| "loss": 0.47669389843940735, |
| "memory(GiB)": 74.62, |
| "step": 433, |
| "token_acc": 0.8710801393728222, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.35106167846309405, |
| "grad_norm": 2.510114908218384, |
| "learning_rate": 7.724129419989044e-06, |
| "loss": 0.4742322266101837, |
| "memory(GiB)": 74.62, |
| "step": 434, |
| "token_acc": 0.8536585365853658, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3518705763397371, |
| "grad_norm": 2.476958990097046, |
| "learning_rate": 7.712909568778302e-06, |
| "loss": 0.4492417871952057, |
| "memory(GiB)": 74.62, |
| "step": 435, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022795 |
| }, |
| { |
| "epoch": 0.3526794742163802, |
| "grad_norm": 2.098637104034424, |
| "learning_rate": 7.701670323939117e-06, |
| "loss": 0.4481479525566101, |
| "memory(GiB)": 74.62, |
| "step": 436, |
| "token_acc": 0.8601398601398601, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.35348837209302325, |
| "grad_norm": 2.2469687461853027, |
| "learning_rate": 7.690411765816864e-06, |
| "loss": 0.43956851959228516, |
| "memory(GiB)": 74.62, |
| "step": 437, |
| "token_acc": 0.8629032258064516, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3542972699696663, |
| "grad_norm": 2.8738715648651123, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.4626030921936035, |
| "memory(GiB)": 74.62, |
| "step": 438, |
| "token_acc": 0.8680851063829788, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3551061678463094, |
| "grad_norm": 2.638291358947754, |
| "learning_rate": 7.667837031794404e-06, |
| "loss": 0.45615172386169434, |
| "memory(GiB)": 74.62, |
| "step": 439, |
| "token_acc": 0.8088235294117647, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.35591506572295245, |
| "grad_norm": 2.2586326599121094, |
| "learning_rate": 7.656521017272965e-06, |
| "loss": 0.4124460816383362, |
| "memory(GiB)": 74.62, |
| "step": 440, |
| "token_acc": 0.8611111111111112, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3567239635995956, |
| "grad_norm": 2.374500274658203, |
| "learning_rate": 7.64518601222484e-06, |
| "loss": 0.4275168180465698, |
| "memory(GiB)": 74.62, |
| "step": 441, |
| "token_acc": 0.8487084870848709, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.35753286147623864, |
| "grad_norm": 1.9997868537902832, |
| "learning_rate": 7.633832097679959e-06, |
| "loss": 0.3909873068332672, |
| "memory(GiB)": 74.62, |
| "step": 442, |
| "token_acc": 0.8868613138686131, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3583417593528817, |
| "grad_norm": 4.926924705505371, |
| "learning_rate": 7.622459354803435e-06, |
| "loss": 0.43666255474090576, |
| "memory(GiB)": 74.62, |
| "step": 443, |
| "token_acc": 0.8704453441295547, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.3591506572295248, |
| "grad_norm": 2.317330837249756, |
| "learning_rate": 7.611067864894972e-06, |
| "loss": 0.44106507301330566, |
| "memory(GiB)": 74.62, |
| "step": 444, |
| "token_acc": 0.8059701492537313, |
| "train_speed(iter/s)": 0.022796 |
| }, |
| { |
| "epoch": 0.35995955510616784, |
| "grad_norm": 2.5835938453674316, |
| "learning_rate": 7.599657709388292e-06, |
| "loss": 0.46531200408935547, |
| "memory(GiB)": 74.62, |
| "step": 445, |
| "token_acc": 0.7931034482758621, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3607684529828109, |
| "grad_norm": 2.8004226684570312, |
| "learning_rate": 7.58822896985055e-06, |
| "loss": 0.5187166333198547, |
| "memory(GiB)": 74.62, |
| "step": 446, |
| "token_acc": 0.8099173553719008, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.36157735085945397, |
| "grad_norm": 2.7265071868896484, |
| "learning_rate": 7.5767817279817505e-06, |
| "loss": 0.47425639629364014, |
| "memory(GiB)": 74.62, |
| "step": 447, |
| "token_acc": 0.8085106382978723, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3623862487360971, |
| "grad_norm": 2.1328177452087402, |
| "learning_rate": 7.565316065614168e-06, |
| "loss": 0.4435673952102661, |
| "memory(GiB)": 74.62, |
| "step": 448, |
| "token_acc": 0.8631578947368421, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.36319514661274016, |
| "grad_norm": 2.4672372341156006, |
| "learning_rate": 7.5538320647117565e-06, |
| "loss": 0.41679224371910095, |
| "memory(GiB)": 74.62, |
| "step": 449, |
| "token_acc": 0.8908296943231441, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3640040444893832, |
| "grad_norm": 2.6723108291625977, |
| "learning_rate": 7.542329807369566e-06, |
| "loss": 0.5179734826087952, |
| "memory(GiB)": 74.62, |
| "step": 450, |
| "token_acc": 0.7644444444444445, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3648129423660263, |
| "grad_norm": 3.7509987354278564, |
| "learning_rate": 7.530809375813155e-06, |
| "loss": 0.4264351725578308, |
| "memory(GiB)": 74.62, |
| "step": 451, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.36562184024266936, |
| "grad_norm": 1.9851875305175781, |
| "learning_rate": 7.519270852398002e-06, |
| "loss": 0.4789334535598755, |
| "memory(GiB)": 74.62, |
| "step": 452, |
| "token_acc": 0.8250950570342205, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3664307381193124, |
| "grad_norm": 2.21183705329895, |
| "learning_rate": 7.507714319608922e-06, |
| "loss": 0.36344388127326965, |
| "memory(GiB)": 74.62, |
| "step": 453, |
| "token_acc": 0.8487394957983193, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3672396359959555, |
| "grad_norm": 1.613560676574707, |
| "learning_rate": 7.496139860059468e-06, |
| "loss": 0.4224799871444702, |
| "memory(GiB)": 74.62, |
| "step": 454, |
| "token_acc": 0.8813056379821959, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.36804853387259856, |
| "grad_norm": 2.4515528678894043, |
| "learning_rate": 7.484547556491346e-06, |
| "loss": 0.4368416368961334, |
| "memory(GiB)": 74.62, |
| "step": 455, |
| "token_acc": 0.8559322033898306, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3688574317492417, |
| "grad_norm": 2.2103137969970703, |
| "learning_rate": 7.472937491773824e-06, |
| "loss": 0.3967626392841339, |
| "memory(GiB)": 74.62, |
| "step": 456, |
| "token_acc": 0.8217821782178217, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.36966632962588475, |
| "grad_norm": 2.522752046585083, |
| "learning_rate": 7.461309748903138e-06, |
| "loss": 0.45169344544410706, |
| "memory(GiB)": 74.62, |
| "step": 457, |
| "token_acc": 0.8535714285714285, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3704752275025278, |
| "grad_norm": 3.0310842990875244, |
| "learning_rate": 7.449664411001898e-06, |
| "loss": 0.37837380170822144, |
| "memory(GiB)": 74.62, |
| "step": 458, |
| "token_acc": 0.9108527131782945, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3712841253791709, |
| "grad_norm": 2.2086234092712402, |
| "learning_rate": 7.438001561318494e-06, |
| "loss": 0.44610685110092163, |
| "memory(GiB)": 74.62, |
| "step": 459, |
| "token_acc": 0.870722433460076, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 2.4862678050994873, |
| "learning_rate": 7.426321283226504e-06, |
| "loss": 0.4015771746635437, |
| "memory(GiB)": 74.62, |
| "step": 460, |
| "token_acc": 0.8907563025210085, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.372901921132457, |
| "grad_norm": 2.0166738033294678, |
| "learning_rate": 7.4146236602240936e-06, |
| "loss": 0.4152040481567383, |
| "memory(GiB)": 74.62, |
| "step": 461, |
| "token_acc": 0.9248826291079812, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3737108190091001, |
| "grad_norm": 2.448951005935669, |
| "learning_rate": 7.402908775933419e-06, |
| "loss": 0.5621334910392761, |
| "memory(GiB)": 74.62, |
| "step": 462, |
| "token_acc": 0.8628318584070797, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37451971688574315, |
| "grad_norm": 2.186652183532715, |
| "learning_rate": 7.391176714100038e-06, |
| "loss": 0.4613068699836731, |
| "memory(GiB)": 74.62, |
| "step": 463, |
| "token_acc": 0.8188679245283019, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37532861476238627, |
| "grad_norm": 2.2740073204040527, |
| "learning_rate": 7.379427558592296e-06, |
| "loss": 0.4919006824493408, |
| "memory(GiB)": 74.62, |
| "step": 464, |
| "token_acc": 0.8471760797342193, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37613751263902934, |
| "grad_norm": 2.158538579940796, |
| "learning_rate": 7.36766139340074e-06, |
| "loss": 0.42273247241973877, |
| "memory(GiB)": 74.62, |
| "step": 465, |
| "token_acc": 0.8622047244094488, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3769464105156724, |
| "grad_norm": 3.0366506576538086, |
| "learning_rate": 7.3558783026375156e-06, |
| "loss": 0.5097289085388184, |
| "memory(GiB)": 74.62, |
| "step": 466, |
| "token_acc": 0.9178082191780822, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37775530839231547, |
| "grad_norm": 2.2849361896514893, |
| "learning_rate": 7.344078370535757e-06, |
| "loss": 0.5165024995803833, |
| "memory(GiB)": 74.62, |
| "step": 467, |
| "token_acc": 0.8006430868167203, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.37856420626895854, |
| "grad_norm": 1.753194808959961, |
| "learning_rate": 7.3322616814489955e-06, |
| "loss": 0.4367058277130127, |
| "memory(GiB)": 74.62, |
| "step": 468, |
| "token_acc": 0.8678571428571429, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3793731041456016, |
| "grad_norm": 1.9058223962783813, |
| "learning_rate": 7.32042831985055e-06, |
| "loss": 0.41317999362945557, |
| "memory(GiB)": 74.62, |
| "step": 469, |
| "token_acc": 0.8257261410788381, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.38018200202224467, |
| "grad_norm": 2.459209680557251, |
| "learning_rate": 7.308578370332926e-06, |
| "loss": 0.3700507581233978, |
| "memory(GiB)": 74.62, |
| "step": 470, |
| "token_acc": 0.8687943262411347, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3809908998988878, |
| "grad_norm": 1.8641716241836548, |
| "learning_rate": 7.296711917607211e-06, |
| "loss": 0.40189939737319946, |
| "memory(GiB)": 74.62, |
| "step": 471, |
| "token_acc": 0.8717948717948718, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.38179979777553086, |
| "grad_norm": 2.2401087284088135, |
| "learning_rate": 7.284829046502467e-06, |
| "loss": 0.4430382251739502, |
| "memory(GiB)": 74.62, |
| "step": 472, |
| "token_acc": 0.8419243986254296, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3826086956521739, |
| "grad_norm": 2.416550636291504, |
| "learning_rate": 7.272929841965126e-06, |
| "loss": 0.4755879342556, |
| "memory(GiB)": 74.62, |
| "step": 473, |
| "token_acc": 0.8486238532110092, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.383417593528817, |
| "grad_norm": 2.260345935821533, |
| "learning_rate": 7.261014389058383e-06, |
| "loss": 0.44997456669807434, |
| "memory(GiB)": 74.62, |
| "step": 474, |
| "token_acc": 0.7671957671957672, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.38422649140546006, |
| "grad_norm": 2.261056661605835, |
| "learning_rate": 7.2490827729615835e-06, |
| "loss": 0.47697365283966064, |
| "memory(GiB)": 74.62, |
| "step": 475, |
| "token_acc": 0.8628318584070797, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3850353892821031, |
| "grad_norm": 2.013577461242676, |
| "learning_rate": 7.237135078969618e-06, |
| "loss": 0.3827347159385681, |
| "memory(GiB)": 74.62, |
| "step": 476, |
| "token_acc": 0.8478964401294499, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3858442871587462, |
| "grad_norm": 2.1973073482513428, |
| "learning_rate": 7.225171392492316e-06, |
| "loss": 0.40540656447410583, |
| "memory(GiB)": 74.62, |
| "step": 477, |
| "token_acc": 0.863013698630137, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.38665318503538926, |
| "grad_norm": 2.2481391429901123, |
| "learning_rate": 7.213191799053832e-06, |
| "loss": 0.4136468172073364, |
| "memory(GiB)": 74.62, |
| "step": 478, |
| "token_acc": 0.8339100346020761, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3874620829120324, |
| "grad_norm": 2.1501901149749756, |
| "learning_rate": 7.201196384292027e-06, |
| "loss": 0.4204309284687042, |
| "memory(GiB)": 74.62, |
| "step": 479, |
| "token_acc": 0.8870967741935484, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.38827098078867545, |
| "grad_norm": 2.1305158138275146, |
| "learning_rate": 7.189185233957868e-06, |
| "loss": 0.4197065830230713, |
| "memory(GiB)": 74.62, |
| "step": 480, |
| "token_acc": 0.8160919540229885, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3890798786653185, |
| "grad_norm": 2.526954174041748, |
| "learning_rate": 7.177158433914811e-06, |
| "loss": 0.4064275622367859, |
| "memory(GiB)": 74.62, |
| "step": 481, |
| "token_acc": 0.8907103825136612, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3898887765419616, |
| "grad_norm": 3.277456283569336, |
| "learning_rate": 7.165116070138183e-06, |
| "loss": 0.46176213026046753, |
| "memory(GiB)": 74.62, |
| "step": 482, |
| "token_acc": 0.834983498349835, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39069767441860465, |
| "grad_norm": 2.337390184402466, |
| "learning_rate": 7.153058228714573e-06, |
| "loss": 0.3911609649658203, |
| "memory(GiB)": 74.62, |
| "step": 483, |
| "token_acc": 0.8909952606635071, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3915065722952477, |
| "grad_norm": 2.273653745651245, |
| "learning_rate": 7.140984995841214e-06, |
| "loss": 0.43842604756355286, |
| "memory(GiB)": 74.62, |
| "step": 484, |
| "token_acc": 0.844559585492228, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3923154701718908, |
| "grad_norm": 2.842496395111084, |
| "learning_rate": 7.128896457825364e-06, |
| "loss": 0.41556501388549805, |
| "memory(GiB)": 74.62, |
| "step": 485, |
| "token_acc": 0.8459016393442623, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39312436804853385, |
| "grad_norm": 2.3521416187286377, |
| "learning_rate": 7.116792701083697e-06, |
| "loss": 0.4312630891799927, |
| "memory(GiB)": 74.62, |
| "step": 486, |
| "token_acc": 0.8566433566433567, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39393326592517697, |
| "grad_norm": 2.2411739826202393, |
| "learning_rate": 7.104673812141676e-06, |
| "loss": 0.4646815359592438, |
| "memory(GiB)": 74.62, |
| "step": 487, |
| "token_acc": 0.8078602620087336, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39474216380182003, |
| "grad_norm": 2.26692533493042, |
| "learning_rate": 7.09253987763294e-06, |
| "loss": 0.41715699434280396, |
| "memory(GiB)": 74.62, |
| "step": 488, |
| "token_acc": 0.8636363636363636, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3955510616784631, |
| "grad_norm": 2.127204179763794, |
| "learning_rate": 7.080390984298686e-06, |
| "loss": 0.39702218770980835, |
| "memory(GiB)": 74.62, |
| "step": 489, |
| "token_acc": 0.8631578947368421, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39635995955510617, |
| "grad_norm": 1.905442476272583, |
| "learning_rate": 7.068227218987043e-06, |
| "loss": 0.3825928568840027, |
| "memory(GiB)": 74.62, |
| "step": 490, |
| "token_acc": 0.8986784140969163, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39716885743174923, |
| "grad_norm": 1.9447747468948364, |
| "learning_rate": 7.056048668652454e-06, |
| "loss": 0.45161956548690796, |
| "memory(GiB)": 74.62, |
| "step": 491, |
| "token_acc": 0.8728813559322034, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3979777553083923, |
| "grad_norm": 2.295433282852173, |
| "learning_rate": 7.04385542035506e-06, |
| "loss": 0.41795414686203003, |
| "memory(GiB)": 74.62, |
| "step": 492, |
| "token_acc": 0.8817733990147784, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.39878665318503537, |
| "grad_norm": 2.265631675720215, |
| "learning_rate": 7.031647561260065e-06, |
| "loss": 0.4432828426361084, |
| "memory(GiB)": 74.62, |
| "step": 493, |
| "token_acc": 0.7985865724381626, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.3995955510616785, |
| "grad_norm": 2.9621176719665527, |
| "learning_rate": 7.019425178637127e-06, |
| "loss": 0.44883739948272705, |
| "memory(GiB)": 74.62, |
| "step": 494, |
| "token_acc": 0.9203539823008849, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.40040444893832156, |
| "grad_norm": 2.9266443252563477, |
| "learning_rate": 7.007188359859727e-06, |
| "loss": 0.48823320865631104, |
| "memory(GiB)": 74.62, |
| "step": 495, |
| "token_acc": 0.8736842105263158, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.4012133468149646, |
| "grad_norm": 3.5501937866210938, |
| "learning_rate": 6.994937192404539e-06, |
| "loss": 0.41887539625167847, |
| "memory(GiB)": 74.62, |
| "step": 496, |
| "token_acc": 0.8600823045267489, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.4020222446916077, |
| "grad_norm": 2.9611189365386963, |
| "learning_rate": 6.982671763850814e-06, |
| "loss": 0.460665225982666, |
| "memory(GiB)": 74.62, |
| "step": 497, |
| "token_acc": 0.8066037735849056, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.40283114256825076, |
| "grad_norm": 2.5562634468078613, |
| "learning_rate": 6.9703921618797556e-06, |
| "loss": 0.42445844411849976, |
| "memory(GiB)": 74.62, |
| "step": 498, |
| "token_acc": 0.8809523809523809, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.4036400404448938, |
| "grad_norm": 2.2612838745117188, |
| "learning_rate": 6.95809847427388e-06, |
| "loss": 0.4139663577079773, |
| "memory(GiB)": 74.62, |
| "step": 499, |
| "token_acc": 0.8612244897959184, |
| "train_speed(iter/s)": 0.022797 |
| }, |
| { |
| "epoch": 0.4044489383215369, |
| "grad_norm": 2.0981252193450928, |
| "learning_rate": 6.945790788916402e-06, |
| "loss": 0.4424452781677246, |
| "memory(GiB)": 74.62, |
| "step": 500, |
| "token_acc": 0.8401486988847584, |
| "train_speed(iter/s)": 0.022798 |
| }, |
| { |
| "epoch": 0.4044489383215369, |
| "eval_loss": 0.42885029315948486, |
| "eval_runtime": 431.8839, |
| "eval_samples_per_second": 3.7, |
| "eval_steps_per_second": 0.116, |
| "eval_token_acc": 0.8577324229008779, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.40525783619817995, |
| "grad_norm": 2.1216652393341064, |
| "learning_rate": 6.9334691937905995e-06, |
| "loss": 0.4369218349456787, |
| "memory(GiB)": 74.62, |
| "step": 501, |
| "token_acc": 0.8514335360556038, |
| "train_speed(iter/s)": 0.022355 |
| }, |
| { |
| "epoch": 0.4060667340748231, |
| "grad_norm": 2.564833641052246, |
| "learning_rate": 6.921133776979186e-06, |
| "loss": 0.4658987820148468, |
| "memory(GiB)": 74.62, |
| "step": 502, |
| "token_acc": 0.8582089552238806, |
| "train_speed(iter/s)": 0.022356 |
| }, |
| { |
| "epoch": 0.40687563195146614, |
| "grad_norm": 1.8351505994796753, |
| "learning_rate": 6.908784626663681e-06, |
| "loss": 0.4119420647621155, |
| "memory(GiB)": 74.62, |
| "step": 503, |
| "token_acc": 0.8387096774193549, |
| "train_speed(iter/s)": 0.022357 |
| }, |
| { |
| "epoch": 0.4076845298281092, |
| "grad_norm": 2.2373807430267334, |
| "learning_rate": 6.896421831123783e-06, |
| "loss": 0.45484626293182373, |
| "memory(GiB)": 74.62, |
| "step": 504, |
| "token_acc": 0.8540772532188842, |
| "train_speed(iter/s)": 0.022358 |
| }, |
| { |
| "epoch": 0.4084934277047523, |
| "grad_norm": 2.1204137802124023, |
| "learning_rate": 6.884045478736732e-06, |
| "loss": 0.3930210471153259, |
| "memory(GiB)": 74.62, |
| "step": 505, |
| "token_acc": 0.9181034482758621, |
| "train_speed(iter/s)": 0.022359 |
| }, |
| { |
| "epoch": 0.40930232558139534, |
| "grad_norm": 2.195955276489258, |
| "learning_rate": 6.871655657976682e-06, |
| "loss": 0.4383777976036072, |
| "memory(GiB)": 74.62, |
| "step": 506, |
| "token_acc": 0.8703703703703703, |
| "train_speed(iter/s)": 0.022359 |
| }, |
| { |
| "epoch": 0.4101112234580384, |
| "grad_norm": 2.449862241744995, |
| "learning_rate": 6.859252457414067e-06, |
| "loss": 0.5421361923217773, |
| "memory(GiB)": 74.62, |
| "step": 507, |
| "token_acc": 0.8745247148288974, |
| "train_speed(iter/s)": 0.02236 |
| }, |
| { |
| "epoch": 0.4109201213346815, |
| "grad_norm": 2.8813657760620117, |
| "learning_rate": 6.8468359657149705e-06, |
| "loss": 0.3448445498943329, |
| "memory(GiB)": 74.62, |
| "step": 508, |
| "token_acc": 0.8831168831168831, |
| "train_speed(iter/s)": 0.022361 |
| }, |
| { |
| "epoch": 0.41172901921132454, |
| "grad_norm": 2.2587554454803467, |
| "learning_rate": 6.834406271640488e-06, |
| "loss": 0.40410223603248596, |
| "memory(GiB)": 74.62, |
| "step": 509, |
| "token_acc": 0.9575289575289575, |
| "train_speed(iter/s)": 0.022362 |
| }, |
| { |
| "epoch": 0.41253791708796766, |
| "grad_norm": 2.2055654525756836, |
| "learning_rate": 6.821963464046096e-06, |
| "loss": 0.4498205780982971, |
| "memory(GiB)": 74.62, |
| "step": 510, |
| "token_acc": 0.8311258278145696, |
| "train_speed(iter/s)": 0.022363 |
| }, |
| { |
| "epoch": 0.41334681496461073, |
| "grad_norm": 2.171542167663574, |
| "learning_rate": 6.809507631881014e-06, |
| "loss": 0.4186447858810425, |
| "memory(GiB)": 74.62, |
| "step": 511, |
| "token_acc": 0.8443708609271523, |
| "train_speed(iter/s)": 0.022364 |
| }, |
| { |
| "epoch": 0.4141557128412538, |
| "grad_norm": 2.509507417678833, |
| "learning_rate": 6.797038864187564e-06, |
| "loss": 0.4081672728061676, |
| "memory(GiB)": 74.62, |
| "step": 512, |
| "token_acc": 0.8518518518518519, |
| "train_speed(iter/s)": 0.022364 |
| }, |
| { |
| "epoch": 0.41496461071789686, |
| "grad_norm": 2.3102705478668213, |
| "learning_rate": 6.78455725010055e-06, |
| "loss": 0.4792659878730774, |
| "memory(GiB)": 74.62, |
| "step": 513, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022365 |
| }, |
| { |
| "epoch": 0.41577350859453993, |
| "grad_norm": 2.7244982719421387, |
| "learning_rate": 6.772062878846604e-06, |
| "loss": 0.41006016731262207, |
| "memory(GiB)": 74.62, |
| "step": 514, |
| "token_acc": 0.8380952380952381, |
| "train_speed(iter/s)": 0.022366 |
| }, |
| { |
| "epoch": 0.416582406471183, |
| "grad_norm": 2.3030154705047607, |
| "learning_rate": 6.75955583974355e-06, |
| "loss": 0.4155740737915039, |
| "memory(GiB)": 74.62, |
| "step": 515, |
| "token_acc": 0.8803418803418803, |
| "train_speed(iter/s)": 0.022367 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 3.1387264728546143, |
| "learning_rate": 6.747036222199783e-06, |
| "loss": 0.4403674602508545, |
| "memory(GiB)": 74.62, |
| "step": 516, |
| "token_acc": 0.8486486486486486, |
| "train_speed(iter/s)": 0.022368 |
| }, |
| { |
| "epoch": 0.4182002022244692, |
| "grad_norm": 2.3326053619384766, |
| "learning_rate": 6.7345041157136035e-06, |
| "loss": 0.5110398530960083, |
| "memory(GiB)": 74.62, |
| "step": 517, |
| "token_acc": 0.8466453674121406, |
| "train_speed(iter/s)": 0.022369 |
| }, |
| { |
| "epoch": 0.41900910010111225, |
| "grad_norm": 1.981326699256897, |
| "learning_rate": 6.7219596098725995e-06, |
| "loss": 0.3945692181587219, |
| "memory(GiB)": 74.62, |
| "step": 518, |
| "token_acc": 0.8426395939086294, |
| "train_speed(iter/s)": 0.02237 |
| }, |
| { |
| "epoch": 0.4198179979777553, |
| "grad_norm": 2.0242714881896973, |
| "learning_rate": 6.709402794352993e-06, |
| "loss": 0.3980899155139923, |
| "memory(GiB)": 74.62, |
| "step": 519, |
| "token_acc": 0.8425925925925926, |
| "train_speed(iter/s)": 0.022371 |
| }, |
| { |
| "epoch": 0.4206268958543984, |
| "grad_norm": 2.2979252338409424, |
| "learning_rate": 6.696833758919006e-06, |
| "loss": 0.4187348484992981, |
| "memory(GiB)": 74.62, |
| "step": 520, |
| "token_acc": 0.9004329004329005, |
| "train_speed(iter/s)": 0.022372 |
| }, |
| { |
| "epoch": 0.42143579373104145, |
| "grad_norm": 2.154912233352661, |
| "learning_rate": 6.684252593422214e-06, |
| "loss": 0.4182782471179962, |
| "memory(GiB)": 74.62, |
| "step": 521, |
| "token_acc": 0.896414342629482, |
| "train_speed(iter/s)": 0.022372 |
| }, |
| { |
| "epoch": 0.4222446916076845, |
| "grad_norm": 2.3540515899658203, |
| "learning_rate": 6.67165938780091e-06, |
| "loss": 0.41942286491394043, |
| "memory(GiB)": 74.62, |
| "step": 522, |
| "token_acc": 0.7923728813559322, |
| "train_speed(iter/s)": 0.022373 |
| }, |
| { |
| "epoch": 0.4230535894843276, |
| "grad_norm": 2.746999502182007, |
| "learning_rate": 6.659054232079454e-06, |
| "loss": 0.48690980672836304, |
| "memory(GiB)": 74.62, |
| "step": 523, |
| "token_acc": 0.8956521739130435, |
| "train_speed(iter/s)": 0.022374 |
| }, |
| { |
| "epoch": 0.42386248736097065, |
| "grad_norm": 2.6656594276428223, |
| "learning_rate": 6.646437216367634e-06, |
| "loss": 0.41001442074775696, |
| "memory(GiB)": 74.62, |
| "step": 524, |
| "token_acc": 0.871244635193133, |
| "train_speed(iter/s)": 0.022375 |
| }, |
| { |
| "epoch": 0.4246713852376138, |
| "grad_norm": 3.287884473800659, |
| "learning_rate": 6.633808430860021e-06, |
| "loss": 0.3976552486419678, |
| "memory(GiB)": 74.62, |
| "step": 525, |
| "token_acc": 0.8932584269662921, |
| "train_speed(iter/s)": 0.022376 |
| }, |
| { |
| "epoch": 0.42548028311425684, |
| "grad_norm": 1.8821219205856323, |
| "learning_rate": 6.6211679658353235e-06, |
| "loss": 0.40812772512435913, |
| "memory(GiB)": 74.62, |
| "step": 526, |
| "token_acc": 0.8380281690140845, |
| "train_speed(iter/s)": 0.022377 |
| }, |
| { |
| "epoch": 0.4262891809908999, |
| "grad_norm": 2.2975385189056396, |
| "learning_rate": 6.608515911655744e-06, |
| "loss": 0.4923143982887268, |
| "memory(GiB)": 74.62, |
| "step": 527, |
| "token_acc": 0.8621908127208481, |
| "train_speed(iter/s)": 0.022378 |
| }, |
| { |
| "epoch": 0.427098078867543, |
| "grad_norm": 2.0141286849975586, |
| "learning_rate": 6.595852358766334e-06, |
| "loss": 0.42522329092025757, |
| "memory(GiB)": 74.62, |
| "step": 528, |
| "token_acc": 0.8579234972677595, |
| "train_speed(iter/s)": 0.022379 |
| }, |
| { |
| "epoch": 0.42790697674418604, |
| "grad_norm": 2.7446937561035156, |
| "learning_rate": 6.583177397694338e-06, |
| "loss": 0.4497550129890442, |
| "memory(GiB)": 74.62, |
| "step": 529, |
| "token_acc": 0.8915094339622641, |
| "train_speed(iter/s)": 0.022379 |
| }, |
| { |
| "epoch": 0.4287158746208291, |
| "grad_norm": 2.207721710205078, |
| "learning_rate": 6.570491119048558e-06, |
| "loss": 0.48890426754951477, |
| "memory(GiB)": 74.62, |
| "step": 530, |
| "token_acc": 0.8237082066869301, |
| "train_speed(iter/s)": 0.02238 |
| }, |
| { |
| "epoch": 0.4295247724974722, |
| "grad_norm": 1.9948323965072632, |
| "learning_rate": 6.557793613518704e-06, |
| "loss": 0.39835628867149353, |
| "memory(GiB)": 74.62, |
| "step": 531, |
| "token_acc": 0.8313953488372093, |
| "train_speed(iter/s)": 0.022381 |
| }, |
| { |
| "epoch": 0.43033367037411524, |
| "grad_norm": 2.0337955951690674, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.4067310094833374, |
| "memory(GiB)": 74.62, |
| "step": 532, |
| "token_acc": 0.8481848184818482, |
| "train_speed(iter/s)": 0.022382 |
| }, |
| { |
| "epoch": 0.43114256825075836, |
| "grad_norm": 1.673884630203247, |
| "learning_rate": 6.5323652849662335e-06, |
| "loss": 0.4390275478363037, |
| "memory(GiB)": 74.62, |
| "step": 533, |
| "token_acc": 0.7947976878612717, |
| "train_speed(iter/s)": 0.022382 |
| }, |
| { |
| "epoch": 0.43195146612740143, |
| "grad_norm": 2.2995364665985107, |
| "learning_rate": 6.519634643721721e-06, |
| "loss": 0.40432244539260864, |
| "memory(GiB)": 74.62, |
| "step": 534, |
| "token_acc": 0.8676470588235294, |
| "train_speed(iter/s)": 0.022383 |
| }, |
| { |
| "epoch": 0.4327603640040445, |
| "grad_norm": 2.3338489532470703, |
| "learning_rate": 6.50689313914804e-06, |
| "loss": 0.4244130849838257, |
| "memory(GiB)": 74.62, |
| "step": 535, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022384 |
| }, |
| { |
| "epoch": 0.43356926188068756, |
| "grad_norm": 3.962207078933716, |
| "learning_rate": 6.494140862329688e-06, |
| "loss": 0.43396979570388794, |
| "memory(GiB)": 74.62, |
| "step": 536, |
| "token_acc": 0.8958333333333334, |
| "train_speed(iter/s)": 0.022385 |
| }, |
| { |
| "epoch": 0.43437815975733063, |
| "grad_norm": 2.2048377990722656, |
| "learning_rate": 6.481377904428171e-06, |
| "loss": 0.4214767813682556, |
| "memory(GiB)": 74.62, |
| "step": 537, |
| "token_acc": 0.92, |
| "train_speed(iter/s)": 0.022385 |
| }, |
| { |
| "epoch": 0.4351870576339737, |
| "grad_norm": 2.1275532245635986, |
| "learning_rate": 6.468604356681347e-06, |
| "loss": 0.47981610894203186, |
| "memory(GiB)": 74.62, |
| "step": 538, |
| "token_acc": 0.8615384615384616, |
| "train_speed(iter/s)": 0.022386 |
| }, |
| { |
| "epoch": 0.43599595551061676, |
| "grad_norm": 2.525294542312622, |
| "learning_rate": 6.4558203104027805e-06, |
| "loss": 0.3834857940673828, |
| "memory(GiB)": 74.62, |
| "step": 539, |
| "token_acc": 0.8962264150943396, |
| "train_speed(iter/s)": 0.022387 |
| }, |
| { |
| "epoch": 0.4368048533872599, |
| "grad_norm": 1.9019864797592163, |
| "learning_rate": 6.443025856981086e-06, |
| "loss": 0.4347085952758789, |
| "memory(GiB)": 74.62, |
| "step": 540, |
| "token_acc": 0.8483606557377049, |
| "train_speed(iter/s)": 0.022388 |
| }, |
| { |
| "epoch": 0.43761375126390295, |
| "grad_norm": 2.1029298305511475, |
| "learning_rate": 6.430221087879272e-06, |
| "loss": 0.3873569071292877, |
| "memory(GiB)": 74.62, |
| "step": 541, |
| "token_acc": 0.8458149779735683, |
| "train_speed(iter/s)": 0.022388 |
| }, |
| { |
| "epoch": 0.438422649140546, |
| "grad_norm": 2.2039341926574707, |
| "learning_rate": 6.41740609463409e-06, |
| "loss": 0.41179242730140686, |
| "memory(GiB)": 74.62, |
| "step": 542, |
| "token_acc": 0.8433179723502304, |
| "train_speed(iter/s)": 0.022389 |
| }, |
| { |
| "epoch": 0.4392315470171891, |
| "grad_norm": 1.985140085220337, |
| "learning_rate": 6.404580968855385e-06, |
| "loss": 0.3754437565803528, |
| "memory(GiB)": 74.62, |
| "step": 543, |
| "token_acc": 0.8695652173913043, |
| "train_speed(iter/s)": 0.02239 |
| }, |
| { |
| "epoch": 0.44004044489383215, |
| "grad_norm": 2.1291117668151855, |
| "learning_rate": 6.3917458022254345e-06, |
| "loss": 0.382461816072464, |
| "memory(GiB)": 74.62, |
| "step": 544, |
| "token_acc": 0.8463768115942029, |
| "train_speed(iter/s)": 0.022391 |
| }, |
| { |
| "epoch": 0.4408493427704752, |
| "grad_norm": 2.164369583129883, |
| "learning_rate": 6.3789006864982885e-06, |
| "loss": 0.41792726516723633, |
| "memory(GiB)": 74.62, |
| "step": 545, |
| "token_acc": 0.8883495145631068, |
| "train_speed(iter/s)": 0.022391 |
| }, |
| { |
| "epoch": 0.4416582406471183, |
| "grad_norm": 2.030388355255127, |
| "learning_rate": 6.366045713499129e-06, |
| "loss": 0.42167988419532776, |
| "memory(GiB)": 74.62, |
| "step": 546, |
| "token_acc": 0.8613445378151261, |
| "train_speed(iter/s)": 0.022392 |
| }, |
| { |
| "epoch": 0.44246713852376135, |
| "grad_norm": 1.9591219425201416, |
| "learning_rate": 6.353180975123595e-06, |
| "loss": 0.3823608458042145, |
| "memory(GiB)": 74.62, |
| "step": 547, |
| "token_acc": 0.8422818791946308, |
| "train_speed(iter/s)": 0.022393 |
| }, |
| { |
| "epoch": 0.44327603640040447, |
| "grad_norm": 2.547567367553711, |
| "learning_rate": 6.340306563337142e-06, |
| "loss": 0.4388830363750458, |
| "memory(GiB)": 74.62, |
| "step": 548, |
| "token_acc": 0.8425925925925926, |
| "train_speed(iter/s)": 0.022393 |
| }, |
| { |
| "epoch": 0.44408493427704754, |
| "grad_norm": 2.0034782886505127, |
| "learning_rate": 6.327422570174373e-06, |
| "loss": 0.3995330035686493, |
| "memory(GiB)": 74.62, |
| "step": 549, |
| "token_acc": 0.8996138996138996, |
| "train_speed(iter/s)": 0.022394 |
| }, |
| { |
| "epoch": 0.4448938321536906, |
| "grad_norm": 2.489525079727173, |
| "learning_rate": 6.314529087738387e-06, |
| "loss": 0.4121745824813843, |
| "memory(GiB)": 74.62, |
| "step": 550, |
| "token_acc": 0.842741935483871, |
| "train_speed(iter/s)": 0.022395 |
| }, |
| { |
| "epoch": 0.44570273003033367, |
| "grad_norm": 2.647597551345825, |
| "learning_rate": 6.301626208200116e-06, |
| "loss": 0.4198951721191406, |
| "memory(GiB)": 74.62, |
| "step": 551, |
| "token_acc": 0.8409090909090909, |
| "train_speed(iter/s)": 0.022395 |
| }, |
| { |
| "epoch": 0.44651162790697674, |
| "grad_norm": 3.1573736667633057, |
| "learning_rate": 6.2887140237976714e-06, |
| "loss": 0.36342883110046387, |
| "memory(GiB)": 74.62, |
| "step": 552, |
| "token_acc": 0.8653846153846154, |
| "train_speed(iter/s)": 0.022396 |
| }, |
| { |
| "epoch": 0.4473205257836198, |
| "grad_norm": 2.4319777488708496, |
| "learning_rate": 6.27579262683568e-06, |
| "loss": 0.4457288086414337, |
| "memory(GiB)": 74.62, |
| "step": 553, |
| "token_acc": 0.842741935483871, |
| "train_speed(iter/s)": 0.022397 |
| }, |
| { |
| "epoch": 0.44812942366026287, |
| "grad_norm": 2.0444133281707764, |
| "learning_rate": 6.2628621096846265e-06, |
| "loss": 0.3989095091819763, |
| "memory(GiB)": 74.62, |
| "step": 554, |
| "token_acc": 0.8648648648648649, |
| "train_speed(iter/s)": 0.022398 |
| }, |
| { |
| "epoch": 0.448938321536906, |
| "grad_norm": 2.0919275283813477, |
| "learning_rate": 6.249922564780193e-06, |
| "loss": 0.4167803227901459, |
| "memory(GiB)": 74.62, |
| "step": 555, |
| "token_acc": 0.8681318681318682, |
| "train_speed(iter/s)": 0.022398 |
| }, |
| { |
| "epoch": 0.44974721941354906, |
| "grad_norm": 2.3367862701416016, |
| "learning_rate": 6.236974084622598e-06, |
| "loss": 0.43416649103164673, |
| "memory(GiB)": 74.62, |
| "step": 556, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022399 |
| }, |
| { |
| "epoch": 0.4505561172901921, |
| "grad_norm": 2.491732597351074, |
| "learning_rate": 6.224016761775933e-06, |
| "loss": 0.451057493686676, |
| "memory(GiB)": 74.62, |
| "step": 557, |
| "token_acc": 0.8170347003154574, |
| "train_speed(iter/s)": 0.0224 |
| }, |
| { |
| "epoch": 0.4513650151668352, |
| "grad_norm": 2.020247459411621, |
| "learning_rate": 6.211050688867504e-06, |
| "loss": 0.4087960422039032, |
| "memory(GiB)": 74.62, |
| "step": 558, |
| "token_acc": 0.8835978835978836, |
| "train_speed(iter/s)": 0.0224 |
| }, |
| { |
| "epoch": 0.45217391304347826, |
| "grad_norm": 2.914745807647705, |
| "learning_rate": 6.198075958587168e-06, |
| "loss": 0.42803430557250977, |
| "memory(GiB)": 74.62, |
| "step": 559, |
| "token_acc": 0.8418079096045198, |
| "train_speed(iter/s)": 0.022401 |
| }, |
| { |
| "epoch": 0.4529828109201213, |
| "grad_norm": 2.470507860183716, |
| "learning_rate": 6.185092663686671e-06, |
| "loss": 0.4218277931213379, |
| "memory(GiB)": 74.62, |
| "step": 560, |
| "token_acc": 0.8411764705882353, |
| "train_speed(iter/s)": 0.022402 |
| }, |
| { |
| "epoch": 0.4537917087967644, |
| "grad_norm": 1.9057127237319946, |
| "learning_rate": 6.172100896978985e-06, |
| "loss": 0.3940941095352173, |
| "memory(GiB)": 74.62, |
| "step": 561, |
| "token_acc": 0.8507936507936508, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.45460060667340746, |
| "grad_norm": 3.1265318393707275, |
| "learning_rate": 6.1591007513376425e-06, |
| "loss": 0.4158666431903839, |
| "memory(GiB)": 74.62, |
| "step": 562, |
| "token_acc": 0.8809523809523809, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.4554095045500506, |
| "grad_norm": 2.3407959938049316, |
| "learning_rate": 6.146092319696073e-06, |
| "loss": 0.4111853241920471, |
| "memory(GiB)": 74.62, |
| "step": 563, |
| "token_acc": 0.8944723618090452, |
| "train_speed(iter/s)": 0.022404 |
| }, |
| { |
| "epoch": 0.45621840242669365, |
| "grad_norm": 2.639300584793091, |
| "learning_rate": 6.133075695046944e-06, |
| "loss": 0.41796183586120605, |
| "memory(GiB)": 74.62, |
| "step": 564, |
| "token_acc": 0.8415094339622642, |
| "train_speed(iter/s)": 0.022405 |
| }, |
| { |
| "epoch": 0.4570273003033367, |
| "grad_norm": 2.0815927982330322, |
| "learning_rate": 6.120050970441485e-06, |
| "loss": 0.4047802686691284, |
| "memory(GiB)": 74.62, |
| "step": 565, |
| "token_acc": 0.8901734104046243, |
| "train_speed(iter/s)": 0.022406 |
| }, |
| { |
| "epoch": 0.4578361981799798, |
| "grad_norm": 2.186722993850708, |
| "learning_rate": 6.107018238988838e-06, |
| "loss": 0.45547983050346375, |
| "memory(GiB)": 74.62, |
| "step": 566, |
| "token_acc": 0.8584905660377359, |
| "train_speed(iter/s)": 0.022406 |
| }, |
| { |
| "epoch": 0.45864509605662285, |
| "grad_norm": 2.1137285232543945, |
| "learning_rate": 6.093977593855376e-06, |
| "loss": 0.4355093836784363, |
| "memory(GiB)": 74.62, |
| "step": 567, |
| "token_acc": 0.8921933085501859, |
| "train_speed(iter/s)": 0.022407 |
| }, |
| { |
| "epoch": 0.4594539939332659, |
| "grad_norm": 2.740379810333252, |
| "learning_rate": 6.080929128264046e-06, |
| "loss": 0.5192371606826782, |
| "memory(GiB)": 74.62, |
| "step": 568, |
| "token_acc": 0.8766519823788547, |
| "train_speed(iter/s)": 0.022408 |
| }, |
| { |
| "epoch": 0.460262891809909, |
| "grad_norm": 2.2080211639404297, |
| "learning_rate": 6.067872935493703e-06, |
| "loss": 0.3434896767139435, |
| "memory(GiB)": 74.62, |
| "step": 569, |
| "token_acc": 0.9264069264069265, |
| "train_speed(iter/s)": 0.022408 |
| }, |
| { |
| "epoch": 0.46107178968655205, |
| "grad_norm": 2.196671724319458, |
| "learning_rate": 6.054809108878438e-06, |
| "loss": 0.4425520896911621, |
| "memory(GiB)": 74.62, |
| "step": 570, |
| "token_acc": 0.8904761904761904, |
| "train_speed(iter/s)": 0.022409 |
| }, |
| { |
| "epoch": 0.46188068756319517, |
| "grad_norm": 2.0799689292907715, |
| "learning_rate": 6.041737741806914e-06, |
| "loss": 0.4603237509727478, |
| "memory(GiB)": 74.62, |
| "step": 571, |
| "token_acc": 0.8606060606060606, |
| "train_speed(iter/s)": 0.02241 |
| }, |
| { |
| "epoch": 0.46268958543983824, |
| "grad_norm": 2.2659521102905273, |
| "learning_rate": 6.028658927721698e-06, |
| "loss": 0.3965636193752289, |
| "memory(GiB)": 74.62, |
| "step": 572, |
| "token_acc": 0.8088235294117647, |
| "train_speed(iter/s)": 0.02241 |
| }, |
| { |
| "epoch": 0.4634984833164813, |
| "grad_norm": 1.9087399244308472, |
| "learning_rate": 6.015572760118597e-06, |
| "loss": 0.3759012222290039, |
| "memory(GiB)": 74.62, |
| "step": 573, |
| "token_acc": 0.8742331288343558, |
| "train_speed(iter/s)": 0.022411 |
| }, |
| { |
| "epoch": 0.46430738119312437, |
| "grad_norm": 1.982033610343933, |
| "learning_rate": 6.002479332545982e-06, |
| "loss": 0.45862114429473877, |
| "memory(GiB)": 74.62, |
| "step": 574, |
| "token_acc": 0.8328173374613003, |
| "train_speed(iter/s)": 0.022411 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 3.0300614833831787, |
| "learning_rate": 5.989378738604121e-06, |
| "loss": 0.47833582758903503, |
| "memory(GiB)": 74.62, |
| "step": 575, |
| "token_acc": 0.8853211009174312, |
| "train_speed(iter/s)": 0.022412 |
| }, |
| { |
| "epoch": 0.4659251769464105, |
| "grad_norm": 2.1511874198913574, |
| "learning_rate": 5.976271071944517e-06, |
| "loss": 0.4461168348789215, |
| "memory(GiB)": 74.62, |
| "step": 576, |
| "token_acc": 0.8412698412698413, |
| "train_speed(iter/s)": 0.022413 |
| }, |
| { |
| "epoch": 0.46673407482305357, |
| "grad_norm": 2.324009418487549, |
| "learning_rate": 5.963156426269228e-06, |
| "loss": 0.3640004098415375, |
| "memory(GiB)": 74.62, |
| "step": 577, |
| "token_acc": 0.8808510638297873, |
| "train_speed(iter/s)": 0.022413 |
| }, |
| { |
| "epoch": 0.4675429726996967, |
| "grad_norm": 2.6052918434143066, |
| "learning_rate": 5.9500348953302055e-06, |
| "loss": 0.3626942038536072, |
| "memory(GiB)": 74.62, |
| "step": 578, |
| "token_acc": 0.8615384615384616, |
| "train_speed(iter/s)": 0.022414 |
| }, |
| { |
| "epoch": 0.46835187057633976, |
| "grad_norm": 3.0375425815582275, |
| "learning_rate": 5.936906572928625e-06, |
| "loss": 0.4241126775741577, |
| "memory(GiB)": 74.62, |
| "step": 579, |
| "token_acc": 0.8881987577639752, |
| "train_speed(iter/s)": 0.022415 |
| }, |
| { |
| "epoch": 0.4691607684529828, |
| "grad_norm": 2.636939764022827, |
| "learning_rate": 5.923771552914202e-06, |
| "loss": 0.4479450583457947, |
| "memory(GiB)": 74.62, |
| "step": 580, |
| "token_acc": 0.8616071428571429, |
| "train_speed(iter/s)": 0.022416 |
| }, |
| { |
| "epoch": 0.4699696663296259, |
| "grad_norm": 1.9995110034942627, |
| "learning_rate": 5.910629929184541e-06, |
| "loss": 0.37398701906204224, |
| "memory(GiB)": 74.62, |
| "step": 581, |
| "token_acc": 0.8115942028985508, |
| "train_speed(iter/s)": 0.022416 |
| }, |
| { |
| "epoch": 0.47077856420626896, |
| "grad_norm": 2.149606227874756, |
| "learning_rate": 5.897481795684447e-06, |
| "loss": 0.4055722951889038, |
| "memory(GiB)": 74.62, |
| "step": 582, |
| "token_acc": 0.8668941979522184, |
| "train_speed(iter/s)": 0.022417 |
| }, |
| { |
| "epoch": 0.471587462082912, |
| "grad_norm": 3.842085599899292, |
| "learning_rate": 5.8843272464052626e-06, |
| "loss": 0.38462674617767334, |
| "memory(GiB)": 74.62, |
| "step": 583, |
| "token_acc": 0.8869565217391304, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.4723963599595551, |
| "grad_norm": 2.599775552749634, |
| "learning_rate": 5.871166375384201e-06, |
| "loss": 0.4538233280181885, |
| "memory(GiB)": 74.62, |
| "step": 584, |
| "token_acc": 0.8263888888888888, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.47320525783619816, |
| "grad_norm": 2.188464403152466, |
| "learning_rate": 5.857999276703657e-06, |
| "loss": 0.39639097452163696, |
| "memory(GiB)": 74.62, |
| "step": 585, |
| "token_acc": 0.8488372093023255, |
| "train_speed(iter/s)": 0.022419 |
| }, |
| { |
| "epoch": 0.4740141557128413, |
| "grad_norm": 2.0777783393859863, |
| "learning_rate": 5.844826044490551e-06, |
| "loss": 0.40574946999549866, |
| "memory(GiB)": 74.62, |
| "step": 586, |
| "token_acc": 0.863013698630137, |
| "train_speed(iter/s)": 0.02242 |
| }, |
| { |
| "epoch": 0.47482305358948435, |
| "grad_norm": 2.120650053024292, |
| "learning_rate": 5.831646772915651e-06, |
| "loss": 0.4573715329170227, |
| "memory(GiB)": 74.62, |
| "step": 587, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022421 |
| }, |
| { |
| "epoch": 0.4756319514661274, |
| "grad_norm": 2.0684597492218018, |
| "learning_rate": 5.8184615561928924e-06, |
| "loss": 0.39603498578071594, |
| "memory(GiB)": 74.62, |
| "step": 588, |
| "token_acc": 0.8802816901408451, |
| "train_speed(iter/s)": 0.022421 |
| }, |
| { |
| "epoch": 0.4764408493427705, |
| "grad_norm": 2.653454303741455, |
| "learning_rate": 5.805270488578715e-06, |
| "loss": 0.4210537075996399, |
| "memory(GiB)": 74.62, |
| "step": 589, |
| "token_acc": 0.8981481481481481, |
| "train_speed(iter/s)": 0.022422 |
| }, |
| { |
| "epoch": 0.47724974721941354, |
| "grad_norm": 2.2436983585357666, |
| "learning_rate": 5.7920736643713835e-06, |
| "loss": 0.3758474290370941, |
| "memory(GiB)": 74.62, |
| "step": 590, |
| "token_acc": 0.8515625, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.4780586450960566, |
| "grad_norm": 3.6357314586639404, |
| "learning_rate": 5.778871177910315e-06, |
| "loss": 0.4624039828777313, |
| "memory(GiB)": 74.62, |
| "step": 591, |
| "token_acc": 0.8033898305084746, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.4788675429726997, |
| "grad_norm": 2.0779330730438232, |
| "learning_rate": 5.765663123575401e-06, |
| "loss": 0.4041805863380432, |
| "memory(GiB)": 74.62, |
| "step": 592, |
| "token_acc": 0.8620689655172413, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.47967644084934274, |
| "grad_norm": 2.654712200164795, |
| "learning_rate": 5.752449595786341e-06, |
| "loss": 0.3960053324699402, |
| "memory(GiB)": 74.62, |
| "step": 593, |
| "token_acc": 0.8228782287822878, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.48048533872598587, |
| "grad_norm": 2.4642553329467773, |
| "learning_rate": 5.7392306890019565e-06, |
| "loss": 0.41592419147491455, |
| "memory(GiB)": 74.62, |
| "step": 594, |
| "token_acc": 0.7847533632286996, |
| "train_speed(iter/s)": 0.022425 |
| }, |
| { |
| "epoch": 0.48129423660262893, |
| "grad_norm": 2.2550253868103027, |
| "learning_rate": 5.726006497719525e-06, |
| "loss": 0.46111100912094116, |
| "memory(GiB)": 74.62, |
| "step": 595, |
| "token_acc": 0.8361204013377926, |
| "train_speed(iter/s)": 0.022426 |
| }, |
| { |
| "epoch": 0.482103134479272, |
| "grad_norm": 2.8922863006591797, |
| "learning_rate": 5.712777116474103e-06, |
| "loss": 0.5086416006088257, |
| "memory(GiB)": 74.62, |
| "step": 596, |
| "token_acc": 0.8284023668639053, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.48291203235591507, |
| "grad_norm": 2.173737049102783, |
| "learning_rate": 5.699542639837844e-06, |
| "loss": 0.45955491065979004, |
| "memory(GiB)": 74.62, |
| "step": 597, |
| "token_acc": 0.8786610878661087, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.48372093023255813, |
| "grad_norm": 1.9948984384536743, |
| "learning_rate": 5.686303162419326e-06, |
| "loss": 0.4127792716026306, |
| "memory(GiB)": 74.62, |
| "step": 598, |
| "token_acc": 0.8712121212121212, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.4845298281092012, |
| "grad_norm": 2.446259021759033, |
| "learning_rate": 5.6730587788628785e-06, |
| "loss": 0.4015938341617584, |
| "memory(GiB)": 74.62, |
| "step": 599, |
| "token_acc": 0.8502202643171806, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.48533872598584427, |
| "grad_norm": 2.781144618988037, |
| "learning_rate": 5.659809583847907e-06, |
| "loss": 0.44586971402168274, |
| "memory(GiB)": 74.62, |
| "step": 600, |
| "token_acc": 0.8482490272373541, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.4861476238624874, |
| "grad_norm": 2.267489433288574, |
| "learning_rate": 5.646555672088203e-06, |
| "loss": 0.36807918548583984, |
| "memory(GiB)": 74.62, |
| "step": 601, |
| "token_acc": 0.8648648648648649, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 2.3026046752929688, |
| "learning_rate": 5.633297138331285e-06, |
| "loss": 0.4327083230018616, |
| "memory(GiB)": 74.62, |
| "step": 602, |
| "token_acc": 0.8597122302158273, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.4877654196157735, |
| "grad_norm": 2.635984420776367, |
| "learning_rate": 5.620034077357708e-06, |
| "loss": 0.44607388973236084, |
| "memory(GiB)": 74.62, |
| "step": 603, |
| "token_acc": 0.8711111111111111, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.4885743174924166, |
| "grad_norm": 2.5992751121520996, |
| "learning_rate": 5.60676658398039e-06, |
| "loss": 0.3917505145072937, |
| "memory(GiB)": 74.62, |
| "step": 604, |
| "token_acc": 0.9137931034482759, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.48938321536905965, |
| "grad_norm": 2.3977952003479004, |
| "learning_rate": 5.593494753043938e-06, |
| "loss": 0.41896378993988037, |
| "memory(GiB)": 74.62, |
| "step": 605, |
| "token_acc": 0.8821548821548821, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.4901921132457027, |
| "grad_norm": 2.1268513202667236, |
| "learning_rate": 5.580218679423965e-06, |
| "loss": 0.436327189207077, |
| "memory(GiB)": 74.62, |
| "step": 606, |
| "token_acc": 0.8737864077669902, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.4910010111223458, |
| "grad_norm": 3.2890071868896484, |
| "learning_rate": 5.566938458026411e-06, |
| "loss": 0.4408925771713257, |
| "memory(GiB)": 74.62, |
| "step": 607, |
| "token_acc": 0.9095744680851063, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.49180990899898885, |
| "grad_norm": 2.2176642417907715, |
| "learning_rate": 5.553654183786872e-06, |
| "loss": 0.46782928705215454, |
| "memory(GiB)": 74.62, |
| "step": 608, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.492618806875632, |
| "grad_norm": 2.8756251335144043, |
| "learning_rate": 5.540365951669913e-06, |
| "loss": 0.4359992742538452, |
| "memory(GiB)": 74.62, |
| "step": 609, |
| "token_acc": 0.8753993610223643, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.49342770475227504, |
| "grad_norm": 2.9646661281585693, |
| "learning_rate": 5.527073856668391e-06, |
| "loss": 0.4747014343738556, |
| "memory(GiB)": 74.62, |
| "step": 610, |
| "token_acc": 0.889795918367347, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.4942366026289181, |
| "grad_norm": 2.289034128189087, |
| "learning_rate": 5.513777993802781e-06, |
| "loss": 0.4281376600265503, |
| "memory(GiB)": 74.62, |
| "step": 611, |
| "token_acc": 0.87890625, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.4950455005055612, |
| "grad_norm": 2.541618585586548, |
| "learning_rate": 5.500478458120493e-06, |
| "loss": 0.45447611808776855, |
| "memory(GiB)": 74.62, |
| "step": 612, |
| "token_acc": 0.8346456692913385, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.49585439838220424, |
| "grad_norm": 3.065063953399658, |
| "learning_rate": 5.487175344695188e-06, |
| "loss": 0.4350849688053131, |
| "memory(GiB)": 74.62, |
| "step": 613, |
| "token_acc": 0.8583333333333333, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.4966632962588473, |
| "grad_norm": 1.9416303634643555, |
| "learning_rate": 5.47386874862611e-06, |
| "loss": 0.4030672311782837, |
| "memory(GiB)": 74.62, |
| "step": 614, |
| "token_acc": 0.8527397260273972, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.4974721941354904, |
| "grad_norm": 2.4637768268585205, |
| "learning_rate": 5.460558765037392e-06, |
| "loss": 0.4326108396053314, |
| "memory(GiB)": 74.62, |
| "step": 615, |
| "token_acc": 0.8831168831168831, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.49828109201213344, |
| "grad_norm": 2.7800002098083496, |
| "learning_rate": 5.447245489077389e-06, |
| "loss": 0.42490726709365845, |
| "memory(GiB)": 74.62, |
| "step": 616, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.49908998988877656, |
| "grad_norm": 4.720980167388916, |
| "learning_rate": 5.433929015917988e-06, |
| "loss": 0.39446377754211426, |
| "memory(GiB)": 74.62, |
| "step": 617, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.49989888776541963, |
| "grad_norm": 2.4783382415771484, |
| "learning_rate": 5.420609440753935e-06, |
| "loss": 0.41358453035354614, |
| "memory(GiB)": 74.62, |
| "step": 618, |
| "token_acc": 0.8716216216216216, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.5007077856420626, |
| "grad_norm": 2.4651012420654297, |
| "learning_rate": 5.407286858802147e-06, |
| "loss": 0.3854910433292389, |
| "memory(GiB)": 74.62, |
| "step": 619, |
| "token_acc": 0.8565217391304348, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.5015166835187057, |
| "grad_norm": 2.053473472595215, |
| "learning_rate": 5.393961365301041e-06, |
| "loss": 0.3815562427043915, |
| "memory(GiB)": 74.62, |
| "step": 620, |
| "token_acc": 0.888135593220339, |
| "train_speed(iter/s)": 0.022425 |
| }, |
| { |
| "epoch": 0.5023255813953489, |
| "grad_norm": 2.1635167598724365, |
| "learning_rate": 5.380633055509843e-06, |
| "loss": 0.45562463998794556, |
| "memory(GiB)": 74.62, |
| "step": 621, |
| "token_acc": 0.8426573426573427, |
| "train_speed(iter/s)": 0.022426 |
| }, |
| { |
| "epoch": 0.503134479271992, |
| "grad_norm": 2.1759238243103027, |
| "learning_rate": 5.367302024707911e-06, |
| "loss": 0.4003329873085022, |
| "memory(GiB)": 74.62, |
| "step": 622, |
| "token_acc": 0.8444444444444444, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.503943377148635, |
| "grad_norm": 2.391221284866333, |
| "learning_rate": 5.35396836819406e-06, |
| "loss": 0.4506310820579529, |
| "memory(GiB)": 74.62, |
| "step": 623, |
| "token_acc": 0.8243727598566308, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.5047522750252781, |
| "grad_norm": 2.422003746032715, |
| "learning_rate": 5.340632181285872e-06, |
| "loss": 0.3775983154773712, |
| "memory(GiB)": 74.62, |
| "step": 624, |
| "token_acc": 0.9178082191780822, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.5055611729019212, |
| "grad_norm": 2.822801113128662, |
| "learning_rate": 5.327293559319014e-06, |
| "loss": 0.46088916063308716, |
| "memory(GiB)": 74.62, |
| "step": 625, |
| "token_acc": 0.8203389830508474, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.5063700707785642, |
| "grad_norm": 2.9713943004608154, |
| "learning_rate": 5.3139525976465675e-06, |
| "loss": 0.4233189821243286, |
| "memory(GiB)": 74.62, |
| "step": 626, |
| "token_acc": 0.8892988929889298, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.5071789686552073, |
| "grad_norm": 2.24816632270813, |
| "learning_rate": 5.300609391638336e-06, |
| "loss": 0.45002853870391846, |
| "memory(GiB)": 74.62, |
| "step": 627, |
| "token_acc": 0.835820895522388, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.5079878665318504, |
| "grad_norm": 3.1802284717559814, |
| "learning_rate": 5.287264036680166e-06, |
| "loss": 0.40955209732055664, |
| "memory(GiB)": 74.62, |
| "step": 628, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.5087967644084934, |
| "grad_norm": 2.9746017456054688, |
| "learning_rate": 5.27391662817327e-06, |
| "loss": 0.4412648677825928, |
| "memory(GiB)": 74.62, |
| "step": 629, |
| "token_acc": 0.864951768488746, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.5096056622851365, |
| "grad_norm": 7.995876312255859, |
| "learning_rate": 5.260567261533538e-06, |
| "loss": 0.4368639886379242, |
| "memory(GiB)": 74.62, |
| "step": 630, |
| "token_acc": 0.9067796610169492, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.5104145601617796, |
| "grad_norm": 4.124439239501953, |
| "learning_rate": 5.2472160321908535e-06, |
| "loss": 0.3601537346839905, |
| "memory(GiB)": 74.62, |
| "step": 631, |
| "token_acc": 0.9384615384615385, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.5112234580384226, |
| "grad_norm": 2.16349196434021, |
| "learning_rate": 5.233863035588427e-06, |
| "loss": 0.49298688769340515, |
| "memory(GiB)": 74.62, |
| "step": 632, |
| "token_acc": 0.8697318007662835, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.5120323559150657, |
| "grad_norm": 3.2173032760620117, |
| "learning_rate": 5.22050836718209e-06, |
| "loss": 0.3806041479110718, |
| "memory(GiB)": 74.62, |
| "step": 633, |
| "token_acc": 0.9253112033195021, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.5128412537917088, |
| "grad_norm": 2.4195048809051514, |
| "learning_rate": 5.207152122439635e-06, |
| "loss": 0.41035759449005127, |
| "memory(GiB)": 74.62, |
| "step": 634, |
| "token_acc": 0.86328125, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.5136501516683518, |
| "grad_norm": 2.598662853240967, |
| "learning_rate": 5.1937943968401175e-06, |
| "loss": 0.40409672260284424, |
| "memory(GiB)": 74.62, |
| "step": 635, |
| "token_acc": 0.9050279329608939, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.514459049544995, |
| "grad_norm": 3.158039093017578, |
| "learning_rate": 5.180435285873182e-06, |
| "loss": 0.4163573682308197, |
| "memory(GiB)": 74.62, |
| "step": 636, |
| "token_acc": 0.8577405857740585, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.5152679474216381, |
| "grad_norm": 2.9024956226348877, |
| "learning_rate": 5.1670748850383734e-06, |
| "loss": 0.43788814544677734, |
| "memory(GiB)": 74.62, |
| "step": 637, |
| "token_acc": 0.8318181818181818, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.5160768452982811, |
| "grad_norm": 5.88484001159668, |
| "learning_rate": 5.153713289844462e-06, |
| "loss": 0.43005481362342834, |
| "memory(GiB)": 74.62, |
| "step": 638, |
| "token_acc": 0.8546099290780141, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.5168857431749242, |
| "grad_norm": 2.6073086261749268, |
| "learning_rate": 5.140350595808751e-06, |
| "loss": 0.441942036151886, |
| "memory(GiB)": 74.62, |
| "step": 639, |
| "token_acc": 0.7777777777777778, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.5176946410515673, |
| "grad_norm": 2.607276439666748, |
| "learning_rate": 5.126986898456401e-06, |
| "loss": 0.40762656927108765, |
| "memory(GiB)": 74.62, |
| "step": 640, |
| "token_acc": 0.9018181818181819, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.5185035389282103, |
| "grad_norm": 3.1285383701324463, |
| "learning_rate": 5.113622293319749e-06, |
| "loss": 0.4376784861087799, |
| "memory(GiB)": 74.62, |
| "step": 641, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.5193124368048534, |
| "grad_norm": 2.1132287979125977, |
| "learning_rate": 5.1002568759376134e-06, |
| "loss": 0.3872153162956238, |
| "memory(GiB)": 74.62, |
| "step": 642, |
| "token_acc": 0.8991596638655462, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.5201213346814965, |
| "grad_norm": 2.294435501098633, |
| "learning_rate": 5.086890741854626e-06, |
| "loss": 0.4477715492248535, |
| "memory(GiB)": 74.62, |
| "step": 643, |
| "token_acc": 0.8445945945945946, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.5209302325581395, |
| "grad_norm": 4.424786567687988, |
| "learning_rate": 5.073523986620539e-06, |
| "loss": 0.4204040765762329, |
| "memory(GiB)": 74.62, |
| "step": 644, |
| "token_acc": 0.8901960784313725, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 6.769619941711426, |
| "learning_rate": 5.060156705789545e-06, |
| "loss": 0.433963418006897, |
| "memory(GiB)": 74.62, |
| "step": 645, |
| "token_acc": 0.8599221789883269, |
| "train_speed(iter/s)": 0.02244 |
| }, |
| { |
| "epoch": 0.5225480283114257, |
| "grad_norm": 2.297720193862915, |
| "learning_rate": 5.046788994919595e-06, |
| "loss": 0.38716062903404236, |
| "memory(GiB)": 74.62, |
| "step": 646, |
| "token_acc": 0.9004329004329005, |
| "train_speed(iter/s)": 0.022441 |
| }, |
| { |
| "epoch": 0.5233569261880687, |
| "grad_norm": 3.8223865032196045, |
| "learning_rate": 5.033420949571712e-06, |
| "loss": 0.3824414610862732, |
| "memory(GiB)": 74.62, |
| "step": 647, |
| "token_acc": 0.898989898989899, |
| "train_speed(iter/s)": 0.022441 |
| }, |
| { |
| "epoch": 0.5241658240647118, |
| "grad_norm": 2.3025248050689697, |
| "learning_rate": 5.020052665309312e-06, |
| "loss": 0.40017083287239075, |
| "memory(GiB)": 74.62, |
| "step": 648, |
| "token_acc": 0.8936170212765957, |
| "train_speed(iter/s)": 0.022442 |
| }, |
| { |
| "epoch": 0.5249747219413549, |
| "grad_norm": 1.8813366889953613, |
| "learning_rate": 5.00668423769752e-06, |
| "loss": 0.3807840347290039, |
| "memory(GiB)": 74.62, |
| "step": 649, |
| "token_acc": 0.8823529411764706, |
| "train_speed(iter/s)": 0.022442 |
| }, |
| { |
| "epoch": 0.5257836198179979, |
| "grad_norm": 2.805870532989502, |
| "learning_rate": 4.993315762302483e-06, |
| "loss": 0.4545632302761078, |
| "memory(GiB)": 74.62, |
| "step": 650, |
| "token_acc": 0.8395061728395061, |
| "train_speed(iter/s)": 0.022443 |
| }, |
| { |
| "epoch": 0.5265925176946411, |
| "grad_norm": 2.4668116569519043, |
| "learning_rate": 4.97994733469069e-06, |
| "loss": 0.39456599950790405, |
| "memory(GiB)": 74.62, |
| "step": 651, |
| "token_acc": 0.8664122137404581, |
| "train_speed(iter/s)": 0.022443 |
| }, |
| { |
| "epoch": 0.5274014155712842, |
| "grad_norm": 2.224895715713501, |
| "learning_rate": 4.96657905042829e-06, |
| "loss": 0.3933877944946289, |
| "memory(GiB)": 74.62, |
| "step": 652, |
| "token_acc": 0.8654708520179372, |
| "train_speed(iter/s)": 0.022444 |
| }, |
| { |
| "epoch": 0.5282103134479272, |
| "grad_norm": 2.5314419269561768, |
| "learning_rate": 4.9532110050804074e-06, |
| "loss": 0.36528831720352173, |
| "memory(GiB)": 74.62, |
| "step": 653, |
| "token_acc": 0.9087591240875912, |
| "train_speed(iter/s)": 0.022444 |
| }, |
| { |
| "epoch": 0.5290192113245703, |
| "grad_norm": 2.0852181911468506, |
| "learning_rate": 4.939843294210456e-06, |
| "loss": 0.39938467741012573, |
| "memory(GiB)": 74.62, |
| "step": 654, |
| "token_acc": 0.8872180451127819, |
| "train_speed(iter/s)": 0.022445 |
| }, |
| { |
| "epoch": 0.5298281092012134, |
| "grad_norm": 2.4768409729003906, |
| "learning_rate": 4.926476013379462e-06, |
| "loss": 0.4587656259536743, |
| "memory(GiB)": 74.62, |
| "step": 655, |
| "token_acc": 0.9147540983606557, |
| "train_speed(iter/s)": 0.022446 |
| }, |
| { |
| "epoch": 0.5306370070778564, |
| "grad_norm": 3.768552303314209, |
| "learning_rate": 4.9131092581453745e-06, |
| "loss": 0.4000494182109833, |
| "memory(GiB)": 74.62, |
| "step": 656, |
| "token_acc": 0.8588957055214724, |
| "train_speed(iter/s)": 0.022446 |
| }, |
| { |
| "epoch": 0.5314459049544995, |
| "grad_norm": 2.7904086112976074, |
| "learning_rate": 4.899743124062387e-06, |
| "loss": 0.42587220668792725, |
| "memory(GiB)": 74.62, |
| "step": 657, |
| "token_acc": 0.8741258741258742, |
| "train_speed(iter/s)": 0.022447 |
| }, |
| { |
| "epoch": 0.5322548028311426, |
| "grad_norm": 2.2774369716644287, |
| "learning_rate": 4.886377706680253e-06, |
| "loss": 0.38174745440483093, |
| "memory(GiB)": 74.62, |
| "step": 658, |
| "token_acc": 0.8697318007662835, |
| "train_speed(iter/s)": 0.022447 |
| }, |
| { |
| "epoch": 0.5330637007077856, |
| "grad_norm": 2.049821376800537, |
| "learning_rate": 4.873013101543599e-06, |
| "loss": 0.4340623617172241, |
| "memory(GiB)": 74.62, |
| "step": 659, |
| "token_acc": 0.8543046357615894, |
| "train_speed(iter/s)": 0.022448 |
| }, |
| { |
| "epoch": 0.5338725985844287, |
| "grad_norm": 2.252617120742798, |
| "learning_rate": 4.859649404191251e-06, |
| "loss": 0.35842257738113403, |
| "memory(GiB)": 74.62, |
| "step": 660, |
| "token_acc": 0.8933333333333333, |
| "train_speed(iter/s)": 0.022448 |
| }, |
| { |
| "epoch": 0.5346814964610718, |
| "grad_norm": 2.1607117652893066, |
| "learning_rate": 4.84628671015554e-06, |
| "loss": 0.40685737133026123, |
| "memory(GiB)": 74.62, |
| "step": 661, |
| "token_acc": 0.8737201365187713, |
| "train_speed(iter/s)": 0.022449 |
| }, |
| { |
| "epoch": 0.5354903943377148, |
| "grad_norm": 2.924506425857544, |
| "learning_rate": 4.832925114961629e-06, |
| "loss": 0.44293731451034546, |
| "memory(GiB)": 74.62, |
| "step": 662, |
| "token_acc": 0.8465608465608465, |
| "train_speed(iter/s)": 0.02245 |
| }, |
| { |
| "epoch": 0.5362992922143579, |
| "grad_norm": 3.0079522132873535, |
| "learning_rate": 4.8195647141268196e-06, |
| "loss": 0.4585626423358917, |
| "memory(GiB)": 74.62, |
| "step": 663, |
| "token_acc": 0.8599221789883269, |
| "train_speed(iter/s)": 0.02245 |
| }, |
| { |
| "epoch": 0.537108190091001, |
| "grad_norm": 2.986860990524292, |
| "learning_rate": 4.8062056031598825e-06, |
| "loss": 0.4173978567123413, |
| "memory(GiB)": 74.62, |
| "step": 664, |
| "token_acc": 0.8721804511278195, |
| "train_speed(iter/s)": 0.022451 |
| }, |
| { |
| "epoch": 0.537917087967644, |
| "grad_norm": 2.1893157958984375, |
| "learning_rate": 4.792847877560367e-06, |
| "loss": 0.40209460258483887, |
| "memory(GiB)": 74.62, |
| "step": 665, |
| "token_acc": 0.8129770992366412, |
| "train_speed(iter/s)": 0.022451 |
| }, |
| { |
| "epoch": 0.5387259858442872, |
| "grad_norm": 2.2716012001037598, |
| "learning_rate": 4.779491632817911e-06, |
| "loss": 0.4765605926513672, |
| "memory(GiB)": 74.62, |
| "step": 666, |
| "token_acc": 0.8706293706293706, |
| "train_speed(iter/s)": 0.022452 |
| }, |
| { |
| "epoch": 0.5395348837209303, |
| "grad_norm": 2.23425555229187, |
| "learning_rate": 4.766136964411576e-06, |
| "loss": 0.39718160033226013, |
| "memory(GiB)": 74.62, |
| "step": 667, |
| "token_acc": 0.8536585365853658, |
| "train_speed(iter/s)": 0.022452 |
| }, |
| { |
| "epoch": 0.5403437815975733, |
| "grad_norm": 2.647259473800659, |
| "learning_rate": 4.752783967809147e-06, |
| "loss": 0.4938986301422119, |
| "memory(GiB)": 74.62, |
| "step": 668, |
| "token_acc": 0.8101694915254237, |
| "train_speed(iter/s)": 0.022453 |
| }, |
| { |
| "epoch": 0.5411526794742164, |
| "grad_norm": 2.081202507019043, |
| "learning_rate": 4.739432738466465e-06, |
| "loss": 0.4376961588859558, |
| "memory(GiB)": 74.62, |
| "step": 669, |
| "token_acc": 0.8683274021352313, |
| "train_speed(iter/s)": 0.022453 |
| }, |
| { |
| "epoch": 0.5419615773508595, |
| "grad_norm": 2.3195981979370117, |
| "learning_rate": 4.726083371826731e-06, |
| "loss": 0.3606075644493103, |
| "memory(GiB)": 74.62, |
| "step": 670, |
| "token_acc": 0.8583690987124464, |
| "train_speed(iter/s)": 0.022454 |
| }, |
| { |
| "epoch": 0.5427704752275025, |
| "grad_norm": 2.1184582710266113, |
| "learning_rate": 4.712735963319834e-06, |
| "loss": 0.4429006576538086, |
| "memory(GiB)": 74.62, |
| "step": 671, |
| "token_acc": 0.8438818565400844, |
| "train_speed(iter/s)": 0.022454 |
| }, |
| { |
| "epoch": 0.5435793731041456, |
| "grad_norm": 2.6941933631896973, |
| "learning_rate": 4.699390608361665e-06, |
| "loss": 0.41405189037323, |
| "memory(GiB)": 74.62, |
| "step": 672, |
| "token_acc": 0.8790697674418605, |
| "train_speed(iter/s)": 0.022455 |
| }, |
| { |
| "epoch": 0.5443882709807887, |
| "grad_norm": 2.466550588607788, |
| "learning_rate": 4.686047402353433e-06, |
| "loss": 0.4570333659648895, |
| "memory(GiB)": 74.62, |
| "step": 673, |
| "token_acc": 0.8647686832740213, |
| "train_speed(iter/s)": 0.022455 |
| }, |
| { |
| "epoch": 0.5451971688574317, |
| "grad_norm": 3.1605703830718994, |
| "learning_rate": 4.672706440680989e-06, |
| "loss": 0.3652383089065552, |
| "memory(GiB)": 74.62, |
| "step": 674, |
| "token_acc": 0.8957345971563981, |
| "train_speed(iter/s)": 0.022456 |
| }, |
| { |
| "epoch": 0.5460060667340748, |
| "grad_norm": 2.547511577606201, |
| "learning_rate": 4.65936781871413e-06, |
| "loss": 0.4206015467643738, |
| "memory(GiB)": 74.62, |
| "step": 675, |
| "token_acc": 0.88671875, |
| "train_speed(iter/s)": 0.022456 |
| }, |
| { |
| "epoch": 0.5468149646107179, |
| "grad_norm": 2.2908408641815186, |
| "learning_rate": 4.64603163180594e-06, |
| "loss": 0.42101001739501953, |
| "memory(GiB)": 74.62, |
| "step": 676, |
| "token_acc": 0.9054054054054054, |
| "train_speed(iter/s)": 0.022457 |
| }, |
| { |
| "epoch": 0.547623862487361, |
| "grad_norm": 2.6179423332214355, |
| "learning_rate": 4.6326979752920905e-06, |
| "loss": 0.4017224907875061, |
| "memory(GiB)": 74.62, |
| "step": 677, |
| "token_acc": 0.8642533936651584, |
| "train_speed(iter/s)": 0.022457 |
| }, |
| { |
| "epoch": 0.548432760364004, |
| "grad_norm": 2.2148091793060303, |
| "learning_rate": 4.619366944490158e-06, |
| "loss": 0.3605102300643921, |
| "memory(GiB)": 74.62, |
| "step": 678, |
| "token_acc": 0.8927038626609443, |
| "train_speed(iter/s)": 0.022458 |
| }, |
| { |
| "epoch": 0.5492416582406471, |
| "grad_norm": 2.3841159343719482, |
| "learning_rate": 4.60603863469896e-06, |
| "loss": 0.3840959370136261, |
| "memory(GiB)": 74.62, |
| "step": 679, |
| "token_acc": 0.8226415094339623, |
| "train_speed(iter/s)": 0.022458 |
| }, |
| { |
| "epoch": 0.5500505561172901, |
| "grad_norm": 2.1525049209594727, |
| "learning_rate": 4.5927131411978536e-06, |
| "loss": 0.41845589876174927, |
| "memory(GiB)": 74.62, |
| "step": 680, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022459 |
| }, |
| { |
| "epoch": 0.5508594539939332, |
| "grad_norm": 2.088181495666504, |
| "learning_rate": 4.579390559246066e-06, |
| "loss": 0.3538067936897278, |
| "memory(GiB)": 74.62, |
| "step": 681, |
| "token_acc": 0.8301282051282052, |
| "train_speed(iter/s)": 0.022459 |
| }, |
| { |
| "epoch": 0.5516683518705764, |
| "grad_norm": 4.506858825683594, |
| "learning_rate": 4.566070984082013e-06, |
| "loss": 0.4188098907470703, |
| "memory(GiB)": 74.62, |
| "step": 682, |
| "token_acc": 0.8808777429467085, |
| "train_speed(iter/s)": 0.02246 |
| }, |
| { |
| "epoch": 0.5524772497472195, |
| "grad_norm": 7.24404764175415, |
| "learning_rate": 4.552754510922612e-06, |
| "loss": 0.3949962258338928, |
| "memory(GiB)": 74.62, |
| "step": 683, |
| "token_acc": 0.8771929824561403, |
| "train_speed(iter/s)": 0.02246 |
| }, |
| { |
| "epoch": 0.5532861476238625, |
| "grad_norm": 2.410817861557007, |
| "learning_rate": 4.539441234962609e-06, |
| "loss": 0.36630767583847046, |
| "memory(GiB)": 74.62, |
| "step": 684, |
| "token_acc": 0.8398692810457516, |
| "train_speed(iter/s)": 0.022461 |
| }, |
| { |
| "epoch": 0.5540950455005056, |
| "grad_norm": 3.47383975982666, |
| "learning_rate": 4.526131251373892e-06, |
| "loss": 0.4143676161766052, |
| "memory(GiB)": 74.62, |
| "step": 685, |
| "token_acc": 0.8458149779735683, |
| "train_speed(iter/s)": 0.022461 |
| }, |
| { |
| "epoch": 0.5549039433771487, |
| "grad_norm": 3.989591360092163, |
| "learning_rate": 4.512824655304814e-06, |
| "loss": 0.39957284927368164, |
| "memory(GiB)": 74.62, |
| "step": 686, |
| "token_acc": 0.8847457627118644, |
| "train_speed(iter/s)": 0.022462 |
| }, |
| { |
| "epoch": 0.5557128412537917, |
| "grad_norm": 2.368927001953125, |
| "learning_rate": 4.499521541879508e-06, |
| "loss": 0.3500638008117676, |
| "memory(GiB)": 74.62, |
| "step": 687, |
| "token_acc": 0.8498402555910544, |
| "train_speed(iter/s)": 0.022462 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 2.1441452503204346, |
| "learning_rate": 4.48622200619722e-06, |
| "loss": 0.3939352035522461, |
| "memory(GiB)": 74.62, |
| "step": 688, |
| "token_acc": 0.9003831417624522, |
| "train_speed(iter/s)": 0.022463 |
| }, |
| { |
| "epoch": 0.5573306370070779, |
| "grad_norm": 2.4296200275421143, |
| "learning_rate": 4.472926143331612e-06, |
| "loss": 0.4165255129337311, |
| "memory(GiB)": 74.62, |
| "step": 689, |
| "token_acc": 0.8741935483870967, |
| "train_speed(iter/s)": 0.022463 |
| }, |
| { |
| "epoch": 0.5581395348837209, |
| "grad_norm": 2.0704715251922607, |
| "learning_rate": 4.459634048330089e-06, |
| "loss": 0.3778902292251587, |
| "memory(GiB)": 74.62, |
| "step": 690, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022464 |
| }, |
| { |
| "epoch": 0.558948432760364, |
| "grad_norm": 1.9288545846939087, |
| "learning_rate": 4.44634581621313e-06, |
| "loss": 0.3621513843536377, |
| "memory(GiB)": 74.62, |
| "step": 691, |
| "token_acc": 0.8803827751196173, |
| "train_speed(iter/s)": 0.022464 |
| }, |
| { |
| "epoch": 0.5597573306370071, |
| "grad_norm": 2.8786773681640625, |
| "learning_rate": 4.433061541973591e-06, |
| "loss": 0.46439093351364136, |
| "memory(GiB)": 74.62, |
| "step": 692, |
| "token_acc": 0.8423076923076923, |
| "train_speed(iter/s)": 0.022465 |
| }, |
| { |
| "epoch": 0.5605662285136501, |
| "grad_norm": 7.472469329833984, |
| "learning_rate": 4.419781320576037e-06, |
| "loss": 0.3596475124359131, |
| "memory(GiB)": 74.62, |
| "step": 693, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022465 |
| }, |
| { |
| "epoch": 0.5613751263902932, |
| "grad_norm": 2.2149417400360107, |
| "learning_rate": 4.406505246956064e-06, |
| "loss": 0.39849790930747986, |
| "memory(GiB)": 74.62, |
| "step": 694, |
| "token_acc": 0.861904761904762, |
| "train_speed(iter/s)": 0.022466 |
| }, |
| { |
| "epoch": 0.5621840242669363, |
| "grad_norm": 2.573707342147827, |
| "learning_rate": 4.393233416019611e-06, |
| "loss": 0.33962416648864746, |
| "memory(GiB)": 74.62, |
| "step": 695, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022466 |
| }, |
| { |
| "epoch": 0.5629929221435793, |
| "grad_norm": 2.2001919746398926, |
| "learning_rate": 4.379965922642294e-06, |
| "loss": 0.43496495485305786, |
| "memory(GiB)": 74.62, |
| "step": 696, |
| "token_acc": 0.8486842105263158, |
| "train_speed(iter/s)": 0.022467 |
| }, |
| { |
| "epoch": 0.5638018200202225, |
| "grad_norm": 1.9872112274169922, |
| "learning_rate": 4.366702861668717e-06, |
| "loss": 0.3653467297554016, |
| "memory(GiB)": 74.62, |
| "step": 697, |
| "token_acc": 0.8991935483870968, |
| "train_speed(iter/s)": 0.022467 |
| }, |
| { |
| "epoch": 0.5646107178968656, |
| "grad_norm": 2.000946521759033, |
| "learning_rate": 4.353444327911797e-06, |
| "loss": 0.4383889138698578, |
| "memory(GiB)": 74.62, |
| "step": 698, |
| "token_acc": 0.8419243986254296, |
| "train_speed(iter/s)": 0.022468 |
| }, |
| { |
| "epoch": 0.5654196157735086, |
| "grad_norm": 2.3316028118133545, |
| "learning_rate": 4.3401904161520944e-06, |
| "loss": 0.4090406000614166, |
| "memory(GiB)": 74.62, |
| "step": 699, |
| "token_acc": 0.8454106280193237, |
| "train_speed(iter/s)": 0.022468 |
| }, |
| { |
| "epoch": 0.5662285136501517, |
| "grad_norm": 2.3193917274475098, |
| "learning_rate": 4.3269412211371215e-06, |
| "loss": 0.40262287855148315, |
| "memory(GiB)": 74.62, |
| "step": 700, |
| "token_acc": 0.8765432098765432, |
| "train_speed(iter/s)": 0.022469 |
| }, |
| { |
| "epoch": 0.5670374115267948, |
| "grad_norm": 2.7743844985961914, |
| "learning_rate": 4.313696837580677e-06, |
| "loss": 0.40288880467414856, |
| "memory(GiB)": 74.62, |
| "step": 701, |
| "token_acc": 0.8993055555555556, |
| "train_speed(iter/s)": 0.022469 |
| }, |
| { |
| "epoch": 0.5678463094034378, |
| "grad_norm": 1.9505183696746826, |
| "learning_rate": 4.300457360162158e-06, |
| "loss": 0.34644150733947754, |
| "memory(GiB)": 74.62, |
| "step": 702, |
| "token_acc": 0.8622222222222222, |
| "train_speed(iter/s)": 0.02247 |
| }, |
| { |
| "epoch": 0.5686552072800809, |
| "grad_norm": 2.183720588684082, |
| "learning_rate": 4.287222883525897e-06, |
| "loss": 0.429502934217453, |
| "memory(GiB)": 74.62, |
| "step": 703, |
| "token_acc": 0.8661417322834646, |
| "train_speed(iter/s)": 0.022471 |
| }, |
| { |
| "epoch": 0.569464105156724, |
| "grad_norm": 2.0480737686157227, |
| "learning_rate": 4.273993502280476e-06, |
| "loss": 0.3910590708255768, |
| "memory(GiB)": 74.62, |
| "step": 704, |
| "token_acc": 0.8404255319148937, |
| "train_speed(iter/s)": 0.022471 |
| }, |
| { |
| "epoch": 0.570273003033367, |
| "grad_norm": 2.1780683994293213, |
| "learning_rate": 4.2607693109980435e-06, |
| "loss": 0.45382118225097656, |
| "memory(GiB)": 74.62, |
| "step": 705, |
| "token_acc": 0.8862745098039215, |
| "train_speed(iter/s)": 0.022472 |
| }, |
| { |
| "epoch": 0.5710819009100101, |
| "grad_norm": 2.0752146244049072, |
| "learning_rate": 4.247550404213661e-06, |
| "loss": 0.39520663022994995, |
| "memory(GiB)": 74.62, |
| "step": 706, |
| "token_acc": 0.8012422360248447, |
| "train_speed(iter/s)": 0.022472 |
| }, |
| { |
| "epoch": 0.5718907987866532, |
| "grad_norm": 2.0002593994140625, |
| "learning_rate": 4.2343368764246005e-06, |
| "loss": 0.4130653738975525, |
| "memory(GiB)": 74.62, |
| "step": 707, |
| "token_acc": 0.862453531598513, |
| "train_speed(iter/s)": 0.022473 |
| }, |
| { |
| "epoch": 0.5726996966632962, |
| "grad_norm": 2.031238317489624, |
| "learning_rate": 4.221128822089687e-06, |
| "loss": 0.36960452795028687, |
| "memory(GiB)": 74.62, |
| "step": 708, |
| "token_acc": 0.8981818181818182, |
| "train_speed(iter/s)": 0.022473 |
| }, |
| { |
| "epoch": 0.5735085945399393, |
| "grad_norm": 2.3516478538513184, |
| "learning_rate": 4.207926335628617e-06, |
| "loss": 0.43690210580825806, |
| "memory(GiB)": 74.62, |
| "step": 709, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022474 |
| }, |
| { |
| "epoch": 0.5743174924165824, |
| "grad_norm": 2.5592732429504395, |
| "learning_rate": 4.194729511421285e-06, |
| "loss": 0.3793370723724365, |
| "memory(GiB)": 74.62, |
| "step": 710, |
| "token_acc": 0.8393574297188755, |
| "train_speed(iter/s)": 0.022474 |
| }, |
| { |
| "epoch": 0.5751263902932254, |
| "grad_norm": 2.097623825073242, |
| "learning_rate": 4.181538443807109e-06, |
| "loss": 0.39188504219055176, |
| "memory(GiB)": 74.62, |
| "step": 711, |
| "token_acc": 0.9033613445378151, |
| "train_speed(iter/s)": 0.022475 |
| }, |
| { |
| "epoch": 0.5759352881698686, |
| "grad_norm": 1.9303717613220215, |
| "learning_rate": 4.1683532270843505e-06, |
| "loss": 0.4174485504627228, |
| "memory(GiB)": 74.62, |
| "step": 712, |
| "token_acc": 0.8346774193548387, |
| "train_speed(iter/s)": 0.022475 |
| }, |
| { |
| "epoch": 0.5767441860465117, |
| "grad_norm": 2.5618019104003906, |
| "learning_rate": 4.15517395550945e-06, |
| "loss": 0.3718748390674591, |
| "memory(GiB)": 74.62, |
| "step": 713, |
| "token_acc": 0.8719723183391004, |
| "train_speed(iter/s)": 0.022475 |
| }, |
| { |
| "epoch": 0.5775530839231547, |
| "grad_norm": 2.322850227355957, |
| "learning_rate": 4.1420007232963435e-06, |
| "loss": 0.3762381970882416, |
| "memory(GiB)": 74.62, |
| "step": 714, |
| "token_acc": 0.8874458874458875, |
| "train_speed(iter/s)": 0.022476 |
| }, |
| { |
| "epoch": 0.5783619817997978, |
| "grad_norm": 2.1827359199523926, |
| "learning_rate": 4.1288336246158e-06, |
| "loss": 0.40151140093803406, |
| "memory(GiB)": 74.62, |
| "step": 715, |
| "token_acc": 0.8538461538461538, |
| "train_speed(iter/s)": 0.022476 |
| }, |
| { |
| "epoch": 0.5791708796764409, |
| "grad_norm": 2.6647045612335205, |
| "learning_rate": 4.115672753594739e-06, |
| "loss": 0.34364283084869385, |
| "memory(GiB)": 74.62, |
| "step": 716, |
| "token_acc": 0.903448275862069, |
| "train_speed(iter/s)": 0.022477 |
| }, |
| { |
| "epoch": 0.5799797775530839, |
| "grad_norm": 2.086578845977783, |
| "learning_rate": 4.102518204315555e-06, |
| "loss": 0.4202456474304199, |
| "memory(GiB)": 74.62, |
| "step": 717, |
| "token_acc": 0.8202764976958525, |
| "train_speed(iter/s)": 0.022477 |
| }, |
| { |
| "epoch": 0.580788675429727, |
| "grad_norm": 1.952487826347351, |
| "learning_rate": 4.089370070815463e-06, |
| "loss": 0.37721166014671326, |
| "memory(GiB)": 74.62, |
| "step": 718, |
| "token_acc": 0.878419452887538, |
| "train_speed(iter/s)": 0.022478 |
| }, |
| { |
| "epoch": 0.5815975733063701, |
| "grad_norm": 1.9967212677001953, |
| "learning_rate": 4.0762284470858e-06, |
| "loss": 0.42397576570510864, |
| "memory(GiB)": 74.62, |
| "step": 719, |
| "token_acc": 0.8559322033898306, |
| "train_speed(iter/s)": 0.022479 |
| }, |
| { |
| "epoch": 0.5824064711830131, |
| "grad_norm": 2.281806707382202, |
| "learning_rate": 4.063093427071376e-06, |
| "loss": 0.3868061900138855, |
| "memory(GiB)": 74.62, |
| "step": 720, |
| "token_acc": 0.9313304721030042, |
| "train_speed(iter/s)": 0.022479 |
| }, |
| { |
| "epoch": 0.5832153690596562, |
| "grad_norm": 2.5271997451782227, |
| "learning_rate": 4.049965104669795e-06, |
| "loss": 0.4714341163635254, |
| "memory(GiB)": 74.62, |
| "step": 721, |
| "token_acc": 0.8309859154929577, |
| "train_speed(iter/s)": 0.022479 |
| }, |
| { |
| "epoch": 0.5840242669362993, |
| "grad_norm": 2.1930084228515625, |
| "learning_rate": 4.036843573730774e-06, |
| "loss": 0.4007885456085205, |
| "memory(GiB)": 74.62, |
| "step": 722, |
| "token_acc": 0.9045643153526971, |
| "train_speed(iter/s)": 0.02248 |
| }, |
| { |
| "epoch": 0.5848331648129423, |
| "grad_norm": 2.2075302600860596, |
| "learning_rate": 4.023728928055486e-06, |
| "loss": 0.4345509707927704, |
| "memory(GiB)": 74.62, |
| "step": 723, |
| "token_acc": 0.8504672897196262, |
| "train_speed(iter/s)": 0.02248 |
| }, |
| { |
| "epoch": 0.5856420626895854, |
| "grad_norm": 2.093959331512451, |
| "learning_rate": 4.0106212613958805e-06, |
| "loss": 0.39234721660614014, |
| "memory(GiB)": 74.62, |
| "step": 724, |
| "token_acc": 0.8838174273858921, |
| "train_speed(iter/s)": 0.022481 |
| }, |
| { |
| "epoch": 0.5864509605662285, |
| "grad_norm": 2.8163022994995117, |
| "learning_rate": 3.99752066745402e-06, |
| "loss": 0.377105712890625, |
| "memory(GiB)": 74.62, |
| "step": 725, |
| "token_acc": 0.8704318936877077, |
| "train_speed(iter/s)": 0.022481 |
| }, |
| { |
| "epoch": 0.5872598584428715, |
| "grad_norm": 3.310258626937866, |
| "learning_rate": 3.984427239881404e-06, |
| "loss": 0.33992162346839905, |
| "memory(GiB)": 74.62, |
| "step": 726, |
| "token_acc": 0.8781725888324873, |
| "train_speed(iter/s)": 0.022482 |
| }, |
| { |
| "epoch": 0.5880687563195146, |
| "grad_norm": 2.1290695667266846, |
| "learning_rate": 3.971341072278302e-06, |
| "loss": 0.3612005114555359, |
| "memory(GiB)": 74.62, |
| "step": 727, |
| "token_acc": 0.8576642335766423, |
| "train_speed(iter/s)": 0.022482 |
| }, |
| { |
| "epoch": 0.5888776541961578, |
| "grad_norm": 2.370741844177246, |
| "learning_rate": 3.958262258193089e-06, |
| "loss": 0.39483344554901123, |
| "memory(GiB)": 74.62, |
| "step": 728, |
| "token_acc": 0.8625954198473282, |
| "train_speed(iter/s)": 0.022482 |
| }, |
| { |
| "epoch": 0.5896865520728009, |
| "grad_norm": 1.9654161930084229, |
| "learning_rate": 3.9451908911215645e-06, |
| "loss": 0.3784998059272766, |
| "memory(GiB)": 74.62, |
| "step": 729, |
| "token_acc": 0.8663101604278075, |
| "train_speed(iter/s)": 0.022483 |
| }, |
| { |
| "epoch": 0.5904954499494439, |
| "grad_norm": 2.5404610633850098, |
| "learning_rate": 3.9321270645062995e-06, |
| "loss": 0.4317411780357361, |
| "memory(GiB)": 74.62, |
| "step": 730, |
| "token_acc": 0.8413793103448276, |
| "train_speed(iter/s)": 0.022484 |
| }, |
| { |
| "epoch": 0.591304347826087, |
| "grad_norm": 1.932789921760559, |
| "learning_rate": 3.919070871735956e-06, |
| "loss": 0.3979855179786682, |
| "memory(GiB)": 74.62, |
| "step": 731, |
| "token_acc": 0.8699551569506726, |
| "train_speed(iter/s)": 0.022484 |
| }, |
| { |
| "epoch": 0.59211324570273, |
| "grad_norm": 2.322033643722534, |
| "learning_rate": 3.906022406144625e-06, |
| "loss": 0.4147607088088989, |
| "memory(GiB)": 74.62, |
| "step": 732, |
| "token_acc": 0.8712871287128713, |
| "train_speed(iter/s)": 0.022484 |
| }, |
| { |
| "epoch": 0.5929221435793731, |
| "grad_norm": 2.0661261081695557, |
| "learning_rate": 3.892981761011164e-06, |
| "loss": 0.3968489170074463, |
| "memory(GiB)": 74.62, |
| "step": 733, |
| "token_acc": 0.8418367346938775, |
| "train_speed(iter/s)": 0.022485 |
| }, |
| { |
| "epoch": 0.5937310414560162, |
| "grad_norm": 1.8793938159942627, |
| "learning_rate": 3.8799490295585155e-06, |
| "loss": 0.34254151582717896, |
| "memory(GiB)": 74.62, |
| "step": 734, |
| "token_acc": 0.9105263157894737, |
| "train_speed(iter/s)": 0.022485 |
| }, |
| { |
| "epoch": 0.5945399393326593, |
| "grad_norm": 3.2460901737213135, |
| "learning_rate": 3.866924304953059e-06, |
| "loss": 0.4647367298603058, |
| "memory(GiB)": 74.62, |
| "step": 735, |
| "token_acc": 0.864, |
| "train_speed(iter/s)": 0.022486 |
| }, |
| { |
| "epoch": 0.5953488372093023, |
| "grad_norm": 2.1490590572357178, |
| "learning_rate": 3.8539076803039285e-06, |
| "loss": 0.4941931962966919, |
| "memory(GiB)": 74.62, |
| "step": 736, |
| "token_acc": 0.843065693430657, |
| "train_speed(iter/s)": 0.022486 |
| }, |
| { |
| "epoch": 0.5961577350859454, |
| "grad_norm": 2.9426324367523193, |
| "learning_rate": 3.840899248662358e-06, |
| "loss": 0.43801772594451904, |
| "memory(GiB)": 74.62, |
| "step": 737, |
| "token_acc": 0.7985611510791367, |
| "train_speed(iter/s)": 0.022487 |
| }, |
| { |
| "epoch": 0.5969666329625885, |
| "grad_norm": 1.8307894468307495, |
| "learning_rate": 3.827899103021017e-06, |
| "loss": 0.36532309651374817, |
| "memory(GiB)": 74.62, |
| "step": 738, |
| "token_acc": 0.8484848484848485, |
| "train_speed(iter/s)": 0.022487 |
| }, |
| { |
| "epoch": 0.5977755308392315, |
| "grad_norm": 1.6826763153076172, |
| "learning_rate": 3.814907336313329e-06, |
| "loss": 0.3788911998271942, |
| "memory(GiB)": 74.62, |
| "step": 739, |
| "token_acc": 0.8656716417910447, |
| "train_speed(iter/s)": 0.022488 |
| }, |
| { |
| "epoch": 0.5985844287158746, |
| "grad_norm": 3.5640852451324463, |
| "learning_rate": 3.8019240414128335e-06, |
| "loss": 0.3946545720100403, |
| "memory(GiB)": 74.62, |
| "step": 740, |
| "token_acc": 0.8245614035087719, |
| "train_speed(iter/s)": 0.022488 |
| }, |
| { |
| "epoch": 0.5993933265925177, |
| "grad_norm": 3.612060785293579, |
| "learning_rate": 3.7889493111324977e-06, |
| "loss": 0.4639260172843933, |
| "memory(GiB)": 74.62, |
| "step": 741, |
| "token_acc": 0.8678571428571429, |
| "train_speed(iter/s)": 0.022489 |
| }, |
| { |
| "epoch": 0.6002022244691607, |
| "grad_norm": 2.10774564743042, |
| "learning_rate": 3.77598323822407e-06, |
| "loss": 0.3779371380805969, |
| "memory(GiB)": 74.62, |
| "step": 742, |
| "token_acc": 0.8962264150943396, |
| "train_speed(iter/s)": 0.022489 |
| }, |
| { |
| "epoch": 0.6010111223458039, |
| "grad_norm": 2.0632522106170654, |
| "learning_rate": 3.763025915377403e-06, |
| "loss": 0.4415694773197174, |
| "memory(GiB)": 74.62, |
| "step": 743, |
| "token_acc": 0.8744939271255061, |
| "train_speed(iter/s)": 0.02249 |
| }, |
| { |
| "epoch": 0.601820020222447, |
| "grad_norm": 2.2084765434265137, |
| "learning_rate": 3.7500774352198066e-06, |
| "loss": 0.4385090470314026, |
| "memory(GiB)": 74.62, |
| "step": 744, |
| "token_acc": 0.8181818181818182, |
| "train_speed(iter/s)": 0.02249 |
| }, |
| { |
| "epoch": 0.60262891809909, |
| "grad_norm": 3.2526354789733887, |
| "learning_rate": 3.7371378903153747e-06, |
| "loss": 0.36739417910575867, |
| "memory(GiB)": 74.62, |
| "step": 745, |
| "token_acc": 0.8622047244094488, |
| "train_speed(iter/s)": 0.022491 |
| }, |
| { |
| "epoch": 0.6034378159757331, |
| "grad_norm": 2.1862826347351074, |
| "learning_rate": 3.7242073731643212e-06, |
| "loss": 0.39445218443870544, |
| "memory(GiB)": 74.62, |
| "step": 746, |
| "token_acc": 0.9465648854961832, |
| "train_speed(iter/s)": 0.022491 |
| }, |
| { |
| "epoch": 0.6042467138523762, |
| "grad_norm": 1.964879035949707, |
| "learning_rate": 3.711285976202331e-06, |
| "loss": 0.4600139558315277, |
| "memory(GiB)": 74.62, |
| "step": 747, |
| "token_acc": 0.8509803921568627, |
| "train_speed(iter/s)": 0.022491 |
| }, |
| { |
| "epoch": 0.6050556117290192, |
| "grad_norm": 2.6029324531555176, |
| "learning_rate": 3.6983737917998858e-06, |
| "loss": 0.38224440813064575, |
| "memory(GiB)": 74.62, |
| "step": 748, |
| "token_acc": 0.8801498127340824, |
| "train_speed(iter/s)": 0.022492 |
| }, |
| { |
| "epoch": 0.6058645096056623, |
| "grad_norm": 2.0742950439453125, |
| "learning_rate": 3.685470912261615e-06, |
| "loss": 0.3933752775192261, |
| "memory(GiB)": 74.62, |
| "step": 749, |
| "token_acc": 0.8681318681318682, |
| "train_speed(iter/s)": 0.022492 |
| }, |
| { |
| "epoch": 0.6066734074823054, |
| "grad_norm": 3.2914257049560547, |
| "learning_rate": 3.672577429825629e-06, |
| "loss": 0.39733976125717163, |
| "memory(GiB)": 74.62, |
| "step": 750, |
| "token_acc": 0.9066147859922179, |
| "train_speed(iter/s)": 0.022493 |
| }, |
| { |
| "epoch": 0.6074823053589484, |
| "grad_norm": 1.9089115858078003, |
| "learning_rate": 3.659693436662859e-06, |
| "loss": 0.40482792258262634, |
| "memory(GiB)": 74.62, |
| "step": 751, |
| "token_acc": 0.8535564853556485, |
| "train_speed(iter/s)": 0.022493 |
| }, |
| { |
| "epoch": 0.6082912032355915, |
| "grad_norm": 3.0140185356140137, |
| "learning_rate": 3.6468190248764063e-06, |
| "loss": 0.5314335823059082, |
| "memory(GiB)": 74.62, |
| "step": 752, |
| "token_acc": 0.8707865168539326, |
| "train_speed(iter/s)": 0.022493 |
| }, |
| { |
| "epoch": 0.6091001011122346, |
| "grad_norm": 2.3016703128814697, |
| "learning_rate": 3.6339542865008724e-06, |
| "loss": 0.3704250454902649, |
| "memory(GiB)": 74.62, |
| "step": 753, |
| "token_acc": 0.8878923766816144, |
| "train_speed(iter/s)": 0.022494 |
| }, |
| { |
| "epoch": 0.6099089989888776, |
| "grad_norm": 1.9638766050338745, |
| "learning_rate": 3.6210993135017115e-06, |
| "loss": 0.4164350628852844, |
| "memory(GiB)": 74.62, |
| "step": 754, |
| "token_acc": 0.8492462311557789, |
| "train_speed(iter/s)": 0.022494 |
| }, |
| { |
| "epoch": 0.6107178968655207, |
| "grad_norm": 2.505688428878784, |
| "learning_rate": 3.608254197774567e-06, |
| "loss": 0.40423935651779175, |
| "memory(GiB)": 74.62, |
| "step": 755, |
| "token_acc": 0.8679245283018868, |
| "train_speed(iter/s)": 0.022495 |
| }, |
| { |
| "epoch": 0.6115267947421638, |
| "grad_norm": 2.152834415435791, |
| "learning_rate": 3.595419031144615e-06, |
| "loss": 0.3799169957637787, |
| "memory(GiB)": 74.62, |
| "step": 756, |
| "token_acc": 0.8670520231213873, |
| "train_speed(iter/s)": 0.022495 |
| }, |
| { |
| "epoch": 0.6123356926188068, |
| "grad_norm": 2.534213066101074, |
| "learning_rate": 3.582593905365912e-06, |
| "loss": 0.4056301414966583, |
| "memory(GiB)": 74.62, |
| "step": 757, |
| "token_acc": 0.855072463768116, |
| "train_speed(iter/s)": 0.022496 |
| }, |
| { |
| "epoch": 0.61314459049545, |
| "grad_norm": 1.9786441326141357, |
| "learning_rate": 3.56977891212073e-06, |
| "loss": 0.4082239270210266, |
| "memory(GiB)": 74.62, |
| "step": 758, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022496 |
| }, |
| { |
| "epoch": 0.6139534883720931, |
| "grad_norm": 1.8767694234848022, |
| "learning_rate": 3.5569741430189163e-06, |
| "loss": 0.39076924324035645, |
| "memory(GiB)": 74.62, |
| "step": 759, |
| "token_acc": 0.8728070175438597, |
| "train_speed(iter/s)": 0.022496 |
| }, |
| { |
| "epoch": 0.6147623862487361, |
| "grad_norm": 2.0986220836639404, |
| "learning_rate": 3.5441796895972203e-06, |
| "loss": 0.4426667094230652, |
| "memory(GiB)": 74.62, |
| "step": 760, |
| "token_acc": 0.8986486486486487, |
| "train_speed(iter/s)": 0.022497 |
| }, |
| { |
| "epoch": 0.6155712841253792, |
| "grad_norm": 2.349647045135498, |
| "learning_rate": 3.5313956433186535e-06, |
| "loss": 0.3979909121990204, |
| "memory(GiB)": 74.62, |
| "step": 761, |
| "token_acc": 0.8770949720670391, |
| "train_speed(iter/s)": 0.022497 |
| }, |
| { |
| "epoch": 0.6163801820020223, |
| "grad_norm": 2.267604351043701, |
| "learning_rate": 3.518622095571831e-06, |
| "loss": 0.3654158413410187, |
| "memory(GiB)": 74.62, |
| "step": 762, |
| "token_acc": 0.8448979591836735, |
| "train_speed(iter/s)": 0.022497 |
| }, |
| { |
| "epoch": 0.6171890798786653, |
| "grad_norm": 2.626412868499756, |
| "learning_rate": 3.505859137670313e-06, |
| "loss": 0.3898380398750305, |
| "memory(GiB)": 74.62, |
| "step": 763, |
| "token_acc": 0.860655737704918, |
| "train_speed(iter/s)": 0.022497 |
| }, |
| { |
| "epoch": 0.6179979777553084, |
| "grad_norm": 2.134931802749634, |
| "learning_rate": 3.4931068608519626e-06, |
| "loss": 0.45385637879371643, |
| "memory(GiB)": 74.62, |
| "step": 764, |
| "token_acc": 0.8448275862068966, |
| "train_speed(iter/s)": 0.022498 |
| }, |
| { |
| "epoch": 0.6188068756319515, |
| "grad_norm": 2.1175262928009033, |
| "learning_rate": 3.4803653562782807e-06, |
| "loss": 0.44239288568496704, |
| "memory(GiB)": 74.62, |
| "step": 765, |
| "token_acc": 0.8226600985221675, |
| "train_speed(iter/s)": 0.022498 |
| }, |
| { |
| "epoch": 0.6196157735085945, |
| "grad_norm": 1.9157018661499023, |
| "learning_rate": 3.4676347150337673e-06, |
| "loss": 0.37729379534721375, |
| "memory(GiB)": 74.62, |
| "step": 766, |
| "token_acc": 0.8744588744588745, |
| "train_speed(iter/s)": 0.022499 |
| }, |
| { |
| "epoch": 0.6204246713852376, |
| "grad_norm": 2.0690548419952393, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.40089553594589233, |
| "memory(GiB)": 74.62, |
| "step": 767, |
| "token_acc": 0.9056603773584906, |
| "train_speed(iter/s)": 0.022499 |
| }, |
| { |
| "epoch": 0.6212335692618807, |
| "grad_norm": 2.284152030944824, |
| "learning_rate": 3.442206386481297e-06, |
| "loss": 0.37923118472099304, |
| "memory(GiB)": 74.62, |
| "step": 768, |
| "token_acc": 0.8286713286713286, |
| "train_speed(iter/s)": 0.022499 |
| }, |
| { |
| "epoch": 0.6220424671385237, |
| "grad_norm": 3.0014827251434326, |
| "learning_rate": 3.429508880951444e-06, |
| "loss": 0.38093435764312744, |
| "memory(GiB)": 74.62, |
| "step": 769, |
| "token_acc": 0.8698224852071006, |
| "train_speed(iter/s)": 0.0225 |
| }, |
| { |
| "epoch": 0.6228513650151668, |
| "grad_norm": 2.2891621589660645, |
| "learning_rate": 3.4168226023056638e-06, |
| "loss": 0.4511076509952545, |
| "memory(GiB)": 74.62, |
| "step": 770, |
| "token_acc": 0.7976190476190477, |
| "train_speed(iter/s)": 0.0225 |
| }, |
| { |
| "epoch": 0.6236602628918099, |
| "grad_norm": 2.116448163986206, |
| "learning_rate": 3.4041476412336672e-06, |
| "loss": 0.49026528000831604, |
| "memory(GiB)": 74.62, |
| "step": 771, |
| "token_acc": 0.840625, |
| "train_speed(iter/s)": 0.022501 |
| }, |
| { |
| "epoch": 0.6244691607684529, |
| "grad_norm": 1.6491224765777588, |
| "learning_rate": 3.391484088344257e-06, |
| "loss": 0.3303212523460388, |
| "memory(GiB)": 74.62, |
| "step": 772, |
| "token_acc": 0.8959276018099548, |
| "train_speed(iter/s)": 0.022501 |
| }, |
| { |
| "epoch": 0.625278058645096, |
| "grad_norm": 2.458468198776245, |
| "learning_rate": 3.3788320341646764e-06, |
| "loss": 0.37041348218917847, |
| "memory(GiB)": 74.62, |
| "step": 773, |
| "token_acc": 0.8658008658008658, |
| "train_speed(iter/s)": 0.022501 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 1.9400595426559448, |
| "learning_rate": 3.3661915691399814e-06, |
| "loss": 0.40716874599456787, |
| "memory(GiB)": 74.62, |
| "step": 774, |
| "token_acc": 0.8565400843881856, |
| "train_speed(iter/s)": 0.022502 |
| }, |
| { |
| "epoch": 0.6268958543983822, |
| "grad_norm": 2.076422691345215, |
| "learning_rate": 3.3535627836323683e-06, |
| "loss": 0.4028838276863098, |
| "memory(GiB)": 74.62, |
| "step": 775, |
| "token_acc": 0.8844444444444445, |
| "train_speed(iter/s)": 0.022502 |
| }, |
| { |
| "epoch": 0.6277047522750253, |
| "grad_norm": 2.1392087936401367, |
| "learning_rate": 3.340945767920547e-06, |
| "loss": 0.3876573443412781, |
| "memory(GiB)": 74.62, |
| "step": 776, |
| "token_acc": 0.9047619047619048, |
| "train_speed(iter/s)": 0.022502 |
| }, |
| { |
| "epoch": 0.6285136501516684, |
| "grad_norm": 1.980198860168457, |
| "learning_rate": 3.328340612199091e-06, |
| "loss": 0.3929121494293213, |
| "memory(GiB)": 74.62, |
| "step": 777, |
| "token_acc": 0.8625, |
| "train_speed(iter/s)": 0.022503 |
| }, |
| { |
| "epoch": 0.6293225480283114, |
| "grad_norm": 2.5135369300842285, |
| "learning_rate": 3.315747406577787e-06, |
| "loss": 0.4506552815437317, |
| "memory(GiB)": 74.62, |
| "step": 778, |
| "token_acc": 0.8625954198473282, |
| "train_speed(iter/s)": 0.022503 |
| }, |
| { |
| "epoch": 0.6301314459049545, |
| "grad_norm": 2.9397776126861572, |
| "learning_rate": 3.303166241080996e-06, |
| "loss": 0.366382896900177, |
| "memory(GiB)": 74.62, |
| "step": 779, |
| "token_acc": 0.9363636363636364, |
| "train_speed(iter/s)": 0.022504 |
| }, |
| { |
| "epoch": 0.6309403437815976, |
| "grad_norm": 2.5433013439178467, |
| "learning_rate": 3.290597205647009e-06, |
| "loss": 0.39890724420547485, |
| "memory(GiB)": 74.62, |
| "step": 780, |
| "token_acc": 0.8835341365461847, |
| "train_speed(iter/s)": 0.022504 |
| }, |
| { |
| "epoch": 0.6317492416582406, |
| "grad_norm": 1.8281358480453491, |
| "learning_rate": 3.2780403901274026e-06, |
| "loss": 0.3230600953102112, |
| "memory(GiB)": 74.62, |
| "step": 781, |
| "token_acc": 0.8682170542635659, |
| "train_speed(iter/s)": 0.022504 |
| }, |
| { |
| "epoch": 0.6325581395348837, |
| "grad_norm": 2.3992929458618164, |
| "learning_rate": 3.265495884286397e-06, |
| "loss": 0.3860858082771301, |
| "memory(GiB)": 74.62, |
| "step": 782, |
| "token_acc": 0.8675213675213675, |
| "train_speed(iter/s)": 0.022505 |
| }, |
| { |
| "epoch": 0.6333670374115268, |
| "grad_norm": 2.3929519653320312, |
| "learning_rate": 3.2529637778002177e-06, |
| "loss": 0.41789501905441284, |
| "memory(GiB)": 74.62, |
| "step": 783, |
| "token_acc": 0.8291666666666667, |
| "train_speed(iter/s)": 0.022505 |
| }, |
| { |
| "epoch": 0.6341759352881698, |
| "grad_norm": 2.3482816219329834, |
| "learning_rate": 3.2404441602564507e-06, |
| "loss": 0.42455971240997314, |
| "memory(GiB)": 74.62, |
| "step": 784, |
| "token_acc": 0.8830188679245283, |
| "train_speed(iter/s)": 0.022506 |
| }, |
| { |
| "epoch": 0.6349848331648129, |
| "grad_norm": 1.525108814239502, |
| "learning_rate": 3.2279371211533976e-06, |
| "loss": 0.3243609070777893, |
| "memory(GiB)": 74.62, |
| "step": 785, |
| "token_acc": 0.8814229249011858, |
| "train_speed(iter/s)": 0.022506 |
| }, |
| { |
| "epoch": 0.635793731041456, |
| "grad_norm": 2.330397367477417, |
| "learning_rate": 3.2154427498994517e-06, |
| "loss": 0.424887478351593, |
| "memory(GiB)": 74.62, |
| "step": 786, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022506 |
| }, |
| { |
| "epoch": 0.636602628918099, |
| "grad_norm": 1.9202159643173218, |
| "learning_rate": 3.202961135812437e-06, |
| "loss": 0.3225609064102173, |
| "memory(GiB)": 74.62, |
| "step": 787, |
| "token_acc": 0.9056603773584906, |
| "train_speed(iter/s)": 0.022507 |
| }, |
| { |
| "epoch": 0.6374115267947421, |
| "grad_norm": 1.9144957065582275, |
| "learning_rate": 3.1904923681189883e-06, |
| "loss": 0.3496546745300293, |
| "memory(GiB)": 74.62, |
| "step": 788, |
| "token_acc": 0.903010033444816, |
| "train_speed(iter/s)": 0.022507 |
| }, |
| { |
| "epoch": 0.6382204246713853, |
| "grad_norm": 2.0034921169281006, |
| "learning_rate": 3.1780365359539043e-06, |
| "loss": 0.41543805599212646, |
| "memory(GiB)": 74.62, |
| "step": 789, |
| "token_acc": 0.8977272727272727, |
| "train_speed(iter/s)": 0.022508 |
| }, |
| { |
| "epoch": 0.6390293225480284, |
| "grad_norm": 1.9115188121795654, |
| "learning_rate": 3.1655937283595116e-06, |
| "loss": 0.38339167833328247, |
| "memory(GiB)": 74.62, |
| "step": 790, |
| "token_acc": 0.8555133079847909, |
| "train_speed(iter/s)": 0.022508 |
| }, |
| { |
| "epoch": 0.6398382204246714, |
| "grad_norm": 2.29840350151062, |
| "learning_rate": 3.153164034285031e-06, |
| "loss": 0.3975831866264343, |
| "memory(GiB)": 74.62, |
| "step": 791, |
| "token_acc": 0.8322368421052632, |
| "train_speed(iter/s)": 0.022508 |
| }, |
| { |
| "epoch": 0.6406471183013145, |
| "grad_norm": 2.4968957901000977, |
| "learning_rate": 3.1407475425859348e-06, |
| "loss": 0.346437931060791, |
| "memory(GiB)": 74.62, |
| "step": 792, |
| "token_acc": 0.8744588744588745, |
| "train_speed(iter/s)": 0.022509 |
| }, |
| { |
| "epoch": 0.6414560161779576, |
| "grad_norm": 2.1374566555023193, |
| "learning_rate": 3.1283443420233196e-06, |
| "loss": 0.4348532557487488, |
| "memory(GiB)": 74.62, |
| "step": 793, |
| "token_acc": 0.881578947368421, |
| "train_speed(iter/s)": 0.022509 |
| }, |
| { |
| "epoch": 0.6422649140546006, |
| "grad_norm": 2.104574203491211, |
| "learning_rate": 3.1159545212632697e-06, |
| "loss": 0.3768533170223236, |
| "memory(GiB)": 74.62, |
| "step": 794, |
| "token_acc": 0.8533834586466166, |
| "train_speed(iter/s)": 0.02251 |
| }, |
| { |
| "epoch": 0.6430738119312437, |
| "grad_norm": 2.8082082271575928, |
| "learning_rate": 3.1035781688762177e-06, |
| "loss": 0.3694327473640442, |
| "memory(GiB)": 74.62, |
| "step": 795, |
| "token_acc": 0.8781512605042017, |
| "train_speed(iter/s)": 0.02251 |
| }, |
| { |
| "epoch": 0.6438827098078868, |
| "grad_norm": 2.036285161972046, |
| "learning_rate": 3.0912153733363203e-06, |
| "loss": 0.4223785996437073, |
| "memory(GiB)": 74.62, |
| "step": 796, |
| "token_acc": 0.9013452914798207, |
| "train_speed(iter/s)": 0.02251 |
| }, |
| { |
| "epoch": 0.6446916076845298, |
| "grad_norm": 1.9402992725372314, |
| "learning_rate": 3.078866223020815e-06, |
| "loss": 0.39007920026779175, |
| "memory(GiB)": 74.62, |
| "step": 797, |
| "token_acc": 0.8409090909090909, |
| "train_speed(iter/s)": 0.022511 |
| }, |
| { |
| "epoch": 0.6455005055611729, |
| "grad_norm": 2.3638556003570557, |
| "learning_rate": 3.066530806209402e-06, |
| "loss": 0.39857369661331177, |
| "memory(GiB)": 74.62, |
| "step": 798, |
| "token_acc": 0.8956521739130435, |
| "train_speed(iter/s)": 0.022511 |
| }, |
| { |
| "epoch": 0.646309403437816, |
| "grad_norm": 1.8377914428710938, |
| "learning_rate": 3.0542092110835996e-06, |
| "loss": 0.3549560010433197, |
| "memory(GiB)": 74.62, |
| "step": 799, |
| "token_acc": 0.8955823293172691, |
| "train_speed(iter/s)": 0.022512 |
| }, |
| { |
| "epoch": 0.647118301314459, |
| "grad_norm": 2.2061686515808105, |
| "learning_rate": 3.04190152572612e-06, |
| "loss": 0.43962785601615906, |
| "memory(GiB)": 74.62, |
| "step": 800, |
| "token_acc": 0.8395522388059702, |
| "train_speed(iter/s)": 0.022512 |
| }, |
| { |
| "epoch": 0.6479271991911021, |
| "grad_norm": 2.3892087936401367, |
| "learning_rate": 3.0296078381202465e-06, |
| "loss": 0.37227606773376465, |
| "memory(GiB)": 74.62, |
| "step": 801, |
| "token_acc": 0.8662420382165605, |
| "train_speed(iter/s)": 0.022512 |
| }, |
| { |
| "epoch": 0.6487360970677452, |
| "grad_norm": 2.125608444213867, |
| "learning_rate": 3.017328236149187e-06, |
| "loss": 0.43218767642974854, |
| "memory(GiB)": 74.62, |
| "step": 802, |
| "token_acc": 0.8796992481203008, |
| "train_speed(iter/s)": 0.022513 |
| }, |
| { |
| "epoch": 0.6495449949443882, |
| "grad_norm": 2.3993020057678223, |
| "learning_rate": 3.0050628075954643e-06, |
| "loss": 0.3682135343551636, |
| "memory(GiB)": 74.62, |
| "step": 803, |
| "token_acc": 0.9087136929460581, |
| "train_speed(iter/s)": 0.022513 |
| }, |
| { |
| "epoch": 0.6503538928210314, |
| "grad_norm": 2.251502513885498, |
| "learning_rate": 2.9928116401402753e-06, |
| "loss": 0.4699886441230774, |
| "memory(GiB)": 74.62, |
| "step": 804, |
| "token_acc": 0.8686440677966102, |
| "train_speed(iter/s)": 0.022513 |
| }, |
| { |
| "epoch": 0.6511627906976745, |
| "grad_norm": 13.69151496887207, |
| "learning_rate": 2.9805748213628727e-06, |
| "loss": 0.3267248272895813, |
| "memory(GiB)": 74.62, |
| "step": 805, |
| "token_acc": 0.8592057761732852, |
| "train_speed(iter/s)": 0.022514 |
| }, |
| { |
| "epoch": 0.6519716885743175, |
| "grad_norm": 2.1798858642578125, |
| "learning_rate": 2.968352438739936e-06, |
| "loss": 0.4122653901576996, |
| "memory(GiB)": 74.62, |
| "step": 806, |
| "token_acc": 0.8377581120943953, |
| "train_speed(iter/s)": 0.022514 |
| }, |
| { |
| "epoch": 0.6527805864509606, |
| "grad_norm": 1.9182910919189453, |
| "learning_rate": 2.956144579644942e-06, |
| "loss": 0.36671823263168335, |
| "memory(GiB)": 74.62, |
| "step": 807, |
| "token_acc": 0.8716216216216216, |
| "train_speed(iter/s)": 0.022515 |
| }, |
| { |
| "epoch": 0.6535894843276037, |
| "grad_norm": 2.026547908782959, |
| "learning_rate": 2.9439513313475464e-06, |
| "loss": 0.3970714807510376, |
| "memory(GiB)": 74.62, |
| "step": 808, |
| "token_acc": 0.9066666666666666, |
| "train_speed(iter/s)": 0.022515 |
| }, |
| { |
| "epoch": 0.6543983822042467, |
| "grad_norm": 2.1154861450195312, |
| "learning_rate": 2.931772781012958e-06, |
| "loss": 0.3996396064758301, |
| "memory(GiB)": 74.62, |
| "step": 809, |
| "token_acc": 0.8494623655913979, |
| "train_speed(iter/s)": 0.022515 |
| }, |
| { |
| "epoch": 0.6552072800808898, |
| "grad_norm": 2.0756337642669678, |
| "learning_rate": 2.9196090157013146e-06, |
| "loss": 0.44487231969833374, |
| "memory(GiB)": 74.62, |
| "step": 810, |
| "token_acc": 0.8075471698113208, |
| "train_speed(iter/s)": 0.022516 |
| }, |
| { |
| "epoch": 0.6560161779575329, |
| "grad_norm": 2.0214574337005615, |
| "learning_rate": 2.907460122367062e-06, |
| "loss": 0.3471815586090088, |
| "memory(GiB)": 74.62, |
| "step": 811, |
| "token_acc": 0.8540925266903915, |
| "train_speed(iter/s)": 0.022516 |
| }, |
| { |
| "epoch": 0.6568250758341759, |
| "grad_norm": 1.8203327655792236, |
| "learning_rate": 2.8953261878583263e-06, |
| "loss": 0.3285714387893677, |
| "memory(GiB)": 74.62, |
| "step": 812, |
| "token_acc": 0.9137931034482759, |
| "train_speed(iter/s)": 0.022517 |
| }, |
| { |
| "epoch": 0.657633973710819, |
| "grad_norm": 2.6111230850219727, |
| "learning_rate": 2.8832072989163048e-06, |
| "loss": 0.38925743103027344, |
| "memory(GiB)": 74.62, |
| "step": 813, |
| "token_acc": 0.8852459016393442, |
| "train_speed(iter/s)": 0.022517 |
| }, |
| { |
| "epoch": 0.6584428715874621, |
| "grad_norm": 1.8417023420333862, |
| "learning_rate": 2.871103542174637e-06, |
| "loss": 0.3698727488517761, |
| "memory(GiB)": 74.62, |
| "step": 814, |
| "token_acc": 0.8767605633802817, |
| "train_speed(iter/s)": 0.022517 |
| }, |
| { |
| "epoch": 0.6592517694641051, |
| "grad_norm": 2.0547242164611816, |
| "learning_rate": 2.859015004158789e-06, |
| "loss": 0.37436971068382263, |
| "memory(GiB)": 74.62, |
| "step": 815, |
| "token_acc": 0.8426966292134831, |
| "train_speed(iter/s)": 0.022518 |
| }, |
| { |
| "epoch": 0.6600606673407482, |
| "grad_norm": 3.1478235721588135, |
| "learning_rate": 2.8469417712854287e-06, |
| "loss": 0.4491364359855652, |
| "memory(GiB)": 74.62, |
| "step": 816, |
| "token_acc": 0.8157894736842105, |
| "train_speed(iter/s)": 0.022518 |
| }, |
| { |
| "epoch": 0.6608695652173913, |
| "grad_norm": 2.21091890335083, |
| "learning_rate": 2.834883929861818e-06, |
| "loss": 0.3636167049407959, |
| "memory(GiB)": 74.62, |
| "step": 817, |
| "token_acc": 0.9236947791164659, |
| "train_speed(iter/s)": 0.022518 |
| }, |
| { |
| "epoch": 0.6616784630940343, |
| "grad_norm": 2.1053714752197266, |
| "learning_rate": 2.822841566085192e-06, |
| "loss": 0.3697773218154907, |
| "memory(GiB)": 74.62, |
| "step": 818, |
| "token_acc": 0.9090909090909091, |
| "train_speed(iter/s)": 0.022519 |
| }, |
| { |
| "epoch": 0.6624873609706774, |
| "grad_norm": 1.9461814165115356, |
| "learning_rate": 2.8108147660421325e-06, |
| "loss": 0.42437541484832764, |
| "memory(GiB)": 74.62, |
| "step": 819, |
| "token_acc": 0.8985507246376812, |
| "train_speed(iter/s)": 0.022519 |
| }, |
| { |
| "epoch": 0.6632962588473206, |
| "grad_norm": 1.9878171682357788, |
| "learning_rate": 2.798803615707976e-06, |
| "loss": 0.40904805064201355, |
| "memory(GiB)": 74.62, |
| "step": 820, |
| "token_acc": 0.8475609756097561, |
| "train_speed(iter/s)": 0.022519 |
| }, |
| { |
| "epoch": 0.6641051567239636, |
| "grad_norm": 1.8959929943084717, |
| "learning_rate": 2.78680820094617e-06, |
| "loss": 0.3745640218257904, |
| "memory(GiB)": 74.62, |
| "step": 821, |
| "token_acc": 0.914396887159533, |
| "train_speed(iter/s)": 0.02252 |
| }, |
| { |
| "epoch": 0.6649140546006067, |
| "grad_norm": 2.005540609359741, |
| "learning_rate": 2.7748286075076834e-06, |
| "loss": 0.364071786403656, |
| "memory(GiB)": 74.62, |
| "step": 822, |
| "token_acc": 0.8765432098765432, |
| "train_speed(iter/s)": 0.02252 |
| }, |
| { |
| "epoch": 0.6657229524772498, |
| "grad_norm": 2.166395902633667, |
| "learning_rate": 2.762864921030384e-06, |
| "loss": 0.37051212787628174, |
| "memory(GiB)": 74.62, |
| "step": 823, |
| "token_acc": 0.8909774436090225, |
| "train_speed(iter/s)": 0.02252 |
| }, |
| { |
| "epoch": 0.6665318503538928, |
| "grad_norm": 1.9548283815383911, |
| "learning_rate": 2.750917227038419e-06, |
| "loss": 0.39772191643714905, |
| "memory(GiB)": 74.62, |
| "step": 824, |
| "token_acc": 0.8986928104575164, |
| "train_speed(iter/s)": 0.022521 |
| }, |
| { |
| "epoch": 0.6673407482305359, |
| "grad_norm": 2.373486280441284, |
| "learning_rate": 2.7389856109416178e-06, |
| "loss": 0.39033639430999756, |
| "memory(GiB)": 74.62, |
| "step": 825, |
| "token_acc": 0.8876404494382022, |
| "train_speed(iter/s)": 0.022521 |
| }, |
| { |
| "epoch": 0.668149646107179, |
| "grad_norm": 1.9656351804733276, |
| "learning_rate": 2.7270701580348737e-06, |
| "loss": 0.4327496588230133, |
| "memory(GiB)": 74.62, |
| "step": 826, |
| "token_acc": 0.8840579710144928, |
| "train_speed(iter/s)": 0.022522 |
| }, |
| { |
| "epoch": 0.668958543983822, |
| "grad_norm": 1.7876020669937134, |
| "learning_rate": 2.715170953497532e-06, |
| "loss": 0.4038127064704895, |
| "memory(GiB)": 74.62, |
| "step": 827, |
| "token_acc": 0.8581081081081081, |
| "train_speed(iter/s)": 0.022522 |
| }, |
| { |
| "epoch": 0.6697674418604651, |
| "grad_norm": 2.269183397293091, |
| "learning_rate": 2.703288082392791e-06, |
| "loss": 0.3742678165435791, |
| "memory(GiB)": 74.62, |
| "step": 828, |
| "token_acc": 0.9116279069767442, |
| "train_speed(iter/s)": 0.022522 |
| }, |
| { |
| "epoch": 0.6705763397371082, |
| "grad_norm": 2.3092498779296875, |
| "learning_rate": 2.691421629667076e-06, |
| "loss": 0.3477456867694855, |
| "memory(GiB)": 74.62, |
| "step": 829, |
| "token_acc": 0.8858447488584474, |
| "train_speed(iter/s)": 0.022523 |
| }, |
| { |
| "epoch": 0.6713852376137512, |
| "grad_norm": 2.0374417304992676, |
| "learning_rate": 2.6795716801494538e-06, |
| "loss": 0.3951851725578308, |
| "memory(GiB)": 74.62, |
| "step": 830, |
| "token_acc": 0.8655913978494624, |
| "train_speed(iter/s)": 0.022523 |
| }, |
| { |
| "epoch": 0.6721941354903943, |
| "grad_norm": 2.6279661655426025, |
| "learning_rate": 2.6677383185510053e-06, |
| "loss": 0.37477776408195496, |
| "memory(GiB)": 74.62, |
| "step": 831, |
| "token_acc": 0.8745519713261649, |
| "train_speed(iter/s)": 0.022523 |
| }, |
| { |
| "epoch": 0.6730030333670374, |
| "grad_norm": 2.128077268600464, |
| "learning_rate": 2.6559216294642446e-06, |
| "loss": 0.34244000911712646, |
| "memory(GiB)": 74.62, |
| "step": 832, |
| "token_acc": 0.8764478764478765, |
| "train_speed(iter/s)": 0.022524 |
| }, |
| { |
| "epoch": 0.6738119312436804, |
| "grad_norm": 1.9825257062911987, |
| "learning_rate": 2.6441216973624857e-06, |
| "loss": 0.36798208951950073, |
| "memory(GiB)": 74.62, |
| "step": 833, |
| "token_acc": 0.9363957597173145, |
| "train_speed(iter/s)": 0.022524 |
| }, |
| { |
| "epoch": 0.6746208291203235, |
| "grad_norm": 2.1210215091705322, |
| "learning_rate": 2.6323386065992596e-06, |
| "loss": 0.3946457505226135, |
| "memory(GiB)": 74.62, |
| "step": 834, |
| "token_acc": 0.8380681818181818, |
| "train_speed(iter/s)": 0.022524 |
| }, |
| { |
| "epoch": 0.6754297269969667, |
| "grad_norm": 1.8778958320617676, |
| "learning_rate": 2.6205724414077064e-06, |
| "loss": 0.3758698105812073, |
| "memory(GiB)": 74.62, |
| "step": 835, |
| "token_acc": 0.8895705521472392, |
| "train_speed(iter/s)": 0.022525 |
| }, |
| { |
| "epoch": 0.6762386248736098, |
| "grad_norm": 1.917371153831482, |
| "learning_rate": 2.6088232858999644e-06, |
| "loss": 0.3301732540130615, |
| "memory(GiB)": 74.62, |
| "step": 836, |
| "token_acc": 0.9437229437229437, |
| "train_speed(iter/s)": 0.022525 |
| }, |
| { |
| "epoch": 0.6770475227502528, |
| "grad_norm": 2.223240613937378, |
| "learning_rate": 2.5970912240665815e-06, |
| "loss": 0.4553636908531189, |
| "memory(GiB)": 74.62, |
| "step": 837, |
| "token_acc": 0.8589211618257261, |
| "train_speed(iter/s)": 0.022525 |
| }, |
| { |
| "epoch": 0.6778564206268959, |
| "grad_norm": 3.028218984603882, |
| "learning_rate": 2.585376339775908e-06, |
| "loss": 0.46183380484580994, |
| "memory(GiB)": 74.62, |
| "step": 838, |
| "token_acc": 0.8557213930348259, |
| "train_speed(iter/s)": 0.022526 |
| }, |
| { |
| "epoch": 0.678665318503539, |
| "grad_norm": 1.9921714067459106, |
| "learning_rate": 2.573678716773496e-06, |
| "loss": 0.38901880383491516, |
| "memory(GiB)": 74.62, |
| "step": 839, |
| "token_acc": 0.8819188191881919, |
| "train_speed(iter/s)": 0.022526 |
| }, |
| { |
| "epoch": 0.679474216380182, |
| "grad_norm": 2.3916425704956055, |
| "learning_rate": 2.5619984386815073e-06, |
| "loss": 0.4160255193710327, |
| "memory(GiB)": 74.62, |
| "step": 840, |
| "token_acc": 0.8577981651376146, |
| "train_speed(iter/s)": 0.022526 |
| }, |
| { |
| "epoch": 0.6802831142568251, |
| "grad_norm": 2.2416515350341797, |
| "learning_rate": 2.550335588998103e-06, |
| "loss": 0.46858906745910645, |
| "memory(GiB)": 74.62, |
| "step": 841, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022526 |
| }, |
| { |
| "epoch": 0.6810920121334681, |
| "grad_norm": 1.9555854797363281, |
| "learning_rate": 2.5386902510968627e-06, |
| "loss": 0.4036467969417572, |
| "memory(GiB)": 74.62, |
| "step": 842, |
| "token_acc": 0.8421052631578947, |
| "train_speed(iter/s)": 0.022527 |
| }, |
| { |
| "epoch": 0.6819009100101112, |
| "grad_norm": 4.791243553161621, |
| "learning_rate": 2.527062508226176e-06, |
| "loss": 0.37610989809036255, |
| "memory(GiB)": 74.62, |
| "step": 843, |
| "token_acc": 0.8712871287128713, |
| "train_speed(iter/s)": 0.022527 |
| }, |
| { |
| "epoch": 0.6827098078867543, |
| "grad_norm": 1.9034098386764526, |
| "learning_rate": 2.5154524435086537e-06, |
| "loss": 0.3707886040210724, |
| "memory(GiB)": 74.62, |
| "step": 844, |
| "token_acc": 0.8761904761904762, |
| "train_speed(iter/s)": 0.022527 |
| }, |
| { |
| "epoch": 0.6835187057633973, |
| "grad_norm": 2.0733163356781006, |
| "learning_rate": 2.5038601399405337e-06, |
| "loss": 0.4223529100418091, |
| "memory(GiB)": 74.62, |
| "step": 845, |
| "token_acc": 0.844106463878327, |
| "train_speed(iter/s)": 0.022528 |
| }, |
| { |
| "epoch": 0.6843276036400404, |
| "grad_norm": 1.9344497919082642, |
| "learning_rate": 2.492285680391079e-06, |
| "loss": 0.38297271728515625, |
| "memory(GiB)": 74.62, |
| "step": 846, |
| "token_acc": 0.8446215139442231, |
| "train_speed(iter/s)": 0.022528 |
| }, |
| { |
| "epoch": 0.6851365015166835, |
| "grad_norm": 2.1887664794921875, |
| "learning_rate": 2.4807291476019996e-06, |
| "loss": 0.3631531000137329, |
| "memory(GiB)": 74.62, |
| "step": 847, |
| "token_acc": 0.8963963963963963, |
| "train_speed(iter/s)": 0.022529 |
| }, |
| { |
| "epoch": 0.6859453993933265, |
| "grad_norm": 2.7835731506347656, |
| "learning_rate": 2.4691906241868473e-06, |
| "loss": 0.4326528310775757, |
| "memory(GiB)": 74.62, |
| "step": 848, |
| "token_acc": 0.8395522388059702, |
| "train_speed(iter/s)": 0.022529 |
| }, |
| { |
| "epoch": 0.6867542972699696, |
| "grad_norm": 2.0431745052337646, |
| "learning_rate": 2.4576701926304357e-06, |
| "loss": 0.34864187240600586, |
| "memory(GiB)": 74.62, |
| "step": 849, |
| "token_acc": 0.899581589958159, |
| "train_speed(iter/s)": 0.022529 |
| }, |
| { |
| "epoch": 0.6875631951466128, |
| "grad_norm": 5.60698938369751, |
| "learning_rate": 2.4461679352882443e-06, |
| "loss": 0.4054935574531555, |
| "memory(GiB)": 74.62, |
| "step": 850, |
| "token_acc": 0.8398058252427184, |
| "train_speed(iter/s)": 0.022529 |
| }, |
| { |
| "epoch": 0.6883720930232559, |
| "grad_norm": 1.8018779754638672, |
| "learning_rate": 2.434683934385833e-06, |
| "loss": 0.32462042570114136, |
| "memory(GiB)": 74.62, |
| "step": 851, |
| "token_acc": 0.8726591760299626, |
| "train_speed(iter/s)": 0.02253 |
| }, |
| { |
| "epoch": 0.6891809908998989, |
| "grad_norm": 3.250086545944214, |
| "learning_rate": 2.4232182720182524e-06, |
| "loss": 0.3477787375450134, |
| "memory(GiB)": 74.62, |
| "step": 852, |
| "token_acc": 0.8364312267657993, |
| "train_speed(iter/s)": 0.02253 |
| }, |
| { |
| "epoch": 0.689989888776542, |
| "grad_norm": 2.627101421356201, |
| "learning_rate": 2.4117710301494527e-06, |
| "loss": 0.38884738087654114, |
| "memory(GiB)": 74.62, |
| "step": 853, |
| "token_acc": 0.9395348837209302, |
| "train_speed(iter/s)": 0.022531 |
| }, |
| { |
| "epoch": 0.6907987866531851, |
| "grad_norm": 2.8539373874664307, |
| "learning_rate": 2.40034229061171e-06, |
| "loss": 0.40084555745124817, |
| "memory(GiB)": 74.62, |
| "step": 854, |
| "token_acc": 0.8843283582089553, |
| "train_speed(iter/s)": 0.022531 |
| }, |
| { |
| "epoch": 0.6916076845298281, |
| "grad_norm": 2.5881996154785156, |
| "learning_rate": 2.3889321351050286e-06, |
| "loss": 0.36527204513549805, |
| "memory(GiB)": 74.62, |
| "step": 855, |
| "token_acc": 0.911504424778761, |
| "train_speed(iter/s)": 0.022531 |
| }, |
| { |
| "epoch": 0.6924165824064712, |
| "grad_norm": 2.2433817386627197, |
| "learning_rate": 2.377540645196565e-06, |
| "loss": 0.4530036151409149, |
| "memory(GiB)": 74.62, |
| "step": 856, |
| "token_acc": 0.8681672025723473, |
| "train_speed(iter/s)": 0.022532 |
| }, |
| { |
| "epoch": 0.6932254802831143, |
| "grad_norm": 2.251718759536743, |
| "learning_rate": 2.3661679023200422e-06, |
| "loss": 0.44757646322250366, |
| "memory(GiB)": 74.62, |
| "step": 857, |
| "token_acc": 0.9019607843137255, |
| "train_speed(iter/s)": 0.022532 |
| }, |
| { |
| "epoch": 0.6940343781597573, |
| "grad_norm": 1.987608790397644, |
| "learning_rate": 2.354813987775163e-06, |
| "loss": 0.34107983112335205, |
| "memory(GiB)": 74.62, |
| "step": 858, |
| "token_acc": 0.871875, |
| "train_speed(iter/s)": 0.022532 |
| }, |
| { |
| "epoch": 0.6948432760364004, |
| "grad_norm": 2.4668984413146973, |
| "learning_rate": 2.343478982727039e-06, |
| "loss": 0.4043659269809723, |
| "memory(GiB)": 74.62, |
| "step": 859, |
| "token_acc": 0.8947368421052632, |
| "train_speed(iter/s)": 0.022532 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 1.9259587526321411, |
| "learning_rate": 2.3321629682055984e-06, |
| "loss": 0.378429114818573, |
| "memory(GiB)": 74.62, |
| "step": 860, |
| "token_acc": 0.848297213622291, |
| "train_speed(iter/s)": 0.022533 |
| }, |
| { |
| "epoch": 0.6964610717896865, |
| "grad_norm": 1.9682130813598633, |
| "learning_rate": 2.320866025105016e-06, |
| "loss": 0.34357139468193054, |
| "memory(GiB)": 74.62, |
| "step": 861, |
| "token_acc": 0.8348348348348348, |
| "train_speed(iter/s)": 0.022533 |
| }, |
| { |
| "epoch": 0.6972699696663296, |
| "grad_norm": 2.785592794418335, |
| "learning_rate": 2.309588234183137e-06, |
| "loss": 0.3498800992965698, |
| "memory(GiB)": 74.62, |
| "step": 862, |
| "token_acc": 0.8847736625514403, |
| "train_speed(iter/s)": 0.022534 |
| }, |
| { |
| "epoch": 0.6980788675429727, |
| "grad_norm": 2.4636342525482178, |
| "learning_rate": 2.298329676060884e-06, |
| "loss": 0.39585980772972107, |
| "memory(GiB)": 74.62, |
| "step": 863, |
| "token_acc": 0.865546218487395, |
| "train_speed(iter/s)": 0.022534 |
| }, |
| { |
| "epoch": 0.6988877654196157, |
| "grad_norm": 1.8095598220825195, |
| "learning_rate": 2.287090431221701e-06, |
| "loss": 0.37628334760665894, |
| "memory(GiB)": 74.62, |
| "step": 864, |
| "token_acc": 0.8954703832752613, |
| "train_speed(iter/s)": 0.022534 |
| }, |
| { |
| "epoch": 0.6996966632962589, |
| "grad_norm": 1.9140504598617554, |
| "learning_rate": 2.275870580010958e-06, |
| "loss": 0.3849208354949951, |
| "memory(GiB)": 74.62, |
| "step": 865, |
| "token_acc": 0.910958904109589, |
| "train_speed(iter/s)": 0.022534 |
| }, |
| { |
| "epoch": 0.700505561172902, |
| "grad_norm": 1.7582415342330933, |
| "learning_rate": 2.264670202635396e-06, |
| "loss": 0.3840162754058838, |
| "memory(GiB)": 74.62, |
| "step": 866, |
| "token_acc": 0.8550185873605948, |
| "train_speed(iter/s)": 0.022535 |
| }, |
| { |
| "epoch": 0.701314459049545, |
| "grad_norm": 1.8664969205856323, |
| "learning_rate": 2.2534893791625408e-06, |
| "loss": 0.3248283565044403, |
| "memory(GiB)": 74.62, |
| "step": 867, |
| "token_acc": 0.896, |
| "train_speed(iter/s)": 0.022535 |
| }, |
| { |
| "epoch": 0.7021233569261881, |
| "grad_norm": 1.9030721187591553, |
| "learning_rate": 2.242328189520134e-06, |
| "loss": 0.35055387020111084, |
| "memory(GiB)": 74.62, |
| "step": 868, |
| "token_acc": 0.8939393939393939, |
| "train_speed(iter/s)": 0.022535 |
| }, |
| { |
| "epoch": 0.7029322548028312, |
| "grad_norm": 2.2921035289764404, |
| "learning_rate": 2.2311867134955637e-06, |
| "loss": 0.41889488697052, |
| "memory(GiB)": 74.62, |
| "step": 869, |
| "token_acc": 0.8565573770491803, |
| "train_speed(iter/s)": 0.022535 |
| }, |
| { |
| "epoch": 0.7037411526794742, |
| "grad_norm": 2.5671067237854004, |
| "learning_rate": 2.2200650307352883e-06, |
| "loss": 0.3641519844532013, |
| "memory(GiB)": 74.62, |
| "step": 870, |
| "token_acc": 0.8726415094339622, |
| "train_speed(iter/s)": 0.022536 |
| }, |
| { |
| "epoch": 0.7045500505561173, |
| "grad_norm": 2.0666255950927734, |
| "learning_rate": 2.2089632207442763e-06, |
| "loss": 0.34707674384117126, |
| "memory(GiB)": 74.62, |
| "step": 871, |
| "token_acc": 0.898876404494382, |
| "train_speed(iter/s)": 0.022536 |
| }, |
| { |
| "epoch": 0.7053589484327604, |
| "grad_norm": 2.3214352130889893, |
| "learning_rate": 2.197881362885426e-06, |
| "loss": 0.30853113532066345, |
| "memory(GiB)": 74.62, |
| "step": 872, |
| "token_acc": 0.8477611940298507, |
| "train_speed(iter/s)": 0.022536 |
| }, |
| { |
| "epoch": 0.7061678463094034, |
| "grad_norm": 2.3969626426696777, |
| "learning_rate": 2.1868195363790147e-06, |
| "loss": 0.44838905334472656, |
| "memory(GiB)": 74.62, |
| "step": 873, |
| "token_acc": 0.8190954773869347, |
| "train_speed(iter/s)": 0.022537 |
| }, |
| { |
| "epoch": 0.7069767441860465, |
| "grad_norm": 2.3142099380493164, |
| "learning_rate": 2.1757778203021163e-06, |
| "loss": 0.4084170460700989, |
| "memory(GiB)": 74.62, |
| "step": 874, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022537 |
| }, |
| { |
| "epoch": 0.7077856420626896, |
| "grad_norm": 2.4327192306518555, |
| "learning_rate": 2.1647562935880405e-06, |
| "loss": 0.4108632802963257, |
| "memory(GiB)": 74.62, |
| "step": 875, |
| "token_acc": 0.8553054662379421, |
| "train_speed(iter/s)": 0.022537 |
| }, |
| { |
| "epoch": 0.7085945399393326, |
| "grad_norm": 1.7217832803726196, |
| "learning_rate": 2.153755035025777e-06, |
| "loss": 0.3645017743110657, |
| "memory(GiB)": 74.62, |
| "step": 876, |
| "token_acc": 0.825925925925926, |
| "train_speed(iter/s)": 0.022538 |
| }, |
| { |
| "epoch": 0.7094034378159757, |
| "grad_norm": 1.7630640268325806, |
| "learning_rate": 2.1427741232594185e-06, |
| "loss": 0.3739239573478699, |
| "memory(GiB)": 74.62, |
| "step": 877, |
| "token_acc": 0.8757961783439491, |
| "train_speed(iter/s)": 0.022538 |
| }, |
| { |
| "epoch": 0.7102123356926188, |
| "grad_norm": 1.9821792840957642, |
| "learning_rate": 2.1318136367876098e-06, |
| "loss": 0.3128720223903656, |
| "memory(GiB)": 74.62, |
| "step": 878, |
| "token_acc": 0.8744769874476988, |
| "train_speed(iter/s)": 0.022538 |
| }, |
| { |
| "epoch": 0.7110212335692618, |
| "grad_norm": 1.9988818168640137, |
| "learning_rate": 2.120873653962983e-06, |
| "loss": 0.39012840390205383, |
| "memory(GiB)": 74.62, |
| "step": 879, |
| "token_acc": 0.865814696485623, |
| "train_speed(iter/s)": 0.022538 |
| }, |
| { |
| "epoch": 0.7118301314459049, |
| "grad_norm": 2.3474910259246826, |
| "learning_rate": 2.109954252991595e-06, |
| "loss": 0.3977096676826477, |
| "memory(GiB)": 74.62, |
| "step": 880, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022539 |
| }, |
| { |
| "epoch": 0.7126390293225481, |
| "grad_norm": 1.7941343784332275, |
| "learning_rate": 2.0990555119323737e-06, |
| "loss": 0.37561237812042236, |
| "memory(GiB)": 74.62, |
| "step": 881, |
| "token_acc": 0.8759124087591241, |
| "train_speed(iter/s)": 0.022539 |
| }, |
| { |
| "epoch": 0.7134479271991911, |
| "grad_norm": 2.288217782974243, |
| "learning_rate": 2.0881775086965494e-06, |
| "loss": 0.3414373993873596, |
| "memory(GiB)": 74.62, |
| "step": 882, |
| "token_acc": 0.8681318681318682, |
| "train_speed(iter/s)": 0.022539 |
| }, |
| { |
| "epoch": 0.7142568250758342, |
| "grad_norm": 1.7807132005691528, |
| "learning_rate": 2.0773203210471115e-06, |
| "loss": 0.3832324147224426, |
| "memory(GiB)": 74.62, |
| "step": 883, |
| "token_acc": 0.8442028985507246, |
| "train_speed(iter/s)": 0.02254 |
| }, |
| { |
| "epoch": 0.7150657229524773, |
| "grad_norm": 1.990700602531433, |
| "learning_rate": 2.0664840265982457e-06, |
| "loss": 0.4304344952106476, |
| "memory(GiB)": 74.62, |
| "step": 884, |
| "token_acc": 0.8095238095238095, |
| "train_speed(iter/s)": 0.02254 |
| }, |
| { |
| "epoch": 0.7158746208291203, |
| "grad_norm": 1.9708170890808105, |
| "learning_rate": 2.0556687028147765e-06, |
| "loss": 0.4029080867767334, |
| "memory(GiB)": 74.62, |
| "step": 885, |
| "token_acc": 0.8530612244897959, |
| "train_speed(iter/s)": 0.02254 |
| }, |
| { |
| "epoch": 0.7166835187057634, |
| "grad_norm": 2.2865779399871826, |
| "learning_rate": 2.0448744270116206e-06, |
| "loss": 0.390356183052063, |
| "memory(GiB)": 74.62, |
| "step": 886, |
| "token_acc": 0.8823529411764706, |
| "train_speed(iter/s)": 0.022541 |
| }, |
| { |
| "epoch": 0.7174924165824065, |
| "grad_norm": 2.5284066200256348, |
| "learning_rate": 2.0341012763532243e-06, |
| "loss": 0.40166282653808594, |
| "memory(GiB)": 74.62, |
| "step": 887, |
| "token_acc": 0.8840579710144928, |
| "train_speed(iter/s)": 0.022541 |
| }, |
| { |
| "epoch": 0.7183013144590495, |
| "grad_norm": 6.747030258178711, |
| "learning_rate": 2.023349327853025e-06, |
| "loss": 0.38176417350769043, |
| "memory(GiB)": 74.62, |
| "step": 888, |
| "token_acc": 0.8652482269503546, |
| "train_speed(iter/s)": 0.022541 |
| }, |
| { |
| "epoch": 0.7191102123356926, |
| "grad_norm": 2.049042224884033, |
| "learning_rate": 2.0126186583728856e-06, |
| "loss": 0.3778286576271057, |
| "memory(GiB)": 74.62, |
| "step": 889, |
| "token_acc": 0.8429319371727748, |
| "train_speed(iter/s)": 0.022542 |
| }, |
| { |
| "epoch": 0.7199191102123357, |
| "grad_norm": 2.2993712425231934, |
| "learning_rate": 2.001909344622559e-06, |
| "loss": 0.4231566786766052, |
| "memory(GiB)": 74.62, |
| "step": 890, |
| "token_acc": 0.8865979381443299, |
| "train_speed(iter/s)": 0.022542 |
| }, |
| { |
| "epoch": 0.7207280080889787, |
| "grad_norm": 2.244127035140991, |
| "learning_rate": 1.9912214631591314e-06, |
| "loss": 0.3927876651287079, |
| "memory(GiB)": 74.62, |
| "step": 891, |
| "token_acc": 0.8867924528301887, |
| "train_speed(iter/s)": 0.022542 |
| }, |
| { |
| "epoch": 0.7215369059656218, |
| "grad_norm": 1.9843049049377441, |
| "learning_rate": 1.9805550903864775e-06, |
| "loss": 0.39008790254592896, |
| "memory(GiB)": 74.62, |
| "step": 892, |
| "token_acc": 0.8244274809160306, |
| "train_speed(iter/s)": 0.022543 |
| }, |
| { |
| "epoch": 0.7223458038422649, |
| "grad_norm": 2.253777027130127, |
| "learning_rate": 1.9699103025547145e-06, |
| "loss": 0.3611776828765869, |
| "memory(GiB)": 74.62, |
| "step": 893, |
| "token_acc": 0.84765625, |
| "train_speed(iter/s)": 0.022543 |
| }, |
| { |
| "epoch": 0.7231547017189079, |
| "grad_norm": 2.2141964435577393, |
| "learning_rate": 1.9592871757596532e-06, |
| "loss": 0.4213542640209198, |
| "memory(GiB)": 74.62, |
| "step": 894, |
| "token_acc": 0.8754716981132076, |
| "train_speed(iter/s)": 0.022543 |
| }, |
| { |
| "epoch": 0.723963599595551, |
| "grad_norm": 1.9213643074035645, |
| "learning_rate": 1.9486857859422607e-06, |
| "loss": 0.4320271611213684, |
| "memory(GiB)": 74.62, |
| "step": 895, |
| "token_acc": 0.8327526132404182, |
| "train_speed(iter/s)": 0.022543 |
| }, |
| { |
| "epoch": 0.7247724974721942, |
| "grad_norm": 2.10569167137146, |
| "learning_rate": 1.9381062088881142e-06, |
| "loss": 0.3284885883331299, |
| "memory(GiB)": 74.62, |
| "step": 896, |
| "token_acc": 0.8831615120274914, |
| "train_speed(iter/s)": 0.022543 |
| }, |
| { |
| "epoch": 0.7255813953488373, |
| "grad_norm": 1.6468027830123901, |
| "learning_rate": 1.9275485202268574e-06, |
| "loss": 0.35665562748908997, |
| "memory(GiB)": 74.62, |
| "step": 897, |
| "token_acc": 0.9037037037037037, |
| "train_speed(iter/s)": 0.022544 |
| }, |
| { |
| "epoch": 0.7263902932254803, |
| "grad_norm": 1.961858868598938, |
| "learning_rate": 1.917012795431665e-06, |
| "loss": 0.3552227020263672, |
| "memory(GiB)": 74.62, |
| "step": 898, |
| "token_acc": 0.8705882352941177, |
| "train_speed(iter/s)": 0.022544 |
| }, |
| { |
| "epoch": 0.7271991911021234, |
| "grad_norm": 2.2594661712646484, |
| "learning_rate": 1.9064991098186935e-06, |
| "loss": 0.42378872632980347, |
| "memory(GiB)": 74.62, |
| "step": 899, |
| "token_acc": 0.8449612403100775, |
| "train_speed(iter/s)": 0.022544 |
| }, |
| { |
| "epoch": 0.7280080889787665, |
| "grad_norm": 2.3480887413024902, |
| "learning_rate": 1.8960075385465547e-06, |
| "loss": 0.38160020112991333, |
| "memory(GiB)": 74.62, |
| "step": 900, |
| "token_acc": 0.8577235772357723, |
| "train_speed(iter/s)": 0.022545 |
| }, |
| { |
| "epoch": 0.7288169868554095, |
| "grad_norm": 2.0713682174682617, |
| "learning_rate": 1.8855381566157727e-06, |
| "loss": 0.3788355588912964, |
| "memory(GiB)": 74.62, |
| "step": 901, |
| "token_acc": 0.9140271493212669, |
| "train_speed(iter/s)": 0.022545 |
| }, |
| { |
| "epoch": 0.7296258847320526, |
| "grad_norm": 1.8822578191757202, |
| "learning_rate": 1.875091038868243e-06, |
| "loss": 0.38564032316207886, |
| "memory(GiB)": 74.62, |
| "step": 902, |
| "token_acc": 0.8618181818181818, |
| "train_speed(iter/s)": 0.022545 |
| }, |
| { |
| "epoch": 0.7304347826086957, |
| "grad_norm": 2.0705273151397705, |
| "learning_rate": 1.8646662599867072e-06, |
| "loss": 0.4137299060821533, |
| "memory(GiB)": 74.62, |
| "step": 903, |
| "token_acc": 0.8893617021276595, |
| "train_speed(iter/s)": 0.022545 |
| }, |
| { |
| "epoch": 0.7312436804853387, |
| "grad_norm": 2.7392282485961914, |
| "learning_rate": 1.8542638944942127e-06, |
| "loss": 0.41165363788604736, |
| "memory(GiB)": 74.62, |
| "step": 904, |
| "token_acc": 0.873015873015873, |
| "train_speed(iter/s)": 0.022546 |
| }, |
| { |
| "epoch": 0.7320525783619818, |
| "grad_norm": 2.251229763031006, |
| "learning_rate": 1.8438840167535826e-06, |
| "loss": 0.39759790897369385, |
| "memory(GiB)": 74.62, |
| "step": 905, |
| "token_acc": 0.8949416342412452, |
| "train_speed(iter/s)": 0.022546 |
| }, |
| { |
| "epoch": 0.7328614762386249, |
| "grad_norm": 2.1164135932922363, |
| "learning_rate": 1.8335267009668794e-06, |
| "loss": 0.36323827505111694, |
| "memory(GiB)": 74.62, |
| "step": 906, |
| "token_acc": 0.9142857142857143, |
| "train_speed(iter/s)": 0.022546 |
| }, |
| { |
| "epoch": 0.7336703741152679, |
| "grad_norm": 2.421180009841919, |
| "learning_rate": 1.8231920211748822e-06, |
| "loss": 0.35361167788505554, |
| "memory(GiB)": 74.62, |
| "step": 907, |
| "token_acc": 0.8603603603603603, |
| "train_speed(iter/s)": 0.022547 |
| }, |
| { |
| "epoch": 0.734479271991911, |
| "grad_norm": 2.0135669708251953, |
| "learning_rate": 1.8128800512565514e-06, |
| "loss": 0.37238454818725586, |
| "memory(GiB)": 74.62, |
| "step": 908, |
| "token_acc": 0.8442906574394463, |
| "train_speed(iter/s)": 0.022547 |
| }, |
| { |
| "epoch": 0.735288169868554, |
| "grad_norm": 3.3785688877105713, |
| "learning_rate": 1.8025908649285033e-06, |
| "loss": 0.41406381130218506, |
| "memory(GiB)": 74.62, |
| "step": 909, |
| "token_acc": 0.8631178707224335, |
| "train_speed(iter/s)": 0.022548 |
| }, |
| { |
| "epoch": 0.7360970677451971, |
| "grad_norm": 2.393422842025757, |
| "learning_rate": 1.7923245357444847e-06, |
| "loss": 0.3641640543937683, |
| "memory(GiB)": 74.62, |
| "step": 910, |
| "token_acc": 0.9076923076923077, |
| "train_speed(iter/s)": 0.022548 |
| }, |
| { |
| "epoch": 0.7369059656218403, |
| "grad_norm": 2.425569534301758, |
| "learning_rate": 1.7820811370948371e-06, |
| "loss": 0.35734257102012634, |
| "memory(GiB)": 74.62, |
| "step": 911, |
| "token_acc": 0.8491379310344828, |
| "train_speed(iter/s)": 0.022548 |
| }, |
| { |
| "epoch": 0.7377148634984834, |
| "grad_norm": 2.3215856552124023, |
| "learning_rate": 1.771860742205988e-06, |
| "loss": 0.4984077513217926, |
| "memory(GiB)": 74.62, |
| "step": 912, |
| "token_acc": 0.8333333333333334, |
| "train_speed(iter/s)": 0.022548 |
| }, |
| { |
| "epoch": 0.7385237613751264, |
| "grad_norm": 2.343384265899658, |
| "learning_rate": 1.7616634241399177e-06, |
| "loss": 0.3875213861465454, |
| "memory(GiB)": 74.62, |
| "step": 913, |
| "token_acc": 0.8376383763837638, |
| "train_speed(iter/s)": 0.022548 |
| }, |
| { |
| "epoch": 0.7393326592517695, |
| "grad_norm": 1.9467604160308838, |
| "learning_rate": 1.7514892557936309e-06, |
| "loss": 0.3730790615081787, |
| "memory(GiB)": 74.62, |
| "step": 914, |
| "token_acc": 0.8776223776223776, |
| "train_speed(iter/s)": 0.022549 |
| }, |
| { |
| "epoch": 0.7401415571284126, |
| "grad_norm": 2.171644687652588, |
| "learning_rate": 1.7413383098986563e-06, |
| "loss": 0.3576545715332031, |
| "memory(GiB)": 74.62, |
| "step": 915, |
| "token_acc": 0.8874172185430463, |
| "train_speed(iter/s)": 0.022549 |
| }, |
| { |
| "epoch": 0.7409504550050556, |
| "grad_norm": 2.274996519088745, |
| "learning_rate": 1.7312106590205014e-06, |
| "loss": 0.3864002227783203, |
| "memory(GiB)": 74.62, |
| "step": 916, |
| "token_acc": 0.9036144578313253, |
| "train_speed(iter/s)": 0.022549 |
| }, |
| { |
| "epoch": 0.7417593528816987, |
| "grad_norm": 1.9122254848480225, |
| "learning_rate": 1.7211063755581524e-06, |
| "loss": 0.36603063344955444, |
| "memory(GiB)": 74.62, |
| "step": 917, |
| "token_acc": 0.8798798798798799, |
| "train_speed(iter/s)": 0.022549 |
| }, |
| { |
| "epoch": 0.7425682507583418, |
| "grad_norm": 2.126805067062378, |
| "learning_rate": 1.7110255317435503e-06, |
| "loss": 0.38110482692718506, |
| "memory(GiB)": 74.62, |
| "step": 918, |
| "token_acc": 0.8325581395348837, |
| "train_speed(iter/s)": 0.02255 |
| }, |
| { |
| "epoch": 0.7433771486349848, |
| "grad_norm": 2.5423946380615234, |
| "learning_rate": 1.7009681996410693e-06, |
| "loss": 0.3204044699668884, |
| "memory(GiB)": 74.62, |
| "step": 919, |
| "token_acc": 0.8575757575757575, |
| "train_speed(iter/s)": 0.02255 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 2.0116817951202393, |
| "learning_rate": 1.6909344511470116e-06, |
| "loss": 0.3353678286075592, |
| "memory(GiB)": 74.62, |
| "step": 920, |
| "token_acc": 0.9169811320754717, |
| "train_speed(iter/s)": 0.02255 |
| }, |
| { |
| "epoch": 0.744994944388271, |
| "grad_norm": 2.245171546936035, |
| "learning_rate": 1.6809243579890865e-06, |
| "loss": 0.401151180267334, |
| "memory(GiB)": 74.62, |
| "step": 921, |
| "token_acc": 0.823321554770318, |
| "train_speed(iter/s)": 0.022551 |
| }, |
| { |
| "epoch": 0.745803842264914, |
| "grad_norm": 2.26719331741333, |
| "learning_rate": 1.6709379917259028e-06, |
| "loss": 0.4471093714237213, |
| "memory(GiB)": 74.62, |
| "step": 922, |
| "token_acc": 0.8719723183391004, |
| "train_speed(iter/s)": 0.022551 |
| }, |
| { |
| "epoch": 0.7466127401415571, |
| "grad_norm": 2.297231435775757, |
| "learning_rate": 1.6609754237464475e-06, |
| "loss": 0.4046187400817871, |
| "memory(GiB)": 74.62, |
| "step": 923, |
| "token_acc": 0.835, |
| "train_speed(iter/s)": 0.022551 |
| }, |
| { |
| "epoch": 0.7474216380182002, |
| "grad_norm": 2.375325918197632, |
| "learning_rate": 1.651036725269588e-06, |
| "loss": 0.3570983409881592, |
| "memory(GiB)": 74.62, |
| "step": 924, |
| "token_acc": 0.883177570093458, |
| "train_speed(iter/s)": 0.022552 |
| }, |
| { |
| "epoch": 0.7482305358948432, |
| "grad_norm": 1.6737430095672607, |
| "learning_rate": 1.6411219673435564e-06, |
| "loss": 0.33470356464385986, |
| "memory(GiB)": 74.62, |
| "step": 925, |
| "token_acc": 0.9057377049180327, |
| "train_speed(iter/s)": 0.022552 |
| }, |
| { |
| "epoch": 0.7490394337714863, |
| "grad_norm": 1.9846247434616089, |
| "learning_rate": 1.6312312208454373e-06, |
| "loss": 0.37246596813201904, |
| "memory(GiB)": 74.62, |
| "step": 926, |
| "token_acc": 0.8854166666666666, |
| "train_speed(iter/s)": 0.022552 |
| }, |
| { |
| "epoch": 0.7498483316481295, |
| "grad_norm": 1.810141921043396, |
| "learning_rate": 1.6213645564806751e-06, |
| "loss": 0.3770504593849182, |
| "memory(GiB)": 74.62, |
| "step": 927, |
| "token_acc": 0.9072164948453608, |
| "train_speed(iter/s)": 0.022553 |
| }, |
| { |
| "epoch": 0.7506572295247725, |
| "grad_norm": 2.10400128364563, |
| "learning_rate": 1.6115220447825503e-06, |
| "loss": 0.376004159450531, |
| "memory(GiB)": 74.62, |
| "step": 928, |
| "token_acc": 0.9196428571428571, |
| "train_speed(iter/s)": 0.022553 |
| }, |
| { |
| "epoch": 0.7514661274014156, |
| "grad_norm": 2.000704050064087, |
| "learning_rate": 1.6017037561116899e-06, |
| "loss": 0.3456549048423767, |
| "memory(GiB)": 74.62, |
| "step": 929, |
| "token_acc": 0.8565573770491803, |
| "train_speed(iter/s)": 0.022553 |
| }, |
| { |
| "epoch": 0.7522750252780587, |
| "grad_norm": 1.5539889335632324, |
| "learning_rate": 1.59190976065556e-06, |
| "loss": 0.33691200613975525, |
| "memory(GiB)": 74.62, |
| "step": 930, |
| "token_acc": 0.8848920863309353, |
| "train_speed(iter/s)": 0.022554 |
| }, |
| { |
| "epoch": 0.7530839231547017, |
| "grad_norm": 1.6070380210876465, |
| "learning_rate": 1.582140128427957e-06, |
| "loss": 0.39200344681739807, |
| "memory(GiB)": 74.62, |
| "step": 931, |
| "token_acc": 0.8892857142857142, |
| "train_speed(iter/s)": 0.022554 |
| }, |
| { |
| "epoch": 0.7538928210313448, |
| "grad_norm": 1.8517992496490479, |
| "learning_rate": 1.5723949292685193e-06, |
| "loss": 0.34315165877342224, |
| "memory(GiB)": 74.62, |
| "step": 932, |
| "token_acc": 0.8774193548387097, |
| "train_speed(iter/s)": 0.022554 |
| }, |
| { |
| "epoch": 0.7547017189079879, |
| "grad_norm": 2.0841267108917236, |
| "learning_rate": 1.5626742328422195e-06, |
| "loss": 0.3751834034919739, |
| "memory(GiB)": 74.62, |
| "step": 933, |
| "token_acc": 0.9234234234234234, |
| "train_speed(iter/s)": 0.022554 |
| }, |
| { |
| "epoch": 0.7555106167846309, |
| "grad_norm": 2.080343008041382, |
| "learning_rate": 1.552978108638869e-06, |
| "loss": 0.37340766191482544, |
| "memory(GiB)": 74.62, |
| "step": 934, |
| "token_acc": 0.9068627450980392, |
| "train_speed(iter/s)": 0.022555 |
| }, |
| { |
| "epoch": 0.756319514661274, |
| "grad_norm": 2.0687668323516846, |
| "learning_rate": 1.543306625972623e-06, |
| "loss": 0.4011552929878235, |
| "memory(GiB)": 74.62, |
| "step": 935, |
| "token_acc": 0.9128440366972477, |
| "train_speed(iter/s)": 0.022555 |
| }, |
| { |
| "epoch": 0.7571284125379171, |
| "grad_norm": 1.9438579082489014, |
| "learning_rate": 1.5336598539814784e-06, |
| "loss": 0.389544278383255, |
| "memory(GiB)": 74.62, |
| "step": 936, |
| "token_acc": 0.8618181818181818, |
| "train_speed(iter/s)": 0.022555 |
| }, |
| { |
| "epoch": 0.7579373104145601, |
| "grad_norm": 2.186204671859741, |
| "learning_rate": 1.5240378616267887e-06, |
| "loss": 0.34044280648231506, |
| "memory(GiB)": 74.62, |
| "step": 937, |
| "token_acc": 0.8879310344827587, |
| "train_speed(iter/s)": 0.022555 |
| }, |
| { |
| "epoch": 0.7587462082912032, |
| "grad_norm": 2.069333076477051, |
| "learning_rate": 1.514440717692765e-06, |
| "loss": 0.41251152753829956, |
| "memory(GiB)": 74.62, |
| "step": 938, |
| "token_acc": 0.8347107438016529, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7595551061678463, |
| "grad_norm": 1.9282809495925903, |
| "learning_rate": 1.5048684907859873e-06, |
| "loss": 0.4127691984176636, |
| "memory(GiB)": 74.62, |
| "step": 939, |
| "token_acc": 0.842443729903537, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7603640040444893, |
| "grad_norm": 2.28041410446167, |
| "learning_rate": 1.495321249334908e-06, |
| "loss": 0.42502281069755554, |
| "memory(GiB)": 74.62, |
| "step": 940, |
| "token_acc": 0.8642857142857143, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7611729019211324, |
| "grad_norm": 1.8921377658843994, |
| "learning_rate": 1.485799061589372e-06, |
| "loss": 0.4206182658672333, |
| "memory(GiB)": 74.62, |
| "step": 941, |
| "token_acc": 0.8811475409836066, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7619817997977756, |
| "grad_norm": 1.8928072452545166, |
| "learning_rate": 1.4763019956201252e-06, |
| "loss": 0.3889954090118408, |
| "memory(GiB)": 74.62, |
| "step": 942, |
| "token_acc": 0.8821752265861027, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7627906976744186, |
| "grad_norm": 3.128412961959839, |
| "learning_rate": 1.4668301193183198e-06, |
| "loss": 0.38851073384284973, |
| "memory(GiB)": 74.62, |
| "step": 943, |
| "token_acc": 0.8724137931034482, |
| "train_speed(iter/s)": 0.022556 |
| }, |
| { |
| "epoch": 0.7635995955510617, |
| "grad_norm": 1.9432473182678223, |
| "learning_rate": 1.4573835003950438e-06, |
| "loss": 0.38765308260917664, |
| "memory(GiB)": 74.62, |
| "step": 944, |
| "token_acc": 0.8867924528301887, |
| "train_speed(iter/s)": 0.022557 |
| }, |
| { |
| "epoch": 0.7644084934277048, |
| "grad_norm": 2.4022583961486816, |
| "learning_rate": 1.4479622063808242e-06, |
| "loss": 0.43059998750686646, |
| "memory(GiB)": 74.62, |
| "step": 945, |
| "token_acc": 0.8793774319066148, |
| "train_speed(iter/s)": 0.022557 |
| }, |
| { |
| "epoch": 0.7652173913043478, |
| "grad_norm": 2.3695461750030518, |
| "learning_rate": 1.4385663046251514e-06, |
| "loss": 0.4037495255470276, |
| "memory(GiB)": 74.62, |
| "step": 946, |
| "token_acc": 0.8772727272727273, |
| "train_speed(iter/s)": 0.022557 |
| }, |
| { |
| "epoch": 0.7660262891809909, |
| "grad_norm": 1.9513347148895264, |
| "learning_rate": 1.4291958622959972e-06, |
| "loss": 0.35621780157089233, |
| "memory(GiB)": 74.62, |
| "step": 947, |
| "token_acc": 0.9019607843137255, |
| "train_speed(iter/s)": 0.022557 |
| }, |
| { |
| "epoch": 0.766835187057634, |
| "grad_norm": 2.0191597938537598, |
| "learning_rate": 1.4198509463793275e-06, |
| "loss": 0.38198453187942505, |
| "memory(GiB)": 74.62, |
| "step": 948, |
| "token_acc": 0.8981132075471698, |
| "train_speed(iter/s)": 0.022558 |
| }, |
| { |
| "epoch": 0.767644084934277, |
| "grad_norm": 1.8823531866073608, |
| "learning_rate": 1.4105316236786332e-06, |
| "loss": 0.39389660954475403, |
| "memory(GiB)": 74.62, |
| "step": 949, |
| "token_acc": 0.8419689119170984, |
| "train_speed(iter/s)": 0.022558 |
| }, |
| { |
| "epoch": 0.7684529828109201, |
| "grad_norm": 2.254852771759033, |
| "learning_rate": 1.4012379608144477e-06, |
| "loss": 0.40055525302886963, |
| "memory(GiB)": 74.62, |
| "step": 950, |
| "token_acc": 0.8776371308016878, |
| "train_speed(iter/s)": 0.022558 |
| }, |
| { |
| "epoch": 0.7692618806875632, |
| "grad_norm": 2.2618825435638428, |
| "learning_rate": 1.3919700242238715e-06, |
| "loss": 0.4659748673439026, |
| "memory(GiB)": 74.62, |
| "step": 951, |
| "token_acc": 0.804635761589404, |
| "train_speed(iter/s)": 0.022559 |
| }, |
| { |
| "epoch": 0.7700707785642062, |
| "grad_norm": 1.884406328201294, |
| "learning_rate": 1.382727880160098e-06, |
| "loss": 0.34575021266937256, |
| "memory(GiB)": 74.62, |
| "step": 952, |
| "token_acc": 0.8618421052631579, |
| "train_speed(iter/s)": 0.022559 |
| }, |
| { |
| "epoch": 0.7708796764408493, |
| "grad_norm": 2.369433641433716, |
| "learning_rate": 1.3735115946919342e-06, |
| "loss": 0.35355186462402344, |
| "memory(GiB)": 74.62, |
| "step": 953, |
| "token_acc": 0.9132231404958677, |
| "train_speed(iter/s)": 0.022559 |
| }, |
| { |
| "epoch": 0.7716885743174924, |
| "grad_norm": 1.9989012479782104, |
| "learning_rate": 1.3643212337033396e-06, |
| "loss": 0.35935360193252563, |
| "memory(GiB)": 74.62, |
| "step": 954, |
| "token_acc": 0.8350877192982457, |
| "train_speed(iter/s)": 0.02256 |
| }, |
| { |
| "epoch": 0.7724974721941354, |
| "grad_norm": 2.4037156105041504, |
| "learning_rate": 1.3551568628929434e-06, |
| "loss": 0.41261640191078186, |
| "memory(GiB)": 74.62, |
| "step": 955, |
| "token_acc": 0.849112426035503, |
| "train_speed(iter/s)": 0.02256 |
| }, |
| { |
| "epoch": 0.7733063700707785, |
| "grad_norm": 3.044893741607666, |
| "learning_rate": 1.346018547773582e-06, |
| "loss": 0.46254298090934753, |
| "memory(GiB)": 74.62, |
| "step": 956, |
| "token_acc": 0.8521400778210116, |
| "train_speed(iter/s)": 0.02256 |
| }, |
| { |
| "epoch": 0.7741152679474217, |
| "grad_norm": 2.8486831188201904, |
| "learning_rate": 1.3369063536718347e-06, |
| "loss": 0.39035511016845703, |
| "memory(GiB)": 74.62, |
| "step": 957, |
| "token_acc": 0.8973509933774835, |
| "train_speed(iter/s)": 0.02256 |
| }, |
| { |
| "epoch": 0.7749241658240648, |
| "grad_norm": 2.8728833198547363, |
| "learning_rate": 1.3278203457275401e-06, |
| "loss": 0.4135955572128296, |
| "memory(GiB)": 74.62, |
| "step": 958, |
| "token_acc": 0.8804347826086957, |
| "train_speed(iter/s)": 0.02256 |
| }, |
| { |
| "epoch": 0.7757330637007078, |
| "grad_norm": 2.2483723163604736, |
| "learning_rate": 1.3187605888933508e-06, |
| "loss": 0.3800261616706848, |
| "memory(GiB)": 74.62, |
| "step": 959, |
| "token_acc": 0.8988095238095238, |
| "train_speed(iter/s)": 0.022561 |
| }, |
| { |
| "epoch": 0.7765419615773509, |
| "grad_norm": 2.3790223598480225, |
| "learning_rate": 1.3097271479342526e-06, |
| "loss": 0.4093528389930725, |
| "memory(GiB)": 74.62, |
| "step": 960, |
| "token_acc": 0.8419243986254296, |
| "train_speed(iter/s)": 0.022561 |
| }, |
| { |
| "epoch": 0.777350859453994, |
| "grad_norm": 2.5826141834259033, |
| "learning_rate": 1.3007200874271126e-06, |
| "loss": 0.30737096071243286, |
| "memory(GiB)": 74.62, |
| "step": 961, |
| "token_acc": 0.8328173374613003, |
| "train_speed(iter/s)": 0.022561 |
| }, |
| { |
| "epoch": 0.778159757330637, |
| "grad_norm": 1.8254023790359497, |
| "learning_rate": 1.2917394717602123e-06, |
| "loss": 0.3649098575115204, |
| "memory(GiB)": 74.62, |
| "step": 962, |
| "token_acc": 0.8901960784313725, |
| "train_speed(iter/s)": 0.022562 |
| }, |
| { |
| "epoch": 0.7789686552072801, |
| "grad_norm": 1.9518779516220093, |
| "learning_rate": 1.2827853651327883e-06, |
| "loss": 0.3445701599121094, |
| "memory(GiB)": 74.62, |
| "step": 963, |
| "token_acc": 0.87890625, |
| "train_speed(iter/s)": 0.022562 |
| }, |
| { |
| "epoch": 0.7797775530839232, |
| "grad_norm": 2.0577752590179443, |
| "learning_rate": 1.2738578315545751e-06, |
| "loss": 0.3813546299934387, |
| "memory(GiB)": 74.62, |
| "step": 964, |
| "token_acc": 0.8389830508474576, |
| "train_speed(iter/s)": 0.022562 |
| }, |
| { |
| "epoch": 0.7805864509605662, |
| "grad_norm": 2.2759878635406494, |
| "learning_rate": 1.2649569348453416e-06, |
| "loss": 0.4146482050418854, |
| "memory(GiB)": 74.62, |
| "step": 965, |
| "token_acc": 0.8581314878892734, |
| "train_speed(iter/s)": 0.022562 |
| }, |
| { |
| "epoch": 0.7813953488372093, |
| "grad_norm": 2.162762403488159, |
| "learning_rate": 1.2560827386344444e-06, |
| "loss": 0.43926411867141724, |
| "memory(GiB)": 74.62, |
| "step": 966, |
| "token_acc": 0.908256880733945, |
| "train_speed(iter/s)": 0.022563 |
| }, |
| { |
| "epoch": 0.7822042467138524, |
| "grad_norm": 2.6653337478637695, |
| "learning_rate": 1.2472353063603626e-06, |
| "loss": 0.3915598690509796, |
| "memory(GiB)": 74.62, |
| "step": 967, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022563 |
| }, |
| { |
| "epoch": 0.7830131445904954, |
| "grad_norm": 1.9902511835098267, |
| "learning_rate": 1.238414701270252e-06, |
| "loss": 0.3581811189651489, |
| "memory(GiB)": 74.62, |
| "step": 968, |
| "token_acc": 0.8584615384615385, |
| "train_speed(iter/s)": 0.022563 |
| }, |
| { |
| "epoch": 0.7838220424671385, |
| "grad_norm": 2.0768163204193115, |
| "learning_rate": 1.229620986419494e-06, |
| "loss": 0.40156054496765137, |
| "memory(GiB)": 74.62, |
| "step": 969, |
| "token_acc": 0.8660436137071651, |
| "train_speed(iter/s)": 0.022563 |
| }, |
| { |
| "epoch": 0.7846309403437816, |
| "grad_norm": 2.0157761573791504, |
| "learning_rate": 1.2208542246712346e-06, |
| "loss": 0.3723048269748688, |
| "memory(GiB)": 74.62, |
| "step": 970, |
| "token_acc": 0.9051724137931034, |
| "train_speed(iter/s)": 0.022564 |
| }, |
| { |
| "epoch": 0.7854398382204246, |
| "grad_norm": 2.2510571479797363, |
| "learning_rate": 1.2121144786959466e-06, |
| "loss": 0.39407879114151, |
| "memory(GiB)": 74.62, |
| "step": 971, |
| "token_acc": 0.8828125, |
| "train_speed(iter/s)": 0.022564 |
| }, |
| { |
| "epoch": 0.7862487360970677, |
| "grad_norm": 1.9419714212417603, |
| "learning_rate": 1.2034018109709716e-06, |
| "loss": 0.3809299170970917, |
| "memory(GiB)": 74.62, |
| "step": 972, |
| "token_acc": 0.8663967611336032, |
| "train_speed(iter/s)": 0.022564 |
| }, |
| { |
| "epoch": 0.7870576339737109, |
| "grad_norm": 2.204801321029663, |
| "learning_rate": 1.1947162837800842e-06, |
| "loss": 0.41355523467063904, |
| "memory(GiB)": 74.62, |
| "step": 973, |
| "token_acc": 0.8683274021352313, |
| "train_speed(iter/s)": 0.022565 |
| }, |
| { |
| "epoch": 0.7878665318503539, |
| "grad_norm": 2.461207866668701, |
| "learning_rate": 1.1860579592130366e-06, |
| "loss": 0.407459557056427, |
| "memory(GiB)": 74.62, |
| "step": 974, |
| "token_acc": 0.8583333333333333, |
| "train_speed(iter/s)": 0.022565 |
| }, |
| { |
| "epoch": 0.788675429726997, |
| "grad_norm": 1.8681888580322266, |
| "learning_rate": 1.177426899165121e-06, |
| "loss": 0.33745524287223816, |
| "memory(GiB)": 74.62, |
| "step": 975, |
| "token_acc": 0.9018691588785047, |
| "train_speed(iter/s)": 0.022565 |
| }, |
| { |
| "epoch": 0.7894843276036401, |
| "grad_norm": 1.9317001104354858, |
| "learning_rate": 1.1688231653367271e-06, |
| "loss": 0.36072519421577454, |
| "memory(GiB)": 74.62, |
| "step": 976, |
| "token_acc": 0.8922413793103449, |
| "train_speed(iter/s)": 0.022565 |
| }, |
| { |
| "epoch": 0.7902932254802831, |
| "grad_norm": 1.4545793533325195, |
| "learning_rate": 1.1602468192328936e-06, |
| "loss": 0.3215617537498474, |
| "memory(GiB)": 74.62, |
| "step": 977, |
| "token_acc": 0.9003436426116839, |
| "train_speed(iter/s)": 0.022566 |
| }, |
| { |
| "epoch": 0.7911021233569262, |
| "grad_norm": 2.098681926727295, |
| "learning_rate": 1.1516979221628804e-06, |
| "loss": 0.36492764949798584, |
| "memory(GiB)": 74.62, |
| "step": 978, |
| "token_acc": 0.8724489795918368, |
| "train_speed(iter/s)": 0.022566 |
| }, |
| { |
| "epoch": 0.7919110212335693, |
| "grad_norm": 1.852514386177063, |
| "learning_rate": 1.1431765352397167e-06, |
| "loss": 0.3920031785964966, |
| "memory(GiB)": 74.62, |
| "step": 979, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022566 |
| }, |
| { |
| "epoch": 0.7927199191102123, |
| "grad_norm": 2.011186122894287, |
| "learning_rate": 1.13468271937978e-06, |
| "loss": 0.3568735718727112, |
| "memory(GiB)": 74.62, |
| "step": 980, |
| "token_acc": 0.9057971014492754, |
| "train_speed(iter/s)": 0.022566 |
| }, |
| { |
| "epoch": 0.7935288169868554, |
| "grad_norm": 2.8331449031829834, |
| "learning_rate": 1.1262165353023474e-06, |
| "loss": 0.3684077560901642, |
| "memory(GiB)": 74.62, |
| "step": 981, |
| "token_acc": 0.8900343642611683, |
| "train_speed(iter/s)": 0.022567 |
| }, |
| { |
| "epoch": 0.7943377148634985, |
| "grad_norm": 2.3888087272644043, |
| "learning_rate": 1.1177780435291641e-06, |
| "loss": 0.3318890929222107, |
| "memory(GiB)": 74.62, |
| "step": 982, |
| "token_acc": 0.8755760368663594, |
| "train_speed(iter/s)": 0.022567 |
| }, |
| { |
| "epoch": 0.7951466127401415, |
| "grad_norm": 1.8067930936813354, |
| "learning_rate": 1.1093673043840182e-06, |
| "loss": 0.32926511764526367, |
| "memory(GiB)": 74.62, |
| "step": 983, |
| "token_acc": 0.8690095846645367, |
| "train_speed(iter/s)": 0.022567 |
| }, |
| { |
| "epoch": 0.7959555106167846, |
| "grad_norm": 2.238401412963867, |
| "learning_rate": 1.100984377992298e-06, |
| "loss": 0.39484211802482605, |
| "memory(GiB)": 74.62, |
| "step": 984, |
| "token_acc": 0.8741496598639455, |
| "train_speed(iter/s)": 0.022567 |
| }, |
| { |
| "epoch": 0.7967644084934277, |
| "grad_norm": 2.389265298843384, |
| "learning_rate": 1.0926293242805735e-06, |
| "loss": 0.45714324712753296, |
| "memory(GiB)": 74.62, |
| "step": 985, |
| "token_acc": 0.8478260869565217, |
| "train_speed(iter/s)": 0.022568 |
| }, |
| { |
| "epoch": 0.7975733063700707, |
| "grad_norm": 2.173175811767578, |
| "learning_rate": 1.0843022029761596e-06, |
| "loss": 0.37196797132492065, |
| "memory(GiB)": 74.62, |
| "step": 986, |
| "token_acc": 0.8589211618257261, |
| "train_speed(iter/s)": 0.022568 |
| }, |
| { |
| "epoch": 0.7983822042467138, |
| "grad_norm": 2.0259294509887695, |
| "learning_rate": 1.0760030736066952e-06, |
| "loss": 0.44243717193603516, |
| "memory(GiB)": 74.62, |
| "step": 987, |
| "token_acc": 0.8244274809160306, |
| "train_speed(iter/s)": 0.022568 |
| }, |
| { |
| "epoch": 0.799191102123357, |
| "grad_norm": 2.151653528213501, |
| "learning_rate": 1.0677319954997129e-06, |
| "loss": 0.39491477608680725, |
| "memory(GiB)": 74.62, |
| "step": 988, |
| "token_acc": 0.9015544041450777, |
| "train_speed(iter/s)": 0.022568 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.1169228553771973, |
| "learning_rate": 1.0594890277822151e-06, |
| "loss": 0.3383401334285736, |
| "memory(GiB)": 74.62, |
| "step": 989, |
| "token_acc": 0.8840125391849529, |
| "train_speed(iter/s)": 0.022569 |
| }, |
| { |
| "epoch": 0.8008088978766431, |
| "grad_norm": 2.4547696113586426, |
| "learning_rate": 1.0512742293802558e-06, |
| "loss": 0.38963425159454346, |
| "memory(GiB)": 74.62, |
| "step": 990, |
| "token_acc": 0.8666666666666667, |
| "train_speed(iter/s)": 0.022569 |
| }, |
| { |
| "epoch": 0.8016177957532862, |
| "grad_norm": 1.8448153734207153, |
| "learning_rate": 1.0430876590185162e-06, |
| "loss": 0.36352628469467163, |
| "memory(GiB)": 74.62, |
| "step": 991, |
| "token_acc": 0.92, |
| "train_speed(iter/s)": 0.022569 |
| }, |
| { |
| "epoch": 0.8024266936299292, |
| "grad_norm": 1.883742094039917, |
| "learning_rate": 1.0349293752198842e-06, |
| "loss": 0.37957262992858887, |
| "memory(GiB)": 74.62, |
| "step": 992, |
| "token_acc": 0.887240356083086, |
| "train_speed(iter/s)": 0.022569 |
| }, |
| { |
| "epoch": 0.8032355915065723, |
| "grad_norm": 2.0374629497528076, |
| "learning_rate": 1.0267994363050387e-06, |
| "loss": 0.3739085793495178, |
| "memory(GiB)": 74.62, |
| "step": 993, |
| "token_acc": 0.834061135371179, |
| "train_speed(iter/s)": 0.02257 |
| }, |
| { |
| "epoch": 0.8040444893832154, |
| "grad_norm": 2.806663751602173, |
| "learning_rate": 1.0186979003920273e-06, |
| "loss": 0.31939688324928284, |
| "memory(GiB)": 74.62, |
| "step": 994, |
| "token_acc": 0.8784313725490196, |
| "train_speed(iter/s)": 0.02257 |
| }, |
| { |
| "epoch": 0.8048533872598584, |
| "grad_norm": 2.3647608757019043, |
| "learning_rate": 1.0106248253958607e-06, |
| "loss": 0.37592533230781555, |
| "memory(GiB)": 74.62, |
| "step": 995, |
| "token_acc": 0.900355871886121, |
| "train_speed(iter/s)": 0.02257 |
| }, |
| { |
| "epoch": 0.8056622851365015, |
| "grad_norm": 2.112464427947998, |
| "learning_rate": 1.0025802690280851e-06, |
| "loss": 0.3363335132598877, |
| "memory(GiB)": 74.62, |
| "step": 996, |
| "token_acc": 0.9078014184397163, |
| "train_speed(iter/s)": 0.02257 |
| }, |
| { |
| "epoch": 0.8064711830131446, |
| "grad_norm": 2.177457809448242, |
| "learning_rate": 9.945642887963842e-07, |
| "loss": 0.38282421231269836, |
| "memory(GiB)": 74.62, |
| "step": 997, |
| "token_acc": 0.8685446009389671, |
| "train_speed(iter/s)": 0.022571 |
| }, |
| { |
| "epoch": 0.8072800808897876, |
| "grad_norm": 2.463026523590088, |
| "learning_rate": 9.86576942004156e-07, |
| "loss": 0.3649854063987732, |
| "memory(GiB)": 74.62, |
| "step": 998, |
| "token_acc": 0.8651685393258427, |
| "train_speed(iter/s)": 0.022571 |
| }, |
| { |
| "epoch": 0.8080889787664307, |
| "grad_norm": 2.1493732929229736, |
| "learning_rate": 9.78618285750112e-07, |
| "loss": 0.4093163013458252, |
| "memory(GiB)": 74.62, |
| "step": 999, |
| "token_acc": 0.8106060606060606, |
| "train_speed(iter/s)": 0.022571 |
| }, |
| { |
| "epoch": 0.8088978766430738, |
| "grad_norm": 1.8683381080627441, |
| "learning_rate": 9.70688376927864e-07, |
| "loss": 0.3501003682613373, |
| "memory(GiB)": 74.62, |
| "step": 1000, |
| "token_acc": 0.8641975308641975, |
| "train_speed(iter/s)": 0.022571 |
| }, |
| { |
| "epoch": 0.8088978766430738, |
| "eval_loss": 0.36755362153053284, |
| "eval_runtime": 428.8026, |
| "eval_samples_per_second": 3.727, |
| "eval_steps_per_second": 0.117, |
| "eval_token_acc": 0.8743528175883545, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8097067745197168, |
| "grad_norm": 6.642233371734619, |
| "learning_rate": 9.627872722255154e-07, |
| "loss": 0.3149925470352173, |
| "memory(GiB)": 74.62, |
| "step": 1001, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022354 |
| }, |
| { |
| "epoch": 0.8105156723963599, |
| "grad_norm": 2.2048041820526123, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.4250641167163849, |
| "memory(GiB)": 74.62, |
| "step": 1002, |
| "token_acc": 0.8754448398576512, |
| "train_speed(iter/s)": 0.022354 |
| }, |
| { |
| "epoch": 0.8113245702730031, |
| "grad_norm": 1.8660839796066284, |
| "learning_rate": 9.470717009029889e-07, |
| "loss": 0.32009008526802063, |
| "memory(GiB)": 74.62, |
| "step": 1003, |
| "token_acc": 0.89, |
| "train_speed(iter/s)": 0.022354 |
| }, |
| { |
| "epoch": 0.8121334681496462, |
| "grad_norm": 1.9054193496704102, |
| "learning_rate": 9.39257346627857e-07, |
| "loss": 0.3357139825820923, |
| "memory(GiB)": 74.62, |
| "step": 1004, |
| "token_acc": 0.9012875536480687, |
| "train_speed(iter/s)": 0.022355 |
| }, |
| { |
| "epoch": 0.8129423660262892, |
| "grad_norm": 3.9081051349639893, |
| "learning_rate": 9.314720211619166e-07, |
| "loss": 0.38648778200149536, |
| "memory(GiB)": 74.62, |
| "step": 1005, |
| "token_acc": 0.9029850746268657, |
| "train_speed(iter/s)": 0.022355 |
| }, |
| { |
| "epoch": 0.8137512639029323, |
| "grad_norm": 2.1040167808532715, |
| "learning_rate": 9.237157801596958e-07, |
| "loss": 0.3301439881324768, |
| "memory(GiB)": 74.62, |
| "step": 1006, |
| "token_acc": 0.8925925925925926, |
| "train_speed(iter/s)": 0.022356 |
| }, |
| { |
| "epoch": 0.8145601617795754, |
| "grad_norm": 1.6679681539535522, |
| "learning_rate": 9.159886790678124e-07, |
| "loss": 0.37634193897247314, |
| "memory(GiB)": 74.62, |
| "step": 1007, |
| "token_acc": 0.8092485549132948, |
| "train_speed(iter/s)": 0.022356 |
| }, |
| { |
| "epoch": 0.8153690596562184, |
| "grad_norm": 2.3380022048950195, |
| "learning_rate": 9.082907731245733e-07, |
| "loss": 0.4119229316711426, |
| "memory(GiB)": 74.62, |
| "step": 1008, |
| "token_acc": 0.8262548262548263, |
| "train_speed(iter/s)": 0.022357 |
| }, |
| { |
| "epoch": 0.8161779575328615, |
| "grad_norm": 1.9643757343292236, |
| "learning_rate": 9.006221173595741e-07, |
| "loss": 0.355658620595932, |
| "memory(GiB)": 74.62, |
| "step": 1009, |
| "token_acc": 0.926530612244898, |
| "train_speed(iter/s)": 0.022357 |
| }, |
| { |
| "epoch": 0.8169868554095046, |
| "grad_norm": 1.6694306135177612, |
| "learning_rate": 8.929827665933211e-07, |
| "loss": 0.3310469388961792, |
| "memory(GiB)": 74.62, |
| "step": 1010, |
| "token_acc": 0.9036697247706422, |
| "train_speed(iter/s)": 0.022358 |
| }, |
| { |
| "epoch": 0.8177957532861476, |
| "grad_norm": 3.9332058429718018, |
| "learning_rate": 8.853727754368191e-07, |
| "loss": 0.3335992693901062, |
| "memory(GiB)": 74.62, |
| "step": 1011, |
| "token_acc": 0.9045226130653267, |
| "train_speed(iter/s)": 0.022358 |
| }, |
| { |
| "epoch": 0.8186046511627907, |
| "grad_norm": 2.0935213565826416, |
| "learning_rate": 8.777921982911996e-07, |
| "loss": 0.3944769501686096, |
| "memory(GiB)": 74.62, |
| "step": 1012, |
| "token_acc": 0.8847583643122676, |
| "train_speed(iter/s)": 0.022359 |
| }, |
| { |
| "epoch": 0.8194135490394338, |
| "grad_norm": 1.8062115907669067, |
| "learning_rate": 8.702410893473173e-07, |
| "loss": 0.3291887938976288, |
| "memory(GiB)": 74.62, |
| "step": 1013, |
| "token_acc": 0.8565217391304348, |
| "train_speed(iter/s)": 0.022359 |
| }, |
| { |
| "epoch": 0.8202224469160768, |
| "grad_norm": 2.1609699726104736, |
| "learning_rate": 8.627195025853735e-07, |
| "loss": 0.2895755469799042, |
| "memory(GiB)": 74.62, |
| "step": 1014, |
| "token_acc": 0.9145299145299145, |
| "train_speed(iter/s)": 0.02236 |
| }, |
| { |
| "epoch": 0.8210313447927199, |
| "grad_norm": 2.0408060550689697, |
| "learning_rate": 8.552274917745246e-07, |
| "loss": 0.3750014901161194, |
| "memory(GiB)": 74.62, |
| "step": 1015, |
| "token_acc": 0.8930817610062893, |
| "train_speed(iter/s)": 0.02236 |
| }, |
| { |
| "epoch": 0.821840242669363, |
| "grad_norm": 2.2596545219421387, |
| "learning_rate": 8.477651104724994e-07, |
| "loss": 0.3800932466983795, |
| "memory(GiB)": 74.62, |
| "step": 1016, |
| "token_acc": 0.8439306358381503, |
| "train_speed(iter/s)": 0.022361 |
| }, |
| { |
| "epoch": 0.822649140546006, |
| "grad_norm": 1.547613263130188, |
| "learning_rate": 8.40332412025216e-07, |
| "loss": 0.3251078128814697, |
| "memory(GiB)": 74.62, |
| "step": 1017, |
| "token_acc": 0.9111969111969112, |
| "train_speed(iter/s)": 0.022361 |
| }, |
| { |
| "epoch": 0.8234580384226491, |
| "grad_norm": 2.1521153450012207, |
| "learning_rate": 8.329294495663981e-07, |
| "loss": 0.38296395540237427, |
| "memory(GiB)": 74.62, |
| "step": 1018, |
| "token_acc": 0.8782894736842105, |
| "train_speed(iter/s)": 0.022361 |
| }, |
| { |
| "epoch": 0.8242669362992923, |
| "grad_norm": 2.0719387531280518, |
| "learning_rate": 8.255562760172004e-07, |
| "loss": 0.3523367643356323, |
| "memory(GiB)": 74.62, |
| "step": 1019, |
| "token_acc": 0.8269896193771626, |
| "train_speed(iter/s)": 0.022362 |
| }, |
| { |
| "epoch": 0.8250758341759353, |
| "grad_norm": 2.2503058910369873, |
| "learning_rate": 8.18212944085826e-07, |
| "loss": 0.37082982063293457, |
| "memory(GiB)": 74.62, |
| "step": 1020, |
| "token_acc": 0.8984126984126984, |
| "train_speed(iter/s)": 0.022362 |
| }, |
| { |
| "epoch": 0.8258847320525784, |
| "grad_norm": 2.572887659072876, |
| "learning_rate": 8.108995062671482e-07, |
| "loss": 0.44092637300491333, |
| "memory(GiB)": 74.62, |
| "step": 1021, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022363 |
| }, |
| { |
| "epoch": 0.8266936299292215, |
| "grad_norm": 2.121467113494873, |
| "learning_rate": 8.036160148423449e-07, |
| "loss": 0.3986678123474121, |
| "memory(GiB)": 74.62, |
| "step": 1022, |
| "token_acc": 0.9176470588235294, |
| "train_speed(iter/s)": 0.022363 |
| }, |
| { |
| "epoch": 0.8275025278058645, |
| "grad_norm": 1.8472903966903687, |
| "learning_rate": 7.963625218785099e-07, |
| "loss": 0.35733747482299805, |
| "memory(GiB)": 74.62, |
| "step": 1023, |
| "token_acc": 0.9130434782608695, |
| "train_speed(iter/s)": 0.022364 |
| }, |
| { |
| "epoch": 0.8283114256825076, |
| "grad_norm": 1.9388898611068726, |
| "learning_rate": 7.891390792282927e-07, |
| "loss": 0.2967044711112976, |
| "memory(GiB)": 74.62, |
| "step": 1024, |
| "token_acc": 0.9016393442622951, |
| "train_speed(iter/s)": 0.022364 |
| }, |
| { |
| "epoch": 0.8291203235591507, |
| "grad_norm": 1.7470604181289673, |
| "learning_rate": 7.819457385295254e-07, |
| "loss": 0.31166231632232666, |
| "memory(GiB)": 74.62, |
| "step": 1025, |
| "token_acc": 0.9090909090909091, |
| "train_speed(iter/s)": 0.022364 |
| }, |
| { |
| "epoch": 0.8299292214357937, |
| "grad_norm": 2.2927539348602295, |
| "learning_rate": 7.747825512048462e-07, |
| "loss": 0.3713032007217407, |
| "memory(GiB)": 74.62, |
| "step": 1026, |
| "token_acc": 0.8540540540540541, |
| "train_speed(iter/s)": 0.022365 |
| }, |
| { |
| "epoch": 0.8307381193124368, |
| "grad_norm": 2.0093610286712646, |
| "learning_rate": 7.676495684613433e-07, |
| "loss": 0.3384319543838501, |
| "memory(GiB)": 74.62, |
| "step": 1027, |
| "token_acc": 0.9325842696629213, |
| "train_speed(iter/s)": 0.022365 |
| }, |
| { |
| "epoch": 0.8315470171890799, |
| "grad_norm": 1.9166637659072876, |
| "learning_rate": 7.605468412901801e-07, |
| "loss": 0.4422561824321747, |
| "memory(GiB)": 74.62, |
| "step": 1028, |
| "token_acc": 0.8424657534246576, |
| "train_speed(iter/s)": 0.022366 |
| }, |
| { |
| "epoch": 0.8323559150657229, |
| "grad_norm": 2.4499292373657227, |
| "learning_rate": 7.534744204662348e-07, |
| "loss": 0.42556819319725037, |
| "memory(GiB)": 74.62, |
| "step": 1029, |
| "token_acc": 0.8181818181818182, |
| "train_speed(iter/s)": 0.022366 |
| }, |
| { |
| "epoch": 0.833164812942366, |
| "grad_norm": 2.4436521530151367, |
| "learning_rate": 7.464323565477372e-07, |
| "loss": 0.46478235721588135, |
| "memory(GiB)": 74.62, |
| "step": 1030, |
| "token_acc": 0.8811881188118812, |
| "train_speed(iter/s)": 0.022367 |
| }, |
| { |
| "epoch": 0.8339737108190091, |
| "grad_norm": 1.8678390979766846, |
| "learning_rate": 7.394206998759013e-07, |
| "loss": 0.34241783618927, |
| "memory(GiB)": 74.62, |
| "step": 1031, |
| "token_acc": 0.8908450704225352, |
| "train_speed(iter/s)": 0.022367 |
| }, |
| { |
| "epoch": 0.8347826086956521, |
| "grad_norm": 2.002629041671753, |
| "learning_rate": 7.324395005745772e-07, |
| "loss": 0.3532907962799072, |
| "memory(GiB)": 74.62, |
| "step": 1032, |
| "token_acc": 0.8291814946619217, |
| "train_speed(iter/s)": 0.022368 |
| }, |
| { |
| "epoch": 0.8355915065722952, |
| "grad_norm": 2.4754257202148438, |
| "learning_rate": 7.254888085498812e-07, |
| "loss": 0.39124253392219543, |
| "memory(GiB)": 74.62, |
| "step": 1033, |
| "token_acc": 0.8664122137404581, |
| "train_speed(iter/s)": 0.022368 |
| }, |
| { |
| "epoch": 0.8364004044489384, |
| "grad_norm": 2.009551763534546, |
| "learning_rate": 7.185686734898478e-07, |
| "loss": 0.3519361913204193, |
| "memory(GiB)": 74.62, |
| "step": 1034, |
| "token_acc": 0.8725490196078431, |
| "train_speed(iter/s)": 0.022368 |
| }, |
| { |
| "epoch": 0.8372093023255814, |
| "grad_norm": 2.077303886413574, |
| "learning_rate": 7.116791448640664e-07, |
| "loss": 0.3848615884780884, |
| "memory(GiB)": 74.62, |
| "step": 1035, |
| "token_acc": 0.8981481481481481, |
| "train_speed(iter/s)": 0.022369 |
| }, |
| { |
| "epoch": 0.8380182002022245, |
| "grad_norm": 1.8623238801956177, |
| "learning_rate": 7.048202719233344e-07, |
| "loss": 0.3747529983520508, |
| "memory(GiB)": 74.62, |
| "step": 1036, |
| "token_acc": 0.9141914191419142, |
| "train_speed(iter/s)": 0.022369 |
| }, |
| { |
| "epoch": 0.8388270980788676, |
| "grad_norm": 1.8672590255737305, |
| "learning_rate": 6.979921036993042e-07, |
| "loss": 0.3627777099609375, |
| "memory(GiB)": 74.62, |
| "step": 1037, |
| "token_acc": 0.8225352112676056, |
| "train_speed(iter/s)": 0.02237 |
| }, |
| { |
| "epoch": 0.8396359959555106, |
| "grad_norm": 2.0797042846679688, |
| "learning_rate": 6.911946890041254e-07, |
| "loss": 0.4054332375526428, |
| "memory(GiB)": 74.62, |
| "step": 1038, |
| "token_acc": 0.8860759493670886, |
| "train_speed(iter/s)": 0.02237 |
| }, |
| { |
| "epoch": 0.8404448938321537, |
| "grad_norm": 2.2241296768188477, |
| "learning_rate": 6.844280764301075e-07, |
| "loss": 0.33668115735054016, |
| "memory(GiB)": 74.62, |
| "step": 1039, |
| "token_acc": 0.9083665338645418, |
| "train_speed(iter/s)": 0.022371 |
| }, |
| { |
| "epoch": 0.8412537917087968, |
| "grad_norm": 1.7550405263900757, |
| "learning_rate": 6.776923143493636e-07, |
| "loss": 0.3522379696369171, |
| "memory(GiB)": 74.62, |
| "step": 1040, |
| "token_acc": 0.8508771929824561, |
| "train_speed(iter/s)": 0.022371 |
| }, |
| { |
| "epoch": 0.8420626895854398, |
| "grad_norm": 1.8860352039337158, |
| "learning_rate": 6.709874509134684e-07, |
| "loss": 0.4433209300041199, |
| "memory(GiB)": 74.62, |
| "step": 1041, |
| "token_acc": 0.8392282958199357, |
| "train_speed(iter/s)": 0.022371 |
| }, |
| { |
| "epoch": 0.8428715874620829, |
| "grad_norm": 2.263840913772583, |
| "learning_rate": 6.643135340531137e-07, |
| "loss": 0.3951689302921295, |
| "memory(GiB)": 74.62, |
| "step": 1042, |
| "token_acc": 0.8928571428571429, |
| "train_speed(iter/s)": 0.022372 |
| }, |
| { |
| "epoch": 0.843680485338726, |
| "grad_norm": 2.3143765926361084, |
| "learning_rate": 6.576706114777626e-07, |
| "loss": 0.39435216784477234, |
| "memory(GiB)": 74.62, |
| "step": 1043, |
| "token_acc": 0.8184523809523809, |
| "train_speed(iter/s)": 0.022372 |
| }, |
| { |
| "epoch": 0.844489383215369, |
| "grad_norm": 2.4204423427581787, |
| "learning_rate": 6.510587306753135e-07, |
| "loss": 0.38613927364349365, |
| "memory(GiB)": 74.62, |
| "step": 1044, |
| "token_acc": 0.8405797101449275, |
| "train_speed(iter/s)": 0.022373 |
| }, |
| { |
| "epoch": 0.8452982810920121, |
| "grad_norm": 1.9565153121948242, |
| "learning_rate": 6.444779389117579e-07, |
| "loss": 0.3638315498828888, |
| "memory(GiB)": 74.62, |
| "step": 1045, |
| "token_acc": 0.8671328671328671, |
| "train_speed(iter/s)": 0.022373 |
| }, |
| { |
| "epoch": 0.8461071789686552, |
| "grad_norm": 1.82338547706604, |
| "learning_rate": 6.379282832308414e-07, |
| "loss": 0.3477684557437897, |
| "memory(GiB)": 74.62, |
| "step": 1046, |
| "token_acc": 0.8731343283582089, |
| "train_speed(iter/s)": 0.022374 |
| }, |
| { |
| "epoch": 0.8469160768452982, |
| "grad_norm": 2.053645610809326, |
| "learning_rate": 6.314098104537325e-07, |
| "loss": 0.359966516494751, |
| "memory(GiB)": 74.62, |
| "step": 1047, |
| "token_acc": 0.84, |
| "train_speed(iter/s)": 0.022374 |
| }, |
| { |
| "epoch": 0.8477249747219413, |
| "grad_norm": 2.145159959793091, |
| "learning_rate": 6.249225671786785e-07, |
| "loss": 0.3331785202026367, |
| "memory(GiB)": 74.62, |
| "step": 1048, |
| "token_acc": 0.9153846153846154, |
| "train_speed(iter/s)": 0.022375 |
| }, |
| { |
| "epoch": 0.8485338725985845, |
| "grad_norm": 2.2616126537323, |
| "learning_rate": 6.184665997806832e-07, |
| "loss": 0.3494233191013336, |
| "memory(GiB)": 74.62, |
| "step": 1049, |
| "token_acc": 0.8663594470046083, |
| "train_speed(iter/s)": 0.022375 |
| }, |
| { |
| "epoch": 0.8493427704752275, |
| "grad_norm": 2.032336711883545, |
| "learning_rate": 6.120419544111655e-07, |
| "loss": 0.35964176058769226, |
| "memory(GiB)": 74.62, |
| "step": 1050, |
| "token_acc": 0.893687707641196, |
| "train_speed(iter/s)": 0.022375 |
| }, |
| { |
| "epoch": 0.8501516683518706, |
| "grad_norm": 1.9737732410430908, |
| "learning_rate": 6.056486769976388e-07, |
| "loss": 0.37345680594444275, |
| "memory(GiB)": 74.62, |
| "step": 1051, |
| "token_acc": 0.8767123287671232, |
| "train_speed(iter/s)": 0.022376 |
| }, |
| { |
| "epoch": 0.8509605662285137, |
| "grad_norm": 3.4677176475524902, |
| "learning_rate": 5.992868132433755e-07, |
| "loss": 0.3770935535430908, |
| "memory(GiB)": 74.62, |
| "step": 1052, |
| "token_acc": 0.8894230769230769, |
| "train_speed(iter/s)": 0.022376 |
| }, |
| { |
| "epoch": 0.8517694641051567, |
| "grad_norm": 2.0082759857177734, |
| "learning_rate": 5.929564086270834e-07, |
| "loss": 0.40682828426361084, |
| "memory(GiB)": 74.62, |
| "step": 1053, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022376 |
| }, |
| { |
| "epoch": 0.8525783619817998, |
| "grad_norm": 1.6112618446350098, |
| "learning_rate": 5.866575084025816e-07, |
| "loss": 0.3442041873931885, |
| "memory(GiB)": 74.62, |
| "step": 1054, |
| "token_acc": 0.86, |
| "train_speed(iter/s)": 0.022377 |
| }, |
| { |
| "epoch": 0.8533872598584429, |
| "grad_norm": 2.1978917121887207, |
| "learning_rate": 5.803901575984721e-07, |
| "loss": 0.37148886919021606, |
| "memory(GiB)": 74.62, |
| "step": 1055, |
| "token_acc": 0.9269406392694064, |
| "train_speed(iter/s)": 0.022377 |
| }, |
| { |
| "epoch": 0.854196157735086, |
| "grad_norm": 2.1480906009674072, |
| "learning_rate": 5.74154401017824e-07, |
| "loss": 0.37758809328079224, |
| "memory(GiB)": 74.62, |
| "step": 1056, |
| "token_acc": 0.8744939271255061, |
| "train_speed(iter/s)": 0.022378 |
| }, |
| { |
| "epoch": 0.855005055611729, |
| "grad_norm": 2.161919593811035, |
| "learning_rate": 5.679502832378497e-07, |
| "loss": 0.3692307472229004, |
| "memory(GiB)": 74.62, |
| "step": 1057, |
| "token_acc": 0.8982035928143712, |
| "train_speed(iter/s)": 0.022378 |
| }, |
| { |
| "epoch": 0.8558139534883721, |
| "grad_norm": 2.31783390045166, |
| "learning_rate": 5.61777848609587e-07, |
| "loss": 0.36903953552246094, |
| "memory(GiB)": 74.62, |
| "step": 1058, |
| "token_acc": 0.8892857142857142, |
| "train_speed(iter/s)": 0.022378 |
| }, |
| { |
| "epoch": 0.8566228513650151, |
| "grad_norm": 2.582380771636963, |
| "learning_rate": 5.556371412575834e-07, |
| "loss": 0.40472832322120667, |
| "memory(GiB)": 74.62, |
| "step": 1059, |
| "token_acc": 0.8706896551724138, |
| "train_speed(iter/s)": 0.022379 |
| }, |
| { |
| "epoch": 0.8574317492416582, |
| "grad_norm": 1.9625579118728638, |
| "learning_rate": 5.495282050795763e-07, |
| "loss": 0.3849819302558899, |
| "memory(GiB)": 74.62, |
| "step": 1060, |
| "token_acc": 0.8406374501992032, |
| "train_speed(iter/s)": 0.022379 |
| }, |
| { |
| "epoch": 0.8582406471183013, |
| "grad_norm": 2.0185904502868652, |
| "learning_rate": 5.434510837461854e-07, |
| "loss": 0.43619173765182495, |
| "memory(GiB)": 74.62, |
| "step": 1061, |
| "token_acc": 0.8464730290456431, |
| "train_speed(iter/s)": 0.02238 |
| }, |
| { |
| "epoch": 0.8590495449949443, |
| "grad_norm": 2.0642013549804688, |
| "learning_rate": 5.374058207005945e-07, |
| "loss": 0.37471503019332886, |
| "memory(GiB)": 74.62, |
| "step": 1062, |
| "token_acc": 0.9219512195121952, |
| "train_speed(iter/s)": 0.02238 |
| }, |
| { |
| "epoch": 0.8598584428715874, |
| "grad_norm": 2.187964677810669, |
| "learning_rate": 5.313924591582453e-07, |
| "loss": 0.3878336548805237, |
| "memory(GiB)": 74.62, |
| "step": 1063, |
| "token_acc": 0.8531073446327684, |
| "train_speed(iter/s)": 0.02238 |
| }, |
| { |
| "epoch": 0.8606673407482305, |
| "grad_norm": 3.5268666744232178, |
| "learning_rate": 5.254110421065301e-07, |
| "loss": 0.4011298716068268, |
| "memory(GiB)": 74.62, |
| "step": 1064, |
| "token_acc": 0.8282442748091603, |
| "train_speed(iter/s)": 0.022381 |
| }, |
| { |
| "epoch": 0.8614762386248737, |
| "grad_norm": 1.9126622676849365, |
| "learning_rate": 5.194616123044749e-07, |
| "loss": 0.3823421597480774, |
| "memory(GiB)": 74.62, |
| "step": 1065, |
| "token_acc": 0.8555555555555555, |
| "train_speed(iter/s)": 0.022381 |
| }, |
| { |
| "epoch": 0.8622851365015167, |
| "grad_norm": 1.9851644039154053, |
| "learning_rate": 5.135442122824453e-07, |
| "loss": 0.41584277153015137, |
| "memory(GiB)": 74.62, |
| "step": 1066, |
| "token_acc": 0.896, |
| "train_speed(iter/s)": 0.022382 |
| }, |
| { |
| "epoch": 0.8630940343781598, |
| "grad_norm": 2.158141613006592, |
| "learning_rate": 5.076588843418345e-07, |
| "loss": 0.3853064775466919, |
| "memory(GiB)": 74.62, |
| "step": 1067, |
| "token_acc": 0.8201634877384196, |
| "train_speed(iter/s)": 0.022382 |
| }, |
| { |
| "epoch": 0.8639029322548029, |
| "grad_norm": 2.003866672515869, |
| "learning_rate": 5.018056705547652e-07, |
| "loss": 0.3744017481803894, |
| "memory(GiB)": 74.62, |
| "step": 1068, |
| "token_acc": 0.8693693693693694, |
| "train_speed(iter/s)": 0.022382 |
| }, |
| { |
| "epoch": 0.8647118301314459, |
| "grad_norm": 3.3579702377319336, |
| "learning_rate": 4.959846127637874e-07, |
| "loss": 0.3795039653778076, |
| "memory(GiB)": 74.62, |
| "step": 1069, |
| "token_acc": 0.8388625592417062, |
| "train_speed(iter/s)": 0.022383 |
| }, |
| { |
| "epoch": 0.865520728008089, |
| "grad_norm": 2.1418285369873047, |
| "learning_rate": 4.901957525815787e-07, |
| "loss": 0.35196787118911743, |
| "memory(GiB)": 74.62, |
| "step": 1070, |
| "token_acc": 0.8385650224215246, |
| "train_speed(iter/s)": 0.022383 |
| }, |
| { |
| "epoch": 0.8663296258847321, |
| "grad_norm": 2.060997486114502, |
| "learning_rate": 4.844391313906482e-07, |
| "loss": 0.3312758207321167, |
| "memory(GiB)": 74.62, |
| "step": 1071, |
| "token_acc": 0.8912280701754386, |
| "train_speed(iter/s)": 0.022384 |
| }, |
| { |
| "epoch": 0.8671385237613751, |
| "grad_norm": 2.250108242034912, |
| "learning_rate": 4.787147903430383e-07, |
| "loss": 0.4016328752040863, |
| "memory(GiB)": 74.62, |
| "step": 1072, |
| "token_acc": 0.8404669260700389, |
| "train_speed(iter/s)": 0.022384 |
| }, |
| { |
| "epoch": 0.8679474216380182, |
| "grad_norm": 1.5963561534881592, |
| "learning_rate": 4.730227703600354e-07, |
| "loss": 0.3070691227912903, |
| "memory(GiB)": 74.62, |
| "step": 1073, |
| "token_acc": 0.8928571428571429, |
| "train_speed(iter/s)": 0.022384 |
| }, |
| { |
| "epoch": 0.8687563195146613, |
| "grad_norm": 2.321164846420288, |
| "learning_rate": 4.6736311213186724e-07, |
| "loss": 0.32245370745658875, |
| "memory(GiB)": 74.62, |
| "step": 1074, |
| "token_acc": 0.8725868725868726, |
| "train_speed(iter/s)": 0.022385 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 1.9174984693527222, |
| "learning_rate": 4.617358561174279e-07, |
| "loss": 0.32412296533584595, |
| "memory(GiB)": 74.62, |
| "step": 1075, |
| "token_acc": 0.9473684210526315, |
| "train_speed(iter/s)": 0.022385 |
| }, |
| { |
| "epoch": 0.8703741152679474, |
| "grad_norm": 1.674944281578064, |
| "learning_rate": 4.561410425439744e-07, |
| "loss": 0.299832284450531, |
| "memory(GiB)": 74.62, |
| "step": 1076, |
| "token_acc": 0.9108527131782945, |
| "train_speed(iter/s)": 0.022386 |
| }, |
| { |
| "epoch": 0.8711830131445905, |
| "grad_norm": 1.9611433744430542, |
| "learning_rate": 4.505787114068433e-07, |
| "loss": 0.3502030670642853, |
| "memory(GiB)": 74.62, |
| "step": 1077, |
| "token_acc": 0.8602941176470589, |
| "train_speed(iter/s)": 0.022386 |
| }, |
| { |
| "epoch": 0.8719919110212335, |
| "grad_norm": 2.2846431732177734, |
| "learning_rate": 4.45048902469169e-07, |
| "loss": 0.39019766449928284, |
| "memory(GiB)": 74.62, |
| "step": 1078, |
| "token_acc": 0.8958333333333334, |
| "train_speed(iter/s)": 0.022386 |
| }, |
| { |
| "epoch": 0.8728008088978766, |
| "grad_norm": 2.490588903427124, |
| "learning_rate": 4.3955165526159306e-07, |
| "loss": 0.37344303727149963, |
| "memory(GiB)": 74.62, |
| "step": 1079, |
| "token_acc": 0.9163179916317992, |
| "train_speed(iter/s)": 0.022387 |
| }, |
| { |
| "epoch": 0.8736097067745198, |
| "grad_norm": 5.213693141937256, |
| "learning_rate": 4.3408700908198654e-07, |
| "loss": 0.3260120153427124, |
| "memory(GiB)": 74.62, |
| "step": 1080, |
| "token_acc": 0.8585858585858586, |
| "train_speed(iter/s)": 0.022387 |
| }, |
| { |
| "epoch": 0.8744186046511628, |
| "grad_norm": 2.62857723236084, |
| "learning_rate": 4.2865500299516747e-07, |
| "loss": 0.36192968487739563, |
| "memory(GiB)": 74.62, |
| "step": 1081, |
| "token_acc": 0.8915094339622641, |
| "train_speed(iter/s)": 0.022387 |
| }, |
| { |
| "epoch": 0.8752275025278059, |
| "grad_norm": 2.0130198001861572, |
| "learning_rate": 4.232556758326212e-07, |
| "loss": 0.35925909876823425, |
| "memory(GiB)": 74.62, |
| "step": 1082, |
| "token_acc": 0.8312236286919831, |
| "train_speed(iter/s)": 0.022388 |
| }, |
| { |
| "epoch": 0.876036400404449, |
| "grad_norm": 1.795419454574585, |
| "learning_rate": 4.178890661922241e-07, |
| "loss": 0.34093332290649414, |
| "memory(GiB)": 74.62, |
| "step": 1083, |
| "token_acc": 0.8543046357615894, |
| "train_speed(iter/s)": 0.022388 |
| }, |
| { |
| "epoch": 0.876845298281092, |
| "grad_norm": 2.5592668056488037, |
| "learning_rate": 4.125552124379628e-07, |
| "loss": 0.412899911403656, |
| "memory(GiB)": 74.62, |
| "step": 1084, |
| "token_acc": 0.85, |
| "train_speed(iter/s)": 0.022389 |
| }, |
| { |
| "epoch": 0.8776541961577351, |
| "grad_norm": 1.8965997695922852, |
| "learning_rate": 4.072541526996682e-07, |
| "loss": 0.3767935633659363, |
| "memory(GiB)": 74.62, |
| "step": 1085, |
| "token_acc": 0.8523676880222841, |
| "train_speed(iter/s)": 0.022389 |
| }, |
| { |
| "epoch": 0.8784630940343782, |
| "grad_norm": 2.412139415740967, |
| "learning_rate": 4.0198592487273426e-07, |
| "loss": 0.3973158597946167, |
| "memory(GiB)": 74.62, |
| "step": 1086, |
| "token_acc": 0.8678414096916299, |
| "train_speed(iter/s)": 0.022389 |
| }, |
| { |
| "epoch": 0.8792719919110212, |
| "grad_norm": 1.8268601894378662, |
| "learning_rate": 3.9675056661785563e-07, |
| "loss": 0.35584717988967896, |
| "memory(GiB)": 74.62, |
| "step": 1087, |
| "token_acc": 0.8561643835616438, |
| "train_speed(iter/s)": 0.02239 |
| }, |
| { |
| "epoch": 0.8800808897876643, |
| "grad_norm": 2.1522209644317627, |
| "learning_rate": 3.915481153607525e-07, |
| "loss": 0.37817463278770447, |
| "memory(GiB)": 74.62, |
| "step": 1088, |
| "token_acc": 0.8680851063829788, |
| "train_speed(iter/s)": 0.02239 |
| }, |
| { |
| "epoch": 0.8808897876643074, |
| "grad_norm": 1.805523157119751, |
| "learning_rate": 3.863786082919019e-07, |
| "loss": 0.33031123876571655, |
| "memory(GiB)": 74.62, |
| "step": 1089, |
| "token_acc": 0.9203539823008849, |
| "train_speed(iter/s)": 0.02239 |
| }, |
| { |
| "epoch": 0.8816986855409504, |
| "grad_norm": 1.8276246786117554, |
| "learning_rate": 3.8124208236627825e-07, |
| "loss": 0.32658106088638306, |
| "memory(GiB)": 74.62, |
| "step": 1090, |
| "token_acc": 0.900990099009901, |
| "train_speed(iter/s)": 0.022391 |
| }, |
| { |
| "epoch": 0.8825075834175935, |
| "grad_norm": 2.1186046600341797, |
| "learning_rate": 3.761385743030821e-07, |
| "loss": 0.3983362019062042, |
| "memory(GiB)": 74.62, |
| "step": 1091, |
| "token_acc": 0.9166666666666666, |
| "train_speed(iter/s)": 0.022391 |
| }, |
| { |
| "epoch": 0.8833164812942366, |
| "grad_norm": 2.2194223403930664, |
| "learning_rate": 3.710681205854838e-07, |
| "loss": 0.34843602776527405, |
| "memory(GiB)": 74.62, |
| "step": 1092, |
| "token_acc": 0.8144329896907216, |
| "train_speed(iter/s)": 0.022391 |
| }, |
| { |
| "epoch": 0.8841253791708796, |
| "grad_norm": 1.7586379051208496, |
| "learning_rate": 3.6603075746035886e-07, |
| "loss": 0.3717504143714905, |
| "memory(GiB)": 74.62, |
| "step": 1093, |
| "token_acc": 0.9043824701195219, |
| "train_speed(iter/s)": 0.022392 |
| }, |
| { |
| "epoch": 0.8849342770475227, |
| "grad_norm": 1.730454921722412, |
| "learning_rate": 3.6102652093802983e-07, |
| "loss": 0.33724552392959595, |
| "memory(GiB)": 74.62, |
| "step": 1094, |
| "token_acc": 0.8942857142857142, |
| "train_speed(iter/s)": 0.022392 |
| }, |
| { |
| "epoch": 0.8857431749241659, |
| "grad_norm": 1.7972487211227417, |
| "learning_rate": 3.5605544679200966e-07, |
| "loss": 0.40413105487823486, |
| "memory(GiB)": 74.62, |
| "step": 1095, |
| "token_acc": 0.8922413793103449, |
| "train_speed(iter/s)": 0.022393 |
| }, |
| { |
| "epoch": 0.8865520728008089, |
| "grad_norm": 2.4188039302825928, |
| "learning_rate": 3.511175705587433e-07, |
| "loss": 0.4261808693408966, |
| "memory(GiB)": 74.62, |
| "step": 1096, |
| "token_acc": 0.8990384615384616, |
| "train_speed(iter/s)": 0.022393 |
| }, |
| { |
| "epoch": 0.887360970677452, |
| "grad_norm": 2.6165802478790283, |
| "learning_rate": 3.462129275373577e-07, |
| "loss": 0.3905704617500305, |
| "memory(GiB)": 74.62, |
| "step": 1097, |
| "token_acc": 0.9346153846153846, |
| "train_speed(iter/s)": 0.022394 |
| }, |
| { |
| "epoch": 0.8881698685540951, |
| "grad_norm": 1.8218803405761719, |
| "learning_rate": 3.4134155278940594e-07, |
| "loss": 0.42883560061454773, |
| "memory(GiB)": 74.62, |
| "step": 1098, |
| "token_acc": 0.8745247148288974, |
| "train_speed(iter/s)": 0.022394 |
| }, |
| { |
| "epoch": 0.8889787664307381, |
| "grad_norm": 1.979760766029358, |
| "learning_rate": 3.3650348113861864e-07, |
| "loss": 0.36739590764045715, |
| "memory(GiB)": 74.62, |
| "step": 1099, |
| "token_acc": 0.8987341772151899, |
| "train_speed(iter/s)": 0.022394 |
| }, |
| { |
| "epoch": 0.8897876643073812, |
| "grad_norm": 2.169462203979492, |
| "learning_rate": 3.3169874717065564e-07, |
| "loss": 0.43099868297576904, |
| "memory(GiB)": 74.62, |
| "step": 1100, |
| "token_acc": 0.8531746031746031, |
| "train_speed(iter/s)": 0.022395 |
| }, |
| { |
| "epoch": 0.8905965621840243, |
| "grad_norm": 3.057952642440796, |
| "learning_rate": 3.269273852328547e-07, |
| "loss": 0.3875833749771118, |
| "memory(GiB)": 74.62, |
| "step": 1101, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.022395 |
| }, |
| { |
| "epoch": 0.8914054600606673, |
| "grad_norm": 1.8207221031188965, |
| "learning_rate": 3.2218942943399114e-07, |
| "loss": 0.3375704884529114, |
| "memory(GiB)": 74.62, |
| "step": 1102, |
| "token_acc": 0.8617511520737328, |
| "train_speed(iter/s)": 0.022396 |
| }, |
| { |
| "epoch": 0.8922143579373104, |
| "grad_norm": 2.1824142932891846, |
| "learning_rate": 3.174849136440294e-07, |
| "loss": 0.36066344380378723, |
| "memory(GiB)": 74.62, |
| "step": 1103, |
| "token_acc": 0.8494208494208494, |
| "train_speed(iter/s)": 0.022396 |
| }, |
| { |
| "epoch": 0.8930232558139535, |
| "grad_norm": 2.046804428100586, |
| "learning_rate": 3.1281387149388556e-07, |
| "loss": 0.39939042925834656, |
| "memory(GiB)": 74.62, |
| "step": 1104, |
| "token_acc": 0.8765432098765432, |
| "train_speed(iter/s)": 0.022396 |
| }, |
| { |
| "epoch": 0.8938321536905965, |
| "grad_norm": 2.1102182865142822, |
| "learning_rate": 3.081763363751844e-07, |
| "loss": 0.35777053236961365, |
| "memory(GiB)": 74.62, |
| "step": 1105, |
| "token_acc": 0.8803827751196173, |
| "train_speed(iter/s)": 0.022397 |
| }, |
| { |
| "epoch": 0.8946410515672396, |
| "grad_norm": 1.6538591384887695, |
| "learning_rate": 3.0357234144001766e-07, |
| "loss": 0.32706207036972046, |
| "memory(GiB)": 74.62, |
| "step": 1106, |
| "token_acc": 0.8989169675090253, |
| "train_speed(iter/s)": 0.022397 |
| }, |
| { |
| "epoch": 0.8954499494438827, |
| "grad_norm": 2.0191094875335693, |
| "learning_rate": 2.9900191960071544e-07, |
| "loss": 0.3731483817100525, |
| "memory(GiB)": 74.62, |
| "step": 1107, |
| "token_acc": 0.8875, |
| "train_speed(iter/s)": 0.022397 |
| }, |
| { |
| "epoch": 0.8962588473205257, |
| "grad_norm": 1.9920696020126343, |
| "learning_rate": 2.9446510352959924e-07, |
| "loss": 0.3792566657066345, |
| "memory(GiB)": 74.62, |
| "step": 1108, |
| "token_acc": 0.8431372549019608, |
| "train_speed(iter/s)": 0.022398 |
| }, |
| { |
| "epoch": 0.8970677451971688, |
| "grad_norm": 4.2869157791137695, |
| "learning_rate": 2.899619256587605e-07, |
| "loss": 0.4134003520011902, |
| "memory(GiB)": 74.62, |
| "step": 1109, |
| "token_acc": 0.8088888888888889, |
| "train_speed(iter/s)": 0.022398 |
| }, |
| { |
| "epoch": 0.897876643073812, |
| "grad_norm": 1.730612874031067, |
| "learning_rate": 2.854924181798202e-07, |
| "loss": 0.3089058995246887, |
| "memory(GiB)": 74.62, |
| "step": 1110, |
| "token_acc": 0.8550185873605948, |
| "train_speed(iter/s)": 0.022398 |
| }, |
| { |
| "epoch": 0.898685540950455, |
| "grad_norm": 2.020568370819092, |
| "learning_rate": 2.8105661304370256e-07, |
| "loss": 0.33643391728401184, |
| "memory(GiB)": 74.62, |
| "step": 1111, |
| "token_acc": 0.8732876712328768, |
| "train_speed(iter/s)": 0.022399 |
| }, |
| { |
| "epoch": 0.8994944388270981, |
| "grad_norm": 2.182412624359131, |
| "learning_rate": 2.7665454196040665e-07, |
| "loss": 0.39632314443588257, |
| "memory(GiB)": 74.62, |
| "step": 1112, |
| "token_acc": 0.8884297520661157, |
| "train_speed(iter/s)": 0.022399 |
| }, |
| { |
| "epoch": 0.9003033367037412, |
| "grad_norm": 2.2093279361724854, |
| "learning_rate": 2.722862363987749e-07, |
| "loss": 0.43140286207199097, |
| "memory(GiB)": 74.62, |
| "step": 1113, |
| "token_acc": 0.8701298701298701, |
| "train_speed(iter/s)": 0.022399 |
| }, |
| { |
| "epoch": 0.9011122345803843, |
| "grad_norm": 4.873557090759277, |
| "learning_rate": 2.6795172758627584e-07, |
| "loss": 0.40689289569854736, |
| "memory(GiB)": 74.62, |
| "step": 1114, |
| "token_acc": 0.880184331797235, |
| "train_speed(iter/s)": 0.0224 |
| }, |
| { |
| "epoch": 0.9019211324570273, |
| "grad_norm": 2.0055012702941895, |
| "learning_rate": 2.6365104650877716e-07, |
| "loss": 0.3976328372955322, |
| "memory(GiB)": 74.62, |
| "step": 1115, |
| "token_acc": 0.8812260536398467, |
| "train_speed(iter/s)": 0.0224 |
| }, |
| { |
| "epoch": 0.9027300303336704, |
| "grad_norm": 1.9500057697296143, |
| "learning_rate": 2.593842239103206e-07, |
| "loss": 0.40250563621520996, |
| "memory(GiB)": 74.62, |
| "step": 1116, |
| "token_acc": 0.8953168044077136, |
| "train_speed(iter/s)": 0.022401 |
| }, |
| { |
| "epoch": 0.9035389282103135, |
| "grad_norm": 1.8744258880615234, |
| "learning_rate": 2.5515129029290984e-07, |
| "loss": 0.35562485456466675, |
| "memory(GiB)": 74.62, |
| "step": 1117, |
| "token_acc": 0.8726591760299626, |
| "train_speed(iter/s)": 0.022401 |
| }, |
| { |
| "epoch": 0.9043478260869565, |
| "grad_norm": 1.818701982498169, |
| "learning_rate": 2.5095227591628467e-07, |
| "loss": 0.32878684997558594, |
| "memory(GiB)": 74.62, |
| "step": 1118, |
| "token_acc": 0.8952879581151832, |
| "train_speed(iter/s)": 0.022401 |
| }, |
| { |
| "epoch": 0.9051567239635996, |
| "grad_norm": 2.0827207565307617, |
| "learning_rate": 2.4678721079770984e-07, |
| "loss": 0.4192107617855072, |
| "memory(GiB)": 74.62, |
| "step": 1119, |
| "token_acc": 0.8461538461538461, |
| "train_speed(iter/s)": 0.022402 |
| }, |
| { |
| "epoch": 0.9059656218402427, |
| "grad_norm": 2.060375690460205, |
| "learning_rate": 2.4265612471176036e-07, |
| "loss": 0.3454943895339966, |
| "memory(GiB)": 74.62, |
| "step": 1120, |
| "token_acc": 0.9144981412639405, |
| "train_speed(iter/s)": 0.022402 |
| }, |
| { |
| "epoch": 0.9067745197168857, |
| "grad_norm": 1.8084218502044678, |
| "learning_rate": 2.385590471901045e-07, |
| "loss": 0.31142184138298035, |
| "memory(GiB)": 74.62, |
| "step": 1121, |
| "token_acc": 0.8678571428571429, |
| "train_speed(iter/s)": 0.022402 |
| }, |
| { |
| "epoch": 0.9075834175935288, |
| "grad_norm": 2.012327194213867, |
| "learning_rate": 2.3449600752129598e-07, |
| "loss": 0.3716868460178375, |
| "memory(GiB)": 74.62, |
| "step": 1122, |
| "token_acc": 0.8819672131147541, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.9083923154701719, |
| "grad_norm": 2.0449485778808594, |
| "learning_rate": 2.3046703475056554e-07, |
| "loss": 0.3710024356842041, |
| "memory(GiB)": 74.62, |
| "step": 1123, |
| "token_acc": 0.8555555555555555, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.9092012133468149, |
| "grad_norm": 2.0092179775238037, |
| "learning_rate": 2.2647215767961083e-07, |
| "loss": 0.3403990864753723, |
| "memory(GiB)": 74.62, |
| "step": 1124, |
| "token_acc": 0.925, |
| "train_speed(iter/s)": 0.022403 |
| }, |
| { |
| "epoch": 0.910010111223458, |
| "grad_norm": 2.1806256771087646, |
| "learning_rate": 2.2251140486639068e-07, |
| "loss": 0.37321048974990845, |
| "memory(GiB)": 74.62, |
| "step": 1125, |
| "token_acc": 0.9308510638297872, |
| "train_speed(iter/s)": 0.022404 |
| }, |
| { |
| "epoch": 0.9108190091001012, |
| "grad_norm": 2.1333301067352295, |
| "learning_rate": 2.1858480462492283e-07, |
| "loss": 0.37797796726226807, |
| "memory(GiB)": 74.62, |
| "step": 1126, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022404 |
| }, |
| { |
| "epoch": 0.9116279069767442, |
| "grad_norm": 2.240083694458008, |
| "learning_rate": 2.1469238502507926e-07, |
| "loss": 0.3420672118663788, |
| "memory(GiB)": 74.62, |
| "step": 1127, |
| "token_acc": 0.889795918367347, |
| "train_speed(iter/s)": 0.022404 |
| }, |
| { |
| "epoch": 0.9124368048533873, |
| "grad_norm": 2.032658338546753, |
| "learning_rate": 2.1083417389238858e-07, |
| "loss": 0.3892640471458435, |
| "memory(GiB)": 74.62, |
| "step": 1128, |
| "token_acc": 0.8831168831168831, |
| "train_speed(iter/s)": 0.022405 |
| }, |
| { |
| "epoch": 0.9132457027300304, |
| "grad_norm": 2.2067453861236572, |
| "learning_rate": 2.0701019880783324e-07, |
| "loss": 0.33542943000793457, |
| "memory(GiB)": 74.62, |
| "step": 1129, |
| "token_acc": 0.8740458015267175, |
| "train_speed(iter/s)": 0.022405 |
| }, |
| { |
| "epoch": 0.9140546006066734, |
| "grad_norm": 2.052781343460083, |
| "learning_rate": 2.0322048710765485e-07, |
| "loss": 0.3520893454551697, |
| "memory(GiB)": 74.62, |
| "step": 1130, |
| "token_acc": 0.8688524590163934, |
| "train_speed(iter/s)": 0.022405 |
| }, |
| { |
| "epoch": 0.9148634984833165, |
| "grad_norm": 7.5011773109436035, |
| "learning_rate": 1.9946506588315818e-07, |
| "loss": 0.3370997905731201, |
| "memory(GiB)": 74.62, |
| "step": 1131, |
| "token_acc": 0.8984771573604061, |
| "train_speed(iter/s)": 0.022406 |
| }, |
| { |
| "epoch": 0.9156723963599596, |
| "grad_norm": 2.2244808673858643, |
| "learning_rate": 1.957439619805196e-07, |
| "loss": 0.3234095871448517, |
| "memory(GiB)": 74.62, |
| "step": 1132, |
| "token_acc": 0.8681818181818182, |
| "train_speed(iter/s)": 0.022406 |
| }, |
| { |
| "epoch": 0.9164812942366026, |
| "grad_norm": 1.946089506149292, |
| "learning_rate": 1.9205720200058843e-07, |
| "loss": 0.39126190543174744, |
| "memory(GiB)": 74.62, |
| "step": 1133, |
| "token_acc": 0.909433962264151, |
| "train_speed(iter/s)": 0.022406 |
| }, |
| { |
| "epoch": 0.9172901921132457, |
| "grad_norm": 11.597419738769531, |
| "learning_rate": 1.8840481229870644e-07, |
| "loss": 0.37995028495788574, |
| "memory(GiB)": 74.62, |
| "step": 1134, |
| "token_acc": 0.8557046979865772, |
| "train_speed(iter/s)": 0.022407 |
| }, |
| { |
| "epoch": 0.9180990899898888, |
| "grad_norm": 1.789217233657837, |
| "learning_rate": 1.84786818984512e-07, |
| "loss": 0.3505871295928955, |
| "memory(GiB)": 74.62, |
| "step": 1135, |
| "token_acc": 0.9249146757679181, |
| "train_speed(iter/s)": 0.022407 |
| }, |
| { |
| "epoch": 0.9189079878665318, |
| "grad_norm": 1.919080138206482, |
| "learning_rate": 1.8120324792175569e-07, |
| "loss": 0.3749197721481323, |
| "memory(GiB)": 74.62, |
| "step": 1136, |
| "token_acc": 0.886435331230284, |
| "train_speed(iter/s)": 0.022407 |
| }, |
| { |
| "epoch": 0.9197168857431749, |
| "grad_norm": 2.741631269454956, |
| "learning_rate": 1.776541247281177e-07, |
| "loss": 0.3757126033306122, |
| "memory(GiB)": 74.62, |
| "step": 1137, |
| "token_acc": 0.8934010152284264, |
| "train_speed(iter/s)": 0.022408 |
| }, |
| { |
| "epoch": 0.920525783619818, |
| "grad_norm": 1.856645107269287, |
| "learning_rate": 1.7413947477501913e-07, |
| "loss": 0.3616572320461273, |
| "memory(GiB)": 74.62, |
| "step": 1138, |
| "token_acc": 0.9, |
| "train_speed(iter/s)": 0.022408 |
| }, |
| { |
| "epoch": 0.921334681496461, |
| "grad_norm": 2.687711477279663, |
| "learning_rate": 1.7065932318744704e-07, |
| "loss": 0.3780667185783386, |
| "memory(GiB)": 74.62, |
| "step": 1139, |
| "token_acc": 0.8723404255319149, |
| "train_speed(iter/s)": 0.022409 |
| }, |
| { |
| "epoch": 0.9221435793731041, |
| "grad_norm": 1.6964043378829956, |
| "learning_rate": 1.6721369484377082e-07, |
| "loss": 0.35959312319755554, |
| "memory(GiB)": 74.62, |
| "step": 1140, |
| "token_acc": 0.8790849673202614, |
| "train_speed(iter/s)": 0.022409 |
| }, |
| { |
| "epoch": 0.9229524772497473, |
| "grad_norm": 2.040339469909668, |
| "learning_rate": 1.6380261437556666e-07, |
| "loss": 0.34360718727111816, |
| "memory(GiB)": 74.62, |
| "step": 1141, |
| "token_acc": 0.9063829787234042, |
| "train_speed(iter/s)": 0.022409 |
| }, |
| { |
| "epoch": 0.9237613751263903, |
| "grad_norm": 1.9790493249893188, |
| "learning_rate": 1.6042610616743782e-07, |
| "loss": 0.36330220103263855, |
| "memory(GiB)": 74.62, |
| "step": 1142, |
| "token_acc": 0.8585858585858586, |
| "train_speed(iter/s)": 0.02241 |
| }, |
| { |
| "epoch": 0.9245702730030334, |
| "grad_norm": 1.878999948501587, |
| "learning_rate": 1.5708419435684463e-07, |
| "loss": 0.3349642753601074, |
| "memory(GiB)": 74.62, |
| "step": 1143, |
| "token_acc": 0.8650519031141869, |
| "train_speed(iter/s)": 0.02241 |
| }, |
| { |
| "epoch": 0.9253791708796765, |
| "grad_norm": 1.9973299503326416, |
| "learning_rate": 1.5377690283392977e-07, |
| "loss": 0.3546566963195801, |
| "memory(GiB)": 74.62, |
| "step": 1144, |
| "token_acc": 0.8781725888324873, |
| "train_speed(iter/s)": 0.02241 |
| }, |
| { |
| "epoch": 0.9261880687563195, |
| "grad_norm": 1.9398893117904663, |
| "learning_rate": 1.505042552413466e-07, |
| "loss": 0.34872984886169434, |
| "memory(GiB)": 74.62, |
| "step": 1145, |
| "token_acc": 0.8742138364779874, |
| "train_speed(iter/s)": 0.022411 |
| }, |
| { |
| "epoch": 0.9269969666329626, |
| "grad_norm": 1.9519524574279785, |
| "learning_rate": 1.4726627497409274e-07, |
| "loss": 0.3644063472747803, |
| "memory(GiB)": 74.62, |
| "step": 1146, |
| "token_acc": 0.8945147679324894, |
| "train_speed(iter/s)": 0.022411 |
| }, |
| { |
| "epoch": 0.9278058645096057, |
| "grad_norm": 2.4077093601226807, |
| "learning_rate": 1.440629851793407e-07, |
| "loss": 0.42128363251686096, |
| "memory(GiB)": 74.62, |
| "step": 1147, |
| "token_acc": 0.8775510204081632, |
| "train_speed(iter/s)": 0.022411 |
| }, |
| { |
| "epoch": 0.9286147623862487, |
| "grad_norm": 2.0536437034606934, |
| "learning_rate": 1.408944087562736e-07, |
| "loss": 0.3700520396232605, |
| "memory(GiB)": 74.62, |
| "step": 1148, |
| "token_acc": 0.8464566929133859, |
| "train_speed(iter/s)": 0.022412 |
| }, |
| { |
| "epoch": 0.9294236602628918, |
| "grad_norm": 2.154677391052246, |
| "learning_rate": 1.3776056835592132e-07, |
| "loss": 0.3489128351211548, |
| "memory(GiB)": 74.62, |
| "step": 1149, |
| "token_acc": 0.8795811518324608, |
| "train_speed(iter/s)": 0.022412 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 1.8740899562835693, |
| "learning_rate": 1.346614863809953e-07, |
| "loss": 0.36078181862831116, |
| "memory(GiB)": 74.62, |
| "step": 1150, |
| "token_acc": 0.8864468864468864, |
| "train_speed(iter/s)": 0.022412 |
| }, |
| { |
| "epoch": 0.9310414560161779, |
| "grad_norm": 2.146127700805664, |
| "learning_rate": 1.315971849857356e-07, |
| "loss": 0.3723437190055847, |
| "memory(GiB)": 74.62, |
| "step": 1151, |
| "token_acc": 0.8691275167785235, |
| "train_speed(iter/s)": 0.022413 |
| }, |
| { |
| "epoch": 0.931850353892821, |
| "grad_norm": 1.787015438079834, |
| "learning_rate": 1.2856768607574565e-07, |
| "loss": 0.3393116891384125, |
| "memory(GiB)": 74.62, |
| "step": 1152, |
| "token_acc": 0.9015544041450777, |
| "train_speed(iter/s)": 0.022413 |
| }, |
| { |
| "epoch": 0.9326592517694641, |
| "grad_norm": 2.211394786834717, |
| "learning_rate": 1.255730113078385e-07, |
| "loss": 0.34008848667144775, |
| "memory(GiB)": 74.62, |
| "step": 1153, |
| "token_acc": 0.8700787401574803, |
| "train_speed(iter/s)": 0.022413 |
| }, |
| { |
| "epoch": 0.9334681496461071, |
| "grad_norm": 1.7942789793014526, |
| "learning_rate": 1.2261318208988294e-07, |
| "loss": 0.31053483486175537, |
| "memory(GiB)": 74.62, |
| "step": 1154, |
| "token_acc": 0.8535825545171339, |
| "train_speed(iter/s)": 0.022414 |
| }, |
| { |
| "epoch": 0.9342770475227502, |
| "grad_norm": 2.598997116088867, |
| "learning_rate": 1.1968821958064702e-07, |
| "loss": 0.4369804859161377, |
| "memory(GiB)": 74.62, |
| "step": 1155, |
| "token_acc": 0.8713692946058091, |
| "train_speed(iter/s)": 0.022414 |
| }, |
| { |
| "epoch": 0.9350859453993934, |
| "grad_norm": 1.7106472253799438, |
| "learning_rate": 1.1679814468965211e-07, |
| "loss": 0.3438988924026489, |
| "memory(GiB)": 74.62, |
| "step": 1156, |
| "token_acc": 0.8736059479553904, |
| "train_speed(iter/s)": 0.022414 |
| }, |
| { |
| "epoch": 0.9358948432760364, |
| "grad_norm": 1.8687455654144287, |
| "learning_rate": 1.1394297807701737e-07, |
| "loss": 0.3768293261528015, |
| "memory(GiB)": 74.62, |
| "step": 1157, |
| "token_acc": 0.9270833333333334, |
| "train_speed(iter/s)": 0.022415 |
| }, |
| { |
| "epoch": 0.9367037411526795, |
| "grad_norm": 1.5831663608551025, |
| "learning_rate": 1.111227401533166e-07, |
| "loss": 0.3412172496318817, |
| "memory(GiB)": 74.62, |
| "step": 1158, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022415 |
| }, |
| { |
| "epoch": 0.9375126390293226, |
| "grad_norm": 1.8993335962295532, |
| "learning_rate": 1.083374510794305e-07, |
| "loss": 0.4136160910129547, |
| "memory(GiB)": 74.62, |
| "step": 1159, |
| "token_acc": 0.9094488188976378, |
| "train_speed(iter/s)": 0.022415 |
| }, |
| { |
| "epoch": 0.9383215369059656, |
| "grad_norm": 3.2496023178100586, |
| "learning_rate": 1.0558713076640415e-07, |
| "loss": 0.3755384087562561, |
| "memory(GiB)": 74.62, |
| "step": 1160, |
| "token_acc": 0.9172932330827067, |
| "train_speed(iter/s)": 0.022416 |
| }, |
| { |
| "epoch": 0.9391304347826087, |
| "grad_norm": 2.1333253383636475, |
| "learning_rate": 1.028717988753014e-07, |
| "loss": 0.3936523199081421, |
| "memory(GiB)": 74.62, |
| "step": 1161, |
| "token_acc": 0.8974358974358975, |
| "train_speed(iter/s)": 0.022416 |
| }, |
| { |
| "epoch": 0.9399393326592518, |
| "grad_norm": 2.6341114044189453, |
| "learning_rate": 1.0019147481706626e-07, |
| "loss": 0.40892741084098816, |
| "memory(GiB)": 74.62, |
| "step": 1162, |
| "token_acc": 0.9217391304347826, |
| "train_speed(iter/s)": 0.022417 |
| }, |
| { |
| "epoch": 0.9407482305358948, |
| "grad_norm": 1.8160382509231567, |
| "learning_rate": 9.754617775238562e-08, |
| "loss": 0.36974000930786133, |
| "memory(GiB)": 74.62, |
| "step": 1163, |
| "token_acc": 0.8614457831325302, |
| "train_speed(iter/s)": 0.022417 |
| }, |
| { |
| "epoch": 0.9415571284125379, |
| "grad_norm": 2.1739790439605713, |
| "learning_rate": 9.493592659155004e-08, |
| "loss": 0.3862905502319336, |
| "memory(GiB)": 74.62, |
| "step": 1164, |
| "token_acc": 0.8700787401574803, |
| "train_speed(iter/s)": 0.022417 |
| }, |
| { |
| "epoch": 0.942366026289181, |
| "grad_norm": 2.973860502243042, |
| "learning_rate": 9.236073999431939e-08, |
| "loss": 0.4268924593925476, |
| "memory(GiB)": 74.62, |
| "step": 1165, |
| "token_acc": 0.8847736625514403, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.943174924165824, |
| "grad_norm": 2.2699947357177734, |
| "learning_rate": 8.98206363697901e-08, |
| "loss": 0.3827816843986511, |
| "memory(GiB)": 74.62, |
| "step": 1166, |
| "token_acc": 0.8765432098765432, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.9439838220424671, |
| "grad_norm": 2.014028549194336, |
| "learning_rate": 8.731563387626096e-08, |
| "loss": 0.3976903259754181, |
| "memory(GiB)": 74.62, |
| "step": 1167, |
| "token_acc": 0.8338658146964856, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.9447927199191102, |
| "grad_norm": 2.3635129928588867, |
| "learning_rate": 8.484575042110699e-08, |
| "loss": 0.3837153911590576, |
| "memory(GiB)": 74.62, |
| "step": 1168, |
| "token_acc": 0.8766666666666667, |
| "train_speed(iter/s)": 0.022418 |
| }, |
| { |
| "epoch": 0.9456016177957532, |
| "grad_norm": 2.5257232189178467, |
| "learning_rate": 8.241100366064902e-08, |
| "loss": 0.37266969680786133, |
| "memory(GiB)": 74.62, |
| "step": 1169, |
| "token_acc": 0.8828828828828829, |
| "train_speed(iter/s)": 0.022419 |
| }, |
| { |
| "epoch": 0.9464105156723963, |
| "grad_norm": 2.1283090114593506, |
| "learning_rate": 8.001141100002885e-08, |
| "loss": 0.32720375061035156, |
| "memory(GiB)": 74.62, |
| "step": 1170, |
| "token_acc": 0.8850174216027874, |
| "train_speed(iter/s)": 0.022419 |
| }, |
| { |
| "epoch": 0.9472194135490394, |
| "grad_norm": 2.261035919189453, |
| "learning_rate": 7.764698959308315e-08, |
| "loss": 0.38027650117874146, |
| "memory(GiB)": 74.62, |
| "step": 1171, |
| "token_acc": 0.8956521739130435, |
| "train_speed(iter/s)": 0.022419 |
| }, |
| { |
| "epoch": 0.9480283114256826, |
| "grad_norm": 1.921704888343811, |
| "learning_rate": 7.531775634222138e-08, |
| "loss": 0.37682783603668213, |
| "memory(GiB)": 74.62, |
| "step": 1172, |
| "token_acc": 0.8680851063829788, |
| "train_speed(iter/s)": 0.02242 |
| }, |
| { |
| "epoch": 0.9488372093023256, |
| "grad_norm": 2.031587600708008, |
| "learning_rate": 7.302372789830702e-08, |
| "loss": 0.3404289484024048, |
| "memory(GiB)": 74.62, |
| "step": 1173, |
| "token_acc": 0.8355555555555556, |
| "train_speed(iter/s)": 0.02242 |
| }, |
| { |
| "epoch": 0.9496461071789687, |
| "grad_norm": 1.8540045022964478, |
| "learning_rate": 7.076492066053486e-08, |
| "loss": 0.3675205111503601, |
| "memory(GiB)": 74.62, |
| "step": 1174, |
| "token_acc": 0.8758389261744967, |
| "train_speed(iter/s)": 0.02242 |
| }, |
| { |
| "epoch": 0.9504550050556118, |
| "grad_norm": 2.207390546798706, |
| "learning_rate": 6.854135077631774e-08, |
| "loss": 0.3710861802101135, |
| "memory(GiB)": 74.62, |
| "step": 1175, |
| "token_acc": 0.8367875647668394, |
| "train_speed(iter/s)": 0.022421 |
| }, |
| { |
| "epoch": 0.9512639029322548, |
| "grad_norm": 2.1160874366760254, |
| "learning_rate": 6.635303414116834e-08, |
| "loss": 0.375140517950058, |
| "memory(GiB)": 74.62, |
| "step": 1176, |
| "token_acc": 0.8616600790513834, |
| "train_speed(iter/s)": 0.022421 |
| }, |
| { |
| "epoch": 0.9520728008088979, |
| "grad_norm": 1.8097771406173706, |
| "learning_rate": 6.419998639858538e-08, |
| "loss": 0.33210816979408264, |
| "memory(GiB)": 74.62, |
| "step": 1177, |
| "token_acc": 0.9314079422382672, |
| "train_speed(iter/s)": 0.022421 |
| }, |
| { |
| "epoch": 0.952881698685541, |
| "grad_norm": 1.6278916597366333, |
| "learning_rate": 6.208222293994425e-08, |
| "loss": 0.3717727065086365, |
| "memory(GiB)": 74.62, |
| "step": 1178, |
| "token_acc": 0.8639455782312925, |
| "train_speed(iter/s)": 0.022422 |
| }, |
| { |
| "epoch": 0.953690596562184, |
| "grad_norm": 2.6115875244140625, |
| "learning_rate": 5.999975890438436e-08, |
| "loss": 0.35759437084198, |
| "memory(GiB)": 74.62, |
| "step": 1179, |
| "token_acc": 0.9078498293515358, |
| "train_speed(iter/s)": 0.022422 |
| }, |
| { |
| "epoch": 0.9544994944388271, |
| "grad_norm": 2.0658047199249268, |
| "learning_rate": 5.79526091787036e-08, |
| "loss": 0.37362658977508545, |
| "memory(GiB)": 74.62, |
| "step": 1180, |
| "token_acc": 0.8454545454545455, |
| "train_speed(iter/s)": 0.022422 |
| }, |
| { |
| "epoch": 0.9553083923154702, |
| "grad_norm": 2.23612117767334, |
| "learning_rate": 5.594078839724793e-08, |
| "loss": 0.37239736318588257, |
| "memory(GiB)": 74.62, |
| "step": 1181, |
| "token_acc": 0.855072463768116, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.9561172901921132, |
| "grad_norm": 1.8740304708480835, |
| "learning_rate": 5.396431094181198e-08, |
| "loss": 0.3480920195579529, |
| "memory(GiB)": 74.62, |
| "step": 1182, |
| "token_acc": 0.8709677419354839, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.9569261880687563, |
| "grad_norm": 2.2320539951324463, |
| "learning_rate": 5.202319094153252e-08, |
| "loss": 0.3483563959598541, |
| "memory(GiB)": 74.62, |
| "step": 1183, |
| "token_acc": 0.8866666666666667, |
| "train_speed(iter/s)": 0.022423 |
| }, |
| { |
| "epoch": 0.9577350859453994, |
| "grad_norm": 1.7620937824249268, |
| "learning_rate": 5.011744227278625e-08, |
| "loss": 0.33139705657958984, |
| "memory(GiB)": 74.62, |
| "step": 1184, |
| "token_acc": 0.9110169491525424, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.9585439838220424, |
| "grad_norm": 1.869081974029541, |
| "learning_rate": 4.824707855909605e-08, |
| "loss": 0.3572564125061035, |
| "memory(GiB)": 74.62, |
| "step": 1185, |
| "token_acc": 0.8842592592592593, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.9593528816986855, |
| "grad_norm": 2.5178749561309814, |
| "learning_rate": 4.6412113171028226e-08, |
| "loss": 0.39302319288253784, |
| "memory(GiB)": 74.62, |
| "step": 1186, |
| "token_acc": 0.9107142857142857, |
| "train_speed(iter/s)": 0.022424 |
| }, |
| { |
| "epoch": 0.9601617795753287, |
| "grad_norm": 2.3168158531188965, |
| "learning_rate": 4.461255922609986e-08, |
| "loss": 0.3867931365966797, |
| "memory(GiB)": 74.62, |
| "step": 1187, |
| "token_acc": 0.8819444444444444, |
| "train_speed(iter/s)": 0.022425 |
| }, |
| { |
| "epoch": 0.9609706774519717, |
| "grad_norm": 2.4859671592712402, |
| "learning_rate": 4.2848429588683295e-08, |
| "loss": 0.3992939591407776, |
| "memory(GiB)": 74.62, |
| "step": 1188, |
| "token_acc": 0.8392857142857143, |
| "train_speed(iter/s)": 0.022425 |
| }, |
| { |
| "epoch": 0.9617795753286148, |
| "grad_norm": 3.0036697387695312, |
| "learning_rate": 4.111973686991677e-08, |
| "loss": 0.49971675872802734, |
| "memory(GiB)": 74.62, |
| "step": 1189, |
| "token_acc": 0.8101694915254237, |
| "train_speed(iter/s)": 0.022425 |
| }, |
| { |
| "epoch": 0.9625884732052579, |
| "grad_norm": 2.2183077335357666, |
| "learning_rate": 3.9426493427611177e-08, |
| "loss": 0.38460367918014526, |
| "memory(GiB)": 74.62, |
| "step": 1190, |
| "token_acc": 0.8319327731092437, |
| "train_speed(iter/s)": 0.022426 |
| }, |
| { |
| "epoch": 0.9633973710819009, |
| "grad_norm": 2.1675848960876465, |
| "learning_rate": 3.776871136616289e-08, |
| "loss": 0.4845053553581238, |
| "memory(GiB)": 74.62, |
| "step": 1191, |
| "token_acc": 0.8, |
| "train_speed(iter/s)": 0.022426 |
| }, |
| { |
| "epoch": 0.964206268958544, |
| "grad_norm": 1.8861103057861328, |
| "learning_rate": 3.6146402536468285e-08, |
| "loss": 0.40070268511772156, |
| "memory(GiB)": 74.62, |
| "step": 1192, |
| "token_acc": 0.8436363636363636, |
| "train_speed(iter/s)": 0.022426 |
| }, |
| { |
| "epoch": 0.9650151668351871, |
| "grad_norm": 2.5333354473114014, |
| "learning_rate": 3.455957853583769e-08, |
| "loss": 0.3965553343296051, |
| "memory(GiB)": 74.62, |
| "step": 1193, |
| "token_acc": 0.8791208791208791, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.9658240647118301, |
| "grad_norm": 2.1063308715820312, |
| "learning_rate": 3.3008250707913246e-08, |
| "loss": 0.35347798466682434, |
| "memory(GiB)": 74.62, |
| "step": 1194, |
| "token_acc": 0.8908296943231441, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.9666329625884732, |
| "grad_norm": 2.082961320877075, |
| "learning_rate": 3.14924301425884e-08, |
| "loss": 0.3923337757587433, |
| "memory(GiB)": 74.62, |
| "step": 1195, |
| "token_acc": 0.8774834437086093, |
| "train_speed(iter/s)": 0.022427 |
| }, |
| { |
| "epoch": 0.9674418604651163, |
| "grad_norm": 1.8798726797103882, |
| "learning_rate": 3.0012127675925206e-08, |
| "loss": 0.35899072885513306, |
| "memory(GiB)": 74.62, |
| "step": 1196, |
| "token_acc": 0.8819444444444444, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.9682507583417593, |
| "grad_norm": 13.926689147949219, |
| "learning_rate": 2.8567353890082696e-08, |
| "loss": 0.3928597569465637, |
| "memory(GiB)": 74.62, |
| "step": 1197, |
| "token_acc": 0.8653846153846154, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.9690596562184024, |
| "grad_norm": 1.9069607257843018, |
| "learning_rate": 2.7158119113234738e-08, |
| "loss": 0.344777375459671, |
| "memory(GiB)": 74.62, |
| "step": 1198, |
| "token_acc": 0.9090909090909091, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.9698685540950455, |
| "grad_norm": 2.385317087173462, |
| "learning_rate": 2.5784433419501763e-08, |
| "loss": 0.35486793518066406, |
| "memory(GiB)": 74.62, |
| "step": 1199, |
| "token_acc": 0.8652849740932642, |
| "train_speed(iter/s)": 0.022428 |
| }, |
| { |
| "epoch": 0.9706774519716885, |
| "grad_norm": 2.183742046356201, |
| "learning_rate": 2.4446306628875814e-08, |
| "loss": 0.3595341444015503, |
| "memory(GiB)": 74.62, |
| "step": 1200, |
| "token_acc": 0.8879310344827587, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.9714863498483316, |
| "grad_norm": 2.103287935256958, |
| "learning_rate": 2.3143748307150605e-08, |
| "loss": 0.39095747470855713, |
| "memory(GiB)": 74.62, |
| "step": 1201, |
| "token_acc": 0.8861788617886179, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.9722952477249748, |
| "grad_norm": 2.1582367420196533, |
| "learning_rate": 2.1876767765853237e-08, |
| "loss": 0.3016042113304138, |
| "memory(GiB)": 74.62, |
| "step": 1202, |
| "token_acc": 0.8571428571428571, |
| "train_speed(iter/s)": 0.022429 |
| }, |
| { |
| "epoch": 0.9731041456016178, |
| "grad_norm": 2.0449063777923584, |
| "learning_rate": 2.0645374062179257e-08, |
| "loss": 0.36447232961654663, |
| "memory(GiB)": 74.62, |
| "step": 1203, |
| "token_acc": 0.8480392156862745, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.9739130434782609, |
| "grad_norm": 3.5183372497558594, |
| "learning_rate": 1.9449575998924387e-08, |
| "loss": 0.43112486600875854, |
| "memory(GiB)": 74.62, |
| "step": 1204, |
| "token_acc": 0.8607594936708861, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.974721941354904, |
| "grad_norm": 2.14886736869812, |
| "learning_rate": 1.8289382124426214e-08, |
| "loss": 0.38468360900878906, |
| "memory(GiB)": 74.62, |
| "step": 1205, |
| "token_acc": 0.8654545454545455, |
| "train_speed(iter/s)": 0.02243 |
| }, |
| { |
| "epoch": 0.975530839231547, |
| "grad_norm": 2.688023090362549, |
| "learning_rate": 1.7164800732498156e-08, |
| "loss": 0.3501737713813782, |
| "memory(GiB)": 74.62, |
| "step": 1206, |
| "token_acc": 0.8855421686746988, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.9763397371081901, |
| "grad_norm": 2.0248029232025146, |
| "learning_rate": 1.6075839862374487e-08, |
| "loss": 0.31531471014022827, |
| "memory(GiB)": 74.62, |
| "step": 1207, |
| "token_acc": 0.865979381443299, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.9771486349848332, |
| "grad_norm": 3.5692150592803955, |
| "learning_rate": 1.5022507298649848e-08, |
| "loss": 0.3675447106361389, |
| "memory(GiB)": 74.62, |
| "step": 1208, |
| "token_acc": 0.8636363636363636, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.9779575328614762, |
| "grad_norm": 1.9649704694747925, |
| "learning_rate": 1.400481057122538e-08, |
| "loss": 0.38956940174102783, |
| "memory(GiB)": 74.62, |
| "step": 1209, |
| "token_acc": 0.8914473684210527, |
| "train_speed(iter/s)": 0.022431 |
| }, |
| { |
| "epoch": 0.9787664307381193, |
| "grad_norm": 2.3865509033203125, |
| "learning_rate": 1.3022756955254901e-08, |
| "loss": 0.3772105574607849, |
| "memory(GiB)": 74.62, |
| "step": 1210, |
| "token_acc": 0.8963963963963963, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.9795753286147624, |
| "grad_norm": 9.275412559509277, |
| "learning_rate": 1.207635347108993e-08, |
| "loss": 0.39102572202682495, |
| "memory(GiB)": 74.62, |
| "step": 1211, |
| "token_acc": 0.8317757009345794, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.9803842264914054, |
| "grad_norm": 2.0313827991485596, |
| "learning_rate": 1.1165606884234182e-08, |
| "loss": 0.37432482838630676, |
| "memory(GiB)": 74.62, |
| "step": 1212, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.022432 |
| }, |
| { |
| "epoch": 0.9811931243680485, |
| "grad_norm": 1.960199236869812, |
| "learning_rate": 1.0290523705291932e-08, |
| "loss": 0.3433490991592407, |
| "memory(GiB)": 74.62, |
| "step": 1213, |
| "token_acc": 0.8885714285714286, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.9820020222446916, |
| "grad_norm": 1.8676866292953491, |
| "learning_rate": 9.451110189923063e-09, |
| "loss": 0.3818192183971405, |
| "memory(GiB)": 74.62, |
| "step": 1214, |
| "token_acc": 0.8989547038327527, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.9828109201213346, |
| "grad_norm": 2.4343481063842773, |
| "learning_rate": 8.647372338795867e-09, |
| "loss": 0.4184320569038391, |
| "memory(GiB)": 74.62, |
| "step": 1215, |
| "token_acc": 0.8434782608695652, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.9836198179979777, |
| "grad_norm": 2.3009696006774902, |
| "learning_rate": 7.8793158975482e-09, |
| "loss": 0.40056365728378296, |
| "memory(GiB)": 74.62, |
| "step": 1216, |
| "token_acc": 0.8210526315789474, |
| "train_speed(iter/s)": 0.022433 |
| }, |
| { |
| "epoch": 0.9844287158746208, |
| "grad_norm": 4.763977527618408, |
| "learning_rate": 7.146946356743068e-09, |
| "loss": 0.37496888637542725, |
| "memory(GiB)": 74.62, |
| "step": 1217, |
| "token_acc": 0.9244444444444444, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.985237613751264, |
| "grad_norm": 2.2471978664398193, |
| "learning_rate": 6.450268951830319e-09, |
| "loss": 0.3727502226829529, |
| "memory(GiB)": 74.62, |
| "step": 1218, |
| "token_acc": 0.819327731092437, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.986046511627907, |
| "grad_norm": 1.7557698488235474, |
| "learning_rate": 5.789288663110015e-09, |
| "loss": 0.32791298627853394, |
| "memory(GiB)": 74.62, |
| "step": 1219, |
| "token_acc": 0.8659420289855072, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.9868554095045501, |
| "grad_norm": 2.5717544555664062, |
| "learning_rate": 5.164010215695792e-09, |
| "loss": 0.37463176250457764, |
| "memory(GiB)": 74.62, |
| "step": 1220, |
| "token_acc": 0.8560885608856088, |
| "train_speed(iter/s)": 0.022434 |
| }, |
| { |
| "epoch": 0.9876643073811932, |
| "grad_norm": 3.5073463916778564, |
| "learning_rate": 4.574438079480992e-09, |
| "loss": 0.32435593008995056, |
| "memory(GiB)": 74.62, |
| "step": 1221, |
| "token_acc": 0.8685446009389671, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.9884732052578362, |
| "grad_norm": 1.9765585660934448, |
| "learning_rate": 4.020576469108139e-09, |
| "loss": 0.38409414887428284, |
| "memory(GiB)": 74.62, |
| "step": 1222, |
| "token_acc": 0.8888888888888888, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.9892821031344793, |
| "grad_norm": 1.8832907676696777, |
| "learning_rate": 3.502429343937297e-09, |
| "loss": 0.3716433644294739, |
| "memory(GiB)": 74.62, |
| "step": 1223, |
| "token_acc": 0.8876811594202898, |
| "train_speed(iter/s)": 0.022435 |
| }, |
| { |
| "epoch": 0.9900910010111224, |
| "grad_norm": 1.9831905364990234, |
| "learning_rate": 3.020000408018864e-09, |
| "loss": 0.3268841505050659, |
| "memory(GiB)": 74.62, |
| "step": 1224, |
| "token_acc": 0.9003831417624522, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.9908998988877654, |
| "grad_norm": 2.281235456466675, |
| "learning_rate": 2.573293110065822e-09, |
| "loss": 0.33263713121414185, |
| "memory(GiB)": 74.62, |
| "step": 1225, |
| "token_acc": 0.8669527896995708, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.9917087967644085, |
| "grad_norm": 2.3608005046844482, |
| "learning_rate": 2.162310643430976e-09, |
| "loss": 0.39835768938064575, |
| "memory(GiB)": 74.62, |
| "step": 1226, |
| "token_acc": 0.8962655601659751, |
| "train_speed(iter/s)": 0.022436 |
| }, |
| { |
| "epoch": 0.9925176946410516, |
| "grad_norm": 2.6654913425445557, |
| "learning_rate": 1.7870559460814173e-09, |
| "loss": 0.4261908531188965, |
| "memory(GiB)": 74.62, |
| "step": 1227, |
| "token_acc": 0.8935574229691877, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.9933265925176946, |
| "grad_norm": 1.8069103956222534, |
| "learning_rate": 1.447531700580207e-09, |
| "loss": 0.3241886496543884, |
| "memory(GiB)": 74.62, |
| "step": 1228, |
| "token_acc": 0.9383886255924171, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.9941354903943377, |
| "grad_norm": 2.0414981842041016, |
| "learning_rate": 1.1437403340652797e-09, |
| "loss": 0.4070656895637512, |
| "memory(GiB)": 74.62, |
| "step": 1229, |
| "token_acc": 0.8465753424657534, |
| "train_speed(iter/s)": 0.022437 |
| }, |
| { |
| "epoch": 0.9949443882709808, |
| "grad_norm": 2.6518869400024414, |
| "learning_rate": 8.756840182344573e-10, |
| "loss": 0.3987523317337036, |
| "memory(GiB)": 74.62, |
| "step": 1230, |
| "token_acc": 0.8187134502923976, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.9957532861476238, |
| "grad_norm": 1.9646754264831543, |
| "learning_rate": 6.433646693265738e-10, |
| "loss": 0.32140272855758667, |
| "memory(GiB)": 74.62, |
| "step": 1231, |
| "token_acc": 0.9049773755656109, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.9965621840242669, |
| "grad_norm": 2.0284359455108643, |
| "learning_rate": 4.4678394810981904e-10, |
| "loss": 0.38582661747932434, |
| "memory(GiB)": 74.62, |
| "step": 1232, |
| "token_acc": 0.8961937716262975, |
| "train_speed(iter/s)": 0.022438 |
| }, |
| { |
| "epoch": 0.9973710819009101, |
| "grad_norm": 1.9221043586730957, |
| "learning_rate": 2.8594325987119086e-10, |
| "loss": 0.3542518615722656, |
| "memory(GiB)": 74.62, |
| "step": 1233, |
| "token_acc": 0.9240506329113924, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.9981799797775531, |
| "grad_norm": 2.5311009883880615, |
| "learning_rate": 1.6084375440317268e-10, |
| "loss": 0.44038695096969604, |
| "memory(GiB)": 74.62, |
| "step": 1234, |
| "token_acc": 0.8537735849056604, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.9989888776541962, |
| "grad_norm": 2.092437505722046, |
| "learning_rate": 7.148632599707217e-11, |
| "loss": 0.3628859221935272, |
| "memory(GiB)": 74.62, |
| "step": 1235, |
| "token_acc": 0.8671328671328671, |
| "train_speed(iter/s)": 0.022439 |
| }, |
| { |
| "epoch": 0.9997977755308393, |
| "grad_norm": 2.2749087810516357, |
| "learning_rate": 1.787161343858035e-11, |
| "loss": 0.4479348063468933, |
| "memory(GiB)": 74.62, |
| "step": 1236, |
| "token_acc": 0.8859934853420195, |
| "train_speed(iter/s)": 0.02244 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 4.017106056213379, |
| "learning_rate": 0.0, |
| "loss": 0.41172629594802856, |
| "memory(GiB)": 74.62, |
| "step": 1237, |
| "token_acc": 0.8541666666666666, |
| "train_speed(iter/s)": 0.022445 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.3615947365760803, |
| "eval_runtime": 428.6167, |
| "eval_samples_per_second": 3.728, |
| "eval_steps_per_second": 0.117, |
| "eval_token_acc": 0.8760036017108126, |
| "step": 1237 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1237, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 618, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.135344722858895e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|