| { |
| "best_global_step": 6000, |
| "best_metric": 0.95930004, |
| "best_model_checkpoint": "/user/yutianyu/Duplex_Finetune/output/4B_LLaVA_SFT/zero3_0dot6B_LLaVA_SFT_nopacking/v0-20251202-145343/checkpoint-6000", |
| "epoch": 1.4081488957213604, |
| "eval_steps": 100, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0002347032177811158, |
| "grad_norm": 19.127729366517716, |
| "learning_rate": 4e-09, |
| "loss": 1.9448599815368652, |
| "num_input_tokens_seen": 197295, |
| "step": 1, |
| "token_acc": 0.5819854991634132 |
| }, |
| { |
| "epoch": 0.002347032177811158, |
| "grad_norm": 16.494659300395906, |
| "learning_rate": 4e-08, |
| "loss": 1.92243406507704, |
| "num_input_tokens_seen": 2020272, |
| "step": 10, |
| "token_acc": 0.5839277085360667 |
| }, |
| { |
| "epoch": 0.004694064355622316, |
| "grad_norm": 22.126428554674575, |
| "learning_rate": 8e-08, |
| "loss": 1.916154670715332, |
| "num_input_tokens_seen": 4084884, |
| "step": 20, |
| "token_acc": 0.5835957997637319 |
| }, |
| { |
| "epoch": 0.007041096533433474, |
| "grad_norm": 12.603610772661288, |
| "learning_rate": 1.2e-07, |
| "loss": 1.8856426239013673, |
| "num_input_tokens_seen": 6056667, |
| "step": 30, |
| "token_acc": 0.584776074988841 |
| }, |
| { |
| "epoch": 0.009388128711244632, |
| "grad_norm": 10.779646193701467, |
| "learning_rate": 1.6e-07, |
| "loss": 1.8353569030761718, |
| "num_input_tokens_seen": 8091435, |
| "step": 40, |
| "token_acc": 0.5897754631538845 |
| }, |
| { |
| "epoch": 0.011735160889055789, |
| "grad_norm": 136.86547531489668, |
| "learning_rate": 2e-07, |
| "loss": 1.721211051940918, |
| "num_input_tokens_seen": 10091673, |
| "step": 50, |
| "token_acc": 0.6031858358236022 |
| }, |
| { |
| "epoch": 0.014082193066866948, |
| "grad_norm": 5.080233785680673, |
| "learning_rate": 2.4e-07, |
| "loss": 1.6145668029785156, |
| "num_input_tokens_seen": 12099135, |
| "step": 60, |
| "token_acc": 0.6209089567372474 |
| }, |
| { |
| "epoch": 0.016429225244678103, |
| "grad_norm": 12.207422981911558, |
| "learning_rate": 2.8e-07, |
| "loss": 1.5414657592773438, |
| "num_input_tokens_seen": 14135250, |
| "step": 70, |
| "token_acc": 0.6310852754061408 |
| }, |
| { |
| "epoch": 0.018776257422489263, |
| "grad_norm": 8.118283422146021, |
| "learning_rate": 3.2e-07, |
| "loss": 1.528026008605957, |
| "num_input_tokens_seen": 16200873, |
| "step": 80, |
| "token_acc": 0.6382918453943185 |
| }, |
| { |
| "epoch": 0.02112328960030042, |
| "grad_norm": 3.47292239362735, |
| "learning_rate": 3.6e-07, |
| "loss": 1.4715272903442382, |
| "num_input_tokens_seen": 18177258, |
| "step": 90, |
| "token_acc": 0.6432060553309527 |
| }, |
| { |
| "epoch": 0.023470321778111577, |
| "grad_norm": 2.2332542577632526, |
| "learning_rate": 4e-07, |
| "loss": 1.4491453170776367, |
| "num_input_tokens_seen": 20161581, |
| "step": 100, |
| "token_acc": 0.6477376652924822 |
| }, |
| { |
| "epoch": 0.023470321778111577, |
| "eval_loss": 1.4709749221801758, |
| "eval_runtime": 33.2583, |
| "eval_samples_per_second": 30.068, |
| "eval_steps_per_second": 1.263, |
| "eval_token_acc": 0.6456822326354424, |
| "num_input_tokens_seen": 20161581, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.025817353955922735, |
| "grad_norm": 1.9112779247166412, |
| "learning_rate": 4.3999999999999997e-07, |
| "loss": 1.4166399002075196, |
| "num_input_tokens_seen": 22179654, |
| "step": 110, |
| "token_acc": 0.6549487213586689 |
| }, |
| { |
| "epoch": 0.028164386133733895, |
| "grad_norm": 1.897077748827419, |
| "learning_rate": 4.8e-07, |
| "loss": 1.3960003852844238, |
| "num_input_tokens_seen": 24157263, |
| "step": 120, |
| "token_acc": 0.6563052255139141 |
| }, |
| { |
| "epoch": 0.030511418311545052, |
| "grad_norm": 2.063047689522777, |
| "learning_rate": 5.2e-07, |
| "loss": 1.373966884613037, |
| "num_input_tokens_seen": 26112051, |
| "step": 130, |
| "token_acc": 0.6609459618615088 |
| }, |
| { |
| "epoch": 0.032858450489356206, |
| "grad_norm": 6.453024686764437, |
| "learning_rate": 5.6e-07, |
| "loss": 1.3647557258605958, |
| "num_input_tokens_seen": 28133607, |
| "step": 140, |
| "token_acc": 0.6628235998176535 |
| }, |
| { |
| "epoch": 0.035205482667167366, |
| "grad_norm": 1.9586538878052986, |
| "learning_rate": 6e-07, |
| "loss": 1.394300651550293, |
| "num_input_tokens_seen": 30182052, |
| "step": 150, |
| "token_acc": 0.6566977644780848 |
| }, |
| { |
| "epoch": 0.03755251484497853, |
| "grad_norm": 4.663911418765899, |
| "learning_rate": 6.4e-07, |
| "loss": 1.3559602737426757, |
| "num_input_tokens_seen": 32177622, |
| "step": 160, |
| "token_acc": 0.6647926044470018 |
| }, |
| { |
| "epoch": 0.03989954702278968, |
| "grad_norm": 2.649130437820903, |
| "learning_rate": 6.800000000000001e-07, |
| "loss": 1.320611572265625, |
| "num_input_tokens_seen": 34150332, |
| "step": 170, |
| "token_acc": 0.671068499517214 |
| }, |
| { |
| "epoch": 0.04224657920060084, |
| "grad_norm": 1.988272208775732, |
| "learning_rate": 7.2e-07, |
| "loss": 1.3357341766357422, |
| "num_input_tokens_seen": 36129600, |
| "step": 180, |
| "token_acc": 0.6666514308426074 |
| }, |
| { |
| "epoch": 0.044593611378412, |
| "grad_norm": 2.089282254202976, |
| "learning_rate": 7.599999999999999e-07, |
| "loss": 1.3014695167541503, |
| "num_input_tokens_seen": 38143260, |
| "step": 190, |
| "token_acc": 0.6714680103247839 |
| }, |
| { |
| "epoch": 0.046940643556223155, |
| "grad_norm": 1.6962028687120758, |
| "learning_rate": 8e-07, |
| "loss": 1.316438865661621, |
| "num_input_tokens_seen": 40117473, |
| "step": 200, |
| "token_acc": 0.6719479705996623 |
| }, |
| { |
| "epoch": 0.046940643556223155, |
| "eval_loss": 1.3232439756393433, |
| "eval_runtime": 32.4409, |
| "eval_samples_per_second": 30.825, |
| "eval_steps_per_second": 1.295, |
| "eval_token_acc": 0.6714895778029132, |
| "num_input_tokens_seen": 40117473, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.049287675734034316, |
| "grad_norm": 2.7375989158006453, |
| "learning_rate": 8.399999999999999e-07, |
| "loss": 1.2971059799194335, |
| "num_input_tokens_seen": 42051432, |
| "step": 210, |
| "token_acc": 0.6724870727708412 |
| }, |
| { |
| "epoch": 0.05163470791184547, |
| "grad_norm": 1.8333521024827166, |
| "learning_rate": 8.799999999999999e-07, |
| "loss": 1.251258945465088, |
| "num_input_tokens_seen": 44060409, |
| "step": 220, |
| "token_acc": 0.6825975678761506 |
| }, |
| { |
| "epoch": 0.05398174008965663, |
| "grad_norm": 1.7067135742590114, |
| "learning_rate": 9.2e-07, |
| "loss": 1.2541748046875, |
| "num_input_tokens_seen": 46140576, |
| "step": 230, |
| "token_acc": 0.6838314298998598 |
| }, |
| { |
| "epoch": 0.05632877226746779, |
| "grad_norm": 2.117007115532979, |
| "learning_rate": 9.6e-07, |
| "loss": 1.2801358222961425, |
| "num_input_tokens_seen": 48218463, |
| "step": 240, |
| "token_acc": 0.6776609031540706 |
| }, |
| { |
| "epoch": 0.058675804445278944, |
| "grad_norm": 1.9477588029675073, |
| "learning_rate": 1e-06, |
| "loss": 1.272374153137207, |
| "num_input_tokens_seen": 50212704, |
| "step": 250, |
| "token_acc": 0.6784600688499179 |
| }, |
| { |
| "epoch": 0.061022836623090104, |
| "grad_norm": 2.0653725127756495, |
| "learning_rate": 1.04e-06, |
| "loss": 1.2390222549438477, |
| "num_input_tokens_seen": 52210065, |
| "step": 260, |
| "token_acc": 0.6846190216694448 |
| }, |
| { |
| "epoch": 0.06336986880090126, |
| "grad_norm": 1.6368439315898582, |
| "learning_rate": 1.08e-06, |
| "loss": 1.2289260864257812, |
| "num_input_tokens_seen": 54287394, |
| "step": 270, |
| "token_acc": 0.6870453172664521 |
| }, |
| { |
| "epoch": 0.06571690097871241, |
| "grad_norm": 1.8897694890455825, |
| "learning_rate": 1.12e-06, |
| "loss": 1.2016170501708985, |
| "num_input_tokens_seen": 56286087, |
| "step": 280, |
| "token_acc": 0.6947360302493355 |
| }, |
| { |
| "epoch": 0.06806393315652358, |
| "grad_norm": 4.191924245995845, |
| "learning_rate": 1.16e-06, |
| "loss": 1.2314638137817382, |
| "num_input_tokens_seen": 58259631, |
| "step": 290, |
| "token_acc": 0.6869124082650091 |
| }, |
| { |
| "epoch": 0.07041096533433473, |
| "grad_norm": 2.010449522588459, |
| "learning_rate": 1.2e-06, |
| "loss": 1.2306774139404297, |
| "num_input_tokens_seen": 60262377, |
| "step": 300, |
| "token_acc": 0.6861436424474188 |
| }, |
| { |
| "epoch": 0.07041096533433473, |
| "eval_loss": 1.253986120223999, |
| "eval_runtime": 32.5346, |
| "eval_samples_per_second": 30.737, |
| "eval_steps_per_second": 1.291, |
| "eval_token_acc": 0.6843470834006602, |
| "num_input_tokens_seen": 60262377, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07275799751214589, |
| "grad_norm": 1.6903068952734268, |
| "learning_rate": 1.24e-06, |
| "loss": 1.2244423866271972, |
| "num_input_tokens_seen": 62257881, |
| "step": 310, |
| "token_acc": 0.6906986736484506 |
| }, |
| { |
| "epoch": 0.07510502968995705, |
| "grad_norm": 2.00802423495143, |
| "learning_rate": 1.28e-06, |
| "loss": 1.2204778671264649, |
| "num_input_tokens_seen": 64218216, |
| "step": 320, |
| "token_acc": 0.6881087345222366 |
| }, |
| { |
| "epoch": 0.07745206186776821, |
| "grad_norm": 2.136483656358153, |
| "learning_rate": 1.32e-06, |
| "loss": 1.1911478996276856, |
| "num_input_tokens_seen": 66217860, |
| "step": 330, |
| "token_acc": 0.6964124357320138 |
| }, |
| { |
| "epoch": 0.07979909404557936, |
| "grad_norm": 1.7796738858497867, |
| "learning_rate": 1.3600000000000001e-06, |
| "loss": 1.195077896118164, |
| "num_input_tokens_seen": 68222130, |
| "step": 340, |
| "token_acc": 0.6935684789950294 |
| }, |
| { |
| "epoch": 0.08214612622339053, |
| "grad_norm": 1.7117012000365959, |
| "learning_rate": 1.4e-06, |
| "loss": 1.2089216232299804, |
| "num_input_tokens_seen": 70209570, |
| "step": 350, |
| "token_acc": 0.6915523828674844 |
| }, |
| { |
| "epoch": 0.08449315840120168, |
| "grad_norm": 1.928181461412703, |
| "learning_rate": 1.44e-06, |
| "loss": 1.195500946044922, |
| "num_input_tokens_seen": 72228831, |
| "step": 360, |
| "token_acc": 0.6958758115748244 |
| }, |
| { |
| "epoch": 0.08684019057901284, |
| "grad_norm": 2.016364423213612, |
| "learning_rate": 1.48e-06, |
| "loss": 1.1975667953491211, |
| "num_input_tokens_seen": 74280357, |
| "step": 370, |
| "token_acc": 0.6944993196346585 |
| }, |
| { |
| "epoch": 0.089187222756824, |
| "grad_norm": 1.9269783210667364, |
| "learning_rate": 1.5199999999999998e-06, |
| "loss": 1.196579933166504, |
| "num_input_tokens_seen": 76255509, |
| "step": 380, |
| "token_acc": 0.693136319725866 |
| }, |
| { |
| "epoch": 0.09153425493463516, |
| "grad_norm": 2.276495563257121, |
| "learning_rate": 1.5599999999999999e-06, |
| "loss": 1.1727699279785155, |
| "num_input_tokens_seen": 78216720, |
| "step": 390, |
| "token_acc": 0.6980378317334839 |
| }, |
| { |
| "epoch": 0.09388128711244631, |
| "grad_norm": 2.665827226302004, |
| "learning_rate": 1.6e-06, |
| "loss": 1.2070913314819336, |
| "num_input_tokens_seen": 80187780, |
| "step": 400, |
| "token_acc": 0.6931723081009408 |
| }, |
| { |
| "epoch": 0.09388128711244631, |
| "eval_loss": 1.210001826286316, |
| "eval_runtime": 32.311, |
| "eval_samples_per_second": 30.949, |
| "eval_steps_per_second": 1.3, |
| "eval_token_acc": 0.693446596338958, |
| "num_input_tokens_seen": 80187780, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09622831929025746, |
| "grad_norm": 1.8020869110136488, |
| "learning_rate": 1.6399999999999998e-06, |
| "loss": 1.1979689598083496, |
| "num_input_tokens_seen": 82253211, |
| "step": 410, |
| "token_acc": 0.6943290418797176 |
| }, |
| { |
| "epoch": 0.09857535146806863, |
| "grad_norm": 1.613339482743251, |
| "learning_rate": 1.6799999999999998e-06, |
| "loss": 1.1750219345092774, |
| "num_input_tokens_seen": 84275082, |
| "step": 420, |
| "token_acc": 0.6988580180720491 |
| }, |
| { |
| "epoch": 0.10092238364587978, |
| "grad_norm": 2.0225577242890402, |
| "learning_rate": 1.7199999999999998e-06, |
| "loss": 1.160631275177002, |
| "num_input_tokens_seen": 86256174, |
| "step": 430, |
| "token_acc": 0.7049912003932076 |
| }, |
| { |
| "epoch": 0.10326941582369094, |
| "grad_norm": 2.0588425205195047, |
| "learning_rate": 1.7599999999999999e-06, |
| "loss": 1.155072021484375, |
| "num_input_tokens_seen": 88191771, |
| "step": 440, |
| "token_acc": 0.7027978727051616 |
| }, |
| { |
| "epoch": 0.1056164480015021, |
| "grad_norm": 1.750652589288128, |
| "learning_rate": 1.8e-06, |
| "loss": 1.1657937049865723, |
| "num_input_tokens_seen": 90280068, |
| "step": 450, |
| "token_acc": 0.70126095038482 |
| }, |
| { |
| "epoch": 0.10796348017931326, |
| "grad_norm": 1.6965579041737329, |
| "learning_rate": 1.84e-06, |
| "loss": 1.1403490066528321, |
| "num_input_tokens_seen": 92282058, |
| "step": 460, |
| "token_acc": 0.7044917775975158 |
| }, |
| { |
| "epoch": 0.11031051235712441, |
| "grad_norm": 1.8743542107195483, |
| "learning_rate": 1.8799999999999998e-06, |
| "loss": 1.1614572525024414, |
| "num_input_tokens_seen": 94268343, |
| "step": 470, |
| "token_acc": 0.7024686011260286 |
| }, |
| { |
| "epoch": 0.11265754453493558, |
| "grad_norm": 2.2378848353450693, |
| "learning_rate": 1.92e-06, |
| "loss": 1.1589451789855958, |
| "num_input_tokens_seen": 96220941, |
| "step": 480, |
| "token_acc": 0.7040593029694393 |
| }, |
| { |
| "epoch": 0.11500457671274673, |
| "grad_norm": 1.7219641168340587, |
| "learning_rate": 1.96e-06, |
| "loss": 1.139027214050293, |
| "num_input_tokens_seen": 98234790, |
| "step": 490, |
| "token_acc": 0.7069929196641098 |
| }, |
| { |
| "epoch": 0.11735160889055789, |
| "grad_norm": 1.7720161431115489, |
| "learning_rate": 2e-06, |
| "loss": 1.1347829818725585, |
| "num_input_tokens_seen": 100243815, |
| "step": 500, |
| "token_acc": 0.7060081282908567 |
| }, |
| { |
| "epoch": 0.11735160889055789, |
| "eval_loss": 1.1757478713989258, |
| "eval_runtime": 32.3883, |
| "eval_samples_per_second": 30.875, |
| "eval_steps_per_second": 1.297, |
| "eval_token_acc": 0.7003347106484153, |
| "num_input_tokens_seen": 100243815, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11969864106836904, |
| "grad_norm": 1.8210047186952776, |
| "learning_rate": 1.9999912270311373e-06, |
| "loss": 1.1792086601257323, |
| "num_input_tokens_seen": 102249078, |
| "step": 510, |
| "token_acc": 0.698590893627688 |
| }, |
| { |
| "epoch": 0.12204567324618021, |
| "grad_norm": 1.8609755736841171, |
| "learning_rate": 1.999964908278481e-06, |
| "loss": 1.1209921836853027, |
| "num_input_tokens_seen": 104220897, |
| "step": 520, |
| "token_acc": 0.7090112628579576 |
| }, |
| { |
| "epoch": 0.12439270542399136, |
| "grad_norm": 2.1446809333226584, |
| "learning_rate": 1.9999210442038163e-06, |
| "loss": 1.1469528198242187, |
| "num_input_tokens_seen": 106234191, |
| "step": 530, |
| "token_acc": 0.703947954006619 |
| }, |
| { |
| "epoch": 0.12673973760180252, |
| "grad_norm": 2.046893089210468, |
| "learning_rate": 1.9998596355767802e-06, |
| "loss": 1.1571426391601562, |
| "num_input_tokens_seen": 108272712, |
| "step": 540, |
| "token_acc": 0.7027365001081043 |
| }, |
| { |
| "epoch": 0.12908676977961367, |
| "grad_norm": 1.8591189017227578, |
| "learning_rate": 1.999780683474845e-06, |
| "loss": 1.1333347320556642, |
| "num_input_tokens_seen": 110241915, |
| "step": 550, |
| "token_acc": 0.7072802072223069 |
| }, |
| { |
| "epoch": 0.13143380195742482, |
| "grad_norm": 1.6591451063058131, |
| "learning_rate": 1.9996841892832997e-06, |
| "loss": 1.1434220314025878, |
| "num_input_tokens_seen": 112166943, |
| "step": 560, |
| "token_acc": 0.7056084295682411 |
| }, |
| { |
| "epoch": 0.133780834135236, |
| "grad_norm": 2.020864993257282, |
| "learning_rate": 1.999570154695225e-06, |
| "loss": 1.1571636199951172, |
| "num_input_tokens_seen": 114151494, |
| "step": 570, |
| "token_acc": 0.7044949720967205 |
| }, |
| { |
| "epoch": 0.13612786631304716, |
| "grad_norm": 2.064129107439252, |
| "learning_rate": 1.9994385817114644e-06, |
| "loss": 1.1311494827270507, |
| "num_input_tokens_seen": 116169552, |
| "step": 580, |
| "token_acc": 0.7063148017463998 |
| }, |
| { |
| "epoch": 0.1384748984908583, |
| "grad_norm": 2.0906868028581798, |
| "learning_rate": 1.999289472640589e-06, |
| "loss": 1.1150264739990234, |
| "num_input_tokens_seen": 118161789, |
| "step": 590, |
| "token_acc": 0.7104190105422314 |
| }, |
| { |
| "epoch": 0.14082193066866946, |
| "grad_norm": 1.783399118737723, |
| "learning_rate": 1.999122830098858e-06, |
| "loss": 1.14277925491333, |
| "num_input_tokens_seen": 120188337, |
| "step": 600, |
| "token_acc": 0.7054239286277058 |
| }, |
| { |
| "epoch": 0.14082193066866946, |
| "eval_loss": 1.150290846824646, |
| "eval_runtime": 32.9507, |
| "eval_samples_per_second": 30.348, |
| "eval_steps_per_second": 1.275, |
| "eval_token_acc": 0.7054176958057293, |
| "num_input_tokens_seen": 120188337, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14316896284648062, |
| "grad_norm": 2.087850842021689, |
| "learning_rate": 1.998938657010171e-06, |
| "loss": 1.1017154693603515, |
| "num_input_tokens_seen": 122187903, |
| "step": 610, |
| "token_acc": 0.7150525542709177 |
| }, |
| { |
| "epoch": 0.14551599502429177, |
| "grad_norm": 1.941074004275762, |
| "learning_rate": 1.9987369566060176e-06, |
| "loss": 1.0946624755859375, |
| "num_input_tokens_seen": 124171368, |
| "step": 620, |
| "token_acc": 0.7163183324905894 |
| }, |
| { |
| "epoch": 0.14786302720210295, |
| "grad_norm": 2.176865017774056, |
| "learning_rate": 1.9985177324254197e-06, |
| "loss": 1.1165874481201172, |
| "num_input_tokens_seen": 126183993, |
| "step": 630, |
| "token_acc": 0.7110827727359269 |
| }, |
| { |
| "epoch": 0.1502100593799141, |
| "grad_norm": 1.7953820815140804, |
| "learning_rate": 1.998280988314872e-06, |
| "loss": 1.1424741744995117, |
| "num_input_tokens_seen": 128176863, |
| "step": 640, |
| "token_acc": 0.7053827925519703 |
| }, |
| { |
| "epoch": 0.15255709155772526, |
| "grad_norm": 1.8929944794579523, |
| "learning_rate": 1.9980267284282714e-06, |
| "loss": 1.1028331756591796, |
| "num_input_tokens_seen": 130125408, |
| "step": 650, |
| "token_acc": 0.7125094339622642 |
| }, |
| { |
| "epoch": 0.15490412373553641, |
| "grad_norm": 1.691459310367227, |
| "learning_rate": 1.9977549572268466e-06, |
| "loss": 1.107553482055664, |
| "num_input_tokens_seen": 132065343, |
| "step": 660, |
| "token_acc": 0.7138998256484975 |
| }, |
| { |
| "epoch": 0.15725115591334757, |
| "grad_norm": 2.1233419395787556, |
| "learning_rate": 1.9974656794790772e-06, |
| "loss": 1.1101640701293944, |
| "num_input_tokens_seen": 134090148, |
| "step": 670, |
| "token_acc": 0.713199782361379 |
| }, |
| { |
| "epoch": 0.15959818809115872, |
| "grad_norm": 1.7559578625602645, |
| "learning_rate": 1.997158900260614e-06, |
| "loss": 1.1094940185546875, |
| "num_input_tokens_seen": 136112988, |
| "step": 680, |
| "token_acc": 0.7122396887639626 |
| }, |
| { |
| "epoch": 0.16194522026896987, |
| "grad_norm": 1.7829226146649233, |
| "learning_rate": 1.9968346249541846e-06, |
| "loss": 1.117540168762207, |
| "num_input_tokens_seen": 138058629, |
| "step": 690, |
| "token_acc": 0.7106555900807559 |
| }, |
| { |
| "epoch": 0.16429225244678106, |
| "grad_norm": 2.1483315176659166, |
| "learning_rate": 1.9964928592495045e-06, |
| "loss": 1.0879833221435546, |
| "num_input_tokens_seen": 140078598, |
| "step": 700, |
| "token_acc": 0.7166827394425921 |
| }, |
| { |
| "epoch": 0.16429225244678106, |
| "eval_loss": 1.1313835382461548, |
| "eval_runtime": 32.334, |
| "eval_samples_per_second": 30.927, |
| "eval_steps_per_second": 1.299, |
| "eval_token_acc": 0.7089033032478474, |
| "num_input_tokens_seen": 140078598, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1666392846245922, |
| "grad_norm": 1.867471314482214, |
| "learning_rate": 1.9961336091431724e-06, |
| "loss": 1.1190789222717286, |
| "num_input_tokens_seen": 142099659, |
| "step": 710, |
| "token_acc": 0.712375749359721 |
| }, |
| { |
| "epoch": 0.16898631680240336, |
| "grad_norm": 1.9022337846097856, |
| "learning_rate": 1.995756880938569e-06, |
| "loss": 1.0825121879577637, |
| "num_input_tokens_seen": 144092310, |
| "step": 720, |
| "token_acc": 0.7172525783126845 |
| }, |
| { |
| "epoch": 0.17133334898021452, |
| "grad_norm": 1.9984437173736713, |
| "learning_rate": 1.9953626812457438e-06, |
| "loss": 1.095411491394043, |
| "num_input_tokens_seen": 146064039, |
| "step": 730, |
| "token_acc": 0.714463713054313 |
| }, |
| { |
| "epoch": 0.17368038115802567, |
| "grad_norm": 2.1447202509234304, |
| "learning_rate": 1.9949510169813e-06, |
| "loss": 1.1152179718017579, |
| "num_input_tokens_seen": 148112049, |
| "step": 740, |
| "token_acc": 0.712060909164676 |
| }, |
| { |
| "epoch": 0.17602741333583682, |
| "grad_norm": 1.6936993245361356, |
| "learning_rate": 1.994521895368273e-06, |
| "loss": 1.0852348327636718, |
| "num_input_tokens_seen": 150133974, |
| "step": 750, |
| "token_acc": 0.7162799236018076 |
| }, |
| { |
| "epoch": 0.178374445513648, |
| "grad_norm": 2.352601598144833, |
| "learning_rate": 1.9940753239360045e-06, |
| "loss": 1.1107561111450195, |
| "num_input_tokens_seen": 152099280, |
| "step": 760, |
| "token_acc": 0.7127718906860011 |
| }, |
| { |
| "epoch": 0.18072147769145916, |
| "grad_norm": 1.84112693117569, |
| "learning_rate": 1.9936113105200084e-06, |
| "loss": 1.110912036895752, |
| "num_input_tokens_seen": 154146792, |
| "step": 770, |
| "token_acc": 0.7112778436268925 |
| }, |
| { |
| "epoch": 0.1830685098692703, |
| "grad_norm": 2.3592228692729367, |
| "learning_rate": 1.9931298632618353e-06, |
| "loss": 1.127957820892334, |
| "num_input_tokens_seen": 156087093, |
| "step": 780, |
| "token_acc": 0.7073941119432238 |
| }, |
| { |
| "epoch": 0.18541554204708147, |
| "grad_norm": 1.8453081635946817, |
| "learning_rate": 1.9926309906089288e-06, |
| "loss": 1.0826932907104492, |
| "num_input_tokens_seen": 158083548, |
| "step": 790, |
| "token_acc": 0.7176969639197369 |
| }, |
| { |
| "epoch": 0.18776257422489262, |
| "grad_norm": 1.6598465812647105, |
| "learning_rate": 1.9921147013144777e-06, |
| "loss": 1.097795295715332, |
| "num_input_tokens_seen": 160083087, |
| "step": 800, |
| "token_acc": 0.712001722391892 |
| }, |
| { |
| "epoch": 0.18776257422489262, |
| "eval_loss": 1.1155238151550293, |
| "eval_runtime": 32.4633, |
| "eval_samples_per_second": 30.804, |
| "eval_steps_per_second": 1.294, |
| "eval_token_acc": 0.712074975185245, |
| "num_input_tokens_seen": 160083087, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.19010960640270377, |
| "grad_norm": 2.345143984991418, |
| "learning_rate": 1.9915810044372615e-06, |
| "loss": 1.0773065567016602, |
| "num_input_tokens_seen": 162043827, |
| "step": 810, |
| "token_acc": 0.7185854363462685 |
| }, |
| { |
| "epoch": 0.19245663858051493, |
| "grad_norm": 1.6218625881025774, |
| "learning_rate": 1.991029909341493e-06, |
| "loss": 1.1322909355163575, |
| "num_input_tokens_seen": 164065197, |
| "step": 820, |
| "token_acc": 0.7112101172756877 |
| }, |
| { |
| "epoch": 0.1948036707583261, |
| "grad_norm": 3.4128089423104204, |
| "learning_rate": 1.990461425696651e-06, |
| "loss": 1.1018625259399415, |
| "num_input_tokens_seen": 166095825, |
| "step": 830, |
| "token_acc": 0.7132049834650468 |
| }, |
| { |
| "epoch": 0.19715070293613726, |
| "grad_norm": 3.8983014033715273, |
| "learning_rate": 1.9898755634773155e-06, |
| "loss": 1.092278289794922, |
| "num_input_tokens_seen": 168127596, |
| "step": 840, |
| "token_acc": 0.7165934113928826 |
| }, |
| { |
| "epoch": 0.19949773511394842, |
| "grad_norm": 1.7080322418933676, |
| "learning_rate": 1.9892723329629885e-06, |
| "loss": 1.0770910263061524, |
| "num_input_tokens_seen": 170112078, |
| "step": 850, |
| "token_acc": 0.7174151496405977 |
| }, |
| { |
| "epoch": 0.20184476729175957, |
| "grad_norm": 1.6399516756726806, |
| "learning_rate": 1.988651744737914e-06, |
| "loss": 1.119683837890625, |
| "num_input_tokens_seen": 172089120, |
| "step": 860, |
| "token_acc": 0.7093057553740301 |
| }, |
| { |
| "epoch": 0.20419179946957072, |
| "grad_norm": 1.9211847623415963, |
| "learning_rate": 1.988013809690895e-06, |
| "loss": 1.0811002731323243, |
| "num_input_tokens_seen": 174102978, |
| "step": 870, |
| "token_acc": 0.7170278749197704 |
| }, |
| { |
| "epoch": 0.20653883164738188, |
| "grad_norm": 1.7860065176012982, |
| "learning_rate": 1.9873585390151003e-06, |
| "loss": 1.0824663162231445, |
| "num_input_tokens_seen": 176106354, |
| "step": 880, |
| "token_acc": 0.7187242752799151 |
| }, |
| { |
| "epoch": 0.20888586382519303, |
| "grad_norm": 6.212586622472415, |
| "learning_rate": 1.986685944207868e-06, |
| "loss": 1.0738523483276368, |
| "num_input_tokens_seen": 178098096, |
| "step": 890, |
| "token_acc": 0.7199481706694962 |
| }, |
| { |
| "epoch": 0.2112328960030042, |
| "grad_norm": 1.7634279436109257, |
| "learning_rate": 1.985996037070505e-06, |
| "loss": 1.0606145858764648, |
| "num_input_tokens_seen": 180140916, |
| "step": 900, |
| "token_acc": 0.7212711540534449 |
| }, |
| { |
| "epoch": 0.2112328960030042, |
| "eval_loss": 1.102053165435791, |
| "eval_runtime": 32.763, |
| "eval_samples_per_second": 30.522, |
| "eval_steps_per_second": 1.282, |
| "eval_token_acc": 0.7141986565407077, |
| "num_input_tokens_seen": 180140916, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.21357992818081536, |
| "grad_norm": 1.927927016491826, |
| "learning_rate": 1.9852888297080784e-06, |
| "loss": 1.0789798736572265, |
| "num_input_tokens_seen": 182134725, |
| "step": 910, |
| "token_acc": 0.7180467099845159 |
| }, |
| { |
| "epoch": 0.21592696035862652, |
| "grad_norm": 1.633422631466873, |
| "learning_rate": 1.9845643345292055e-06, |
| "loss": 1.075742530822754, |
| "num_input_tokens_seen": 184161738, |
| "step": 920, |
| "token_acc": 0.719577260000721 |
| }, |
| { |
| "epoch": 0.21827399253643767, |
| "grad_norm": 1.763375240928624, |
| "learning_rate": 1.9838225642458328e-06, |
| "loss": 1.0633999824523925, |
| "num_input_tokens_seen": 186250896, |
| "step": 930, |
| "token_acc": 0.7216072711554525 |
| }, |
| { |
| "epoch": 0.22062102471424883, |
| "grad_norm": 1.578730561244102, |
| "learning_rate": 1.9830635318730153e-06, |
| "loss": 1.0807870864868163, |
| "num_input_tokens_seen": 188240646, |
| "step": 940, |
| "token_acc": 0.719998073905838 |
| }, |
| { |
| "epoch": 0.22296805689205998, |
| "grad_norm": 1.9778473417464, |
| "learning_rate": 1.9822872507286887e-06, |
| "loss": 1.0958086013793946, |
| "num_input_tokens_seen": 190240614, |
| "step": 950, |
| "token_acc": 0.715133457837701 |
| }, |
| { |
| "epoch": 0.22531508906987116, |
| "grad_norm": 1.7070736536906375, |
| "learning_rate": 1.9814937344334326e-06, |
| "loss": 1.083117961883545, |
| "num_input_tokens_seen": 192202005, |
| "step": 960, |
| "token_acc": 0.718299042165819 |
| }, |
| { |
| "epoch": 0.22766212124768231, |
| "grad_norm": 1.6694843702106625, |
| "learning_rate": 1.9806829969102353e-06, |
| "loss": 1.0489460945129394, |
| "num_input_tokens_seen": 194152464, |
| "step": 970, |
| "token_acc": 0.7243972802430247 |
| }, |
| { |
| "epoch": 0.23000915342549347, |
| "grad_norm": 1.6802225185406368, |
| "learning_rate": 1.9798550523842466e-06, |
| "loss": 1.055472183227539, |
| "num_input_tokens_seen": 196146252, |
| "step": 980, |
| "token_acc": 0.7222500499869107 |
| }, |
| { |
| "epoch": 0.23235618560330462, |
| "grad_norm": 1.586112316988885, |
| "learning_rate": 1.9790099153825295e-06, |
| "loss": 1.0688490867614746, |
| "num_input_tokens_seen": 198216198, |
| "step": 990, |
| "token_acc": 0.721910041723649 |
| }, |
| { |
| "epoch": 0.23470321778111577, |
| "grad_norm": 1.9657681362344652, |
| "learning_rate": 1.9781476007338054e-06, |
| "loss": 1.0997188568115235, |
| "num_input_tokens_seen": 200266242, |
| "step": 1000, |
| "token_acc": 0.7134316006040672 |
| }, |
| { |
| "epoch": 0.23470321778111577, |
| "eval_loss": 1.0927079916000366, |
| "eval_runtime": 32.3958, |
| "eval_samples_per_second": 30.868, |
| "eval_steps_per_second": 1.296, |
| "eval_token_acc": 0.7168070912490478, |
| "num_input_tokens_seen": 200266242, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.23705024995892693, |
| "grad_norm": 2.279451886611171, |
| "learning_rate": 1.9772681235681933e-06, |
| "loss": 1.0306278228759767, |
| "num_input_tokens_seen": 202268343, |
| "step": 1010, |
| "token_acc": 0.7296832940863017 |
| }, |
| { |
| "epoch": 0.23939728213673808, |
| "grad_norm": 1.7883291650438458, |
| "learning_rate": 1.976371499316945e-06, |
| "loss": 1.0757831573486327, |
| "num_input_tokens_seen": 204289632, |
| "step": 1020, |
| "token_acc": 0.7182072037465692 |
| }, |
| { |
| "epoch": 0.24174431431454926, |
| "grad_norm": 1.8342347796963645, |
| "learning_rate": 1.975457743712173e-06, |
| "loss": 1.0590785980224608, |
| "num_input_tokens_seen": 206327745, |
| "step": 1030, |
| "token_acc": 0.7225627285705905 |
| }, |
| { |
| "epoch": 0.24409134649236042, |
| "grad_norm": 1.762378045102792, |
| "learning_rate": 1.974526872786577e-06, |
| "loss": 1.0789016723632812, |
| "num_input_tokens_seen": 208322556, |
| "step": 1040, |
| "token_acc": 0.7185882266690018 |
| }, |
| { |
| "epoch": 0.24643837867017157, |
| "grad_norm": 1.7642619697840807, |
| "learning_rate": 1.97357890287316e-06, |
| "loss": 1.090459442138672, |
| "num_input_tokens_seen": 210345396, |
| "step": 1050, |
| "token_acc": 0.715633342030789 |
| }, |
| { |
| "epoch": 0.24878541084798272, |
| "grad_norm": 1.8062010829609079, |
| "learning_rate": 1.9726138506049433e-06, |
| "loss": 1.0327832221984863, |
| "num_input_tokens_seen": 212289177, |
| "step": 1060, |
| "token_acc": 0.728890125802145 |
| }, |
| { |
| "epoch": 0.2511324430257939, |
| "grad_norm": 1.6741852997103905, |
| "learning_rate": 1.971631732914674e-06, |
| "loss": 1.0438125610351563, |
| "num_input_tokens_seen": 214294110, |
| "step": 1070, |
| "token_acc": 0.7274771422710105 |
| }, |
| { |
| "epoch": 0.25347947520360503, |
| "grad_norm": 1.8889183202576878, |
| "learning_rate": 1.970632567034527e-06, |
| "loss": 1.0874737739562987, |
| "num_input_tokens_seen": 216250632, |
| "step": 1080, |
| "token_acc": 0.7169543090609345 |
| }, |
| { |
| "epoch": 0.2558265073814162, |
| "grad_norm": 1.768581214259287, |
| "learning_rate": 1.9696163704958057e-06, |
| "loss": 1.0529390335083009, |
| "num_input_tokens_seen": 218235084, |
| "step": 1090, |
| "token_acc": 0.7233062911737727 |
| }, |
| { |
| "epoch": 0.25817353955922734, |
| "grad_norm": 1.6728742294003298, |
| "learning_rate": 1.968583161128631e-06, |
| "loss": 1.0434741973876953, |
| "num_input_tokens_seen": 220250775, |
| "step": 1100, |
| "token_acc": 0.72555486645587 |
| }, |
| { |
| "epoch": 0.25817353955922734, |
| "eval_loss": 1.0830632448196411, |
| "eval_runtime": 32.7745, |
| "eval_samples_per_second": 30.512, |
| "eval_steps_per_second": 1.281, |
| "eval_token_acc": 0.7187414879619584, |
| "num_input_tokens_seen": 220250775, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2605205717370385, |
| "grad_norm": 3.5179698506650827, |
| "learning_rate": 1.9675329570616295e-06, |
| "loss": 1.036564826965332, |
| "num_input_tokens_seen": 222248643, |
| "step": 1110, |
| "token_acc": 0.7253935790918138 |
| }, |
| { |
| "epoch": 0.26286760391484965, |
| "grad_norm": 1.7194590974245725, |
| "learning_rate": 1.9664657767216175e-06, |
| "loss": 1.034214401245117, |
| "num_input_tokens_seen": 224176074, |
| "step": 1120, |
| "token_acc": 0.731699968385116 |
| }, |
| { |
| "epoch": 0.2652146360926608, |
| "grad_norm": 1.6151699401355315, |
| "learning_rate": 1.9653816388332737e-06, |
| "loss": 1.0186534881591798, |
| "num_input_tokens_seen": 226256241, |
| "step": 1130, |
| "token_acc": 0.729031512194937 |
| }, |
| { |
| "epoch": 0.267561668270472, |
| "grad_norm": 1.915048663566233, |
| "learning_rate": 1.9642805624188146e-06, |
| "loss": 1.0460872650146484, |
| "num_input_tokens_seen": 228227991, |
| "step": 1140, |
| "token_acc": 0.7245494456551131 |
| }, |
| { |
| "epoch": 0.26990870044828313, |
| "grad_norm": 2.3808335250565387, |
| "learning_rate": 1.963162566797658e-06, |
| "loss": 1.0558183670043946, |
| "num_input_tokens_seen": 230254347, |
| "step": 1150, |
| "token_acc": 0.7232573802936575 |
| }, |
| { |
| "epoch": 0.2722557326260943, |
| "grad_norm": 1.7367289249419906, |
| "learning_rate": 1.962027671586086e-06, |
| "loss": 1.050713062286377, |
| "num_input_tokens_seen": 232285218, |
| "step": 1160, |
| "token_acc": 0.7248766799700481 |
| }, |
| { |
| "epoch": 0.27460276480390544, |
| "grad_norm": 1.8903258230442381, |
| "learning_rate": 1.9608758966968984e-06, |
| "loss": 1.0442859649658203, |
| "num_input_tokens_seen": 234350787, |
| "step": 1170, |
| "token_acc": 0.7246446168983565 |
| }, |
| { |
| "epoch": 0.2769497969817166, |
| "grad_norm": 2.0858660720659064, |
| "learning_rate": 1.959707262339067e-06, |
| "loss": 1.0628435134887695, |
| "num_input_tokens_seen": 236401623, |
| "step": 1180, |
| "token_acc": 0.7223079815551465 |
| }, |
| { |
| "epoch": 0.2792968291595278, |
| "grad_norm": 2.977405059549, |
| "learning_rate": 1.9585217890173757e-06, |
| "loss": 1.0738126754760742, |
| "num_input_tokens_seen": 238361190, |
| "step": 1190, |
| "token_acc": 0.7190871093733786 |
| }, |
| { |
| "epoch": 0.28164386133733893, |
| "grad_norm": 2.283563309099777, |
| "learning_rate": 1.957319497532067e-06, |
| "loss": 1.0180787086486816, |
| "num_input_tokens_seen": 240437730, |
| "step": 1200, |
| "token_acc": 0.7330017297652685 |
| }, |
| { |
| "epoch": 0.28164386133733893, |
| "eval_loss": 1.0745400190353394, |
| "eval_runtime": 32.4066, |
| "eval_samples_per_second": 30.858, |
| "eval_steps_per_second": 1.296, |
| "eval_token_acc": 0.7201588144317999, |
| "num_input_tokens_seen": 240437730, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2839908935151501, |
| "grad_norm": 1.9270515119564795, |
| "learning_rate": 1.956100408978472e-06, |
| "loss": 1.0345954895019531, |
| "num_input_tokens_seen": 242382708, |
| "step": 1210, |
| "token_acc": 0.7277172037115998 |
| }, |
| { |
| "epoch": 0.28633792569296124, |
| "grad_norm": 1.5733358413499778, |
| "learning_rate": 1.954864544746643e-06, |
| "loss": 1.0476463317871094, |
| "num_input_tokens_seen": 244350303, |
| "step": 1220, |
| "token_acc": 0.7255343803753794 |
| }, |
| { |
| "epoch": 0.2886849578707724, |
| "grad_norm": 2.0867528996051345, |
| "learning_rate": 1.9536119265209757e-06, |
| "loss": 1.0576335906982421, |
| "num_input_tokens_seen": 246334116, |
| "step": 1230, |
| "token_acc": 0.7241534895699202 |
| }, |
| { |
| "epoch": 0.29103199004858354, |
| "grad_norm": 1.587056177259835, |
| "learning_rate": 1.952342576279833e-06, |
| "loss": 1.0451471328735351, |
| "num_input_tokens_seen": 248362662, |
| "step": 1240, |
| "token_acc": 0.7264873056477157 |
| }, |
| { |
| "epoch": 0.2933790222263947, |
| "grad_norm": 3.147776681472526, |
| "learning_rate": 1.9510565162951534e-06, |
| "loss": 1.0531164169311524, |
| "num_input_tokens_seen": 250326474, |
| "step": 1250, |
| "token_acc": 0.7241427379495411 |
| }, |
| { |
| "epoch": 0.2957260544042059, |
| "grad_norm": 1.677213988705626, |
| "learning_rate": 1.9497537691320667e-06, |
| "loss": 1.0469918251037598, |
| "num_input_tokens_seen": 252382641, |
| "step": 1260, |
| "token_acc": 0.7247498649880667 |
| }, |
| { |
| "epoch": 0.29807308658201703, |
| "grad_norm": 3.244921913867558, |
| "learning_rate": 1.9484343576484934e-06, |
| "loss": 1.0731307983398437, |
| "num_input_tokens_seen": 254380842, |
| "step": 1270, |
| "token_acc": 0.7198529707146587 |
| }, |
| { |
| "epoch": 0.3004201187598282, |
| "grad_norm": 2.2715118534896424, |
| "learning_rate": 1.9470983049947442e-06, |
| "loss": 1.0327179908752442, |
| "num_input_tokens_seen": 256367745, |
| "step": 1280, |
| "token_acc": 0.7273322442040123 |
| }, |
| { |
| "epoch": 0.30276715093763934, |
| "grad_norm": 2.388511262608066, |
| "learning_rate": 1.9457456346131168e-06, |
| "loss": 1.0295280456542968, |
| "num_input_tokens_seen": 258362418, |
| "step": 1290, |
| "token_acc": 0.7289352257814815 |
| }, |
| { |
| "epoch": 0.3051141831154505, |
| "grad_norm": 2.284896449465709, |
| "learning_rate": 1.944376370237481e-06, |
| "loss": 1.0356334686279296, |
| "num_input_tokens_seen": 260389752, |
| "step": 1300, |
| "token_acc": 0.7264502277424404 |
| }, |
| { |
| "epoch": 0.3051141831154505, |
| "eval_loss": 1.0682131052017212, |
| "eval_runtime": 32.3728, |
| "eval_samples_per_second": 30.89, |
| "eval_steps_per_second": 1.297, |
| "eval_token_acc": 0.7214607234366704, |
| "num_input_tokens_seen": 260389752, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.30746121529326165, |
| "grad_norm": 1.8462096822584517, |
| "learning_rate": 1.9429905358928646e-06, |
| "loss": 1.0431997299194335, |
| "num_input_tokens_seen": 262425369, |
| "step": 1310, |
| "token_acc": 0.7247579875646393 |
| }, |
| { |
| "epoch": 0.30980824747107283, |
| "grad_norm": 2.7288254092061286, |
| "learning_rate": 1.94158815589503e-06, |
| "loss": 1.03179931640625, |
| "num_input_tokens_seen": 264478839, |
| "step": 1320, |
| "token_acc": 0.7273030599423818 |
| }, |
| { |
| "epoch": 0.312155279648884, |
| "grad_norm": 2.0483013477422563, |
| "learning_rate": 1.9401692548500502e-06, |
| "loss": 1.0194345474243165, |
| "num_input_tokens_seen": 266467188, |
| "step": 1330, |
| "token_acc": 0.7318709842049548 |
| }, |
| { |
| "epoch": 0.31450231182669514, |
| "grad_norm": 3.607937481626218, |
| "learning_rate": 1.938733857653874e-06, |
| "loss": 1.0359786987304687, |
| "num_input_tokens_seen": 268553511, |
| "step": 1340, |
| "token_acc": 0.7270260288085842 |
| }, |
| { |
| "epoch": 0.3168493440045063, |
| "grad_norm": 2.2908695328416244, |
| "learning_rate": 1.9372819894918914e-06, |
| "loss": 1.005875015258789, |
| "num_input_tokens_seen": 270556128, |
| "step": 1350, |
| "token_acc": 0.733425647272143 |
| }, |
| { |
| "epoch": 0.31919637618231744, |
| "grad_norm": 2.2530826851795576, |
| "learning_rate": 1.935813675838491e-06, |
| "loss": 1.0363348007202149, |
| "num_input_tokens_seen": 272585331, |
| "step": 1360, |
| "token_acc": 0.7270068150894993 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 1.6599911510535466, |
| "learning_rate": 1.934328942456612e-06, |
| "loss": 0.9922657012939453, |
| "num_input_tokens_seen": 274625832, |
| "step": 1370, |
| "token_acc": 0.7369969482933556 |
| }, |
| { |
| "epoch": 0.32389044053793975, |
| "grad_norm": 1.6571812543491504, |
| "learning_rate": 1.9328278153972946e-06, |
| "loss": 1.0838043212890625, |
| "num_input_tokens_seen": 276646638, |
| "step": 1380, |
| "token_acc": 0.7254781164111181 |
| }, |
| { |
| "epoch": 0.32623747271575093, |
| "grad_norm": 1.7846961468797993, |
| "learning_rate": 1.9313103209992204e-06, |
| "loss": 1.0071705818176269, |
| "num_input_tokens_seen": 278652339, |
| "step": 1390, |
| "token_acc": 0.733368638373526 |
| }, |
| { |
| "epoch": 0.3285845048935621, |
| "grad_norm": 2.1490918049490717, |
| "learning_rate": 1.929776485888251e-06, |
| "loss": 1.0504549026489258, |
| "num_input_tokens_seen": 280677636, |
| "step": 1400, |
| "token_acc": 0.72332943463746 |
| }, |
| { |
| "epoch": 0.3285845048935621, |
| "eval_loss": 1.061837077140808, |
| "eval_runtime": 32.7164, |
| "eval_samples_per_second": 30.566, |
| "eval_steps_per_second": 1.284, |
| "eval_token_acc": 0.7231458184252441, |
| "num_input_tokens_seen": 280677636, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.33093153707137324, |
| "grad_norm": 1.776580604562134, |
| "learning_rate": 1.928226336976963e-06, |
| "loss": 1.0266141891479492, |
| "num_input_tokens_seen": 282669069, |
| "step": 1410, |
| "token_acc": 0.7291277131940492 |
| }, |
| { |
| "epoch": 0.3332785692491844, |
| "grad_norm": 8.438214405501748, |
| "learning_rate": 1.926659901464172e-06, |
| "loss": 1.0292797088623047, |
| "num_input_tokens_seen": 284659779, |
| "step": 1420, |
| "token_acc": 0.7288078819771109 |
| }, |
| { |
| "epoch": 0.33562560142699555, |
| "grad_norm": 2.252060217551861, |
| "learning_rate": 1.925077206834458e-06, |
| "loss": 1.0228628158569335, |
| "num_input_tokens_seen": 286673274, |
| "step": 1430, |
| "token_acc": 0.7280252171611444 |
| }, |
| { |
| "epoch": 0.3379726336048067, |
| "grad_norm": 1.4651418770258904, |
| "learning_rate": 1.923478280857682e-06, |
| "loss": 1.0042032241821288, |
| "num_input_tokens_seen": 288677157, |
| "step": 1440, |
| "token_acc": 0.7343410272213868 |
| }, |
| { |
| "epoch": 0.34031966578261785, |
| "grad_norm": 1.6827171089675037, |
| "learning_rate": 1.9218631515885003e-06, |
| "loss": 1.0294583320617676, |
| "num_input_tokens_seen": 290678706, |
| "step": 1450, |
| "token_acc": 0.7304443621152334 |
| }, |
| { |
| "epoch": 0.34266669796042903, |
| "grad_norm": 1.7341043440646111, |
| "learning_rate": 1.9202318473658702e-06, |
| "loss": 0.9965463638305664, |
| "num_input_tokens_seen": 292647750, |
| "step": 1460, |
| "token_acc": 0.736443122122828 |
| }, |
| { |
| "epoch": 0.3450137301382402, |
| "grad_norm": 1.706569258628379, |
| "learning_rate": 1.918584396812554e-06, |
| "loss": 1.0162506103515625, |
| "num_input_tokens_seen": 294701517, |
| "step": 1470, |
| "token_acc": 0.7316330245383567 |
| }, |
| { |
| "epoch": 0.34736076231605134, |
| "grad_norm": 1.6208113959472872, |
| "learning_rate": 1.9169208288346163e-06, |
| "loss": 1.0112849235534669, |
| "num_input_tokens_seen": 296720586, |
| "step": 1480, |
| "token_acc": 0.732423183545091 |
| }, |
| { |
| "epoch": 0.3497077944938625, |
| "grad_norm": 1.7865465491021926, |
| "learning_rate": 1.9152411726209172e-06, |
| "loss": 1.0156356811523437, |
| "num_input_tokens_seen": 298684938, |
| "step": 1490, |
| "token_acc": 0.7308413793103449 |
| }, |
| { |
| "epoch": 0.35205482667167365, |
| "grad_norm": 2.059441241693384, |
| "learning_rate": 1.9135454576426007e-06, |
| "loss": 1.0275184631347656, |
| "num_input_tokens_seen": 300684201, |
| "step": 1500, |
| "token_acc": 0.730526369912453 |
| }, |
| { |
| "epoch": 0.35205482667167365, |
| "eval_loss": 1.0552641153335571, |
| "eval_runtime": 32.4705, |
| "eval_samples_per_second": 30.797, |
| "eval_steps_per_second": 1.293, |
| "eval_token_acc": 0.72383370651647, |
| "num_input_tokens_seen": 300684201, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.35440185884948483, |
| "grad_norm": 2.3565377610515594, |
| "learning_rate": 1.9118337136525756e-06, |
| "loss": 1.0185004234313966, |
| "num_input_tokens_seen": 302704359, |
| "step": 1510, |
| "token_acc": 0.7304355716162425 |
| }, |
| { |
| "epoch": 0.356748891027296, |
| "grad_norm": 14.877826986152865, |
| "learning_rate": 1.9101059706849955e-06, |
| "loss": 1.019582176208496, |
| "num_input_tokens_seen": 304651629, |
| "step": 1520, |
| "token_acc": 0.731234582403383 |
| }, |
| { |
| "epoch": 0.35909592320510714, |
| "grad_norm": 2.879334483584151, |
| "learning_rate": 1.908362259054731e-06, |
| "loss": 1.0251285552978515, |
| "num_input_tokens_seen": 306641097, |
| "step": 1530, |
| "token_acc": 0.7294201685316217 |
| }, |
| { |
| "epoch": 0.3614429553829183, |
| "grad_norm": 1.7887355243868148, |
| "learning_rate": 1.9066026093568377e-06, |
| "loss": 1.0157214164733888, |
| "num_input_tokens_seen": 308660178, |
| "step": 1540, |
| "token_acc": 0.7307293262997984 |
| }, |
| { |
| "epoch": 0.36378998756072944, |
| "grad_norm": 1.867513936920377, |
| "learning_rate": 1.9048270524660196e-06, |
| "loss": 1.0161379814147948, |
| "num_input_tokens_seen": 310777926, |
| "step": 1550, |
| "token_acc": 0.7304925609175636 |
| }, |
| { |
| "epoch": 0.3661370197385406, |
| "grad_norm": 6.302806843132354, |
| "learning_rate": 1.9030356195360873e-06, |
| "loss": 0.9866199493408203, |
| "num_input_tokens_seen": 312788916, |
| "step": 1560, |
| "token_acc": 0.7381302995035983 |
| }, |
| { |
| "epoch": 0.36848405191635175, |
| "grad_norm": 1.8930345198459555, |
| "learning_rate": 1.9012283419994113e-06, |
| "loss": 1.0311415672302247, |
| "num_input_tokens_seen": 314814855, |
| "step": 1570, |
| "token_acc": 0.7291705656140012 |
| }, |
| { |
| "epoch": 0.37083108409416293, |
| "grad_norm": 2.3487824750816646, |
| "learning_rate": 1.899405251566371e-06, |
| "loss": 1.0350725173950195, |
| "num_input_tokens_seen": 316867344, |
| "step": 1580, |
| "token_acc": 0.7278371704934657 |
| }, |
| { |
| "epoch": 0.3731781162719741, |
| "grad_norm": 2.0782965598493917, |
| "learning_rate": 1.8975663802247975e-06, |
| "loss": 1.0283987998962403, |
| "num_input_tokens_seen": 318871404, |
| "step": 1590, |
| "token_acc": 0.7280485561890748 |
| }, |
| { |
| "epoch": 0.37552514844978524, |
| "grad_norm": 2.8179476770543546, |
| "learning_rate": 1.8957117602394128e-06, |
| "loss": 1.027695655822754, |
| "num_input_tokens_seen": 320871228, |
| "step": 1600, |
| "token_acc": 0.7284322929815703 |
| }, |
| { |
| "epoch": 0.37552514844978524, |
| "eval_loss": 1.0503556728363037, |
| "eval_runtime": 32.4119, |
| "eval_samples_per_second": 30.853, |
| "eval_steps_per_second": 1.296, |
| "eval_token_acc": 0.7255742018882297, |
| "num_input_tokens_seen": 320871228, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3778721806275964, |
| "grad_norm": 2.240496844348581, |
| "learning_rate": 1.8938414241512637e-06, |
| "loss": 1.0263992309570313, |
| "num_input_tokens_seen": 322930128, |
| "step": 1610, |
| "token_acc": 0.731757208141934 |
| }, |
| { |
| "epoch": 0.38021921280540755, |
| "grad_norm": 3.896191708778685, |
| "learning_rate": 1.8919554047771507e-06, |
| "loss": 1.0006643295288087, |
| "num_input_tokens_seen": 324982575, |
| "step": 1620, |
| "token_acc": 0.732137966433454 |
| }, |
| { |
| "epoch": 0.38256624498321873, |
| "grad_norm": 1.7935819973243883, |
| "learning_rate": 1.8900537352090523e-06, |
| "loss": 0.9882081985473633, |
| "num_input_tokens_seen": 326990898, |
| "step": 1630, |
| "token_acc": 0.7385387731711782 |
| }, |
| { |
| "epoch": 0.38491327716102985, |
| "grad_norm": 3.1640907355889496, |
| "learning_rate": 1.8881364488135445e-06, |
| "loss": 1.0018336296081543, |
| "num_input_tokens_seen": 329033799, |
| "step": 1640, |
| "token_acc": 0.7350213182627736 |
| }, |
| { |
| "epoch": 0.38726030933884104, |
| "grad_norm": 5.630791095478135, |
| "learning_rate": 1.8862035792312146e-06, |
| "loss": 0.9879220962524414, |
| "num_input_tokens_seen": 331067478, |
| "step": 1650, |
| "token_acc": 0.736295696568692 |
| }, |
| { |
| "epoch": 0.3896073415166522, |
| "grad_norm": 1.5905696004173981, |
| "learning_rate": 1.8842551603760723e-06, |
| "loss": 1.004323387145996, |
| "num_input_tokens_seen": 333089880, |
| "step": 1660, |
| "token_acc": 0.7334599037600028 |
| }, |
| { |
| "epoch": 0.39195437369446334, |
| "grad_norm": 43.2007654518171, |
| "learning_rate": 1.8822912264349532e-06, |
| "loss": 1.0126733779907227, |
| "num_input_tokens_seen": 335093103, |
| "step": 1670, |
| "token_acc": 0.7332479964381122 |
| }, |
| { |
| "epoch": 0.3943014058722745, |
| "grad_norm": 1.6733459020369337, |
| "learning_rate": 1.8803118118669202e-06, |
| "loss": 1.0368854522705078, |
| "num_input_tokens_seen": 337115598, |
| "step": 1680, |
| "token_acc": 0.7274540217150455 |
| }, |
| { |
| "epoch": 0.39664843805008565, |
| "grad_norm": 1.9876180817181506, |
| "learning_rate": 1.8783169514026577e-06, |
| "loss": 1.0030999183654785, |
| "num_input_tokens_seen": 339154959, |
| "step": 1690, |
| "token_acc": 0.7345074320050601 |
| }, |
| { |
| "epoch": 0.39899547022789683, |
| "grad_norm": 1.842434463603931, |
| "learning_rate": 1.8763066800438634e-06, |
| "loss": 0.9946871757507324, |
| "num_input_tokens_seen": 341186700, |
| "step": 1700, |
| "token_acc": 0.7359575477937458 |
| }, |
| { |
| "epoch": 0.39899547022789683, |
| "eval_loss": 1.0446056127548218, |
| "eval_runtime": 33.305, |
| "eval_samples_per_second": 30.026, |
| "eval_steps_per_second": 1.261, |
| "eval_token_acc": 0.7265437085939844, |
| "num_input_tokens_seen": 341186700, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.40134250240570796, |
| "grad_norm": 1.9481089599377517, |
| "learning_rate": 1.8742810330626335e-06, |
| "loss": 1.0056350708007813, |
| "num_input_tokens_seen": 343197345, |
| "step": 1710, |
| "token_acc": 0.7343789679900354 |
| }, |
| { |
| "epoch": 0.40368953458351914, |
| "grad_norm": 1.8925573831015579, |
| "learning_rate": 1.8722400460008437e-06, |
| "loss": 1.0299295425415038, |
| "num_input_tokens_seen": 345220860, |
| "step": 1720, |
| "token_acc": 0.727836675491576 |
| }, |
| { |
| "epoch": 0.4060365667613303, |
| "grad_norm": 1.568094384198171, |
| "learning_rate": 1.8701837546695256e-06, |
| "loss": 1.011802864074707, |
| "num_input_tokens_seen": 347269032, |
| "step": 1730, |
| "token_acc": 0.731503068944188 |
| }, |
| { |
| "epoch": 0.40838359893914145, |
| "grad_norm": 4.690102343755759, |
| "learning_rate": 1.8681121951482393e-06, |
| "loss": 1.0340707778930665, |
| "num_input_tokens_seen": 349265856, |
| "step": 1740, |
| "token_acc": 0.7287572174652813 |
| }, |
| { |
| "epoch": 0.4107306311169526, |
| "grad_norm": 2.0732894110715776, |
| "learning_rate": 1.8660254037844386e-06, |
| "loss": 1.0054452896118165, |
| "num_input_tokens_seen": 351220833, |
| "step": 1750, |
| "token_acc": 0.7349583487050085 |
| }, |
| { |
| "epoch": 0.41307766329476375, |
| "grad_norm": 4.563573246901434, |
| "learning_rate": 1.863923417192835e-06, |
| "loss": 0.9984481811523438, |
| "num_input_tokens_seen": 353217660, |
| "step": 1760, |
| "token_acc": 0.7346953872236972 |
| }, |
| { |
| "epoch": 0.41542469547257493, |
| "grad_norm": 1.8182323815552697, |
| "learning_rate": 1.861806272254755e-06, |
| "loss": 1.0026565551757813, |
| "num_input_tokens_seen": 355231713, |
| "step": 1770, |
| "token_acc": 0.734238520256768 |
| }, |
| { |
| "epoch": 0.41777172765038606, |
| "grad_norm": 2.3723528968369867, |
| "learning_rate": 1.859674006117491e-06, |
| "loss": 0.9838489532470703, |
| "num_input_tokens_seen": 357318357, |
| "step": 1780, |
| "token_acc": 0.7385274102305481 |
| }, |
| { |
| "epoch": 0.42011875982819724, |
| "grad_norm": 2.413365084744393, |
| "learning_rate": 1.8575266561936522e-06, |
| "loss": 1.0196653366088868, |
| "num_input_tokens_seen": 359351646, |
| "step": 1790, |
| "token_acc": 0.730992332131187 |
| }, |
| { |
| "epoch": 0.4224657920060084, |
| "grad_norm": 7.914722238930336, |
| "learning_rate": 1.8553642601605066e-06, |
| "loss": 0.9948186874389648, |
| "num_input_tokens_seen": 361303284, |
| "step": 1800, |
| "token_acc": 0.7360711800377772 |
| }, |
| { |
| "epoch": 0.4224657920060084, |
| "eval_loss": 1.038891315460205, |
| "eval_runtime": 32.4449, |
| "eval_samples_per_second": 30.821, |
| "eval_steps_per_second": 1.295, |
| "eval_token_acc": 0.7277024999422913, |
| "num_input_tokens_seen": 361303284, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.42481282418381955, |
| "grad_norm": 1.7384957796876852, |
| "learning_rate": 1.8531868559593203e-06, |
| "loss": 1.0075714111328125, |
| "num_input_tokens_seen": 363290772, |
| "step": 1810, |
| "token_acc": 0.7332521267838883 |
| }, |
| { |
| "epoch": 0.42715985636163073, |
| "grad_norm": 1.73396216177198, |
| "learning_rate": 1.850994481794692e-06, |
| "loss": 1.018679428100586, |
| "num_input_tokens_seen": 365299026, |
| "step": 1820, |
| "token_acc": 0.7299744624828494 |
| }, |
| { |
| "epoch": 0.42950688853944186, |
| "grad_norm": 1.859054699772832, |
| "learning_rate": 1.8487871761338819e-06, |
| "loss": 0.9975422859191895, |
| "num_input_tokens_seen": 367342086, |
| "step": 1830, |
| "token_acc": 0.735841141099147 |
| }, |
| { |
| "epoch": 0.43185392071725304, |
| "grad_norm": 1.6167732458245692, |
| "learning_rate": 1.8465649777061376e-06, |
| "loss": 1.0366539001464843, |
| "num_input_tokens_seen": 369276633, |
| "step": 1840, |
| "token_acc": 0.7277804414793901 |
| }, |
| { |
| "epoch": 0.4342009528950642, |
| "grad_norm": 2.534040309718505, |
| "learning_rate": 1.844327925502015e-06, |
| "loss": 1.0096059799194337, |
| "num_input_tokens_seen": 371265615, |
| "step": 1850, |
| "token_acc": 0.7326266219047257 |
| }, |
| { |
| "epoch": 0.43654798507287534, |
| "grad_norm": 1.9228862468394357, |
| "learning_rate": 1.8420760587726921e-06, |
| "loss": 1.0271913528442382, |
| "num_input_tokens_seen": 373272270, |
| "step": 1860, |
| "token_acc": 0.7302226164565024 |
| }, |
| { |
| "epoch": 0.4388950172506865, |
| "grad_norm": 1.5025282734361622, |
| "learning_rate": 1.8398094170292829e-06, |
| "loss": 1.0059158325195312, |
| "num_input_tokens_seen": 375279099, |
| "step": 1870, |
| "token_acc": 0.7330154465542768 |
| }, |
| { |
| "epoch": 0.44124204942849765, |
| "grad_norm": 4.754818039721933, |
| "learning_rate": 1.8375280400421418e-06, |
| "loss": 0.9967041969299316, |
| "num_input_tokens_seen": 377223396, |
| "step": 1880, |
| "token_acc": 0.7358239778762203 |
| }, |
| { |
| "epoch": 0.44358908160630883, |
| "grad_norm": 1.691685468916323, |
| "learning_rate": 1.8352319678401674e-06, |
| "loss": 0.999173927307129, |
| "num_input_tokens_seen": 379235661, |
| "step": 1890, |
| "token_acc": 0.7347835016672305 |
| }, |
| { |
| "epoch": 0.44593611378411996, |
| "grad_norm": 1.7737231328640157, |
| "learning_rate": 1.8329212407100993e-06, |
| "loss": 0.9919824600219727, |
| "num_input_tokens_seen": 381243486, |
| "step": 1900, |
| "token_acc": 0.7371798315515523 |
| }, |
| { |
| "epoch": 0.44593611378411996, |
| "eval_loss": 1.0355346202850342, |
| "eval_runtime": 32.2582, |
| "eval_samples_per_second": 31.0, |
| "eval_steps_per_second": 1.302, |
| "eval_token_acc": 0.7281641698021745, |
| "num_input_tokens_seen": 381243486, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.44828314596193114, |
| "grad_norm": 2.5554510353139115, |
| "learning_rate": 1.8305958991958126e-06, |
| "loss": 0.9984329223632813, |
| "num_input_tokens_seen": 383266650, |
| "step": 1910, |
| "token_acc": 0.7348018362631924 |
| }, |
| { |
| "epoch": 0.4506301781397423, |
| "grad_norm": 3.4304227222936854, |
| "learning_rate": 1.8282559840976042e-06, |
| "loss": 0.9989996910095215, |
| "num_input_tokens_seen": 385198056, |
| "step": 1920, |
| "token_acc": 0.7340237302248127 |
| }, |
| { |
| "epoch": 0.45297721031755345, |
| "grad_norm": 1.8203825695395843, |
| "learning_rate": 1.8259015364714785e-06, |
| "loss": 1.005854892730713, |
| "num_input_tokens_seen": 387174645, |
| "step": 1930, |
| "token_acc": 0.7344124724323412 |
| }, |
| { |
| "epoch": 0.45532424249536463, |
| "grad_norm": 2.3790186216357387, |
| "learning_rate": 1.8235325976284273e-06, |
| "loss": 1.0130582809448243, |
| "num_input_tokens_seen": 389123001, |
| "step": 1940, |
| "token_acc": 0.7329481871636396 |
| }, |
| { |
| "epoch": 0.45767127467317575, |
| "grad_norm": 2.2702679233421366, |
| "learning_rate": 1.821149209133704e-06, |
| "loss": 1.0077364921569825, |
| "num_input_tokens_seen": 391185051, |
| "step": 1950, |
| "token_acc": 0.7325617754275695 |
| }, |
| { |
| "epoch": 0.46001830685098694, |
| "grad_norm": 1.7113606013198168, |
| "learning_rate": 1.8187514128060944e-06, |
| "loss": 1.0020957946777345, |
| "num_input_tokens_seen": 393232749, |
| "step": 1960, |
| "token_acc": 0.7342212411181741 |
| }, |
| { |
| "epoch": 0.46236533902879806, |
| "grad_norm": 2.0134995821074524, |
| "learning_rate": 1.816339250717184e-06, |
| "loss": 0.9884714126586914, |
| "num_input_tokens_seen": 395240403, |
| "step": 1970, |
| "token_acc": 0.7366033551966206 |
| }, |
| { |
| "epoch": 0.46471237120660924, |
| "grad_norm": 3.624673089989278, |
| "learning_rate": 1.8139127651906181e-06, |
| "loss": 1.0036752700805665, |
| "num_input_tokens_seen": 397222695, |
| "step": 1980, |
| "token_acc": 0.7327492557949239 |
| }, |
| { |
| "epoch": 0.4670594033844204, |
| "grad_norm": 12.741541567504669, |
| "learning_rate": 1.811471998801361e-06, |
| "loss": 1.0088150024414062, |
| "num_input_tokens_seen": 399265515, |
| "step": 1990, |
| "token_acc": 0.7318671375057033 |
| }, |
| { |
| "epoch": 0.46940643556223155, |
| "grad_norm": 1.9147316254240543, |
| "learning_rate": 1.8090169943749474e-06, |
| "loss": 1.0098794937133788, |
| "num_input_tokens_seen": 401254572, |
| "step": 2000, |
| "token_acc": 0.7348985741915172 |
| }, |
| { |
| "epoch": 0.46940643556223155, |
| "eval_loss": 1.0299264192581177, |
| "eval_runtime": 32.8145, |
| "eval_samples_per_second": 30.474, |
| "eval_steps_per_second": 1.28, |
| "eval_token_acc": 0.7290736594261443, |
| "num_input_tokens_seen": 401254572, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47175346774004273, |
| "grad_norm": 1.8961444721894498, |
| "learning_rate": 1.8065477949867325e-06, |
| "loss": 1.016146469116211, |
| "num_input_tokens_seen": 403296912, |
| "step": 2010, |
| "token_acc": 0.7310783889798314 |
| }, |
| { |
| "epoch": 0.47410049991785386, |
| "grad_norm": 1.5674703012341533, |
| "learning_rate": 1.8040644439611345e-06, |
| "loss": 1.0078514099121094, |
| "num_input_tokens_seen": 405292185, |
| "step": 2020, |
| "token_acc": 0.7319825043230597 |
| }, |
| { |
| "epoch": 0.47644753209566504, |
| "grad_norm": 1.9494898023759353, |
| "learning_rate": 1.8015669848708766e-06, |
| "loss": 1.0296178817749024, |
| "num_input_tokens_seen": 407303625, |
| "step": 2030, |
| "token_acc": 0.7293832613834421 |
| }, |
| { |
| "epoch": 0.47879456427347616, |
| "grad_norm": 10.968015568038117, |
| "learning_rate": 1.7990554615362197e-06, |
| "loss": 0.9932464599609375, |
| "num_input_tokens_seen": 409284657, |
| "step": 2040, |
| "token_acc": 0.7361576877608628 |
| }, |
| { |
| "epoch": 0.48114159645128735, |
| "grad_norm": 1.5634395112041464, |
| "learning_rate": 1.7965299180241961e-06, |
| "loss": 0.9930622100830078, |
| "num_input_tokens_seen": 411350526, |
| "step": 2050, |
| "token_acc": 0.7371341064431953 |
| }, |
| { |
| "epoch": 0.4834886286290985, |
| "grad_norm": 4.940871877481185, |
| "learning_rate": 1.7939903986478354e-06, |
| "loss": 0.9968077659606933, |
| "num_input_tokens_seen": 413329158, |
| "step": 2060, |
| "token_acc": 0.7364979106166089 |
| }, |
| { |
| "epoch": 0.48583566080690965, |
| "grad_norm": 1.6357352710651227, |
| "learning_rate": 1.7914369479653857e-06, |
| "loss": 1.0207565307617188, |
| "num_input_tokens_seen": 415301217, |
| "step": 2070, |
| "token_acc": 0.7303749705838948 |
| }, |
| { |
| "epoch": 0.48818269298472083, |
| "grad_norm": 2.246788650609953, |
| "learning_rate": 1.788869610779534e-06, |
| "loss": 1.00274658203125, |
| "num_input_tokens_seen": 417261702, |
| "step": 2080, |
| "token_acc": 0.7341963767701447 |
| }, |
| { |
| "epoch": 0.49052972516253196, |
| "grad_norm": 1.56745308904305, |
| "learning_rate": 1.7862884321366187e-06, |
| "loss": 1.0060449600219727, |
| "num_input_tokens_seen": 419262057, |
| "step": 2090, |
| "token_acc": 0.7324562018430577 |
| }, |
| { |
| "epoch": 0.49287675734034314, |
| "grad_norm": 1.7117337983013203, |
| "learning_rate": 1.7836934573258397e-06, |
| "loss": 0.9900275230407715, |
| "num_input_tokens_seen": 421246710, |
| "step": 2100, |
| "token_acc": 0.7372878593403012 |
| }, |
| { |
| "epoch": 0.49287675734034314, |
| "eval_loss": 1.027020812034607, |
| "eval_runtime": 32.799, |
| "eval_samples_per_second": 30.489, |
| "eval_steps_per_second": 1.281, |
| "eval_token_acc": 0.7296553634495971, |
| "num_input_tokens_seen": 421246710, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.49522378951815427, |
| "grad_norm": 1.5242891687227014, |
| "learning_rate": 1.781084731878463e-06, |
| "loss": 0.9901479721069336, |
| "num_input_tokens_seen": 423187323, |
| "step": 2110, |
| "token_acc": 0.7374922148637526 |
| }, |
| { |
| "epoch": 0.49757082169596545, |
| "grad_norm": 2.148393307418336, |
| "learning_rate": 1.7784623015670235e-06, |
| "loss": 0.9794765472412109, |
| "num_input_tokens_seen": 425214681, |
| "step": 2120, |
| "token_acc": 0.7396016635749383 |
| }, |
| { |
| "epoch": 0.49991785387377663, |
| "grad_norm": 1.6777795098531292, |
| "learning_rate": 1.7758262124045194e-06, |
| "loss": 1.0104660987854004, |
| "num_input_tokens_seen": 427125735, |
| "step": 2130, |
| "token_acc": 0.7328506355953969 |
| }, |
| { |
| "epoch": 0.5022648860515878, |
| "grad_norm": 1.8399011401453165, |
| "learning_rate": 1.7731765106436071e-06, |
| "loss": 0.9876059532165528, |
| "num_input_tokens_seen": 429143655, |
| "step": 2140, |
| "token_acc": 0.7383790968301517 |
| }, |
| { |
| "epoch": 0.5046119182293989, |
| "grad_norm": 3.2054794139242047, |
| "learning_rate": 1.7705132427757892e-06, |
| "loss": 1.003396987915039, |
| "num_input_tokens_seen": 431161200, |
| "step": 2150, |
| "token_acc": 0.7355545283928578 |
| }, |
| { |
| "epoch": 0.5069589504072101, |
| "grad_norm": 1.5550880678151673, |
| "learning_rate": 1.7678364555305976e-06, |
| "loss": 0.9901845932006836, |
| "num_input_tokens_seen": 433164327, |
| "step": 2160, |
| "token_acc": 0.7361521188091766 |
| }, |
| { |
| "epoch": 0.5093059825850212, |
| "grad_norm": 2.416552637489239, |
| "learning_rate": 1.7651461958747741e-06, |
| "loss": 1.0047142028808593, |
| "num_input_tokens_seen": 435216456, |
| "step": 2170, |
| "token_acc": 0.733555096342685 |
| }, |
| { |
| "epoch": 0.5116530147628324, |
| "grad_norm": 2.387719191103811, |
| "learning_rate": 1.7624425110114479e-06, |
| "loss": 1.0148651123046875, |
| "num_input_tokens_seen": 437206023, |
| "step": 2180, |
| "token_acc": 0.7325390238452453 |
| }, |
| { |
| "epoch": 0.5140000469406436, |
| "grad_norm": 1.481562163308891, |
| "learning_rate": 1.7597254483793048e-06, |
| "loss": 0.9734397888183594, |
| "num_input_tokens_seen": 439163631, |
| "step": 2190, |
| "token_acc": 0.7413863843737306 |
| }, |
| { |
| "epoch": 0.5163470791184547, |
| "grad_norm": 7.115442308152491, |
| "learning_rate": 1.7569950556517563e-06, |
| "loss": 1.019681167602539, |
| "num_input_tokens_seen": 441170622, |
| "step": 2200, |
| "token_acc": 0.7295540569410798 |
| }, |
| { |
| "epoch": 0.5163470791184547, |
| "eval_loss": 1.0215942859649658, |
| "eval_runtime": 32.4901, |
| "eval_samples_per_second": 30.779, |
| "eval_steps_per_second": 1.293, |
| "eval_token_acc": 0.7315389764779207, |
| "num_input_tokens_seen": 441170622, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5186941112962659, |
| "grad_norm": 1.6355732837542087, |
| "learning_rate": 1.7542513807361037e-06, |
| "loss": 1.0146623611450196, |
| "num_input_tokens_seen": 443157417, |
| "step": 2210, |
| "token_acc": 0.7331868122856259 |
| }, |
| { |
| "epoch": 0.521041143474077, |
| "grad_norm": 1.5373557972963237, |
| "learning_rate": 1.7514944717726961e-06, |
| "loss": 0.996919822692871, |
| "num_input_tokens_seen": 445115421, |
| "step": 2220, |
| "token_acc": 0.7370941300202442 |
| }, |
| { |
| "epoch": 0.5233881756518882, |
| "grad_norm": 3.095727021967102, |
| "learning_rate": 1.748724377134086e-06, |
| "loss": 1.008862018585205, |
| "num_input_tokens_seen": 447113430, |
| "step": 2230, |
| "token_acc": 0.7321047500353728 |
| }, |
| { |
| "epoch": 0.5257352078296993, |
| "grad_norm": 1.6318669740450855, |
| "learning_rate": 1.7459411454241822e-06, |
| "loss": 1.0091367721557618, |
| "num_input_tokens_seen": 449067504, |
| "step": 2240, |
| "token_acc": 0.7306417201986045 |
| }, |
| { |
| "epoch": 0.5280822400075105, |
| "grad_norm": 1.8958429005632293, |
| "learning_rate": 1.743144825477394e-06, |
| "loss": 0.9806262016296386, |
| "num_input_tokens_seen": 451028514, |
| "step": 2250, |
| "token_acc": 0.7392674057301928 |
| }, |
| { |
| "epoch": 0.5304292721853217, |
| "grad_norm": 1.8300311325163234, |
| "learning_rate": 1.740335466357778e-06, |
| "loss": 0.9876058578491211, |
| "num_input_tokens_seen": 453088446, |
| "step": 2260, |
| "token_acc": 0.7375388829110828 |
| }, |
| { |
| "epoch": 0.5327763043631328, |
| "grad_norm": 1.6283939332628163, |
| "learning_rate": 1.737513117358174e-06, |
| "loss": 1.0128792762756347, |
| "num_input_tokens_seen": 455064009, |
| "step": 2270, |
| "token_acc": 0.7309403491726847 |
| }, |
| { |
| "epoch": 0.535123336540944, |
| "grad_norm": 1.7443727538000593, |
| "learning_rate": 1.7346778279993416e-06, |
| "loss": 1.0167512893676758, |
| "num_input_tokens_seen": 457049565, |
| "step": 2280, |
| "token_acc": 0.7327466353251444 |
| }, |
| { |
| "epoch": 0.5374703687187551, |
| "grad_norm": 2.318872931178241, |
| "learning_rate": 1.731829648029091e-06, |
| "loss": 0.9633228302001953, |
| "num_input_tokens_seen": 459050343, |
| "step": 2290, |
| "token_acc": 0.7410114142684382 |
| }, |
| { |
| "epoch": 0.5398174008965663, |
| "grad_norm": 1.5210715736947538, |
| "learning_rate": 1.7289686274214115e-06, |
| "loss": 0.9929851531982422, |
| "num_input_tokens_seen": 461049750, |
| "step": 2300, |
| "token_acc": 0.7357508251313404 |
| }, |
| { |
| "epoch": 0.5398174008965663, |
| "eval_loss": 1.0185507535934448, |
| "eval_runtime": 32.6195, |
| "eval_samples_per_second": 30.657, |
| "eval_steps_per_second": 1.288, |
| "eval_token_acc": 0.731474342697537, |
| "num_input_tokens_seen": 461049750, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5421644330743774, |
| "grad_norm": 1.5749401648234354, |
| "learning_rate": 1.7260948163755917e-06, |
| "loss": 0.9968940734863281, |
| "num_input_tokens_seen": 462989622, |
| "step": 2310, |
| "token_acc": 0.7375997849195517 |
| }, |
| { |
| "epoch": 0.5445114652521886, |
| "grad_norm": 2.5312095421318928, |
| "learning_rate": 1.723208265315342e-06, |
| "loss": 0.9779894828796387, |
| "num_input_tokens_seen": 465006357, |
| "step": 2320, |
| "token_acc": 0.7394803638714152 |
| }, |
| { |
| "epoch": 0.5468584974299998, |
| "grad_norm": 3.2822780472953803, |
| "learning_rate": 1.720309024887907e-06, |
| "loss": 1.0032640457153321, |
| "num_input_tokens_seen": 467017005, |
| "step": 2330, |
| "token_acc": 0.7345803640542331 |
| }, |
| { |
| "epoch": 0.5492055296078109, |
| "grad_norm": 1.6687009392941055, |
| "learning_rate": 1.7173971459631787e-06, |
| "loss": 1.0077280044555663, |
| "num_input_tokens_seen": 468979461, |
| "step": 2340, |
| "token_acc": 0.7342930917761522 |
| }, |
| { |
| "epoch": 0.5515525617856221, |
| "grad_norm": 11.650174621954747, |
| "learning_rate": 1.7144726796328032e-06, |
| "loss": 0.9968754768371582, |
| "num_input_tokens_seen": 470994735, |
| "step": 2350, |
| "token_acc": 0.734416431505073 |
| }, |
| { |
| "epoch": 0.5538995939634332, |
| "grad_norm": 2.599642616517287, |
| "learning_rate": 1.7115356772092855e-06, |
| "loss": 1.0374162673950196, |
| "num_input_tokens_seen": 472979052, |
| "step": 2360, |
| "token_acc": 0.7287551723023211 |
| }, |
| { |
| "epoch": 0.5562466261412444, |
| "grad_norm": 2.7538705299088453, |
| "learning_rate": 1.7085861902250862e-06, |
| "loss": 1.0119436264038086, |
| "num_input_tokens_seen": 475016298, |
| "step": 2370, |
| "token_acc": 0.7321991702851346 |
| }, |
| { |
| "epoch": 0.5585936583190556, |
| "grad_norm": 2.3397709495881682, |
| "learning_rate": 1.7056242704317208e-06, |
| "loss": 0.9402626991271973, |
| "num_input_tokens_seen": 477109281, |
| "step": 2380, |
| "token_acc": 0.7490173941732094 |
| }, |
| { |
| "epoch": 0.5609406904968667, |
| "grad_norm": 1.879207656038821, |
| "learning_rate": 1.7026499697988492e-06, |
| "loss": 0.9886844635009766, |
| "num_input_tokens_seen": 479146713, |
| "step": 2390, |
| "token_acc": 0.7365850879725937 |
| }, |
| { |
| "epoch": 0.5632877226746779, |
| "grad_norm": 1.9704873763682087, |
| "learning_rate": 1.6996633405133653e-06, |
| "loss": 0.9943101882934571, |
| "num_input_tokens_seen": 481102911, |
| "step": 2400, |
| "token_acc": 0.7366662244187203 |
| }, |
| { |
| "epoch": 0.5632877226746779, |
| "eval_loss": 1.015251636505127, |
| "eval_runtime": 32.7961, |
| "eval_samples_per_second": 30.491, |
| "eval_steps_per_second": 1.281, |
| "eval_token_acc": 0.7320929803097804, |
| "num_input_tokens_seen": 481102911, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.565634754852489, |
| "grad_norm": 1.6632244132905207, |
| "learning_rate": 1.6966644349784808e-06, |
| "loss": 0.9883607864379883, |
| "num_input_tokens_seen": 483084549, |
| "step": 2410, |
| "token_acc": 0.7358879192027988 |
| }, |
| { |
| "epoch": 0.5679817870303002, |
| "grad_norm": 1.5330248452956106, |
| "learning_rate": 1.6936533058128049e-06, |
| "loss": 1.0042284965515136, |
| "num_input_tokens_seen": 485112228, |
| "step": 2420, |
| "token_acc": 0.7344426514994169 |
| }, |
| { |
| "epoch": 0.5703288192081113, |
| "grad_norm": 2.5405918981273867, |
| "learning_rate": 1.6906300058494227e-06, |
| "loss": 0.9880990982055664, |
| "num_input_tokens_seen": 487123020, |
| "step": 2430, |
| "token_acc": 0.7372175131700104 |
| }, |
| { |
| "epoch": 0.5726758513859225, |
| "grad_norm": 3.9012975042201297, |
| "learning_rate": 1.6875945881349673e-06, |
| "loss": 0.9801074981689453, |
| "num_input_tokens_seen": 489120441, |
| "step": 2440, |
| "token_acc": 0.7381837376558823 |
| }, |
| { |
| "epoch": 0.5750228835637337, |
| "grad_norm": 1.6637494968221076, |
| "learning_rate": 1.6845471059286886e-06, |
| "loss": 1.0021610260009766, |
| "num_input_tokens_seen": 491066049, |
| "step": 2450, |
| "token_acc": 0.7346050699774175 |
| }, |
| { |
| "epoch": 0.5773699157415448, |
| "grad_norm": 1.652438429477013, |
| "learning_rate": 1.6814876127015198e-06, |
| "loss": 0.9841398239135742, |
| "num_input_tokens_seen": 493112928, |
| "step": 2460, |
| "token_acc": 0.7378321905180247 |
| }, |
| { |
| "epoch": 0.579716947919356, |
| "grad_norm": 3.543309593586376, |
| "learning_rate": 1.678416162135138e-06, |
| "loss": 0.979088020324707, |
| "num_input_tokens_seen": 495119139, |
| "step": 2470, |
| "token_acc": 0.7399358154268393 |
| }, |
| { |
| "epoch": 0.5820639800971671, |
| "grad_norm": 2.893410134875752, |
| "learning_rate": 1.6753328081210244e-06, |
| "loss": 0.9998300552368165, |
| "num_input_tokens_seen": 497115090, |
| "step": 2480, |
| "token_acc": 0.7359860001129023 |
| }, |
| { |
| "epoch": 0.5844110122749783, |
| "grad_norm": 1.9583144196315403, |
| "learning_rate": 1.6722376047595161e-06, |
| "loss": 0.9970391273498536, |
| "num_input_tokens_seen": 499168851, |
| "step": 2490, |
| "token_acc": 0.7355328073638283 |
| }, |
| { |
| "epoch": 0.5867580444527895, |
| "grad_norm": 5.903330257525673, |
| "learning_rate": 1.669130606358858e-06, |
| "loss": 1.0149246215820313, |
| "num_input_tokens_seen": 501138603, |
| "step": 2500, |
| "token_acc": 0.7320385426697377 |
| }, |
| { |
| "epoch": 0.5867580444527895, |
| "eval_loss": 1.0123026371002197, |
| "eval_runtime": 32.7432, |
| "eval_samples_per_second": 30.541, |
| "eval_steps_per_second": 1.283, |
| "eval_token_acc": 0.7328916691673784, |
| "num_input_tokens_seen": 501138603, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5891050766306006, |
| "grad_norm": 1.919536213839018, |
| "learning_rate": 1.6660118674342515e-06, |
| "loss": 0.9900060653686523, |
| "num_input_tokens_seen": 503184900, |
| "step": 2510, |
| "token_acc": 0.7371078337925816 |
| }, |
| { |
| "epoch": 0.5914521088084118, |
| "grad_norm": 3.4511789649891966, |
| "learning_rate": 1.6628814427068952e-06, |
| "loss": 0.9589821815490722, |
| "num_input_tokens_seen": 505223106, |
| "step": 2520, |
| "token_acc": 0.7453759303446423 |
| }, |
| { |
| "epoch": 0.5937991409862229, |
| "grad_norm": 3.9395749071950554, |
| "learning_rate": 1.6597393871030261e-06, |
| "loss": 0.9944395065307617, |
| "num_input_tokens_seen": 507246369, |
| "step": 2530, |
| "token_acc": 0.7347724854980832 |
| }, |
| { |
| "epoch": 0.5961461731640341, |
| "grad_norm": 1.5397013326592903, |
| "learning_rate": 1.6565857557529564e-06, |
| "loss": 0.9756797790527344, |
| "num_input_tokens_seen": 509308893, |
| "step": 2540, |
| "token_acc": 0.7391703562324037 |
| }, |
| { |
| "epoch": 0.5984932053418452, |
| "grad_norm": 1.7526411604347196, |
| "learning_rate": 1.6534206039901055e-06, |
| "loss": 0.9834499359130859, |
| "num_input_tokens_seen": 511244184, |
| "step": 2550, |
| "token_acc": 0.7380458487339893 |
| }, |
| { |
| "epoch": 0.6008402375196564, |
| "grad_norm": 2.2921640319260024, |
| "learning_rate": 1.6502439873500286e-06, |
| "loss": 1.0054790496826171, |
| "num_input_tokens_seen": 513290352, |
| "step": 2560, |
| "token_acc": 0.734738491502126 |
| }, |
| { |
| "epoch": 0.6031872696974675, |
| "grad_norm": 1.9064014496743276, |
| "learning_rate": 1.6470559615694445e-06, |
| "loss": 0.9771562576293945, |
| "num_input_tokens_seen": 515276862, |
| "step": 2570, |
| "token_acc": 0.7392910978769869 |
| }, |
| { |
| "epoch": 0.6055343018752787, |
| "grad_norm": 2.0609613172670764, |
| "learning_rate": 1.6438565825852537e-06, |
| "loss": 0.9563516616821289, |
| "num_input_tokens_seen": 517288296, |
| "step": 2580, |
| "token_acc": 0.744728798321846 |
| }, |
| { |
| "epoch": 0.6078813340530899, |
| "grad_norm": 1.7302019611107595, |
| "learning_rate": 1.6406459065335614e-06, |
| "loss": 0.9771955490112305, |
| "num_input_tokens_seen": 519254622, |
| "step": 2590, |
| "token_acc": 0.740443198920546 |
| }, |
| { |
| "epoch": 0.610228366230901, |
| "grad_norm": 1.8178684355141148, |
| "learning_rate": 1.6374239897486897e-06, |
| "loss": 0.9703773498535156, |
| "num_input_tokens_seen": 521236017, |
| "step": 2600, |
| "token_acc": 0.7407382220106489 |
| }, |
| { |
| "epoch": 0.610228366230901, |
| "eval_loss": 1.0093790292739868, |
| "eval_runtime": 32.6088, |
| "eval_samples_per_second": 30.667, |
| "eval_steps_per_second": 1.288, |
| "eval_token_acc": 0.7333256388356686, |
| "num_input_tokens_seen": 521236017, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6125753984087122, |
| "grad_norm": 1.8585384534519827, |
| "learning_rate": 1.6341908887621894e-06, |
| "loss": 0.9817310333251953, |
| "num_input_tokens_seen": 523212513, |
| "step": 2610, |
| "token_acc": 0.738175322879972 |
| }, |
| { |
| "epoch": 0.6149224305865233, |
| "grad_norm": 2.4244702161030625, |
| "learning_rate": 1.6309466603018495e-06, |
| "loss": 0.9609703063964844, |
| "num_input_tokens_seen": 525216327, |
| "step": 2620, |
| "token_acc": 0.7439178110371839 |
| }, |
| { |
| "epoch": 0.6172694627643345, |
| "grad_norm": 1.638774547412265, |
| "learning_rate": 1.6276913612907004e-06, |
| "loss": 0.9597613334655761, |
| "num_input_tokens_seen": 527198007, |
| "step": 2630, |
| "token_acc": 0.7433998992304688 |
| }, |
| { |
| "epoch": 0.6196164949421457, |
| "grad_norm": 1.8052959287052057, |
| "learning_rate": 1.6244250488460155e-06, |
| "loss": 0.9595340728759766, |
| "num_input_tokens_seen": 529328826, |
| "step": 2640, |
| "token_acc": 0.7424487405247924 |
| }, |
| { |
| "epoch": 0.6219635271199568, |
| "grad_norm": 2.9059084324443987, |
| "learning_rate": 1.6211477802783102e-06, |
| "loss": 0.9733432769775391, |
| "num_input_tokens_seen": 531353727, |
| "step": 2650, |
| "token_acc": 0.7391637709236651 |
| }, |
| { |
| "epoch": 0.624310559297768, |
| "grad_norm": 2.4613981471794117, |
| "learning_rate": 1.6178596130903343e-06, |
| "loss": 0.9548052787780762, |
| "num_input_tokens_seen": 533357184, |
| "step": 2660, |
| "token_acc": 0.7445567764998143 |
| }, |
| { |
| "epoch": 0.6266575914755791, |
| "grad_norm": 1.959126642555864, |
| "learning_rate": 1.6145606049760642e-06, |
| "loss": 0.9767616271972657, |
| "num_input_tokens_seen": 535321791, |
| "step": 2670, |
| "token_acc": 0.7381060525928277 |
| }, |
| { |
| "epoch": 0.6290046236533903, |
| "grad_norm": 1.5308332739370312, |
| "learning_rate": 1.6112508138196917e-06, |
| "loss": 0.9835859298706054, |
| "num_input_tokens_seen": 537364758, |
| "step": 2680, |
| "token_acc": 0.7381528449040924 |
| }, |
| { |
| "epoch": 0.6313516558312015, |
| "grad_norm": 1.8506281977228691, |
| "learning_rate": 1.6079302976946053e-06, |
| "loss": 0.9697771072387695, |
| "num_input_tokens_seen": 539423991, |
| "step": 2690, |
| "token_acc": 0.7428583040298499 |
| }, |
| { |
| "epoch": 0.6336986880090126, |
| "grad_norm": 2.143447146073978, |
| "learning_rate": 1.604599114862375e-06, |
| "loss": 0.9710499763488769, |
| "num_input_tokens_seen": 541385301, |
| "step": 2700, |
| "token_acc": 0.7437010271608948 |
| }, |
| { |
| "epoch": 0.6336986880090126, |
| "eval_loss": 1.006402611732483, |
| "eval_runtime": 32.4804, |
| "eval_samples_per_second": 30.788, |
| "eval_steps_per_second": 1.293, |
| "eval_token_acc": 0.7333256388356686, |
| "num_input_tokens_seen": 541385301, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.6360457201868238, |
| "grad_norm": 1.6759371033254091, |
| "learning_rate": 1.6012573237717265e-06, |
| "loss": 0.9557651519775391, |
| "num_input_tokens_seen": 543498738, |
| "step": 2710, |
| "token_acc": 0.744166114013349 |
| }, |
| { |
| "epoch": 0.6383927523646349, |
| "grad_norm": 1.8250942426916423, |
| "learning_rate": 1.5979049830575188e-06, |
| "loss": 0.9645903587341309, |
| "num_input_tokens_seen": 545489775, |
| "step": 2720, |
| "token_acc": 0.7429352817436318 |
| }, |
| { |
| "epoch": 0.6407397845424461, |
| "grad_norm": 1.9217599973801651, |
| "learning_rate": 1.5945421515397134e-06, |
| "loss": 0.9858356475830078, |
| "num_input_tokens_seen": 547577721, |
| "step": 2730, |
| "token_acc": 0.7375185153736568 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "grad_norm": 1.7809745721720633, |
| "learning_rate": 1.591168888222342e-06, |
| "loss": 0.9513526916503906, |
| "num_input_tokens_seen": 549624339, |
| "step": 2740, |
| "token_acc": 0.745696874109412 |
| }, |
| { |
| "epoch": 0.6454338488980684, |
| "grad_norm": 4.185287393591199, |
| "learning_rate": 1.587785252292473e-06, |
| "loss": 1.0034643173217774, |
| "num_input_tokens_seen": 551637576, |
| "step": 2750, |
| "token_acc": 0.7338386568669174 |
| }, |
| { |
| "epoch": 0.6477808810758795, |
| "grad_norm": 1.5787917866107477, |
| "learning_rate": 1.584391303119172e-06, |
| "loss": 0.9657976150512695, |
| "num_input_tokens_seen": 553630620, |
| "step": 2760, |
| "token_acc": 0.7424132245973986 |
| }, |
| { |
| "epoch": 0.6501279132536907, |
| "grad_norm": 1.6169135735671403, |
| "learning_rate": 1.58098710025246e-06, |
| "loss": 0.976175594329834, |
| "num_input_tokens_seen": 555634122, |
| "step": 2770, |
| "token_acc": 0.7385256195920764 |
| }, |
| { |
| "epoch": 0.6524749454315019, |
| "grad_norm": 2.786090764996497, |
| "learning_rate": 1.5775727034222674e-06, |
| "loss": 1.0152118682861329, |
| "num_input_tokens_seen": 557567646, |
| "step": 2780, |
| "token_acc": 0.7318658065576464 |
| }, |
| { |
| "epoch": 0.654821977609313, |
| "grad_norm": 2.3429135221710142, |
| "learning_rate": 1.5741481725373898e-06, |
| "loss": 0.9660276412963867, |
| "num_input_tokens_seen": 559612812, |
| "step": 2790, |
| "token_acc": 0.7423686792009822 |
| }, |
| { |
| "epoch": 0.6571690097871242, |
| "grad_norm": 2.066607274894778, |
| "learning_rate": 1.5707135676844319e-06, |
| "loss": 0.9577510833740235, |
| "num_input_tokens_seen": 561582108, |
| "step": 2800, |
| "token_acc": 0.7451270299890406 |
| }, |
| { |
| "epoch": 0.6571690097871242, |
| "eval_loss": 1.0031476020812988, |
| "eval_runtime": 32.6219, |
| "eval_samples_per_second": 30.654, |
| "eval_steps_per_second": 1.287, |
| "eval_token_acc": 0.7348629994690796, |
| "num_input_tokens_seen": 561582108, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6595160419649353, |
| "grad_norm": 2.6635168105713385, |
| "learning_rate": 1.5672689491267565e-06, |
| "loss": 0.9600403785705567, |
| "num_input_tokens_seen": 563559690, |
| "step": 2810, |
| "token_acc": 0.7428777482846697 |
| }, |
| { |
| "epoch": 0.6618630741427465, |
| "grad_norm": 1.7450802216902526, |
| "learning_rate": 1.5638143773034266e-06, |
| "loss": 0.9954195022583008, |
| "num_input_tokens_seen": 565524792, |
| "step": 2820, |
| "token_acc": 0.7348587056347071 |
| }, |
| { |
| "epoch": 0.6642101063205577, |
| "grad_norm": 1.8754649742088336, |
| "learning_rate": 1.5603499128281444e-06, |
| "loss": 0.969937515258789, |
| "num_input_tokens_seen": 567451971, |
| "step": 2830, |
| "token_acc": 0.7414208823996457 |
| }, |
| { |
| "epoch": 0.6665571384983688, |
| "grad_norm": 1.5835197058667514, |
| "learning_rate": 1.556875616488188e-06, |
| "loss": 0.969327163696289, |
| "num_input_tokens_seen": 569462406, |
| "step": 2840, |
| "token_acc": 0.7401524628156212 |
| }, |
| { |
| "epoch": 0.66890417067618, |
| "grad_norm": 2.6099644468289567, |
| "learning_rate": 1.553391549243344e-06, |
| "loss": 0.9504291534423828, |
| "num_input_tokens_seen": 571500279, |
| "step": 2850, |
| "token_acc": 0.7466074001336113 |
| }, |
| { |
| "epoch": 0.6712512028539911, |
| "grad_norm": 2.482803714476407, |
| "learning_rate": 1.54989777222484e-06, |
| "loss": 0.9784445762634277, |
| "num_input_tokens_seen": 573509781, |
| "step": 2860, |
| "token_acc": 0.7380322581926356 |
| }, |
| { |
| "epoch": 0.6735982350318023, |
| "grad_norm": 3.1626495400003227, |
| "learning_rate": 1.546394346734269e-06, |
| "loss": 0.9782054901123047, |
| "num_input_tokens_seen": 575490657, |
| "step": 2870, |
| "token_acc": 0.7396435152006547 |
| }, |
| { |
| "epoch": 0.6759452672096135, |
| "grad_norm": 1.8352007570418352, |
| "learning_rate": 1.5428813342425175e-06, |
| "loss": 0.9893608093261719, |
| "num_input_tokens_seen": 577443624, |
| "step": 2880, |
| "token_acc": 0.7371560289894273 |
| }, |
| { |
| "epoch": 0.6782922993874246, |
| "grad_norm": 11.589107012378998, |
| "learning_rate": 1.5393587963886834e-06, |
| "loss": 0.9795863151550293, |
| "num_input_tokens_seen": 579501576, |
| "step": 2890, |
| "token_acc": 0.738575752796563 |
| }, |
| { |
| "epoch": 0.6806393315652357, |
| "grad_norm": 2.3582930414965713, |
| "learning_rate": 1.5358267949789964e-06, |
| "loss": 0.986695671081543, |
| "num_input_tokens_seen": 581445867, |
| "step": 2900, |
| "token_acc": 0.7377336684807478 |
| }, |
| { |
| "epoch": 0.6806393315652357, |
| "eval_loss": 1.0008372068405151, |
| "eval_runtime": 32.2631, |
| "eval_samples_per_second": 30.995, |
| "eval_steps_per_second": 1.302, |
| "eval_token_acc": 0.7351584681794049, |
| "num_input_tokens_seen": 581445867, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6829863637430469, |
| "grad_norm": 1.8683225729956336, |
| "learning_rate": 1.532285391985734e-06, |
| "loss": 0.9824249267578125, |
| "num_input_tokens_seen": 583473981, |
| "step": 2910, |
| "token_acc": 0.7386771656575185 |
| }, |
| { |
| "epoch": 0.6853333959208581, |
| "grad_norm": 1.9051084978341761, |
| "learning_rate": 1.5287346495461316e-06, |
| "loss": 0.9780803680419922, |
| "num_input_tokens_seen": 585488343, |
| "step": 2920, |
| "token_acc": 0.7386755390868261 |
| }, |
| { |
| "epoch": 0.6876804280986692, |
| "grad_norm": 1.7526173635768003, |
| "learning_rate": 1.5251746299612958e-06, |
| "loss": 0.9556564331054688, |
| "num_input_tokens_seen": 587536749, |
| "step": 2930, |
| "token_acc": 0.7437935964230544 |
| }, |
| { |
| "epoch": 0.6900274602764804, |
| "grad_norm": 1.908495061384156, |
| "learning_rate": 1.5216053956951078e-06, |
| "loss": 0.9559732437133789, |
| "num_input_tokens_seen": 589505883, |
| "step": 2940, |
| "token_acc": 0.7442760675515612 |
| }, |
| { |
| "epoch": 0.6923744924542915, |
| "grad_norm": 1.682313702090843, |
| "learning_rate": 1.5180270093731302e-06, |
| "loss": 0.9883411407470704, |
| "num_input_tokens_seen": 591496815, |
| "step": 2950, |
| "token_acc": 0.7374446310537534 |
| }, |
| { |
| "epoch": 0.6947215246321027, |
| "grad_norm": 1.6223526724021116, |
| "learning_rate": 1.5144395337815063e-06, |
| "loss": 0.9544116973876953, |
| "num_input_tokens_seen": 593483805, |
| "step": 2960, |
| "token_acc": 0.7434431431260328 |
| }, |
| { |
| "epoch": 0.6970685568099139, |
| "grad_norm": 3.3044227000595106, |
| "learning_rate": 1.5108430318658599e-06, |
| "loss": 0.9596687316894531, |
| "num_input_tokens_seen": 595472802, |
| "step": 2970, |
| "token_acc": 0.7425995483387807 |
| }, |
| { |
| "epoch": 0.699415588987725, |
| "grad_norm": 1.9946238986715072, |
| "learning_rate": 1.507237566730189e-06, |
| "loss": 0.9447664260864258, |
| "num_input_tokens_seen": 597458052, |
| "step": 2980, |
| "token_acc": 0.7471448055436924 |
| }, |
| { |
| "epoch": 0.7017626211655362, |
| "grad_norm": 1.9985767739510494, |
| "learning_rate": 1.5036232016357608e-06, |
| "loss": 0.9753869056701661, |
| "num_input_tokens_seen": 599511099, |
| "step": 2990, |
| "token_acc": 0.7407295913625692 |
| }, |
| { |
| "epoch": 0.7041096533433473, |
| "grad_norm": 1.7848206984050603, |
| "learning_rate": 1.5e-06, |
| "loss": 0.9929049491882325, |
| "num_input_tokens_seen": 601494039, |
| "step": 3000, |
| "token_acc": 0.733967886177249 |
| }, |
| { |
| "epoch": 0.7041096533433473, |
| "eval_loss": 0.9992188215255737, |
| "eval_runtime": 32.8388, |
| "eval_samples_per_second": 30.452, |
| "eval_steps_per_second": 1.279, |
| "eval_token_acc": 0.7353292860275616, |
| "num_input_tokens_seen": 601494039, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7064566855211585, |
| "grad_norm": 1.5971312735806535, |
| "learning_rate": 1.4963680253953767e-06, |
| "loss": 0.9550104141235352, |
| "num_input_tokens_seen": 603479547, |
| "step": 3010, |
| "token_acc": 0.7457036074683664 |
| }, |
| { |
| "epoch": 0.7088037176989697, |
| "grad_norm": 1.7733615613171219, |
| "learning_rate": 1.4927273415482915e-06, |
| "loss": 0.9737858772277832, |
| "num_input_tokens_seen": 605442297, |
| "step": 3020, |
| "token_acc": 0.7412765006450565 |
| }, |
| { |
| "epoch": 0.7111507498767808, |
| "grad_norm": 6.926370874803529, |
| "learning_rate": 1.4890780123379563e-06, |
| "loss": 0.9665937423706055, |
| "num_input_tokens_seen": 607477695, |
| "step": 3030, |
| "token_acc": 0.7405696365107176 |
| }, |
| { |
| "epoch": 0.713497782054592, |
| "grad_norm": 1.6484833491764401, |
| "learning_rate": 1.485420101795274e-06, |
| "loss": 0.95927734375, |
| "num_input_tokens_seen": 609444318, |
| "step": 3040, |
| "token_acc": 0.7442635774417046 |
| }, |
| { |
| "epoch": 0.7158448142324031, |
| "grad_norm": 2.0360741208385913, |
| "learning_rate": 1.4817536741017151e-06, |
| "loss": 0.9595672607421875, |
| "num_input_tokens_seen": 611390574, |
| "step": 3050, |
| "token_acc": 0.743094030233154 |
| }, |
| { |
| "epoch": 0.7181918464102143, |
| "grad_norm": 1.6990138073052663, |
| "learning_rate": 1.4780787935881923e-06, |
| "loss": 0.9530370712280274, |
| "num_input_tokens_seen": 613394736, |
| "step": 3060, |
| "token_acc": 0.7442468822691946 |
| }, |
| { |
| "epoch": 0.7205388785880255, |
| "grad_norm": 1.7762074096620095, |
| "learning_rate": 1.474395524733929e-06, |
| "loss": 0.9581127166748047, |
| "num_input_tokens_seen": 615392505, |
| "step": 3070, |
| "token_acc": 0.7441699918818188 |
| }, |
| { |
| "epoch": 0.7228859107658366, |
| "grad_norm": 2.463738917765937, |
| "learning_rate": 1.4707039321653328e-06, |
| "loss": 0.9451935768127442, |
| "num_input_tokens_seen": 617397957, |
| "step": 3080, |
| "token_acc": 0.7463462899737582 |
| }, |
| { |
| "epoch": 0.7252329429436477, |
| "grad_norm": 1.5266790692018193, |
| "learning_rate": 1.4670040806548554e-06, |
| "loss": 0.9604751586914062, |
| "num_input_tokens_seen": 619431237, |
| "step": 3090, |
| "token_acc": 0.743774946972139 |
| }, |
| { |
| "epoch": 0.7275799751214589, |
| "grad_norm": 2.3079209032431858, |
| "learning_rate": 1.4632960351198617e-06, |
| "loss": 0.958247184753418, |
| "num_input_tokens_seen": 621429906, |
| "step": 3100, |
| "token_acc": 0.7430188770047043 |
| }, |
| { |
| "epoch": 0.7275799751214589, |
| "eval_loss": 0.9967913031578064, |
| "eval_runtime": 32.9622, |
| "eval_samples_per_second": 30.338, |
| "eval_steps_per_second": 1.274, |
| "eval_token_acc": 0.7358602063664273, |
| "num_input_tokens_seen": 621429906, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.7299270072992701, |
| "grad_norm": 1.9140433022488452, |
| "learning_rate": 1.459579860621488e-06, |
| "loss": 0.9593525886535644, |
| "num_input_tokens_seen": 623425752, |
| "step": 3110, |
| "token_acc": 0.7432277726301421 |
| }, |
| { |
| "epoch": 0.7322740394770813, |
| "grad_norm": 1.8212366585882274, |
| "learning_rate": 1.4558556223635e-06, |
| "loss": 0.9617977142333984, |
| "num_input_tokens_seen": 625420740, |
| "step": 3120, |
| "token_acc": 0.742332781810841 |
| }, |
| { |
| "epoch": 0.7346210716548924, |
| "grad_norm": 1.81167215973652, |
| "learning_rate": 1.4521233856911506e-06, |
| "loss": 0.958807373046875, |
| "num_input_tokens_seen": 627481314, |
| "step": 3130, |
| "token_acc": 0.7424123292987752 |
| }, |
| { |
| "epoch": 0.7369681038327035, |
| "grad_norm": 2.3831847210640373, |
| "learning_rate": 1.4483832160900325e-06, |
| "loss": 0.9585672378540039, |
| "num_input_tokens_seen": 629442897, |
| "step": 3140, |
| "token_acc": 0.7439413187403806 |
| }, |
| { |
| "epoch": 0.7393151360105147, |
| "grad_norm": 2.043229737472813, |
| "learning_rate": 1.4446351791849273e-06, |
| "loss": 0.9544695854187012, |
| "num_input_tokens_seen": 631432200, |
| "step": 3150, |
| "token_acc": 0.7442476653043495 |
| }, |
| { |
| "epoch": 0.7416621681883259, |
| "grad_norm": 4.811000207072732, |
| "learning_rate": 1.4408793407386585e-06, |
| "loss": 0.9843364715576172, |
| "num_input_tokens_seen": 633445356, |
| "step": 3160, |
| "token_acc": 0.7394687633144498 |
| }, |
| { |
| "epoch": 0.744009200366137, |
| "grad_norm": 2.4106731528164027, |
| "learning_rate": 1.4371157666509327e-06, |
| "loss": 0.9410341262817383, |
| "num_input_tokens_seen": 635526396, |
| "step": 3170, |
| "token_acc": 0.7483812367179011 |
| }, |
| { |
| "epoch": 0.7463562325439482, |
| "grad_norm": 1.7671356519717534, |
| "learning_rate": 1.4333445229571873e-06, |
| "loss": 0.9693818092346191, |
| "num_input_tokens_seen": 637512357, |
| "step": 3180, |
| "token_acc": 0.7406820079650566 |
| }, |
| { |
| "epoch": 0.7487032647217593, |
| "grad_norm": 7.649772673674551, |
| "learning_rate": 1.429565675827428e-06, |
| "loss": 0.9459026336669922, |
| "num_input_tokens_seen": 639512292, |
| "step": 3190, |
| "token_acc": 0.7462010482209617 |
| }, |
| { |
| "epoch": 0.7510502968995705, |
| "grad_norm": 1.8216992633152906, |
| "learning_rate": 1.4257792915650725e-06, |
| "loss": 0.9720870971679687, |
| "num_input_tokens_seen": 641562030, |
| "step": 3200, |
| "token_acc": 0.7415353056114234 |
| }, |
| { |
| "epoch": 0.7510502968995705, |
| "eval_loss": 0.9943264722824097, |
| "eval_runtime": 32.5126, |
| "eval_samples_per_second": 30.757, |
| "eval_steps_per_second": 1.292, |
| "eval_token_acc": 0.7364419103898802, |
| "num_input_tokens_seen": 641562030, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.7533973290773817, |
| "grad_norm": 2.5481168225977733, |
| "learning_rate": 1.421985436605783e-06, |
| "loss": 0.9607316970825195, |
| "num_input_tokens_seen": 643584060, |
| "step": 3210, |
| "token_acc": 0.7432411531496256 |
| }, |
| { |
| "epoch": 0.7557443612551928, |
| "grad_norm": 2.118059690668745, |
| "learning_rate": 1.4181841775163012e-06, |
| "loss": 0.9484768867492676, |
| "num_input_tokens_seen": 645607389, |
| "step": 3220, |
| "token_acc": 0.7466806535620841 |
| }, |
| { |
| "epoch": 0.7580913934330039, |
| "grad_norm": 1.536905846651224, |
| "learning_rate": 1.4143755809932843e-06, |
| "loss": 0.9712394714355469, |
| "num_input_tokens_seen": 647631456, |
| "step": 3230, |
| "token_acc": 0.7404885747138855 |
| }, |
| { |
| "epoch": 0.7604384256108151, |
| "grad_norm": 2.4582256619795935, |
| "learning_rate": 1.4105597138621279e-06, |
| "loss": 0.9821660041809082, |
| "num_input_tokens_seen": 649623648, |
| "step": 3240, |
| "token_acc": 0.7392644424148588 |
| }, |
| { |
| "epoch": 0.7627854577886263, |
| "grad_norm": 1.726231353540505, |
| "learning_rate": 1.4067366430758004e-06, |
| "loss": 0.9590049743652344, |
| "num_input_tokens_seen": 651641892, |
| "step": 3250, |
| "token_acc": 0.7437141846756814 |
| }, |
| { |
| "epoch": 0.7651324899664375, |
| "grad_norm": 1.922673635746528, |
| "learning_rate": 1.4029064357136626e-06, |
| "loss": 0.9750150680541992, |
| "num_input_tokens_seen": 653604150, |
| "step": 3260, |
| "token_acc": 0.7414191376968158 |
| }, |
| { |
| "epoch": 0.7674795221442486, |
| "grad_norm": 1.6204247475263307, |
| "learning_rate": 1.3990691589802952e-06, |
| "loss": 0.9551026344299316, |
| "num_input_tokens_seen": 655600902, |
| "step": 3270, |
| "token_acc": 0.7445325970386258 |
| }, |
| { |
| "epoch": 0.7698265543220597, |
| "grad_norm": 1.4635122389462327, |
| "learning_rate": 1.3952248802043165e-06, |
| "loss": 0.9669751167297364, |
| "num_input_tokens_seen": 657608466, |
| "step": 3280, |
| "token_acc": 0.7429139464814524 |
| }, |
| { |
| "epoch": 0.7721735864998709, |
| "grad_norm": 8.22536747363378, |
| "learning_rate": 1.3913736668372024e-06, |
| "loss": 0.9439043045043946, |
| "num_input_tokens_seen": 659654619, |
| "step": 3290, |
| "token_acc": 0.7456336900472631 |
| }, |
| { |
| "epoch": 0.7745206186776821, |
| "grad_norm": 9.543988869279518, |
| "learning_rate": 1.3875155864521028e-06, |
| "loss": 0.9564947128295899, |
| "num_input_tokens_seen": 661688691, |
| "step": 3300, |
| "token_acc": 0.7438499491922926 |
| }, |
| { |
| "epoch": 0.7745206186776821, |
| "eval_loss": 0.9920927882194519, |
| "eval_runtime": 32.5499, |
| "eval_samples_per_second": 30.722, |
| "eval_steps_per_second": 1.29, |
| "eval_token_acc": 0.7375683848479953, |
| "num_input_tokens_seen": 661688691, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7768676508554933, |
| "grad_norm": 1.6682984595331, |
| "learning_rate": 1.3836507067426564e-06, |
| "loss": 0.9715993881225586, |
| "num_input_tokens_seen": 663716223, |
| "step": 3310, |
| "token_acc": 0.7398505776738471 |
| }, |
| { |
| "epoch": 0.7792146830333044, |
| "grad_norm": 1.4995458445383936, |
| "learning_rate": 1.379779095521801e-06, |
| "loss": 0.9635456085205079, |
| "num_input_tokens_seen": 665680179, |
| "step": 3320, |
| "token_acc": 0.7437482270495307 |
| }, |
| { |
| "epoch": 0.7815617152111155, |
| "grad_norm": 2.9414382856417665, |
| "learning_rate": 1.3759008207205866e-06, |
| "loss": 0.955263328552246, |
| "num_input_tokens_seen": 667683303, |
| "step": 3330, |
| "token_acc": 0.7446334146072263 |
| }, |
| { |
| "epoch": 0.7839087473889267, |
| "grad_norm": 2.4991760245357852, |
| "learning_rate": 1.3720159503869814e-06, |
| "loss": 0.9503087997436523, |
| "num_input_tokens_seen": 669640779, |
| "step": 3340, |
| "token_acc": 0.7461391567718691 |
| }, |
| { |
| "epoch": 0.7862557795667379, |
| "grad_norm": 1.6816037590987798, |
| "learning_rate": 1.3681245526846781e-06, |
| "loss": 0.9773989677429199, |
| "num_input_tokens_seen": 671655801, |
| "step": 3350, |
| "token_acc": 0.7381824953149948 |
| }, |
| { |
| "epoch": 0.788602811744549, |
| "grad_norm": 13.867129183446435, |
| "learning_rate": 1.3642266958918981e-06, |
| "loss": 0.9606409072875977, |
| "num_input_tokens_seen": 673618887, |
| "step": 3360, |
| "token_acc": 0.7444690515700922 |
| }, |
| { |
| "epoch": 0.7909498439223602, |
| "grad_norm": 1.68642281844996, |
| "learning_rate": 1.3603224484001947e-06, |
| "loss": 0.9683753967285156, |
| "num_input_tokens_seen": 675600486, |
| "step": 3370, |
| "token_acc": 0.7418183604302765 |
| }, |
| { |
| "epoch": 0.7932968761001713, |
| "grad_norm": 3.6876372609830947, |
| "learning_rate": 1.3564118787132506e-06, |
| "loss": 0.9690577507019043, |
| "num_input_tokens_seen": 677573577, |
| "step": 3380, |
| "token_acc": 0.7409562127336359 |
| }, |
| { |
| "epoch": 0.7956439082779825, |
| "grad_norm": 2.195994268899717, |
| "learning_rate": 1.3524950554456784e-06, |
| "loss": 0.9620229721069335, |
| "num_input_tokens_seen": 679562811, |
| "step": 3390, |
| "token_acc": 0.7450269148735509 |
| }, |
| { |
| "epoch": 0.7979909404557937, |
| "grad_norm": 1.66370232482538, |
| "learning_rate": 1.3485720473218152e-06, |
| "loss": 0.9747153282165527, |
| "num_input_tokens_seen": 681515289, |
| "step": 3400, |
| "token_acc": 0.7406435118536351 |
| }, |
| { |
| "epoch": 0.7979909404557937, |
| "eval_loss": 0.989876389503479, |
| "eval_runtime": 32.5612, |
| "eval_samples_per_second": 30.711, |
| "eval_steps_per_second": 1.29, |
| "eval_token_acc": 0.7377299692989543, |
| "num_input_tokens_seen": 681515289, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.8003379726336048, |
| "grad_norm": 7.5891295742929765, |
| "learning_rate": 1.344642923174517e-06, |
| "loss": 0.9531444549560547, |
| "num_input_tokens_seen": 683512767, |
| "step": 3410, |
| "token_acc": 0.743843269116981 |
| }, |
| { |
| "epoch": 0.8026850048114159, |
| "grad_norm": 2.227763312783814, |
| "learning_rate": 1.3407077519439517e-06, |
| "loss": 0.9736311912536622, |
| "num_input_tokens_seen": 685506138, |
| "step": 3420, |
| "token_acc": 0.7399243439837672 |
| }, |
| { |
| "epoch": 0.8050320369892271, |
| "grad_norm": 2.113082381566505, |
| "learning_rate": 1.3367666026763882e-06, |
| "loss": 0.9282070159912109, |
| "num_input_tokens_seen": 687553683, |
| "step": 3430, |
| "token_acc": 0.7491222650322436 |
| }, |
| { |
| "epoch": 0.8073790691670383, |
| "grad_norm": 2.6528881128590505, |
| "learning_rate": 1.3328195445229867e-06, |
| "loss": 0.9803478240966796, |
| "num_input_tokens_seen": 689471004, |
| "step": 3440, |
| "token_acc": 0.7387909473555786 |
| }, |
| { |
| "epoch": 0.8097261013448495, |
| "grad_norm": 1.9793103853657699, |
| "learning_rate": 1.3288666467385831e-06, |
| "loss": 0.9667415618896484, |
| "num_input_tokens_seen": 691496667, |
| "step": 3450, |
| "token_acc": 0.7424123423266975 |
| }, |
| { |
| "epoch": 0.8120731335226606, |
| "grad_norm": 1.7709247958435497, |
| "learning_rate": 1.3249079786804764e-06, |
| "loss": 0.9529176712036133, |
| "num_input_tokens_seen": 693546759, |
| "step": 3460, |
| "token_acc": 0.7441175099271877 |
| }, |
| { |
| "epoch": 0.8144201657004717, |
| "grad_norm": 1.5610954433541373, |
| "learning_rate": 1.3209436098072093e-06, |
| "loss": 0.9164794921875, |
| "num_input_tokens_seen": 695642895, |
| "step": 3470, |
| "token_acc": 0.7535160611124015 |
| }, |
| { |
| "epoch": 0.8167671978782829, |
| "grad_norm": 5.4874386973622675, |
| "learning_rate": 1.3169736096773518e-06, |
| "loss": 0.9681709289550782, |
| "num_input_tokens_seen": 697628748, |
| "step": 3480, |
| "token_acc": 0.7417104783717662 |
| }, |
| { |
| "epoch": 0.8191142300560941, |
| "grad_norm": 1.5904173197084162, |
| "learning_rate": 1.3129980479482781e-06, |
| "loss": 0.9423411369323731, |
| "num_input_tokens_seen": 699612816, |
| "step": 3490, |
| "token_acc": 0.7463674068222216 |
| }, |
| { |
| "epoch": 0.8214612622339053, |
| "grad_norm": 2.5766852327480185, |
| "learning_rate": 1.3090169943749473e-06, |
| "loss": 0.9422481536865235, |
| "num_input_tokens_seen": 701681886, |
| "step": 3500, |
| "token_acc": 0.746677911017143 |
| }, |
| { |
| "epoch": 0.8214612622339053, |
| "eval_loss": 0.9871490597724915, |
| "eval_runtime": 32.4224, |
| "eval_samples_per_second": 30.843, |
| "eval_steps_per_second": 1.295, |
| "eval_token_acc": 0.738205489254634, |
| "num_input_tokens_seen": 701681886, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.8238082944117164, |
| "grad_norm": 1.6839398965190602, |
| "learning_rate": 1.3050305188086776e-06, |
| "loss": 0.9780057907104492, |
| "num_input_tokens_seen": 703749471, |
| "step": 3510, |
| "token_acc": 0.7461169628181562 |
| }, |
| { |
| "epoch": 0.8261553265895275, |
| "grad_norm": 1.6472063314918655, |
| "learning_rate": 1.3010386911959206e-06, |
| "loss": 0.9228075981140137, |
| "num_input_tokens_seen": 705742899, |
| "step": 3520, |
| "token_acc": 0.750938660857144 |
| }, |
| { |
| "epoch": 0.8285023587673387, |
| "grad_norm": 2.0632172614206934, |
| "learning_rate": 1.2970415815770348e-06, |
| "loss": 0.9639385223388672, |
| "num_input_tokens_seen": 707763786, |
| "step": 3530, |
| "token_acc": 0.7435530770762796 |
| }, |
| { |
| "epoch": 0.8308493909451499, |
| "grad_norm": 1.9277876571318946, |
| "learning_rate": 1.2930392600850572e-06, |
| "loss": 0.9361279487609864, |
| "num_input_tokens_seen": 709803774, |
| "step": 3540, |
| "token_acc": 0.7479319140358494 |
| }, |
| { |
| "epoch": 0.833196423122961, |
| "grad_norm": 1.7198703719511412, |
| "learning_rate": 1.2890317969444716e-06, |
| "loss": 0.9535655975341797, |
| "num_input_tokens_seen": 711862587, |
| "step": 3550, |
| "token_acc": 0.7448029965128141 |
| }, |
| { |
| "epoch": 0.8355434553007721, |
| "grad_norm": 2.212450916967764, |
| "learning_rate": 1.285019262469976e-06, |
| "loss": 0.9320892333984375, |
| "num_input_tokens_seen": 713902905, |
| "step": 3560, |
| "token_acc": 0.7496303953267546 |
| }, |
| { |
| "epoch": 0.8378904874785833, |
| "grad_norm": 1.9712068144466057, |
| "learning_rate": 1.281001727065251e-06, |
| "loss": 0.9570484161376953, |
| "num_input_tokens_seen": 715896024, |
| "step": 3570, |
| "token_acc": 0.7434224760474031 |
| }, |
| { |
| "epoch": 0.8402375196563945, |
| "grad_norm": 10.730434108038908, |
| "learning_rate": 1.2769792612217224e-06, |
| "loss": 0.9570381164550781, |
| "num_input_tokens_seen": 717863472, |
| "step": 3580, |
| "token_acc": 0.7445581595776979 |
| }, |
| { |
| "epoch": 0.8425845518342057, |
| "grad_norm": 3.30727503447712, |
| "learning_rate": 1.2729519355173253e-06, |
| "loss": 0.9440830230712891, |
| "num_input_tokens_seen": 719863371, |
| "step": 3590, |
| "token_acc": 0.7474822302083397 |
| }, |
| { |
| "epoch": 0.8449315840120168, |
| "grad_norm": 3.713841498382935, |
| "learning_rate": 1.2689198206152656e-06, |
| "loss": 0.9532724380493164, |
| "num_input_tokens_seen": 721831113, |
| "step": 3600, |
| "token_acc": 0.7449260731906336 |
| }, |
| { |
| "epoch": 0.8449315840120168, |
| "eval_loss": 0.9854407906532288, |
| "eval_runtime": 32.7136, |
| "eval_samples_per_second": 30.568, |
| "eval_steps_per_second": 1.284, |
| "eval_token_acc": 0.738343990212599, |
| "num_input_tokens_seen": 721831113, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.8472786161898279, |
| "grad_norm": 2.0589116386432122, |
| "learning_rate": 1.2648829872627807e-06, |
| "loss": 0.9483745574951172, |
| "num_input_tokens_seen": 723825324, |
| "step": 3610, |
| "token_acc": 0.745855639432676 |
| }, |
| { |
| "epoch": 0.8496256483676391, |
| "grad_norm": 2.2896157925507143, |
| "learning_rate": 1.2608415062898969e-06, |
| "loss": 0.9875471115112304, |
| "num_input_tokens_seen": 725824848, |
| "step": 3620, |
| "token_acc": 0.736929354012106 |
| }, |
| { |
| "epoch": 0.8519726805454503, |
| "grad_norm": 1.8359922545608438, |
| "learning_rate": 1.2567954486081878e-06, |
| "loss": 0.9514982223510742, |
| "num_input_tokens_seen": 727830747, |
| "step": 3630, |
| "token_acc": 0.7452454133152131 |
| }, |
| { |
| "epoch": 0.8543197127232615, |
| "grad_norm": 3.153372907954943, |
| "learning_rate": 1.2527448852095292e-06, |
| "loss": 0.9558559417724609, |
| "num_input_tokens_seen": 729852828, |
| "step": 3640, |
| "token_acc": 0.7435630305059377 |
| }, |
| { |
| "epoch": 0.8566667449010726, |
| "grad_norm": 3.2189620482043386, |
| "learning_rate": 1.2486898871648551e-06, |
| "loss": 0.9721113204956054, |
| "num_input_tokens_seen": 731850777, |
| "step": 3650, |
| "token_acc": 0.7411079350146542 |
| }, |
| { |
| "epoch": 0.8590137770788837, |
| "grad_norm": 3.3099093175401872, |
| "learning_rate": 1.2446305256229072e-06, |
| "loss": 0.9803009986877441, |
| "num_input_tokens_seen": 733814010, |
| "step": 3660, |
| "token_acc": 0.7365633927510155 |
| }, |
| { |
| "epoch": 0.8613608092566949, |
| "grad_norm": 1.5270344015944395, |
| "learning_rate": 1.2405668718089917e-06, |
| "loss": 0.9435177803039551, |
| "num_input_tokens_seen": 735837123, |
| "step": 3670, |
| "token_acc": 0.746749139522123 |
| }, |
| { |
| "epoch": 0.8637078414345061, |
| "grad_norm": 5.787047190916268, |
| "learning_rate": 1.2364989970237248e-06, |
| "loss": 0.956524658203125, |
| "num_input_tokens_seen": 737845806, |
| "step": 3680, |
| "token_acc": 0.7443589079040083 |
| }, |
| { |
| "epoch": 0.8660548736123173, |
| "grad_norm": 8.359169785563331, |
| "learning_rate": 1.232426972641784e-06, |
| "loss": 0.9011870384216308, |
| "num_input_tokens_seen": 739830486, |
| "step": 3690, |
| "token_acc": 0.75567660422689 |
| }, |
| { |
| "epoch": 0.8684019057901284, |
| "grad_norm": 1.5845135247364173, |
| "learning_rate": 1.2283508701106558e-06, |
| "loss": 0.9817106246948242, |
| "num_input_tokens_seen": 741791226, |
| "step": 3700, |
| "token_acc": 0.7385339271890049 |
| }, |
| { |
| "epoch": 0.8684019057901284, |
| "eval_loss": 0.983921468257904, |
| "eval_runtime": 32.7463, |
| "eval_samples_per_second": 30.538, |
| "eval_steps_per_second": 1.283, |
| "eval_token_acc": 0.7384963412663604, |
| "num_input_tokens_seen": 741791226, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8707489379679395, |
| "grad_norm": 2.3840469812175087, |
| "learning_rate": 1.224270760949381e-06, |
| "loss": 0.9575783729553222, |
| "num_input_tokens_seen": 743787261, |
| "step": 3710, |
| "token_acc": 0.7436981812982442 |
| }, |
| { |
| "epoch": 0.8730959701457507, |
| "grad_norm": 1.947777089028747, |
| "learning_rate": 1.2201867167473015e-06, |
| "loss": 0.9696456909179687, |
| "num_input_tokens_seen": 745796382, |
| "step": 3720, |
| "token_acc": 0.7412485623553386 |
| }, |
| { |
| "epoch": 0.8754430023235619, |
| "grad_norm": 1.755420766932852, |
| "learning_rate": 1.2160988091628022e-06, |
| "loss": 0.9615589141845703, |
| "num_input_tokens_seen": 747780156, |
| "step": 3730, |
| "token_acc": 0.7427405478352258 |
| }, |
| { |
| "epoch": 0.877790034501373, |
| "grad_norm": 1.5327100981263035, |
| "learning_rate": 1.2120071099220547e-06, |
| "loss": 0.9285150527954101, |
| "num_input_tokens_seen": 749739183, |
| "step": 3740, |
| "token_acc": 0.7498815184287402 |
| }, |
| { |
| "epoch": 0.8801370666791841, |
| "grad_norm": 1.797316309204294, |
| "learning_rate": 1.207911690817759e-06, |
| "loss": 0.9365687370300293, |
| "num_input_tokens_seen": 751694550, |
| "step": 3750, |
| "token_acc": 0.747152564554286 |
| }, |
| { |
| "epoch": 0.8824840988569953, |
| "grad_norm": 3.689781286827284, |
| "learning_rate": 1.2038126237078849e-06, |
| "loss": 0.953128433227539, |
| "num_input_tokens_seen": 753712974, |
| "step": 3760, |
| "token_acc": 0.7452915604974099 |
| }, |
| { |
| "epoch": 0.8848311310348065, |
| "grad_norm": 1.7805781440802038, |
| "learning_rate": 1.1997099805144068e-06, |
| "loss": 0.9508394241333008, |
| "num_input_tokens_seen": 755748069, |
| "step": 3770, |
| "token_acc": 0.7452503865456881 |
| }, |
| { |
| "epoch": 0.8871781632126177, |
| "grad_norm": 1.6166917326261805, |
| "learning_rate": 1.195603833222048e-06, |
| "loss": 0.9421730995178222, |
| "num_input_tokens_seen": 757732731, |
| "step": 3780, |
| "token_acc": 0.746435002974226 |
| }, |
| { |
| "epoch": 0.8895251953904288, |
| "grad_norm": 2.7425269690357057, |
| "learning_rate": 1.191494253877013e-06, |
| "loss": 0.9745880126953125, |
| "num_input_tokens_seen": 759774399, |
| "step": 3790, |
| "token_acc": 0.7450119697550278 |
| }, |
| { |
| "epoch": 0.8918722275682399, |
| "grad_norm": 1.6146982833566892, |
| "learning_rate": 1.1873813145857248e-06, |
| "loss": 0.9547751426696778, |
| "num_input_tokens_seen": 761780385, |
| "step": 3800, |
| "token_acc": 0.7437249909057839 |
| }, |
| { |
| "epoch": 0.8918722275682399, |
| "eval_loss": 0.9822799563407898, |
| "eval_runtime": 32.7794, |
| "eval_samples_per_second": 30.507, |
| "eval_steps_per_second": 1.281, |
| "eval_token_acc": 0.738865677154267, |
| "num_input_tokens_seen": 761780385, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8942192597460511, |
| "grad_norm": 8.557612907531114, |
| "learning_rate": 1.1832650875135597e-06, |
| "loss": 0.9583858489990235, |
| "num_input_tokens_seen": 763769655, |
| "step": 3810, |
| "token_acc": 0.7431487370276885 |
| }, |
| { |
| "epoch": 0.8965662919238623, |
| "grad_norm": 1.5077356512025262, |
| "learning_rate": 1.1791456448835825e-06, |
| "loss": 0.9206510543823242, |
| "num_input_tokens_seen": 765823593, |
| "step": 3820, |
| "token_acc": 0.7506628223950441 |
| }, |
| { |
| "epoch": 0.8989133241016735, |
| "grad_norm": 1.5006830716992956, |
| "learning_rate": 1.175023058975276e-06, |
| "loss": 0.9615950584411621, |
| "num_input_tokens_seen": 767831079, |
| "step": 3830, |
| "token_acc": 0.7423029397870712 |
| }, |
| { |
| "epoch": 0.9012603562794846, |
| "grad_norm": 1.6769633570300284, |
| "learning_rate": 1.1708974021232767e-06, |
| "loss": 0.9534446716308593, |
| "num_input_tokens_seen": 769798548, |
| "step": 3840, |
| "token_acc": 0.7445747944292532 |
| }, |
| { |
| "epoch": 0.9036073884572957, |
| "grad_norm": 1.759779515088976, |
| "learning_rate": 1.1667687467161023e-06, |
| "loss": 0.9459953308105469, |
| "num_input_tokens_seen": 771774078, |
| "step": 3850, |
| "token_acc": 0.744865905394826 |
| }, |
| { |
| "epoch": 0.9059544206351069, |
| "grad_norm": 1.6599709517731647, |
| "learning_rate": 1.1626371651948836e-06, |
| "loss": 0.9330622673034668, |
| "num_input_tokens_seen": 773817642, |
| "step": 3860, |
| "token_acc": 0.7481679393835271 |
| }, |
| { |
| "epoch": 0.9083014528129181, |
| "grad_norm": 1.6573686376498213, |
| "learning_rate": 1.158502730052093e-06, |
| "loss": 0.943012809753418, |
| "num_input_tokens_seen": 775877070, |
| "step": 3870, |
| "token_acc": 0.7472794230837547 |
| }, |
| { |
| "epoch": 0.9106484849907293, |
| "grad_norm": 2.4726992986853444, |
| "learning_rate": 1.1543655138302713e-06, |
| "loss": 0.9866430282592773, |
| "num_input_tokens_seen": 777904599, |
| "step": 3880, |
| "token_acc": 0.7372872068022087 |
| }, |
| { |
| "epoch": 0.9129955171685403, |
| "grad_norm": 1.7326340330977308, |
| "learning_rate": 1.150225589120757e-06, |
| "loss": 0.9427039146423339, |
| "num_input_tokens_seen": 779960793, |
| "step": 3890, |
| "token_acc": 0.7463757958063197 |
| }, |
| { |
| "epoch": 0.9153425493463515, |
| "grad_norm": 1.634253822545075, |
| "learning_rate": 1.1460830285624116e-06, |
| "loss": 0.9683923721313477, |
| "num_input_tokens_seen": 782008791, |
| "step": 3900, |
| "token_acc": 0.741813429536215 |
| }, |
| { |
| "epoch": 0.9153425493463515, |
| "eval_loss": 0.97979736328125, |
| "eval_runtime": 32.457, |
| "eval_samples_per_second": 30.81, |
| "eval_steps_per_second": 1.294, |
| "eval_token_acc": 0.739553565245493, |
| "num_input_tokens_seen": 782008791, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.9176895815241627, |
| "grad_norm": 5.153362224558377, |
| "learning_rate": 1.1419379048403444e-06, |
| "loss": 0.9662550926208496, |
| "num_input_tokens_seen": 784016886, |
| "step": 3910, |
| "token_acc": 0.7420221405659442 |
| }, |
| { |
| "epoch": 0.9200366137019739, |
| "grad_norm": 1.9857737502868835, |
| "learning_rate": 1.137790290684638e-06, |
| "loss": 0.9286038398742675, |
| "num_input_tokens_seen": 786018876, |
| "step": 3920, |
| "token_acc": 0.7495468248085001 |
| }, |
| { |
| "epoch": 0.922383645879785, |
| "grad_norm": 1.842562371990634, |
| "learning_rate": 1.1336402588690725e-06, |
| "loss": 0.9483222007751465, |
| "num_input_tokens_seen": 788055180, |
| "step": 3930, |
| "token_acc": 0.7456087098512761 |
| }, |
| { |
| "epoch": 0.9247306780575961, |
| "grad_norm": 1.928971592873294, |
| "learning_rate": 1.1294878822098467e-06, |
| "loss": 0.9480892181396484, |
| "num_input_tokens_seen": 790110096, |
| "step": 3940, |
| "token_acc": 0.7468523363829526 |
| }, |
| { |
| "epoch": 0.9270777102354073, |
| "grad_norm": 1.6567939468576487, |
| "learning_rate": 1.1253332335643042e-06, |
| "loss": 0.947171974182129, |
| "num_input_tokens_seen": 792098733, |
| "step": 3950, |
| "token_acc": 0.7463428498622995 |
| }, |
| { |
| "epoch": 0.9294247424132185, |
| "grad_norm": 2.382881124913188, |
| "learning_rate": 1.1211763858296505e-06, |
| "loss": 0.9341253280639649, |
| "num_input_tokens_seen": 794107374, |
| "step": 3960, |
| "token_acc": 0.749001431982777 |
| }, |
| { |
| "epoch": 0.9317717745910297, |
| "grad_norm": 2.385202785146866, |
| "learning_rate": 1.1170174119416775e-06, |
| "loss": 0.9605335235595703, |
| "num_input_tokens_seen": 796145907, |
| "step": 3970, |
| "token_acc": 0.7420721101207574 |
| }, |
| { |
| "epoch": 0.9341188067688408, |
| "grad_norm": 1.6538910226354369, |
| "learning_rate": 1.1128563848734815e-06, |
| "loss": 0.904339599609375, |
| "num_input_tokens_seen": 798189987, |
| "step": 3980, |
| "token_acc": 0.7552502219081598 |
| }, |
| { |
| "epoch": 0.9364658389466519, |
| "grad_norm": 2.1083368115488206, |
| "learning_rate": 1.108693377634185e-06, |
| "loss": 0.9489521980285645, |
| "num_input_tokens_seen": 800197461, |
| "step": 3990, |
| "token_acc": 0.7454285509759317 |
| }, |
| { |
| "epoch": 0.9388128711244631, |
| "grad_norm": 1.9940124977981624, |
| "learning_rate": 1.1045284632676535e-06, |
| "loss": 0.9406743049621582, |
| "num_input_tokens_seen": 802174746, |
| "step": 4000, |
| "token_acc": 0.7459721976990789 |
| }, |
| { |
| "epoch": 0.9388128711244631, |
| "eval_loss": 0.9778164029121399, |
| "eval_runtime": 32.5943, |
| "eval_samples_per_second": 30.68, |
| "eval_steps_per_second": 1.289, |
| "eval_token_acc": 0.7396274324230743, |
| "num_input_tokens_seen": 802174746, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9411599033022743, |
| "grad_norm": 1.869832978916969, |
| "learning_rate": 1.1003617148512149e-06, |
| "loss": 0.9346565246582031, |
| "num_input_tokens_seen": 804141819, |
| "step": 4010, |
| "token_acc": 0.7472374245472837 |
| }, |
| { |
| "epoch": 0.9435069354800855, |
| "grad_norm": 2.364187676148168, |
| "learning_rate": 1.0961932054943776e-06, |
| "loss": 0.9504963874816894, |
| "num_input_tokens_seen": 806092293, |
| "step": 4020, |
| "token_acc": 0.7476745370464685 |
| }, |
| { |
| "epoch": 0.9458539676578966, |
| "grad_norm": 1.7457815556862932, |
| "learning_rate": 1.0920230083375472e-06, |
| "loss": 0.9478288650512695, |
| "num_input_tokens_seen": 808096725, |
| "step": 4030, |
| "token_acc": 0.7461893605967633 |
| }, |
| { |
| "epoch": 0.9482009998357077, |
| "grad_norm": 1.7540758806187229, |
| "learning_rate": 1.0878511965507434e-06, |
| "loss": 0.9289562225341796, |
| "num_input_tokens_seen": 810119691, |
| "step": 4040, |
| "token_acc": 0.7498504598729057 |
| }, |
| { |
| "epoch": 0.9505480320135189, |
| "grad_norm": 5.524603084757776, |
| "learning_rate": 1.0836778433323157e-06, |
| "loss": 0.9280494689941406, |
| "num_input_tokens_seen": 812173641, |
| "step": 4050, |
| "token_acc": 0.7489092478671032 |
| }, |
| { |
| "epoch": 0.9528950641913301, |
| "grad_norm": 2.2610221290856205, |
| "learning_rate": 1.0795030219076598e-06, |
| "loss": 0.9323202133178711, |
| "num_input_tokens_seen": 814155057, |
| "step": 4060, |
| "token_acc": 0.7484355792832109 |
| }, |
| { |
| "epoch": 0.9552420963691413, |
| "grad_norm": 1.7453803466041382, |
| "learning_rate": 1.0753268055279328e-06, |
| "loss": 0.9361183166503906, |
| "num_input_tokens_seen": 816203571, |
| "step": 4070, |
| "token_acc": 0.7480308978092947 |
| }, |
| { |
| "epoch": 0.9575891285469523, |
| "grad_norm": 3.200843146499252, |
| "learning_rate": 1.071149267468767e-06, |
| "loss": 0.9665923118591309, |
| "num_input_tokens_seen": 818255160, |
| "step": 4080, |
| "token_acc": 0.7428710890766919 |
| }, |
| { |
| "epoch": 0.9599361607247635, |
| "grad_norm": 2.769528286877977, |
| "learning_rate": 1.066970481028985e-06, |
| "loss": 0.9312915802001953, |
| "num_input_tokens_seen": 820210017, |
| "step": 4090, |
| "token_acc": 0.7505294435331026 |
| }, |
| { |
| "epoch": 0.9622831929025747, |
| "grad_norm": 3.5116532009374186, |
| "learning_rate": 1.0627905195293135e-06, |
| "loss": 0.9360153198242187, |
| "num_input_tokens_seen": 822213030, |
| "step": 4100, |
| "token_acc": 0.7485829324512936 |
| }, |
| { |
| "epoch": 0.9622831929025747, |
| "eval_loss": 0.9762653112411499, |
| "eval_runtime": 32.7782, |
| "eval_samples_per_second": 30.508, |
| "eval_steps_per_second": 1.281, |
| "eval_token_acc": 0.7401121857759516, |
| "num_input_tokens_seen": 822213030, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9646302250803859, |
| "grad_norm": 5.045367081523594, |
| "learning_rate": 1.0586094563110963e-06, |
| "loss": 0.9286471366882324, |
| "num_input_tokens_seen": 824216382, |
| "step": 4110, |
| "token_acc": 0.7514687934606761 |
| }, |
| { |
| "epoch": 0.966977257258197, |
| "grad_norm": 2.1231322680588756, |
| "learning_rate": 1.054427364735009e-06, |
| "loss": 0.9417591094970703, |
| "num_input_tokens_seen": 826177221, |
| "step": 4120, |
| "token_acc": 0.746542864029784 |
| }, |
| { |
| "epoch": 0.9693242894360081, |
| "grad_norm": 1.5051650791104427, |
| "learning_rate": 1.0502443181797696e-06, |
| "loss": 0.9733121871948243, |
| "num_input_tokens_seen": 828212934, |
| "step": 4130, |
| "token_acc": 0.7397737060065835 |
| }, |
| { |
| "epoch": 0.9716713216138193, |
| "grad_norm": 1.9170280031638867, |
| "learning_rate": 1.0460603900408523e-06, |
| "loss": 0.9613967895507812, |
| "num_input_tokens_seen": 830208120, |
| "step": 4140, |
| "token_acc": 0.7418330397530002 |
| }, |
| { |
| "epoch": 0.9740183537916305, |
| "grad_norm": 2.477727800782275, |
| "learning_rate": 1.0418756537291995e-06, |
| "loss": 0.920326042175293, |
| "num_input_tokens_seen": 832205229, |
| "step": 4150, |
| "token_acc": 0.7535178501070156 |
| }, |
| { |
| "epoch": 0.9763653859694417, |
| "grad_norm": 1.544900641515008, |
| "learning_rate": 1.0376901826699347e-06, |
| "loss": 0.9237567901611328, |
| "num_input_tokens_seen": 834138633, |
| "step": 4160, |
| "token_acc": 0.7496954091824597 |
| }, |
| { |
| "epoch": 0.9787124181472528, |
| "grad_norm": 1.6877147081648456, |
| "learning_rate": 1.0335040503010715e-06, |
| "loss": 0.9391614913940429, |
| "num_input_tokens_seen": 836153739, |
| "step": 4170, |
| "token_acc": 0.7479080675786391 |
| }, |
| { |
| "epoch": 0.9810594503250639, |
| "grad_norm": 2.055524057953317, |
| "learning_rate": 1.0293173300722284e-06, |
| "loss": 0.9410205841064453, |
| "num_input_tokens_seen": 838071294, |
| "step": 4180, |
| "token_acc": 0.747964305973199 |
| }, |
| { |
| "epoch": 0.9834064825028751, |
| "grad_norm": 1.9825443022012719, |
| "learning_rate": 1.0251300954433374e-06, |
| "loss": 0.9293361663818359, |
| "num_input_tokens_seen": 840082950, |
| "step": 4190, |
| "token_acc": 0.7505939412855415 |
| }, |
| { |
| "epoch": 0.9857535146806863, |
| "grad_norm": 1.6517348379687422, |
| "learning_rate": 1.020942419883357e-06, |
| "loss": 0.9549247741699218, |
| "num_input_tokens_seen": 842083761, |
| "step": 4200, |
| "token_acc": 0.7446830629715671 |
| }, |
| { |
| "epoch": 0.9857535146806863, |
| "eval_loss": 0.9754964709281921, |
| "eval_runtime": 32.4547, |
| "eval_samples_per_second": 30.812, |
| "eval_steps_per_second": 1.294, |
| "eval_token_acc": 0.7408277740587705, |
| "num_input_tokens_seen": 842083761, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.9881005468584975, |
| "grad_norm": 1.7669813904614138, |
| "learning_rate": 1.0167543768689815e-06, |
| "loss": 0.9350774765014649, |
| "num_input_tokens_seen": 844080483, |
| "step": 4210, |
| "token_acc": 0.7474908930171247 |
| }, |
| { |
| "epoch": 0.9904475790363085, |
| "grad_norm": 1.9977363833715536, |
| "learning_rate": 1.0125660398833527e-06, |
| "loss": 0.9390117645263671, |
| "num_input_tokens_seen": 846069951, |
| "step": 4220, |
| "token_acc": 0.7463500450267371 |
| }, |
| { |
| "epoch": 0.9927946112141197, |
| "grad_norm": 1.6725983628184662, |
| "learning_rate": 1.0083774824147707e-06, |
| "loss": 0.946631908416748, |
| "num_input_tokens_seen": 848098152, |
| "step": 4230, |
| "token_acc": 0.7457750693945103 |
| }, |
| { |
| "epoch": 0.9951416433919309, |
| "grad_norm": 1.7247846754251406, |
| "learning_rate": 1.004188777955404e-06, |
| "loss": 0.9343754768371582, |
| "num_input_tokens_seen": 850113609, |
| "step": 4240, |
| "token_acc": 0.7490662455788695 |
| }, |
| { |
| "epoch": 0.9974886755697421, |
| "grad_norm": 2.0830434897072894, |
| "learning_rate": 1e-06, |
| "loss": 0.9314743041992187, |
| "num_input_tokens_seen": 852105906, |
| "step": 4250, |
| "token_acc": 0.749313829578074 |
| }, |
| { |
| "epoch": 0.9998357077475533, |
| "grad_norm": 1.814610722365582, |
| "learning_rate": 9.958112220445962e-07, |
| "loss": 0.9592094421386719, |
| "num_input_tokens_seen": 854098311, |
| "step": 4260, |
| "token_acc": 0.7431068897769029 |
| }, |
| { |
| "epoch": 1.00211232896003, |
| "grad_norm": 1.5113637229667725, |
| "learning_rate": 9.916225175852293e-07, |
| "loss": 0.894398307800293, |
| "num_input_tokens_seen": 856086594, |
| "step": 4270, |
| "token_acc": 0.7580048741904789 |
| }, |
| { |
| "epoch": 1.0044593611378412, |
| "grad_norm": 4.446393040487181, |
| "learning_rate": 9.874339601166472e-07, |
| "loss": 0.9135477066040039, |
| "num_input_tokens_seen": 858108198, |
| "step": 4280, |
| "token_acc": 0.7531681304263087 |
| }, |
| { |
| "epoch": 1.0068063933156524, |
| "grad_norm": 1.9208454193735196, |
| "learning_rate": 9.832456231310188e-07, |
| "loss": 0.9318746566772461, |
| "num_input_tokens_seen": 860120775, |
| "step": 4290, |
| "token_acc": 0.747537408902533 |
| }, |
| { |
| "epoch": 1.0091534254934635, |
| "grad_norm": 1.5928331203409287, |
| "learning_rate": 9.790575801166431e-07, |
| "loss": 0.9145861625671386, |
| "num_input_tokens_seen": 862143132, |
| "step": 4300, |
| "token_acc": 0.7532685063928213 |
| }, |
| { |
| "epoch": 1.0091534254934635, |
| "eval_loss": 0.9742150902748108, |
| "eval_runtime": 32.578, |
| "eval_samples_per_second": 30.696, |
| "eval_steps_per_second": 1.289, |
| "eval_token_acc": 0.7412802105214561, |
| "num_input_tokens_seen": 862143132, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.0115004576712747, |
| "grad_norm": 2.2199758281219837, |
| "learning_rate": 9.748699045566625e-07, |
| "loss": 0.9037257194519043, |
| "num_input_tokens_seen": 864130884, |
| "step": 4310, |
| "token_acc": 0.7554067579469933 |
| }, |
| { |
| "epoch": 1.013847489849086, |
| "grad_norm": 2.5403224399288926, |
| "learning_rate": 9.706826699277717e-07, |
| "loss": 0.8928478240966797, |
| "num_input_tokens_seen": 866146368, |
| "step": 4320, |
| "token_acc": 0.7571011279244853 |
| }, |
| { |
| "epoch": 1.016194522026897, |
| "grad_norm": 1.6880663111795373, |
| "learning_rate": 9.664959496989284e-07, |
| "loss": 0.8799491882324219, |
| "num_input_tokens_seen": 868132068, |
| "step": 4330, |
| "token_acc": 0.7608739162744612 |
| }, |
| { |
| "epoch": 1.018541554204708, |
| "grad_norm": 1.9603998555475624, |
| "learning_rate": 9.623098173300653e-07, |
| "loss": 0.9061168670654297, |
| "num_input_tokens_seen": 870168408, |
| "step": 4340, |
| "token_acc": 0.7558231445173181 |
| }, |
| { |
| "epoch": 1.0208885863825192, |
| "grad_norm": 2.052768381078441, |
| "learning_rate": 9.581243462708005e-07, |
| "loss": 0.891018009185791, |
| "num_input_tokens_seen": 872101149, |
| "step": 4350, |
| "token_acc": 0.7599988872462524 |
| }, |
| { |
| "epoch": 1.0232356185603304, |
| "grad_norm": 1.514439023769519, |
| "learning_rate": 9.539396099591476e-07, |
| "loss": 0.9129314422607422, |
| "num_input_tokens_seen": 874087335, |
| "step": 4360, |
| "token_acc": 0.7564216192481887 |
| }, |
| { |
| "epoch": 1.0255826507381416, |
| "grad_norm": 1.8673183879809325, |
| "learning_rate": 9.497556818202304e-07, |
| "loss": 0.9109779357910156, |
| "num_input_tokens_seen": 876059952, |
| "step": 4370, |
| "token_acc": 0.7535195830085737 |
| }, |
| { |
| "epoch": 1.0279296829159528, |
| "grad_norm": 6.147575681746076, |
| "learning_rate": 9.45572635264991e-07, |
| "loss": 0.9013278961181641, |
| "num_input_tokens_seen": 878124633, |
| "step": 4380, |
| "token_acc": 0.756046360357164 |
| }, |
| { |
| "epoch": 1.030276715093764, |
| "grad_norm": 3.3826066958331045, |
| "learning_rate": 9.413905436889033e-07, |
| "loss": 0.8935451507568359, |
| "num_input_tokens_seen": 880109727, |
| "step": 4390, |
| "token_acc": 0.7567750980510352 |
| }, |
| { |
| "epoch": 1.0326237472715751, |
| "grad_norm": 2.791787214417096, |
| "learning_rate": 9.372094804706866e-07, |
| "loss": 0.9111810684204101, |
| "num_input_tokens_seen": 882111045, |
| "step": 4400, |
| "token_acc": 0.7554985194799139 |
| }, |
| { |
| "epoch": 1.0326237472715751, |
| "eval_loss": 0.9730333685874939, |
| "eval_runtime": 32.4657, |
| "eval_samples_per_second": 30.802, |
| "eval_steps_per_second": 1.294, |
| "eval_token_acc": 0.7414048613836246, |
| "num_input_tokens_seen": 882111045, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.0349707794493863, |
| "grad_norm": 1.927568219024905, |
| "learning_rate": 9.330295189710151e-07, |
| "loss": 0.9100271224975586, |
| "num_input_tokens_seen": 884198595, |
| "step": 4410, |
| "token_acc": 0.7540011119241447 |
| }, |
| { |
| "epoch": 1.0373178116271975, |
| "grad_norm": 2.5062754907489797, |
| "learning_rate": 9.288507325312334e-07, |
| "loss": 0.8903081893920899, |
| "num_input_tokens_seen": 886152855, |
| "step": 4420, |
| "token_acc": 0.7574611181168558 |
| }, |
| { |
| "epoch": 1.0396648438050087, |
| "grad_norm": 1.9923532749108916, |
| "learning_rate": 9.246731944720674e-07, |
| "loss": 0.9105890274047852, |
| "num_input_tokens_seen": 888141444, |
| "step": 4430, |
| "token_acc": 0.7539804724713297 |
| }, |
| { |
| "epoch": 1.0420118759828196, |
| "grad_norm": 1.8502910487817004, |
| "learning_rate": 9.204969780923403e-07, |
| "loss": 0.9087862968444824, |
| "num_input_tokens_seen": 890115771, |
| "step": 4440, |
| "token_acc": 0.7559308727674652 |
| }, |
| { |
| "epoch": 1.0443589081606308, |
| "grad_norm": 5.223223230980478, |
| "learning_rate": 9.163221566676847e-07, |
| "loss": 0.9071809768676757, |
| "num_input_tokens_seen": 892098426, |
| "step": 4450, |
| "token_acc": 0.7547434701771973 |
| }, |
| { |
| "epoch": 1.046705940338442, |
| "grad_norm": 1.5951294272531664, |
| "learning_rate": 9.121488034492568e-07, |
| "loss": 0.9115602493286132, |
| "num_input_tokens_seen": 894150594, |
| "step": 4460, |
| "token_acc": 0.7560878381891606 |
| }, |
| { |
| "epoch": 1.0490529725162532, |
| "grad_norm": 24.227203178087926, |
| "learning_rate": 9.079769916624529e-07, |
| "loss": 0.8929647445678711, |
| "num_input_tokens_seen": 896182068, |
| "step": 4470, |
| "token_acc": 0.7569376280966494 |
| }, |
| { |
| "epoch": 1.0514000046940644, |
| "grad_norm": 4.446148288911931, |
| "learning_rate": 9.038067945056227e-07, |
| "loss": 0.8845357894897461, |
| "num_input_tokens_seen": 898144740, |
| "step": 4480, |
| "token_acc": 0.7596217335121099 |
| }, |
| { |
| "epoch": 1.0537470368718755, |
| "grad_norm": 2.33113822520666, |
| "learning_rate": 8.996382851487849e-07, |
| "loss": 0.9204854011535645, |
| "num_input_tokens_seen": 900153033, |
| "step": 4490, |
| "token_acc": 0.7531009457228544 |
| }, |
| { |
| "epoch": 1.0560940690496867, |
| "grad_norm": 1.6705258835681585, |
| "learning_rate": 8.954715367323466e-07, |
| "loss": 0.9108184814453125, |
| "num_input_tokens_seen": 902159874, |
| "step": 4500, |
| "token_acc": 0.7534851198704926 |
| }, |
| { |
| "epoch": 1.0560940690496867, |
| "eval_loss": 0.9722611308097839, |
| "eval_runtime": 32.6343, |
| "eval_samples_per_second": 30.643, |
| "eval_steps_per_second": 1.287, |
| "eval_token_acc": 0.7414879619584035, |
| "num_input_tokens_seen": 902159874, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.058441101227498, |
| "grad_norm": 1.814968079519632, |
| "learning_rate": 8.91306622365815e-07, |
| "loss": 0.9042104721069336, |
| "num_input_tokens_seen": 904127259, |
| "step": 4510, |
| "token_acc": 0.7549473429720114 |
| }, |
| { |
| "epoch": 1.060788133405309, |
| "grad_norm": 1.9598731265622114, |
| "learning_rate": 8.871436151265182e-07, |
| "loss": 0.9021028518676758, |
| "num_input_tokens_seen": 906131709, |
| "step": 4520, |
| "token_acc": 0.7555155495065009 |
| }, |
| { |
| "epoch": 1.06313516558312, |
| "grad_norm": 3.5546689619235106, |
| "learning_rate": 8.829825880583226e-07, |
| "loss": 0.8736377716064453, |
| "num_input_tokens_seen": 908144946, |
| "step": 4530, |
| "token_acc": 0.7615734862488263 |
| }, |
| { |
| "epoch": 1.0654821977609312, |
| "grad_norm": 3.1846241923818295, |
| "learning_rate": 8.788236141703497e-07, |
| "loss": 0.9034311294555664, |
| "num_input_tokens_seen": 910148658, |
| "step": 4540, |
| "token_acc": 0.7564678744009387 |
| }, |
| { |
| "epoch": 1.0678292299387424, |
| "grad_norm": 2.027265382942688, |
| "learning_rate": 8.746667664356955e-07, |
| "loss": 0.9266244888305664, |
| "num_input_tokens_seen": 912148779, |
| "step": 4550, |
| "token_acc": 0.7503857571491999 |
| }, |
| { |
| "epoch": 1.0701762621165536, |
| "grad_norm": 1.7499276338815972, |
| "learning_rate": 8.705121177901531e-07, |
| "loss": 0.900362205505371, |
| "num_input_tokens_seen": 914157060, |
| "step": 4560, |
| "token_acc": 0.757182167972395 |
| }, |
| { |
| "epoch": 1.0725232942943648, |
| "grad_norm": 2.8471968306459092, |
| "learning_rate": 8.663597411309278e-07, |
| "loss": 0.8963720321655273, |
| "num_input_tokens_seen": 916145403, |
| "step": 4570, |
| "token_acc": 0.7560617462222132 |
| }, |
| { |
| "epoch": 1.074870326472176, |
| "grad_norm": 1.6540494435074347, |
| "learning_rate": 8.62209709315362e-07, |
| "loss": 0.9004743576049805, |
| "num_input_tokens_seen": 918115113, |
| "step": 4580, |
| "token_acc": 0.7545025247249607 |
| }, |
| { |
| "epoch": 1.0772173586499871, |
| "grad_norm": 2.057030263327695, |
| "learning_rate": 8.580620951596556e-07, |
| "loss": 0.9495843887329102, |
| "num_input_tokens_seen": 920159124, |
| "step": 4590, |
| "token_acc": 0.7448036906164115 |
| }, |
| { |
| "epoch": 1.0795643908277983, |
| "grad_norm": 1.7066770272878358, |
| "learning_rate": 8.539169714375885e-07, |
| "loss": 0.9105659484863281, |
| "num_input_tokens_seen": 922121547, |
| "step": 4600, |
| "token_acc": 0.7536738054675078 |
| }, |
| { |
| "epoch": 1.0795643908277983, |
| "eval_loss": 0.9716529250144958, |
| "eval_runtime": 32.5395, |
| "eval_samples_per_second": 30.732, |
| "eval_steps_per_second": 1.291, |
| "eval_token_acc": 0.7416818632995544, |
| "num_input_tokens_seen": 922121547, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.0819114230056095, |
| "grad_norm": 1.9597668178542205, |
| "learning_rate": 8.497744108792429e-07, |
| "loss": 0.8963167190551757, |
| "num_input_tokens_seen": 924093546, |
| "step": 4610, |
| "token_acc": 0.7577693693987556 |
| }, |
| { |
| "epoch": 1.0842584551834205, |
| "grad_norm": 1.477104530901047, |
| "learning_rate": 8.456344861697287e-07, |
| "loss": 0.9177652359008789, |
| "num_input_tokens_seen": 926103639, |
| "step": 4620, |
| "token_acc": 0.7516901953627176 |
| }, |
| { |
| "epoch": 1.0866054873612316, |
| "grad_norm": 1.8830008370086135, |
| "learning_rate": 8.414972699479075e-07, |
| "loss": 0.9002264022827149, |
| "num_input_tokens_seen": 928135683, |
| "step": 4630, |
| "token_acc": 0.7559382042427807 |
| }, |
| { |
| "epoch": 1.0889525195390428, |
| "grad_norm": 3.016423460140028, |
| "learning_rate": 8.373628348051163e-07, |
| "loss": 0.8956707000732422, |
| "num_input_tokens_seen": 930127536, |
| "step": 4640, |
| "token_acc": 0.7571149500895269 |
| }, |
| { |
| "epoch": 1.091299551716854, |
| "grad_norm": 1.57022279289949, |
| "learning_rate": 8.332312532838978e-07, |
| "loss": 0.9269239425659179, |
| "num_input_tokens_seen": 932125299, |
| "step": 4650, |
| "token_acc": 0.7517471473920727 |
| }, |
| { |
| "epoch": 1.0936465838946652, |
| "grad_norm": 3.5134027190857435, |
| "learning_rate": 8.291025978767234e-07, |
| "loss": 0.9176504135131835, |
| "num_input_tokens_seen": 934168311, |
| "step": 4660, |
| "token_acc": 0.7548118730939853 |
| }, |
| { |
| "epoch": 1.0959936160724764, |
| "grad_norm": 2.5211326313148623, |
| "learning_rate": 8.249769410247238e-07, |
| "loss": 0.9234855651855469, |
| "num_input_tokens_seen": 936133608, |
| "step": 4670, |
| "token_acc": 0.7515400792838399 |
| }, |
| { |
| "epoch": 1.0983406482502875, |
| "grad_norm": 2.572125880008109, |
| "learning_rate": 8.208543551164177e-07, |
| "loss": 0.8986695289611817, |
| "num_input_tokens_seen": 938147853, |
| "step": 4680, |
| "token_acc": 0.7556977694823225 |
| }, |
| { |
| "epoch": 1.1006876804280987, |
| "grad_norm": 2.988789824663344, |
| "learning_rate": 8.167349124864404e-07, |
| "loss": 0.9072399139404297, |
| "num_input_tokens_seen": 940144569, |
| "step": 4690, |
| "token_acc": 0.7530836929897347 |
| }, |
| { |
| "epoch": 1.10303471260591, |
| "grad_norm": 1.6468695088048304, |
| "learning_rate": 8.126186854142751e-07, |
| "loss": 0.9020254135131835, |
| "num_input_tokens_seen": 942165525, |
| "step": 4700, |
| "token_acc": 0.7548501978958501 |
| }, |
| { |
| "epoch": 1.10303471260591, |
| "eval_loss": 0.9701104164123535, |
| "eval_runtime": 33.0994, |
| "eval_samples_per_second": 30.212, |
| "eval_steps_per_second": 1.269, |
| "eval_token_acc": 0.7415941460261767, |
| "num_input_tokens_seen": 942165525, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.105381744783721, |
| "grad_norm": 1.6564712470148706, |
| "learning_rate": 8.08505746122987e-07, |
| "loss": 0.8915030479431152, |
| "num_input_tokens_seen": 944177469, |
| "step": 4710, |
| "token_acc": 0.7565814201146365 |
| }, |
| { |
| "epoch": 1.107728776961532, |
| "grad_norm": 2.791755191613104, |
| "learning_rate": 8.043961667779518e-07, |
| "loss": 0.9122766494750977, |
| "num_input_tokens_seen": 946234932, |
| "step": 4720, |
| "token_acc": 0.7535114631778791 |
| }, |
| { |
| "epoch": 1.1100758091393432, |
| "grad_norm": 1.6738087861309878, |
| "learning_rate": 8.002900194855931e-07, |
| "loss": 0.9000448226928711, |
| "num_input_tokens_seen": 948228513, |
| "step": 4730, |
| "token_acc": 0.7559363093706895 |
| }, |
| { |
| "epoch": 1.1124228413171544, |
| "grad_norm": 1.5535937671654965, |
| "learning_rate": 7.961873762921151e-07, |
| "loss": 0.9070523262023926, |
| "num_input_tokens_seen": 950332011, |
| "step": 4740, |
| "token_acc": 0.7553185494918014 |
| }, |
| { |
| "epoch": 1.1147698734949656, |
| "grad_norm": 2.301542689211403, |
| "learning_rate": 7.920883091822408e-07, |
| "loss": 0.90597505569458, |
| "num_input_tokens_seen": 952319049, |
| "step": 4750, |
| "token_acc": 0.7548275049458286 |
| }, |
| { |
| "epoch": 1.1171169056727768, |
| "grad_norm": 1.7473797104994677, |
| "learning_rate": 7.879928900779455e-07, |
| "loss": 0.9030384063720703, |
| "num_input_tokens_seen": 954299892, |
| "step": 4760, |
| "token_acc": 0.756532667257456 |
| }, |
| { |
| "epoch": 1.119463937850588, |
| "grad_norm": 2.558847573037429, |
| "learning_rate": 7.839011908371979e-07, |
| "loss": 0.9100503921508789, |
| "num_input_tokens_seen": 956318847, |
| "step": 4770, |
| "token_acc": 0.7527636165796845 |
| }, |
| { |
| "epoch": 1.1218109700283991, |
| "grad_norm": 1.9894868553546619, |
| "learning_rate": 7.798132832526985e-07, |
| "loss": 0.8903913497924805, |
| "num_input_tokens_seen": 958308174, |
| "step": 4780, |
| "token_acc": 0.7594328320061341 |
| }, |
| { |
| "epoch": 1.1241580022062103, |
| "grad_norm": 1.9090250979917347, |
| "learning_rate": 7.757292390506189e-07, |
| "loss": 0.9077445983886718, |
| "num_input_tokens_seen": 960311976, |
| "step": 4790, |
| "token_acc": 0.7563037639640341 |
| }, |
| { |
| "epoch": 1.1265050343840215, |
| "grad_norm": 1.5195604142033567, |
| "learning_rate": 7.716491298893441e-07, |
| "loss": 0.9030027389526367, |
| "num_input_tokens_seen": 962312673, |
| "step": 4800, |
| "token_acc": 0.7546611261686987 |
| }, |
| { |
| "epoch": 1.1265050343840215, |
| "eval_loss": 0.9690244197845459, |
| "eval_runtime": 32.6363, |
| "eval_samples_per_second": 30.641, |
| "eval_steps_per_second": 1.287, |
| "eval_token_acc": 0.7421065995706471, |
| "num_input_tokens_seen": 962312673, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.1288520665618327, |
| "grad_norm": 7.06114138514342, |
| "learning_rate": 7.675730273582159e-07, |
| "loss": 0.9238859176635742, |
| "num_input_tokens_seen": 964266690, |
| "step": 4810, |
| "token_acc": 0.7510988303005139 |
| }, |
| { |
| "epoch": 1.1311990987396436, |
| "grad_norm": 1.9887377640273287, |
| "learning_rate": 7.635010029762755e-07, |
| "loss": 0.893895149230957, |
| "num_input_tokens_seen": 966243534, |
| "step": 4820, |
| "token_acc": 0.7578514127725531 |
| }, |
| { |
| "epoch": 1.1335461309174548, |
| "grad_norm": 2.8072244352137545, |
| "learning_rate": 7.594331281910081e-07, |
| "loss": 0.8709514617919922, |
| "num_input_tokens_seen": 968205627, |
| "step": 4830, |
| "token_acc": 0.7630826790971541 |
| }, |
| { |
| "epoch": 1.135893163095266, |
| "grad_norm": 1.5697632247100872, |
| "learning_rate": 7.553694743770927e-07, |
| "loss": 0.8988607406616211, |
| "num_input_tokens_seen": 970177137, |
| "step": 4840, |
| "token_acc": 0.7561233380663482 |
| }, |
| { |
| "epoch": 1.1382401952730772, |
| "grad_norm": 2.446099829583827, |
| "learning_rate": 7.513101128351453e-07, |
| "loss": 0.9138158798217774, |
| "num_input_tokens_seen": 972139821, |
| "step": 4850, |
| "token_acc": 0.7539762326169406 |
| }, |
| { |
| "epoch": 1.1405872274508884, |
| "grad_norm": 2.2189495017577103, |
| "learning_rate": 7.472551147904707e-07, |
| "loss": 0.9274373054504395, |
| "num_input_tokens_seen": 974155848, |
| "step": 4860, |
| "token_acc": 0.750778398745103 |
| }, |
| { |
| "epoch": 1.1429342596286995, |
| "grad_norm": 1.4873269538334397, |
| "learning_rate": 7.432045513918122e-07, |
| "loss": 0.8865886688232422, |
| "num_input_tokens_seen": 976121469, |
| "step": 4870, |
| "token_acc": 0.7581827865316892 |
| }, |
| { |
| "epoch": 1.1452812918065107, |
| "grad_norm": 1.7178629684971727, |
| "learning_rate": 7.391584937101033e-07, |
| "loss": 0.9193226814270019, |
| "num_input_tokens_seen": 978125502, |
| "step": 4880, |
| "token_acc": 0.7524842758549445 |
| }, |
| { |
| "epoch": 1.147628323984322, |
| "grad_norm": 1.7659656442538727, |
| "learning_rate": 7.351170127372191e-07, |
| "loss": 0.8870782852172852, |
| "num_input_tokens_seen": 980151348, |
| "step": 4890, |
| "token_acc": 0.7591273127875505 |
| }, |
| { |
| "epoch": 1.149975356162133, |
| "grad_norm": 2.512190986249406, |
| "learning_rate": 7.310801793847343e-07, |
| "loss": 0.9009071350097656, |
| "num_input_tokens_seen": 982116819, |
| "step": 4900, |
| "token_acc": 0.7555736532655332 |
| }, |
| { |
| "epoch": 1.149975356162133, |
| "eval_loss": 0.967960000038147, |
| "eval_runtime": 33.0751, |
| "eval_samples_per_second": 30.234, |
| "eval_steps_per_second": 1.27, |
| "eval_token_acc": 0.7421296830636412, |
| "num_input_tokens_seen": 982116819, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.152322388339944, |
| "grad_norm": 1.6304789397632498, |
| "learning_rate": 7.270480644826749e-07, |
| "loss": 0.9345785140991211, |
| "num_input_tokens_seen": 984113586, |
| "step": 4910, |
| "token_acc": 0.7481138414862325 |
| }, |
| { |
| "epoch": 1.1546694205177552, |
| "grad_norm": 1.6773663128682315, |
| "learning_rate": 7.230207387782776e-07, |
| "loss": 0.9058225631713868, |
| "num_input_tokens_seen": 986134590, |
| "step": 4920, |
| "token_acc": 0.7572044267504292 |
| }, |
| { |
| "epoch": 1.1570164526955664, |
| "grad_norm": 2.467806440031501, |
| "learning_rate": 7.18998272934749e-07, |
| "loss": 0.905792236328125, |
| "num_input_tokens_seen": 988128006, |
| "step": 4930, |
| "token_acc": 0.7552128911554362 |
| }, |
| { |
| "epoch": 1.1593634848733776, |
| "grad_norm": 2.3456217936104613, |
| "learning_rate": 7.149807375300238e-07, |
| "loss": 0.8924792289733887, |
| "num_input_tokens_seen": 990097689, |
| "step": 4940, |
| "token_acc": 0.7572508060847032 |
| }, |
| { |
| "epoch": 1.1617105170511888, |
| "grad_norm": 2.059131762842591, |
| "learning_rate": 7.109682030555282e-07, |
| "loss": 0.8982337951660156, |
| "num_input_tokens_seen": 992129379, |
| "step": 4950, |
| "token_acc": 0.7551930966690015 |
| }, |
| { |
| "epoch": 1.164057549229, |
| "grad_norm": 2.9573480896222772, |
| "learning_rate": 7.069607399149426e-07, |
| "loss": 0.8968988418579101, |
| "num_input_tokens_seen": 994140366, |
| "step": 4960, |
| "token_acc": 0.7568408887934138 |
| }, |
| { |
| "epoch": 1.1664045814068111, |
| "grad_norm": 1.7230957488152536, |
| "learning_rate": 7.029584184229652e-07, |
| "loss": 0.909503173828125, |
| "num_input_tokens_seen": 996159930, |
| "step": 4970, |
| "token_acc": 0.7549473717210192 |
| }, |
| { |
| "epoch": 1.1687516135846223, |
| "grad_norm": 1.7012209659002009, |
| "learning_rate": 6.989613088040795e-07, |
| "loss": 0.8788484573364258, |
| "num_input_tokens_seen": 998200734, |
| "step": 4980, |
| "token_acc": 0.7586579539038453 |
| }, |
| { |
| "epoch": 1.1710986457624335, |
| "grad_norm": 1.592891016055058, |
| "learning_rate": 6.949694811913225e-07, |
| "loss": 0.9113107681274414, |
| "num_input_tokens_seen": 1000131159, |
| "step": 4990, |
| "token_acc": 0.7557149987259054 |
| }, |
| { |
| "epoch": 1.1734456779402445, |
| "grad_norm": 5.935924197471871, |
| "learning_rate": 6.909830056250526e-07, |
| "loss": 0.900279426574707, |
| "num_input_tokens_seen": 1002152949, |
| "step": 5000, |
| "token_acc": 0.7555415584180373 |
| }, |
| { |
| "epoch": 1.1734456779402445, |
| "eval_loss": 0.9671830534934998, |
| "eval_runtime": 33.371, |
| "eval_samples_per_second": 29.966, |
| "eval_steps_per_second": 1.259, |
| "eval_token_acc": 0.7425636527319314, |
| "num_input_tokens_seen": 1002152949, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1757927101180556, |
| "grad_norm": 2.200894548140831, |
| "learning_rate": 6.870019520517217e-07, |
| "loss": 0.8960202217102051, |
| "num_input_tokens_seen": 1004157984, |
| "step": 5010, |
| "token_acc": 0.7569971090628078 |
| }, |
| { |
| "epoch": 1.1781397422958668, |
| "grad_norm": 1.566572723585561, |
| "learning_rate": 6.830263903226482e-07, |
| "loss": 0.9069774627685547, |
| "num_input_tokens_seen": 1006144677, |
| "step": 5020, |
| "token_acc": 0.7552951138157661 |
| }, |
| { |
| "epoch": 1.180486774473678, |
| "grad_norm": 2.012794050429991, |
| "learning_rate": 6.790563901927906e-07, |
| "loss": 0.903378677368164, |
| "num_input_tokens_seen": 1008183480, |
| "step": 5030, |
| "token_acc": 0.7542571237096386 |
| }, |
| { |
| "epoch": 1.1828338066514892, |
| "grad_norm": 2.6190444654182663, |
| "learning_rate": 6.750920213195237e-07, |
| "loss": 0.9192432403564453, |
| "num_input_tokens_seen": 1010200815, |
| "step": 5040, |
| "token_acc": 0.752674829722257 |
| }, |
| { |
| "epoch": 1.1851808388293004, |
| "grad_norm": 1.876294751139499, |
| "learning_rate": 6.711333532614167e-07, |
| "loss": 0.8876149177551269, |
| "num_input_tokens_seen": 1012244334, |
| "step": 5050, |
| "token_acc": 0.7581334816982072 |
| }, |
| { |
| "epoch": 1.1875278710071115, |
| "grad_norm": 3.023714292115771, |
| "learning_rate": 6.671804554770134e-07, |
| "loss": 0.9129764556884765, |
| "num_input_tokens_seen": 1014307173, |
| "step": 5060, |
| "token_acc": 0.7553209579424762 |
| }, |
| { |
| "epoch": 1.1898749031849227, |
| "grad_norm": 1.9132860678026469, |
| "learning_rate": 6.63233397323612e-07, |
| "loss": 0.9299371719360352, |
| "num_input_tokens_seen": 1016348544, |
| "step": 5070, |
| "token_acc": 0.7510845945047212 |
| }, |
| { |
| "epoch": 1.192221935362734, |
| "grad_norm": 1.7646493320200434, |
| "learning_rate": 6.592922480560483e-07, |
| "loss": 0.8976167678833008, |
| "num_input_tokens_seen": 1018332171, |
| "step": 5080, |
| "token_acc": 0.7562631418499035 |
| }, |
| { |
| "epoch": 1.1945689675405449, |
| "grad_norm": 1.6538220495495426, |
| "learning_rate": 6.55357076825483e-07, |
| "loss": 0.9083082199096679, |
| "num_input_tokens_seen": 1020317589, |
| "step": 5090, |
| "token_acc": 0.7535232253620915 |
| }, |
| { |
| "epoch": 1.196915999718356, |
| "grad_norm": 1.8373787795166967, |
| "learning_rate": 6.51427952678185e-07, |
| "loss": 0.897801399230957, |
| "num_input_tokens_seen": 1022291424, |
| "step": 5100, |
| "token_acc": 0.7568812436238018 |
| }, |
| { |
| "epoch": 1.196915999718356, |
| "eval_loss": 0.965643048286438, |
| "eval_runtime": 32.457, |
| "eval_samples_per_second": 30.81, |
| "eval_steps_per_second": 1.294, |
| "eval_token_acc": 0.7426790701969022, |
| "num_input_tokens_seen": 1022291424, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.1992630318961672, |
| "grad_norm": 1.6643922341831798, |
| "learning_rate": 6.475049445543214e-07, |
| "loss": 0.8832623481750488, |
| "num_input_tokens_seen": 1024326642, |
| "step": 5110, |
| "token_acc": 0.7609418407772097 |
| }, |
| { |
| "epoch": 1.2016100640739784, |
| "grad_norm": 2.8760528519429527, |
| "learning_rate": 6.435881212867493e-07, |
| "loss": 0.8896665573120117, |
| "num_input_tokens_seen": 1026358665, |
| "step": 5120, |
| "token_acc": 0.7582770940849544 |
| }, |
| { |
| "epoch": 1.2039570962517896, |
| "grad_norm": 2.002315720555266, |
| "learning_rate": 6.396775515998054e-07, |
| "loss": 0.9143696784973144, |
| "num_input_tokens_seen": 1028363571, |
| "step": 5130, |
| "token_acc": 0.7524985799614379 |
| }, |
| { |
| "epoch": 1.2063041284296008, |
| "grad_norm": 2.371576045666034, |
| "learning_rate": 6.357733041081017e-07, |
| "loss": 0.9304786682128906, |
| "num_input_tokens_seen": 1030342941, |
| "step": 5140, |
| "token_acc": 0.7486818472638695 |
| }, |
| { |
| "epoch": 1.208651160607412, |
| "grad_norm": 2.346943055260075, |
| "learning_rate": 6.31875447315322e-07, |
| "loss": 0.9241456031799317, |
| "num_input_tokens_seen": 1032378225, |
| "step": 5150, |
| "token_acc": 0.7510933676127989 |
| }, |
| { |
| "epoch": 1.2109981927852231, |
| "grad_norm": 2.231488980986392, |
| "learning_rate": 6.279840496130188e-07, |
| "loss": 0.9039559364318848, |
| "num_input_tokens_seen": 1034346864, |
| "step": 5160, |
| "token_acc": 0.7524411349410404 |
| }, |
| { |
| "epoch": 1.2133452249630343, |
| "grad_norm": 1.9646213179831136, |
| "learning_rate": 6.240991792794133e-07, |
| "loss": 0.9074276924133301, |
| "num_input_tokens_seen": 1036368729, |
| "step": 5170, |
| "token_acc": 0.7546195549754318 |
| }, |
| { |
| "epoch": 1.2156922571408453, |
| "grad_norm": 1.722457316805155, |
| "learning_rate": 6.202209044781989e-07, |
| "loss": 0.8936328887939453, |
| "num_input_tokens_seen": 1038356424, |
| "step": 5180, |
| "token_acc": 0.7567584358948151 |
| }, |
| { |
| "epoch": 1.2180392893186567, |
| "grad_norm": 3.480235780891435, |
| "learning_rate": 6.163492932573438e-07, |
| "loss": 0.8924088478088379, |
| "num_input_tokens_seen": 1040404614, |
| "step": 5190, |
| "token_acc": 0.759963029202667 |
| }, |
| { |
| "epoch": 1.2203863214964676, |
| "grad_norm": 3.7980987371120305, |
| "learning_rate": 6.124844135478971e-07, |
| "loss": 0.9037540435791016, |
| "num_input_tokens_seen": 1042409814, |
| "step": 5200, |
| "token_acc": 0.7544269749931005 |
| }, |
| { |
| "epoch": 1.2203863214964676, |
| "eval_loss": 0.9651933908462524, |
| "eval_runtime": 32.4721, |
| "eval_samples_per_second": 30.796, |
| "eval_steps_per_second": 1.293, |
| "eval_token_acc": 0.7432146072343667, |
| "num_input_tokens_seen": 1042409814, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.2227333536742788, |
| "grad_norm": 2.0301844609252324, |
| "learning_rate": 6.086263331627975e-07, |
| "loss": 0.8960711479187011, |
| "num_input_tokens_seen": 1044474747, |
| "step": 5210, |
| "token_acc": 0.7566766133085695 |
| }, |
| { |
| "epoch": 1.22508038585209, |
| "grad_norm": 2.006861134477907, |
| "learning_rate": 6.047751197956838e-07, |
| "loss": 0.8874652862548829, |
| "num_input_tokens_seen": 1046542701, |
| "step": 5220, |
| "token_acc": 0.7577207817130738 |
| }, |
| { |
| "epoch": 1.2274274180299012, |
| "grad_norm": 1.64084154179337, |
| "learning_rate": 6.009308410197047e-07, |
| "loss": 0.9375964164733886, |
| "num_input_tokens_seen": 1048531923, |
| "step": 5230, |
| "token_acc": 0.7477447658832623 |
| }, |
| { |
| "epoch": 1.2297744502077124, |
| "grad_norm": 2.376806108677906, |
| "learning_rate": 5.970935642863374e-07, |
| "loss": 0.9305553436279297, |
| "num_input_tokens_seen": 1050497172, |
| "step": 5240, |
| "token_acc": 0.7477491309741687 |
| }, |
| { |
| "epoch": 1.2321214823855235, |
| "grad_norm": 2.0017133938943603, |
| "learning_rate": 5.932633569241999e-07, |
| "loss": 0.9117889404296875, |
| "num_input_tokens_seen": 1052489067, |
| "step": 5250, |
| "token_acc": 0.7528568241041047 |
| }, |
| { |
| "epoch": 1.2344685145633347, |
| "grad_norm": 1.676786348660199, |
| "learning_rate": 5.89440286137872e-07, |
| "loss": 0.9003104209899903, |
| "num_input_tokens_seen": 1054479834, |
| "step": 5260, |
| "token_acc": 0.7555148409000024 |
| }, |
| { |
| "epoch": 1.236815546741146, |
| "grad_norm": 3.0440164850905087, |
| "learning_rate": 5.856244190067159e-07, |
| "loss": 0.9047473907470703, |
| "num_input_tokens_seen": 1056426330, |
| "step": 5270, |
| "token_acc": 0.755049574664931 |
| }, |
| { |
| "epoch": 1.239162578918957, |
| "grad_norm": 2.869133561984615, |
| "learning_rate": 5.818158224836987e-07, |
| "loss": 0.9154601097106934, |
| "num_input_tokens_seen": 1058453490, |
| "step": 5280, |
| "token_acc": 0.7520037800567009 |
| }, |
| { |
| "epoch": 1.241509611096768, |
| "grad_norm": 3.801710317044165, |
| "learning_rate": 5.780145633942173e-07, |
| "loss": 0.9164340972900391, |
| "num_input_tokens_seen": 1060486977, |
| "step": 5290, |
| "token_acc": 0.752695566601707 |
| }, |
| { |
| "epoch": 1.2438566432745792, |
| "grad_norm": 2.741349679065458, |
| "learning_rate": 5.742207084349273e-07, |
| "loss": 0.871244239807129, |
| "num_input_tokens_seen": 1062507609, |
| "step": 5300, |
| "token_acc": 0.7623417495900512 |
| }, |
| { |
| "epoch": 1.2438566432745792, |
| "eval_loss": 0.9639586210250854, |
| "eval_runtime": 32.2213, |
| "eval_samples_per_second": 31.035, |
| "eval_steps_per_second": 1.303, |
| "eval_token_acc": 0.7430853396735994, |
| "num_input_tokens_seen": 1062507609, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.2462036754523904, |
| "grad_norm": 2.0632482933314273, |
| "learning_rate": 5.704343241725719e-07, |
| "loss": 0.902606201171875, |
| "num_input_tokens_seen": 1064565387, |
| "step": 5310, |
| "token_acc": 0.7573666940890565 |
| }, |
| { |
| "epoch": 1.2485507076302016, |
| "grad_norm": 2.5206259888398805, |
| "learning_rate": 5.666554770428128e-07, |
| "loss": 0.8999618530273438, |
| "num_input_tokens_seen": 1066547697, |
| "step": 5320, |
| "token_acc": 0.7568080644124454 |
| }, |
| { |
| "epoch": 1.2508977398080128, |
| "grad_norm": 2.5949843975238664, |
| "learning_rate": 5.628842333490673e-07, |
| "loss": 0.9164423942565918, |
| "num_input_tokens_seen": 1068581145, |
| "step": 5330, |
| "token_acc": 0.7550268878909339 |
| }, |
| { |
| "epoch": 1.253244771985824, |
| "grad_norm": 4.036566885568054, |
| "learning_rate": 5.591206592613416e-07, |
| "loss": 0.905246353149414, |
| "num_input_tokens_seen": 1070601372, |
| "step": 5340, |
| "token_acc": 0.7552413610147676 |
| }, |
| { |
| "epoch": 1.2555918041636351, |
| "grad_norm": 5.474286064221549, |
| "learning_rate": 5.553648208150728e-07, |
| "loss": 0.8880559921264648, |
| "num_input_tokens_seen": 1072560906, |
| "step": 5350, |
| "token_acc": 0.7592060617200068 |
| }, |
| { |
| "epoch": 1.2579388363414463, |
| "grad_norm": 1.7453040114014564, |
| "learning_rate": 5.51616783909968e-07, |
| "loss": 0.9003293991088868, |
| "num_input_tokens_seen": 1074501144, |
| "step": 5360, |
| "token_acc": 0.7574462673279918 |
| }, |
| { |
| "epoch": 1.2602858685192575, |
| "grad_norm": 2.437893460638386, |
| "learning_rate": 5.478766143088491e-07, |
| "loss": 0.8865642547607422, |
| "num_input_tokens_seen": 1076535018, |
| "step": 5370, |
| "token_acc": 0.7606810169616077 |
| }, |
| { |
| "epoch": 1.2626329006970685, |
| "grad_norm": 1.8609894370837823, |
| "learning_rate": 5.441443776365002e-07, |
| "loss": 0.8910144805908203, |
| "num_input_tokens_seen": 1078579935, |
| "step": 5380, |
| "token_acc": 0.7576510815314375 |
| }, |
| { |
| "epoch": 1.2649799328748796, |
| "grad_norm": 2.6436944735193184, |
| "learning_rate": 5.404201393785122e-07, |
| "loss": 0.8772344589233398, |
| "num_input_tokens_seen": 1080564321, |
| "step": 5390, |
| "token_acc": 0.7608925444457297 |
| }, |
| { |
| "epoch": 1.2673269650526908, |
| "grad_norm": 2.992758801643484, |
| "learning_rate": 5.367039648801385e-07, |
| "loss": 0.9159189224243164, |
| "num_input_tokens_seen": 1082533953, |
| "step": 5400, |
| "token_acc": 0.7533061633594679 |
| }, |
| { |
| "epoch": 1.2673269650526908, |
| "eval_loss": 0.9630009531974792, |
| "eval_runtime": 32.5066, |
| "eval_samples_per_second": 30.763, |
| "eval_steps_per_second": 1.292, |
| "eval_token_acc": 0.7429468387156345, |
| "num_input_tokens_seen": 1082533953, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.269673997230502, |
| "grad_norm": 2.692503141737527, |
| "learning_rate": 5.329959193451448e-07, |
| "loss": 0.8941567420959473, |
| "num_input_tokens_seen": 1084574751, |
| "step": 5410, |
| "token_acc": 0.7571006112607204 |
| }, |
| { |
| "epoch": 1.2720210294083132, |
| "grad_norm": 1.5669484406824739, |
| "learning_rate": 5.292960678346674e-07, |
| "loss": 0.8758008003234863, |
| "num_input_tokens_seen": 1086604491, |
| "step": 5420, |
| "token_acc": 0.7609121373438931 |
| }, |
| { |
| "epoch": 1.2743680615861244, |
| "grad_norm": 2.7196923900884538, |
| "learning_rate": 5.256044752660709e-07, |
| "loss": 0.8903736114501953, |
| "num_input_tokens_seen": 1088619414, |
| "step": 5430, |
| "token_acc": 0.7592087326109695 |
| }, |
| { |
| "epoch": 1.2767150937639355, |
| "grad_norm": 3.3252231876281044, |
| "learning_rate": 5.219212064118078e-07, |
| "loss": 0.8977795600891113, |
| "num_input_tokens_seen": 1090588407, |
| "step": 5440, |
| "token_acc": 0.7549231473500579 |
| }, |
| { |
| "epoch": 1.2790621259417467, |
| "grad_norm": 2.7540341423324115, |
| "learning_rate": 5.182463258982846e-07, |
| "loss": 0.9006612777709961, |
| "num_input_tokens_seen": 1092638625, |
| "step": 5450, |
| "token_acc": 0.7552658524098589 |
| }, |
| { |
| "epoch": 1.281409158119558, |
| "grad_norm": 3.5072503422513153, |
| "learning_rate": 5.14579898204726e-07, |
| "loss": 0.907337760925293, |
| "num_input_tokens_seen": 1094630577, |
| "step": 5460, |
| "token_acc": 0.7542059011906609 |
| }, |
| { |
| "epoch": 1.2837561902973689, |
| "grad_norm": 5.240311266290964, |
| "learning_rate": 5.109219876620441e-07, |
| "loss": 0.8758956909179687, |
| "num_input_tokens_seen": 1096660965, |
| "step": 5470, |
| "token_acc": 0.7625046517718082 |
| }, |
| { |
| "epoch": 1.28610322247518, |
| "grad_norm": 3.910915359433382, |
| "learning_rate": 5.072726584517085e-07, |
| "loss": 0.8722602844238281, |
| "num_input_tokens_seen": 1098640854, |
| "step": 5480, |
| "token_acc": 0.7603257317050821 |
| }, |
| { |
| "epoch": 1.2884502546529912, |
| "grad_norm": 1.6770275314193752, |
| "learning_rate": 5.036319746046231e-07, |
| "loss": 0.8983705520629883, |
| "num_input_tokens_seen": 1100637150, |
| "step": 5490, |
| "token_acc": 0.7550046700338503 |
| }, |
| { |
| "epoch": 1.2907972868308024, |
| "grad_norm": 2.5881524894297745, |
| "learning_rate": 5.000000000000002e-07, |
| "loss": 0.894923210144043, |
| "num_input_tokens_seen": 1102652097, |
| "step": 5500, |
| "token_acc": 0.7564139373070671 |
| }, |
| { |
| "epoch": 1.2907972868308024, |
| "eval_loss": 0.9622647762298584, |
| "eval_runtime": 32.3358, |
| "eval_samples_per_second": 30.925, |
| "eval_steps_per_second": 1.299, |
| "eval_token_acc": 0.7433207913021398, |
| "num_input_tokens_seen": 1102652097, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.2931443190086136, |
| "grad_norm": 2.4458432348948125, |
| "learning_rate": 4.963767983642391e-07, |
| "loss": 0.9156219482421875, |
| "num_input_tokens_seen": 1104675948, |
| "step": 5510, |
| "token_acc": 0.7537255650881494 |
| }, |
| { |
| "epoch": 1.2954913511864248, |
| "grad_norm": 1.6909965435703207, |
| "learning_rate": 4.927624332698109e-07, |
| "loss": 0.8871401786804199, |
| "num_input_tokens_seen": 1106680473, |
| "step": 5520, |
| "token_acc": 0.7581608722152928 |
| }, |
| { |
| "epoch": 1.297838383364236, |
| "grad_norm": 2.752441639954046, |
| "learning_rate": 4.891569681341402e-07, |
| "loss": 0.8774595260620117, |
| "num_input_tokens_seen": 1108675587, |
| "step": 5530, |
| "token_acc": 0.7608597953994441 |
| }, |
| { |
| "epoch": 1.3001854155420471, |
| "grad_norm": 4.312103259940411, |
| "learning_rate": 4.855604662184934e-07, |
| "loss": 0.94571533203125, |
| "num_input_tokens_seen": 1110676452, |
| "step": 5540, |
| "token_acc": 0.7557507607034466 |
| }, |
| { |
| "epoch": 1.3025324477198583, |
| "grad_norm": 15.655690028463368, |
| "learning_rate": 4.819729906268699e-07, |
| "loss": 0.906065559387207, |
| "num_input_tokens_seen": 1112710338, |
| "step": 5550, |
| "token_acc": 0.7553128935752625 |
| }, |
| { |
| "epoch": 1.3048794798976693, |
| "grad_norm": 4.05493729865088, |
| "learning_rate": 4.783946043048922e-07, |
| "loss": 0.8648593902587891, |
| "num_input_tokens_seen": 1114786149, |
| "step": 5560, |
| "token_acc": 0.763232807351506 |
| }, |
| { |
| "epoch": 1.3072265120754807, |
| "grad_norm": 18.132742646186717, |
| "learning_rate": 4.748253700387042e-07, |
| "loss": 0.9057920455932618, |
| "num_input_tokens_seen": 1116792414, |
| "step": 5570, |
| "token_acc": 0.7558468058389578 |
| }, |
| { |
| "epoch": 1.3095735442532916, |
| "grad_norm": 4.473293823942013, |
| "learning_rate": 4.712653504538683e-07, |
| "loss": 0.9168581008911133, |
| "num_input_tokens_seen": 1118755668, |
| "step": 5580, |
| "token_acc": 0.7533578569509507 |
| }, |
| { |
| "epoch": 1.3119205764311028, |
| "grad_norm": 1.8718331058830788, |
| "learning_rate": 4.677146080142663e-07, |
| "loss": 0.8930509567260743, |
| "num_input_tokens_seen": 1120786350, |
| "step": 5590, |
| "token_acc": 0.7578146339884224 |
| }, |
| { |
| "epoch": 1.314267608608914, |
| "grad_norm": 3.8006137217544853, |
| "learning_rate": 4.641732050210031e-07, |
| "loss": 0.8965305328369141, |
| "num_input_tokens_seen": 1122830253, |
| "step": 5600, |
| "token_acc": 0.757193734996655 |
| }, |
| { |
| "epoch": 1.314267608608914, |
| "eval_loss": 0.9616973996162415, |
| "eval_runtime": 32.7101, |
| "eval_samples_per_second": 30.572, |
| "eval_steps_per_second": 1.284, |
| "eval_token_acc": 0.7441794972415225, |
| "num_input_tokens_seen": 1122830253, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.3166146407867252, |
| "grad_norm": 2.1310327327750103, |
| "learning_rate": 4.6064120361131654e-07, |
| "loss": 0.8685415267944336, |
| "num_input_tokens_seen": 1124770431, |
| "step": 5610, |
| "token_acc": 0.7614362220849722 |
| }, |
| { |
| "epoch": 1.3189616729645364, |
| "grad_norm": 5.594205953222117, |
| "learning_rate": 4.571186657574827e-07, |
| "loss": 0.8749109268188476, |
| "num_input_tokens_seen": 1126803909, |
| "step": 5620, |
| "token_acc": 0.7609511594419571 |
| }, |
| { |
| "epoch": 1.3213087051423476, |
| "grad_norm": 1.4907604209575505, |
| "learning_rate": 4.5360565326573097e-07, |
| "loss": 0.8923271179199219, |
| "num_input_tokens_seen": 1128846693, |
| "step": 5630, |
| "token_acc": 0.7566236892264636 |
| }, |
| { |
| "epoch": 1.3236557373201587, |
| "grad_norm": 1.6487071255049761, |
| "learning_rate": 4.5010222777516016e-07, |
| "loss": 0.8908859252929687, |
| "num_input_tokens_seen": 1130851539, |
| "step": 5640, |
| "token_acc": 0.7570941516923059 |
| }, |
| { |
| "epoch": 1.3260027694979697, |
| "grad_norm": 3.588061851379213, |
| "learning_rate": 4.46608450756656e-07, |
| "loss": 0.8966587066650391, |
| "num_input_tokens_seen": 1132815081, |
| "step": 5650, |
| "token_acc": 0.7556865728413845 |
| }, |
| { |
| "epoch": 1.328349801675781, |
| "grad_norm": 1.8146830930493871, |
| "learning_rate": 4.431243835118124e-07, |
| "loss": 0.8989040374755859, |
| "num_input_tokens_seen": 1134802443, |
| "step": 5660, |
| "token_acc": 0.7558611844953211 |
| }, |
| { |
| "epoch": 1.330696833853592, |
| "grad_norm": 1.549860770891768, |
| "learning_rate": 4.3965008717185546e-07, |
| "loss": 0.9029041290283203, |
| "num_input_tokens_seen": 1136825982, |
| "step": 5670, |
| "token_acc": 0.7547953414140695 |
| }, |
| { |
| "epoch": 1.3330438660314032, |
| "grad_norm": 8.564808279417944, |
| "learning_rate": 4.361856226965732e-07, |
| "loss": 0.9094319343566895, |
| "num_input_tokens_seen": 1138844418, |
| "step": 5680, |
| "token_acc": 0.7534089471178856 |
| }, |
| { |
| "epoch": 1.3353908982092144, |
| "grad_norm": 2.4122342846590117, |
| "learning_rate": 4.327310508732437e-07, |
| "loss": 0.9330079078674316, |
| "num_input_tokens_seen": 1140865437, |
| "step": 5690, |
| "token_acc": 0.7480073371962428 |
| }, |
| { |
| "epoch": 1.3377379303870256, |
| "grad_norm": 2.222842201988777, |
| "learning_rate": 4.292864323155684e-07, |
| "loss": 0.9154201507568359, |
| "num_input_tokens_seen": 1142840739, |
| "step": 5700, |
| "token_acc": 0.7531476710355994 |
| }, |
| { |
| "epoch": 1.3377379303870256, |
| "eval_loss": 0.9612286686897278, |
| "eval_runtime": 32.3029, |
| "eval_samples_per_second": 30.957, |
| "eval_steps_per_second": 1.3, |
| "eval_token_acc": 0.7439994459961682, |
| "num_input_tokens_seen": 1142840739, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.3400849625648368, |
| "grad_norm": 1.810955978874136, |
| "learning_rate": 4.258518274626103e-07, |
| "loss": 0.8730932235717773, |
| "num_input_tokens_seen": 1144886610, |
| "step": 5710, |
| "token_acc": 0.763370671624448 |
| }, |
| { |
| "epoch": 1.342431994742648, |
| "grad_norm": 4.997305168302919, |
| "learning_rate": 4.224272965777326e-07, |
| "loss": 0.8956947326660156, |
| "num_input_tokens_seen": 1146863130, |
| "step": 5720, |
| "token_acc": 0.756512774681123 |
| }, |
| { |
| "epoch": 1.3447790269204591, |
| "grad_norm": 1.7714657210450584, |
| "learning_rate": 4.1901289974754017e-07, |
| "loss": 0.9034318923950195, |
| "num_input_tokens_seen": 1148825958, |
| "step": 5730, |
| "token_acc": 0.7528903974023187 |
| }, |
| { |
| "epoch": 1.34712605909827, |
| "grad_norm": 1.7970727143535203, |
| "learning_rate": 4.15608696880828e-07, |
| "loss": 0.9018034934997559, |
| "num_input_tokens_seen": 1150869660, |
| "step": 5740, |
| "token_acc": 0.7552370910083663 |
| }, |
| { |
| "epoch": 1.3494730912760815, |
| "grad_norm": 2.3962942580845765, |
| "learning_rate": 4.1221474770752696e-07, |
| "loss": 0.8888204574584961, |
| "num_input_tokens_seen": 1152904527, |
| "step": 5750, |
| "token_acc": 0.7579487303127656 |
| }, |
| { |
| "epoch": 1.3518201234538925, |
| "grad_norm": 4.2459307299089355, |
| "learning_rate": 4.0883111177765793e-07, |
| "loss": 0.882927131652832, |
| "num_input_tokens_seen": 1154856621, |
| "step": 5760, |
| "token_acc": 0.760532270444878 |
| }, |
| { |
| "epoch": 1.3541671556317036, |
| "grad_norm": 8.805122520612176, |
| "learning_rate": 4.05457848460287e-07, |
| "loss": 0.8931197166442871, |
| "num_input_tokens_seen": 1156841811, |
| "step": 5770, |
| "token_acc": 0.7581117296199616 |
| }, |
| { |
| "epoch": 1.3565141878095148, |
| "grad_norm": 1.8029745033128655, |
| "learning_rate": 4.020950169424815e-07, |
| "loss": 0.8755680084228515, |
| "num_input_tokens_seen": 1158825375, |
| "step": 5780, |
| "token_acc": 0.7617876391236407 |
| }, |
| { |
| "epoch": 1.358861219987326, |
| "grad_norm": 2.3214932218170348, |
| "learning_rate": 3.9874267622827326e-07, |
| "loss": 0.8934176445007325, |
| "num_input_tokens_seen": 1160840175, |
| "step": 5790, |
| "token_acc": 0.7589212683515132 |
| }, |
| { |
| "epoch": 1.3612082521651372, |
| "grad_norm": 2.3985162877965873, |
| "learning_rate": 3.9540088513762516e-07, |
| "loss": 0.8847217559814453, |
| "num_input_tokens_seen": 1162829856, |
| "step": 5800, |
| "token_acc": 0.7612809344881545 |
| }, |
| { |
| "epoch": 1.3612082521651372, |
| "eval_loss": 0.9602800607681274, |
| "eval_runtime": 32.5062, |
| "eval_samples_per_second": 30.763, |
| "eval_steps_per_second": 1.292, |
| "eval_token_acc": 0.7438470949424066, |
| "num_input_tokens_seen": 1162829856, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.3635552843429484, |
| "grad_norm": 2.0010713169478738, |
| "learning_rate": 3.9206970230539484e-07, |
| "loss": 0.8922606468200683, |
| "num_input_tokens_seen": 1164855291, |
| "step": 5810, |
| "token_acc": 0.7569838860463012 |
| }, |
| { |
| "epoch": 1.3659023165207596, |
| "grad_norm": 3.272109870466011, |
| "learning_rate": 3.887491861803085e-07, |
| "loss": 0.9000480651855469, |
| "num_input_tokens_seen": 1166861097, |
| "step": 5820, |
| "token_acc": 0.7566891172207229 |
| }, |
| { |
| "epoch": 1.3682493486985707, |
| "grad_norm": 1.544077325900252, |
| "learning_rate": 3.8543939502393553e-07, |
| "loss": 0.8689347267150879, |
| "num_input_tokens_seen": 1168887147, |
| "step": 5830, |
| "token_acc": 0.7627186945780682 |
| }, |
| { |
| "epoch": 1.370596380876382, |
| "grad_norm": 2.103440792541161, |
| "learning_rate": 3.8214038690966577e-07, |
| "loss": 0.8851211547851563, |
| "num_input_tokens_seen": 1170981615, |
| "step": 5840, |
| "token_acc": 0.7597759262487763 |
| }, |
| { |
| "epoch": 1.3729434130541929, |
| "grad_norm": 1.7677876308306728, |
| "learning_rate": 3.788522197216897e-07, |
| "loss": 0.9024602890014648, |
| "num_input_tokens_seen": 1172878617, |
| "step": 5850, |
| "token_acc": 0.7557560328803166 |
| }, |
| { |
| "epoch": 1.375290445232004, |
| "grad_norm": 1.9402726241839798, |
| "learning_rate": 3.7557495115398443e-07, |
| "loss": 0.9134780883789062, |
| "num_input_tokens_seen": 1174893015, |
| "step": 5860, |
| "token_acc": 0.753564070544764 |
| }, |
| { |
| "epoch": 1.3776374774098152, |
| "grad_norm": 1.72330146825218, |
| "learning_rate": 3.7230863870929963e-07, |
| "loss": 0.8972689628601074, |
| "num_input_tokens_seen": 1176936135, |
| "step": 5870, |
| "token_acc": 0.7560207487897523 |
| }, |
| { |
| "epoch": 1.3799845095876264, |
| "grad_norm": 1.8072698773269937, |
| "learning_rate": 3.690533396981503e-07, |
| "loss": 0.8984692573547364, |
| "num_input_tokens_seen": 1178895693, |
| "step": 5880, |
| "token_acc": 0.756615972827414 |
| }, |
| { |
| "epoch": 1.3823315417654376, |
| "grad_norm": 2.0171801198061714, |
| "learning_rate": 3.6580911123781056e-07, |
| "loss": 0.8955293655395508, |
| "num_input_tokens_seen": 1180888149, |
| "step": 5890, |
| "token_acc": 0.75720176277118 |
| }, |
| { |
| "epoch": 1.3846785739432488, |
| "grad_norm": 1.5526432676933917, |
| "learning_rate": 3.625760102513102e-07, |
| "loss": 0.8883472442626953, |
| "num_input_tokens_seen": 1182949920, |
| "step": 5900, |
| "token_acc": 0.7599701073124605 |
| }, |
| { |
| "epoch": 1.3846785739432488, |
| "eval_loss": 0.9595866799354553, |
| "eval_runtime": 32.1786, |
| "eval_samples_per_second": 31.077, |
| "eval_steps_per_second": 1.305, |
| "eval_token_acc": 0.7442902980078946, |
| "num_input_tokens_seen": 1182949920, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.38702560612106, |
| "grad_norm": 7.97384438350482, |
| "learning_rate": 3.593540934664383e-07, |
| "loss": 0.889987564086914, |
| "num_input_tokens_seen": 1184970120, |
| "step": 5910, |
| "token_acc": 0.758863473503418 |
| }, |
| { |
| "epoch": 1.3893726382988711, |
| "grad_norm": 1.537068043139113, |
| "learning_rate": 3.561434174147463e-07, |
| "loss": 0.911767578125, |
| "num_input_tokens_seen": 1186953870, |
| "step": 5920, |
| "token_acc": 0.7535046522800585 |
| }, |
| { |
| "epoch": 1.3917196704766823, |
| "grad_norm": 2.792916091074644, |
| "learning_rate": 3.5294403843055597e-07, |
| "loss": 0.8944547653198243, |
| "num_input_tokens_seen": 1188957102, |
| "step": 5930, |
| "token_acc": 0.7568861383047926 |
| }, |
| { |
| "epoch": 1.3940667026544933, |
| "grad_norm": 2.5956068773284557, |
| "learning_rate": 3.497560126499709e-07, |
| "loss": 0.8902932167053222, |
| "num_input_tokens_seen": 1190999568, |
| "step": 5940, |
| "token_acc": 0.7563681534101937 |
| }, |
| { |
| "epoch": 1.3964137348323047, |
| "grad_norm": 1.606752628432743, |
| "learning_rate": 3.465793960098945e-07, |
| "loss": 0.8962507247924805, |
| "num_input_tokens_seen": 1193049609, |
| "step": 5950, |
| "token_acc": 0.7568774963666619 |
| }, |
| { |
| "epoch": 1.3987607670101156, |
| "grad_norm": 3.201548177908894, |
| "learning_rate": 3.434142442470437e-07, |
| "loss": 0.8878293037414551, |
| "num_input_tokens_seen": 1195126131, |
| "step": 5960, |
| "token_acc": 0.7593972961018481 |
| }, |
| { |
| "epoch": 1.4011077991879268, |
| "grad_norm": 2.0402971482769034, |
| "learning_rate": 3.4026061289697396e-07, |
| "loss": 0.8985117912292481, |
| "num_input_tokens_seen": 1197179763, |
| "step": 5970, |
| "token_acc": 0.7568663489501413 |
| }, |
| { |
| "epoch": 1.403454831365738, |
| "grad_norm": 2.0743463496848085, |
| "learning_rate": 3.371185572931048e-07, |
| "loss": 0.9137758255004883, |
| "num_input_tokens_seen": 1199156916, |
| "step": 5980, |
| "token_acc": 0.7521096549123137 |
| }, |
| { |
| "epoch": 1.4058018635435492, |
| "grad_norm": 1.908781885304316, |
| "learning_rate": 3.3398813256574843e-07, |
| "loss": 0.8940442085266114, |
| "num_input_tokens_seen": 1201161525, |
| "step": 5990, |
| "token_acc": 0.7591090088367569 |
| }, |
| { |
| "epoch": 1.4081488957213604, |
| "grad_norm": 1.8574032864901908, |
| "learning_rate": 3.308693936411421e-07, |
| "loss": 0.8737678527832031, |
| "num_input_tokens_seen": 1203195084, |
| "step": 6000, |
| "token_acc": 0.7614718846052603 |
| }, |
| { |
| "epoch": 1.4081488957213604, |
| "eval_loss": 0.9593000411987305, |
| "eval_runtime": 32.4448, |
| "eval_samples_per_second": 30.822, |
| "eval_steps_per_second": 1.295, |
| "eval_token_acc": 0.7439671291059763, |
| "num_input_tokens_seen": 1203195084, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 8000, |
| "num_input_tokens_seen": 1203195084, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3956677446926336e+16, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|