| { |
| "best_global_step": 96000, |
| "best_metric": 3.5370290279388428, |
| "best_model_checkpoint": "/scratch/cl5625/exceptions/models/swap_0.7_last_to_push_2128/checkpoint-40000", |
| "epoch": 33.809818097014926, |
| "eval_steps": 1000, |
| "global_step": 116000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014575559701492538, |
| "grad_norm": 0.9652895927429199, |
| "learning_rate": 0.000294, |
| "loss": 8.4539, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.029151119402985076, |
| "grad_norm": 0.9208124279975891, |
| "learning_rate": 0.0005939999999999999, |
| "loss": 6.749, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04372667910447761, |
| "grad_norm": 0.525312602519989, |
| "learning_rate": 0.0005998285214348206, |
| "loss": 6.365, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05830223880597015, |
| "grad_norm": 0.47033366560935974, |
| "learning_rate": 0.0005996535433070866, |
| "loss": 6.1474, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07287779850746269, |
| "grad_norm": 0.5568996071815491, |
| "learning_rate": 0.0005994785651793525, |
| "loss": 6.0107, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08745335820895522, |
| "grad_norm": 0.46809056401252747, |
| "learning_rate": 0.0005993035870516185, |
| "loss": 5.8928, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10202891791044776, |
| "grad_norm": 0.4372883141040802, |
| "learning_rate": 0.0005991286089238845, |
| "loss": 5.774, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1166044776119403, |
| "grad_norm": 0.48103225231170654, |
| "learning_rate": 0.0005989536307961504, |
| "loss": 5.6236, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13118003731343283, |
| "grad_norm": 0.5330483913421631, |
| "learning_rate": 0.0005987786526684164, |
| "loss": 5.522, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.14575559701492538, |
| "grad_norm": 0.4759756922721863, |
| "learning_rate": 0.0005986036745406824, |
| "loss": 5.442, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1603311567164179, |
| "grad_norm": 0.41947564482688904, |
| "learning_rate": 0.0005984286964129484, |
| "loss": 5.3423, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.17490671641791045, |
| "grad_norm": 0.4382849335670471, |
| "learning_rate": 0.0005982537182852143, |
| "loss": 5.2748, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.189482276119403, |
| "grad_norm": 0.47097328305244446, |
| "learning_rate": 0.0005980787401574803, |
| "loss": 5.1983, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2040578358208955, |
| "grad_norm": 0.47024649381637573, |
| "learning_rate": 0.0005979037620297463, |
| "loss": 5.1564, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.21863339552238806, |
| "grad_norm": 0.4276546537876129, |
| "learning_rate": 0.0005977287839020123, |
| "loss": 5.0876, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2332089552238806, |
| "grad_norm": 0.41784146428108215, |
| "learning_rate": 0.0005975538057742782, |
| "loss": 5.0278, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.24778451492537312, |
| "grad_norm": 0.4272647500038147, |
| "learning_rate": 0.0005973788276465442, |
| "loss": 4.9741, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.26236007462686567, |
| "grad_norm": 0.496510773897171, |
| "learning_rate": 0.0005972038495188102, |
| "loss": 4.9425, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2769356343283582, |
| "grad_norm": 0.5926377773284912, |
| "learning_rate": 0.000597028871391076, |
| "loss": 4.8891, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.29151119402985076, |
| "grad_norm": 0.43382635712623596, |
| "learning_rate": 0.000596853893263342, |
| "loss": 4.8405, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.29151119402985076, |
| "eval_accuracy": 0.2538303000671932, |
| "eval_loss": 4.754518032073975, |
| "eval_runtime": 179.7864, |
| "eval_samples_per_second": 92.482, |
| "eval_steps_per_second": 5.785, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3060867537313433, |
| "grad_norm": 0.4467147886753082, |
| "learning_rate": 0.000596678915135608, |
| "loss": 4.7826, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3206623134328358, |
| "grad_norm": 0.4645467698574066, |
| "learning_rate": 0.0005965039370078739, |
| "loss": 4.7438, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.33523787313432835, |
| "grad_norm": 0.4239553213119507, |
| "learning_rate": 0.0005963289588801399, |
| "loss": 4.705, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3498134328358209, |
| "grad_norm": 0.4928569495677948, |
| "learning_rate": 0.0005961539807524059, |
| "loss": 4.6655, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.36438899253731344, |
| "grad_norm": 0.39162495732307434, |
| "learning_rate": 0.0005959790026246719, |
| "loss": 4.6408, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.378964552238806, |
| "grad_norm": 0.4309455454349518, |
| "learning_rate": 0.0005958040244969378, |
| "loss": 4.6164, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.39354011194029853, |
| "grad_norm": 0.41590699553489685, |
| "learning_rate": 0.0005956290463692038, |
| "loss": 4.5852, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.408115671641791, |
| "grad_norm": 0.43023690581321716, |
| "learning_rate": 0.0005954540682414698, |
| "loss": 4.5578, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.42269123134328357, |
| "grad_norm": 0.42544642090797424, |
| "learning_rate": 0.0005952790901137357, |
| "loss": 4.5267, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4372667910447761, |
| "grad_norm": 0.438936710357666, |
| "learning_rate": 0.0005951041119860017, |
| "loss": 4.5129, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.45184235074626866, |
| "grad_norm": 0.49558717012405396, |
| "learning_rate": 0.0005949291338582677, |
| "loss": 4.5006, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4664179104477612, |
| "grad_norm": 0.4251459836959839, |
| "learning_rate": 0.0005947541557305336, |
| "loss": 4.4832, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.48099347014925375, |
| "grad_norm": 0.4005463421344757, |
| "learning_rate": 0.0005945791776027996, |
| "loss": 4.4631, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.49556902985074625, |
| "grad_norm": 0.3984415531158447, |
| "learning_rate": 0.0005944041994750656, |
| "loss": 4.4357, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5101445895522388, |
| "grad_norm": 0.43978074193000793, |
| "learning_rate": 0.0005942292213473315, |
| "loss": 4.4312, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5247201492537313, |
| "grad_norm": 0.40760400891304016, |
| "learning_rate": 0.0005940542432195975, |
| "loss": 4.3969, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5392957089552238, |
| "grad_norm": 0.45624998211860657, |
| "learning_rate": 0.0005938792650918635, |
| "loss": 4.3909, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5538712686567164, |
| "grad_norm": 0.3956749439239502, |
| "learning_rate": 0.0005937042869641295, |
| "loss": 4.3708, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5684468283582089, |
| "grad_norm": 0.3885388672351837, |
| "learning_rate": 0.0005935293088363953, |
| "loss": 4.3676, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5830223880597015, |
| "grad_norm": 0.3808089792728424, |
| "learning_rate": 0.0005933543307086613, |
| "loss": 4.3421, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5830223880597015, |
| "eval_accuracy": 0.2982805535099172, |
| "eval_loss": 4.293015956878662, |
| "eval_runtime": 179.2043, |
| "eval_samples_per_second": 92.782, |
| "eval_steps_per_second": 5.803, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.597597947761194, |
| "grad_norm": 0.4052446782588959, |
| "learning_rate": 0.0005931793525809273, |
| "loss": 4.3384, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6121735074626866, |
| "grad_norm": 0.4344984292984009, |
| "learning_rate": 0.0005930043744531933, |
| "loss": 4.3264, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6267490671641791, |
| "grad_norm": 0.4268837869167328, |
| "learning_rate": 0.0005928293963254592, |
| "loss": 4.3188, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6413246268656716, |
| "grad_norm": 0.4293970763683319, |
| "learning_rate": 0.0005926544181977252, |
| "loss": 4.3085, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6559001865671642, |
| "grad_norm": 0.40907061100006104, |
| "learning_rate": 0.0005924794400699912, |
| "loss": 4.2966, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6704757462686567, |
| "grad_norm": 0.3840656280517578, |
| "learning_rate": 0.0005923044619422571, |
| "loss": 4.2739, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6850513059701493, |
| "grad_norm": 0.37641018629074097, |
| "learning_rate": 0.0005921294838145231, |
| "loss": 4.2741, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6996268656716418, |
| "grad_norm": 0.38551846146583557, |
| "learning_rate": 0.0005919545056867891, |
| "loss": 4.2682, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7142024253731343, |
| "grad_norm": 0.34833261370658875, |
| "learning_rate": 0.0005917795275590551, |
| "loss": 4.243, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7287779850746269, |
| "grad_norm": 0.4164075255393982, |
| "learning_rate": 0.000591604549431321, |
| "loss": 4.2357, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7433535447761194, |
| "grad_norm": 0.3375675678253174, |
| "learning_rate": 0.000591429571303587, |
| "loss": 4.2287, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.757929104477612, |
| "grad_norm": 0.3546270430088043, |
| "learning_rate": 0.000591254593175853, |
| "loss": 4.2134, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7725046641791045, |
| "grad_norm": 0.3893532156944275, |
| "learning_rate": 0.000591079615048119, |
| "loss": 4.2053, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.7870802238805971, |
| "grad_norm": 0.37014102935791016, |
| "learning_rate": 0.0005909046369203849, |
| "loss": 4.2145, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8016557835820896, |
| "grad_norm": 0.3541073799133301, |
| "learning_rate": 0.0005907296587926509, |
| "loss": 4.1914, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.816231343283582, |
| "grad_norm": 0.43208348751068115, |
| "learning_rate": 0.0005905546806649169, |
| "loss": 4.1925, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8308069029850746, |
| "grad_norm": 0.36169344186782837, |
| "learning_rate": 0.0005903797025371829, |
| "loss": 4.1852, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8453824626865671, |
| "grad_norm": 0.40239009261131287, |
| "learning_rate": 0.0005902047244094488, |
| "loss": 4.1766, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.8599580223880597, |
| "grad_norm": 0.38017842173576355, |
| "learning_rate": 0.0005900297462817148, |
| "loss": 4.1628, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.8745335820895522, |
| "grad_norm": 0.34212303161621094, |
| "learning_rate": 0.0005898547681539808, |
| "loss": 4.1511, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8745335820895522, |
| "eval_accuracy": 0.31430675850010303, |
| "eval_loss": 4.104100704193115, |
| "eval_runtime": 179.1747, |
| "eval_samples_per_second": 92.798, |
| "eval_steps_per_second": 5.804, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8891091417910447, |
| "grad_norm": 0.35792702436447144, |
| "learning_rate": 0.0005896797900262466, |
| "loss": 4.1473, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.9036847014925373, |
| "grad_norm": 0.36840522289276123, |
| "learning_rate": 0.0005895048118985126, |
| "loss": 4.1411, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.9182602611940298, |
| "grad_norm": 0.35885506868362427, |
| "learning_rate": 0.0005893298337707786, |
| "loss": 4.1293, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.9328358208955224, |
| "grad_norm": 0.34642812609672546, |
| "learning_rate": 0.0005891548556430446, |
| "loss": 4.1281, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9474113805970149, |
| "grad_norm": 0.3401717245578766, |
| "learning_rate": 0.0005889798775153105, |
| "loss": 4.1295, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.9619869402985075, |
| "grad_norm": 0.3363310396671295, |
| "learning_rate": 0.0005888048993875765, |
| "loss": 4.1209, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9765625, |
| "grad_norm": 0.3683488965034485, |
| "learning_rate": 0.0005886299212598425, |
| "loss": 4.1139, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.9911380597014925, |
| "grad_norm": 0.34876251220703125, |
| "learning_rate": 0.0005884549431321084, |
| "loss": 4.1054, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0055387126865671, |
| "grad_norm": 0.32942771911621094, |
| "learning_rate": 0.0005882799650043744, |
| "loss": 4.0683, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.0201142723880596, |
| "grad_norm": 0.36168619990348816, |
| "learning_rate": 0.0005881049868766404, |
| "loss": 4.0293, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0346898320895523, |
| "grad_norm": 0.35932570695877075, |
| "learning_rate": 0.0005879300087489063, |
| "loss": 4.02, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.0492653917910448, |
| "grad_norm": 0.428357869386673, |
| "learning_rate": 0.0005877550306211723, |
| "loss": 4.0357, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.0638409514925373, |
| "grad_norm": 0.35688987374305725, |
| "learning_rate": 0.0005875800524934383, |
| "loss": 4.0291, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.0784165111940298, |
| "grad_norm": 0.3705274164676666, |
| "learning_rate": 0.0005874050743657042, |
| "loss": 4.0165, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.0929920708955223, |
| "grad_norm": 0.3511093556880951, |
| "learning_rate": 0.0005872300962379702, |
| "loss": 4.0205, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.107567630597015, |
| "grad_norm": 0.32943227887153625, |
| "learning_rate": 0.0005870551181102362, |
| "loss": 3.9982, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.1221431902985075, |
| "grad_norm": 0.36079925298690796, |
| "learning_rate": 0.0005868801399825022, |
| "loss": 4.0146, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.13671875, |
| "grad_norm": 0.36787062883377075, |
| "learning_rate": 0.0005867051618547681, |
| "loss": 4.0149, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.1512943097014925, |
| "grad_norm": 0.34740957617759705, |
| "learning_rate": 0.0005865301837270341, |
| "loss": 4.0014, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.165869869402985, |
| "grad_norm": 0.3389481008052826, |
| "learning_rate": 0.0005863552055993001, |
| "loss": 4.0075, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.165869869402985, |
| "eval_accuracy": 0.3235803364649745, |
| "eval_loss": 4.001465320587158, |
| "eval_runtime": 179.5462, |
| "eval_samples_per_second": 92.606, |
| "eval_steps_per_second": 5.792, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1804454291044777, |
| "grad_norm": 0.33885183930397034, |
| "learning_rate": 0.0005861802274715659, |
| "loss": 3.9982, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.1950209888059702, |
| "grad_norm": 0.3423576056957245, |
| "learning_rate": 0.0005860052493438319, |
| "loss": 3.9952, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.2095965485074627, |
| "grad_norm": 0.3361322283744812, |
| "learning_rate": 0.0005858302712160979, |
| "loss": 3.9863, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.2241721082089552, |
| "grad_norm": 0.35886096954345703, |
| "learning_rate": 0.0005856552930883638, |
| "loss": 3.9995, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.2387476679104479, |
| "grad_norm": 0.3898662328720093, |
| "learning_rate": 0.0005854803149606298, |
| "loss": 3.9802, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.2533232276119404, |
| "grad_norm": 0.35210487246513367, |
| "learning_rate": 0.0005853053368328958, |
| "loss": 3.9833, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.2678987873134329, |
| "grad_norm": 0.3297649919986725, |
| "learning_rate": 0.0005851303587051618, |
| "loss": 3.979, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.2824743470149254, |
| "grad_norm": 0.33918461203575134, |
| "learning_rate": 0.0005849553805774277, |
| "loss": 3.9715, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.2970499067164178, |
| "grad_norm": 0.3349836468696594, |
| "learning_rate": 0.0005847804024496937, |
| "loss": 3.9705, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.3116254664179103, |
| "grad_norm": 0.335245817899704, |
| "learning_rate": 0.0005846054243219597, |
| "loss": 3.9666, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.326201026119403, |
| "grad_norm": 0.3286707401275635, |
| "learning_rate": 0.0005844304461942257, |
| "loss": 3.9713, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.3407765858208955, |
| "grad_norm": 0.3266748785972595, |
| "learning_rate": 0.0005842554680664916, |
| "loss": 3.9634, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.355352145522388, |
| "grad_norm": 0.34682974219322205, |
| "learning_rate": 0.0005840804899387576, |
| "loss": 3.9671, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.3699277052238805, |
| "grad_norm": 0.34403321146965027, |
| "learning_rate": 0.0005839055118110236, |
| "loss": 3.9689, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.3845032649253732, |
| "grad_norm": 0.3424343466758728, |
| "learning_rate": 0.0005837305336832896, |
| "loss": 3.9673, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.3990788246268657, |
| "grad_norm": 0.3347780704498291, |
| "learning_rate": 0.0005835555555555555, |
| "loss": 3.9669, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.4136543843283582, |
| "grad_norm": 0.3549891710281372, |
| "learning_rate": 0.0005833805774278215, |
| "loss": 3.944, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.4282299440298507, |
| "grad_norm": 0.336101233959198, |
| "learning_rate": 0.0005832055993000875, |
| "loss": 3.9374, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.4428055037313432, |
| "grad_norm": 0.33941197395324707, |
| "learning_rate": 0.0005830306211723534, |
| "loss": 3.9447, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.4573810634328357, |
| "grad_norm": 0.32939550280570984, |
| "learning_rate": 0.0005828556430446194, |
| "loss": 3.9398, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4573810634328357, |
| "eval_accuracy": 0.33042841571550857, |
| "eval_loss": 3.9233782291412354, |
| "eval_runtime": 179.5532, |
| "eval_samples_per_second": 92.602, |
| "eval_steps_per_second": 5.792, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4719566231343284, |
| "grad_norm": 0.3391132950782776, |
| "learning_rate": 0.0005826806649168854, |
| "loss": 3.9332, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.486532182835821, |
| "grad_norm": 0.32835477590560913, |
| "learning_rate": 0.0005825056867891514, |
| "loss": 3.949, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.5011077425373134, |
| "grad_norm": 0.33751094341278076, |
| "learning_rate": 0.0005823307086614172, |
| "loss": 3.9421, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.515683302238806, |
| "grad_norm": 0.3318590223789215, |
| "learning_rate": 0.0005821557305336832, |
| "loss": 3.9229, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.5302588619402986, |
| "grad_norm": 0.31536349654197693, |
| "learning_rate": 0.0005819807524059492, |
| "loss": 3.9301, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.544834421641791, |
| "grad_norm": 0.34051841497421265, |
| "learning_rate": 0.0005818057742782152, |
| "loss": 3.913, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.5594099813432836, |
| "grad_norm": 0.32359224557876587, |
| "learning_rate": 0.0005816307961504811, |
| "loss": 3.9288, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.573985541044776, |
| "grad_norm": 0.3286752998828888, |
| "learning_rate": 0.0005814558180227471, |
| "loss": 3.923, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.5885611007462686, |
| "grad_norm": 0.3351253867149353, |
| "learning_rate": 0.0005812808398950131, |
| "loss": 3.9175, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.603136660447761, |
| "grad_norm": 0.3059863746166229, |
| "learning_rate": 0.0005811058617672791, |
| "loss": 3.9218, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.6177122201492538, |
| "grad_norm": 0.3618924617767334, |
| "learning_rate": 0.000580930883639545, |
| "loss": 3.9225, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.6322877798507462, |
| "grad_norm": 0.33408987522125244, |
| "learning_rate": 0.000580755905511811, |
| "loss": 3.9018, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.646863339552239, |
| "grad_norm": 0.331617534160614, |
| "learning_rate": 0.000580580927384077, |
| "loss": 3.9062, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.6614388992537314, |
| "grad_norm": 0.3273450434207916, |
| "learning_rate": 0.0005804059492563429, |
| "loss": 3.9078, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.676014458955224, |
| "grad_norm": 0.32543638348579407, |
| "learning_rate": 0.0005802309711286089, |
| "loss": 3.9034, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.6905900186567164, |
| "grad_norm": 0.3410203158855438, |
| "learning_rate": 0.0005800559930008749, |
| "loss": 3.8986, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.705165578358209, |
| "grad_norm": 0.32851478457450867, |
| "learning_rate": 0.0005798810148731408, |
| "loss": 3.9116, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.7197411380597014, |
| "grad_norm": 0.32616376876831055, |
| "learning_rate": 0.0005797060367454068, |
| "loss": 3.9015, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.734316697761194, |
| "grad_norm": 0.3180261254310608, |
| "learning_rate": 0.0005795310586176728, |
| "loss": 3.881, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.7488922574626866, |
| "grad_norm": 0.31671977043151855, |
| "learning_rate": 0.0005793560804899387, |
| "loss": 3.8836, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.7488922574626866, |
| "eval_accuracy": 0.3354620788082289, |
| "eval_loss": 3.8672115802764893, |
| "eval_runtime": 179.177, |
| "eval_samples_per_second": 92.797, |
| "eval_steps_per_second": 5.804, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.763467817164179, |
| "grad_norm": 0.3349725902080536, |
| "learning_rate": 0.0005791811023622047, |
| "loss": 3.8863, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.7780433768656716, |
| "grad_norm": 0.31868061423301697, |
| "learning_rate": 0.0005790061242344707, |
| "loss": 3.8875, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.7926189365671643, |
| "grad_norm": 0.31870362162590027, |
| "learning_rate": 0.0005788311461067365, |
| "loss": 3.8957, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.8071944962686568, |
| "grad_norm": 0.32470956444740295, |
| "learning_rate": 0.0005786561679790025, |
| "loss": 3.88, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.8217700559701493, |
| "grad_norm": 0.31178992986679077, |
| "learning_rate": 0.0005784811898512685, |
| "loss": 3.8796, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.8363456156716418, |
| "grad_norm": 0.34831923246383667, |
| "learning_rate": 0.0005783062117235344, |
| "loss": 3.8669, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.8509211753731343, |
| "grad_norm": 0.32867011427879333, |
| "learning_rate": 0.0005781312335958004, |
| "loss": 3.8775, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.8654967350746268, |
| "grad_norm": 0.34291231632232666, |
| "learning_rate": 0.0005779562554680664, |
| "loss": 3.8788, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.8800722947761193, |
| "grad_norm": 0.32128509879112244, |
| "learning_rate": 0.0005777812773403324, |
| "loss": 3.8689, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.894647854477612, |
| "grad_norm": 0.3260379135608673, |
| "learning_rate": 0.0005776062992125983, |
| "loss": 3.8723, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.9092234141791045, |
| "grad_norm": 0.3153153657913208, |
| "learning_rate": 0.0005774313210848643, |
| "loss": 3.8804, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.9237989738805972, |
| "grad_norm": 0.30965352058410645, |
| "learning_rate": 0.0005772563429571303, |
| "loss": 3.8792, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.9383745335820897, |
| "grad_norm": 0.3271130919456482, |
| "learning_rate": 0.0005770813648293962, |
| "loss": 3.8668, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.9529500932835822, |
| "grad_norm": 0.30338090658187866, |
| "learning_rate": 0.0005769063867016622, |
| "loss": 3.8572, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.9675256529850746, |
| "grad_norm": 0.3297083377838135, |
| "learning_rate": 0.0005767314085739282, |
| "loss": 3.8625, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.9821012126865671, |
| "grad_norm": 0.3095833957195282, |
| "learning_rate": 0.0005765564304461942, |
| "loss": 3.86, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.9966767723880596, |
| "grad_norm": 0.29658767580986023, |
| "learning_rate": 0.0005763814523184601, |
| "loss": 3.8555, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.0110774253731343, |
| "grad_norm": 0.3722371459007263, |
| "learning_rate": 0.0005762064741907261, |
| "loss": 3.7759, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.0256529850746268, |
| "grad_norm": 0.31017979979515076, |
| "learning_rate": 0.0005760314960629921, |
| "loss": 3.7559, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.0402285447761193, |
| "grad_norm": 0.3352357745170593, |
| "learning_rate": 0.0005758565179352581, |
| "loss": 3.7667, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0402285447761193, |
| "eval_accuracy": 0.33953509940743115, |
| "eval_loss": 3.8253839015960693, |
| "eval_runtime": 179.2981, |
| "eval_samples_per_second": 92.734, |
| "eval_steps_per_second": 5.8, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0548041044776117, |
| "grad_norm": 0.3400469422340393, |
| "learning_rate": 0.000575681539807524, |
| "loss": 3.7712, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.0693796641791047, |
| "grad_norm": 0.3268464505672455, |
| "learning_rate": 0.00057550656167979, |
| "loss": 3.7554, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.083955223880597, |
| "grad_norm": 0.3046896159648895, |
| "learning_rate": 0.000575331583552056, |
| "loss": 3.7594, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.0985307835820897, |
| "grad_norm": 0.3351455628871918, |
| "learning_rate": 0.000575156605424322, |
| "loss": 3.7651, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.113106343283582, |
| "grad_norm": 0.3188965618610382, |
| "learning_rate": 0.0005749816272965878, |
| "loss": 3.7658, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.1276819029850746, |
| "grad_norm": 0.34054598212242126, |
| "learning_rate": 0.0005748066491688538, |
| "loss": 3.7516, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.142257462686567, |
| "grad_norm": 0.31955304741859436, |
| "learning_rate": 0.0005746316710411198, |
| "loss": 3.7661, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.1568330223880596, |
| "grad_norm": 0.32968559861183167, |
| "learning_rate": 0.0005744566929133858, |
| "loss": 3.7657, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.171408582089552, |
| "grad_norm": 0.3298960030078888, |
| "learning_rate": 0.0005742817147856517, |
| "loss": 3.7712, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.1859841417910446, |
| "grad_norm": 0.332374632358551, |
| "learning_rate": 0.0005741067366579177, |
| "loss": 3.7827, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.200559701492537, |
| "grad_norm": 0.3308345377445221, |
| "learning_rate": 0.0005739317585301837, |
| "loss": 3.7748, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.21513526119403, |
| "grad_norm": 0.3271458148956299, |
| "learning_rate": 0.0005737567804024496, |
| "loss": 3.7587, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.2297108208955225, |
| "grad_norm": 0.3361659348011017, |
| "learning_rate": 0.0005735818022747156, |
| "loss": 3.7688, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.244286380597015, |
| "grad_norm": 0.3123524785041809, |
| "learning_rate": 0.0005734068241469816, |
| "loss": 3.7575, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.2588619402985075, |
| "grad_norm": 0.32984843850135803, |
| "learning_rate": 0.0005732318460192476, |
| "loss": 3.7734, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.2734375, |
| "grad_norm": 0.3111555874347687, |
| "learning_rate": 0.0005730568678915135, |
| "loss": 3.7567, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.2880130597014925, |
| "grad_norm": 0.3495906591415405, |
| "learning_rate": 0.0005728818897637795, |
| "loss": 3.7766, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.302588619402985, |
| "grad_norm": 0.3339783549308777, |
| "learning_rate": 0.0005727069116360455, |
| "loss": 3.7592, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.3171641791044775, |
| "grad_norm": 0.3110646605491638, |
| "learning_rate": 0.0005725319335083115, |
| "loss": 3.7721, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.33173973880597, |
| "grad_norm": 0.3329947292804718, |
| "learning_rate": 0.0005723569553805774, |
| "loss": 3.7647, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.33173973880597, |
| "eval_accuracy": 0.3429995090860279, |
| "eval_loss": 3.794593334197998, |
| "eval_runtime": 179.1534, |
| "eval_samples_per_second": 92.809, |
| "eval_steps_per_second": 5.805, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.346315298507463, |
| "grad_norm": 0.31891825795173645, |
| "learning_rate": 0.0005721819772528434, |
| "loss": 3.7522, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.3608908582089554, |
| "grad_norm": 0.3220847547054291, |
| "learning_rate": 0.0005720069991251094, |
| "loss": 3.7619, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.375466417910448, |
| "grad_norm": 0.3521746098995209, |
| "learning_rate": 0.0005718320209973753, |
| "loss": 3.762, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.3900419776119404, |
| "grad_norm": 0.3538142442703247, |
| "learning_rate": 0.0005716570428696413, |
| "loss": 3.7666, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.404617537313433, |
| "grad_norm": 0.31546488404273987, |
| "learning_rate": 0.0005714820647419073, |
| "loss": 3.7655, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.4191930970149254, |
| "grad_norm": 0.32537829875946045, |
| "learning_rate": 0.0005713070866141731, |
| "loss": 3.7601, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.433768656716418, |
| "grad_norm": 0.3402611017227173, |
| "learning_rate": 0.0005711321084864391, |
| "loss": 3.7643, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.4483442164179103, |
| "grad_norm": 0.32665756344795227, |
| "learning_rate": 0.0005709571303587051, |
| "loss": 3.7538, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.462919776119403, |
| "grad_norm": 0.3143838047981262, |
| "learning_rate": 0.000570782152230971, |
| "loss": 3.7657, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.4774953358208958, |
| "grad_norm": 0.3173374533653259, |
| "learning_rate": 0.000570607174103237, |
| "loss": 3.7716, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.4920708955223883, |
| "grad_norm": 0.3324846625328064, |
| "learning_rate": 0.000570432195975503, |
| "loss": 3.7515, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.5066464552238807, |
| "grad_norm": 0.31547409296035767, |
| "learning_rate": 0.0005702572178477689, |
| "loss": 3.7565, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.5212220149253732, |
| "grad_norm": 0.32761460542678833, |
| "learning_rate": 0.0005700822397200349, |
| "loss": 3.7614, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.5357975746268657, |
| "grad_norm": 0.3202444314956665, |
| "learning_rate": 0.0005699072615923009, |
| "loss": 3.7589, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.550373134328358, |
| "grad_norm": 0.3207978308200836, |
| "learning_rate": 0.0005697322834645668, |
| "loss": 3.7514, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.5649486940298507, |
| "grad_norm": 0.31655776500701904, |
| "learning_rate": 0.0005695573053368328, |
| "loss": 3.76, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.579524253731343, |
| "grad_norm": 0.31503432989120483, |
| "learning_rate": 0.0005693823272090988, |
| "loss": 3.748, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.5940998134328357, |
| "grad_norm": 0.32607924938201904, |
| "learning_rate": 0.0005692073490813648, |
| "loss": 3.7602, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.6086753731343286, |
| "grad_norm": 0.30219122767448425, |
| "learning_rate": 0.0005690323709536307, |
| "loss": 3.7443, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.6232509328358207, |
| "grad_norm": 0.3029521107673645, |
| "learning_rate": 0.0005688573928258967, |
| "loss": 3.7476, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.6232509328358207, |
| "eval_accuracy": 0.34559260825500504, |
| "eval_loss": 3.7627193927764893, |
| "eval_runtime": 179.6396, |
| "eval_samples_per_second": 92.558, |
| "eval_steps_per_second": 5.789, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.6378264925373136, |
| "grad_norm": 0.3177446722984314, |
| "learning_rate": 0.0005686824146981627, |
| "loss": 3.7555, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.652402052238806, |
| "grad_norm": 0.3198622763156891, |
| "learning_rate": 0.0005685074365704287, |
| "loss": 3.7526, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.6669776119402986, |
| "grad_norm": 0.3512043058872223, |
| "learning_rate": 0.0005683324584426946, |
| "loss": 3.735, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.681553171641791, |
| "grad_norm": 0.32351306080818176, |
| "learning_rate": 0.0005681574803149606, |
| "loss": 3.7329, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.6961287313432836, |
| "grad_norm": 0.32276666164398193, |
| "learning_rate": 0.0005679825021872266, |
| "loss": 3.7585, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.710704291044776, |
| "grad_norm": 0.31967878341674805, |
| "learning_rate": 0.0005678075240594926, |
| "loss": 3.7465, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.7252798507462686, |
| "grad_norm": 0.31496620178222656, |
| "learning_rate": 0.0005676325459317584, |
| "loss": 3.7507, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.739855410447761, |
| "grad_norm": 0.31688955426216125, |
| "learning_rate": 0.0005674575678040244, |
| "loss": 3.7486, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.7544309701492535, |
| "grad_norm": 0.300443559885025, |
| "learning_rate": 0.0005672825896762904, |
| "loss": 3.7405, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.7690065298507465, |
| "grad_norm": 0.3132357597351074, |
| "learning_rate": 0.0005671076115485563, |
| "loss": 3.7294, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.783582089552239, |
| "grad_norm": 0.3266209363937378, |
| "learning_rate": 0.0005669326334208223, |
| "loss": 3.746, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.7981576492537314, |
| "grad_norm": 0.3130579888820648, |
| "learning_rate": 0.0005667576552930883, |
| "loss": 3.7399, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.812733208955224, |
| "grad_norm": 0.32442179322242737, |
| "learning_rate": 0.0005665826771653543, |
| "loss": 3.7519, |
| "step": 9650 |
| }, |
| { |
| "epoch": 2.8273087686567164, |
| "grad_norm": 0.3083656430244446, |
| "learning_rate": 0.0005664076990376202, |
| "loss": 3.7361, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.841884328358209, |
| "grad_norm": 0.3123016655445099, |
| "learning_rate": 0.0005662327209098862, |
| "loss": 3.7319, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.8564598880597014, |
| "grad_norm": 0.30334439873695374, |
| "learning_rate": 0.0005660577427821522, |
| "loss": 3.7272, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.871035447761194, |
| "grad_norm": 0.3039874732494354, |
| "learning_rate": 0.0005658827646544182, |
| "loss": 3.7239, |
| "step": 9850 |
| }, |
| { |
| "epoch": 2.8856110074626864, |
| "grad_norm": 0.330578088760376, |
| "learning_rate": 0.0005657077865266841, |
| "loss": 3.7356, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.9001865671641793, |
| "grad_norm": 0.34357017278671265, |
| "learning_rate": 0.0005655328083989501, |
| "loss": 3.7436, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.9147621268656714, |
| "grad_norm": 0.3257700800895691, |
| "learning_rate": 0.0005653578302712161, |
| "loss": 3.7253, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.9147621268656714, |
| "eval_accuracy": 0.34803717387499666, |
| "eval_loss": 3.739212989807129, |
| "eval_runtime": 179.5993, |
| "eval_samples_per_second": 92.578, |
| "eval_steps_per_second": 5.791, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.9293376865671643, |
| "grad_norm": 0.3030923902988434, |
| "learning_rate": 0.0005651828521434821, |
| "loss": 3.7311, |
| "step": 10050 |
| }, |
| { |
| "epoch": 2.943913246268657, |
| "grad_norm": 0.3330658972263336, |
| "learning_rate": 0.000565007874015748, |
| "loss": 3.7252, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.9584888059701493, |
| "grad_norm": 0.30661967396736145, |
| "learning_rate": 0.000564832895888014, |
| "loss": 3.7154, |
| "step": 10150 |
| }, |
| { |
| "epoch": 2.973064365671642, |
| "grad_norm": 0.3306799829006195, |
| "learning_rate": 0.00056465791776028, |
| "loss": 3.7316, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.9876399253731343, |
| "grad_norm": 0.3185974359512329, |
| "learning_rate": 0.0005644829396325459, |
| "loss": 3.7361, |
| "step": 10250 |
| }, |
| { |
| "epoch": 3.002040578358209, |
| "grad_norm": 0.33761319518089294, |
| "learning_rate": 0.0005643079615048119, |
| "loss": 3.71, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.0166161380597014, |
| "grad_norm": 0.31549760699272156, |
| "learning_rate": 0.0005641329833770779, |
| "loss": 3.6183, |
| "step": 10350 |
| }, |
| { |
| "epoch": 3.031191697761194, |
| "grad_norm": 0.32368534803390503, |
| "learning_rate": 0.0005639580052493437, |
| "loss": 3.6255, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.0457672574626864, |
| "grad_norm": 0.30971264839172363, |
| "learning_rate": 0.0005637830271216097, |
| "loss": 3.6278, |
| "step": 10450 |
| }, |
| { |
| "epoch": 3.0603428171641793, |
| "grad_norm": 0.33048370480537415, |
| "learning_rate": 0.0005636080489938757, |
| "loss": 3.6361, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.074918376865672, |
| "grad_norm": 0.31469184160232544, |
| "learning_rate": 0.0005634330708661417, |
| "loss": 3.6288, |
| "step": 10550 |
| }, |
| { |
| "epoch": 3.0894939365671643, |
| "grad_norm": 0.3115905225276947, |
| "learning_rate": 0.0005632580927384076, |
| "loss": 3.6349, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.104069496268657, |
| "grad_norm": 0.321036159992218, |
| "learning_rate": 0.0005630831146106736, |
| "loss": 3.6364, |
| "step": 10650 |
| }, |
| { |
| "epoch": 3.1186450559701493, |
| "grad_norm": 0.30595359206199646, |
| "learning_rate": 0.0005629081364829396, |
| "loss": 3.6337, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.133220615671642, |
| "grad_norm": 0.3319248557090759, |
| "learning_rate": 0.0005627331583552055, |
| "loss": 3.6406, |
| "step": 10750 |
| }, |
| { |
| "epoch": 3.1477961753731343, |
| "grad_norm": 0.3592631220817566, |
| "learning_rate": 0.0005625581802274715, |
| "loss": 3.6457, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.1623717350746268, |
| "grad_norm": 0.30832141637802124, |
| "learning_rate": 0.0005623832020997375, |
| "loss": 3.6465, |
| "step": 10850 |
| }, |
| { |
| "epoch": 3.1769472947761193, |
| "grad_norm": 0.30971667170524597, |
| "learning_rate": 0.0005622082239720034, |
| "loss": 3.6442, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.1915228544776117, |
| "grad_norm": 0.32361266016960144, |
| "learning_rate": 0.0005620332458442694, |
| "loss": 3.6469, |
| "step": 10950 |
| }, |
| { |
| "epoch": 3.2060984141791047, |
| "grad_norm": 0.3234313726425171, |
| "learning_rate": 0.0005618582677165354, |
| "loss": 3.6522, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.2060984141791047, |
| "eval_accuracy": 0.34968304800258276, |
| "eval_loss": 3.725311756134033, |
| "eval_runtime": 179.4792, |
| "eval_samples_per_second": 92.64, |
| "eval_steps_per_second": 5.795, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.220673973880597, |
| "grad_norm": 0.32695841789245605, |
| "learning_rate": 0.0005616832895888013, |
| "loss": 3.6504, |
| "step": 11050 |
| }, |
| { |
| "epoch": 3.2352495335820897, |
| "grad_norm": 0.31431734561920166, |
| "learning_rate": 0.0005615083114610673, |
| "loss": 3.6456, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.249825093283582, |
| "grad_norm": 0.3156610131263733, |
| "learning_rate": 0.0005613333333333333, |
| "loss": 3.6549, |
| "step": 11150 |
| }, |
| { |
| "epoch": 3.2644006529850746, |
| "grad_norm": 0.31469911336898804, |
| "learning_rate": 0.0005611583552055992, |
| "loss": 3.6604, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.278976212686567, |
| "grad_norm": 0.32614484429359436, |
| "learning_rate": 0.0005609833770778652, |
| "loss": 3.6538, |
| "step": 11250 |
| }, |
| { |
| "epoch": 3.2935517723880596, |
| "grad_norm": 0.3248981535434723, |
| "learning_rate": 0.0005608083989501312, |
| "loss": 3.6495, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.308127332089552, |
| "grad_norm": 0.31083929538726807, |
| "learning_rate": 0.0005606334208223972, |
| "loss": 3.6475, |
| "step": 11350 |
| }, |
| { |
| "epoch": 3.3227028917910446, |
| "grad_norm": 0.2996702492237091, |
| "learning_rate": 0.000560458442694663, |
| "loss": 3.6517, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.337278451492537, |
| "grad_norm": 0.31601446866989136, |
| "learning_rate": 0.000560283464566929, |
| "loss": 3.651, |
| "step": 11450 |
| }, |
| { |
| "epoch": 3.35185401119403, |
| "grad_norm": 0.30847907066345215, |
| "learning_rate": 0.000560108486439195, |
| "loss": 3.6554, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.3664295708955225, |
| "grad_norm": 0.3323810398578644, |
| "learning_rate": 0.000559933508311461, |
| "loss": 3.6488, |
| "step": 11550 |
| }, |
| { |
| "epoch": 3.381005130597015, |
| "grad_norm": 0.3122812807559967, |
| "learning_rate": 0.0005597585301837269, |
| "loss": 3.6552, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.3955806902985075, |
| "grad_norm": 0.3094134032726288, |
| "learning_rate": 0.0005595835520559929, |
| "loss": 3.6454, |
| "step": 11650 |
| }, |
| { |
| "epoch": 3.41015625, |
| "grad_norm": 0.2967991828918457, |
| "learning_rate": 0.0005594085739282589, |
| "loss": 3.672, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.4247318097014925, |
| "grad_norm": 0.31317925453186035, |
| "learning_rate": 0.0005592335958005249, |
| "loss": 3.6597, |
| "step": 11750 |
| }, |
| { |
| "epoch": 3.439307369402985, |
| "grad_norm": 0.30682000517845154, |
| "learning_rate": 0.0005590586176727908, |
| "loss": 3.6456, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.4538829291044775, |
| "grad_norm": 0.31732603907585144, |
| "learning_rate": 0.0005588836395450568, |
| "loss": 3.6532, |
| "step": 11850 |
| }, |
| { |
| "epoch": 3.46845848880597, |
| "grad_norm": 0.3260912001132965, |
| "learning_rate": 0.0005587086614173228, |
| "loss": 3.6662, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.483034048507463, |
| "grad_norm": 0.3180249333381653, |
| "learning_rate": 0.0005585336832895888, |
| "loss": 3.6511, |
| "step": 11950 |
| }, |
| { |
| "epoch": 3.4976096082089554, |
| "grad_norm": 0.31996291875839233, |
| "learning_rate": 0.0005583587051618547, |
| "loss": 3.66, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.4976096082089554, |
| "eval_accuracy": 0.3516450561337941, |
| "eval_loss": 3.7062184810638428, |
| "eval_runtime": 180.5583, |
| "eval_samples_per_second": 92.087, |
| "eval_steps_per_second": 5.76, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.512185167910448, |
| "grad_norm": 0.313018262386322, |
| "learning_rate": 0.0005581837270341207, |
| "loss": 3.6495, |
| "step": 12050 |
| }, |
| { |
| "epoch": 3.5267607276119404, |
| "grad_norm": 0.31375324726104736, |
| "learning_rate": 0.0005580087489063867, |
| "loss": 3.6595, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.541336287313433, |
| "grad_norm": 0.32115986943244934, |
| "learning_rate": 0.0005578337707786526, |
| "loss": 3.6507, |
| "step": 12150 |
| }, |
| { |
| "epoch": 3.5559118470149254, |
| "grad_norm": 0.32580527663230896, |
| "learning_rate": 0.0005576587926509186, |
| "loss": 3.6527, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.570487406716418, |
| "grad_norm": 0.2964605689048767, |
| "learning_rate": 0.0005574838145231846, |
| "loss": 3.657, |
| "step": 12250 |
| }, |
| { |
| "epoch": 3.5850629664179103, |
| "grad_norm": 0.31582698225975037, |
| "learning_rate": 0.0005573088363954506, |
| "loss": 3.6419, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.599638526119403, |
| "grad_norm": 0.3034908175468445, |
| "learning_rate": 0.0005571338582677165, |
| "loss": 3.6547, |
| "step": 12350 |
| }, |
| { |
| "epoch": 3.6142140858208958, |
| "grad_norm": 0.3223356008529663, |
| "learning_rate": 0.0005569588801399825, |
| "loss": 3.6425, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.628789645522388, |
| "grad_norm": 0.3192656338214874, |
| "learning_rate": 0.0005567839020122485, |
| "loss": 3.6651, |
| "step": 12450 |
| }, |
| { |
| "epoch": 3.6433652052238807, |
| "grad_norm": 0.3013507127761841, |
| "learning_rate": 0.0005566089238845145, |
| "loss": 3.6463, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.6579407649253732, |
| "grad_norm": 0.30855220556259155, |
| "learning_rate": 0.0005564339457567803, |
| "loss": 3.6542, |
| "step": 12550 |
| }, |
| { |
| "epoch": 3.6725163246268657, |
| "grad_norm": 0.30979233980178833, |
| "learning_rate": 0.0005562589676290463, |
| "loss": 3.6514, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.687091884328358, |
| "grad_norm": 0.30794402956962585, |
| "learning_rate": 0.0005560839895013123, |
| "loss": 3.6469, |
| "step": 12650 |
| }, |
| { |
| "epoch": 3.7016674440298507, |
| "grad_norm": 0.30499374866485596, |
| "learning_rate": 0.0005559090113735782, |
| "loss": 3.6526, |
| "step": 12700 |
| }, |
| { |
| "epoch": 3.716243003731343, |
| "grad_norm": 0.31229689717292786, |
| "learning_rate": 0.0005557340332458442, |
| "loss": 3.6539, |
| "step": 12750 |
| }, |
| { |
| "epoch": 3.7308185634328357, |
| "grad_norm": 0.3160341680049896, |
| "learning_rate": 0.0005555590551181102, |
| "loss": 3.6505, |
| "step": 12800 |
| }, |
| { |
| "epoch": 3.7453941231343286, |
| "grad_norm": 0.3231920599937439, |
| "learning_rate": 0.0005553840769903761, |
| "loss": 3.6575, |
| "step": 12850 |
| }, |
| { |
| "epoch": 3.7599696828358207, |
| "grad_norm": 0.32196515798568726, |
| "learning_rate": 0.0005552090988626421, |
| "loss": 3.6436, |
| "step": 12900 |
| }, |
| { |
| "epoch": 3.7745452425373136, |
| "grad_norm": 0.3200373947620392, |
| "learning_rate": 0.0005550341207349081, |
| "loss": 3.6538, |
| "step": 12950 |
| }, |
| { |
| "epoch": 3.789120802238806, |
| "grad_norm": 0.3132128417491913, |
| "learning_rate": 0.000554859142607174, |
| "loss": 3.6552, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.789120802238806, |
| "eval_accuracy": 0.3530933170848773, |
| "eval_loss": 3.689838409423828, |
| "eval_runtime": 184.45, |
| "eval_samples_per_second": 90.144, |
| "eval_steps_per_second": 5.638, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.8036963619402986, |
| "grad_norm": 0.31602323055267334, |
| "learning_rate": 0.00055468416447944, |
| "loss": 3.6588, |
| "step": 13050 |
| }, |
| { |
| "epoch": 3.818271921641791, |
| "grad_norm": 0.3273375332355499, |
| "learning_rate": 0.000554509186351706, |
| "loss": 3.6435, |
| "step": 13100 |
| }, |
| { |
| "epoch": 3.8328474813432836, |
| "grad_norm": 0.3076334297657013, |
| "learning_rate": 0.000554334208223972, |
| "loss": 3.6514, |
| "step": 13150 |
| }, |
| { |
| "epoch": 3.847423041044776, |
| "grad_norm": 0.3193327486515045, |
| "learning_rate": 0.0005541592300962379, |
| "loss": 3.6496, |
| "step": 13200 |
| }, |
| { |
| "epoch": 3.8619986007462686, |
| "grad_norm": 0.2980865240097046, |
| "learning_rate": 0.0005539842519685039, |
| "loss": 3.6406, |
| "step": 13250 |
| }, |
| { |
| "epoch": 3.876574160447761, |
| "grad_norm": 0.30861976742744446, |
| "learning_rate": 0.0005538092738407699, |
| "loss": 3.6524, |
| "step": 13300 |
| }, |
| { |
| "epoch": 3.8911497201492535, |
| "grad_norm": 0.31307452917099, |
| "learning_rate": 0.0005536342957130358, |
| "loss": 3.6432, |
| "step": 13350 |
| }, |
| { |
| "epoch": 3.9057252798507465, |
| "grad_norm": 0.30774036049842834, |
| "learning_rate": 0.0005534593175853018, |
| "loss": 3.6442, |
| "step": 13400 |
| }, |
| { |
| "epoch": 3.920300839552239, |
| "grad_norm": 0.3179526925086975, |
| "learning_rate": 0.0005532843394575678, |
| "loss": 3.6473, |
| "step": 13450 |
| }, |
| { |
| "epoch": 3.9348763992537314, |
| "grad_norm": 0.3184812068939209, |
| "learning_rate": 0.0005531093613298337, |
| "loss": 3.6276, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.949451958955224, |
| "grad_norm": 0.33337730169296265, |
| "learning_rate": 0.0005529343832020997, |
| "loss": 3.6487, |
| "step": 13550 |
| }, |
| { |
| "epoch": 3.9640275186567164, |
| "grad_norm": 0.30484265089035034, |
| "learning_rate": 0.0005527594050743656, |
| "loss": 3.6475, |
| "step": 13600 |
| }, |
| { |
| "epoch": 3.978603078358209, |
| "grad_norm": 0.31063055992126465, |
| "learning_rate": 0.0005525844269466316, |
| "loss": 3.6491, |
| "step": 13650 |
| }, |
| { |
| "epoch": 3.9931786380597014, |
| "grad_norm": 0.3012693226337433, |
| "learning_rate": 0.0005524094488188975, |
| "loss": 3.6502, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.0075792910447765, |
| "grad_norm": 0.29927513003349304, |
| "learning_rate": 0.0005522344706911635, |
| "loss": 3.5919, |
| "step": 13750 |
| }, |
| { |
| "epoch": 4.0221548507462686, |
| "grad_norm": 0.325898140668869, |
| "learning_rate": 0.0005520594925634295, |
| "loss": 3.5521, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.0367304104477615, |
| "grad_norm": 0.3279491662979126, |
| "learning_rate": 0.0005518845144356954, |
| "loss": 3.5461, |
| "step": 13850 |
| }, |
| { |
| "epoch": 4.0513059701492535, |
| "grad_norm": 0.32043692469596863, |
| "learning_rate": 0.0005517095363079614, |
| "loss": 3.5494, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.0658815298507465, |
| "grad_norm": 0.3268143832683563, |
| "learning_rate": 0.0005515345581802274, |
| "loss": 3.5446, |
| "step": 13950 |
| }, |
| { |
| "epoch": 4.0804570895522385, |
| "grad_norm": 0.2917918264865875, |
| "learning_rate": 0.0005513595800524934, |
| "loss": 3.5443, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.0804570895522385, |
| "eval_accuracy": 0.35479980514093207, |
| "eval_loss": 3.680328369140625, |
| "eval_runtime": 180.7539, |
| "eval_samples_per_second": 91.987, |
| "eval_steps_per_second": 5.754, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.0950326492537314, |
| "grad_norm": 0.3293069303035736, |
| "learning_rate": 0.0005511846019247593, |
| "loss": 3.553, |
| "step": 14050 |
| }, |
| { |
| "epoch": 4.1096082089552235, |
| "grad_norm": 0.31176093220710754, |
| "learning_rate": 0.0005510096237970253, |
| "loss": 3.5489, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.124183768656716, |
| "grad_norm": 0.3254378139972687, |
| "learning_rate": 0.0005508346456692913, |
| "loss": 3.5478, |
| "step": 14150 |
| }, |
| { |
| "epoch": 4.138759328358209, |
| "grad_norm": 0.32653385400772095, |
| "learning_rate": 0.0005506596675415573, |
| "loss": 3.5658, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.153334888059701, |
| "grad_norm": 0.3056170344352722, |
| "learning_rate": 0.0005504846894138232, |
| "loss": 3.5698, |
| "step": 14250 |
| }, |
| { |
| "epoch": 4.167910447761194, |
| "grad_norm": 0.3116225004196167, |
| "learning_rate": 0.0005503097112860892, |
| "loss": 3.562, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.182486007462686, |
| "grad_norm": 0.3115183413028717, |
| "learning_rate": 0.0005501347331583552, |
| "loss": 3.5626, |
| "step": 14350 |
| }, |
| { |
| "epoch": 4.197061567164179, |
| "grad_norm": 0.332135409116745, |
| "learning_rate": 0.0005499597550306212, |
| "loss": 3.5885, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.211637126865671, |
| "grad_norm": 0.30620288848876953, |
| "learning_rate": 0.0005497847769028871, |
| "loss": 3.5675, |
| "step": 14450 |
| }, |
| { |
| "epoch": 4.226212686567164, |
| "grad_norm": 0.3426160216331482, |
| "learning_rate": 0.0005496097987751531, |
| "loss": 3.5712, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.240788246268656, |
| "grad_norm": 0.3171519339084625, |
| "learning_rate": 0.0005494348206474191, |
| "loss": 3.5806, |
| "step": 14550 |
| }, |
| { |
| "epoch": 4.255363805970149, |
| "grad_norm": 0.30158156156539917, |
| "learning_rate": 0.0005492598425196851, |
| "loss": 3.5669, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.269939365671641, |
| "grad_norm": 0.31135430932044983, |
| "learning_rate": 0.000549084864391951, |
| "loss": 3.5638, |
| "step": 14650 |
| }, |
| { |
| "epoch": 4.284514925373134, |
| "grad_norm": 0.32262277603149414, |
| "learning_rate": 0.000548909886264217, |
| "loss": 3.5744, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.299090485074627, |
| "grad_norm": 0.33498984575271606, |
| "learning_rate": 0.000548734908136483, |
| "loss": 3.5667, |
| "step": 14750 |
| }, |
| { |
| "epoch": 4.313666044776119, |
| "grad_norm": 0.31680381298065186, |
| "learning_rate": 0.0005485599300087488, |
| "loss": 3.5725, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.328241604477612, |
| "grad_norm": 0.330207496881485, |
| "learning_rate": 0.0005483849518810148, |
| "loss": 3.5772, |
| "step": 14850 |
| }, |
| { |
| "epoch": 4.342817164179104, |
| "grad_norm": 0.31242871284484863, |
| "learning_rate": 0.0005482099737532808, |
| "loss": 3.5764, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.357392723880597, |
| "grad_norm": 0.30855998396873474, |
| "learning_rate": 0.0005480349956255468, |
| "loss": 3.5655, |
| "step": 14950 |
| }, |
| { |
| "epoch": 4.371968283582089, |
| "grad_norm": 0.3287501335144043, |
| "learning_rate": 0.0005478600174978127, |
| "loss": 3.5694, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.371968283582089, |
| "eval_accuracy": 0.3560954131498328, |
| "eval_loss": 3.6704261302948, |
| "eval_runtime": 180.8213, |
| "eval_samples_per_second": 91.953, |
| "eval_steps_per_second": 5.752, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.386543843283582, |
| "grad_norm": 0.3167351484298706, |
| "learning_rate": 0.0005476850393700787, |
| "loss": 3.5763, |
| "step": 15050 |
| }, |
| { |
| "epoch": 4.401119402985074, |
| "grad_norm": 0.31553417444229126, |
| "learning_rate": 0.0005475100612423447, |
| "loss": 3.5838, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.415694962686567, |
| "grad_norm": 0.3282777667045593, |
| "learning_rate": 0.0005473350831146106, |
| "loss": 3.585, |
| "step": 15150 |
| }, |
| { |
| "epoch": 4.43027052238806, |
| "grad_norm": 0.3105928897857666, |
| "learning_rate": 0.0005471601049868766, |
| "loss": 3.5828, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.444846082089552, |
| "grad_norm": 0.2984945476055145, |
| "learning_rate": 0.0005469851268591426, |
| "loss": 3.5808, |
| "step": 15250 |
| }, |
| { |
| "epoch": 4.459421641791045, |
| "grad_norm": 0.30299872159957886, |
| "learning_rate": 0.0005468101487314085, |
| "loss": 3.582, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.473997201492537, |
| "grad_norm": 0.3066897392272949, |
| "learning_rate": 0.0005466351706036745, |
| "loss": 3.5935, |
| "step": 15350 |
| }, |
| { |
| "epoch": 4.48857276119403, |
| "grad_norm": 0.30703720450401306, |
| "learning_rate": 0.0005464601924759405, |
| "loss": 3.5741, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.503148320895522, |
| "grad_norm": 0.31966084241867065, |
| "learning_rate": 0.0005462852143482064, |
| "loss": 3.5844, |
| "step": 15450 |
| }, |
| { |
| "epoch": 4.517723880597015, |
| "grad_norm": 0.3157017230987549, |
| "learning_rate": 0.0005461102362204724, |
| "loss": 3.5834, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.532299440298507, |
| "grad_norm": 0.31914374232292175, |
| "learning_rate": 0.0005459352580927384, |
| "loss": 3.5824, |
| "step": 15550 |
| }, |
| { |
| "epoch": 4.546875, |
| "grad_norm": 0.3196133077144623, |
| "learning_rate": 0.0005457602799650043, |
| "loss": 3.5838, |
| "step": 15600 |
| }, |
| { |
| "epoch": 4.561450559701493, |
| "grad_norm": 0.3134230971336365, |
| "learning_rate": 0.0005455853018372703, |
| "loss": 3.5862, |
| "step": 15650 |
| }, |
| { |
| "epoch": 4.576026119402985, |
| "grad_norm": 0.2948553264141083, |
| "learning_rate": 0.0005454103237095363, |
| "loss": 3.5771, |
| "step": 15700 |
| }, |
| { |
| "epoch": 4.590601679104478, |
| "grad_norm": 0.31631046533584595, |
| "learning_rate": 0.0005452353455818022, |
| "loss": 3.6015, |
| "step": 15750 |
| }, |
| { |
| "epoch": 4.60517723880597, |
| "grad_norm": 0.3305026590824127, |
| "learning_rate": 0.0005450603674540681, |
| "loss": 3.5892, |
| "step": 15800 |
| }, |
| { |
| "epoch": 4.619752798507463, |
| "grad_norm": 0.3080248534679413, |
| "learning_rate": 0.0005448853893263341, |
| "loss": 3.5883, |
| "step": 15850 |
| }, |
| { |
| "epoch": 4.634328358208955, |
| "grad_norm": 0.317914754152298, |
| "learning_rate": 0.0005447104111986001, |
| "loss": 3.5963, |
| "step": 15900 |
| }, |
| { |
| "epoch": 4.648903917910448, |
| "grad_norm": 0.30993831157684326, |
| "learning_rate": 0.000544535433070866, |
| "loss": 3.5872, |
| "step": 15950 |
| }, |
| { |
| "epoch": 4.66347947761194, |
| "grad_norm": 0.32297852635383606, |
| "learning_rate": 0.000544360454943132, |
| "loss": 3.5998, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.66347947761194, |
| "eval_accuracy": 0.35684573119641183, |
| "eval_loss": 3.659557819366455, |
| "eval_runtime": 197.5912, |
| "eval_samples_per_second": 84.148, |
| "eval_steps_per_second": 5.263, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.678055037313433, |
| "grad_norm": 0.3044687509536743, |
| "learning_rate": 0.000544185476815398, |
| "loss": 3.5989, |
| "step": 16050 |
| }, |
| { |
| "epoch": 4.692630597014926, |
| "grad_norm": 0.3438224792480469, |
| "learning_rate": 0.000544010498687664, |
| "loss": 3.5888, |
| "step": 16100 |
| }, |
| { |
| "epoch": 4.707206156716418, |
| "grad_norm": 0.3074466288089752, |
| "learning_rate": 0.0005438355205599299, |
| "loss": 3.5969, |
| "step": 16150 |
| }, |
| { |
| "epoch": 4.721781716417911, |
| "grad_norm": 0.3279782235622406, |
| "learning_rate": 0.0005436605424321959, |
| "loss": 3.5828, |
| "step": 16200 |
| }, |
| { |
| "epoch": 4.736357276119403, |
| "grad_norm": 0.3192928731441498, |
| "learning_rate": 0.0005434855643044619, |
| "loss": 3.586, |
| "step": 16250 |
| }, |
| { |
| "epoch": 4.750932835820896, |
| "grad_norm": 0.3126852810382843, |
| "learning_rate": 0.0005433105861767279, |
| "loss": 3.5905, |
| "step": 16300 |
| }, |
| { |
| "epoch": 4.765508395522388, |
| "grad_norm": 0.3209400177001953, |
| "learning_rate": 0.0005431356080489938, |
| "loss": 3.5821, |
| "step": 16350 |
| }, |
| { |
| "epoch": 4.780083955223881, |
| "grad_norm": 0.31030383706092834, |
| "learning_rate": 0.0005429606299212598, |
| "loss": 3.5863, |
| "step": 16400 |
| }, |
| { |
| "epoch": 4.794659514925373, |
| "grad_norm": 0.29958170652389526, |
| "learning_rate": 0.0005427856517935258, |
| "loss": 3.5929, |
| "step": 16450 |
| }, |
| { |
| "epoch": 4.809235074626866, |
| "grad_norm": 0.3295001685619354, |
| "learning_rate": 0.0005426106736657917, |
| "loss": 3.5957, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.823810634328359, |
| "grad_norm": 0.32404083013534546, |
| "learning_rate": 0.0005424356955380577, |
| "loss": 3.5826, |
| "step": 16550 |
| }, |
| { |
| "epoch": 4.838386194029851, |
| "grad_norm": 0.33932703733444214, |
| "learning_rate": 0.0005422607174103237, |
| "loss": 3.5873, |
| "step": 16600 |
| }, |
| { |
| "epoch": 4.852961753731344, |
| "grad_norm": 0.3206491470336914, |
| "learning_rate": 0.0005420857392825897, |
| "loss": 3.5869, |
| "step": 16650 |
| }, |
| { |
| "epoch": 4.867537313432836, |
| "grad_norm": 0.3008648157119751, |
| "learning_rate": 0.0005419107611548556, |
| "loss": 3.576, |
| "step": 16700 |
| }, |
| { |
| "epoch": 4.882112873134329, |
| "grad_norm": 0.3127002716064453, |
| "learning_rate": 0.0005417357830271216, |
| "loss": 3.5996, |
| "step": 16750 |
| }, |
| { |
| "epoch": 4.896688432835821, |
| "grad_norm": 0.315496563911438, |
| "learning_rate": 0.0005415608048993876, |
| "loss": 3.5802, |
| "step": 16800 |
| }, |
| { |
| "epoch": 4.911263992537314, |
| "grad_norm": 0.3381385803222656, |
| "learning_rate": 0.0005413858267716535, |
| "loss": 3.5946, |
| "step": 16850 |
| }, |
| { |
| "epoch": 4.925839552238806, |
| "grad_norm": 0.314251571893692, |
| "learning_rate": 0.0005412108486439194, |
| "loss": 3.5767, |
| "step": 16900 |
| }, |
| { |
| "epoch": 4.940415111940299, |
| "grad_norm": 0.3200497329235077, |
| "learning_rate": 0.0005410358705161854, |
| "loss": 3.589, |
| "step": 16950 |
| }, |
| { |
| "epoch": 4.9549906716417915, |
| "grad_norm": 0.29789999127388, |
| "learning_rate": 0.0005408608923884514, |
| "loss": 3.5871, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.9549906716417915, |
| "eval_accuracy": 0.35848654435521315, |
| "eval_loss": 3.642368793487549, |
| "eval_runtime": 181.4525, |
| "eval_samples_per_second": 91.633, |
| "eval_steps_per_second": 5.732, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.969566231343284, |
| "grad_norm": 0.29378968477249146, |
| "learning_rate": 0.0005406859142607174, |
| "loss": 3.5946, |
| "step": 17050 |
| }, |
| { |
| "epoch": 4.9841417910447765, |
| "grad_norm": 0.29139405488967896, |
| "learning_rate": 0.0005405109361329833, |
| "loss": 3.5816, |
| "step": 17100 |
| }, |
| { |
| "epoch": 4.9987173507462686, |
| "grad_norm": 0.3059278130531311, |
| "learning_rate": 0.0005403359580052493, |
| "loss": 3.5769, |
| "step": 17150 |
| }, |
| { |
| "epoch": 5.013118003731344, |
| "grad_norm": 0.3037010133266449, |
| "learning_rate": 0.0005401609798775153, |
| "loss": 3.4897, |
| "step": 17200 |
| }, |
| { |
| "epoch": 5.027693563432836, |
| "grad_norm": 0.32655635476112366, |
| "learning_rate": 0.0005399860017497813, |
| "loss": 3.479, |
| "step": 17250 |
| }, |
| { |
| "epoch": 5.042269123134329, |
| "grad_norm": 0.3251640796661377, |
| "learning_rate": 0.0005398110236220472, |
| "loss": 3.4859, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.056844682835821, |
| "grad_norm": 0.31693679094314575, |
| "learning_rate": 0.0005396360454943132, |
| "loss": 3.4711, |
| "step": 17350 |
| }, |
| { |
| "epoch": 5.071420242537314, |
| "grad_norm": 0.32700759172439575, |
| "learning_rate": 0.0005394610673665792, |
| "loss": 3.481, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.085995802238806, |
| "grad_norm": 0.3072807192802429, |
| "learning_rate": 0.0005392860892388451, |
| "loss": 3.5028, |
| "step": 17450 |
| }, |
| { |
| "epoch": 5.100571361940299, |
| "grad_norm": 0.3221571743488312, |
| "learning_rate": 0.0005391111111111111, |
| "loss": 3.4889, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.115146921641791, |
| "grad_norm": 0.3130948841571808, |
| "learning_rate": 0.0005389361329833771, |
| "loss": 3.5, |
| "step": 17550 |
| }, |
| { |
| "epoch": 5.129722481343284, |
| "grad_norm": 0.31084439158439636, |
| "learning_rate": 0.000538761154855643, |
| "loss": 3.5014, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.1442980410447765, |
| "grad_norm": 0.3255762457847595, |
| "learning_rate": 0.000538586176727909, |
| "loss": 3.5074, |
| "step": 17650 |
| }, |
| { |
| "epoch": 5.1588736007462686, |
| "grad_norm": 0.33458569645881653, |
| "learning_rate": 0.000538411198600175, |
| "loss": 3.5101, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.1734491604477615, |
| "grad_norm": 0.327533096075058, |
| "learning_rate": 0.0005382362204724409, |
| "loss": 3.5184, |
| "step": 17750 |
| }, |
| { |
| "epoch": 5.1880247201492535, |
| "grad_norm": 0.31040626764297485, |
| "learning_rate": 0.0005380612423447069, |
| "loss": 3.5054, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.2026002798507465, |
| "grad_norm": 0.3363685607910156, |
| "learning_rate": 0.0005378862642169729, |
| "loss": 3.4981, |
| "step": 17850 |
| }, |
| { |
| "epoch": 5.2171758395522385, |
| "grad_norm": 0.3423328995704651, |
| "learning_rate": 0.0005377112860892387, |
| "loss": 3.52, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.2317513992537314, |
| "grad_norm": 0.33025994896888733, |
| "learning_rate": 0.0005375363079615047, |
| "loss": 3.5243, |
| "step": 17950 |
| }, |
| { |
| "epoch": 5.2463269589552235, |
| "grad_norm": 0.33021610975265503, |
| "learning_rate": 0.0005373613298337707, |
| "loss": 3.4993, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.2463269589552235, |
| "eval_accuracy": 0.3589595683911663, |
| "eval_loss": 3.6441898345947266, |
| "eval_runtime": 180.4815, |
| "eval_samples_per_second": 92.126, |
| "eval_steps_per_second": 5.762, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.260902518656716, |
| "grad_norm": 0.3091402053833008, |
| "learning_rate": 0.0005371863517060366, |
| "loss": 3.5146, |
| "step": 18050 |
| }, |
| { |
| "epoch": 5.275478078358209, |
| "grad_norm": 0.32835206389427185, |
| "learning_rate": 0.0005370113735783026, |
| "loss": 3.5231, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.290053638059701, |
| "grad_norm": 0.3306668698787689, |
| "learning_rate": 0.0005368363954505686, |
| "loss": 3.5071, |
| "step": 18150 |
| }, |
| { |
| "epoch": 5.304629197761194, |
| "grad_norm": 0.34048157930374146, |
| "learning_rate": 0.0005366614173228346, |
| "loss": 3.5238, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.319204757462686, |
| "grad_norm": 0.31027457118034363, |
| "learning_rate": 0.0005364864391951005, |
| "loss": 3.5294, |
| "step": 18250 |
| }, |
| { |
| "epoch": 5.333780317164179, |
| "grad_norm": 0.3100549280643463, |
| "learning_rate": 0.0005363114610673665, |
| "loss": 3.518, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.348355876865671, |
| "grad_norm": 0.3276021182537079, |
| "learning_rate": 0.0005361364829396325, |
| "loss": 3.5219, |
| "step": 18350 |
| }, |
| { |
| "epoch": 5.362931436567164, |
| "grad_norm": 0.3327234983444214, |
| "learning_rate": 0.0005359615048118984, |
| "loss": 3.5043, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.377506996268656, |
| "grad_norm": 0.3122633993625641, |
| "learning_rate": 0.0005357865266841644, |
| "loss": 3.538, |
| "step": 18450 |
| }, |
| { |
| "epoch": 5.392082555970149, |
| "grad_norm": 0.34483712911605835, |
| "learning_rate": 0.0005356115485564304, |
| "loss": 3.5303, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.406658115671641, |
| "grad_norm": 0.34489697217941284, |
| "learning_rate": 0.0005354365704286964, |
| "loss": 3.5276, |
| "step": 18550 |
| }, |
| { |
| "epoch": 5.421233675373134, |
| "grad_norm": 0.31317785382270813, |
| "learning_rate": 0.0005352615923009623, |
| "loss": 3.5298, |
| "step": 18600 |
| }, |
| { |
| "epoch": 5.435809235074627, |
| "grad_norm": 0.30463266372680664, |
| "learning_rate": 0.0005350866141732283, |
| "loss": 3.5289, |
| "step": 18650 |
| }, |
| { |
| "epoch": 5.450384794776119, |
| "grad_norm": 0.3198157250881195, |
| "learning_rate": 0.0005349116360454943, |
| "loss": 3.5255, |
| "step": 18700 |
| }, |
| { |
| "epoch": 5.464960354477612, |
| "grad_norm": 0.3108980357646942, |
| "learning_rate": 0.0005347366579177603, |
| "loss": 3.5311, |
| "step": 18750 |
| }, |
| { |
| "epoch": 5.479535914179104, |
| "grad_norm": 0.32333433628082275, |
| "learning_rate": 0.0005345616797900262, |
| "loss": 3.5315, |
| "step": 18800 |
| }, |
| { |
| "epoch": 5.494111473880597, |
| "grad_norm": 0.32552671432495117, |
| "learning_rate": 0.0005343867016622922, |
| "loss": 3.5385, |
| "step": 18850 |
| }, |
| { |
| "epoch": 5.508687033582089, |
| "grad_norm": 0.321522980928421, |
| "learning_rate": 0.0005342117235345582, |
| "loss": 3.5297, |
| "step": 18900 |
| }, |
| { |
| "epoch": 5.523262593283582, |
| "grad_norm": 0.30180302262306213, |
| "learning_rate": 0.0005340367454068242, |
| "loss": 3.5229, |
| "step": 18950 |
| }, |
| { |
| "epoch": 5.537838152985074, |
| "grad_norm": 0.3097897469997406, |
| "learning_rate": 0.00053386176727909, |
| "loss": 3.5276, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.537838152985074, |
| "eval_accuracy": 0.35964044523814037, |
| "eval_loss": 3.637159824371338, |
| "eval_runtime": 179.7064, |
| "eval_samples_per_second": 92.523, |
| "eval_steps_per_second": 5.787, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.552413712686567, |
| "grad_norm": 0.31561505794525146, |
| "learning_rate": 0.000533686789151356, |
| "loss": 3.5307, |
| "step": 19050 |
| }, |
| { |
| "epoch": 5.56698927238806, |
| "grad_norm": 0.3239782452583313, |
| "learning_rate": 0.000533511811023622, |
| "loss": 3.5387, |
| "step": 19100 |
| }, |
| { |
| "epoch": 5.581564832089552, |
| "grad_norm": 0.3201083242893219, |
| "learning_rate": 0.000533336832895888, |
| "loss": 3.5297, |
| "step": 19150 |
| }, |
| { |
| "epoch": 5.596140391791045, |
| "grad_norm": 0.3185979425907135, |
| "learning_rate": 0.0005331618547681539, |
| "loss": 3.5493, |
| "step": 19200 |
| }, |
| { |
| "epoch": 5.610715951492537, |
| "grad_norm": 0.29556897282600403, |
| "learning_rate": 0.0005329868766404199, |
| "loss": 3.5341, |
| "step": 19250 |
| }, |
| { |
| "epoch": 5.62529151119403, |
| "grad_norm": 0.30993542075157166, |
| "learning_rate": 0.0005328118985126859, |
| "loss": 3.5413, |
| "step": 19300 |
| }, |
| { |
| "epoch": 5.639867070895522, |
| "grad_norm": 0.3120933175086975, |
| "learning_rate": 0.0005326369203849518, |
| "loss": 3.5441, |
| "step": 19350 |
| }, |
| { |
| "epoch": 5.654442630597015, |
| "grad_norm": 0.33338284492492676, |
| "learning_rate": 0.0005324619422572178, |
| "loss": 3.5467, |
| "step": 19400 |
| }, |
| { |
| "epoch": 5.669018190298507, |
| "grad_norm": 0.31648966670036316, |
| "learning_rate": 0.0005322869641294838, |
| "loss": 3.5423, |
| "step": 19450 |
| }, |
| { |
| "epoch": 5.68359375, |
| "grad_norm": 0.31063228845596313, |
| "learning_rate": 0.0005321119860017498, |
| "loss": 3.525, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.698169309701493, |
| "grad_norm": 0.31778037548065186, |
| "learning_rate": 0.0005319370078740157, |
| "loss": 3.5486, |
| "step": 19550 |
| }, |
| { |
| "epoch": 5.712744869402985, |
| "grad_norm": 0.3344435393810272, |
| "learning_rate": 0.0005317620297462817, |
| "loss": 3.534, |
| "step": 19600 |
| }, |
| { |
| "epoch": 5.727320429104478, |
| "grad_norm": 0.3090979754924774, |
| "learning_rate": 0.0005315870516185477, |
| "loss": 3.5415, |
| "step": 19650 |
| }, |
| { |
| "epoch": 5.74189598880597, |
| "grad_norm": 0.3059396743774414, |
| "learning_rate": 0.0005314120734908137, |
| "loss": 3.5331, |
| "step": 19700 |
| }, |
| { |
| "epoch": 5.756471548507463, |
| "grad_norm": 0.32924172282218933, |
| "learning_rate": 0.0005312370953630796, |
| "loss": 3.5382, |
| "step": 19750 |
| }, |
| { |
| "epoch": 5.771047108208955, |
| "grad_norm": 0.30785858631134033, |
| "learning_rate": 0.0005310621172353456, |
| "loss": 3.5335, |
| "step": 19800 |
| }, |
| { |
| "epoch": 5.785622667910448, |
| "grad_norm": 0.29037636518478394, |
| "learning_rate": 0.0005308871391076116, |
| "loss": 3.5379, |
| "step": 19850 |
| }, |
| { |
| "epoch": 5.80019822761194, |
| "grad_norm": 0.30217617750167847, |
| "learning_rate": 0.0005307121609798775, |
| "loss": 3.5226, |
| "step": 19900 |
| }, |
| { |
| "epoch": 5.814773787313433, |
| "grad_norm": 0.3155075013637543, |
| "learning_rate": 0.0005305371828521435, |
| "loss": 3.5429, |
| "step": 19950 |
| }, |
| { |
| "epoch": 5.829349347014926, |
| "grad_norm": 0.3263370096683502, |
| "learning_rate": 0.0005303622047244095, |
| "loss": 3.5397, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.829349347014926, |
| "eval_accuracy": 0.3607415001911987, |
| "eval_loss": 3.6235318183898926, |
| "eval_runtime": 180.9952, |
| "eval_samples_per_second": 91.864, |
| "eval_steps_per_second": 5.746, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.843924906716418, |
| "grad_norm": 0.3229924738407135, |
| "learning_rate": 0.0005301872265966753, |
| "loss": 3.5435, |
| "step": 20050 |
| }, |
| { |
| "epoch": 5.858500466417911, |
| "grad_norm": 0.31298452615737915, |
| "learning_rate": 0.0005300122484689413, |
| "loss": 3.5387, |
| "step": 20100 |
| }, |
| { |
| "epoch": 5.873076026119403, |
| "grad_norm": 0.31652334332466125, |
| "learning_rate": 0.0005298372703412073, |
| "loss": 3.5417, |
| "step": 20150 |
| }, |
| { |
| "epoch": 5.887651585820896, |
| "grad_norm": 0.32354360818862915, |
| "learning_rate": 0.0005296622922134732, |
| "loss": 3.5316, |
| "step": 20200 |
| }, |
| { |
| "epoch": 5.902227145522388, |
| "grad_norm": 0.2969135344028473, |
| "learning_rate": 0.0005294873140857392, |
| "loss": 3.5412, |
| "step": 20250 |
| }, |
| { |
| "epoch": 5.916802705223881, |
| "grad_norm": 0.3211456537246704, |
| "learning_rate": 0.0005293123359580052, |
| "loss": 3.5363, |
| "step": 20300 |
| }, |
| { |
| "epoch": 5.931378264925373, |
| "grad_norm": 0.32332107424736023, |
| "learning_rate": 0.0005291373578302711, |
| "loss": 3.5407, |
| "step": 20350 |
| }, |
| { |
| "epoch": 5.945953824626866, |
| "grad_norm": 0.32086077332496643, |
| "learning_rate": 0.0005289623797025371, |
| "loss": 3.5501, |
| "step": 20400 |
| }, |
| { |
| "epoch": 5.960529384328359, |
| "grad_norm": 0.3089979290962219, |
| "learning_rate": 0.0005287874015748031, |
| "loss": 3.5404, |
| "step": 20450 |
| }, |
| { |
| "epoch": 5.975104944029851, |
| "grad_norm": 0.30579036474227905, |
| "learning_rate": 0.000528612423447069, |
| "loss": 3.5325, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.989680503731344, |
| "grad_norm": 0.32269933819770813, |
| "learning_rate": 0.000528437445319335, |
| "loss": 3.5472, |
| "step": 20550 |
| }, |
| { |
| "epoch": 6.004081156716418, |
| "grad_norm": 0.30791178345680237, |
| "learning_rate": 0.000528262467191601, |
| "loss": 3.5089, |
| "step": 20600 |
| }, |
| { |
| "epoch": 6.018656716417911, |
| "grad_norm": 0.3126281797885895, |
| "learning_rate": 0.000528087489063867, |
| "loss": 3.4296, |
| "step": 20650 |
| }, |
| { |
| "epoch": 6.033232276119403, |
| "grad_norm": 0.3279401957988739, |
| "learning_rate": 0.0005279125109361329, |
| "loss": 3.436, |
| "step": 20700 |
| }, |
| { |
| "epoch": 6.047807835820896, |
| "grad_norm": 0.30899009108543396, |
| "learning_rate": 0.0005277375328083989, |
| "loss": 3.4333, |
| "step": 20750 |
| }, |
| { |
| "epoch": 6.062383395522388, |
| "grad_norm": 0.32034310698509216, |
| "learning_rate": 0.0005275625546806649, |
| "loss": 3.439, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.076958955223881, |
| "grad_norm": 0.3171665370464325, |
| "learning_rate": 0.0005273875765529309, |
| "loss": 3.4449, |
| "step": 20850 |
| }, |
| { |
| "epoch": 6.091534514925373, |
| "grad_norm": 0.3259046971797943, |
| "learning_rate": 0.0005272125984251968, |
| "loss": 3.4499, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.106110074626866, |
| "grad_norm": 0.3260829448699951, |
| "learning_rate": 0.0005270376202974628, |
| "loss": 3.465, |
| "step": 20950 |
| }, |
| { |
| "epoch": 6.120685634328359, |
| "grad_norm": 0.30281195044517517, |
| "learning_rate": 0.0005268626421697288, |
| "loss": 3.4575, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.120685634328359, |
| "eval_accuracy": 0.3610256206248366, |
| "eval_loss": 3.628716468811035, |
| "eval_runtime": 180.6094, |
| "eval_samples_per_second": 92.061, |
| "eval_steps_per_second": 5.758, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.135261194029851, |
| "grad_norm": 0.32482925057411194, |
| "learning_rate": 0.0005266876640419946, |
| "loss": 3.46, |
| "step": 21050 |
| }, |
| { |
| "epoch": 6.149836753731344, |
| "grad_norm": 0.33268213272094727, |
| "learning_rate": 0.0005265126859142606, |
| "loss": 3.4493, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.164412313432836, |
| "grad_norm": 0.3173036575317383, |
| "learning_rate": 0.0005263377077865266, |
| "loss": 3.4723, |
| "step": 21150 |
| }, |
| { |
| "epoch": 6.178987873134329, |
| "grad_norm": 0.3229723870754242, |
| "learning_rate": 0.0005261627296587926, |
| "loss": 3.471, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.193563432835821, |
| "grad_norm": 0.3389780521392822, |
| "learning_rate": 0.0005259877515310585, |
| "loss": 3.4613, |
| "step": 21250 |
| }, |
| { |
| "epoch": 6.208138992537314, |
| "grad_norm": 0.31343021988868713, |
| "learning_rate": 0.0005258127734033245, |
| "loss": 3.4633, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.222714552238806, |
| "grad_norm": 0.31590187549591064, |
| "learning_rate": 0.0005256377952755905, |
| "loss": 3.4774, |
| "step": 21350 |
| }, |
| { |
| "epoch": 6.237290111940299, |
| "grad_norm": 0.3065197765827179, |
| "learning_rate": 0.0005254628171478565, |
| "loss": 3.4778, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.251865671641791, |
| "grad_norm": 0.3195898234844208, |
| "learning_rate": 0.0005252878390201224, |
| "loss": 3.4793, |
| "step": 21450 |
| }, |
| { |
| "epoch": 6.266441231343284, |
| "grad_norm": 0.30886998772621155, |
| "learning_rate": 0.0005251128608923884, |
| "loss": 3.4665, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.2810167910447765, |
| "grad_norm": 0.3227124512195587, |
| "learning_rate": 0.0005249378827646544, |
| "loss": 3.472, |
| "step": 21550 |
| }, |
| { |
| "epoch": 6.2955923507462686, |
| "grad_norm": 0.31510311365127563, |
| "learning_rate": 0.0005247629046369204, |
| "loss": 3.4859, |
| "step": 21600 |
| }, |
| { |
| "epoch": 6.3101679104477615, |
| "grad_norm": 0.32103273272514343, |
| "learning_rate": 0.0005245879265091863, |
| "loss": 3.4858, |
| "step": 21650 |
| }, |
| { |
| "epoch": 6.3247434701492535, |
| "grad_norm": 0.3214368522167206, |
| "learning_rate": 0.0005244129483814523, |
| "loss": 3.4829, |
| "step": 21700 |
| }, |
| { |
| "epoch": 6.3393190298507465, |
| "grad_norm": 0.3349897563457489, |
| "learning_rate": 0.0005242379702537183, |
| "loss": 3.4755, |
| "step": 21750 |
| }, |
| { |
| "epoch": 6.3538945895522385, |
| "grad_norm": 0.35362866520881653, |
| "learning_rate": 0.0005240629921259843, |
| "loss": 3.4807, |
| "step": 21800 |
| }, |
| { |
| "epoch": 6.3684701492537314, |
| "grad_norm": 0.34084802865982056, |
| "learning_rate": 0.0005238880139982502, |
| "loss": 3.479, |
| "step": 21850 |
| }, |
| { |
| "epoch": 6.3830457089552235, |
| "grad_norm": 0.31975167989730835, |
| "learning_rate": 0.0005237130358705162, |
| "loss": 3.4599, |
| "step": 21900 |
| }, |
| { |
| "epoch": 6.397621268656716, |
| "grad_norm": 0.3405190408229828, |
| "learning_rate": 0.0005235380577427822, |
| "loss": 3.4888, |
| "step": 21950 |
| }, |
| { |
| "epoch": 6.412196828358209, |
| "grad_norm": 0.3264370858669281, |
| "learning_rate": 0.0005233630796150481, |
| "loss": 3.4711, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.412196828358209, |
| "eval_accuracy": 0.3617952409709669, |
| "eval_loss": 3.6189749240875244, |
| "eval_runtime": 180.5749, |
| "eval_samples_per_second": 92.078, |
| "eval_steps_per_second": 5.759, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.426772388059701, |
| "grad_norm": 0.31837818026542664, |
| "learning_rate": 0.0005231881014873141, |
| "loss": 3.487, |
| "step": 22050 |
| }, |
| { |
| "epoch": 6.441347947761194, |
| "grad_norm": 0.3304750323295593, |
| "learning_rate": 0.00052301312335958, |
| "loss": 3.4812, |
| "step": 22100 |
| }, |
| { |
| "epoch": 6.455923507462686, |
| "grad_norm": 0.3154893219470978, |
| "learning_rate": 0.0005228381452318459, |
| "loss": 3.4939, |
| "step": 22150 |
| }, |
| { |
| "epoch": 6.470499067164179, |
| "grad_norm": 0.30578863620758057, |
| "learning_rate": 0.0005226631671041119, |
| "loss": 3.4839, |
| "step": 22200 |
| }, |
| { |
| "epoch": 6.485074626865671, |
| "grad_norm": 0.3136824667453766, |
| "learning_rate": 0.0005224881889763779, |
| "loss": 3.482, |
| "step": 22250 |
| }, |
| { |
| "epoch": 6.499650186567164, |
| "grad_norm": 0.3263259828090668, |
| "learning_rate": 0.0005223132108486439, |
| "loss": 3.4907, |
| "step": 22300 |
| }, |
| { |
| "epoch": 6.514225746268656, |
| "grad_norm": 0.3149360716342926, |
| "learning_rate": 0.0005221382327209098, |
| "loss": 3.4945, |
| "step": 22350 |
| }, |
| { |
| "epoch": 6.528801305970149, |
| "grad_norm": 0.3265800476074219, |
| "learning_rate": 0.0005219632545931758, |
| "loss": 3.4851, |
| "step": 22400 |
| }, |
| { |
| "epoch": 6.543376865671641, |
| "grad_norm": 0.3209088146686554, |
| "learning_rate": 0.0005217882764654418, |
| "loss": 3.5038, |
| "step": 22450 |
| }, |
| { |
| "epoch": 6.557952425373134, |
| "grad_norm": 0.318444162607193, |
| "learning_rate": 0.0005216132983377077, |
| "loss": 3.4979, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.572527985074627, |
| "grad_norm": 0.3191940188407898, |
| "learning_rate": 0.0005214383202099737, |
| "loss": 3.4907, |
| "step": 22550 |
| }, |
| { |
| "epoch": 6.587103544776119, |
| "grad_norm": 0.32639753818511963, |
| "learning_rate": 0.0005212633420822397, |
| "loss": 3.4945, |
| "step": 22600 |
| }, |
| { |
| "epoch": 6.601679104477612, |
| "grad_norm": 0.3260699510574341, |
| "learning_rate": 0.0005210883639545056, |
| "loss": 3.4953, |
| "step": 22650 |
| }, |
| { |
| "epoch": 6.616254664179104, |
| "grad_norm": 0.32714834809303284, |
| "learning_rate": 0.0005209133858267716, |
| "loss": 3.4973, |
| "step": 22700 |
| }, |
| { |
| "epoch": 6.630830223880597, |
| "grad_norm": 0.3026060163974762, |
| "learning_rate": 0.0005207384076990376, |
| "loss": 3.4976, |
| "step": 22750 |
| }, |
| { |
| "epoch": 6.645405783582089, |
| "grad_norm": 0.32032445073127747, |
| "learning_rate": 0.0005205634295713035, |
| "loss": 3.5048, |
| "step": 22800 |
| }, |
| { |
| "epoch": 6.659981343283582, |
| "grad_norm": 0.32959914207458496, |
| "learning_rate": 0.0005203884514435695, |
| "loss": 3.4923, |
| "step": 22850 |
| }, |
| { |
| "epoch": 6.674556902985074, |
| "grad_norm": 0.33499661087989807, |
| "learning_rate": 0.0005202134733158355, |
| "loss": 3.5019, |
| "step": 22900 |
| }, |
| { |
| "epoch": 6.689132462686567, |
| "grad_norm": 0.3219582140445709, |
| "learning_rate": 0.0005200384951881014, |
| "loss": 3.4957, |
| "step": 22950 |
| }, |
| { |
| "epoch": 6.70370802238806, |
| "grad_norm": 0.31592950224876404, |
| "learning_rate": 0.0005198635170603674, |
| "loss": 3.4998, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.70370802238806, |
| "eval_accuracy": 0.36258804761594826, |
| "eval_loss": 3.609095573425293, |
| "eval_runtime": 179.2047, |
| "eval_samples_per_second": 92.782, |
| "eval_steps_per_second": 5.803, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.718283582089552, |
| "grad_norm": 0.3130515217781067, |
| "learning_rate": 0.0005196885389326334, |
| "loss": 3.5013, |
| "step": 23050 |
| }, |
| { |
| "epoch": 6.732859141791045, |
| "grad_norm": 0.3212002217769623, |
| "learning_rate": 0.0005195135608048994, |
| "loss": 3.4966, |
| "step": 23100 |
| }, |
| { |
| "epoch": 6.747434701492537, |
| "grad_norm": 0.34351035952568054, |
| "learning_rate": 0.0005193385826771652, |
| "loss": 3.4956, |
| "step": 23150 |
| }, |
| { |
| "epoch": 6.76201026119403, |
| "grad_norm": 0.3154422640800476, |
| "learning_rate": 0.0005191636045494312, |
| "loss": 3.493, |
| "step": 23200 |
| }, |
| { |
| "epoch": 6.776585820895522, |
| "grad_norm": 0.3303772211074829, |
| "learning_rate": 0.0005189886264216972, |
| "loss": 3.4903, |
| "step": 23250 |
| }, |
| { |
| "epoch": 6.791161380597015, |
| "grad_norm": 0.3244941830635071, |
| "learning_rate": 0.0005188136482939632, |
| "loss": 3.4991, |
| "step": 23300 |
| }, |
| { |
| "epoch": 6.805736940298507, |
| "grad_norm": 0.3155946135520935, |
| "learning_rate": 0.0005186386701662291, |
| "loss": 3.4911, |
| "step": 23350 |
| }, |
| { |
| "epoch": 6.8203125, |
| "grad_norm": 0.31832653284072876, |
| "learning_rate": 0.0005184636920384951, |
| "loss": 3.4976, |
| "step": 23400 |
| }, |
| { |
| "epoch": 6.834888059701493, |
| "grad_norm": 0.3354407846927643, |
| "learning_rate": 0.0005182887139107611, |
| "loss": 3.4912, |
| "step": 23450 |
| }, |
| { |
| "epoch": 6.849463619402985, |
| "grad_norm": 0.3141067326068878, |
| "learning_rate": 0.0005181137357830271, |
| "loss": 3.4983, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.864039179104478, |
| "grad_norm": 0.31598249077796936, |
| "learning_rate": 0.000517938757655293, |
| "loss": 3.501, |
| "step": 23550 |
| }, |
| { |
| "epoch": 6.87861473880597, |
| "grad_norm": 0.3097233772277832, |
| "learning_rate": 0.000517763779527559, |
| "loss": 3.5097, |
| "step": 23600 |
| }, |
| { |
| "epoch": 6.893190298507463, |
| "grad_norm": 0.3032672107219696, |
| "learning_rate": 0.000517588801399825, |
| "loss": 3.4993, |
| "step": 23650 |
| }, |
| { |
| "epoch": 6.907765858208955, |
| "grad_norm": 0.3242533802986145, |
| "learning_rate": 0.0005174138232720909, |
| "loss": 3.5084, |
| "step": 23700 |
| }, |
| { |
| "epoch": 6.922341417910448, |
| "grad_norm": 0.30605459213256836, |
| "learning_rate": 0.0005172388451443569, |
| "loss": 3.5068, |
| "step": 23750 |
| }, |
| { |
| "epoch": 6.93691697761194, |
| "grad_norm": 0.31153604388237, |
| "learning_rate": 0.0005170638670166229, |
| "loss": 3.5068, |
| "step": 23800 |
| }, |
| { |
| "epoch": 6.951492537313433, |
| "grad_norm": 0.3352176547050476, |
| "learning_rate": 0.0005168888888888889, |
| "loss": 3.5053, |
| "step": 23850 |
| }, |
| { |
| "epoch": 6.966068097014926, |
| "grad_norm": 0.347098171710968, |
| "learning_rate": 0.0005167139107611548, |
| "loss": 3.4885, |
| "step": 23900 |
| }, |
| { |
| "epoch": 6.980643656716418, |
| "grad_norm": 0.31401047110557556, |
| "learning_rate": 0.0005165389326334208, |
| "loss": 3.5066, |
| "step": 23950 |
| }, |
| { |
| "epoch": 6.995219216417911, |
| "grad_norm": 0.327211856842041, |
| "learning_rate": 0.0005163639545056868, |
| "loss": 3.5047, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.995219216417911, |
| "eval_accuracy": 0.36335613790174826, |
| "eval_loss": 3.6004271507263184, |
| "eval_runtime": 185.5366, |
| "eval_samples_per_second": 89.616, |
| "eval_steps_per_second": 5.605, |
| "step": 24000 |
| }, |
| { |
| "epoch": 7.009619869402985, |
| "grad_norm": 0.30569273233413696, |
| "learning_rate": 0.0005161889763779528, |
| "loss": 3.428, |
| "step": 24050 |
| }, |
| { |
| "epoch": 7.024195429104478, |
| "grad_norm": 0.32120874524116516, |
| "learning_rate": 0.0005160139982502187, |
| "loss": 3.3881, |
| "step": 24100 |
| }, |
| { |
| "epoch": 7.03877098880597, |
| "grad_norm": 0.33526530861854553, |
| "learning_rate": 0.0005158390201224847, |
| "loss": 3.4098, |
| "step": 24150 |
| }, |
| { |
| "epoch": 7.053346548507463, |
| "grad_norm": 0.3269498348236084, |
| "learning_rate": 0.0005156640419947507, |
| "loss": 3.3953, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.067922108208955, |
| "grad_norm": 0.3256247639656067, |
| "learning_rate": 0.0005154890638670167, |
| "loss": 3.3965, |
| "step": 24250 |
| }, |
| { |
| "epoch": 7.082497667910448, |
| "grad_norm": 0.3173428177833557, |
| "learning_rate": 0.0005153140857392825, |
| "loss": 3.4165, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.09707322761194, |
| "grad_norm": 0.3311133086681366, |
| "learning_rate": 0.0005151391076115485, |
| "loss": 3.4155, |
| "step": 24350 |
| }, |
| { |
| "epoch": 7.111648787313433, |
| "grad_norm": 0.3398746848106384, |
| "learning_rate": 0.0005149641294838145, |
| "loss": 3.4204, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.126224347014926, |
| "grad_norm": 0.32222801446914673, |
| "learning_rate": 0.0005147891513560804, |
| "loss": 3.4071, |
| "step": 24450 |
| }, |
| { |
| "epoch": 7.140799906716418, |
| "grad_norm": 0.3478772044181824, |
| "learning_rate": 0.0005146141732283464, |
| "loss": 3.4268, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.155375466417911, |
| "grad_norm": 0.33055752515792847, |
| "learning_rate": 0.0005144391951006124, |
| "loss": 3.4221, |
| "step": 24550 |
| }, |
| { |
| "epoch": 7.169951026119403, |
| "grad_norm": 0.32589223980903625, |
| "learning_rate": 0.0005142642169728783, |
| "loss": 3.421, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.184526585820896, |
| "grad_norm": 0.30772513151168823, |
| "learning_rate": 0.0005140892388451443, |
| "loss": 3.4371, |
| "step": 24650 |
| }, |
| { |
| "epoch": 7.199102145522388, |
| "grad_norm": 0.32640331983566284, |
| "learning_rate": 0.0005139142607174103, |
| "loss": 3.4217, |
| "step": 24700 |
| }, |
| { |
| "epoch": 7.213677705223881, |
| "grad_norm": 0.3319501578807831, |
| "learning_rate": 0.0005137392825896762, |
| "loss": 3.4361, |
| "step": 24750 |
| }, |
| { |
| "epoch": 7.228253264925373, |
| "grad_norm": 0.323111891746521, |
| "learning_rate": 0.0005135643044619422, |
| "loss": 3.4226, |
| "step": 24800 |
| }, |
| { |
| "epoch": 7.242828824626866, |
| "grad_norm": 0.3399120569229126, |
| "learning_rate": 0.0005133893263342082, |
| "loss": 3.4465, |
| "step": 24850 |
| }, |
| { |
| "epoch": 7.257404384328359, |
| "grad_norm": 0.3136339485645294, |
| "learning_rate": 0.0005132143482064742, |
| "loss": 3.4355, |
| "step": 24900 |
| }, |
| { |
| "epoch": 7.271979944029851, |
| "grad_norm": 0.3228020668029785, |
| "learning_rate": 0.0005130393700787401, |
| "loss": 3.4474, |
| "step": 24950 |
| }, |
| { |
| "epoch": 7.286555503731344, |
| "grad_norm": 0.3219752311706543, |
| "learning_rate": 0.0005128643919510061, |
| "loss": 3.4571, |
| "step": 25000 |
| }, |
| { |
| "epoch": 7.286555503731344, |
| "eval_accuracy": 0.3633374240869394, |
| "eval_loss": 3.608869791030884, |
| "eval_runtime": 179.1593, |
| "eval_samples_per_second": 92.806, |
| "eval_steps_per_second": 5.805, |
| "step": 25000 |
| }, |
| { |
| "epoch": 7.301131063432836, |
| "grad_norm": 0.33547621965408325, |
| "learning_rate": 0.0005126894138232721, |
| "loss": 3.4439, |
| "step": 25050 |
| }, |
| { |
| "epoch": 7.315706623134329, |
| "grad_norm": 0.3228926956653595, |
| "learning_rate": 0.000512514435695538, |
| "loss": 3.425, |
| "step": 25100 |
| }, |
| { |
| "epoch": 7.330282182835821, |
| "grad_norm": 0.3242122530937195, |
| "learning_rate": 0.000512339457567804, |
| "loss": 3.4526, |
| "step": 25150 |
| }, |
| { |
| "epoch": 7.344857742537314, |
| "grad_norm": 0.32178714871406555, |
| "learning_rate": 0.00051216447944007, |
| "loss": 3.4547, |
| "step": 25200 |
| }, |
| { |
| "epoch": 7.359433302238806, |
| "grad_norm": 0.3509860932826996, |
| "learning_rate": 0.0005119895013123358, |
| "loss": 3.4411, |
| "step": 25250 |
| }, |
| { |
| "epoch": 7.374008861940299, |
| "grad_norm": 0.3500162661075592, |
| "learning_rate": 0.0005118145231846018, |
| "loss": 3.4378, |
| "step": 25300 |
| }, |
| { |
| "epoch": 7.388584421641791, |
| "grad_norm": 0.3226238191127777, |
| "learning_rate": 0.0005116395450568678, |
| "loss": 3.4573, |
| "step": 25350 |
| }, |
| { |
| "epoch": 7.403159981343284, |
| "grad_norm": 0.32828742265701294, |
| "learning_rate": 0.0005114645669291338, |
| "loss": 3.4468, |
| "step": 25400 |
| }, |
| { |
| "epoch": 7.4177355410447765, |
| "grad_norm": 0.3429689109325409, |
| "learning_rate": 0.0005112895888013997, |
| "loss": 3.4415, |
| "step": 25450 |
| }, |
| { |
| "epoch": 7.4323111007462686, |
| "grad_norm": 0.323826402425766, |
| "learning_rate": 0.0005111146106736657, |
| "loss": 3.4305, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.4468866604477615, |
| "grad_norm": 0.32293814420700073, |
| "learning_rate": 0.0005109396325459317, |
| "loss": 3.4567, |
| "step": 25550 |
| }, |
| { |
| "epoch": 7.4614622201492535, |
| "grad_norm": 0.32880374789237976, |
| "learning_rate": 0.0005107646544181976, |
| "loss": 3.4482, |
| "step": 25600 |
| }, |
| { |
| "epoch": 7.4760377798507465, |
| "grad_norm": 0.3458571135997772, |
| "learning_rate": 0.0005105896762904636, |
| "loss": 3.4586, |
| "step": 25650 |
| }, |
| { |
| "epoch": 7.4906133395522385, |
| "grad_norm": 0.3313862681388855, |
| "learning_rate": 0.0005104146981627296, |
| "loss": 3.4506, |
| "step": 25700 |
| }, |
| { |
| "epoch": 7.5051888992537314, |
| "grad_norm": 0.34324750304222107, |
| "learning_rate": 0.0005102397200349956, |
| "loss": 3.4432, |
| "step": 25750 |
| }, |
| { |
| "epoch": 7.5197644589552235, |
| "grad_norm": 0.3350330889225006, |
| "learning_rate": 0.0005100647419072615, |
| "loss": 3.4469, |
| "step": 25800 |
| }, |
| { |
| "epoch": 7.534340018656716, |
| "grad_norm": 0.340985506772995, |
| "learning_rate": 0.0005098897637795275, |
| "loss": 3.4555, |
| "step": 25850 |
| }, |
| { |
| "epoch": 7.5489155783582085, |
| "grad_norm": 0.3093026876449585, |
| "learning_rate": 0.0005097147856517935, |
| "loss": 3.4631, |
| "step": 25900 |
| }, |
| { |
| "epoch": 7.563491138059701, |
| "grad_norm": 0.33767765760421753, |
| "learning_rate": 0.0005095398075240595, |
| "loss": 3.4663, |
| "step": 25950 |
| }, |
| { |
| "epoch": 7.578066697761194, |
| "grad_norm": 0.3200538754463196, |
| "learning_rate": 0.0005093648293963254, |
| "loss": 3.4564, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.578066697761194, |
| "eval_accuracy": 0.3640933915870457, |
| "eval_loss": 3.600985288619995, |
| "eval_runtime": 179.1567, |
| "eval_samples_per_second": 92.807, |
| "eval_steps_per_second": 5.805, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.592642257462686, |
| "grad_norm": 0.3065427541732788, |
| "learning_rate": 0.0005091898512685914, |
| "loss": 3.4663, |
| "step": 26050 |
| }, |
| { |
| "epoch": 7.607217817164179, |
| "grad_norm": 0.34799593687057495, |
| "learning_rate": 0.0005090148731408574, |
| "loss": 3.4774, |
| "step": 26100 |
| }, |
| { |
| "epoch": 7.621793376865671, |
| "grad_norm": 0.31384772062301636, |
| "learning_rate": 0.0005088398950131234, |
| "loss": 3.4512, |
| "step": 26150 |
| }, |
| { |
| "epoch": 7.636368936567164, |
| "grad_norm": 0.3293142318725586, |
| "learning_rate": 0.0005086649168853893, |
| "loss": 3.4681, |
| "step": 26200 |
| }, |
| { |
| "epoch": 7.650944496268656, |
| "grad_norm": 0.3129211366176605, |
| "learning_rate": 0.0005084899387576553, |
| "loss": 3.4744, |
| "step": 26250 |
| }, |
| { |
| "epoch": 7.665520055970149, |
| "grad_norm": 0.33139219880104065, |
| "learning_rate": 0.0005083149606299213, |
| "loss": 3.4574, |
| "step": 26300 |
| }, |
| { |
| "epoch": 7.680095615671641, |
| "grad_norm": 0.3320215344429016, |
| "learning_rate": 0.0005081399825021873, |
| "loss": 3.4782, |
| "step": 26350 |
| }, |
| { |
| "epoch": 7.694671175373134, |
| "grad_norm": 0.32884448766708374, |
| "learning_rate": 0.0005079650043744531, |
| "loss": 3.46, |
| "step": 26400 |
| }, |
| { |
| "epoch": 7.709246735074627, |
| "grad_norm": 0.316210001707077, |
| "learning_rate": 0.0005077900262467191, |
| "loss": 3.4649, |
| "step": 26450 |
| }, |
| { |
| "epoch": 7.723822294776119, |
| "grad_norm": 0.3186090290546417, |
| "learning_rate": 0.0005076150481189851, |
| "loss": 3.4714, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.738397854477612, |
| "grad_norm": 0.29783856868743896, |
| "learning_rate": 0.000507440069991251, |
| "loss": 3.4626, |
| "step": 26550 |
| }, |
| { |
| "epoch": 7.752973414179104, |
| "grad_norm": 0.3331296145915985, |
| "learning_rate": 0.000507265091863517, |
| "loss": 3.4725, |
| "step": 26600 |
| }, |
| { |
| "epoch": 7.767548973880597, |
| "grad_norm": 0.3241812288761139, |
| "learning_rate": 0.000507090113735783, |
| "loss": 3.4666, |
| "step": 26650 |
| }, |
| { |
| "epoch": 7.782124533582089, |
| "grad_norm": 0.32092708349227905, |
| "learning_rate": 0.000506915135608049, |
| "loss": 3.4683, |
| "step": 26700 |
| }, |
| { |
| "epoch": 7.796700093283582, |
| "grad_norm": 0.3200468420982361, |
| "learning_rate": 0.0005067401574803149, |
| "loss": 3.4765, |
| "step": 26750 |
| }, |
| { |
| "epoch": 7.811275652985074, |
| "grad_norm": 0.30555614829063416, |
| "learning_rate": 0.0005065651793525809, |
| "loss": 3.4658, |
| "step": 26800 |
| }, |
| { |
| "epoch": 7.825851212686567, |
| "grad_norm": 0.3325377404689789, |
| "learning_rate": 0.0005063902012248469, |
| "loss": 3.4648, |
| "step": 26850 |
| }, |
| { |
| "epoch": 7.84042677238806, |
| "grad_norm": 0.32855042815208435, |
| "learning_rate": 0.0005062152230971128, |
| "loss": 3.4646, |
| "step": 26900 |
| }, |
| { |
| "epoch": 7.855002332089552, |
| "grad_norm": 0.3337857127189636, |
| "learning_rate": 0.0005060402449693788, |
| "loss": 3.4775, |
| "step": 26950 |
| }, |
| { |
| "epoch": 7.869577891791045, |
| "grad_norm": 0.33446067571640015, |
| "learning_rate": 0.0005058652668416448, |
| "loss": 3.4619, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.869577891791045, |
| "eval_accuracy": 0.3644832038804213, |
| "eval_loss": 3.592026472091675, |
| "eval_runtime": 179.258, |
| "eval_samples_per_second": 92.755, |
| "eval_steps_per_second": 5.802, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.884153451492537, |
| "grad_norm": 0.3174329400062561, |
| "learning_rate": 0.0005056902887139107, |
| "loss": 3.4611, |
| "step": 27050 |
| }, |
| { |
| "epoch": 7.89872901119403, |
| "grad_norm": 0.3485661745071411, |
| "learning_rate": 0.0005055153105861767, |
| "loss": 3.4636, |
| "step": 27100 |
| }, |
| { |
| "epoch": 7.913304570895522, |
| "grad_norm": 0.3340265154838562, |
| "learning_rate": 0.0005053403324584427, |
| "loss": 3.4708, |
| "step": 27150 |
| }, |
| { |
| "epoch": 7.927880130597015, |
| "grad_norm": 0.3334951102733612, |
| "learning_rate": 0.0005051653543307086, |
| "loss": 3.4747, |
| "step": 27200 |
| }, |
| { |
| "epoch": 7.942455690298507, |
| "grad_norm": 0.3194654583930969, |
| "learning_rate": 0.0005049903762029746, |
| "loss": 3.4683, |
| "step": 27250 |
| }, |
| { |
| "epoch": 7.95703125, |
| "grad_norm": 0.34018611907958984, |
| "learning_rate": 0.0005048153980752406, |
| "loss": 3.4781, |
| "step": 27300 |
| }, |
| { |
| "epoch": 7.971606809701493, |
| "grad_norm": 0.32004550099372864, |
| "learning_rate": 0.0005046404199475064, |
| "loss": 3.4654, |
| "step": 27350 |
| }, |
| { |
| "epoch": 7.986182369402985, |
| "grad_norm": 0.30326974391937256, |
| "learning_rate": 0.0005044654418197724, |
| "loss": 3.4611, |
| "step": 27400 |
| }, |
| { |
| "epoch": 8.00058302238806, |
| "grad_norm": 0.3574485182762146, |
| "learning_rate": 0.0005042904636920384, |
| "loss": 3.4766, |
| "step": 27450 |
| }, |
| { |
| "epoch": 8.015158582089553, |
| "grad_norm": 0.3087901771068573, |
| "learning_rate": 0.0005041154855643044, |
| "loss": 3.3484, |
| "step": 27500 |
| }, |
| { |
| "epoch": 8.029734141791044, |
| "grad_norm": 0.3568093180656433, |
| "learning_rate": 0.0005039405074365703, |
| "loss": 3.3668, |
| "step": 27550 |
| }, |
| { |
| "epoch": 8.044309701492537, |
| "grad_norm": 0.342636376619339, |
| "learning_rate": 0.0005037655293088363, |
| "loss": 3.3701, |
| "step": 27600 |
| }, |
| { |
| "epoch": 8.05888526119403, |
| "grad_norm": 0.3330060541629791, |
| "learning_rate": 0.0005035905511811023, |
| "loss": 3.3651, |
| "step": 27650 |
| }, |
| { |
| "epoch": 8.073460820895523, |
| "grad_norm": 0.3401563763618469, |
| "learning_rate": 0.0005034155730533682, |
| "loss": 3.3622, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.088036380597014, |
| "grad_norm": 0.32643309235572815, |
| "learning_rate": 0.0005032405949256342, |
| "loss": 3.3783, |
| "step": 27750 |
| }, |
| { |
| "epoch": 8.102611940298507, |
| "grad_norm": 0.35396072268486023, |
| "learning_rate": 0.0005030656167979002, |
| "loss": 3.3945, |
| "step": 27800 |
| }, |
| { |
| "epoch": 8.1171875, |
| "grad_norm": 0.32265105843544006, |
| "learning_rate": 0.0005028906386701662, |
| "loss": 3.3832, |
| "step": 27850 |
| }, |
| { |
| "epoch": 8.131763059701493, |
| "grad_norm": 0.32299792766571045, |
| "learning_rate": 0.0005027156605424321, |
| "loss": 3.3836, |
| "step": 27900 |
| }, |
| { |
| "epoch": 8.146338619402986, |
| "grad_norm": 0.3399452567100525, |
| "learning_rate": 0.0005025406824146981, |
| "loss": 3.3942, |
| "step": 27950 |
| }, |
| { |
| "epoch": 8.160914179104477, |
| "grad_norm": 0.3137241303920746, |
| "learning_rate": 0.0005023657042869641, |
| "loss": 3.3915, |
| "step": 28000 |
| }, |
| { |
| "epoch": 8.160914179104477, |
| "eval_accuracy": 0.3645743013185471, |
| "eval_loss": 3.599012613296509, |
| "eval_runtime": 179.2858, |
| "eval_samples_per_second": 92.74, |
| "eval_steps_per_second": 5.801, |
| "step": 28000 |
| }, |
| { |
| "epoch": 8.17548973880597, |
| "grad_norm": 0.3267982602119446, |
| "learning_rate": 0.0005021907261592301, |
| "loss": 3.3934, |
| "step": 28050 |
| }, |
| { |
| "epoch": 8.190065298507463, |
| "grad_norm": 0.3097693622112274, |
| "learning_rate": 0.000502015748031496, |
| "loss": 3.3953, |
| "step": 28100 |
| }, |
| { |
| "epoch": 8.204640858208956, |
| "grad_norm": 0.32897573709487915, |
| "learning_rate": 0.000501840769903762, |
| "loss": 3.4011, |
| "step": 28150 |
| }, |
| { |
| "epoch": 8.219216417910447, |
| "grad_norm": 0.3252919912338257, |
| "learning_rate": 0.000501665791776028, |
| "loss": 3.3996, |
| "step": 28200 |
| }, |
| { |
| "epoch": 8.23379197761194, |
| "grad_norm": 0.31856435537338257, |
| "learning_rate": 0.0005014908136482939, |
| "loss": 3.3998, |
| "step": 28250 |
| }, |
| { |
| "epoch": 8.248367537313433, |
| "grad_norm": 0.3390394449234009, |
| "learning_rate": 0.0005013158355205599, |
| "loss": 3.3888, |
| "step": 28300 |
| }, |
| { |
| "epoch": 8.262943097014926, |
| "grad_norm": 0.340518593788147, |
| "learning_rate": 0.0005011408573928259, |
| "loss": 3.4207, |
| "step": 28350 |
| }, |
| { |
| "epoch": 8.277518656716419, |
| "grad_norm": 0.323466956615448, |
| "learning_rate": 0.0005009658792650919, |
| "loss": 3.4109, |
| "step": 28400 |
| }, |
| { |
| "epoch": 8.29209421641791, |
| "grad_norm": 0.3533046245574951, |
| "learning_rate": 0.0005007909011373577, |
| "loss": 3.4048, |
| "step": 28450 |
| }, |
| { |
| "epoch": 8.306669776119403, |
| "grad_norm": 0.33783796429634094, |
| "learning_rate": 0.0005006159230096237, |
| "loss": 3.4087, |
| "step": 28500 |
| }, |
| { |
| "epoch": 8.321245335820896, |
| "grad_norm": 0.36026251316070557, |
| "learning_rate": 0.0005004409448818897, |
| "loss": 3.4204, |
| "step": 28550 |
| }, |
| { |
| "epoch": 8.335820895522389, |
| "grad_norm": 0.3321758508682251, |
| "learning_rate": 0.0005002659667541557, |
| "loss": 3.4211, |
| "step": 28600 |
| }, |
| { |
| "epoch": 8.35039645522388, |
| "grad_norm": 0.33608278632164, |
| "learning_rate": 0.0005000909886264216, |
| "loss": 3.4177, |
| "step": 28650 |
| }, |
| { |
| "epoch": 8.364972014925373, |
| "grad_norm": 0.3608180582523346, |
| "learning_rate": 0.0004999160104986876, |
| "loss": 3.4184, |
| "step": 28700 |
| }, |
| { |
| "epoch": 8.379547574626866, |
| "grad_norm": 0.3502536714076996, |
| "learning_rate": 0.0004997410323709536, |
| "loss": 3.4156, |
| "step": 28750 |
| }, |
| { |
| "epoch": 8.394123134328359, |
| "grad_norm": 0.3335579037666321, |
| "learning_rate": 0.0004995660542432196, |
| "loss": 3.4176, |
| "step": 28800 |
| }, |
| { |
| "epoch": 8.408698694029852, |
| "grad_norm": 0.31768515706062317, |
| "learning_rate": 0.0004993910761154855, |
| "loss": 3.4304, |
| "step": 28850 |
| }, |
| { |
| "epoch": 8.423274253731343, |
| "grad_norm": 0.3465861976146698, |
| "learning_rate": 0.0004992160979877515, |
| "loss": 3.4346, |
| "step": 28900 |
| }, |
| { |
| "epoch": 8.437849813432836, |
| "grad_norm": 0.33403196930885315, |
| "learning_rate": 0.0004990411198600175, |
| "loss": 3.4151, |
| "step": 28950 |
| }, |
| { |
| "epoch": 8.452425373134329, |
| "grad_norm": 0.3128868341445923, |
| "learning_rate": 0.0004988661417322835, |
| "loss": 3.4238, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.452425373134329, |
| "eval_accuracy": 0.3648340584838491, |
| "eval_loss": 3.5954160690307617, |
| "eval_runtime": 179.3003, |
| "eval_samples_per_second": 92.733, |
| "eval_steps_per_second": 5.8, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.467000932835822, |
| "grad_norm": 0.331660658121109, |
| "learning_rate": 0.0004986911636045494, |
| "loss": 3.425, |
| "step": 29050 |
| }, |
| { |
| "epoch": 8.481576492537313, |
| "grad_norm": 0.3326253294944763, |
| "learning_rate": 0.0004985161854768154, |
| "loss": 3.4186, |
| "step": 29100 |
| }, |
| { |
| "epoch": 8.496152052238806, |
| "grad_norm": 0.32247307896614075, |
| "learning_rate": 0.0004983412073490814, |
| "loss": 3.4258, |
| "step": 29150 |
| }, |
| { |
| "epoch": 8.510727611940299, |
| "grad_norm": 0.3508327901363373, |
| "learning_rate": 0.0004981662292213473, |
| "loss": 3.422, |
| "step": 29200 |
| }, |
| { |
| "epoch": 8.525303171641792, |
| "grad_norm": 0.33560535311698914, |
| "learning_rate": 0.0004979912510936133, |
| "loss": 3.422, |
| "step": 29250 |
| }, |
| { |
| "epoch": 8.539878731343283, |
| "grad_norm": 0.3057290017604828, |
| "learning_rate": 0.0004978162729658793, |
| "loss": 3.4313, |
| "step": 29300 |
| }, |
| { |
| "epoch": 8.554454291044776, |
| "grad_norm": 0.327250212430954, |
| "learning_rate": 0.0004976412948381452, |
| "loss": 3.4276, |
| "step": 29350 |
| }, |
| { |
| "epoch": 8.569029850746269, |
| "grad_norm": 0.32968395948410034, |
| "learning_rate": 0.0004974663167104112, |
| "loss": 3.4331, |
| "step": 29400 |
| }, |
| { |
| "epoch": 8.583605410447761, |
| "grad_norm": 0.32354989647865295, |
| "learning_rate": 0.0004972913385826772, |
| "loss": 3.426, |
| "step": 29450 |
| }, |
| { |
| "epoch": 8.598180970149254, |
| "grad_norm": 0.3291105031967163, |
| "learning_rate": 0.000497116360454943, |
| "loss": 3.4402, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.612756529850746, |
| "grad_norm": 0.329121470451355, |
| "learning_rate": 0.000496941382327209, |
| "loss": 3.4417, |
| "step": 29550 |
| }, |
| { |
| "epoch": 8.627332089552239, |
| "grad_norm": 0.3463682532310486, |
| "learning_rate": 0.000496766404199475, |
| "loss": 3.4281, |
| "step": 29600 |
| }, |
| { |
| "epoch": 8.641907649253731, |
| "grad_norm": 0.32956844568252563, |
| "learning_rate": 0.0004965914260717409, |
| "loss": 3.4311, |
| "step": 29650 |
| }, |
| { |
| "epoch": 8.656483208955224, |
| "grad_norm": 0.305973619222641, |
| "learning_rate": 0.0004964164479440069, |
| "loss": 3.4449, |
| "step": 29700 |
| }, |
| { |
| "epoch": 8.671058768656717, |
| "grad_norm": 0.32016459107398987, |
| "learning_rate": 0.0004962414698162729, |
| "loss": 3.4306, |
| "step": 29750 |
| }, |
| { |
| "epoch": 8.685634328358208, |
| "grad_norm": 0.3539639711380005, |
| "learning_rate": 0.0004960664916885388, |
| "loss": 3.4281, |
| "step": 29800 |
| }, |
| { |
| "epoch": 8.700209888059701, |
| "grad_norm": 0.3156653046607971, |
| "learning_rate": 0.0004958915135608048, |
| "loss": 3.4358, |
| "step": 29850 |
| }, |
| { |
| "epoch": 8.714785447761194, |
| "grad_norm": 0.3207005262374878, |
| "learning_rate": 0.0004957165354330708, |
| "loss": 3.4357, |
| "step": 29900 |
| }, |
| { |
| "epoch": 8.729361007462687, |
| "grad_norm": 0.3273802101612091, |
| "learning_rate": 0.0004955415573053368, |
| "loss": 3.4446, |
| "step": 29950 |
| }, |
| { |
| "epoch": 8.743936567164178, |
| "grad_norm": 0.3333602249622345, |
| "learning_rate": 0.0004953665791776027, |
| "loss": 3.433, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.743936567164178, |
| "eval_accuracy": 0.3657406780780135, |
| "eval_loss": 3.5863289833068848, |
| "eval_runtime": 179.2319, |
| "eval_samples_per_second": 92.768, |
| "eval_steps_per_second": 5.803, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.758512126865671, |
| "grad_norm": 0.31307345628738403, |
| "learning_rate": 0.0004951916010498687, |
| "loss": 3.4439, |
| "step": 30050 |
| }, |
| { |
| "epoch": 8.773087686567164, |
| "grad_norm": 0.3173314332962036, |
| "learning_rate": 0.0004950166229221347, |
| "loss": 3.4468, |
| "step": 30100 |
| }, |
| { |
| "epoch": 8.787663246268657, |
| "grad_norm": 0.3451690673828125, |
| "learning_rate": 0.0004948416447944006, |
| "loss": 3.4319, |
| "step": 30150 |
| }, |
| { |
| "epoch": 8.802238805970148, |
| "grad_norm": 0.33415162563323975, |
| "learning_rate": 0.0004946666666666666, |
| "loss": 3.4383, |
| "step": 30200 |
| }, |
| { |
| "epoch": 8.816814365671641, |
| "grad_norm": 0.32540515065193176, |
| "learning_rate": 0.0004944916885389326, |
| "loss": 3.4377, |
| "step": 30250 |
| }, |
| { |
| "epoch": 8.831389925373134, |
| "grad_norm": 0.3139285147190094, |
| "learning_rate": 0.0004943167104111986, |
| "loss": 3.4488, |
| "step": 30300 |
| }, |
| { |
| "epoch": 8.845965485074627, |
| "grad_norm": 0.3355214297771454, |
| "learning_rate": 0.0004941417322834645, |
| "loss": 3.4469, |
| "step": 30350 |
| }, |
| { |
| "epoch": 8.86054104477612, |
| "grad_norm": 0.34595736861228943, |
| "learning_rate": 0.0004939667541557305, |
| "loss": 3.4522, |
| "step": 30400 |
| }, |
| { |
| "epoch": 8.875116604477611, |
| "grad_norm": 0.3158757984638214, |
| "learning_rate": 0.0004937917760279965, |
| "loss": 3.4508, |
| "step": 30450 |
| }, |
| { |
| "epoch": 8.889692164179104, |
| "grad_norm": 0.32975685596466064, |
| "learning_rate": 0.0004936167979002625, |
| "loss": 3.4354, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.904267723880597, |
| "grad_norm": 0.31538817286491394, |
| "learning_rate": 0.0004934418197725284, |
| "loss": 3.4436, |
| "step": 30550 |
| }, |
| { |
| "epoch": 8.91884328358209, |
| "grad_norm": 0.33921748399734497, |
| "learning_rate": 0.0004932668416447943, |
| "loss": 3.4429, |
| "step": 30600 |
| }, |
| { |
| "epoch": 8.933418843283581, |
| "grad_norm": 0.3398772180080414, |
| "learning_rate": 0.0004930918635170603, |
| "loss": 3.4494, |
| "step": 30650 |
| }, |
| { |
| "epoch": 8.947994402985074, |
| "grad_norm": 0.33406862616539, |
| "learning_rate": 0.0004929168853893263, |
| "loss": 3.4469, |
| "step": 30700 |
| }, |
| { |
| "epoch": 8.962569962686567, |
| "grad_norm": 0.3257124125957489, |
| "learning_rate": 0.0004927419072615922, |
| "loss": 3.4456, |
| "step": 30750 |
| }, |
| { |
| "epoch": 8.97714552238806, |
| "grad_norm": 0.3062405288219452, |
| "learning_rate": 0.0004925669291338582, |
| "loss": 3.4359, |
| "step": 30800 |
| }, |
| { |
| "epoch": 8.991721082089553, |
| "grad_norm": 0.34552669525146484, |
| "learning_rate": 0.0004923919510061242, |
| "loss": 3.4475, |
| "step": 30850 |
| }, |
| { |
| "epoch": 9.006121735074627, |
| "grad_norm": 0.3198220431804657, |
| "learning_rate": 0.0004922169728783901, |
| "loss": 3.3947, |
| "step": 30900 |
| }, |
| { |
| "epoch": 9.02069729477612, |
| "grad_norm": 0.34176215529441833, |
| "learning_rate": 0.0004920419947506561, |
| "loss": 3.3396, |
| "step": 30950 |
| }, |
| { |
| "epoch": 9.035272854477611, |
| "grad_norm": 0.3633732497692108, |
| "learning_rate": 0.0004918670166229221, |
| "loss": 3.3406, |
| "step": 31000 |
| }, |
| { |
| "epoch": 9.035272854477611, |
| "eval_accuracy": 0.3655304713280229, |
| "eval_loss": 3.5907046794891357, |
| "eval_runtime": 179.2394, |
| "eval_samples_per_second": 92.764, |
| "eval_steps_per_second": 5.802, |
| "step": 31000 |
| }, |
| { |
| "epoch": 9.049848414179104, |
| "grad_norm": 0.32831311225891113, |
| "learning_rate": 0.0004916920384951881, |
| "loss": 3.3294, |
| "step": 31050 |
| }, |
| { |
| "epoch": 9.064423973880597, |
| "grad_norm": 0.33142951130867004, |
| "learning_rate": 0.000491517060367454, |
| "loss": 3.348, |
| "step": 31100 |
| }, |
| { |
| "epoch": 9.07899953358209, |
| "grad_norm": 0.34713199734687805, |
| "learning_rate": 0.00049134208223972, |
| "loss": 3.351, |
| "step": 31150 |
| }, |
| { |
| "epoch": 9.093575093283581, |
| "grad_norm": 0.3427370488643646, |
| "learning_rate": 0.000491167104111986, |
| "loss": 3.3562, |
| "step": 31200 |
| }, |
| { |
| "epoch": 9.108150652985074, |
| "grad_norm": 0.3258208930492401, |
| "learning_rate": 0.000490992125984252, |
| "loss": 3.3591, |
| "step": 31250 |
| }, |
| { |
| "epoch": 9.122726212686567, |
| "grad_norm": 0.3553750813007355, |
| "learning_rate": 0.0004908171478565179, |
| "loss": 3.3694, |
| "step": 31300 |
| }, |
| { |
| "epoch": 9.13730177238806, |
| "grad_norm": 0.32988446950912476, |
| "learning_rate": 0.0004906421697287839, |
| "loss": 3.3654, |
| "step": 31350 |
| }, |
| { |
| "epoch": 9.151877332089553, |
| "grad_norm": 0.33832770586013794, |
| "learning_rate": 0.0004904671916010499, |
| "loss": 3.362, |
| "step": 31400 |
| }, |
| { |
| "epoch": 9.166452891791044, |
| "grad_norm": 0.3746139109134674, |
| "learning_rate": 0.0004902922134733158, |
| "loss": 3.3804, |
| "step": 31450 |
| }, |
| { |
| "epoch": 9.181028451492537, |
| "grad_norm": 0.32792752981185913, |
| "learning_rate": 0.0004901172353455818, |
| "loss": 3.3784, |
| "step": 31500 |
| }, |
| { |
| "epoch": 9.19560401119403, |
| "grad_norm": 0.36606335639953613, |
| "learning_rate": 0.0004899422572178478, |
| "loss": 3.3747, |
| "step": 31550 |
| }, |
| { |
| "epoch": 9.210179570895523, |
| "grad_norm": 0.3486144244670868, |
| "learning_rate": 0.0004897672790901138, |
| "loss": 3.3741, |
| "step": 31600 |
| }, |
| { |
| "epoch": 9.224755130597014, |
| "grad_norm": 0.3726823925971985, |
| "learning_rate": 0.0004895923009623796, |
| "loss": 3.3728, |
| "step": 31650 |
| }, |
| { |
| "epoch": 9.239330690298507, |
| "grad_norm": 0.35447168350219727, |
| "learning_rate": 0.0004894173228346456, |
| "loss": 3.3664, |
| "step": 31700 |
| }, |
| { |
| "epoch": 9.25390625, |
| "grad_norm": 0.3537440598011017, |
| "learning_rate": 0.0004892423447069116, |
| "loss": 3.3792, |
| "step": 31750 |
| }, |
| { |
| "epoch": 9.268481809701493, |
| "grad_norm": 0.33133864402770996, |
| "learning_rate": 0.0004890673665791775, |
| "loss": 3.3743, |
| "step": 31800 |
| }, |
| { |
| "epoch": 9.283057369402986, |
| "grad_norm": 0.34640932083129883, |
| "learning_rate": 0.0004888923884514435, |
| "loss": 3.3848, |
| "step": 31850 |
| }, |
| { |
| "epoch": 9.297632929104477, |
| "grad_norm": 0.3547460734844208, |
| "learning_rate": 0.0004887174103237095, |
| "loss": 3.3836, |
| "step": 31900 |
| }, |
| { |
| "epoch": 9.31220848880597, |
| "grad_norm": 0.3356497883796692, |
| "learning_rate": 0.0004885424321959754, |
| "loss": 3.389, |
| "step": 31950 |
| }, |
| { |
| "epoch": 9.326784048507463, |
| "grad_norm": 0.33560049533843994, |
| "learning_rate": 0.0004883674540682414, |
| "loss": 3.3767, |
| "step": 32000 |
| }, |
| { |
| "epoch": 9.326784048507463, |
| "eval_accuracy": 0.36614790951976467, |
| "eval_loss": 3.587216854095459, |
| "eval_runtime": 179.274, |
| "eval_samples_per_second": 92.746, |
| "eval_steps_per_second": 5.801, |
| "step": 32000 |
| }, |
| { |
| "epoch": 9.341359608208956, |
| "grad_norm": 0.3377925753593445, |
| "learning_rate": 0.00048819247594050736, |
| "loss": 3.3874, |
| "step": 32050 |
| }, |
| { |
| "epoch": 9.355935167910447, |
| "grad_norm": 0.3350276052951813, |
| "learning_rate": 0.00048801749781277336, |
| "loss": 3.3886, |
| "step": 32100 |
| }, |
| { |
| "epoch": 9.37051072761194, |
| "grad_norm": 0.326797217130661, |
| "learning_rate": 0.00048784251968503936, |
| "loss": 3.4037, |
| "step": 32150 |
| }, |
| { |
| "epoch": 9.385086287313433, |
| "grad_norm": 0.33711764216423035, |
| "learning_rate": 0.0004876675415573053, |
| "loss": 3.384, |
| "step": 32200 |
| }, |
| { |
| "epoch": 9.399661847014926, |
| "grad_norm": 0.34087830781936646, |
| "learning_rate": 0.00048749256342957124, |
| "loss": 3.3944, |
| "step": 32250 |
| }, |
| { |
| "epoch": 9.414237406716419, |
| "grad_norm": 0.31658002734184265, |
| "learning_rate": 0.00048731758530183724, |
| "loss": 3.4046, |
| "step": 32300 |
| }, |
| { |
| "epoch": 9.42881296641791, |
| "grad_norm": 0.32896366715431213, |
| "learning_rate": 0.0004871426071741032, |
| "loss": 3.3909, |
| "step": 32350 |
| }, |
| { |
| "epoch": 9.443388526119403, |
| "grad_norm": 0.3304135799407959, |
| "learning_rate": 0.0004869676290463692, |
| "loss": 3.4053, |
| "step": 32400 |
| }, |
| { |
| "epoch": 9.457964085820896, |
| "grad_norm": 0.3395926356315613, |
| "learning_rate": 0.0004867926509186351, |
| "loss": 3.3937, |
| "step": 32450 |
| }, |
| { |
| "epoch": 9.472539645522389, |
| "grad_norm": 0.35604408383369446, |
| "learning_rate": 0.00048661767279090107, |
| "loss": 3.3919, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.48711520522388, |
| "grad_norm": 0.33686935901641846, |
| "learning_rate": 0.00048644269466316707, |
| "loss": 3.393, |
| "step": 32550 |
| }, |
| { |
| "epoch": 9.501690764925373, |
| "grad_norm": 0.3407963216304779, |
| "learning_rate": 0.00048626771653543306, |
| "loss": 3.4004, |
| "step": 32600 |
| }, |
| { |
| "epoch": 9.516266324626866, |
| "grad_norm": 0.3341701030731201, |
| "learning_rate": 0.00048609273840769895, |
| "loss": 3.4022, |
| "step": 32650 |
| }, |
| { |
| "epoch": 9.530841884328359, |
| "grad_norm": 0.3404782712459564, |
| "learning_rate": 0.00048591776027996495, |
| "loss": 3.4092, |
| "step": 32700 |
| }, |
| { |
| "epoch": 9.545417444029852, |
| "grad_norm": 0.346642404794693, |
| "learning_rate": 0.00048574278215223095, |
| "loss": 3.4079, |
| "step": 32750 |
| }, |
| { |
| "epoch": 9.559993003731343, |
| "grad_norm": 0.3336687386035919, |
| "learning_rate": 0.0004855678040244969, |
| "loss": 3.4083, |
| "step": 32800 |
| }, |
| { |
| "epoch": 9.574568563432836, |
| "grad_norm": 0.33750444650650024, |
| "learning_rate": 0.00048539282589676283, |
| "loss": 3.4093, |
| "step": 32850 |
| }, |
| { |
| "epoch": 9.589144123134329, |
| "grad_norm": 0.3416513502597809, |
| "learning_rate": 0.00048521784776902883, |
| "loss": 3.3882, |
| "step": 32900 |
| }, |
| { |
| "epoch": 9.603719682835822, |
| "grad_norm": 0.3458026051521301, |
| "learning_rate": 0.00048504286964129483, |
| "loss": 3.4058, |
| "step": 32950 |
| }, |
| { |
| "epoch": 9.618295242537313, |
| "grad_norm": 0.3294159471988678, |
| "learning_rate": 0.0004848678915135607, |
| "loss": 3.4124, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.618295242537313, |
| "eval_accuracy": 0.3663903652336396, |
| "eval_loss": 3.578124523162842, |
| "eval_runtime": 179.3194, |
| "eval_samples_per_second": 92.723, |
| "eval_steps_per_second": 5.8, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.632870802238806, |
| "grad_norm": 0.3293006718158722, |
| "learning_rate": 0.0004846929133858267, |
| "loss": 3.4163, |
| "step": 33050 |
| }, |
| { |
| "epoch": 9.647446361940299, |
| "grad_norm": 0.33910810947418213, |
| "learning_rate": 0.0004845179352580927, |
| "loss": 3.4063, |
| "step": 33100 |
| }, |
| { |
| "epoch": 9.662021921641792, |
| "grad_norm": 0.32959282398223877, |
| "learning_rate": 0.0004843429571303587, |
| "loss": 3.4139, |
| "step": 33150 |
| }, |
| { |
| "epoch": 9.676597481343283, |
| "grad_norm": 0.3160514235496521, |
| "learning_rate": 0.0004841679790026246, |
| "loss": 3.4103, |
| "step": 33200 |
| }, |
| { |
| "epoch": 9.691173041044776, |
| "grad_norm": 0.32494455575942993, |
| "learning_rate": 0.0004839930008748906, |
| "loss": 3.4003, |
| "step": 33250 |
| }, |
| { |
| "epoch": 9.705748600746269, |
| "grad_norm": 0.33086997270584106, |
| "learning_rate": 0.0004838180227471566, |
| "loss": 3.4161, |
| "step": 33300 |
| }, |
| { |
| "epoch": 9.720324160447761, |
| "grad_norm": 0.3385337293148041, |
| "learning_rate": 0.00048364304461942254, |
| "loss": 3.4096, |
| "step": 33350 |
| }, |
| { |
| "epoch": 9.734899720149254, |
| "grad_norm": 0.31951361894607544, |
| "learning_rate": 0.0004834680664916885, |
| "loss": 3.4144, |
| "step": 33400 |
| }, |
| { |
| "epoch": 9.749475279850746, |
| "grad_norm": 0.32688412070274353, |
| "learning_rate": 0.0004832930883639545, |
| "loss": 3.4109, |
| "step": 33450 |
| }, |
| { |
| "epoch": 9.764050839552239, |
| "grad_norm": 0.3271602392196655, |
| "learning_rate": 0.0004831181102362204, |
| "loss": 3.4155, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.778626399253731, |
| "grad_norm": 0.30692335963249207, |
| "learning_rate": 0.00048294313210848637, |
| "loss": 3.4077, |
| "step": 33550 |
| }, |
| { |
| "epoch": 9.793201958955224, |
| "grad_norm": 0.35177081823349, |
| "learning_rate": 0.00048276815398075237, |
| "loss": 3.419, |
| "step": 33600 |
| }, |
| { |
| "epoch": 9.807777518656717, |
| "grad_norm": 0.3389143645763397, |
| "learning_rate": 0.0004825931758530183, |
| "loss": 3.419, |
| "step": 33650 |
| }, |
| { |
| "epoch": 9.822353078358208, |
| "grad_norm": 0.3317539691925049, |
| "learning_rate": 0.0004824181977252843, |
| "loss": 3.4218, |
| "step": 33700 |
| }, |
| { |
| "epoch": 9.836928638059701, |
| "grad_norm": 0.3414798378944397, |
| "learning_rate": 0.00048224321959755025, |
| "loss": 3.4083, |
| "step": 33750 |
| }, |
| { |
| "epoch": 9.851504197761194, |
| "grad_norm": 0.32761162519454956, |
| "learning_rate": 0.0004820682414698162, |
| "loss": 3.4208, |
| "step": 33800 |
| }, |
| { |
| "epoch": 9.866079757462687, |
| "grad_norm": 0.346387654542923, |
| "learning_rate": 0.0004818932633420822, |
| "loss": 3.4237, |
| "step": 33850 |
| }, |
| { |
| "epoch": 9.880655317164178, |
| "grad_norm": 0.3281957805156708, |
| "learning_rate": 0.0004817182852143482, |
| "loss": 3.4228, |
| "step": 33900 |
| }, |
| { |
| "epoch": 9.895230876865671, |
| "grad_norm": 0.3266942799091339, |
| "learning_rate": 0.0004815433070866141, |
| "loss": 3.4279, |
| "step": 33950 |
| }, |
| { |
| "epoch": 9.909806436567164, |
| "grad_norm": 0.314626544713974, |
| "learning_rate": 0.0004813683289588801, |
| "loss": 3.4173, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.909806436567164, |
| "eval_accuracy": 0.367117614678316, |
| "eval_loss": 3.571929931640625, |
| "eval_runtime": 179.3176, |
| "eval_samples_per_second": 92.724, |
| "eval_steps_per_second": 5.8, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.924381996268657, |
| "grad_norm": 0.3587126135826111, |
| "learning_rate": 0.0004811933508311461, |
| "loss": 3.4121, |
| "step": 34050 |
| }, |
| { |
| "epoch": 9.938957555970148, |
| "grad_norm": 0.32173195481300354, |
| "learning_rate": 0.00048101837270341207, |
| "loss": 3.4217, |
| "step": 34100 |
| }, |
| { |
| "epoch": 9.953533115671641, |
| "grad_norm": 0.3450935184955597, |
| "learning_rate": 0.00048084339457567796, |
| "loss": 3.425, |
| "step": 34150 |
| }, |
| { |
| "epoch": 9.968108675373134, |
| "grad_norm": 0.35819029808044434, |
| "learning_rate": 0.00048066841644794396, |
| "loss": 3.4197, |
| "step": 34200 |
| }, |
| { |
| "epoch": 9.982684235074627, |
| "grad_norm": 0.33679142594337463, |
| "learning_rate": 0.00048049343832020996, |
| "loss": 3.4399, |
| "step": 34250 |
| }, |
| { |
| "epoch": 9.99725979477612, |
| "grad_norm": 0.32246530055999756, |
| "learning_rate": 0.00048031846019247595, |
| "loss": 3.4234, |
| "step": 34300 |
| }, |
| { |
| "epoch": 10.011660447761194, |
| "grad_norm": 0.35621923208236694, |
| "learning_rate": 0.00048014348206474184, |
| "loss": 3.3286, |
| "step": 34350 |
| }, |
| { |
| "epoch": 10.026236007462687, |
| "grad_norm": 0.3410172760486603, |
| "learning_rate": 0.00047996850393700784, |
| "loss": 3.317, |
| "step": 34400 |
| }, |
| { |
| "epoch": 10.040811567164178, |
| "grad_norm": 0.3566003441810608, |
| "learning_rate": 0.00047979352580927384, |
| "loss": 3.3056, |
| "step": 34450 |
| }, |
| { |
| "epoch": 10.055387126865671, |
| "grad_norm": 0.3618554174900055, |
| "learning_rate": 0.00047961854768153973, |
| "loss": 3.324, |
| "step": 34500 |
| }, |
| { |
| "epoch": 10.069962686567164, |
| "grad_norm": 0.33970338106155396, |
| "learning_rate": 0.0004794435695538057, |
| "loss": 3.3137, |
| "step": 34550 |
| }, |
| { |
| "epoch": 10.084538246268657, |
| "grad_norm": 0.38800761103630066, |
| "learning_rate": 0.0004792685914260717, |
| "loss": 3.3341, |
| "step": 34600 |
| }, |
| { |
| "epoch": 10.099113805970148, |
| "grad_norm": 0.3676946759223938, |
| "learning_rate": 0.00047909361329833767, |
| "loss": 3.3303, |
| "step": 34650 |
| }, |
| { |
| "epoch": 10.113689365671641, |
| "grad_norm": 0.31517815589904785, |
| "learning_rate": 0.0004789186351706036, |
| "loss": 3.3275, |
| "step": 34700 |
| }, |
| { |
| "epoch": 10.128264925373134, |
| "grad_norm": 0.3755510151386261, |
| "learning_rate": 0.0004787436570428696, |
| "loss": 3.3427, |
| "step": 34750 |
| }, |
| { |
| "epoch": 10.142840485074627, |
| "grad_norm": 0.3374476134777069, |
| "learning_rate": 0.00047856867891513555, |
| "loss": 3.3345, |
| "step": 34800 |
| }, |
| { |
| "epoch": 10.15741604477612, |
| "grad_norm": 0.3470100462436676, |
| "learning_rate": 0.00047839370078740155, |
| "loss": 3.3368, |
| "step": 34850 |
| }, |
| { |
| "epoch": 10.171991604477611, |
| "grad_norm": 0.349649578332901, |
| "learning_rate": 0.0004782187226596675, |
| "loss": 3.3515, |
| "step": 34900 |
| }, |
| { |
| "epoch": 10.186567164179104, |
| "grad_norm": 0.35084185004234314, |
| "learning_rate": 0.00047804374453193344, |
| "loss": 3.3456, |
| "step": 34950 |
| }, |
| { |
| "epoch": 10.201142723880597, |
| "grad_norm": 0.3345482647418976, |
| "learning_rate": 0.00047786876640419943, |
| "loss": 3.353, |
| "step": 35000 |
| }, |
| { |
| "epoch": 10.201142723880597, |
| "eval_accuracy": 0.3668088955824451, |
| "eval_loss": 3.5851173400878906, |
| "eval_runtime": 179.1694, |
| "eval_samples_per_second": 92.8, |
| "eval_steps_per_second": 5.805, |
| "step": 35000 |
| }, |
| { |
| "epoch": 10.21571828358209, |
| "grad_norm": 0.34957993030548096, |
| "learning_rate": 0.00047769378827646543, |
| "loss": 3.3386, |
| "step": 35050 |
| }, |
| { |
| "epoch": 10.230293843283581, |
| "grad_norm": 0.3573317229747772, |
| "learning_rate": 0.0004775188101487313, |
| "loss": 3.3499, |
| "step": 35100 |
| }, |
| { |
| "epoch": 10.244869402985074, |
| "grad_norm": 0.3407445251941681, |
| "learning_rate": 0.0004773438320209973, |
| "loss": 3.367, |
| "step": 35150 |
| }, |
| { |
| "epoch": 10.259444962686567, |
| "grad_norm": 0.34660759568214417, |
| "learning_rate": 0.0004771688538932633, |
| "loss": 3.3599, |
| "step": 35200 |
| }, |
| { |
| "epoch": 10.27402052238806, |
| "grad_norm": 0.3838936388492584, |
| "learning_rate": 0.0004769938757655293, |
| "loss": 3.3592, |
| "step": 35250 |
| }, |
| { |
| "epoch": 10.288596082089553, |
| "grad_norm": 0.343936949968338, |
| "learning_rate": 0.0004768188976377952, |
| "loss": 3.3729, |
| "step": 35300 |
| }, |
| { |
| "epoch": 10.303171641791044, |
| "grad_norm": 0.35758844017982483, |
| "learning_rate": 0.0004766439195100612, |
| "loss": 3.3567, |
| "step": 35350 |
| }, |
| { |
| "epoch": 10.317747201492537, |
| "grad_norm": 0.36581915616989136, |
| "learning_rate": 0.0004764689413823272, |
| "loss": 3.3649, |
| "step": 35400 |
| }, |
| { |
| "epoch": 10.33232276119403, |
| "grad_norm": 0.342654287815094, |
| "learning_rate": 0.0004762939632545931, |
| "loss": 3.371, |
| "step": 35450 |
| }, |
| { |
| "epoch": 10.346898320895523, |
| "grad_norm": 0.37132593989372253, |
| "learning_rate": 0.0004761189851268591, |
| "loss": 3.3667, |
| "step": 35500 |
| }, |
| { |
| "epoch": 10.361473880597014, |
| "grad_norm": 0.32585781812667847, |
| "learning_rate": 0.0004759440069991251, |
| "loss": 3.363, |
| "step": 35550 |
| }, |
| { |
| "epoch": 10.376049440298507, |
| "grad_norm": 0.3182028532028198, |
| "learning_rate": 0.0004757690288713911, |
| "loss": 3.3668, |
| "step": 35600 |
| }, |
| { |
| "epoch": 10.390625, |
| "grad_norm": 0.3395622968673706, |
| "learning_rate": 0.00047559405074365697, |
| "loss": 3.3641, |
| "step": 35650 |
| }, |
| { |
| "epoch": 10.405200559701493, |
| "grad_norm": 0.3391034007072449, |
| "learning_rate": 0.00047541907261592297, |
| "loss": 3.3705, |
| "step": 35700 |
| }, |
| { |
| "epoch": 10.419776119402986, |
| "grad_norm": 0.3554554879665375, |
| "learning_rate": 0.00047524409448818897, |
| "loss": 3.3717, |
| "step": 35750 |
| }, |
| { |
| "epoch": 10.434351679104477, |
| "grad_norm": 0.3345549404621124, |
| "learning_rate": 0.0004750691163604549, |
| "loss": 3.3593, |
| "step": 35800 |
| }, |
| { |
| "epoch": 10.44892723880597, |
| "grad_norm": 0.33761605620384216, |
| "learning_rate": 0.00047489413823272085, |
| "loss": 3.3697, |
| "step": 35850 |
| }, |
| { |
| "epoch": 10.463502798507463, |
| "grad_norm": 0.3620910942554474, |
| "learning_rate": 0.00047471916010498685, |
| "loss": 3.3873, |
| "step": 35900 |
| }, |
| { |
| "epoch": 10.478078358208956, |
| "grad_norm": 0.3357434570789337, |
| "learning_rate": 0.0004745441819772528, |
| "loss": 3.3693, |
| "step": 35950 |
| }, |
| { |
| "epoch": 10.492653917910447, |
| "grad_norm": 0.3724294900894165, |
| "learning_rate": 0.0004743692038495188, |
| "loss": 3.386, |
| "step": 36000 |
| }, |
| { |
| "epoch": 10.492653917910447, |
| "eval_accuracy": 0.36732217197477945, |
| "eval_loss": 3.575730323791504, |
| "eval_runtime": 179.2041, |
| "eval_samples_per_second": 92.782, |
| "eval_steps_per_second": 5.803, |
| "step": 36000 |
| }, |
| { |
| "epoch": 10.50722947761194, |
| "grad_norm": 0.3459193706512451, |
| "learning_rate": 0.00047419422572178474, |
| "loss": 3.3686, |
| "step": 36050 |
| }, |
| { |
| "epoch": 10.521805037313433, |
| "grad_norm": 0.34317150712013245, |
| "learning_rate": 0.0004740192475940507, |
| "loss": 3.3915, |
| "step": 36100 |
| }, |
| { |
| "epoch": 10.536380597014926, |
| "grad_norm": 0.3495369553565979, |
| "learning_rate": 0.0004738442694663167, |
| "loss": 3.3801, |
| "step": 36150 |
| }, |
| { |
| "epoch": 10.550956156716419, |
| "grad_norm": 0.32712510228157043, |
| "learning_rate": 0.0004736692913385827, |
| "loss": 3.3806, |
| "step": 36200 |
| }, |
| { |
| "epoch": 10.56553171641791, |
| "grad_norm": 0.3268829584121704, |
| "learning_rate": 0.00047349431321084856, |
| "loss": 3.3778, |
| "step": 36250 |
| }, |
| { |
| "epoch": 10.580107276119403, |
| "grad_norm": 0.3383152484893799, |
| "learning_rate": 0.00047331933508311456, |
| "loss": 3.3905, |
| "step": 36300 |
| }, |
| { |
| "epoch": 10.594682835820896, |
| "grad_norm": 0.3256557881832123, |
| "learning_rate": 0.00047314435695538056, |
| "loss": 3.3769, |
| "step": 36350 |
| }, |
| { |
| "epoch": 10.609258395522389, |
| "grad_norm": 0.3504721224308014, |
| "learning_rate": 0.00047296937882764645, |
| "loss": 3.3907, |
| "step": 36400 |
| }, |
| { |
| "epoch": 10.62383395522388, |
| "grad_norm": 0.3189436197280884, |
| "learning_rate": 0.00047279440069991245, |
| "loss": 3.3956, |
| "step": 36450 |
| }, |
| { |
| "epoch": 10.638409514925373, |
| "grad_norm": 0.3360075056552887, |
| "learning_rate": 0.00047261942257217844, |
| "loss": 3.3905, |
| "step": 36500 |
| }, |
| { |
| "epoch": 10.652985074626866, |
| "grad_norm": 0.35313287377357483, |
| "learning_rate": 0.00047244444444444444, |
| "loss": 3.3966, |
| "step": 36550 |
| }, |
| { |
| "epoch": 10.667560634328359, |
| "grad_norm": 0.34554523229599, |
| "learning_rate": 0.00047226946631671033, |
| "loss": 3.3831, |
| "step": 36600 |
| }, |
| { |
| "epoch": 10.682136194029852, |
| "grad_norm": 0.3466152548789978, |
| "learning_rate": 0.00047209448818897633, |
| "loss": 3.379, |
| "step": 36650 |
| }, |
| { |
| "epoch": 10.696711753731343, |
| "grad_norm": 0.36634284257888794, |
| "learning_rate": 0.0004719195100612423, |
| "loss": 3.3928, |
| "step": 36700 |
| }, |
| { |
| "epoch": 10.711287313432836, |
| "grad_norm": 0.3495074510574341, |
| "learning_rate": 0.00047174453193350827, |
| "loss": 3.3986, |
| "step": 36750 |
| }, |
| { |
| "epoch": 10.725862873134329, |
| "grad_norm": 0.3547740578651428, |
| "learning_rate": 0.0004715695538057742, |
| "loss": 3.3958, |
| "step": 36800 |
| }, |
| { |
| "epoch": 10.740438432835822, |
| "grad_norm": 0.3183799684047699, |
| "learning_rate": 0.0004713945756780402, |
| "loss": 3.3926, |
| "step": 36850 |
| }, |
| { |
| "epoch": 10.755013992537313, |
| "grad_norm": 0.3465604782104492, |
| "learning_rate": 0.0004712195975503062, |
| "loss": 3.4043, |
| "step": 36900 |
| }, |
| { |
| "epoch": 10.769589552238806, |
| "grad_norm": 0.3227628767490387, |
| "learning_rate": 0.00047104461942257215, |
| "loss": 3.4051, |
| "step": 36950 |
| }, |
| { |
| "epoch": 10.784165111940299, |
| "grad_norm": 0.3621023893356323, |
| "learning_rate": 0.0004708696412948381, |
| "loss": 3.3967, |
| "step": 37000 |
| }, |
| { |
| "epoch": 10.784165111940299, |
| "eval_accuracy": 0.3680770802023493, |
| "eval_loss": 3.568211793899536, |
| "eval_runtime": 179.2961, |
| "eval_samples_per_second": 92.735, |
| "eval_steps_per_second": 5.8, |
| "step": 37000 |
| }, |
| { |
| "epoch": 10.798740671641792, |
| "grad_norm": 0.33479082584381104, |
| "learning_rate": 0.0004706946631671041, |
| "loss": 3.3971, |
| "step": 37050 |
| }, |
| { |
| "epoch": 10.813316231343283, |
| "grad_norm": 0.36273670196533203, |
| "learning_rate": 0.00047051968503937004, |
| "loss": 3.3936, |
| "step": 37100 |
| }, |
| { |
| "epoch": 10.827891791044776, |
| "grad_norm": 0.3388984799385071, |
| "learning_rate": 0.000470344706911636, |
| "loss": 3.4087, |
| "step": 37150 |
| }, |
| { |
| "epoch": 10.842467350746269, |
| "grad_norm": 0.33692026138305664, |
| "learning_rate": 0.000470169728783902, |
| "loss": 3.4024, |
| "step": 37200 |
| }, |
| { |
| "epoch": 10.857042910447761, |
| "grad_norm": 0.34697890281677246, |
| "learning_rate": 0.0004699947506561679, |
| "loss": 3.3852, |
| "step": 37250 |
| }, |
| { |
| "epoch": 10.871618470149254, |
| "grad_norm": 0.33639323711395264, |
| "learning_rate": 0.0004698197725284339, |
| "loss": 3.4011, |
| "step": 37300 |
| }, |
| { |
| "epoch": 10.886194029850746, |
| "grad_norm": 0.3242666721343994, |
| "learning_rate": 0.00046964479440069986, |
| "loss": 3.3955, |
| "step": 37350 |
| }, |
| { |
| "epoch": 10.900769589552239, |
| "grad_norm": 0.34894293546676636, |
| "learning_rate": 0.0004694698162729658, |
| "loss": 3.4065, |
| "step": 37400 |
| }, |
| { |
| "epoch": 10.915345149253731, |
| "grad_norm": 0.3249868154525757, |
| "learning_rate": 0.0004692948381452318, |
| "loss": 3.4048, |
| "step": 37450 |
| }, |
| { |
| "epoch": 10.929920708955224, |
| "grad_norm": 0.3601526618003845, |
| "learning_rate": 0.0004691198600174978, |
| "loss": 3.4165, |
| "step": 37500 |
| }, |
| { |
| "epoch": 10.944496268656717, |
| "grad_norm": 0.3195374310016632, |
| "learning_rate": 0.0004689448818897637, |
| "loss": 3.3931, |
| "step": 37550 |
| }, |
| { |
| "epoch": 10.959071828358208, |
| "grad_norm": 0.35160696506500244, |
| "learning_rate": 0.0004687699037620297, |
| "loss": 3.4037, |
| "step": 37600 |
| }, |
| { |
| "epoch": 10.973647388059701, |
| "grad_norm": 0.39584487676620483, |
| "learning_rate": 0.0004685949256342957, |
| "loss": 3.3957, |
| "step": 37650 |
| }, |
| { |
| "epoch": 10.988222947761194, |
| "grad_norm": 0.3482179343700409, |
| "learning_rate": 0.0004684199475065617, |
| "loss": 3.3878, |
| "step": 37700 |
| }, |
| { |
| "epoch": 11.002623600746269, |
| "grad_norm": 0.3683445453643799, |
| "learning_rate": 0.0004682449693788276, |
| "loss": 3.3851, |
| "step": 37750 |
| }, |
| { |
| "epoch": 11.017199160447761, |
| "grad_norm": 0.35708582401275635, |
| "learning_rate": 0.00046806999125109357, |
| "loss": 3.2881, |
| "step": 37800 |
| }, |
| { |
| "epoch": 11.031774720149254, |
| "grad_norm": 0.33956989645957947, |
| "learning_rate": 0.00046789501312335957, |
| "loss": 3.2884, |
| "step": 37850 |
| }, |
| { |
| "epoch": 11.046350279850746, |
| "grad_norm": 0.35305795073509216, |
| "learning_rate": 0.0004677200349956255, |
| "loss": 3.2951, |
| "step": 37900 |
| }, |
| { |
| "epoch": 11.060925839552239, |
| "grad_norm": 0.3429839313030243, |
| "learning_rate": 0.00046754505686789146, |
| "loss": 3.3103, |
| "step": 37950 |
| }, |
| { |
| "epoch": 11.075501399253731, |
| "grad_norm": 0.36126893758773804, |
| "learning_rate": 0.00046737007874015745, |
| "loss": 3.3008, |
| "step": 38000 |
| }, |
| { |
| "epoch": 11.075501399253731, |
| "eval_accuracy": 0.3674310416521262, |
| "eval_loss": 3.5771067142486572, |
| "eval_runtime": 179.37, |
| "eval_samples_per_second": 92.697, |
| "eval_steps_per_second": 5.798, |
| "step": 38000 |
| }, |
| { |
| "epoch": 11.090076958955224, |
| "grad_norm": 0.3234972357749939, |
| "learning_rate": 0.0004671951006124234, |
| "loss": 3.3033, |
| "step": 38050 |
| }, |
| { |
| "epoch": 11.104652518656716, |
| "grad_norm": 0.3429054021835327, |
| "learning_rate": 0.00046702012248468934, |
| "loss": 3.3104, |
| "step": 38100 |
| }, |
| { |
| "epoch": 11.119228078358208, |
| "grad_norm": 0.366301566362381, |
| "learning_rate": 0.00046684514435695534, |
| "loss": 3.3158, |
| "step": 38150 |
| }, |
| { |
| "epoch": 11.133803638059701, |
| "grad_norm": 0.35824158787727356, |
| "learning_rate": 0.00046667016622922134, |
| "loss": 3.3167, |
| "step": 38200 |
| }, |
| { |
| "epoch": 11.148379197761194, |
| "grad_norm": 0.3471207618713379, |
| "learning_rate": 0.0004664951881014873, |
| "loss": 3.3236, |
| "step": 38250 |
| }, |
| { |
| "epoch": 11.162954757462687, |
| "grad_norm": 0.33413344621658325, |
| "learning_rate": 0.0004663202099737532, |
| "loss": 3.335, |
| "step": 38300 |
| }, |
| { |
| "epoch": 11.177530317164178, |
| "grad_norm": 0.35084760189056396, |
| "learning_rate": 0.0004661452318460192, |
| "loss": 3.3378, |
| "step": 38350 |
| }, |
| { |
| "epoch": 11.192105876865671, |
| "grad_norm": 0.3245983421802521, |
| "learning_rate": 0.00046597025371828516, |
| "loss": 3.3289, |
| "step": 38400 |
| }, |
| { |
| "epoch": 11.206681436567164, |
| "grad_norm": 0.3432168960571289, |
| "learning_rate": 0.00046579527559055116, |
| "loss": 3.3173, |
| "step": 38450 |
| }, |
| { |
| "epoch": 11.221256996268657, |
| "grad_norm": 0.3392120599746704, |
| "learning_rate": 0.0004656202974628171, |
| "loss": 3.3176, |
| "step": 38500 |
| }, |
| { |
| "epoch": 11.235832555970148, |
| "grad_norm": 0.30957579612731934, |
| "learning_rate": 0.00046544531933508305, |
| "loss": 3.3328, |
| "step": 38550 |
| }, |
| { |
| "epoch": 11.250408115671641, |
| "grad_norm": 0.3354186415672302, |
| "learning_rate": 0.00046527034120734905, |
| "loss": 3.343, |
| "step": 38600 |
| }, |
| { |
| "epoch": 11.264983675373134, |
| "grad_norm": 0.3629128634929657, |
| "learning_rate": 0.00046509536307961504, |
| "loss": 3.3423, |
| "step": 38650 |
| }, |
| { |
| "epoch": 11.279559235074627, |
| "grad_norm": 0.3577345311641693, |
| "learning_rate": 0.00046492038495188093, |
| "loss": 3.3461, |
| "step": 38700 |
| }, |
| { |
| "epoch": 11.29413479477612, |
| "grad_norm": 0.35234689712524414, |
| "learning_rate": 0.00046474540682414693, |
| "loss": 3.3383, |
| "step": 38750 |
| }, |
| { |
| "epoch": 11.308710354477611, |
| "grad_norm": 0.35899555683135986, |
| "learning_rate": 0.00046457042869641293, |
| "loss": 3.3405, |
| "step": 38800 |
| }, |
| { |
| "epoch": 11.323285914179104, |
| "grad_norm": 0.3531411290168762, |
| "learning_rate": 0.0004643954505686789, |
| "loss": 3.3408, |
| "step": 38850 |
| }, |
| { |
| "epoch": 11.337861473880597, |
| "grad_norm": 0.3381483256816864, |
| "learning_rate": 0.0004642204724409448, |
| "loss": 3.3351, |
| "step": 38900 |
| }, |
| { |
| "epoch": 11.35243703358209, |
| "grad_norm": 0.36588019132614136, |
| "learning_rate": 0.0004640454943132108, |
| "loss": 3.3432, |
| "step": 38950 |
| }, |
| { |
| "epoch": 11.367012593283581, |
| "grad_norm": 0.36804234981536865, |
| "learning_rate": 0.0004638705161854768, |
| "loss": 3.3421, |
| "step": 39000 |
| }, |
| { |
| "epoch": 11.367012593283581, |
| "eval_accuracy": 0.3676348927668987, |
| "eval_loss": 3.5748021602630615, |
| "eval_runtime": 179.2112, |
| "eval_samples_per_second": 92.779, |
| "eval_steps_per_second": 5.803, |
| "step": 39000 |
| }, |
| { |
| "epoch": 11.381588152985074, |
| "grad_norm": 0.36152568459510803, |
| "learning_rate": 0.0004636955380577427, |
| "loss": 3.3403, |
| "step": 39050 |
| }, |
| { |
| "epoch": 11.396163712686567, |
| "grad_norm": 0.327120304107666, |
| "learning_rate": 0.0004635205599300087, |
| "loss": 3.3424, |
| "step": 39100 |
| }, |
| { |
| "epoch": 11.41073927238806, |
| "grad_norm": 0.3252420425415039, |
| "learning_rate": 0.0004633455818022747, |
| "loss": 3.3589, |
| "step": 39150 |
| }, |
| { |
| "epoch": 11.425314832089553, |
| "grad_norm": 0.3580802083015442, |
| "learning_rate": 0.00046317060367454064, |
| "loss": 3.3514, |
| "step": 39200 |
| }, |
| { |
| "epoch": 11.439890391791044, |
| "grad_norm": 0.3448558449745178, |
| "learning_rate": 0.0004629956255468066, |
| "loss": 3.3533, |
| "step": 39250 |
| }, |
| { |
| "epoch": 11.454465951492537, |
| "grad_norm": 0.35852617025375366, |
| "learning_rate": 0.0004628206474190726, |
| "loss": 3.3519, |
| "step": 39300 |
| }, |
| { |
| "epoch": 11.46904151119403, |
| "grad_norm": 0.3588276505470276, |
| "learning_rate": 0.0004626456692913385, |
| "loss": 3.3636, |
| "step": 39350 |
| }, |
| { |
| "epoch": 11.483617070895523, |
| "grad_norm": 0.34626030921936035, |
| "learning_rate": 0.0004624706911636045, |
| "loss": 3.3617, |
| "step": 39400 |
| }, |
| { |
| "epoch": 11.498192630597014, |
| "grad_norm": 0.3265782594680786, |
| "learning_rate": 0.00046229571303587046, |
| "loss": 3.3584, |
| "step": 39450 |
| }, |
| { |
| "epoch": 11.512768190298507, |
| "grad_norm": 0.36340591311454773, |
| "learning_rate": 0.00046212073490813646, |
| "loss": 3.3619, |
| "step": 39500 |
| }, |
| { |
| "epoch": 11.52734375, |
| "grad_norm": 0.3383502960205078, |
| "learning_rate": 0.0004619457567804024, |
| "loss": 3.3671, |
| "step": 39550 |
| }, |
| { |
| "epoch": 11.541919309701493, |
| "grad_norm": 0.32055920362472534, |
| "learning_rate": 0.0004617707786526684, |
| "loss": 3.3657, |
| "step": 39600 |
| }, |
| { |
| "epoch": 11.556494869402986, |
| "grad_norm": 0.3531268835067749, |
| "learning_rate": 0.00046159580052493435, |
| "loss": 3.359, |
| "step": 39650 |
| }, |
| { |
| "epoch": 11.571070429104477, |
| "grad_norm": 0.38840246200561523, |
| "learning_rate": 0.0004614208223972003, |
| "loss": 3.3683, |
| "step": 39700 |
| }, |
| { |
| "epoch": 11.58564598880597, |
| "grad_norm": 0.350590318441391, |
| "learning_rate": 0.0004612458442694663, |
| "loss": 3.3655, |
| "step": 39750 |
| }, |
| { |
| "epoch": 11.600221548507463, |
| "grad_norm": 0.35112035274505615, |
| "learning_rate": 0.0004610708661417323, |
| "loss": 3.3695, |
| "step": 39800 |
| }, |
| { |
| "epoch": 11.614797108208956, |
| "grad_norm": 0.37844619154930115, |
| "learning_rate": 0.0004608958880139982, |
| "loss": 3.3694, |
| "step": 39850 |
| }, |
| { |
| "epoch": 11.629372667910447, |
| "grad_norm": 0.32265302538871765, |
| "learning_rate": 0.00046072090988626417, |
| "loss": 3.3716, |
| "step": 39900 |
| }, |
| { |
| "epoch": 11.64394822761194, |
| "grad_norm": 0.3645426332950592, |
| "learning_rate": 0.00046054593175853017, |
| "loss": 3.3769, |
| "step": 39950 |
| }, |
| { |
| "epoch": 11.658523787313433, |
| "grad_norm": 0.35819122195243835, |
| "learning_rate": 0.00046037095363079606, |
| "loss": 3.3865, |
| "step": 40000 |
| }, |
| { |
| "epoch": 11.658523787313433, |
| "eval_accuracy": 0.3685178552744181, |
| "eval_loss": 3.5665090084075928, |
| "eval_runtime": 179.4054, |
| "eval_samples_per_second": 92.678, |
| "eval_steps_per_second": 5.797, |
| "step": 40000 |
| }, |
| { |
| "epoch": 11.673099347014926, |
| "grad_norm": 0.34585466980934143, |
| "learning_rate": 0.00046019597550306206, |
| "loss": 3.3688, |
| "step": 40050 |
| }, |
| { |
| "epoch": 11.687674906716419, |
| "grad_norm": 0.3452220559120178, |
| "learning_rate": 0.00046002099737532806, |
| "loss": 3.3689, |
| "step": 40100 |
| }, |
| { |
| "epoch": 11.70225046641791, |
| "grad_norm": 0.3693149983882904, |
| "learning_rate": 0.00045984601924759405, |
| "loss": 3.3746, |
| "step": 40150 |
| }, |
| { |
| "epoch": 11.716826026119403, |
| "grad_norm": 0.32303112745285034, |
| "learning_rate": 0.00045967104111985994, |
| "loss": 3.3624, |
| "step": 40200 |
| }, |
| { |
| "epoch": 11.731401585820896, |
| "grad_norm": 0.33094048500061035, |
| "learning_rate": 0.00045949606299212594, |
| "loss": 3.3638, |
| "step": 40250 |
| }, |
| { |
| "epoch": 11.745977145522389, |
| "grad_norm": 0.3415735960006714, |
| "learning_rate": 0.00045932108486439194, |
| "loss": 3.3793, |
| "step": 40300 |
| }, |
| { |
| "epoch": 11.76055270522388, |
| "grad_norm": 0.35656100511550903, |
| "learning_rate": 0.0004591461067366579, |
| "loss": 3.3694, |
| "step": 40350 |
| }, |
| { |
| "epoch": 11.775128264925373, |
| "grad_norm": 0.34617358446121216, |
| "learning_rate": 0.0004589711286089238, |
| "loss": 3.3741, |
| "step": 40400 |
| }, |
| { |
| "epoch": 11.789703824626866, |
| "grad_norm": 0.36538684368133545, |
| "learning_rate": 0.0004587961504811898, |
| "loss": 3.379, |
| "step": 40450 |
| }, |
| { |
| "epoch": 11.804279384328359, |
| "grad_norm": 0.36401626467704773, |
| "learning_rate": 0.00045862117235345577, |
| "loss": 3.3815, |
| "step": 40500 |
| }, |
| { |
| "epoch": 11.818854944029852, |
| "grad_norm": 0.33819693326950073, |
| "learning_rate": 0.00045844619422572176, |
| "loss": 3.3767, |
| "step": 40550 |
| }, |
| { |
| "epoch": 11.833430503731343, |
| "grad_norm": 0.3567153811454773, |
| "learning_rate": 0.0004582712160979877, |
| "loss": 3.3868, |
| "step": 40600 |
| }, |
| { |
| "epoch": 11.848006063432836, |
| "grad_norm": 0.3180365562438965, |
| "learning_rate": 0.00045809623797025365, |
| "loss": 3.3854, |
| "step": 40650 |
| }, |
| { |
| "epoch": 11.862581623134329, |
| "grad_norm": 0.3368315100669861, |
| "learning_rate": 0.00045792125984251965, |
| "loss": 3.3879, |
| "step": 40700 |
| }, |
| { |
| "epoch": 11.877157182835822, |
| "grad_norm": 0.3494505286216736, |
| "learning_rate": 0.0004577462817147856, |
| "loss": 3.3775, |
| "step": 40750 |
| }, |
| { |
| "epoch": 11.891732742537313, |
| "grad_norm": 0.3420652747154236, |
| "learning_rate": 0.0004575713035870516, |
| "loss": 3.3773, |
| "step": 40800 |
| }, |
| { |
| "epoch": 11.906308302238806, |
| "grad_norm": 0.343851774930954, |
| "learning_rate": 0.00045739632545931753, |
| "loss": 3.3788, |
| "step": 40850 |
| }, |
| { |
| "epoch": 11.920883861940299, |
| "grad_norm": 0.3384932279586792, |
| "learning_rate": 0.00045722134733158353, |
| "loss": 3.3777, |
| "step": 40900 |
| }, |
| { |
| "epoch": 11.935459421641792, |
| "grad_norm": 0.35529130697250366, |
| "learning_rate": 0.0004570463692038495, |
| "loss": 3.3728, |
| "step": 40950 |
| }, |
| { |
| "epoch": 11.950034981343283, |
| "grad_norm": 0.3340221643447876, |
| "learning_rate": 0.0004568713910761154, |
| "loss": 3.3826, |
| "step": 41000 |
| }, |
| { |
| "epoch": 11.950034981343283, |
| "eval_accuracy": 0.3690850368691576, |
| "eval_loss": 3.5591351985931396, |
| "eval_runtime": 179.5419, |
| "eval_samples_per_second": 92.608, |
| "eval_steps_per_second": 5.793, |
| "step": 41000 |
| }, |
| { |
| "epoch": 11.964610541044776, |
| "grad_norm": 0.33717867732048035, |
| "learning_rate": 0.0004566964129483814, |
| "loss": 3.3718, |
| "step": 41050 |
| }, |
| { |
| "epoch": 11.979186100746269, |
| "grad_norm": 0.34679439663887024, |
| "learning_rate": 0.0004565214348206474, |
| "loss": 3.3889, |
| "step": 41100 |
| }, |
| { |
| "epoch": 11.993761660447761, |
| "grad_norm": 0.3585461378097534, |
| "learning_rate": 0.0004563464566929133, |
| "loss": 3.3777, |
| "step": 41150 |
| }, |
| { |
| "epoch": 12.008162313432836, |
| "grad_norm": 0.33981046080589294, |
| "learning_rate": 0.0004561714785651793, |
| "loss": 3.3212, |
| "step": 41200 |
| }, |
| { |
| "epoch": 12.022737873134329, |
| "grad_norm": 0.3614978790283203, |
| "learning_rate": 0.0004559965004374453, |
| "loss": 3.2726, |
| "step": 41250 |
| }, |
| { |
| "epoch": 12.037313432835822, |
| "grad_norm": 0.33403632044792175, |
| "learning_rate": 0.0004558215223097113, |
| "loss": 3.2689, |
| "step": 41300 |
| }, |
| { |
| "epoch": 12.051888992537313, |
| "grad_norm": 0.34065091609954834, |
| "learning_rate": 0.0004556465441819772, |
| "loss": 3.2765, |
| "step": 41350 |
| }, |
| { |
| "epoch": 12.066464552238806, |
| "grad_norm": 0.34842050075531006, |
| "learning_rate": 0.0004554715660542432, |
| "loss": 3.2862, |
| "step": 41400 |
| }, |
| { |
| "epoch": 12.081040111940299, |
| "grad_norm": 0.3776961863040924, |
| "learning_rate": 0.0004552965879265092, |
| "loss": 3.297, |
| "step": 41450 |
| }, |
| { |
| "epoch": 12.095615671641792, |
| "grad_norm": 0.3554358184337616, |
| "learning_rate": 0.0004551216097987751, |
| "loss": 3.2924, |
| "step": 41500 |
| }, |
| { |
| "epoch": 12.110191231343284, |
| "grad_norm": 0.33396828174591064, |
| "learning_rate": 0.00045494663167104107, |
| "loss": 3.29, |
| "step": 41550 |
| }, |
| { |
| "epoch": 12.124766791044776, |
| "grad_norm": 0.3485698699951172, |
| "learning_rate": 0.00045477165354330706, |
| "loss": 3.3019, |
| "step": 41600 |
| }, |
| { |
| "epoch": 12.139342350746269, |
| "grad_norm": 0.32045650482177734, |
| "learning_rate": 0.000454596675415573, |
| "loss": 3.3071, |
| "step": 41650 |
| }, |
| { |
| "epoch": 12.153917910447761, |
| "grad_norm": 0.35876530408859253, |
| "learning_rate": 0.00045442169728783895, |
| "loss": 3.3071, |
| "step": 41700 |
| }, |
| { |
| "epoch": 12.168493470149254, |
| "grad_norm": 0.3410411477088928, |
| "learning_rate": 0.00045424671916010495, |
| "loss": 3.3152, |
| "step": 41750 |
| }, |
| { |
| "epoch": 12.183069029850746, |
| "grad_norm": 0.36790353059768677, |
| "learning_rate": 0.0004540717410323709, |
| "loss": 3.3032, |
| "step": 41800 |
| }, |
| { |
| "epoch": 12.197644589552239, |
| "grad_norm": 0.33544808626174927, |
| "learning_rate": 0.0004538967629046369, |
| "loss": 3.3031, |
| "step": 41850 |
| }, |
| { |
| "epoch": 12.212220149253731, |
| "grad_norm": 0.35613003373146057, |
| "learning_rate": 0.00045372178477690283, |
| "loss": 3.3092, |
| "step": 41900 |
| }, |
| { |
| "epoch": 12.226795708955224, |
| "grad_norm": 0.3690536320209503, |
| "learning_rate": 0.0004535468066491688, |
| "loss": 3.3118, |
| "step": 41950 |
| }, |
| { |
| "epoch": 12.241371268656717, |
| "grad_norm": 0.35943329334259033, |
| "learning_rate": 0.0004533718285214348, |
| "loss": 3.3125, |
| "step": 42000 |
| }, |
| { |
| "epoch": 12.241371268656717, |
| "eval_accuracy": 0.36872500190374813, |
| "eval_loss": 3.57114839553833, |
| "eval_runtime": 179.3203, |
| "eval_samples_per_second": 92.722, |
| "eval_steps_per_second": 5.8, |
| "step": 42000 |
| }, |
| { |
| "epoch": 12.255946828358208, |
| "grad_norm": 0.3558652102947235, |
| "learning_rate": 0.00045319685039370077, |
| "loss": 3.3197, |
| "step": 42050 |
| }, |
| { |
| "epoch": 12.270522388059701, |
| "grad_norm": 0.36180514097213745, |
| "learning_rate": 0.0004530218722659667, |
| "loss": 3.3216, |
| "step": 42100 |
| }, |
| { |
| "epoch": 12.285097947761194, |
| "grad_norm": 0.34139835834503174, |
| "learning_rate": 0.00045284689413823266, |
| "loss": 3.3106, |
| "step": 42150 |
| }, |
| { |
| "epoch": 12.299673507462687, |
| "grad_norm": 0.3567177951335907, |
| "learning_rate": 0.00045267191601049866, |
| "loss": 3.3276, |
| "step": 42200 |
| }, |
| { |
| "epoch": 12.314249067164178, |
| "grad_norm": 0.3402513861656189, |
| "learning_rate": 0.00045249693788276465, |
| "loss": 3.3162, |
| "step": 42250 |
| }, |
| { |
| "epoch": 12.328824626865671, |
| "grad_norm": 0.3365866243839264, |
| "learning_rate": 0.00045232195975503054, |
| "loss": 3.3296, |
| "step": 42300 |
| }, |
| { |
| "epoch": 12.343400186567164, |
| "grad_norm": 0.39586734771728516, |
| "learning_rate": 0.00045214698162729654, |
| "loss": 3.3355, |
| "step": 42350 |
| }, |
| { |
| "epoch": 12.357975746268657, |
| "grad_norm": 0.3556031882762909, |
| "learning_rate": 0.00045197200349956254, |
| "loss": 3.3272, |
| "step": 42400 |
| }, |
| { |
| "epoch": 12.372551305970148, |
| "grad_norm": 0.342041939496994, |
| "learning_rate": 0.00045179702537182854, |
| "loss": 3.3228, |
| "step": 42450 |
| }, |
| { |
| "epoch": 12.387126865671641, |
| "grad_norm": 0.35764795541763306, |
| "learning_rate": 0.0004516220472440944, |
| "loss": 3.3353, |
| "step": 42500 |
| }, |
| { |
| "epoch": 12.401702425373134, |
| "grad_norm": 0.37403562664985657, |
| "learning_rate": 0.0004514470691163604, |
| "loss": 3.3313, |
| "step": 42550 |
| }, |
| { |
| "epoch": 12.416277985074627, |
| "grad_norm": 0.33595719933509827, |
| "learning_rate": 0.0004512720909886264, |
| "loss": 3.3267, |
| "step": 42600 |
| }, |
| { |
| "epoch": 12.43085354477612, |
| "grad_norm": 0.3399653732776642, |
| "learning_rate": 0.0004510971128608923, |
| "loss": 3.3374, |
| "step": 42650 |
| }, |
| { |
| "epoch": 12.445429104477611, |
| "grad_norm": 0.37351250648498535, |
| "learning_rate": 0.0004509221347331583, |
| "loss": 3.3454, |
| "step": 42700 |
| }, |
| { |
| "epoch": 12.460004664179104, |
| "grad_norm": 0.37687429785728455, |
| "learning_rate": 0.0004507471566054243, |
| "loss": 3.3437, |
| "step": 42750 |
| }, |
| { |
| "epoch": 12.474580223880597, |
| "grad_norm": 0.3576935827732086, |
| "learning_rate": 0.00045057217847769025, |
| "loss": 3.3292, |
| "step": 42800 |
| }, |
| { |
| "epoch": 12.48915578358209, |
| "grad_norm": 0.3471689522266388, |
| "learning_rate": 0.0004503972003499562, |
| "loss": 3.345, |
| "step": 42850 |
| }, |
| { |
| "epoch": 12.503731343283581, |
| "grad_norm": 0.350300133228302, |
| "learning_rate": 0.0004502222222222222, |
| "loss": 3.3493, |
| "step": 42900 |
| }, |
| { |
| "epoch": 12.518306902985074, |
| "grad_norm": 0.3431214988231659, |
| "learning_rate": 0.00045004724409448813, |
| "loss": 3.3476, |
| "step": 42950 |
| }, |
| { |
| "epoch": 12.532882462686567, |
| "grad_norm": 0.32879090309143066, |
| "learning_rate": 0.00044987226596675413, |
| "loss": 3.3482, |
| "step": 43000 |
| }, |
| { |
| "epoch": 12.532882462686567, |
| "eval_accuracy": 0.36874771741480533, |
| "eval_loss": 3.5677340030670166, |
| "eval_runtime": 179.625, |
| "eval_samples_per_second": 92.565, |
| "eval_steps_per_second": 5.79, |
| "step": 43000 |
| }, |
| { |
| "epoch": 12.54745802238806, |
| "grad_norm": 0.3496828079223633, |
| "learning_rate": 0.0004496972878390201, |
| "loss": 3.3493, |
| "step": 43050 |
| }, |
| { |
| "epoch": 12.562033582089553, |
| "grad_norm": 0.33506613969802856, |
| "learning_rate": 0.000449522309711286, |
| "loss": 3.34, |
| "step": 43100 |
| }, |
| { |
| "epoch": 12.576609141791044, |
| "grad_norm": 0.3366851210594177, |
| "learning_rate": 0.000449347331583552, |
| "loss": 3.3462, |
| "step": 43150 |
| }, |
| { |
| "epoch": 12.591184701492537, |
| "grad_norm": 0.3410353660583496, |
| "learning_rate": 0.000449172353455818, |
| "loss": 3.3391, |
| "step": 43200 |
| }, |
| { |
| "epoch": 12.60576026119403, |
| "grad_norm": 0.3703427314758301, |
| "learning_rate": 0.0004489973753280839, |
| "loss": 3.3415, |
| "step": 43250 |
| }, |
| { |
| "epoch": 12.620335820895523, |
| "grad_norm": 0.3844320476055145, |
| "learning_rate": 0.0004488223972003499, |
| "loss": 3.345, |
| "step": 43300 |
| }, |
| { |
| "epoch": 12.634911380597014, |
| "grad_norm": 0.3453065752983093, |
| "learning_rate": 0.0004486474190726159, |
| "loss": 3.3518, |
| "step": 43350 |
| }, |
| { |
| "epoch": 12.649486940298507, |
| "grad_norm": 0.3349737524986267, |
| "learning_rate": 0.0004484724409448819, |
| "loss": 3.3642, |
| "step": 43400 |
| }, |
| { |
| "epoch": 12.6640625, |
| "grad_norm": 0.36272352933883667, |
| "learning_rate": 0.0004482974628171478, |
| "loss": 3.3583, |
| "step": 43450 |
| }, |
| { |
| "epoch": 12.678638059701493, |
| "grad_norm": 0.3413692116737366, |
| "learning_rate": 0.0004481224846894138, |
| "loss": 3.3481, |
| "step": 43500 |
| }, |
| { |
| "epoch": 12.693213619402986, |
| "grad_norm": 0.33000117540359497, |
| "learning_rate": 0.0004479475065616798, |
| "loss": 3.3618, |
| "step": 43550 |
| }, |
| { |
| "epoch": 12.707789179104477, |
| "grad_norm": 0.35906121134757996, |
| "learning_rate": 0.00044777252843394567, |
| "loss": 3.3522, |
| "step": 43600 |
| }, |
| { |
| "epoch": 12.72236473880597, |
| "grad_norm": 0.3547782003879547, |
| "learning_rate": 0.00044759755030621167, |
| "loss": 3.3482, |
| "step": 43650 |
| }, |
| { |
| "epoch": 12.736940298507463, |
| "grad_norm": 0.346333384513855, |
| "learning_rate": 0.00044742257217847767, |
| "loss": 3.3648, |
| "step": 43700 |
| }, |
| { |
| "epoch": 12.751515858208956, |
| "grad_norm": 0.313652366399765, |
| "learning_rate": 0.00044724759405074366, |
| "loss": 3.3496, |
| "step": 43750 |
| }, |
| { |
| "epoch": 12.766091417910447, |
| "grad_norm": 0.3224954605102539, |
| "learning_rate": 0.00044707261592300955, |
| "loss": 3.3401, |
| "step": 43800 |
| }, |
| { |
| "epoch": 12.78066697761194, |
| "grad_norm": 0.3389032781124115, |
| "learning_rate": 0.00044689763779527555, |
| "loss": 3.3629, |
| "step": 43850 |
| }, |
| { |
| "epoch": 12.795242537313433, |
| "grad_norm": 0.3401406705379486, |
| "learning_rate": 0.00044672265966754155, |
| "loss": 3.3652, |
| "step": 43900 |
| }, |
| { |
| "epoch": 12.809818097014926, |
| "grad_norm": 0.337515264749527, |
| "learning_rate": 0.0004465476815398075, |
| "loss": 3.3559, |
| "step": 43950 |
| }, |
| { |
| "epoch": 12.824393656716419, |
| "grad_norm": 0.368762344121933, |
| "learning_rate": 0.00044637270341207344, |
| "loss": 3.3548, |
| "step": 44000 |
| }, |
| { |
| "epoch": 12.824393656716419, |
| "eval_accuracy": 0.3694424825017004, |
| "eval_loss": 3.5560858249664307, |
| "eval_runtime": 179.4895, |
| "eval_samples_per_second": 92.635, |
| "eval_steps_per_second": 5.794, |
| "step": 44000 |
| }, |
| { |
| "epoch": 12.83896921641791, |
| "grad_norm": 0.3303767740726471, |
| "learning_rate": 0.00044619772528433943, |
| "loss": 3.3674, |
| "step": 44050 |
| }, |
| { |
| "epoch": 12.853544776119403, |
| "grad_norm": 0.3471069931983948, |
| "learning_rate": 0.0004460227471566054, |
| "loss": 3.3569, |
| "step": 44100 |
| }, |
| { |
| "epoch": 12.868120335820896, |
| "grad_norm": 0.3295342028141022, |
| "learning_rate": 0.0004458477690288714, |
| "loss": 3.3561, |
| "step": 44150 |
| }, |
| { |
| "epoch": 12.882695895522389, |
| "grad_norm": 0.352363646030426, |
| "learning_rate": 0.0004456727909011373, |
| "loss": 3.3687, |
| "step": 44200 |
| }, |
| { |
| "epoch": 12.89727145522388, |
| "grad_norm": 0.3398921489715576, |
| "learning_rate": 0.00044549781277340326, |
| "loss": 3.3718, |
| "step": 44250 |
| }, |
| { |
| "epoch": 12.911847014925373, |
| "grad_norm": 0.354624480009079, |
| "learning_rate": 0.00044532283464566926, |
| "loss": 3.3674, |
| "step": 44300 |
| }, |
| { |
| "epoch": 12.926422574626866, |
| "grad_norm": 0.3335941433906555, |
| "learning_rate": 0.0004451478565179352, |
| "loss": 3.3619, |
| "step": 44350 |
| }, |
| { |
| "epoch": 12.940998134328359, |
| "grad_norm": 0.3419336974620819, |
| "learning_rate": 0.00044497287839020115, |
| "loss": 3.3593, |
| "step": 44400 |
| }, |
| { |
| "epoch": 12.955573694029852, |
| "grad_norm": 0.3105829358100891, |
| "learning_rate": 0.00044479790026246714, |
| "loss": 3.3697, |
| "step": 44450 |
| }, |
| { |
| "epoch": 12.970149253731343, |
| "grad_norm": 0.32766225934028625, |
| "learning_rate": 0.00044462292213473314, |
| "loss": 3.3784, |
| "step": 44500 |
| }, |
| { |
| "epoch": 12.984724813432836, |
| "grad_norm": 0.3454512059688568, |
| "learning_rate": 0.00044444794400699903, |
| "loss": 3.3585, |
| "step": 44550 |
| }, |
| { |
| "epoch": 12.999300373134329, |
| "grad_norm": 0.3578594923019409, |
| "learning_rate": 0.00044427296587926503, |
| "loss": 3.3594, |
| "step": 44600 |
| }, |
| { |
| "epoch": 13.013701026119403, |
| "grad_norm": 0.34886470437049866, |
| "learning_rate": 0.000444097987751531, |
| "loss": 3.2495, |
| "step": 44650 |
| }, |
| { |
| "epoch": 13.028276585820896, |
| "grad_norm": 0.3374779522418976, |
| "learning_rate": 0.000443923009623797, |
| "loss": 3.2504, |
| "step": 44700 |
| }, |
| { |
| "epoch": 13.042852145522389, |
| "grad_norm": 0.34586066007614136, |
| "learning_rate": 0.0004437480314960629, |
| "loss": 3.2695, |
| "step": 44750 |
| }, |
| { |
| "epoch": 13.05742770522388, |
| "grad_norm": 0.3878658413887024, |
| "learning_rate": 0.0004435730533683289, |
| "loss": 3.2661, |
| "step": 44800 |
| }, |
| { |
| "epoch": 13.072003264925373, |
| "grad_norm": 0.3439328372478485, |
| "learning_rate": 0.0004433980752405949, |
| "loss": 3.2634, |
| "step": 44850 |
| }, |
| { |
| "epoch": 13.086578824626866, |
| "grad_norm": 0.3657079339027405, |
| "learning_rate": 0.0004432230971128609, |
| "loss": 3.2741, |
| "step": 44900 |
| }, |
| { |
| "epoch": 13.101154384328359, |
| "grad_norm": 0.36978641152381897, |
| "learning_rate": 0.0004430481189851268, |
| "loss": 3.29, |
| "step": 44950 |
| }, |
| { |
| "epoch": 13.115729944029852, |
| "grad_norm": 0.3556627333164215, |
| "learning_rate": 0.0004428731408573928, |
| "loss": 3.2723, |
| "step": 45000 |
| }, |
| { |
| "epoch": 13.115729944029852, |
| "eval_accuracy": 0.3688253974008041, |
| "eval_loss": 3.573399066925049, |
| "eval_runtime": 179.4112, |
| "eval_samples_per_second": 92.675, |
| "eval_steps_per_second": 5.797, |
| "step": 45000 |
| }, |
| { |
| "epoch": 13.130305503731343, |
| "grad_norm": 0.3487858474254608, |
| "learning_rate": 0.0004426981627296588, |
| "loss": 3.2886, |
| "step": 45050 |
| }, |
| { |
| "epoch": 13.144881063432836, |
| "grad_norm": 0.3508056104183197, |
| "learning_rate": 0.00044252318460192473, |
| "loss": 3.2857, |
| "step": 45100 |
| }, |
| { |
| "epoch": 13.159456623134329, |
| "grad_norm": 0.3573300242424011, |
| "learning_rate": 0.0004423482064741907, |
| "loss": 3.2804, |
| "step": 45150 |
| }, |
| { |
| "epoch": 13.174032182835822, |
| "grad_norm": 0.364335298538208, |
| "learning_rate": 0.0004421732283464567, |
| "loss": 3.3004, |
| "step": 45200 |
| }, |
| { |
| "epoch": 13.188607742537313, |
| "grad_norm": 0.35763829946517944, |
| "learning_rate": 0.0004419982502187226, |
| "loss": 3.2839, |
| "step": 45250 |
| }, |
| { |
| "epoch": 13.203183302238806, |
| "grad_norm": 0.39192068576812744, |
| "learning_rate": 0.00044182327209098856, |
| "loss": 3.2913, |
| "step": 45300 |
| }, |
| { |
| "epoch": 13.217758861940299, |
| "grad_norm": 0.3512227535247803, |
| "learning_rate": 0.00044164829396325456, |
| "loss": 3.3005, |
| "step": 45350 |
| }, |
| { |
| "epoch": 13.232334421641792, |
| "grad_norm": 0.36455291509628296, |
| "learning_rate": 0.0004414733158355205, |
| "loss": 3.3018, |
| "step": 45400 |
| }, |
| { |
| "epoch": 13.246909981343283, |
| "grad_norm": 0.3636734187602997, |
| "learning_rate": 0.0004412983377077865, |
| "loss": 3.3025, |
| "step": 45450 |
| }, |
| { |
| "epoch": 13.261485541044776, |
| "grad_norm": 0.36011579632759094, |
| "learning_rate": 0.00044112335958005244, |
| "loss": 3.3056, |
| "step": 45500 |
| }, |
| { |
| "epoch": 13.276061100746269, |
| "grad_norm": 0.36715275049209595, |
| "learning_rate": 0.0004409483814523184, |
| "loss": 3.3089, |
| "step": 45550 |
| }, |
| { |
| "epoch": 13.290636660447761, |
| "grad_norm": 0.3400384187698364, |
| "learning_rate": 0.0004407734033245844, |
| "loss": 3.2957, |
| "step": 45600 |
| }, |
| { |
| "epoch": 13.305212220149254, |
| "grad_norm": 0.3540400266647339, |
| "learning_rate": 0.0004405984251968504, |
| "loss": 3.3109, |
| "step": 45650 |
| }, |
| { |
| "epoch": 13.319787779850746, |
| "grad_norm": 0.37732866406440735, |
| "learning_rate": 0.0004404234470691163, |
| "loss": 3.3153, |
| "step": 45700 |
| }, |
| { |
| "epoch": 13.334363339552239, |
| "grad_norm": 0.37615445256233215, |
| "learning_rate": 0.00044024846894138227, |
| "loss": 3.3183, |
| "step": 45750 |
| }, |
| { |
| "epoch": 13.348938899253731, |
| "grad_norm": 0.33871808648109436, |
| "learning_rate": 0.00044007349081364827, |
| "loss": 3.3111, |
| "step": 45800 |
| }, |
| { |
| "epoch": 13.363514458955224, |
| "grad_norm": 0.3805575966835022, |
| "learning_rate": 0.00043989851268591427, |
| "loss": 3.3073, |
| "step": 45850 |
| }, |
| { |
| "epoch": 13.378090018656717, |
| "grad_norm": 0.3696669340133667, |
| "learning_rate": 0.00043972353455818016, |
| "loss": 3.3173, |
| "step": 45900 |
| }, |
| { |
| "epoch": 13.392665578358208, |
| "grad_norm": 0.36460769176483154, |
| "learning_rate": 0.00043954855643044615, |
| "loss": 3.318, |
| "step": 45950 |
| }, |
| { |
| "epoch": 13.407241138059701, |
| "grad_norm": 0.36172017455101013, |
| "learning_rate": 0.00043937357830271215, |
| "loss": 3.3128, |
| "step": 46000 |
| }, |
| { |
| "epoch": 13.407241138059701, |
| "eval_accuracy": 0.36958960368730415, |
| "eval_loss": 3.5665695667266846, |
| "eval_runtime": 179.2795, |
| "eval_samples_per_second": 92.743, |
| "eval_steps_per_second": 5.801, |
| "step": 46000 |
| }, |
| { |
| "epoch": 13.421816697761194, |
| "grad_norm": 0.339266836643219, |
| "learning_rate": 0.0004391986001749781, |
| "loss": 3.3222, |
| "step": 46050 |
| }, |
| { |
| "epoch": 13.436392257462687, |
| "grad_norm": 0.33610814809799194, |
| "learning_rate": 0.00043902362204724404, |
| "loss": 3.3171, |
| "step": 46100 |
| }, |
| { |
| "epoch": 13.450967817164178, |
| "grad_norm": 0.34440693259239197, |
| "learning_rate": 0.00043884864391951004, |
| "loss": 3.3146, |
| "step": 46150 |
| }, |
| { |
| "epoch": 13.465543376865671, |
| "grad_norm": 0.35377469658851624, |
| "learning_rate": 0.00043867366579177603, |
| "loss": 3.3186, |
| "step": 46200 |
| }, |
| { |
| "epoch": 13.480118936567164, |
| "grad_norm": 0.3478979766368866, |
| "learning_rate": 0.0004384986876640419, |
| "loss": 3.3134, |
| "step": 46250 |
| }, |
| { |
| "epoch": 13.494694496268657, |
| "grad_norm": 0.3551417887210846, |
| "learning_rate": 0.0004383237095363079, |
| "loss": 3.321, |
| "step": 46300 |
| }, |
| { |
| "epoch": 13.509270055970148, |
| "grad_norm": 0.3469100594520569, |
| "learning_rate": 0.0004381487314085739, |
| "loss": 3.3224, |
| "step": 46350 |
| }, |
| { |
| "epoch": 13.523845615671641, |
| "grad_norm": 0.3445916771888733, |
| "learning_rate": 0.00043797375328083986, |
| "loss": 3.3258, |
| "step": 46400 |
| }, |
| { |
| "epoch": 13.538421175373134, |
| "grad_norm": 0.33777162432670593, |
| "learning_rate": 0.0004377987751531058, |
| "loss": 3.3198, |
| "step": 46450 |
| }, |
| { |
| "epoch": 13.552996735074627, |
| "grad_norm": 0.34429651498794556, |
| "learning_rate": 0.0004376237970253718, |
| "loss": 3.3506, |
| "step": 46500 |
| }, |
| { |
| "epoch": 13.56757229477612, |
| "grad_norm": 0.35852015018463135, |
| "learning_rate": 0.00043744881889763775, |
| "loss": 3.3238, |
| "step": 46550 |
| }, |
| { |
| "epoch": 13.582147854477611, |
| "grad_norm": 0.34117794036865234, |
| "learning_rate": 0.00043727384076990374, |
| "loss": 3.3247, |
| "step": 46600 |
| }, |
| { |
| "epoch": 13.596723414179104, |
| "grad_norm": 0.39330777525901794, |
| "learning_rate": 0.0004370988626421697, |
| "loss": 3.3289, |
| "step": 46650 |
| }, |
| { |
| "epoch": 13.611298973880597, |
| "grad_norm": 0.34990638494491577, |
| "learning_rate": 0.00043692388451443563, |
| "loss": 3.341, |
| "step": 46700 |
| }, |
| { |
| "epoch": 13.62587453358209, |
| "grad_norm": 0.330219566822052, |
| "learning_rate": 0.00043674890638670163, |
| "loss": 3.3356, |
| "step": 46750 |
| }, |
| { |
| "epoch": 13.640450093283581, |
| "grad_norm": 0.38839754462242126, |
| "learning_rate": 0.0004365739282589676, |
| "loss": 3.3439, |
| "step": 46800 |
| }, |
| { |
| "epoch": 13.655025652985074, |
| "grad_norm": 0.33825257420539856, |
| "learning_rate": 0.0004363989501312335, |
| "loss": 3.3384, |
| "step": 46850 |
| }, |
| { |
| "epoch": 13.669601212686567, |
| "grad_norm": 0.3526383638381958, |
| "learning_rate": 0.0004362239720034995, |
| "loss": 3.3325, |
| "step": 46900 |
| }, |
| { |
| "epoch": 13.68417677238806, |
| "grad_norm": 0.38543063402175903, |
| "learning_rate": 0.0004360489938757655, |
| "loss": 3.3314, |
| "step": 46950 |
| }, |
| { |
| "epoch": 13.698752332089553, |
| "grad_norm": 0.3445545434951782, |
| "learning_rate": 0.0004358740157480315, |
| "loss": 3.3554, |
| "step": 47000 |
| }, |
| { |
| "epoch": 13.698752332089553, |
| "eval_accuracy": 0.37001095876287327, |
| "eval_loss": 3.55552339553833, |
| "eval_runtime": 179.3748, |
| "eval_samples_per_second": 92.694, |
| "eval_steps_per_second": 5.798, |
| "step": 47000 |
| }, |
| { |
| "epoch": 13.713327891791044, |
| "grad_norm": 0.34910300374031067, |
| "learning_rate": 0.0004356990376202974, |
| "loss": 3.3439, |
| "step": 47050 |
| }, |
| { |
| "epoch": 13.727903451492537, |
| "grad_norm": 0.3361404240131378, |
| "learning_rate": 0.0004355240594925634, |
| "loss": 3.3431, |
| "step": 47100 |
| }, |
| { |
| "epoch": 13.74247901119403, |
| "grad_norm": 0.37516549229621887, |
| "learning_rate": 0.0004353490813648294, |
| "loss": 3.3323, |
| "step": 47150 |
| }, |
| { |
| "epoch": 13.757054570895523, |
| "grad_norm": 0.3573398292064667, |
| "learning_rate": 0.0004351741032370953, |
| "loss": 3.3324, |
| "step": 47200 |
| }, |
| { |
| "epoch": 13.771630130597014, |
| "grad_norm": 0.3558635115623474, |
| "learning_rate": 0.0004349991251093613, |
| "loss": 3.3542, |
| "step": 47250 |
| }, |
| { |
| "epoch": 13.786205690298507, |
| "grad_norm": 0.36393576860427856, |
| "learning_rate": 0.0004348241469816273, |
| "loss": 3.3417, |
| "step": 47300 |
| }, |
| { |
| "epoch": 13.80078125, |
| "grad_norm": 0.34120824933052063, |
| "learning_rate": 0.0004346491688538932, |
| "loss": 3.3343, |
| "step": 47350 |
| }, |
| { |
| "epoch": 13.815356809701493, |
| "grad_norm": 0.36425623297691345, |
| "learning_rate": 0.00043447419072615916, |
| "loss": 3.3608, |
| "step": 47400 |
| }, |
| { |
| "epoch": 13.829932369402986, |
| "grad_norm": 0.34796497225761414, |
| "learning_rate": 0.00043429921259842516, |
| "loss": 3.3407, |
| "step": 47450 |
| }, |
| { |
| "epoch": 13.844507929104477, |
| "grad_norm": 0.33243417739868164, |
| "learning_rate": 0.00043412423447069116, |
| "loss": 3.3386, |
| "step": 47500 |
| }, |
| { |
| "epoch": 13.85908348880597, |
| "grad_norm": 0.3396977186203003, |
| "learning_rate": 0.0004339492563429571, |
| "loss": 3.3475, |
| "step": 47550 |
| }, |
| { |
| "epoch": 13.873659048507463, |
| "grad_norm": 0.33732032775878906, |
| "learning_rate": 0.00043377427821522305, |
| "loss": 3.34, |
| "step": 47600 |
| }, |
| { |
| "epoch": 13.888234608208956, |
| "grad_norm": 0.3958549499511719, |
| "learning_rate": 0.00043359930008748904, |
| "loss": 3.3435, |
| "step": 47650 |
| }, |
| { |
| "epoch": 13.902810167910447, |
| "grad_norm": 0.35972535610198975, |
| "learning_rate": 0.000433424321959755, |
| "loss": 3.3638, |
| "step": 47700 |
| }, |
| { |
| "epoch": 13.91738572761194, |
| "grad_norm": 0.37601491808891296, |
| "learning_rate": 0.000433249343832021, |
| "loss": 3.3502, |
| "step": 47750 |
| }, |
| { |
| "epoch": 13.931961287313433, |
| "grad_norm": 0.3627243936061859, |
| "learning_rate": 0.00043307436570428693, |
| "loss": 3.3341, |
| "step": 47800 |
| }, |
| { |
| "epoch": 13.946536847014926, |
| "grad_norm": 0.34104883670806885, |
| "learning_rate": 0.00043289938757655287, |
| "loss": 3.3529, |
| "step": 47850 |
| }, |
| { |
| "epoch": 13.961112406716419, |
| "grad_norm": 0.35523277521133423, |
| "learning_rate": 0.00043272440944881887, |
| "loss": 3.3577, |
| "step": 47900 |
| }, |
| { |
| "epoch": 13.97568796641791, |
| "grad_norm": 0.3545657694339752, |
| "learning_rate": 0.0004325494313210848, |
| "loss": 3.348, |
| "step": 47950 |
| }, |
| { |
| "epoch": 13.990263526119403, |
| "grad_norm": 0.3553575277328491, |
| "learning_rate": 0.00043237445319335076, |
| "loss": 3.3418, |
| "step": 48000 |
| }, |
| { |
| "epoch": 13.990263526119403, |
| "eval_accuracy": 0.3703100267089685, |
| "eval_loss": 3.550926923751831, |
| "eval_runtime": 179.4655, |
| "eval_samples_per_second": 92.647, |
| "eval_steps_per_second": 5.795, |
| "step": 48000 |
| }, |
| { |
| "epoch": 14.004664179104477, |
| "grad_norm": 0.3258715271949768, |
| "learning_rate": 0.00043219947506561676, |
| "loss": 3.3135, |
| "step": 48050 |
| }, |
| { |
| "epoch": 14.01923973880597, |
| "grad_norm": 0.36811158061027527, |
| "learning_rate": 0.00043202449693788275, |
| "loss": 3.2429, |
| "step": 48100 |
| }, |
| { |
| "epoch": 14.033815298507463, |
| "grad_norm": 0.3549216687679291, |
| "learning_rate": 0.00043184951881014864, |
| "loss": 3.2336, |
| "step": 48150 |
| }, |
| { |
| "epoch": 14.048390858208956, |
| "grad_norm": 0.37687474489212036, |
| "learning_rate": 0.00043167454068241464, |
| "loss": 3.2607, |
| "step": 48200 |
| }, |
| { |
| "epoch": 14.062966417910447, |
| "grad_norm": 0.37216871976852417, |
| "learning_rate": 0.00043149956255468064, |
| "loss": 3.2459, |
| "step": 48250 |
| }, |
| { |
| "epoch": 14.07754197761194, |
| "grad_norm": 0.36139625310897827, |
| "learning_rate": 0.00043132458442694664, |
| "loss": 3.2678, |
| "step": 48300 |
| }, |
| { |
| "epoch": 14.092117537313433, |
| "grad_norm": 0.33921825885772705, |
| "learning_rate": 0.0004311496062992125, |
| "loss": 3.2559, |
| "step": 48350 |
| }, |
| { |
| "epoch": 14.106693097014926, |
| "grad_norm": 0.3851219415664673, |
| "learning_rate": 0.0004309746281714785, |
| "loss": 3.271, |
| "step": 48400 |
| }, |
| { |
| "epoch": 14.121268656716419, |
| "grad_norm": 0.38296273350715637, |
| "learning_rate": 0.0004307996500437445, |
| "loss": 3.2576, |
| "step": 48450 |
| }, |
| { |
| "epoch": 14.13584421641791, |
| "grad_norm": 0.3686645030975342, |
| "learning_rate": 0.00043062467191601046, |
| "loss": 3.2749, |
| "step": 48500 |
| }, |
| { |
| "epoch": 14.150419776119403, |
| "grad_norm": 0.3671486973762512, |
| "learning_rate": 0.0004304496937882764, |
| "loss": 3.2747, |
| "step": 48550 |
| }, |
| { |
| "epoch": 14.164995335820896, |
| "grad_norm": 0.3503707945346832, |
| "learning_rate": 0.0004302747156605424, |
| "loss": 3.2658, |
| "step": 48600 |
| }, |
| { |
| "epoch": 14.179570895522389, |
| "grad_norm": 0.3670405447483063, |
| "learning_rate": 0.00043009973753280835, |
| "loss": 3.2812, |
| "step": 48650 |
| }, |
| { |
| "epoch": 14.19414645522388, |
| "grad_norm": 0.37047263979911804, |
| "learning_rate": 0.00042992475940507435, |
| "loss": 3.2761, |
| "step": 48700 |
| }, |
| { |
| "epoch": 14.208722014925373, |
| "grad_norm": 0.34551745653152466, |
| "learning_rate": 0.0004297497812773403, |
| "loss": 3.2868, |
| "step": 48750 |
| }, |
| { |
| "epoch": 14.223297574626866, |
| "grad_norm": 0.3535717725753784, |
| "learning_rate": 0.0004295748031496063, |
| "loss": 3.2819, |
| "step": 48800 |
| }, |
| { |
| "epoch": 14.237873134328359, |
| "grad_norm": 0.3632153868675232, |
| "learning_rate": 0.00042939982502187223, |
| "loss": 3.2709, |
| "step": 48850 |
| }, |
| { |
| "epoch": 14.252448694029852, |
| "grad_norm": 0.3754744231700897, |
| "learning_rate": 0.0004292248468941382, |
| "loss": 3.2988, |
| "step": 48900 |
| }, |
| { |
| "epoch": 14.267024253731343, |
| "grad_norm": 0.35462021827697754, |
| "learning_rate": 0.00042904986876640417, |
| "loss": 3.2895, |
| "step": 48950 |
| }, |
| { |
| "epoch": 14.281599813432836, |
| "grad_norm": 0.37853652238845825, |
| "learning_rate": 0.0004288748906386701, |
| "loss": 3.2873, |
| "step": 49000 |
| }, |
| { |
| "epoch": 14.281599813432836, |
| "eval_accuracy": 0.3699119756291991, |
| "eval_loss": 3.564150333404541, |
| "eval_runtime": 179.6002, |
| "eval_samples_per_second": 92.578, |
| "eval_steps_per_second": 5.791, |
| "step": 49000 |
| }, |
| { |
| "epoch": 14.296175373134329, |
| "grad_norm": 0.38061970472335815, |
| "learning_rate": 0.0004286999125109361, |
| "loss": 3.2949, |
| "step": 49050 |
| }, |
| { |
| "epoch": 14.310750932835822, |
| "grad_norm": 0.35941916704177856, |
| "learning_rate": 0.00042852493438320206, |
| "loss": 3.2906, |
| "step": 49100 |
| }, |
| { |
| "epoch": 14.325326492537313, |
| "grad_norm": 0.37919649481773376, |
| "learning_rate": 0.000428349956255468, |
| "loss": 3.2912, |
| "step": 49150 |
| }, |
| { |
| "epoch": 14.339902052238806, |
| "grad_norm": 0.3739522099494934, |
| "learning_rate": 0.000428174978127734, |
| "loss": 3.2909, |
| "step": 49200 |
| }, |
| { |
| "epoch": 14.354477611940299, |
| "grad_norm": 0.40394482016563416, |
| "learning_rate": 0.000428, |
| "loss": 3.299, |
| "step": 49250 |
| }, |
| { |
| "epoch": 14.369053171641792, |
| "grad_norm": 0.37621989846229553, |
| "learning_rate": 0.0004278250218722659, |
| "loss": 3.3031, |
| "step": 49300 |
| }, |
| { |
| "epoch": 14.383628731343283, |
| "grad_norm": 0.3909219205379486, |
| "learning_rate": 0.0004276500437445319, |
| "loss": 3.2998, |
| "step": 49350 |
| }, |
| { |
| "epoch": 14.398204291044776, |
| "grad_norm": 0.3651144802570343, |
| "learning_rate": 0.0004274750656167979, |
| "loss": 3.2946, |
| "step": 49400 |
| }, |
| { |
| "epoch": 14.412779850746269, |
| "grad_norm": 0.35360461473464966, |
| "learning_rate": 0.0004273000874890639, |
| "loss": 3.3135, |
| "step": 49450 |
| }, |
| { |
| "epoch": 14.427355410447761, |
| "grad_norm": 0.4259098172187805, |
| "learning_rate": 0.00042712510936132977, |
| "loss": 3.2984, |
| "step": 49500 |
| }, |
| { |
| "epoch": 14.441930970149254, |
| "grad_norm": 0.36055222153663635, |
| "learning_rate": 0.00042695013123359576, |
| "loss": 3.3084, |
| "step": 49550 |
| }, |
| { |
| "epoch": 14.456506529850746, |
| "grad_norm": 0.3428303897380829, |
| "learning_rate": 0.00042677515310586176, |
| "loss": 3.3099, |
| "step": 49600 |
| }, |
| { |
| "epoch": 14.471082089552239, |
| "grad_norm": 0.38759660720825195, |
| "learning_rate": 0.0004266001749781277, |
| "loss": 3.31, |
| "step": 49650 |
| }, |
| { |
| "epoch": 14.485657649253731, |
| "grad_norm": 0.36404454708099365, |
| "learning_rate": 0.00042642519685039365, |
| "loss": 3.3048, |
| "step": 49700 |
| }, |
| { |
| "epoch": 14.500233208955224, |
| "grad_norm": 0.37980198860168457, |
| "learning_rate": 0.00042625021872265965, |
| "loss": 3.2926, |
| "step": 49750 |
| }, |
| { |
| "epoch": 14.514808768656717, |
| "grad_norm": 0.34508880972862244, |
| "learning_rate": 0.0004260752405949256, |
| "loss": 3.301, |
| "step": 49800 |
| }, |
| { |
| "epoch": 14.529384328358208, |
| "grad_norm": 0.3469710946083069, |
| "learning_rate": 0.00042590026246719153, |
| "loss": 3.3027, |
| "step": 49850 |
| }, |
| { |
| "epoch": 14.543959888059701, |
| "grad_norm": 0.3309904634952545, |
| "learning_rate": 0.00042572528433945753, |
| "loss": 3.3137, |
| "step": 49900 |
| }, |
| { |
| "epoch": 14.558535447761194, |
| "grad_norm": 0.3597103953361511, |
| "learning_rate": 0.0004255503062117235, |
| "loss": 3.3229, |
| "step": 49950 |
| }, |
| { |
| "epoch": 14.573111007462687, |
| "grad_norm": 0.3572462499141693, |
| "learning_rate": 0.00042537532808398947, |
| "loss": 3.3214, |
| "step": 50000 |
| }, |
| { |
| "epoch": 14.573111007462687, |
| "eval_accuracy": 0.36993763356396836, |
| "eval_loss": 3.559805393218994, |
| "eval_runtime": 179.3822, |
| "eval_samples_per_second": 92.69, |
| "eval_steps_per_second": 5.798, |
| "step": 50000 |
| }, |
| { |
| "epoch": 14.587686567164178, |
| "grad_norm": 0.3618316054344177, |
| "learning_rate": 0.0004252003499562554, |
| "loss": 3.319, |
| "step": 50050 |
| }, |
| { |
| "epoch": 14.602262126865671, |
| "grad_norm": 0.36564192175865173, |
| "learning_rate": 0.0004250253718285214, |
| "loss": 3.315, |
| "step": 50100 |
| }, |
| { |
| "epoch": 14.616837686567164, |
| "grad_norm": 0.35237547755241394, |
| "learning_rate": 0.00042485039370078736, |
| "loss": 3.3143, |
| "step": 50150 |
| }, |
| { |
| "epoch": 14.631413246268657, |
| "grad_norm": 0.3858044445514679, |
| "learning_rate": 0.00042467541557305335, |
| "loss": 3.3314, |
| "step": 50200 |
| }, |
| { |
| "epoch": 14.645988805970148, |
| "grad_norm": 0.34664201736450195, |
| "learning_rate": 0.0004245004374453193, |
| "loss": 3.3147, |
| "step": 50250 |
| }, |
| { |
| "epoch": 14.660564365671641, |
| "grad_norm": 0.3514772653579712, |
| "learning_rate": 0.00042432545931758524, |
| "loss": 3.3161, |
| "step": 50300 |
| }, |
| { |
| "epoch": 14.675139925373134, |
| "grad_norm": 0.3883207142353058, |
| "learning_rate": 0.00042415048118985124, |
| "loss": 3.3136, |
| "step": 50350 |
| }, |
| { |
| "epoch": 14.689715485074627, |
| "grad_norm": 0.3385624885559082, |
| "learning_rate": 0.00042397550306211724, |
| "loss": 3.3091, |
| "step": 50400 |
| }, |
| { |
| "epoch": 14.70429104477612, |
| "grad_norm": 0.35351499915122986, |
| "learning_rate": 0.0004238005249343831, |
| "loss": 3.3274, |
| "step": 50450 |
| }, |
| { |
| "epoch": 14.718866604477611, |
| "grad_norm": 0.3634480834007263, |
| "learning_rate": 0.0004236255468066491, |
| "loss": 3.3179, |
| "step": 50500 |
| }, |
| { |
| "epoch": 14.733442164179104, |
| "grad_norm": 0.366725891828537, |
| "learning_rate": 0.0004234505686789151, |
| "loss": 3.3205, |
| "step": 50550 |
| }, |
| { |
| "epoch": 14.748017723880597, |
| "grad_norm": 0.34701037406921387, |
| "learning_rate": 0.0004232755905511811, |
| "loss": 3.33, |
| "step": 50600 |
| }, |
| { |
| "epoch": 14.76259328358209, |
| "grad_norm": 0.35697057843208313, |
| "learning_rate": 0.000423100612423447, |
| "loss": 3.3354, |
| "step": 50650 |
| }, |
| { |
| "epoch": 14.777168843283581, |
| "grad_norm": 0.3638821542263031, |
| "learning_rate": 0.000422925634295713, |
| "loss": 3.3334, |
| "step": 50700 |
| }, |
| { |
| "epoch": 14.791744402985074, |
| "grad_norm": 0.36784827709198, |
| "learning_rate": 0.000422750656167979, |
| "loss": 3.3183, |
| "step": 50750 |
| }, |
| { |
| "epoch": 14.806319962686567, |
| "grad_norm": 0.335401713848114, |
| "learning_rate": 0.0004225756780402449, |
| "loss": 3.3289, |
| "step": 50800 |
| }, |
| { |
| "epoch": 14.82089552238806, |
| "grad_norm": 0.3848132789134979, |
| "learning_rate": 0.0004224006999125109, |
| "loss": 3.3264, |
| "step": 50850 |
| }, |
| { |
| "epoch": 14.835471082089553, |
| "grad_norm": 0.36380621790885925, |
| "learning_rate": 0.0004222257217847769, |
| "loss": 3.3303, |
| "step": 50900 |
| }, |
| { |
| "epoch": 14.850046641791044, |
| "grad_norm": 0.3595868945121765, |
| "learning_rate": 0.00042205074365704283, |
| "loss": 3.3333, |
| "step": 50950 |
| }, |
| { |
| "epoch": 14.864622201492537, |
| "grad_norm": 0.3353583514690399, |
| "learning_rate": 0.0004218757655293088, |
| "loss": 3.3372, |
| "step": 51000 |
| }, |
| { |
| "epoch": 14.864622201492537, |
| "eval_accuracy": 0.3704376102011241, |
| "eval_loss": 3.549607753753662, |
| "eval_runtime": 179.4313, |
| "eval_samples_per_second": 92.665, |
| "eval_steps_per_second": 5.796, |
| "step": 51000 |
| }, |
| { |
| "epoch": 14.87919776119403, |
| "grad_norm": 0.3326234817504883, |
| "learning_rate": 0.0004217007874015748, |
| "loss": 3.3247, |
| "step": 51050 |
| }, |
| { |
| "epoch": 14.893773320895523, |
| "grad_norm": 0.35429471731185913, |
| "learning_rate": 0.0004215258092738407, |
| "loss": 3.3338, |
| "step": 51100 |
| }, |
| { |
| "epoch": 14.908348880597014, |
| "grad_norm": 0.348736971616745, |
| "learning_rate": 0.0004213508311461067, |
| "loss": 3.3281, |
| "step": 51150 |
| }, |
| { |
| "epoch": 14.922924440298507, |
| "grad_norm": 0.3448042571544647, |
| "learning_rate": 0.00042117585301837266, |
| "loss": 3.325, |
| "step": 51200 |
| }, |
| { |
| "epoch": 14.9375, |
| "grad_norm": 0.37077659368515015, |
| "learning_rate": 0.0004210008748906386, |
| "loss": 3.3462, |
| "step": 51250 |
| }, |
| { |
| "epoch": 14.952075559701493, |
| "grad_norm": 0.34058451652526855, |
| "learning_rate": 0.0004208258967629046, |
| "loss": 3.3353, |
| "step": 51300 |
| }, |
| { |
| "epoch": 14.966651119402986, |
| "grad_norm": 0.3427557647228241, |
| "learning_rate": 0.0004206509186351706, |
| "loss": 3.3382, |
| "step": 51350 |
| }, |
| { |
| "epoch": 14.981226679104477, |
| "grad_norm": 0.3424375653266907, |
| "learning_rate": 0.00042047594050743654, |
| "loss": 3.3204, |
| "step": 51400 |
| }, |
| { |
| "epoch": 14.99580223880597, |
| "grad_norm": 0.3815460503101349, |
| "learning_rate": 0.0004203009623797025, |
| "loss": 3.3426, |
| "step": 51450 |
| }, |
| { |
| "epoch": 15.010202891791044, |
| "grad_norm": 0.34294381737709045, |
| "learning_rate": 0.0004201259842519685, |
| "loss": 3.2526, |
| "step": 51500 |
| }, |
| { |
| "epoch": 15.024778451492537, |
| "grad_norm": 0.35977619886398315, |
| "learning_rate": 0.0004199510061242344, |
| "loss": 3.2222, |
| "step": 51550 |
| }, |
| { |
| "epoch": 15.03935401119403, |
| "grad_norm": 0.35569271445274353, |
| "learning_rate": 0.00041977602799650037, |
| "loss": 3.2289, |
| "step": 51600 |
| }, |
| { |
| "epoch": 15.053929570895523, |
| "grad_norm": 0.37896642088890076, |
| "learning_rate": 0.00041960104986876637, |
| "loss": 3.2352, |
| "step": 51650 |
| }, |
| { |
| "epoch": 15.068505130597014, |
| "grad_norm": 0.34995055198669434, |
| "learning_rate": 0.00041942607174103236, |
| "loss": 3.2267, |
| "step": 51700 |
| }, |
| { |
| "epoch": 15.083080690298507, |
| "grad_norm": 0.3554458022117615, |
| "learning_rate": 0.00041925109361329825, |
| "loss": 3.2503, |
| "step": 51750 |
| }, |
| { |
| "epoch": 15.09765625, |
| "grad_norm": 0.36070436239242554, |
| "learning_rate": 0.00041907611548556425, |
| "loss": 3.2567, |
| "step": 51800 |
| }, |
| { |
| "epoch": 15.112231809701493, |
| "grad_norm": 0.3662464916706085, |
| "learning_rate": 0.00041890113735783025, |
| "loss": 3.2466, |
| "step": 51850 |
| }, |
| { |
| "epoch": 15.126807369402986, |
| "grad_norm": 0.34961169958114624, |
| "learning_rate": 0.00041872615923009625, |
| "loss": 3.2488, |
| "step": 51900 |
| }, |
| { |
| "epoch": 15.141382929104477, |
| "grad_norm": 0.3582487106323242, |
| "learning_rate": 0.00041855118110236214, |
| "loss": 3.2564, |
| "step": 51950 |
| }, |
| { |
| "epoch": 15.15595848880597, |
| "grad_norm": 0.3753519058227539, |
| "learning_rate": 0.00041837620297462813, |
| "loss": 3.2634, |
| "step": 52000 |
| }, |
| { |
| "epoch": 15.15595848880597, |
| "eval_accuracy": 0.370012841914049, |
| "eval_loss": 3.565525531768799, |
| "eval_runtime": 179.4874, |
| "eval_samples_per_second": 92.636, |
| "eval_steps_per_second": 5.794, |
| "step": 52000 |
| }, |
| { |
| "epoch": 15.170534048507463, |
| "grad_norm": 0.3757287859916687, |
| "learning_rate": 0.00041820122484689413, |
| "loss": 3.274, |
| "step": 52050 |
| }, |
| { |
| "epoch": 15.185109608208956, |
| "grad_norm": 0.3474057912826538, |
| "learning_rate": 0.0004180262467191601, |
| "loss": 3.2527, |
| "step": 52100 |
| }, |
| { |
| "epoch": 15.199685167910447, |
| "grad_norm": 0.36459994316101074, |
| "learning_rate": 0.000417851268591426, |
| "loss": 3.2632, |
| "step": 52150 |
| }, |
| { |
| "epoch": 15.21426072761194, |
| "grad_norm": 0.3778489828109741, |
| "learning_rate": 0.000417676290463692, |
| "loss": 3.2657, |
| "step": 52200 |
| }, |
| { |
| "epoch": 15.228836287313433, |
| "grad_norm": 0.3785831332206726, |
| "learning_rate": 0.00041750131233595796, |
| "loss": 3.2661, |
| "step": 52250 |
| }, |
| { |
| "epoch": 15.243411847014926, |
| "grad_norm": 0.35805240273475647, |
| "learning_rate": 0.00041732633420822396, |
| "loss": 3.2598, |
| "step": 52300 |
| }, |
| { |
| "epoch": 15.257987406716419, |
| "grad_norm": 0.3630155026912689, |
| "learning_rate": 0.0004171513560804899, |
| "loss": 3.2674, |
| "step": 52350 |
| }, |
| { |
| "epoch": 15.27256296641791, |
| "grad_norm": 0.33566099405288696, |
| "learning_rate": 0.00041697637795275584, |
| "loss": 3.277, |
| "step": 52400 |
| }, |
| { |
| "epoch": 15.287138526119403, |
| "grad_norm": 0.3608446419239044, |
| "learning_rate": 0.00041680139982502184, |
| "loss": 3.2707, |
| "step": 52450 |
| }, |
| { |
| "epoch": 15.301714085820896, |
| "grad_norm": 0.34851962327957153, |
| "learning_rate": 0.0004166264216972878, |
| "loss": 3.2775, |
| "step": 52500 |
| }, |
| { |
| "epoch": 15.316289645522389, |
| "grad_norm": 0.3692467212677002, |
| "learning_rate": 0.00041645144356955373, |
| "loss": 3.2781, |
| "step": 52550 |
| }, |
| { |
| "epoch": 15.33086520522388, |
| "grad_norm": 0.38889679312705994, |
| "learning_rate": 0.0004162764654418197, |
| "loss": 3.2882, |
| "step": 52600 |
| }, |
| { |
| "epoch": 15.345440764925373, |
| "grad_norm": 0.35008713603019714, |
| "learning_rate": 0.0004161014873140857, |
| "loss": 3.2761, |
| "step": 52650 |
| }, |
| { |
| "epoch": 15.360016324626866, |
| "grad_norm": 0.3493787944316864, |
| "learning_rate": 0.00041592650918635167, |
| "loss": 3.2752, |
| "step": 52700 |
| }, |
| { |
| "epoch": 15.374591884328359, |
| "grad_norm": 0.37880319356918335, |
| "learning_rate": 0.0004157515310586176, |
| "loss": 3.2865, |
| "step": 52750 |
| }, |
| { |
| "epoch": 15.389167444029852, |
| "grad_norm": 0.3520691692829132, |
| "learning_rate": 0.0004155765529308836, |
| "loss": 3.2837, |
| "step": 52800 |
| }, |
| { |
| "epoch": 15.403743003731343, |
| "grad_norm": 0.38314563035964966, |
| "learning_rate": 0.0004154015748031496, |
| "loss": 3.2907, |
| "step": 52850 |
| }, |
| { |
| "epoch": 15.418318563432836, |
| "grad_norm": 0.3624465763568878, |
| "learning_rate": 0.0004152265966754155, |
| "loss": 3.3018, |
| "step": 52900 |
| }, |
| { |
| "epoch": 15.432894123134329, |
| "grad_norm": 0.35036274790763855, |
| "learning_rate": 0.0004150516185476815, |
| "loss": 3.2947, |
| "step": 52950 |
| }, |
| { |
| "epoch": 15.447469682835822, |
| "grad_norm": 0.35341790318489075, |
| "learning_rate": 0.0004148766404199475, |
| "loss": 3.3005, |
| "step": 53000 |
| }, |
| { |
| "epoch": 15.447469682835822, |
| "eval_accuracy": 0.3703226202824562, |
| "eval_loss": 3.559825897216797, |
| "eval_runtime": 179.392, |
| "eval_samples_per_second": 92.685, |
| "eval_steps_per_second": 5.797, |
| "step": 53000 |
| }, |
| { |
| "epoch": 15.462045242537313, |
| "grad_norm": 0.35088276863098145, |
| "learning_rate": 0.0004147016622922135, |
| "loss": 3.3018, |
| "step": 53050 |
| }, |
| { |
| "epoch": 15.476620802238806, |
| "grad_norm": 0.34915563464164734, |
| "learning_rate": 0.0004145266841644794, |
| "loss": 3.2953, |
| "step": 53100 |
| }, |
| { |
| "epoch": 15.491196361940299, |
| "grad_norm": 0.35356277227401733, |
| "learning_rate": 0.0004143517060367454, |
| "loss": 3.2968, |
| "step": 53150 |
| }, |
| { |
| "epoch": 15.505771921641792, |
| "grad_norm": 0.37322476506233215, |
| "learning_rate": 0.0004141767279090114, |
| "loss": 3.2879, |
| "step": 53200 |
| }, |
| { |
| "epoch": 15.520347481343283, |
| "grad_norm": 0.3452324867248535, |
| "learning_rate": 0.0004140017497812773, |
| "loss": 3.308, |
| "step": 53250 |
| }, |
| { |
| "epoch": 15.534923041044776, |
| "grad_norm": 0.3493434190750122, |
| "learning_rate": 0.00041382677165354326, |
| "loss": 3.3076, |
| "step": 53300 |
| }, |
| { |
| "epoch": 15.549498600746269, |
| "grad_norm": 0.34777626395225525, |
| "learning_rate": 0.00041365179352580926, |
| "loss": 3.3032, |
| "step": 53350 |
| }, |
| { |
| "epoch": 15.564074160447761, |
| "grad_norm": 0.3296518623828888, |
| "learning_rate": 0.0004134768153980752, |
| "loss": 3.3017, |
| "step": 53400 |
| }, |
| { |
| "epoch": 15.578649720149254, |
| "grad_norm": 0.3763023614883423, |
| "learning_rate": 0.00041330183727034114, |
| "loss": 3.2968, |
| "step": 53450 |
| }, |
| { |
| "epoch": 15.593225279850746, |
| "grad_norm": 0.36298853158950806, |
| "learning_rate": 0.00041312685914260714, |
| "loss": 3.3098, |
| "step": 53500 |
| }, |
| { |
| "epoch": 15.607800839552239, |
| "grad_norm": 0.3488393723964691, |
| "learning_rate": 0.0004129518810148731, |
| "loss": 3.3003, |
| "step": 53550 |
| }, |
| { |
| "epoch": 15.622376399253731, |
| "grad_norm": 0.3801731765270233, |
| "learning_rate": 0.0004127769028871391, |
| "loss": 3.3079, |
| "step": 53600 |
| }, |
| { |
| "epoch": 15.636951958955224, |
| "grad_norm": 0.36680352687835693, |
| "learning_rate": 0.00041260192475940503, |
| "loss": 3.3055, |
| "step": 53650 |
| }, |
| { |
| "epoch": 15.651527518656717, |
| "grad_norm": 0.3656584918498993, |
| "learning_rate": 0.00041242694663167097, |
| "loss": 3.3073, |
| "step": 53700 |
| }, |
| { |
| "epoch": 15.666103078358208, |
| "grad_norm": 0.34342220425605774, |
| "learning_rate": 0.00041225196850393697, |
| "loss": 3.3053, |
| "step": 53750 |
| }, |
| { |
| "epoch": 15.680678638059701, |
| "grad_norm": 0.352065771818161, |
| "learning_rate": 0.00041207699037620297, |
| "loss": 3.3134, |
| "step": 53800 |
| }, |
| { |
| "epoch": 15.695254197761194, |
| "grad_norm": 0.3709762692451477, |
| "learning_rate": 0.00041190201224846886, |
| "loss": 3.3165, |
| "step": 53850 |
| }, |
| { |
| "epoch": 15.709829757462687, |
| "grad_norm": 0.3582940995693207, |
| "learning_rate": 0.00041172703412073485, |
| "loss": 3.3011, |
| "step": 53900 |
| }, |
| { |
| "epoch": 15.724405317164178, |
| "grad_norm": 0.3479735255241394, |
| "learning_rate": 0.00041155205599300085, |
| "loss": 3.3088, |
| "step": 53950 |
| }, |
| { |
| "epoch": 15.738980876865671, |
| "grad_norm": 0.3477837145328522, |
| "learning_rate": 0.00041137707786526685, |
| "loss": 3.3022, |
| "step": 54000 |
| }, |
| { |
| "epoch": 15.738980876865671, |
| "eval_accuracy": 0.37073397111740425, |
| "eval_loss": 3.554640054702759, |
| "eval_runtime": 179.3272, |
| "eval_samples_per_second": 92.719, |
| "eval_steps_per_second": 5.799, |
| "step": 54000 |
| }, |
| { |
| "epoch": 15.753556436567164, |
| "grad_norm": 0.34925585985183716, |
| "learning_rate": 0.00041120209973753274, |
| "loss": 3.3033, |
| "step": 54050 |
| }, |
| { |
| "epoch": 15.768131996268657, |
| "grad_norm": 0.373879998922348, |
| "learning_rate": 0.00041102712160979874, |
| "loss": 3.3086, |
| "step": 54100 |
| }, |
| { |
| "epoch": 15.782707555970148, |
| "grad_norm": 0.405393123626709, |
| "learning_rate": 0.00041085214348206473, |
| "loss": 3.3134, |
| "step": 54150 |
| }, |
| { |
| "epoch": 15.797283115671641, |
| "grad_norm": 0.3593122661113739, |
| "learning_rate": 0.0004106771653543306, |
| "loss": 3.3102, |
| "step": 54200 |
| }, |
| { |
| "epoch": 15.811858675373134, |
| "grad_norm": 0.38658249378204346, |
| "learning_rate": 0.0004105021872265966, |
| "loss": 3.3161, |
| "step": 54250 |
| }, |
| { |
| "epoch": 15.826434235074627, |
| "grad_norm": 0.3754787743091583, |
| "learning_rate": 0.0004103272090988626, |
| "loss": 3.3118, |
| "step": 54300 |
| }, |
| { |
| "epoch": 15.84100979477612, |
| "grad_norm": 0.3744620382785797, |
| "learning_rate": 0.0004101522309711286, |
| "loss": 3.3125, |
| "step": 54350 |
| }, |
| { |
| "epoch": 15.855585354477611, |
| "grad_norm": 0.3404674530029297, |
| "learning_rate": 0.0004099772528433945, |
| "loss": 3.3051, |
| "step": 54400 |
| }, |
| { |
| "epoch": 15.870160914179104, |
| "grad_norm": 0.3725360631942749, |
| "learning_rate": 0.0004098022747156605, |
| "loss": 3.3122, |
| "step": 54450 |
| }, |
| { |
| "epoch": 15.884736473880597, |
| "grad_norm": 0.3606867790222168, |
| "learning_rate": 0.0004096272965879265, |
| "loss": 3.3167, |
| "step": 54500 |
| }, |
| { |
| "epoch": 15.89931203358209, |
| "grad_norm": 0.37651240825653076, |
| "learning_rate": 0.00040945231846019244, |
| "loss": 3.3221, |
| "step": 54550 |
| }, |
| { |
| "epoch": 15.913887593283581, |
| "grad_norm": 0.3563765585422516, |
| "learning_rate": 0.0004092773403324584, |
| "loss": 3.315, |
| "step": 54600 |
| }, |
| { |
| "epoch": 15.928463152985074, |
| "grad_norm": 0.3644365668296814, |
| "learning_rate": 0.0004091023622047244, |
| "loss": 3.3119, |
| "step": 54650 |
| }, |
| { |
| "epoch": 15.943038712686567, |
| "grad_norm": 0.3580692410469055, |
| "learning_rate": 0.00040892738407699033, |
| "loss": 3.3243, |
| "step": 54700 |
| }, |
| { |
| "epoch": 15.95761427238806, |
| "grad_norm": 0.3598160147666931, |
| "learning_rate": 0.0004087524059492563, |
| "loss": 3.3212, |
| "step": 54750 |
| }, |
| { |
| "epoch": 15.972189832089553, |
| "grad_norm": 0.35726791620254517, |
| "learning_rate": 0.00040857742782152227, |
| "loss": 3.3214, |
| "step": 54800 |
| }, |
| { |
| "epoch": 15.986765391791044, |
| "grad_norm": 0.367314875125885, |
| "learning_rate": 0.0004084024496937882, |
| "loss": 3.3345, |
| "step": 54850 |
| }, |
| { |
| "epoch": 16.00116604477612, |
| "grad_norm": 0.42804643511772156, |
| "learning_rate": 0.0004082274715660542, |
| "loss": 3.3104, |
| "step": 54900 |
| }, |
| { |
| "epoch": 16.01574160447761, |
| "grad_norm": 0.3623397648334503, |
| "learning_rate": 0.0004080524934383202, |
| "loss": 3.214, |
| "step": 54950 |
| }, |
| { |
| "epoch": 16.030317164179106, |
| "grad_norm": 0.37068092823028564, |
| "learning_rate": 0.0004078775153105861, |
| "loss": 3.2172, |
| "step": 55000 |
| }, |
| { |
| "epoch": 16.030317164179106, |
| "eval_accuracy": 0.37059956120223664, |
| "eval_loss": 3.559025526046753, |
| "eval_runtime": 179.3964, |
| "eval_samples_per_second": 92.683, |
| "eval_steps_per_second": 5.797, |
| "step": 55000 |
| }, |
| { |
| "epoch": 16.044892723880597, |
| "grad_norm": 0.35877496004104614, |
| "learning_rate": 0.0004077025371828521, |
| "loss": 3.2115, |
| "step": 55050 |
| }, |
| { |
| "epoch": 16.05946828358209, |
| "grad_norm": 0.38903719186782837, |
| "learning_rate": 0.0004075275590551181, |
| "loss": 3.2158, |
| "step": 55100 |
| }, |
| { |
| "epoch": 16.074043843283583, |
| "grad_norm": 0.3581124544143677, |
| "learning_rate": 0.000407352580927384, |
| "loss": 3.2269, |
| "step": 55150 |
| }, |
| { |
| "epoch": 16.088619402985074, |
| "grad_norm": 0.3499051630496979, |
| "learning_rate": 0.00040717760279965, |
| "loss": 3.225, |
| "step": 55200 |
| }, |
| { |
| "epoch": 16.10319496268657, |
| "grad_norm": 0.3736487030982971, |
| "learning_rate": 0.000407002624671916, |
| "loss": 3.2432, |
| "step": 55250 |
| }, |
| { |
| "epoch": 16.11777052238806, |
| "grad_norm": 0.38322770595550537, |
| "learning_rate": 0.000406827646544182, |
| "loss": 3.2391, |
| "step": 55300 |
| }, |
| { |
| "epoch": 16.13234608208955, |
| "grad_norm": 0.3570607304573059, |
| "learning_rate": 0.00040665266841644786, |
| "loss": 3.2254, |
| "step": 55350 |
| }, |
| { |
| "epoch": 16.146921641791046, |
| "grad_norm": 0.3845062553882599, |
| "learning_rate": 0.00040647769028871386, |
| "loss": 3.2414, |
| "step": 55400 |
| }, |
| { |
| "epoch": 16.161497201492537, |
| "grad_norm": 0.37037938833236694, |
| "learning_rate": 0.00040630271216097986, |
| "loss": 3.2536, |
| "step": 55450 |
| }, |
| { |
| "epoch": 16.17607276119403, |
| "grad_norm": 0.3952064514160156, |
| "learning_rate": 0.00040612773403324586, |
| "loss": 3.2415, |
| "step": 55500 |
| }, |
| { |
| "epoch": 16.190648320895523, |
| "grad_norm": 0.3778436779975891, |
| "learning_rate": 0.00040595275590551175, |
| "loss": 3.2437, |
| "step": 55550 |
| }, |
| { |
| "epoch": 16.205223880597014, |
| "grad_norm": 0.3779846131801605, |
| "learning_rate": 0.00040577777777777774, |
| "loss": 3.2486, |
| "step": 55600 |
| }, |
| { |
| "epoch": 16.21979944029851, |
| "grad_norm": 0.35539406538009644, |
| "learning_rate": 0.00040560279965004374, |
| "loss": 3.2471, |
| "step": 55650 |
| }, |
| { |
| "epoch": 16.234375, |
| "grad_norm": 0.3961225152015686, |
| "learning_rate": 0.0004054278215223097, |
| "loss": 3.2613, |
| "step": 55700 |
| }, |
| { |
| "epoch": 16.24895055970149, |
| "grad_norm": 0.3713584244251251, |
| "learning_rate": 0.00040525284339457563, |
| "loss": 3.264, |
| "step": 55750 |
| }, |
| { |
| "epoch": 16.263526119402986, |
| "grad_norm": 0.39257341623306274, |
| "learning_rate": 0.0004050778652668416, |
| "loss": 3.2574, |
| "step": 55800 |
| }, |
| { |
| "epoch": 16.278101679104477, |
| "grad_norm": 0.3771331310272217, |
| "learning_rate": 0.00040490288713910757, |
| "loss": 3.2638, |
| "step": 55850 |
| }, |
| { |
| "epoch": 16.29267723880597, |
| "grad_norm": 0.3533707559108734, |
| "learning_rate": 0.00040472790901137357, |
| "loss": 3.2598, |
| "step": 55900 |
| }, |
| { |
| "epoch": 16.307252798507463, |
| "grad_norm": 0.3563488721847534, |
| "learning_rate": 0.0004045529308836395, |
| "loss": 3.2619, |
| "step": 55950 |
| }, |
| { |
| "epoch": 16.321828358208954, |
| "grad_norm": 0.37279897928237915, |
| "learning_rate": 0.00040437795275590546, |
| "loss": 3.2679, |
| "step": 56000 |
| }, |
| { |
| "epoch": 16.321828358208954, |
| "eval_accuracy": 0.3706797128241536, |
| "eval_loss": 3.5606961250305176, |
| "eval_runtime": 179.3046, |
| "eval_samples_per_second": 92.73, |
| "eval_steps_per_second": 5.8, |
| "step": 56000 |
| }, |
| { |
| "epoch": 16.33640391791045, |
| "grad_norm": 0.35383620858192444, |
| "learning_rate": 0.00040420297462817145, |
| "loss": 3.2789, |
| "step": 56050 |
| }, |
| { |
| "epoch": 16.35097947761194, |
| "grad_norm": 0.36741262674331665, |
| "learning_rate": 0.0004040279965004374, |
| "loss": 3.2644, |
| "step": 56100 |
| }, |
| { |
| "epoch": 16.365555037313435, |
| "grad_norm": 0.34609347581863403, |
| "learning_rate": 0.00040385301837270334, |
| "loss": 3.2825, |
| "step": 56150 |
| }, |
| { |
| "epoch": 16.380130597014926, |
| "grad_norm": 0.3590814769268036, |
| "learning_rate": 0.00040367804024496934, |
| "loss": 3.257, |
| "step": 56200 |
| }, |
| { |
| "epoch": 16.394706156716417, |
| "grad_norm": 0.3929997682571411, |
| "learning_rate": 0.00040350306211723534, |
| "loss": 3.2757, |
| "step": 56250 |
| }, |
| { |
| "epoch": 16.40928171641791, |
| "grad_norm": 0.3860103487968445, |
| "learning_rate": 0.0004033280839895012, |
| "loss": 3.2667, |
| "step": 56300 |
| }, |
| { |
| "epoch": 16.423857276119403, |
| "grad_norm": 0.3654019832611084, |
| "learning_rate": 0.0004031531058617672, |
| "loss": 3.2709, |
| "step": 56350 |
| }, |
| { |
| "epoch": 16.438432835820894, |
| "grad_norm": 0.3494860529899597, |
| "learning_rate": 0.0004029781277340332, |
| "loss": 3.2721, |
| "step": 56400 |
| }, |
| { |
| "epoch": 16.45300839552239, |
| "grad_norm": 0.38559070229530334, |
| "learning_rate": 0.0004028031496062992, |
| "loss": 3.2807, |
| "step": 56450 |
| }, |
| { |
| "epoch": 16.46758395522388, |
| "grad_norm": 0.37908729910850525, |
| "learning_rate": 0.0004026281714785651, |
| "loss": 3.2763, |
| "step": 56500 |
| }, |
| { |
| "epoch": 16.482159514925375, |
| "grad_norm": 0.38921189308166504, |
| "learning_rate": 0.0004024531933508311, |
| "loss": 3.2863, |
| "step": 56550 |
| }, |
| { |
| "epoch": 16.496735074626866, |
| "grad_norm": 0.3636137843132019, |
| "learning_rate": 0.0004022782152230971, |
| "loss": 3.2822, |
| "step": 56600 |
| }, |
| { |
| "epoch": 16.511310634328357, |
| "grad_norm": 0.3770190179347992, |
| "learning_rate": 0.0004021032370953631, |
| "loss": 3.2813, |
| "step": 56650 |
| }, |
| { |
| "epoch": 16.52588619402985, |
| "grad_norm": 0.3771943151950836, |
| "learning_rate": 0.000401928258967629, |
| "loss": 3.2899, |
| "step": 56700 |
| }, |
| { |
| "epoch": 16.540461753731343, |
| "grad_norm": 0.359430193901062, |
| "learning_rate": 0.000401753280839895, |
| "loss": 3.2897, |
| "step": 56750 |
| }, |
| { |
| "epoch": 16.555037313432837, |
| "grad_norm": 0.3965089023113251, |
| "learning_rate": 0.000401578302712161, |
| "loss": 3.2841, |
| "step": 56800 |
| }, |
| { |
| "epoch": 16.56961287313433, |
| "grad_norm": 0.35867807269096375, |
| "learning_rate": 0.00040140332458442693, |
| "loss": 3.2832, |
| "step": 56850 |
| }, |
| { |
| "epoch": 16.58418843283582, |
| "grad_norm": 0.3748142719268799, |
| "learning_rate": 0.00040122834645669287, |
| "loss": 3.2781, |
| "step": 56900 |
| }, |
| { |
| "epoch": 16.598763992537314, |
| "grad_norm": 0.35872289538383484, |
| "learning_rate": 0.00040105336832895887, |
| "loss": 3.2906, |
| "step": 56950 |
| }, |
| { |
| "epoch": 16.613339552238806, |
| "grad_norm": 0.36718112230300903, |
| "learning_rate": 0.0004008783902012248, |
| "loss": 3.2927, |
| "step": 57000 |
| }, |
| { |
| "epoch": 16.613339552238806, |
| "eval_accuracy": 0.37125619247782327, |
| "eval_loss": 3.5517942905426025, |
| "eval_runtime": 179.3724, |
| "eval_samples_per_second": 92.695, |
| "eval_steps_per_second": 5.798, |
| "step": 57000 |
| }, |
| { |
| "epoch": 16.627915111940297, |
| "grad_norm": 0.36552923917770386, |
| "learning_rate": 0.00040070341207349076, |
| "loss": 3.2982, |
| "step": 57050 |
| }, |
| { |
| "epoch": 16.64249067164179, |
| "grad_norm": 0.39370307326316833, |
| "learning_rate": 0.00040052843394575675, |
| "loss": 3.2764, |
| "step": 57100 |
| }, |
| { |
| "epoch": 16.657066231343283, |
| "grad_norm": 0.3543391227722168, |
| "learning_rate": 0.0004003534558180227, |
| "loss": 3.287, |
| "step": 57150 |
| }, |
| { |
| "epoch": 16.671641791044777, |
| "grad_norm": 0.35714074969291687, |
| "learning_rate": 0.0004001784776902887, |
| "loss": 3.2802, |
| "step": 57200 |
| }, |
| { |
| "epoch": 16.68621735074627, |
| "grad_norm": 0.3787291646003723, |
| "learning_rate": 0.00040000349956255464, |
| "loss": 3.2908, |
| "step": 57250 |
| }, |
| { |
| "epoch": 16.70079291044776, |
| "grad_norm": 0.36080485582351685, |
| "learning_rate": 0.0003998285214348206, |
| "loss": 3.304, |
| "step": 57300 |
| }, |
| { |
| "epoch": 16.715368470149254, |
| "grad_norm": 0.35946592688560486, |
| "learning_rate": 0.0003996535433070866, |
| "loss": 3.3054, |
| "step": 57350 |
| }, |
| { |
| "epoch": 16.729944029850746, |
| "grad_norm": 0.41039395332336426, |
| "learning_rate": 0.0003994785651793526, |
| "loss": 3.3145, |
| "step": 57400 |
| }, |
| { |
| "epoch": 16.74451958955224, |
| "grad_norm": 0.36457979679107666, |
| "learning_rate": 0.00039930358705161847, |
| "loss": 3.3108, |
| "step": 57450 |
| }, |
| { |
| "epoch": 16.75909514925373, |
| "grad_norm": 0.35925135016441345, |
| "learning_rate": 0.00039912860892388446, |
| "loss": 3.3023, |
| "step": 57500 |
| }, |
| { |
| "epoch": 16.773670708955223, |
| "grad_norm": 0.3933243453502655, |
| "learning_rate": 0.00039895363079615046, |
| "loss": 3.305, |
| "step": 57550 |
| }, |
| { |
| "epoch": 16.788246268656717, |
| "grad_norm": 0.3717454969882965, |
| "learning_rate": 0.00039877865266841646, |
| "loss": 3.3088, |
| "step": 57600 |
| }, |
| { |
| "epoch": 16.80282182835821, |
| "grad_norm": 0.3560434579849243, |
| "learning_rate": 0.00039860367454068235, |
| "loss": 3.3019, |
| "step": 57650 |
| }, |
| { |
| "epoch": 16.817397388059703, |
| "grad_norm": 0.3487250804901123, |
| "learning_rate": 0.00039842869641294835, |
| "loss": 3.2972, |
| "step": 57700 |
| }, |
| { |
| "epoch": 16.831972947761194, |
| "grad_norm": 0.366042822599411, |
| "learning_rate": 0.00039825371828521434, |
| "loss": 3.3067, |
| "step": 57750 |
| }, |
| { |
| "epoch": 16.846548507462686, |
| "grad_norm": 0.3558788597583771, |
| "learning_rate": 0.00039807874015748023, |
| "loss": 3.3003, |
| "step": 57800 |
| }, |
| { |
| "epoch": 16.86112406716418, |
| "grad_norm": 0.35664811730384827, |
| "learning_rate": 0.00039790376202974623, |
| "loss": 3.3094, |
| "step": 57850 |
| }, |
| { |
| "epoch": 16.87569962686567, |
| "grad_norm": 0.3727872967720032, |
| "learning_rate": 0.00039772878390201223, |
| "loss": 3.3029, |
| "step": 57900 |
| }, |
| { |
| "epoch": 16.890275186567163, |
| "grad_norm": 0.3959354758262634, |
| "learning_rate": 0.0003975538057742782, |
| "loss": 3.2972, |
| "step": 57950 |
| }, |
| { |
| "epoch": 16.904850746268657, |
| "grad_norm": 0.3455987572669983, |
| "learning_rate": 0.0003973788276465441, |
| "loss": 3.3154, |
| "step": 58000 |
| }, |
| { |
| "epoch": 16.904850746268657, |
| "eval_accuracy": 0.371655538224026, |
| "eval_loss": 3.5446152687072754, |
| "eval_runtime": 179.4456, |
| "eval_samples_per_second": 92.658, |
| "eval_steps_per_second": 5.796, |
| "step": 58000 |
| }, |
| { |
| "epoch": 16.91942630597015, |
| "grad_norm": 0.36653903126716614, |
| "learning_rate": 0.0003972038495188101, |
| "loss": 3.312, |
| "step": 58050 |
| }, |
| { |
| "epoch": 16.934001865671643, |
| "grad_norm": 0.3434462249279022, |
| "learning_rate": 0.0003970288713910761, |
| "loss": 3.3182, |
| "step": 58100 |
| }, |
| { |
| "epoch": 16.948577425373134, |
| "grad_norm": 0.3869427740573883, |
| "learning_rate": 0.00039685389326334205, |
| "loss": 3.3116, |
| "step": 58150 |
| }, |
| { |
| "epoch": 16.963152985074625, |
| "grad_norm": 0.3616868853569031, |
| "learning_rate": 0.000396678915135608, |
| "loss": 3.3154, |
| "step": 58200 |
| }, |
| { |
| "epoch": 16.97772854477612, |
| "grad_norm": 0.36797189712524414, |
| "learning_rate": 0.000396503937007874, |
| "loss": 3.2952, |
| "step": 58250 |
| }, |
| { |
| "epoch": 16.99230410447761, |
| "grad_norm": 0.3591371774673462, |
| "learning_rate": 0.00039632895888013994, |
| "loss": 3.3028, |
| "step": 58300 |
| }, |
| { |
| "epoch": 17.006704757462686, |
| "grad_norm": 0.38673022389411926, |
| "learning_rate": 0.00039615398075240594, |
| "loss": 3.2497, |
| "step": 58350 |
| }, |
| { |
| "epoch": 17.02128031716418, |
| "grad_norm": 0.3777545392513275, |
| "learning_rate": 0.0003959790026246719, |
| "loss": 3.2017, |
| "step": 58400 |
| }, |
| { |
| "epoch": 17.03585587686567, |
| "grad_norm": 0.41391587257385254, |
| "learning_rate": 0.0003958040244969378, |
| "loss": 3.2017, |
| "step": 58450 |
| }, |
| { |
| "epoch": 17.050431436567163, |
| "grad_norm": 0.38846704363822937, |
| "learning_rate": 0.0003956290463692038, |
| "loss": 3.211, |
| "step": 58500 |
| }, |
| { |
| "epoch": 17.065006996268657, |
| "grad_norm": 0.3742265999317169, |
| "learning_rate": 0.0003954540682414698, |
| "loss": 3.2093, |
| "step": 58550 |
| }, |
| { |
| "epoch": 17.07958255597015, |
| "grad_norm": 0.3785288333892822, |
| "learning_rate": 0.0003952790901137357, |
| "loss": 3.2154, |
| "step": 58600 |
| }, |
| { |
| "epoch": 17.094158115671643, |
| "grad_norm": 0.38964998722076416, |
| "learning_rate": 0.0003951041119860017, |
| "loss": 3.2223, |
| "step": 58650 |
| }, |
| { |
| "epoch": 17.108733675373134, |
| "grad_norm": 0.370022714138031, |
| "learning_rate": 0.0003949291338582677, |
| "loss": 3.2128, |
| "step": 58700 |
| }, |
| { |
| "epoch": 17.123309235074625, |
| "grad_norm": 0.3854983448982239, |
| "learning_rate": 0.0003947541557305336, |
| "loss": 3.2228, |
| "step": 58750 |
| }, |
| { |
| "epoch": 17.13788479477612, |
| "grad_norm": 0.3856055438518524, |
| "learning_rate": 0.0003945791776027996, |
| "loss": 3.2329, |
| "step": 58800 |
| }, |
| { |
| "epoch": 17.15246035447761, |
| "grad_norm": 0.38632187247276306, |
| "learning_rate": 0.0003944041994750656, |
| "loss": 3.2304, |
| "step": 58850 |
| }, |
| { |
| "epoch": 17.167035914179106, |
| "grad_norm": 0.3841564655303955, |
| "learning_rate": 0.0003942292213473316, |
| "loss": 3.2327, |
| "step": 58900 |
| }, |
| { |
| "epoch": 17.181611473880597, |
| "grad_norm": 0.3514139950275421, |
| "learning_rate": 0.0003940542432195975, |
| "loss": 3.221, |
| "step": 58950 |
| }, |
| { |
| "epoch": 17.19618703358209, |
| "grad_norm": 0.3656717538833618, |
| "learning_rate": 0.0003938792650918635, |
| "loss": 3.2442, |
| "step": 59000 |
| }, |
| { |
| "epoch": 17.19618703358209, |
| "eval_accuracy": 0.37105704924099003, |
| "eval_loss": 3.560926914215088, |
| "eval_runtime": 179.492, |
| "eval_samples_per_second": 92.634, |
| "eval_steps_per_second": 5.794, |
| "step": 59000 |
| }, |
| { |
| "epoch": 17.210762593283583, |
| "grad_norm": 0.3702145516872406, |
| "learning_rate": 0.00039370428696412947, |
| "loss": 3.2338, |
| "step": 59050 |
| }, |
| { |
| "epoch": 17.225338152985074, |
| "grad_norm": 0.38071298599243164, |
| "learning_rate": 0.0003935293088363954, |
| "loss": 3.2492, |
| "step": 59100 |
| }, |
| { |
| "epoch": 17.23991371268657, |
| "grad_norm": 0.3649649918079376, |
| "learning_rate": 0.00039335433070866136, |
| "loss": 3.2408, |
| "step": 59150 |
| }, |
| { |
| "epoch": 17.25448927238806, |
| "grad_norm": 0.3958336412906647, |
| "learning_rate": 0.00039317935258092736, |
| "loss": 3.2552, |
| "step": 59200 |
| }, |
| { |
| "epoch": 17.26906483208955, |
| "grad_norm": 0.3653128147125244, |
| "learning_rate": 0.00039300437445319335, |
| "loss": 3.2593, |
| "step": 59250 |
| }, |
| { |
| "epoch": 17.283640391791046, |
| "grad_norm": 0.3596879243850708, |
| "learning_rate": 0.0003928293963254593, |
| "loss": 3.2644, |
| "step": 59300 |
| }, |
| { |
| "epoch": 17.298215951492537, |
| "grad_norm": 0.35950595140457153, |
| "learning_rate": 0.00039265441819772524, |
| "loss": 3.2618, |
| "step": 59350 |
| }, |
| { |
| "epoch": 17.31279151119403, |
| "grad_norm": 0.3889482319355011, |
| "learning_rate": 0.00039247944006999124, |
| "loss": 3.2517, |
| "step": 59400 |
| }, |
| { |
| "epoch": 17.327367070895523, |
| "grad_norm": 0.35707592964172363, |
| "learning_rate": 0.0003923044619422572, |
| "loss": 3.2599, |
| "step": 59450 |
| }, |
| { |
| "epoch": 17.341942630597014, |
| "grad_norm": 0.35822805762290955, |
| "learning_rate": 0.0003921294838145232, |
| "loss": 3.2736, |
| "step": 59500 |
| }, |
| { |
| "epoch": 17.35651819029851, |
| "grad_norm": 0.355954110622406, |
| "learning_rate": 0.0003919545056867891, |
| "loss": 3.2614, |
| "step": 59550 |
| }, |
| { |
| "epoch": 17.37109375, |
| "grad_norm": 0.38183078169822693, |
| "learning_rate": 0.00039177952755905507, |
| "loss": 3.256, |
| "step": 59600 |
| }, |
| { |
| "epoch": 17.38566930970149, |
| "grad_norm": 0.35589709877967834, |
| "learning_rate": 0.00039160454943132106, |
| "loss": 3.26, |
| "step": 59650 |
| }, |
| { |
| "epoch": 17.400244869402986, |
| "grad_norm": 0.38540026545524597, |
| "learning_rate": 0.000391429571303587, |
| "loss": 3.2505, |
| "step": 59700 |
| }, |
| { |
| "epoch": 17.414820429104477, |
| "grad_norm": 0.3964499533176422, |
| "learning_rate": 0.00039125459317585295, |
| "loss": 3.2584, |
| "step": 59750 |
| }, |
| { |
| "epoch": 17.42939598880597, |
| "grad_norm": 0.3757520616054535, |
| "learning_rate": 0.00039107961504811895, |
| "loss": 3.2646, |
| "step": 59800 |
| }, |
| { |
| "epoch": 17.443971548507463, |
| "grad_norm": 0.3785048723220825, |
| "learning_rate": 0.00039090463692038495, |
| "loss": 3.2672, |
| "step": 59850 |
| }, |
| { |
| "epoch": 17.458547108208954, |
| "grad_norm": 0.3456498980522156, |
| "learning_rate": 0.00039072965879265084, |
| "loss": 3.2603, |
| "step": 59900 |
| }, |
| { |
| "epoch": 17.47312266791045, |
| "grad_norm": 0.38184770941734314, |
| "learning_rate": 0.00039055468066491683, |
| "loss": 3.2643, |
| "step": 59950 |
| }, |
| { |
| "epoch": 17.48769822761194, |
| "grad_norm": 0.3528321385383606, |
| "learning_rate": 0.00039037970253718283, |
| "loss": 3.2761, |
| "step": 60000 |
| }, |
| { |
| "epoch": 17.48769822761194, |
| "eval_accuracy": 0.37141108166202685, |
| "eval_loss": 3.552334785461426, |
| "eval_runtime": 179.4922, |
| "eval_samples_per_second": 92.634, |
| "eval_steps_per_second": 5.794, |
| "step": 60000 |
| }, |
| { |
| "epoch": 17.502273787313435, |
| "grad_norm": 0.3572315573692322, |
| "learning_rate": 0.00039020472440944883, |
| "loss": 3.2563, |
| "step": 60050 |
| }, |
| { |
| "epoch": 17.516849347014926, |
| "grad_norm": 0.3947022557258606, |
| "learning_rate": 0.0003900297462817147, |
| "loss": 3.2788, |
| "step": 60100 |
| }, |
| { |
| "epoch": 17.531424906716417, |
| "grad_norm": 0.3718763291835785, |
| "learning_rate": 0.0003898547681539807, |
| "loss": 3.2717, |
| "step": 60150 |
| }, |
| { |
| "epoch": 17.54600046641791, |
| "grad_norm": 0.33914080262184143, |
| "learning_rate": 0.0003896797900262467, |
| "loss": 3.2747, |
| "step": 60200 |
| }, |
| { |
| "epoch": 17.560576026119403, |
| "grad_norm": 0.38070231676101685, |
| "learning_rate": 0.00038950481189851266, |
| "loss": 3.2827, |
| "step": 60250 |
| }, |
| { |
| "epoch": 17.575151585820894, |
| "grad_norm": 0.3630342483520508, |
| "learning_rate": 0.0003893298337707786, |
| "loss": 3.2707, |
| "step": 60300 |
| }, |
| { |
| "epoch": 17.58972714552239, |
| "grad_norm": 0.38142305612564087, |
| "learning_rate": 0.0003891548556430446, |
| "loss": 3.2791, |
| "step": 60350 |
| }, |
| { |
| "epoch": 17.60430270522388, |
| "grad_norm": 0.36522653698921204, |
| "learning_rate": 0.00038897987751531054, |
| "loss": 3.2805, |
| "step": 60400 |
| }, |
| { |
| "epoch": 17.618878264925375, |
| "grad_norm": 0.38595816493034363, |
| "learning_rate": 0.00038880489938757654, |
| "loss": 3.2822, |
| "step": 60450 |
| }, |
| { |
| "epoch": 17.633453824626866, |
| "grad_norm": 0.3888775706291199, |
| "learning_rate": 0.0003886299212598425, |
| "loss": 3.284, |
| "step": 60500 |
| }, |
| { |
| "epoch": 17.648029384328357, |
| "grad_norm": 0.38401997089385986, |
| "learning_rate": 0.0003884549431321085, |
| "loss": 3.274, |
| "step": 60550 |
| }, |
| { |
| "epoch": 17.66260494402985, |
| "grad_norm": 0.36174026131629944, |
| "learning_rate": 0.0003882799650043744, |
| "loss": 3.2895, |
| "step": 60600 |
| }, |
| { |
| "epoch": 17.677180503731343, |
| "grad_norm": 0.36874690651893616, |
| "learning_rate": 0.00038810498687664037, |
| "loss": 3.2842, |
| "step": 60650 |
| }, |
| { |
| "epoch": 17.691756063432837, |
| "grad_norm": 0.38380053639411926, |
| "learning_rate": 0.00038793000874890637, |
| "loss": 3.2882, |
| "step": 60700 |
| }, |
| { |
| "epoch": 17.70633162313433, |
| "grad_norm": 0.36726170778274536, |
| "learning_rate": 0.0003877550306211723, |
| "loss": 3.274, |
| "step": 60750 |
| }, |
| { |
| "epoch": 17.72090718283582, |
| "grad_norm": 0.35661062598228455, |
| "learning_rate": 0.0003875800524934383, |
| "loss": 3.2791, |
| "step": 60800 |
| }, |
| { |
| "epoch": 17.735482742537314, |
| "grad_norm": 0.3487260639667511, |
| "learning_rate": 0.00038740507436570425, |
| "loss": 3.2784, |
| "step": 60850 |
| }, |
| { |
| "epoch": 17.750058302238806, |
| "grad_norm": 0.3786361813545227, |
| "learning_rate": 0.0003872300962379702, |
| "loss": 3.2846, |
| "step": 60900 |
| }, |
| { |
| "epoch": 17.764633861940297, |
| "grad_norm": 0.37210702896118164, |
| "learning_rate": 0.0003870551181102362, |
| "loss": 3.291, |
| "step": 60950 |
| }, |
| { |
| "epoch": 17.77920942164179, |
| "grad_norm": 0.3575860559940338, |
| "learning_rate": 0.0003868801399825022, |
| "loss": 3.291, |
| "step": 61000 |
| }, |
| { |
| "epoch": 17.77920942164179, |
| "eval_accuracy": 0.37191305914730677, |
| "eval_loss": 3.5460619926452637, |
| "eval_runtime": 179.5326, |
| "eval_samples_per_second": 92.613, |
| "eval_steps_per_second": 5.793, |
| "step": 61000 |
| }, |
| { |
| "epoch": 17.793784981343283, |
| "grad_norm": 0.3741329610347748, |
| "learning_rate": 0.0003867051618547681, |
| "loss": 3.2749, |
| "step": 61050 |
| }, |
| { |
| "epoch": 17.808360541044777, |
| "grad_norm": 0.3822123408317566, |
| "learning_rate": 0.0003865301837270341, |
| "loss": 3.2878, |
| "step": 61100 |
| }, |
| { |
| "epoch": 17.82293610074627, |
| "grad_norm": 0.3695724904537201, |
| "learning_rate": 0.0003863552055993001, |
| "loss": 3.2917, |
| "step": 61150 |
| }, |
| { |
| "epoch": 17.83751166044776, |
| "grad_norm": 0.37312400341033936, |
| "learning_rate": 0.00038618022747156607, |
| "loss": 3.289, |
| "step": 61200 |
| }, |
| { |
| "epoch": 17.852087220149254, |
| "grad_norm": 0.38065680861473083, |
| "learning_rate": 0.00038600524934383196, |
| "loss": 3.2949, |
| "step": 61250 |
| }, |
| { |
| "epoch": 17.866662779850746, |
| "grad_norm": 0.3558042347431183, |
| "learning_rate": 0.00038583027121609796, |
| "loss": 3.2938, |
| "step": 61300 |
| }, |
| { |
| "epoch": 17.88123833955224, |
| "grad_norm": 0.40853044390678406, |
| "learning_rate": 0.00038565529308836396, |
| "loss": 3.2928, |
| "step": 61350 |
| }, |
| { |
| "epoch": 17.89581389925373, |
| "grad_norm": 0.36038708686828613, |
| "learning_rate": 0.00038548031496062984, |
| "loss": 3.2871, |
| "step": 61400 |
| }, |
| { |
| "epoch": 17.910389458955223, |
| "grad_norm": 0.3773646056652069, |
| "learning_rate": 0.00038530533683289584, |
| "loss": 3.2862, |
| "step": 61450 |
| }, |
| { |
| "epoch": 17.924965018656717, |
| "grad_norm": 0.3764312267303467, |
| "learning_rate": 0.00038513035870516184, |
| "loss": 3.2862, |
| "step": 61500 |
| }, |
| { |
| "epoch": 17.93954057835821, |
| "grad_norm": 0.37428849935531616, |
| "learning_rate": 0.0003849553805774278, |
| "loss": 3.2922, |
| "step": 61550 |
| }, |
| { |
| "epoch": 17.954116138059703, |
| "grad_norm": 0.36383602023124695, |
| "learning_rate": 0.00038478040244969373, |
| "loss": 3.2902, |
| "step": 61600 |
| }, |
| { |
| "epoch": 17.968691697761194, |
| "grad_norm": 0.3712141215801239, |
| "learning_rate": 0.0003846054243219597, |
| "loss": 3.2926, |
| "step": 61650 |
| }, |
| { |
| "epoch": 17.983267257462686, |
| "grad_norm": 0.37419530749320984, |
| "learning_rate": 0.00038443044619422567, |
| "loss": 3.2895, |
| "step": 61700 |
| }, |
| { |
| "epoch": 17.99784281716418, |
| "grad_norm": 0.36220309138298035, |
| "learning_rate": 0.00038425546806649167, |
| "loss": 3.309, |
| "step": 61750 |
| }, |
| { |
| "epoch": 18.012243470149254, |
| "grad_norm": 0.38369911909103394, |
| "learning_rate": 0.0003840804899387576, |
| "loss": 3.2132, |
| "step": 61800 |
| }, |
| { |
| "epoch": 18.026819029850746, |
| "grad_norm": 0.38627973198890686, |
| "learning_rate": 0.0003839055118110236, |
| "loss": 3.1929, |
| "step": 61850 |
| }, |
| { |
| "epoch": 18.04139458955224, |
| "grad_norm": 0.38564252853393555, |
| "learning_rate": 0.00038373053368328955, |
| "loss": 3.1907, |
| "step": 61900 |
| }, |
| { |
| "epoch": 18.05597014925373, |
| "grad_norm": 0.37583598494529724, |
| "learning_rate": 0.00038355555555555555, |
| "loss": 3.2055, |
| "step": 61950 |
| }, |
| { |
| "epoch": 18.070545708955223, |
| "grad_norm": 0.38370102643966675, |
| "learning_rate": 0.0003833805774278215, |
| "loss": 3.1997, |
| "step": 62000 |
| }, |
| { |
| "epoch": 18.070545708955223, |
| "eval_accuracy": 0.3712240612108874, |
| "eval_loss": 3.563174247741699, |
| "eval_runtime": 179.5785, |
| "eval_samples_per_second": 92.589, |
| "eval_steps_per_second": 5.791, |
| "step": 62000 |
| }, |
| { |
| "epoch": 18.085121268656717, |
| "grad_norm": 0.3597595691680908, |
| "learning_rate": 0.00038320559930008744, |
| "loss": 3.2005, |
| "step": 62050 |
| }, |
| { |
| "epoch": 18.09969682835821, |
| "grad_norm": 0.40490153431892395, |
| "learning_rate": 0.00038303062117235343, |
| "loss": 3.2116, |
| "step": 62100 |
| }, |
| { |
| "epoch": 18.114272388059703, |
| "grad_norm": 0.3927886188030243, |
| "learning_rate": 0.00038285564304461943, |
| "loss": 3.2149, |
| "step": 62150 |
| }, |
| { |
| "epoch": 18.128847947761194, |
| "grad_norm": 0.40592485666275024, |
| "learning_rate": 0.0003826806649168853, |
| "loss": 3.224, |
| "step": 62200 |
| }, |
| { |
| "epoch": 18.143423507462686, |
| "grad_norm": 0.41048359870910645, |
| "learning_rate": 0.0003825056867891513, |
| "loss": 3.2146, |
| "step": 62250 |
| }, |
| { |
| "epoch": 18.15799906716418, |
| "grad_norm": 0.3761942386627197, |
| "learning_rate": 0.0003823307086614173, |
| "loss": 3.2287, |
| "step": 62300 |
| }, |
| { |
| "epoch": 18.17257462686567, |
| "grad_norm": 0.38858354091644287, |
| "learning_rate": 0.0003821557305336832, |
| "loss": 3.2138, |
| "step": 62350 |
| }, |
| { |
| "epoch": 18.187150186567163, |
| "grad_norm": 0.3984103202819824, |
| "learning_rate": 0.0003819807524059492, |
| "loss": 3.2247, |
| "step": 62400 |
| }, |
| { |
| "epoch": 18.201725746268657, |
| "grad_norm": 0.378798246383667, |
| "learning_rate": 0.0003818057742782152, |
| "loss": 3.2323, |
| "step": 62450 |
| }, |
| { |
| "epoch": 18.21630130597015, |
| "grad_norm": 0.4035341143608093, |
| "learning_rate": 0.0003816307961504812, |
| "loss": 3.2271, |
| "step": 62500 |
| }, |
| { |
| "epoch": 18.230876865671643, |
| "grad_norm": 0.38941821455955505, |
| "learning_rate": 0.0003814558180227471, |
| "loss": 3.2255, |
| "step": 62550 |
| }, |
| { |
| "epoch": 18.245452425373134, |
| "grad_norm": 0.40656763315200806, |
| "learning_rate": 0.0003812808398950131, |
| "loss": 3.2411, |
| "step": 62600 |
| }, |
| { |
| "epoch": 18.260027985074625, |
| "grad_norm": 0.3786965310573578, |
| "learning_rate": 0.0003811058617672791, |
| "loss": 3.2374, |
| "step": 62650 |
| }, |
| { |
| "epoch": 18.27460354477612, |
| "grad_norm": 0.3779425323009491, |
| "learning_rate": 0.000380930883639545, |
| "loss": 3.232, |
| "step": 62700 |
| }, |
| { |
| "epoch": 18.28917910447761, |
| "grad_norm": 0.3786343038082123, |
| "learning_rate": 0.00038075590551181097, |
| "loss": 3.2342, |
| "step": 62750 |
| }, |
| { |
| "epoch": 18.303754664179106, |
| "grad_norm": 0.3741774260997772, |
| "learning_rate": 0.00038058092738407697, |
| "loss": 3.2366, |
| "step": 62800 |
| }, |
| { |
| "epoch": 18.318330223880597, |
| "grad_norm": 0.4231685400009155, |
| "learning_rate": 0.0003804059492563429, |
| "loss": 3.2425, |
| "step": 62850 |
| }, |
| { |
| "epoch": 18.33290578358209, |
| "grad_norm": 0.3765699863433838, |
| "learning_rate": 0.0003802309711286089, |
| "loss": 3.2423, |
| "step": 62900 |
| }, |
| { |
| "epoch": 18.347481343283583, |
| "grad_norm": 0.37712177634239197, |
| "learning_rate": 0.00038005599300087485, |
| "loss": 3.2477, |
| "step": 62950 |
| }, |
| { |
| "epoch": 18.362056902985074, |
| "grad_norm": 0.37962380051612854, |
| "learning_rate": 0.0003798810148731408, |
| "loss": 3.2501, |
| "step": 63000 |
| }, |
| { |
| "epoch": 18.362056902985074, |
| "eval_accuracy": 0.3715983375070633, |
| "eval_loss": 3.5540771484375, |
| "eval_runtime": 179.3741, |
| "eval_samples_per_second": 92.695, |
| "eval_steps_per_second": 5.798, |
| "step": 63000 |
| }, |
| { |
| "epoch": 18.376632462686565, |
| "grad_norm": 0.3669949173927307, |
| "learning_rate": 0.0003797060367454068, |
| "loss": 3.2477, |
| "step": 63050 |
| }, |
| { |
| "epoch": 18.39120802238806, |
| "grad_norm": 0.39625945687294006, |
| "learning_rate": 0.0003795310586176728, |
| "loss": 3.2461, |
| "step": 63100 |
| }, |
| { |
| "epoch": 18.40578358208955, |
| "grad_norm": 0.37177714705467224, |
| "learning_rate": 0.00037935608048993873, |
| "loss": 3.2521, |
| "step": 63150 |
| }, |
| { |
| "epoch": 18.420359141791046, |
| "grad_norm": 0.39759817719459534, |
| "learning_rate": 0.0003791811023622047, |
| "loss": 3.2596, |
| "step": 63200 |
| }, |
| { |
| "epoch": 18.434934701492537, |
| "grad_norm": 0.36642131209373474, |
| "learning_rate": 0.0003790061242344707, |
| "loss": 3.2562, |
| "step": 63250 |
| }, |
| { |
| "epoch": 18.44951026119403, |
| "grad_norm": 0.37006133794784546, |
| "learning_rate": 0.0003788311461067366, |
| "loss": 3.2456, |
| "step": 63300 |
| }, |
| { |
| "epoch": 18.464085820895523, |
| "grad_norm": 0.4009372591972351, |
| "learning_rate": 0.00037865616797900256, |
| "loss": 3.2532, |
| "step": 63350 |
| }, |
| { |
| "epoch": 18.478661380597014, |
| "grad_norm": 0.3532605469226837, |
| "learning_rate": 0.00037848118985126856, |
| "loss": 3.2627, |
| "step": 63400 |
| }, |
| { |
| "epoch": 18.49323694029851, |
| "grad_norm": 0.42864686250686646, |
| "learning_rate": 0.00037830621172353456, |
| "loss": 3.256, |
| "step": 63450 |
| }, |
| { |
| "epoch": 18.5078125, |
| "grad_norm": 0.35426709055900574, |
| "learning_rate": 0.00037813123359580045, |
| "loss": 3.26, |
| "step": 63500 |
| }, |
| { |
| "epoch": 18.52238805970149, |
| "grad_norm": 0.3834630250930786, |
| "learning_rate": 0.00037795625546806644, |
| "loss": 3.2534, |
| "step": 63550 |
| }, |
| { |
| "epoch": 18.536963619402986, |
| "grad_norm": 0.39207953214645386, |
| "learning_rate": 0.00037778127734033244, |
| "loss": 3.2513, |
| "step": 63600 |
| }, |
| { |
| "epoch": 18.551539179104477, |
| "grad_norm": 0.36887794733047485, |
| "learning_rate": 0.00037760629921259844, |
| "loss": 3.2679, |
| "step": 63650 |
| }, |
| { |
| "epoch": 18.56611473880597, |
| "grad_norm": 0.41247034072875977, |
| "learning_rate": 0.00037743132108486433, |
| "loss": 3.2684, |
| "step": 63700 |
| }, |
| { |
| "epoch": 18.580690298507463, |
| "grad_norm": 0.4239455759525299, |
| "learning_rate": 0.0003772563429571303, |
| "loss": 3.2613, |
| "step": 63750 |
| }, |
| { |
| "epoch": 18.595265858208954, |
| "grad_norm": 0.3865257203578949, |
| "learning_rate": 0.0003770813648293963, |
| "loss": 3.2729, |
| "step": 63800 |
| }, |
| { |
| "epoch": 18.60984141791045, |
| "grad_norm": 0.4032337963581085, |
| "learning_rate": 0.00037690638670166227, |
| "loss": 3.2639, |
| "step": 63850 |
| }, |
| { |
| "epoch": 18.62441697761194, |
| "grad_norm": 0.3696430027484894, |
| "learning_rate": 0.0003767314085739282, |
| "loss": 3.2581, |
| "step": 63900 |
| }, |
| { |
| "epoch": 18.638992537313435, |
| "grad_norm": 0.3973971903324127, |
| "learning_rate": 0.0003765564304461942, |
| "loss": 3.2707, |
| "step": 63950 |
| }, |
| { |
| "epoch": 18.653568097014926, |
| "grad_norm": 0.3754875361919403, |
| "learning_rate": 0.00037638145231846015, |
| "loss": 3.2619, |
| "step": 64000 |
| }, |
| { |
| "epoch": 18.653568097014926, |
| "eval_accuracy": 0.3718132521349932, |
| "eval_loss": 3.5468034744262695, |
| "eval_runtime": 180.8218, |
| "eval_samples_per_second": 91.952, |
| "eval_steps_per_second": 5.752, |
| "step": 64000 |
| }, |
| { |
| "epoch": 18.668143656716417, |
| "grad_norm": 0.3698972463607788, |
| "learning_rate": 0.00037620647419072615, |
| "loss": 3.2717, |
| "step": 64050 |
| }, |
| { |
| "epoch": 18.68271921641791, |
| "grad_norm": 0.37259402871131897, |
| "learning_rate": 0.0003760314960629921, |
| "loss": 3.2705, |
| "step": 64100 |
| }, |
| { |
| "epoch": 18.697294776119403, |
| "grad_norm": 0.3753381371498108, |
| "learning_rate": 0.00037585651793525804, |
| "loss": 3.2766, |
| "step": 64150 |
| }, |
| { |
| "epoch": 18.711870335820894, |
| "grad_norm": 0.3642279803752899, |
| "learning_rate": 0.00037568153980752404, |
| "loss": 3.2636, |
| "step": 64200 |
| }, |
| { |
| "epoch": 18.72644589552239, |
| "grad_norm": 0.3768105208873749, |
| "learning_rate": 0.00037550656167979, |
| "loss": 3.2737, |
| "step": 64250 |
| }, |
| { |
| "epoch": 18.74102145522388, |
| "grad_norm": 0.36915749311447144, |
| "learning_rate": 0.0003753315835520559, |
| "loss": 3.2668, |
| "step": 64300 |
| }, |
| { |
| "epoch": 18.755597014925375, |
| "grad_norm": 0.35579395294189453, |
| "learning_rate": 0.0003751566054243219, |
| "loss": 3.2714, |
| "step": 64350 |
| }, |
| { |
| "epoch": 18.770172574626866, |
| "grad_norm": 0.3788515031337738, |
| "learning_rate": 0.0003749816272965879, |
| "loss": 3.2906, |
| "step": 64400 |
| }, |
| { |
| "epoch": 18.784748134328357, |
| "grad_norm": 0.38902297616004944, |
| "learning_rate": 0.00037480664916885386, |
| "loss": 3.2725, |
| "step": 64450 |
| }, |
| { |
| "epoch": 18.79932369402985, |
| "grad_norm": 0.420396089553833, |
| "learning_rate": 0.0003746316710411198, |
| "loss": 3.2659, |
| "step": 64500 |
| }, |
| { |
| "epoch": 18.813899253731343, |
| "grad_norm": 0.37137776613235474, |
| "learning_rate": 0.0003744566929133858, |
| "loss": 3.2694, |
| "step": 64550 |
| }, |
| { |
| "epoch": 18.828474813432837, |
| "grad_norm": 0.3613051176071167, |
| "learning_rate": 0.0003742817147856518, |
| "loss": 3.2652, |
| "step": 64600 |
| }, |
| { |
| "epoch": 18.84305037313433, |
| "grad_norm": 0.3822399973869324, |
| "learning_rate": 0.0003741067366579177, |
| "loss": 3.2897, |
| "step": 64650 |
| }, |
| { |
| "epoch": 18.85762593283582, |
| "grad_norm": 0.38065946102142334, |
| "learning_rate": 0.0003739317585301837, |
| "loss": 3.2776, |
| "step": 64700 |
| }, |
| { |
| "epoch": 18.872201492537314, |
| "grad_norm": 0.36325129866600037, |
| "learning_rate": 0.0003737567804024497, |
| "loss": 3.2708, |
| "step": 64750 |
| }, |
| { |
| "epoch": 18.886777052238806, |
| "grad_norm": 0.3650025427341461, |
| "learning_rate": 0.0003735818022747157, |
| "loss": 3.2868, |
| "step": 64800 |
| }, |
| { |
| "epoch": 18.901352611940297, |
| "grad_norm": 0.3612610101699829, |
| "learning_rate": 0.00037340682414698157, |
| "loss": 3.2961, |
| "step": 64850 |
| }, |
| { |
| "epoch": 18.91592817164179, |
| "grad_norm": 0.354300856590271, |
| "learning_rate": 0.00037323184601924757, |
| "loss": 3.2865, |
| "step": 64900 |
| }, |
| { |
| "epoch": 18.930503731343283, |
| "grad_norm": 0.3676280081272125, |
| "learning_rate": 0.00037305686789151357, |
| "loss": 3.2864, |
| "step": 64950 |
| }, |
| { |
| "epoch": 18.945079291044777, |
| "grad_norm": 0.3943864107131958, |
| "learning_rate": 0.00037288188976377946, |
| "loss": 3.2919, |
| "step": 65000 |
| }, |
| { |
| "epoch": 18.945079291044777, |
| "eval_accuracy": 0.37216375364757553, |
| "eval_loss": 3.5422606468200684, |
| "eval_runtime": 180.2835, |
| "eval_samples_per_second": 92.227, |
| "eval_steps_per_second": 5.769, |
| "step": 65000 |
| }, |
| { |
| "epoch": 18.95965485074627, |
| "grad_norm": 0.37015199661254883, |
| "learning_rate": 0.00037270691163604545, |
| "loss": 3.2765, |
| "step": 65050 |
| }, |
| { |
| "epoch": 18.97423041044776, |
| "grad_norm": 0.416165828704834, |
| "learning_rate": 0.00037253193350831145, |
| "loss": 3.2774, |
| "step": 65100 |
| }, |
| { |
| "epoch": 18.988805970149254, |
| "grad_norm": 0.37499311566352844, |
| "learning_rate": 0.0003723569553805774, |
| "loss": 3.2861, |
| "step": 65150 |
| }, |
| { |
| "epoch": 19.00320662313433, |
| "grad_norm": 0.3819681406021118, |
| "learning_rate": 0.00037218197725284334, |
| "loss": 3.2615, |
| "step": 65200 |
| }, |
| { |
| "epoch": 19.01778218283582, |
| "grad_norm": 0.3987461030483246, |
| "learning_rate": 0.00037200699912510934, |
| "loss": 3.1719, |
| "step": 65250 |
| }, |
| { |
| "epoch": 19.032357742537314, |
| "grad_norm": 0.4024551510810852, |
| "learning_rate": 0.0003718320209973753, |
| "loss": 3.1772, |
| "step": 65300 |
| }, |
| { |
| "epoch": 19.046933302238806, |
| "grad_norm": 0.36730852723121643, |
| "learning_rate": 0.0003716570428696413, |
| "loss": 3.1886, |
| "step": 65350 |
| }, |
| { |
| "epoch": 19.061508861940297, |
| "grad_norm": 0.3820883631706238, |
| "learning_rate": 0.0003714820647419072, |
| "loss": 3.1853, |
| "step": 65400 |
| }, |
| { |
| "epoch": 19.07608442164179, |
| "grad_norm": 0.3786020278930664, |
| "learning_rate": 0.00037130708661417316, |
| "loss": 3.194, |
| "step": 65450 |
| }, |
| { |
| "epoch": 19.090659981343283, |
| "grad_norm": 0.37983399629592896, |
| "learning_rate": 0.00037113210848643916, |
| "loss": 3.197, |
| "step": 65500 |
| }, |
| { |
| "epoch": 19.105235541044777, |
| "grad_norm": 0.3765254318714142, |
| "learning_rate": 0.00037095713035870516, |
| "loss": 3.197, |
| "step": 65550 |
| }, |
| { |
| "epoch": 19.11981110074627, |
| "grad_norm": 0.4143414795398712, |
| "learning_rate": 0.00037078215223097105, |
| "loss": 3.2023, |
| "step": 65600 |
| }, |
| { |
| "epoch": 19.13438666044776, |
| "grad_norm": 0.3956504166126251, |
| "learning_rate": 0.00037060717410323705, |
| "loss": 3.2054, |
| "step": 65650 |
| }, |
| { |
| "epoch": 19.148962220149254, |
| "grad_norm": 0.3792484998703003, |
| "learning_rate": 0.00037043219597550304, |
| "loss": 3.2109, |
| "step": 65700 |
| }, |
| { |
| "epoch": 19.163537779850746, |
| "grad_norm": 0.3993787467479706, |
| "learning_rate": 0.00037025721784776904, |
| "loss": 3.2066, |
| "step": 65750 |
| }, |
| { |
| "epoch": 19.17811333955224, |
| "grad_norm": 0.38693755865097046, |
| "learning_rate": 0.00037008223972003493, |
| "loss": 3.2079, |
| "step": 65800 |
| }, |
| { |
| "epoch": 19.19268889925373, |
| "grad_norm": 0.3611437678337097, |
| "learning_rate": 0.00036990726159230093, |
| "loss": 3.2081, |
| "step": 65850 |
| }, |
| { |
| "epoch": 19.207264458955223, |
| "grad_norm": 0.39916324615478516, |
| "learning_rate": 0.0003697322834645669, |
| "loss": 3.2189, |
| "step": 65900 |
| }, |
| { |
| "epoch": 19.221840018656717, |
| "grad_norm": 0.3750282824039459, |
| "learning_rate": 0.0003695573053368328, |
| "loss": 3.2194, |
| "step": 65950 |
| }, |
| { |
| "epoch": 19.23641557835821, |
| "grad_norm": 0.3601391315460205, |
| "learning_rate": 0.0003693823272090988, |
| "loss": 3.2264, |
| "step": 66000 |
| }, |
| { |
| "epoch": 19.23641557835821, |
| "eval_accuracy": 0.37148440686093176, |
| "eval_loss": 3.5561089515686035, |
| "eval_runtime": 179.3982, |
| "eval_samples_per_second": 92.682, |
| "eval_steps_per_second": 5.797, |
| "step": 66000 |
| }, |
| { |
| "epoch": 19.250991138059703, |
| "grad_norm": 0.3798132538795471, |
| "learning_rate": 0.0003692073490813648, |
| "loss": 3.2224, |
| "step": 66050 |
| }, |
| { |
| "epoch": 19.265566697761194, |
| "grad_norm": 0.3911631405353546, |
| "learning_rate": 0.0003690323709536308, |
| "loss": 3.2158, |
| "step": 66100 |
| }, |
| { |
| "epoch": 19.280142257462686, |
| "grad_norm": 0.3523401916027069, |
| "learning_rate": 0.0003688573928258967, |
| "loss": 3.2245, |
| "step": 66150 |
| }, |
| { |
| "epoch": 19.29471781716418, |
| "grad_norm": 0.3657352328300476, |
| "learning_rate": 0.0003686824146981627, |
| "loss": 3.2299, |
| "step": 66200 |
| }, |
| { |
| "epoch": 19.30929337686567, |
| "grad_norm": 0.4035662114620209, |
| "learning_rate": 0.0003685074365704287, |
| "loss": 3.2245, |
| "step": 66250 |
| }, |
| { |
| "epoch": 19.323868936567163, |
| "grad_norm": 0.41193512082099915, |
| "learning_rate": 0.00036833245844269464, |
| "loss": 3.2311, |
| "step": 66300 |
| }, |
| { |
| "epoch": 19.338444496268657, |
| "grad_norm": 0.368966668844223, |
| "learning_rate": 0.0003681574803149606, |
| "loss": 3.2366, |
| "step": 66350 |
| }, |
| { |
| "epoch": 19.35302005597015, |
| "grad_norm": 0.37729912996292114, |
| "learning_rate": 0.0003679825021872266, |
| "loss": 3.2232, |
| "step": 66400 |
| }, |
| { |
| "epoch": 19.367595615671643, |
| "grad_norm": 0.39348021149635315, |
| "learning_rate": 0.0003678075240594925, |
| "loss": 3.2442, |
| "step": 66450 |
| }, |
| { |
| "epoch": 19.382171175373134, |
| "grad_norm": 0.4021179676055908, |
| "learning_rate": 0.0003676325459317585, |
| "loss": 3.2287, |
| "step": 66500 |
| }, |
| { |
| "epoch": 19.396746735074625, |
| "grad_norm": 0.39004001021385193, |
| "learning_rate": 0.00036745756780402446, |
| "loss": 3.2393, |
| "step": 66550 |
| }, |
| { |
| "epoch": 19.41132229477612, |
| "grad_norm": 0.4309486746788025, |
| "learning_rate": 0.0003672825896762904, |
| "loss": 3.2398, |
| "step": 66600 |
| }, |
| { |
| "epoch": 19.42589785447761, |
| "grad_norm": 0.3987930119037628, |
| "learning_rate": 0.0003671076115485564, |
| "loss": 3.2451, |
| "step": 66650 |
| }, |
| { |
| "epoch": 19.440473414179106, |
| "grad_norm": 0.3985956907272339, |
| "learning_rate": 0.0003669326334208224, |
| "loss": 3.2425, |
| "step": 66700 |
| }, |
| { |
| "epoch": 19.455048973880597, |
| "grad_norm": 0.38769444823265076, |
| "learning_rate": 0.0003667576552930883, |
| "loss": 3.2463, |
| "step": 66750 |
| }, |
| { |
| "epoch": 19.46962453358209, |
| "grad_norm": 0.36316731572151184, |
| "learning_rate": 0.0003665826771653543, |
| "loss": 3.2502, |
| "step": 66800 |
| }, |
| { |
| "epoch": 19.484200093283583, |
| "grad_norm": 0.374905526638031, |
| "learning_rate": 0.0003664076990376203, |
| "loss": 3.2422, |
| "step": 66850 |
| }, |
| { |
| "epoch": 19.498775652985074, |
| "grad_norm": 0.38985154032707214, |
| "learning_rate": 0.0003662327209098862, |
| "loss": 3.2589, |
| "step": 66900 |
| }, |
| { |
| "epoch": 19.513351212686565, |
| "grad_norm": 0.37939122319221497, |
| "learning_rate": 0.0003660577427821522, |
| "loss": 3.2459, |
| "step": 66950 |
| }, |
| { |
| "epoch": 19.52792677238806, |
| "grad_norm": 0.3842551112174988, |
| "learning_rate": 0.00036588276465441817, |
| "loss": 3.2394, |
| "step": 67000 |
| }, |
| { |
| "epoch": 19.52792677238806, |
| "eval_accuracy": 0.37200721670609316, |
| "eval_loss": 3.5495340824127197, |
| "eval_runtime": 179.2821, |
| "eval_samples_per_second": 92.742, |
| "eval_steps_per_second": 5.801, |
| "step": 67000 |
| }, |
| { |
| "epoch": 19.54250233208955, |
| "grad_norm": 0.37161803245544434, |
| "learning_rate": 0.00036570778652668417, |
| "loss": 3.2461, |
| "step": 67050 |
| }, |
| { |
| "epoch": 19.557077891791046, |
| "grad_norm": 0.39652761816978455, |
| "learning_rate": 0.00036553280839895006, |
| "loss": 3.2559, |
| "step": 67100 |
| }, |
| { |
| "epoch": 19.571653451492537, |
| "grad_norm": 0.403962641954422, |
| "learning_rate": 0.00036535783027121606, |
| "loss": 3.2747, |
| "step": 67150 |
| }, |
| { |
| "epoch": 19.58622901119403, |
| "grad_norm": 0.3734639585018158, |
| "learning_rate": 0.00036518285214348205, |
| "loss": 3.2566, |
| "step": 67200 |
| }, |
| { |
| "epoch": 19.600804570895523, |
| "grad_norm": 0.3627129793167114, |
| "learning_rate": 0.00036500787401574805, |
| "loss": 3.2476, |
| "step": 67250 |
| }, |
| { |
| "epoch": 19.615380130597014, |
| "grad_norm": 0.3672148585319519, |
| "learning_rate": 0.00036483289588801394, |
| "loss": 3.2564, |
| "step": 67300 |
| }, |
| { |
| "epoch": 19.62995569029851, |
| "grad_norm": 0.38739728927612305, |
| "learning_rate": 0.00036465791776027994, |
| "loss": 3.2598, |
| "step": 67350 |
| }, |
| { |
| "epoch": 19.64453125, |
| "grad_norm": 0.352742463350296, |
| "learning_rate": 0.00036448293963254594, |
| "loss": 3.2527, |
| "step": 67400 |
| }, |
| { |
| "epoch": 19.65910680970149, |
| "grad_norm": 0.3656388521194458, |
| "learning_rate": 0.0003643079615048119, |
| "loss": 3.2553, |
| "step": 67450 |
| }, |
| { |
| "epoch": 19.673682369402986, |
| "grad_norm": 0.3881314694881439, |
| "learning_rate": 0.0003641329833770778, |
| "loss": 3.2561, |
| "step": 67500 |
| }, |
| { |
| "epoch": 19.688257929104477, |
| "grad_norm": 0.3891017436981201, |
| "learning_rate": 0.0003639580052493438, |
| "loss": 3.2463, |
| "step": 67550 |
| }, |
| { |
| "epoch": 19.70283348880597, |
| "grad_norm": 0.39045625925064087, |
| "learning_rate": 0.00036378302712160976, |
| "loss": 3.2573, |
| "step": 67600 |
| }, |
| { |
| "epoch": 19.717409048507463, |
| "grad_norm": 0.3692318797111511, |
| "learning_rate": 0.00036360804899387576, |
| "loss": 3.2656, |
| "step": 67650 |
| }, |
| { |
| "epoch": 19.731984608208954, |
| "grad_norm": 0.3797418177127838, |
| "learning_rate": 0.0003634330708661417, |
| "loss": 3.2651, |
| "step": 67700 |
| }, |
| { |
| "epoch": 19.74656016791045, |
| "grad_norm": 0.40278735756874084, |
| "learning_rate": 0.00036325809273840765, |
| "loss": 3.2616, |
| "step": 67750 |
| }, |
| { |
| "epoch": 19.76113572761194, |
| "grad_norm": 0.36150145530700684, |
| "learning_rate": 0.00036308311461067365, |
| "loss": 3.2544, |
| "step": 67800 |
| }, |
| { |
| "epoch": 19.775711287313435, |
| "grad_norm": 0.3682866394519806, |
| "learning_rate": 0.0003629081364829396, |
| "loss": 3.266, |
| "step": 67850 |
| }, |
| { |
| "epoch": 19.790286847014926, |
| "grad_norm": 0.3680150508880615, |
| "learning_rate": 0.00036273315835520553, |
| "loss": 3.259, |
| "step": 67900 |
| }, |
| { |
| "epoch": 19.804862406716417, |
| "grad_norm": 0.4011524021625519, |
| "learning_rate": 0.00036255818022747153, |
| "loss": 3.2745, |
| "step": 67950 |
| }, |
| { |
| "epoch": 19.81943796641791, |
| "grad_norm": 0.361175000667572, |
| "learning_rate": 0.00036238320209973753, |
| "loss": 3.2665, |
| "step": 68000 |
| }, |
| { |
| "epoch": 19.81943796641791, |
| "eval_accuracy": 0.3724805938328917, |
| "eval_loss": 3.5455946922302246, |
| "eval_runtime": 179.659, |
| "eval_samples_per_second": 92.548, |
| "eval_steps_per_second": 5.789, |
| "step": 68000 |
| }, |
| { |
| "epoch": 19.834013526119403, |
| "grad_norm": 0.3918949365615845, |
| "learning_rate": 0.0003622082239720034, |
| "loss": 3.2757, |
| "step": 68050 |
| }, |
| { |
| "epoch": 19.848589085820894, |
| "grad_norm": 0.4035460352897644, |
| "learning_rate": 0.0003620332458442694, |
| "loss": 3.2647, |
| "step": 68100 |
| }, |
| { |
| "epoch": 19.86316464552239, |
| "grad_norm": 0.39110511541366577, |
| "learning_rate": 0.0003618582677165354, |
| "loss": 3.2762, |
| "step": 68150 |
| }, |
| { |
| "epoch": 19.87774020522388, |
| "grad_norm": 0.3898833692073822, |
| "learning_rate": 0.0003616832895888014, |
| "loss": 3.2705, |
| "step": 68200 |
| }, |
| { |
| "epoch": 19.892315764925375, |
| "grad_norm": 0.3959086835384369, |
| "learning_rate": 0.0003615083114610673, |
| "loss": 3.2747, |
| "step": 68250 |
| }, |
| { |
| "epoch": 19.906891324626866, |
| "grad_norm": 0.40734153985977173, |
| "learning_rate": 0.0003613333333333333, |
| "loss": 3.2645, |
| "step": 68300 |
| }, |
| { |
| "epoch": 19.921466884328357, |
| "grad_norm": 0.38103732466697693, |
| "learning_rate": 0.0003611583552055993, |
| "loss": 3.2809, |
| "step": 68350 |
| }, |
| { |
| "epoch": 19.93604244402985, |
| "grad_norm": 0.3835790753364563, |
| "learning_rate": 0.00036098337707786524, |
| "loss": 3.2718, |
| "step": 68400 |
| }, |
| { |
| "epoch": 19.950618003731343, |
| "grad_norm": 0.3652678430080414, |
| "learning_rate": 0.0003608083989501312, |
| "loss": 3.2677, |
| "step": 68450 |
| }, |
| { |
| "epoch": 19.965193563432837, |
| "grad_norm": 0.36673831939697266, |
| "learning_rate": 0.0003606334208223972, |
| "loss": 3.2655, |
| "step": 68500 |
| }, |
| { |
| "epoch": 19.97976912313433, |
| "grad_norm": 0.39470627903938293, |
| "learning_rate": 0.0003604584426946632, |
| "loss": 3.2774, |
| "step": 68550 |
| }, |
| { |
| "epoch": 19.99434468283582, |
| "grad_norm": 0.3817540407180786, |
| "learning_rate": 0.00036028346456692907, |
| "loss": 3.2817, |
| "step": 68600 |
| }, |
| { |
| "epoch": 20.008745335820894, |
| "grad_norm": 0.39281952381134033, |
| "learning_rate": 0.00036010848643919507, |
| "loss": 3.2112, |
| "step": 68650 |
| }, |
| { |
| "epoch": 20.02332089552239, |
| "grad_norm": 0.4034322500228882, |
| "learning_rate": 0.00035993350831146106, |
| "loss": 3.1723, |
| "step": 68700 |
| }, |
| { |
| "epoch": 20.03789645522388, |
| "grad_norm": 0.370498389005661, |
| "learning_rate": 0.000359758530183727, |
| "loss": 3.1685, |
| "step": 68750 |
| }, |
| { |
| "epoch": 20.052472014925375, |
| "grad_norm": 0.39685019850730896, |
| "learning_rate": 0.00035958355205599295, |
| "loss": 3.184, |
| "step": 68800 |
| }, |
| { |
| "epoch": 20.067047574626866, |
| "grad_norm": 0.37462735176086426, |
| "learning_rate": 0.00035940857392825895, |
| "loss": 3.1755, |
| "step": 68850 |
| }, |
| { |
| "epoch": 20.081623134328357, |
| "grad_norm": 0.4028313159942627, |
| "learning_rate": 0.0003592335958005249, |
| "loss": 3.1769, |
| "step": 68900 |
| }, |
| { |
| "epoch": 20.09619869402985, |
| "grad_norm": 0.3951454162597656, |
| "learning_rate": 0.0003590586176727909, |
| "loss": 3.1853, |
| "step": 68950 |
| }, |
| { |
| "epoch": 20.110774253731343, |
| "grad_norm": 0.41274914145469666, |
| "learning_rate": 0.00035888363954505683, |
| "loss": 3.1939, |
| "step": 69000 |
| }, |
| { |
| "epoch": 20.110774253731343, |
| "eval_accuracy": 0.3716299979862052, |
| "eval_loss": 3.5588364601135254, |
| "eval_runtime": 179.4501, |
| "eval_samples_per_second": 92.655, |
| "eval_steps_per_second": 5.795, |
| "step": 69000 |
| }, |
| { |
| "epoch": 20.125349813432837, |
| "grad_norm": 0.3881000280380249, |
| "learning_rate": 0.0003587086614173228, |
| "loss": 3.2051, |
| "step": 69050 |
| }, |
| { |
| "epoch": 20.13992537313433, |
| "grad_norm": 0.38626691699028015, |
| "learning_rate": 0.0003585336832895888, |
| "loss": 3.1925, |
| "step": 69100 |
| }, |
| { |
| "epoch": 20.15450093283582, |
| "grad_norm": 0.37013718485832214, |
| "learning_rate": 0.00035835870516185477, |
| "loss": 3.1998, |
| "step": 69150 |
| }, |
| { |
| "epoch": 20.169076492537314, |
| "grad_norm": 0.4230809509754181, |
| "learning_rate": 0.00035818372703412066, |
| "loss": 3.2044, |
| "step": 69200 |
| }, |
| { |
| "epoch": 20.183652052238806, |
| "grad_norm": 0.39065060019493103, |
| "learning_rate": 0.00035800874890638666, |
| "loss": 3.2022, |
| "step": 69250 |
| }, |
| { |
| "epoch": 20.198227611940297, |
| "grad_norm": 0.38382846117019653, |
| "learning_rate": 0.00035783377077865266, |
| "loss": 3.2117, |
| "step": 69300 |
| }, |
| { |
| "epoch": 20.21280317164179, |
| "grad_norm": 0.3768085837364197, |
| "learning_rate": 0.00035765879265091865, |
| "loss": 3.2136, |
| "step": 69350 |
| }, |
| { |
| "epoch": 20.227378731343283, |
| "grad_norm": 0.42641666531562805, |
| "learning_rate": 0.00035748381452318454, |
| "loss": 3.1956, |
| "step": 69400 |
| }, |
| { |
| "epoch": 20.241954291044777, |
| "grad_norm": 0.3793436288833618, |
| "learning_rate": 0.00035730883639545054, |
| "loss": 3.2154, |
| "step": 69450 |
| }, |
| { |
| "epoch": 20.25652985074627, |
| "grad_norm": 0.38998252153396606, |
| "learning_rate": 0.00035713385826771654, |
| "loss": 3.2057, |
| "step": 69500 |
| }, |
| { |
| "epoch": 20.27110541044776, |
| "grad_norm": 0.4177810847759247, |
| "learning_rate": 0.00035695888013998243, |
| "loss": 3.2119, |
| "step": 69550 |
| }, |
| { |
| "epoch": 20.285680970149254, |
| "grad_norm": 0.37320512533187866, |
| "learning_rate": 0.0003567839020122484, |
| "loss": 3.2149, |
| "step": 69600 |
| }, |
| { |
| "epoch": 20.300256529850746, |
| "grad_norm": 0.3940044939517975, |
| "learning_rate": 0.0003566089238845144, |
| "loss": 3.2202, |
| "step": 69650 |
| }, |
| { |
| "epoch": 20.31483208955224, |
| "grad_norm": 0.38435447216033936, |
| "learning_rate": 0.00035643394575678037, |
| "loss": 3.2195, |
| "step": 69700 |
| }, |
| { |
| "epoch": 20.32940764925373, |
| "grad_norm": 0.38087761402130127, |
| "learning_rate": 0.0003562589676290463, |
| "loss": 3.2175, |
| "step": 69750 |
| }, |
| { |
| "epoch": 20.343983208955223, |
| "grad_norm": 0.4099285900592804, |
| "learning_rate": 0.0003560839895013123, |
| "loss": 3.2241, |
| "step": 69800 |
| }, |
| { |
| "epoch": 20.358558768656717, |
| "grad_norm": 0.4072279632091522, |
| "learning_rate": 0.0003559090113735783, |
| "loss": 3.2172, |
| "step": 69850 |
| }, |
| { |
| "epoch": 20.37313432835821, |
| "grad_norm": 0.3808026909828186, |
| "learning_rate": 0.00035573403324584425, |
| "loss": 3.2092, |
| "step": 69900 |
| }, |
| { |
| "epoch": 20.387709888059703, |
| "grad_norm": 0.3662373721599579, |
| "learning_rate": 0.0003555590551181102, |
| "loss": 3.2168, |
| "step": 69950 |
| }, |
| { |
| "epoch": 20.402285447761194, |
| "grad_norm": 0.3988702893257141, |
| "learning_rate": 0.0003553840769903762, |
| "loss": 3.2149, |
| "step": 70000 |
| }, |
| { |
| "epoch": 20.402285447761194, |
| "eval_accuracy": 0.37192118023675214, |
| "eval_loss": 3.555593490600586, |
| "eval_runtime": 179.2627, |
| "eval_samples_per_second": 92.752, |
| "eval_steps_per_second": 5.802, |
| "step": 70000 |
| }, |
| { |
| "epoch": 20.416861007462686, |
| "grad_norm": 0.37926796078681946, |
| "learning_rate": 0.00035520909886264213, |
| "loss": 3.2228, |
| "step": 70050 |
| }, |
| { |
| "epoch": 20.43143656716418, |
| "grad_norm": 0.38676461577415466, |
| "learning_rate": 0.00035503412073490813, |
| "loss": 3.2301, |
| "step": 70100 |
| }, |
| { |
| "epoch": 20.44601212686567, |
| "grad_norm": 0.3928930163383484, |
| "learning_rate": 0.0003548591426071741, |
| "loss": 3.2322, |
| "step": 70150 |
| }, |
| { |
| "epoch": 20.460587686567163, |
| "grad_norm": 0.3859976530075073, |
| "learning_rate": 0.00035468416447944, |
| "loss": 3.2353, |
| "step": 70200 |
| }, |
| { |
| "epoch": 20.475163246268657, |
| "grad_norm": 0.42515480518341064, |
| "learning_rate": 0.000354509186351706, |
| "loss": 3.2248, |
| "step": 70250 |
| }, |
| { |
| "epoch": 20.48973880597015, |
| "grad_norm": 0.4182749092578888, |
| "learning_rate": 0.000354334208223972, |
| "loss": 3.2393, |
| "step": 70300 |
| }, |
| { |
| "epoch": 20.504314365671643, |
| "grad_norm": 0.39793065190315247, |
| "learning_rate": 0.0003541592300962379, |
| "loss": 3.2468, |
| "step": 70350 |
| }, |
| { |
| "epoch": 20.518889925373134, |
| "grad_norm": 0.3741270899772644, |
| "learning_rate": 0.0003539842519685039, |
| "loss": 3.2475, |
| "step": 70400 |
| }, |
| { |
| "epoch": 20.533465485074625, |
| "grad_norm": 0.38552579283714294, |
| "learning_rate": 0.0003538092738407699, |
| "loss": 3.2452, |
| "step": 70450 |
| }, |
| { |
| "epoch": 20.54804104477612, |
| "grad_norm": 0.3872421383857727, |
| "learning_rate": 0.0003536342957130358, |
| "loss": 3.2376, |
| "step": 70500 |
| }, |
| { |
| "epoch": 20.56261660447761, |
| "grad_norm": 0.40599504113197327, |
| "learning_rate": 0.0003534593175853018, |
| "loss": 3.2374, |
| "step": 70550 |
| }, |
| { |
| "epoch": 20.577192164179106, |
| "grad_norm": 0.38468611240386963, |
| "learning_rate": 0.0003532843394575678, |
| "loss": 3.2457, |
| "step": 70600 |
| }, |
| { |
| "epoch": 20.591767723880597, |
| "grad_norm": 0.40714865922927856, |
| "learning_rate": 0.0003531093613298338, |
| "loss": 3.2432, |
| "step": 70650 |
| }, |
| { |
| "epoch": 20.60634328358209, |
| "grad_norm": 0.38764360547065735, |
| "learning_rate": 0.00035293438320209967, |
| "loss": 3.2479, |
| "step": 70700 |
| }, |
| { |
| "epoch": 20.620918843283583, |
| "grad_norm": 0.36900195479393005, |
| "learning_rate": 0.00035275940507436567, |
| "loss": 3.2318, |
| "step": 70750 |
| }, |
| { |
| "epoch": 20.635494402985074, |
| "grad_norm": 0.4236606955528259, |
| "learning_rate": 0.00035258442694663166, |
| "loss": 3.249, |
| "step": 70800 |
| }, |
| { |
| "epoch": 20.650069962686565, |
| "grad_norm": 0.4084267020225525, |
| "learning_rate": 0.0003524094488188976, |
| "loss": 3.2455, |
| "step": 70850 |
| }, |
| { |
| "epoch": 20.66464552238806, |
| "grad_norm": 0.39864709973335266, |
| "learning_rate": 0.00035223447069116355, |
| "loss": 3.2535, |
| "step": 70900 |
| }, |
| { |
| "epoch": 20.67922108208955, |
| "grad_norm": 0.4109085202217102, |
| "learning_rate": 0.00035205949256342955, |
| "loss": 3.2512, |
| "step": 70950 |
| }, |
| { |
| "epoch": 20.693796641791046, |
| "grad_norm": 0.4137507975101471, |
| "learning_rate": 0.0003518845144356955, |
| "loss": 3.2447, |
| "step": 71000 |
| }, |
| { |
| "epoch": 20.693796641791046, |
| "eval_accuracy": 0.3722022405497295, |
| "eval_loss": 3.5451698303222656, |
| "eval_runtime": 179.3907, |
| "eval_samples_per_second": 92.686, |
| "eval_steps_per_second": 5.797, |
| "step": 71000 |
| }, |
| { |
| "epoch": 20.708372201492537, |
| "grad_norm": 0.36888283491134644, |
| "learning_rate": 0.0003517095363079615, |
| "loss": 3.2481, |
| "step": 71050 |
| }, |
| { |
| "epoch": 20.72294776119403, |
| "grad_norm": 0.4099491238594055, |
| "learning_rate": 0.00035153455818022743, |
| "loss": 3.2501, |
| "step": 71100 |
| }, |
| { |
| "epoch": 20.737523320895523, |
| "grad_norm": 0.37443771958351135, |
| "learning_rate": 0.00035135958005249343, |
| "loss": 3.2593, |
| "step": 71150 |
| }, |
| { |
| "epoch": 20.752098880597014, |
| "grad_norm": 0.36958447098731995, |
| "learning_rate": 0.0003511846019247594, |
| "loss": 3.249, |
| "step": 71200 |
| }, |
| { |
| "epoch": 20.76667444029851, |
| "grad_norm": 0.38606441020965576, |
| "learning_rate": 0.0003510096237970253, |
| "loss": 3.2524, |
| "step": 71250 |
| }, |
| { |
| "epoch": 20.78125, |
| "grad_norm": 0.39055198431015015, |
| "learning_rate": 0.0003508346456692913, |
| "loss": 3.2566, |
| "step": 71300 |
| }, |
| { |
| "epoch": 20.79582555970149, |
| "grad_norm": 0.37249884009361267, |
| "learning_rate": 0.00035065966754155726, |
| "loss": 3.2574, |
| "step": 71350 |
| }, |
| { |
| "epoch": 20.810401119402986, |
| "grad_norm": 0.3753904402256012, |
| "learning_rate": 0.00035048468941382326, |
| "loss": 3.2471, |
| "step": 71400 |
| }, |
| { |
| "epoch": 20.824976679104477, |
| "grad_norm": 0.37924399971961975, |
| "learning_rate": 0.0003503097112860892, |
| "loss": 3.26, |
| "step": 71450 |
| }, |
| { |
| "epoch": 20.83955223880597, |
| "grad_norm": 0.3716459274291992, |
| "learning_rate": 0.00035013473315835514, |
| "loss": 3.2668, |
| "step": 71500 |
| }, |
| { |
| "epoch": 20.854127798507463, |
| "grad_norm": 0.37003329396247864, |
| "learning_rate": 0.00034995975503062114, |
| "loss": 3.2646, |
| "step": 71550 |
| }, |
| { |
| "epoch": 20.868703358208954, |
| "grad_norm": 0.40597912669181824, |
| "learning_rate": 0.00034978477690288714, |
| "loss": 3.259, |
| "step": 71600 |
| }, |
| { |
| "epoch": 20.88327891791045, |
| "grad_norm": 0.4224961996078491, |
| "learning_rate": 0.00034960979877515303, |
| "loss": 3.277, |
| "step": 71650 |
| }, |
| { |
| "epoch": 20.89785447761194, |
| "grad_norm": 0.3840181529521942, |
| "learning_rate": 0.000349434820647419, |
| "loss": 3.2468, |
| "step": 71700 |
| }, |
| { |
| "epoch": 20.912430037313435, |
| "grad_norm": 0.39738985896110535, |
| "learning_rate": 0.000349259842519685, |
| "loss": 3.2666, |
| "step": 71750 |
| }, |
| { |
| "epoch": 20.927005597014926, |
| "grad_norm": 0.3849341571331024, |
| "learning_rate": 0.000349084864391951, |
| "loss": 3.2541, |
| "step": 71800 |
| }, |
| { |
| "epoch": 20.941581156716417, |
| "grad_norm": 0.38834768533706665, |
| "learning_rate": 0.0003489098862642169, |
| "loss": 3.2643, |
| "step": 71850 |
| }, |
| { |
| "epoch": 20.95615671641791, |
| "grad_norm": 0.36527711153030396, |
| "learning_rate": 0.0003487349081364829, |
| "loss": 3.2632, |
| "step": 71900 |
| }, |
| { |
| "epoch": 20.970732276119403, |
| "grad_norm": 0.40803590416908264, |
| "learning_rate": 0.0003485599300087489, |
| "loss": 3.263, |
| "step": 71950 |
| }, |
| { |
| "epoch": 20.985307835820894, |
| "grad_norm": 0.3897600769996643, |
| "learning_rate": 0.00034838495188101485, |
| "loss": 3.2733, |
| "step": 72000 |
| }, |
| { |
| "epoch": 20.985307835820894, |
| "eval_accuracy": 0.3729873968930595, |
| "eval_loss": 3.538719415664673, |
| "eval_runtime": 179.3409, |
| "eval_samples_per_second": 92.712, |
| "eval_steps_per_second": 5.799, |
| "step": 72000 |
| }, |
| { |
| "epoch": 20.99988339552239, |
| "grad_norm": 0.359904021024704, |
| "learning_rate": 0.0003482099737532808, |
| "loss": 3.2644, |
| "step": 72050 |
| }, |
| { |
| "epoch": 21.014284048507463, |
| "grad_norm": 0.38795262575149536, |
| "learning_rate": 0.0003480349956255468, |
| "loss": 3.144, |
| "step": 72100 |
| }, |
| { |
| "epoch": 21.028859608208954, |
| "grad_norm": 0.41623035073280334, |
| "learning_rate": 0.00034786001749781274, |
| "loss": 3.1663, |
| "step": 72150 |
| }, |
| { |
| "epoch": 21.04343516791045, |
| "grad_norm": 0.40783727169036865, |
| "learning_rate": 0.0003476850393700787, |
| "loss": 3.1662, |
| "step": 72200 |
| }, |
| { |
| "epoch": 21.05801072761194, |
| "grad_norm": 0.4014035165309906, |
| "learning_rate": 0.0003475100612423447, |
| "loss": 3.1651, |
| "step": 72250 |
| }, |
| { |
| "epoch": 21.07258628731343, |
| "grad_norm": 0.3629119098186493, |
| "learning_rate": 0.0003473350831146106, |
| "loss": 3.1723, |
| "step": 72300 |
| }, |
| { |
| "epoch": 21.087161847014926, |
| "grad_norm": 0.40349218249320984, |
| "learning_rate": 0.0003471601049868766, |
| "loss": 3.1713, |
| "step": 72350 |
| }, |
| { |
| "epoch": 21.101737406716417, |
| "grad_norm": 0.41936585307121277, |
| "learning_rate": 0.00034698512685914256, |
| "loss": 3.1753, |
| "step": 72400 |
| }, |
| { |
| "epoch": 21.11631296641791, |
| "grad_norm": 0.38126733899116516, |
| "learning_rate": 0.00034681014873140856, |
| "loss": 3.1804, |
| "step": 72450 |
| }, |
| { |
| "epoch": 21.130888526119403, |
| "grad_norm": 0.4074459969997406, |
| "learning_rate": 0.0003466351706036745, |
| "loss": 3.197, |
| "step": 72500 |
| }, |
| { |
| "epoch": 21.145464085820894, |
| "grad_norm": 0.40520617365837097, |
| "learning_rate": 0.0003464601924759405, |
| "loss": 3.1886, |
| "step": 72550 |
| }, |
| { |
| "epoch": 21.16003964552239, |
| "grad_norm": 0.3764589726924896, |
| "learning_rate": 0.00034628521434820644, |
| "loss": 3.1782, |
| "step": 72600 |
| }, |
| { |
| "epoch": 21.17461520522388, |
| "grad_norm": 0.389152467250824, |
| "learning_rate": 0.0003461102362204724, |
| "loss": 3.1987, |
| "step": 72650 |
| }, |
| { |
| "epoch": 21.189190764925375, |
| "grad_norm": 0.4187963604927063, |
| "learning_rate": 0.0003459352580927384, |
| "loss": 3.1922, |
| "step": 72700 |
| }, |
| { |
| "epoch": 21.203766324626866, |
| "grad_norm": 0.38693854212760925, |
| "learning_rate": 0.0003457602799650044, |
| "loss": 3.194, |
| "step": 72750 |
| }, |
| { |
| "epoch": 21.218341884328357, |
| "grad_norm": 0.37709715962409973, |
| "learning_rate": 0.00034558530183727027, |
| "loss": 3.1903, |
| "step": 72800 |
| }, |
| { |
| "epoch": 21.23291744402985, |
| "grad_norm": 0.3794679641723633, |
| "learning_rate": 0.00034541032370953627, |
| "loss": 3.201, |
| "step": 72850 |
| }, |
| { |
| "epoch": 21.247493003731343, |
| "grad_norm": 0.3991202414035797, |
| "learning_rate": 0.00034523534558180227, |
| "loss": 3.1958, |
| "step": 72900 |
| }, |
| { |
| "epoch": 21.262068563432837, |
| "grad_norm": 0.41374242305755615, |
| "learning_rate": 0.00034506036745406826, |
| "loss": 3.2002, |
| "step": 72950 |
| }, |
| { |
| "epoch": 21.27664412313433, |
| "grad_norm": 0.37337031960487366, |
| "learning_rate": 0.00034488538932633415, |
| "loss": 3.2131, |
| "step": 73000 |
| }, |
| { |
| "epoch": 21.27664412313433, |
| "eval_accuracy": 0.37209054614561915, |
| "eval_loss": 3.558110475540161, |
| "eval_runtime": 179.4815, |
| "eval_samples_per_second": 92.639, |
| "eval_steps_per_second": 5.794, |
| "step": 73000 |
| }, |
| { |
| "epoch": 21.29121968283582, |
| "grad_norm": 0.4104592502117157, |
| "learning_rate": 0.00034471041119860015, |
| "loss": 3.2005, |
| "step": 73050 |
| }, |
| { |
| "epoch": 21.305795242537314, |
| "grad_norm": 0.40162068605422974, |
| "learning_rate": 0.00034453543307086615, |
| "loss": 3.204, |
| "step": 73100 |
| }, |
| { |
| "epoch": 21.320370802238806, |
| "grad_norm": 0.3967791497707367, |
| "learning_rate": 0.00034436045494313204, |
| "loss": 3.2127, |
| "step": 73150 |
| }, |
| { |
| "epoch": 21.334946361940297, |
| "grad_norm": 0.41675812005996704, |
| "learning_rate": 0.00034418547681539804, |
| "loss": 3.209, |
| "step": 73200 |
| }, |
| { |
| "epoch": 21.34952192164179, |
| "grad_norm": 0.37078526616096497, |
| "learning_rate": 0.00034401049868766403, |
| "loss": 3.2243, |
| "step": 73250 |
| }, |
| { |
| "epoch": 21.364097481343283, |
| "grad_norm": 0.3804914355278015, |
| "learning_rate": 0.00034383552055993, |
| "loss": 3.1965, |
| "step": 73300 |
| }, |
| { |
| "epoch": 21.378673041044777, |
| "grad_norm": 0.4345126450061798, |
| "learning_rate": 0.0003436605424321959, |
| "loss": 3.2157, |
| "step": 73350 |
| }, |
| { |
| "epoch": 21.39324860074627, |
| "grad_norm": 0.3758656978607178, |
| "learning_rate": 0.0003434855643044619, |
| "loss": 3.2316, |
| "step": 73400 |
| }, |
| { |
| "epoch": 21.40782416044776, |
| "grad_norm": 0.40299421548843384, |
| "learning_rate": 0.00034331058617672786, |
| "loss": 3.2177, |
| "step": 73450 |
| }, |
| { |
| "epoch": 21.422399720149254, |
| "grad_norm": 0.3812105655670166, |
| "learning_rate": 0.00034313560804899386, |
| "loss": 3.213, |
| "step": 73500 |
| }, |
| { |
| "epoch": 21.436975279850746, |
| "grad_norm": 0.37582188844680786, |
| "learning_rate": 0.0003429606299212598, |
| "loss": 3.2124, |
| "step": 73550 |
| }, |
| { |
| "epoch": 21.45155083955224, |
| "grad_norm": 0.4020504951477051, |
| "learning_rate": 0.00034278565179352575, |
| "loss": 3.2213, |
| "step": 73600 |
| }, |
| { |
| "epoch": 21.46612639925373, |
| "grad_norm": 0.4116267263889313, |
| "learning_rate": 0.00034261067366579174, |
| "loss": 3.2287, |
| "step": 73650 |
| }, |
| { |
| "epoch": 21.480701958955223, |
| "grad_norm": 0.36419060826301575, |
| "learning_rate": 0.00034243569553805774, |
| "loss": 3.2206, |
| "step": 73700 |
| }, |
| { |
| "epoch": 21.495277518656717, |
| "grad_norm": 0.3853178322315216, |
| "learning_rate": 0.0003422607174103237, |
| "loss": 3.2365, |
| "step": 73750 |
| }, |
| { |
| "epoch": 21.50985307835821, |
| "grad_norm": 0.3858824670314789, |
| "learning_rate": 0.00034208573928258963, |
| "loss": 3.2259, |
| "step": 73800 |
| }, |
| { |
| "epoch": 21.524428638059703, |
| "grad_norm": 0.3743896484375, |
| "learning_rate": 0.0003419107611548556, |
| "loss": 3.2361, |
| "step": 73850 |
| }, |
| { |
| "epoch": 21.539004197761194, |
| "grad_norm": 0.38600412011146545, |
| "learning_rate": 0.0003417357830271216, |
| "loss": 3.2351, |
| "step": 73900 |
| }, |
| { |
| "epoch": 21.553579757462686, |
| "grad_norm": 0.41251125931739807, |
| "learning_rate": 0.0003415608048993875, |
| "loss": 3.2241, |
| "step": 73950 |
| }, |
| { |
| "epoch": 21.56815531716418, |
| "grad_norm": 0.4111970067024231, |
| "learning_rate": 0.0003413858267716535, |
| "loss": 3.2312, |
| "step": 74000 |
| }, |
| { |
| "epoch": 21.56815531716418, |
| "eval_accuracy": 0.3726626710121949, |
| "eval_loss": 3.5494892597198486, |
| "eval_runtime": 179.3043, |
| "eval_samples_per_second": 92.731, |
| "eval_steps_per_second": 5.8, |
| "step": 74000 |
| }, |
| { |
| "epoch": 21.58273087686567, |
| "grad_norm": 0.4084521532058716, |
| "learning_rate": 0.0003412108486439195, |
| "loss": 3.2352, |
| "step": 74050 |
| }, |
| { |
| "epoch": 21.597306436567163, |
| "grad_norm": 0.37888526916503906, |
| "learning_rate": 0.0003410358705161854, |
| "loss": 3.2443, |
| "step": 74100 |
| }, |
| { |
| "epoch": 21.611881996268657, |
| "grad_norm": 0.3779131770133972, |
| "learning_rate": 0.0003408608923884514, |
| "loss": 3.2356, |
| "step": 74150 |
| }, |
| { |
| "epoch": 21.62645755597015, |
| "grad_norm": 0.39611154794692993, |
| "learning_rate": 0.0003406859142607174, |
| "loss": 3.223, |
| "step": 74200 |
| }, |
| { |
| "epoch": 21.641033115671643, |
| "grad_norm": 0.3650185763835907, |
| "learning_rate": 0.0003405109361329834, |
| "loss": 3.2348, |
| "step": 74250 |
| }, |
| { |
| "epoch": 21.655608675373134, |
| "grad_norm": 0.40089526772499084, |
| "learning_rate": 0.0003403359580052493, |
| "loss": 3.23, |
| "step": 74300 |
| }, |
| { |
| "epoch": 21.670184235074625, |
| "grad_norm": 0.3926517069339752, |
| "learning_rate": 0.0003401609798775153, |
| "loss": 3.2305, |
| "step": 74350 |
| }, |
| { |
| "epoch": 21.68475979477612, |
| "grad_norm": 0.415294349193573, |
| "learning_rate": 0.0003399860017497813, |
| "loss": 3.2289, |
| "step": 74400 |
| }, |
| { |
| "epoch": 21.69933535447761, |
| "grad_norm": 0.3892729580402374, |
| "learning_rate": 0.0003398110236220472, |
| "loss": 3.2243, |
| "step": 74450 |
| }, |
| { |
| "epoch": 21.713910914179106, |
| "grad_norm": 0.394654780626297, |
| "learning_rate": 0.00033963604549431316, |
| "loss": 3.2401, |
| "step": 74500 |
| }, |
| { |
| "epoch": 21.728486473880597, |
| "grad_norm": 0.4018441438674927, |
| "learning_rate": 0.00033946106736657916, |
| "loss": 3.2441, |
| "step": 74550 |
| }, |
| { |
| "epoch": 21.74306203358209, |
| "grad_norm": 0.36875492334365845, |
| "learning_rate": 0.0003392860892388451, |
| "loss": 3.2437, |
| "step": 74600 |
| }, |
| { |
| "epoch": 21.757637593283583, |
| "grad_norm": 0.37119260430336, |
| "learning_rate": 0.0003391111111111111, |
| "loss": 3.2423, |
| "step": 74650 |
| }, |
| { |
| "epoch": 21.772213152985074, |
| "grad_norm": 0.40055206418037415, |
| "learning_rate": 0.00033893613298337705, |
| "loss": 3.2489, |
| "step": 74700 |
| }, |
| { |
| "epoch": 21.786788712686565, |
| "grad_norm": 0.40984848141670227, |
| "learning_rate": 0.000338761154855643, |
| "loss": 3.2415, |
| "step": 74750 |
| }, |
| { |
| "epoch": 21.80136427238806, |
| "grad_norm": 0.3929216265678406, |
| "learning_rate": 0.000338586176727909, |
| "loss": 3.2455, |
| "step": 74800 |
| }, |
| { |
| "epoch": 21.81593983208955, |
| "grad_norm": 0.3937998414039612, |
| "learning_rate": 0.00033841119860017493, |
| "loss": 3.2448, |
| "step": 74850 |
| }, |
| { |
| "epoch": 21.830515391791046, |
| "grad_norm": 0.38548168540000916, |
| "learning_rate": 0.0003382362204724409, |
| "loss": 3.2507, |
| "step": 74900 |
| }, |
| { |
| "epoch": 21.845090951492537, |
| "grad_norm": 0.47423017024993896, |
| "learning_rate": 0.00033806124234470687, |
| "loss": 3.2418, |
| "step": 74950 |
| }, |
| { |
| "epoch": 21.85966651119403, |
| "grad_norm": 0.4023411273956299, |
| "learning_rate": 0.00033788626421697287, |
| "loss": 3.2569, |
| "step": 75000 |
| }, |
| { |
| "epoch": 21.85966651119403, |
| "eval_accuracy": 0.3730542487597978, |
| "eval_loss": 3.5411148071289062, |
| "eval_runtime": 179.3158, |
| "eval_samples_per_second": 92.725, |
| "eval_steps_per_second": 5.8, |
| "step": 75000 |
| }, |
| { |
| "epoch": 21.874242070895523, |
| "grad_norm": 0.3734757602214813, |
| "learning_rate": 0.0003377112860892388, |
| "loss": 3.2426, |
| "step": 75050 |
| }, |
| { |
| "epoch": 21.888817630597014, |
| "grad_norm": 0.36795511841773987, |
| "learning_rate": 0.00033753630796150476, |
| "loss": 3.2502, |
| "step": 75100 |
| }, |
| { |
| "epoch": 21.90339319029851, |
| "grad_norm": 0.40975630283355713, |
| "learning_rate": 0.00033736132983377075, |
| "loss": 3.2659, |
| "step": 75150 |
| }, |
| { |
| "epoch": 21.91796875, |
| "grad_norm": 0.44392845034599304, |
| "learning_rate": 0.00033718635170603675, |
| "loss": 3.2527, |
| "step": 75200 |
| }, |
| { |
| "epoch": 21.93254430970149, |
| "grad_norm": 0.38623732328414917, |
| "learning_rate": 0.00033701137357830264, |
| "loss": 3.2656, |
| "step": 75250 |
| }, |
| { |
| "epoch": 21.947119869402986, |
| "grad_norm": 0.39194947481155396, |
| "learning_rate": 0.00033683639545056864, |
| "loss": 3.2549, |
| "step": 75300 |
| }, |
| { |
| "epoch": 21.961695429104477, |
| "grad_norm": 0.41067978739738464, |
| "learning_rate": 0.00033666141732283464, |
| "loss": 3.2378, |
| "step": 75350 |
| }, |
| { |
| "epoch": 21.97627098880597, |
| "grad_norm": 0.3773854672908783, |
| "learning_rate": 0.00033648643919510063, |
| "loss": 3.2617, |
| "step": 75400 |
| }, |
| { |
| "epoch": 21.990846548507463, |
| "grad_norm": 0.4150872528553009, |
| "learning_rate": 0.0003363114610673665, |
| "loss": 3.254, |
| "step": 75450 |
| }, |
| { |
| "epoch": 22.005247201492537, |
| "grad_norm": 0.41412967443466187, |
| "learning_rate": 0.0003361364829396325, |
| "loss": 3.2217, |
| "step": 75500 |
| }, |
| { |
| "epoch": 22.01982276119403, |
| "grad_norm": 0.3943207859992981, |
| "learning_rate": 0.0003359615048118985, |
| "loss": 3.1477, |
| "step": 75550 |
| }, |
| { |
| "epoch": 22.034398320895523, |
| "grad_norm": 0.41345933079719543, |
| "learning_rate": 0.00033578652668416446, |
| "loss": 3.1496, |
| "step": 75600 |
| }, |
| { |
| "epoch": 22.048973880597014, |
| "grad_norm": 0.41332539916038513, |
| "learning_rate": 0.0003356115485564304, |
| "loss": 3.1548, |
| "step": 75650 |
| }, |
| { |
| "epoch": 22.06354944029851, |
| "grad_norm": 0.4008597731590271, |
| "learning_rate": 0.0003354365704286964, |
| "loss": 3.1602, |
| "step": 75700 |
| }, |
| { |
| "epoch": 22.078125, |
| "grad_norm": 0.4184229373931885, |
| "learning_rate": 0.00033526159230096235, |
| "loss": 3.158, |
| "step": 75750 |
| }, |
| { |
| "epoch": 22.09270055970149, |
| "grad_norm": 0.41302576661109924, |
| "learning_rate": 0.0003350866141732283, |
| "loss": 3.1706, |
| "step": 75800 |
| }, |
| { |
| "epoch": 22.107276119402986, |
| "grad_norm": 0.4041043519973755, |
| "learning_rate": 0.0003349116360454943, |
| "loss": 3.1706, |
| "step": 75850 |
| }, |
| { |
| "epoch": 22.121851679104477, |
| "grad_norm": 0.37200263142585754, |
| "learning_rate": 0.00033473665791776023, |
| "loss": 3.1662, |
| "step": 75900 |
| }, |
| { |
| "epoch": 22.13642723880597, |
| "grad_norm": 0.42748311161994934, |
| "learning_rate": 0.00033456167979002623, |
| "loss": 3.1711, |
| "step": 75950 |
| }, |
| { |
| "epoch": 22.151002798507463, |
| "grad_norm": 0.4101450741291046, |
| "learning_rate": 0.00033438670166229217, |
| "loss": 3.1789, |
| "step": 76000 |
| }, |
| { |
| "epoch": 22.151002798507463, |
| "eval_accuracy": 0.37194919211049104, |
| "eval_loss": 3.5588977336883545, |
| "eval_runtime": 179.9243, |
| "eval_samples_per_second": 92.411, |
| "eval_steps_per_second": 5.78, |
| "step": 76000 |
| }, |
| { |
| "epoch": 22.165578358208954, |
| "grad_norm": 0.4208793640136719, |
| "learning_rate": 0.0003342117235345581, |
| "loss": 3.1745, |
| "step": 76050 |
| }, |
| { |
| "epoch": 22.18015391791045, |
| "grad_norm": 0.4094926118850708, |
| "learning_rate": 0.0003340367454068241, |
| "loss": 3.1825, |
| "step": 76100 |
| }, |
| { |
| "epoch": 22.19472947761194, |
| "grad_norm": 0.40947505831718445, |
| "learning_rate": 0.0003338617672790901, |
| "loss": 3.1789, |
| "step": 76150 |
| }, |
| { |
| "epoch": 22.20930503731343, |
| "grad_norm": 0.4133168160915375, |
| "learning_rate": 0.000333686789151356, |
| "loss": 3.1793, |
| "step": 76200 |
| }, |
| { |
| "epoch": 22.223880597014926, |
| "grad_norm": 0.4098125696182251, |
| "learning_rate": 0.000333511811023622, |
| "loss": 3.1828, |
| "step": 76250 |
| }, |
| { |
| "epoch": 22.238456156716417, |
| "grad_norm": 0.40815654397010803, |
| "learning_rate": 0.000333336832895888, |
| "loss": 3.1904, |
| "step": 76300 |
| }, |
| { |
| "epoch": 22.25303171641791, |
| "grad_norm": 0.39737969636917114, |
| "learning_rate": 0.000333161854768154, |
| "loss": 3.1966, |
| "step": 76350 |
| }, |
| { |
| "epoch": 22.267607276119403, |
| "grad_norm": 0.43529120087623596, |
| "learning_rate": 0.0003329868766404199, |
| "loss": 3.1996, |
| "step": 76400 |
| }, |
| { |
| "epoch": 22.282182835820894, |
| "grad_norm": 0.4474215507507324, |
| "learning_rate": 0.0003328118985126859, |
| "loss": 3.185, |
| "step": 76450 |
| }, |
| { |
| "epoch": 22.29675839552239, |
| "grad_norm": 0.4055737853050232, |
| "learning_rate": 0.0003326369203849519, |
| "loss": 3.1962, |
| "step": 76500 |
| }, |
| { |
| "epoch": 22.31133395522388, |
| "grad_norm": 0.39701855182647705, |
| "learning_rate": 0.0003324619422572179, |
| "loss": 3.2104, |
| "step": 76550 |
| }, |
| { |
| "epoch": 22.325909514925375, |
| "grad_norm": 0.3831678628921509, |
| "learning_rate": 0.00033228696412948377, |
| "loss": 3.1951, |
| "step": 76600 |
| }, |
| { |
| "epoch": 22.340485074626866, |
| "grad_norm": 0.3953789472579956, |
| "learning_rate": 0.00033211198600174976, |
| "loss": 3.2057, |
| "step": 76650 |
| }, |
| { |
| "epoch": 22.355060634328357, |
| "grad_norm": 0.3927507996559143, |
| "learning_rate": 0.00033193700787401576, |
| "loss": 3.204, |
| "step": 76700 |
| }, |
| { |
| "epoch": 22.36963619402985, |
| "grad_norm": 0.43750348687171936, |
| "learning_rate": 0.00033176202974628165, |
| "loss": 3.2142, |
| "step": 76750 |
| }, |
| { |
| "epoch": 22.384211753731343, |
| "grad_norm": 0.4124338626861572, |
| "learning_rate": 0.00033158705161854765, |
| "loss": 3.2031, |
| "step": 76800 |
| }, |
| { |
| "epoch": 22.398787313432837, |
| "grad_norm": 0.4088267385959625, |
| "learning_rate": 0.00033141207349081365, |
| "loss": 3.2098, |
| "step": 76850 |
| }, |
| { |
| "epoch": 22.41336287313433, |
| "grad_norm": 0.39260512590408325, |
| "learning_rate": 0.0003312370953630796, |
| "loss": 3.2081, |
| "step": 76900 |
| }, |
| { |
| "epoch": 22.42793843283582, |
| "grad_norm": 0.409219890832901, |
| "learning_rate": 0.00033106211723534553, |
| "loss": 3.2058, |
| "step": 76950 |
| }, |
| { |
| "epoch": 22.442513992537314, |
| "grad_norm": 0.40991613268852234, |
| "learning_rate": 0.00033088713910761153, |
| "loss": 3.202, |
| "step": 77000 |
| }, |
| { |
| "epoch": 22.442513992537314, |
| "eval_accuracy": 0.37240091299876876, |
| "eval_loss": 3.5495948791503906, |
| "eval_runtime": 179.3301, |
| "eval_samples_per_second": 92.717, |
| "eval_steps_per_second": 5.799, |
| "step": 77000 |
| }, |
| { |
| "epoch": 22.457089552238806, |
| "grad_norm": 0.41137585043907166, |
| "learning_rate": 0.0003307121609798775, |
| "loss": 3.2186, |
| "step": 77050 |
| }, |
| { |
| "epoch": 22.471665111940297, |
| "grad_norm": 0.4068697392940521, |
| "learning_rate": 0.00033053718285214347, |
| "loss": 3.2289, |
| "step": 77100 |
| }, |
| { |
| "epoch": 22.48624067164179, |
| "grad_norm": 0.40242230892181396, |
| "learning_rate": 0.0003303622047244094, |
| "loss": 3.2167, |
| "step": 77150 |
| }, |
| { |
| "epoch": 22.500816231343283, |
| "grad_norm": 0.4122253656387329, |
| "learning_rate": 0.00033018722659667536, |
| "loss": 3.2109, |
| "step": 77200 |
| }, |
| { |
| "epoch": 22.515391791044777, |
| "grad_norm": 0.39489126205444336, |
| "learning_rate": 0.00033001224846894136, |
| "loss": 3.2146, |
| "step": 77250 |
| }, |
| { |
| "epoch": 22.52996735074627, |
| "grad_norm": 0.38545411825180054, |
| "learning_rate": 0.00032983727034120735, |
| "loss": 3.2177, |
| "step": 77300 |
| }, |
| { |
| "epoch": 22.54454291044776, |
| "grad_norm": 0.39428555965423584, |
| "learning_rate": 0.00032966229221347324, |
| "loss": 3.2321, |
| "step": 77350 |
| }, |
| { |
| "epoch": 22.559118470149254, |
| "grad_norm": 0.3956550657749176, |
| "learning_rate": 0.00032948731408573924, |
| "loss": 3.2223, |
| "step": 77400 |
| }, |
| { |
| "epoch": 22.573694029850746, |
| "grad_norm": 0.4145047962665558, |
| "learning_rate": 0.00032931233595800524, |
| "loss": 3.2189, |
| "step": 77450 |
| }, |
| { |
| "epoch": 22.58826958955224, |
| "grad_norm": 0.4277302920818329, |
| "learning_rate": 0.00032913735783027124, |
| "loss": 3.2234, |
| "step": 77500 |
| }, |
| { |
| "epoch": 22.60284514925373, |
| "grad_norm": 0.37812212109565735, |
| "learning_rate": 0.0003289623797025371, |
| "loss": 3.2148, |
| "step": 77550 |
| }, |
| { |
| "epoch": 22.617420708955223, |
| "grad_norm": 0.40783196687698364, |
| "learning_rate": 0.0003287874015748031, |
| "loss": 3.2286, |
| "step": 77600 |
| }, |
| { |
| "epoch": 22.631996268656717, |
| "grad_norm": 0.3943743407726288, |
| "learning_rate": 0.0003286124234470691, |
| "loss": 3.2179, |
| "step": 77650 |
| }, |
| { |
| "epoch": 22.64657182835821, |
| "grad_norm": 0.42744752764701843, |
| "learning_rate": 0.000328437445319335, |
| "loss": 3.2319, |
| "step": 77700 |
| }, |
| { |
| "epoch": 22.661147388059703, |
| "grad_norm": 0.4282462000846863, |
| "learning_rate": 0.000328262467191601, |
| "loss": 3.2319, |
| "step": 77750 |
| }, |
| { |
| "epoch": 22.675722947761194, |
| "grad_norm": 0.3943832218647003, |
| "learning_rate": 0.000328087489063867, |
| "loss": 3.2355, |
| "step": 77800 |
| }, |
| { |
| "epoch": 22.690298507462686, |
| "grad_norm": 0.41673794388771057, |
| "learning_rate": 0.000327912510936133, |
| "loss": 3.2374, |
| "step": 77850 |
| }, |
| { |
| "epoch": 22.70487406716418, |
| "grad_norm": 0.396979957818985, |
| "learning_rate": 0.0003277375328083989, |
| "loss": 3.2315, |
| "step": 77900 |
| }, |
| { |
| "epoch": 22.71944962686567, |
| "grad_norm": 0.3709559440612793, |
| "learning_rate": 0.0003275625546806649, |
| "loss": 3.2202, |
| "step": 77950 |
| }, |
| { |
| "epoch": 22.734025186567163, |
| "grad_norm": 0.3927803039550781, |
| "learning_rate": 0.0003273875765529309, |
| "loss": 3.2272, |
| "step": 78000 |
| }, |
| { |
| "epoch": 22.734025186567163, |
| "eval_accuracy": 0.37302141131117106, |
| "eval_loss": 3.542844772338867, |
| "eval_runtime": 181.1051, |
| "eval_samples_per_second": 91.809, |
| "eval_steps_per_second": 5.743, |
| "step": 78000 |
| }, |
| { |
| "epoch": 22.748600746268657, |
| "grad_norm": 0.4058307707309723, |
| "learning_rate": 0.00032721259842519683, |
| "loss": 3.2307, |
| "step": 78050 |
| }, |
| { |
| "epoch": 22.76317630597015, |
| "grad_norm": 0.4223111569881439, |
| "learning_rate": 0.0003270376202974628, |
| "loss": 3.2307, |
| "step": 78100 |
| }, |
| { |
| "epoch": 22.777751865671643, |
| "grad_norm": 0.4018338620662689, |
| "learning_rate": 0.00032686264216972877, |
| "loss": 3.2383, |
| "step": 78150 |
| }, |
| { |
| "epoch": 22.792327425373134, |
| "grad_norm": 0.3855460584163666, |
| "learning_rate": 0.0003266876640419947, |
| "loss": 3.2252, |
| "step": 78200 |
| }, |
| { |
| "epoch": 22.806902985074625, |
| "grad_norm": 0.3729098439216614, |
| "learning_rate": 0.0003265126859142607, |
| "loss": 3.2308, |
| "step": 78250 |
| }, |
| { |
| "epoch": 22.82147854477612, |
| "grad_norm": 0.4244031012058258, |
| "learning_rate": 0.00032633770778652666, |
| "loss": 3.2405, |
| "step": 78300 |
| }, |
| { |
| "epoch": 22.83605410447761, |
| "grad_norm": 0.38624075055122375, |
| "learning_rate": 0.0003261627296587926, |
| "loss": 3.2393, |
| "step": 78350 |
| }, |
| { |
| "epoch": 22.850629664179106, |
| "grad_norm": 0.38735702633857727, |
| "learning_rate": 0.0003259877515310586, |
| "loss": 3.2464, |
| "step": 78400 |
| }, |
| { |
| "epoch": 22.865205223880597, |
| "grad_norm": 0.423250675201416, |
| "learning_rate": 0.00032581277340332454, |
| "loss": 3.2473, |
| "step": 78450 |
| }, |
| { |
| "epoch": 22.87978078358209, |
| "grad_norm": 0.3985595703125, |
| "learning_rate": 0.0003256377952755905, |
| "loss": 3.235, |
| "step": 78500 |
| }, |
| { |
| "epoch": 22.894356343283583, |
| "grad_norm": 0.4058718979358673, |
| "learning_rate": 0.0003254628171478565, |
| "loss": 3.2463, |
| "step": 78550 |
| }, |
| { |
| "epoch": 22.908931902985074, |
| "grad_norm": 0.4019133150577545, |
| "learning_rate": 0.0003252878390201225, |
| "loss": 3.2384, |
| "step": 78600 |
| }, |
| { |
| "epoch": 22.923507462686565, |
| "grad_norm": 0.37895283102989197, |
| "learning_rate": 0.00032511286089238837, |
| "loss": 3.248, |
| "step": 78650 |
| }, |
| { |
| "epoch": 22.93808302238806, |
| "grad_norm": 0.42432135343551636, |
| "learning_rate": 0.00032493788276465437, |
| "loss": 3.2476, |
| "step": 78700 |
| }, |
| { |
| "epoch": 22.95265858208955, |
| "grad_norm": 0.3959765136241913, |
| "learning_rate": 0.00032476290463692036, |
| "loss": 3.2421, |
| "step": 78750 |
| }, |
| { |
| "epoch": 22.967234141791046, |
| "grad_norm": 0.40325257182121277, |
| "learning_rate": 0.00032458792650918636, |
| "loss": 3.2377, |
| "step": 78800 |
| }, |
| { |
| "epoch": 22.981809701492537, |
| "grad_norm": 0.38290831446647644, |
| "learning_rate": 0.00032441294838145225, |
| "loss": 3.2383, |
| "step": 78850 |
| }, |
| { |
| "epoch": 22.99638526119403, |
| "grad_norm": 0.45766332745552063, |
| "learning_rate": 0.00032423797025371825, |
| "loss": 3.2512, |
| "step": 78900 |
| }, |
| { |
| "epoch": 23.010785914179106, |
| "grad_norm": 0.38711297512054443, |
| "learning_rate": 0.00032406299212598425, |
| "loss": 3.1644, |
| "step": 78950 |
| }, |
| { |
| "epoch": 23.025361473880597, |
| "grad_norm": 0.4188516139984131, |
| "learning_rate": 0.0003238880139982502, |
| "loss": 3.1514, |
| "step": 79000 |
| }, |
| { |
| "epoch": 23.025361473880597, |
| "eval_accuracy": 0.3726237133222471, |
| "eval_loss": 3.5549256801605225, |
| "eval_runtime": 179.4078, |
| "eval_samples_per_second": 92.677, |
| "eval_steps_per_second": 5.797, |
| "step": 79000 |
| }, |
| { |
| "epoch": 23.03993703358209, |
| "grad_norm": 0.42309534549713135, |
| "learning_rate": 0.00032371303587051613, |
| "loss": 3.1538, |
| "step": 79050 |
| }, |
| { |
| "epoch": 23.054512593283583, |
| "grad_norm": 0.402117520570755, |
| "learning_rate": 0.00032353805774278213, |
| "loss": 3.1334, |
| "step": 79100 |
| }, |
| { |
| "epoch": 23.069088152985074, |
| "grad_norm": 0.41363510489463806, |
| "learning_rate": 0.00032336307961504813, |
| "loss": 3.1541, |
| "step": 79150 |
| }, |
| { |
| "epoch": 23.08366371268657, |
| "grad_norm": 0.40389856696128845, |
| "learning_rate": 0.0003231881014873141, |
| "loss": 3.1565, |
| "step": 79200 |
| }, |
| { |
| "epoch": 23.09823927238806, |
| "grad_norm": 0.37992772459983826, |
| "learning_rate": 0.00032301312335958, |
| "loss": 3.1577, |
| "step": 79250 |
| }, |
| { |
| "epoch": 23.11281483208955, |
| "grad_norm": 0.4105381667613983, |
| "learning_rate": 0.000322838145231846, |
| "loss": 3.1503, |
| "step": 79300 |
| }, |
| { |
| "epoch": 23.127390391791046, |
| "grad_norm": 0.3969649374485016, |
| "learning_rate": 0.00032266316710411196, |
| "loss": 3.1627, |
| "step": 79350 |
| }, |
| { |
| "epoch": 23.141965951492537, |
| "grad_norm": 0.4166598320007324, |
| "learning_rate": 0.0003224881889763779, |
| "loss": 3.1695, |
| "step": 79400 |
| }, |
| { |
| "epoch": 23.15654151119403, |
| "grad_norm": 0.4184854328632355, |
| "learning_rate": 0.0003223132108486439, |
| "loss": 3.1737, |
| "step": 79450 |
| }, |
| { |
| "epoch": 23.171117070895523, |
| "grad_norm": 0.392345666885376, |
| "learning_rate": 0.00032213823272090984, |
| "loss": 3.1738, |
| "step": 79500 |
| }, |
| { |
| "epoch": 23.185692630597014, |
| "grad_norm": 0.39596453309059143, |
| "learning_rate": 0.00032196325459317584, |
| "loss": 3.187, |
| "step": 79550 |
| }, |
| { |
| "epoch": 23.20026819029851, |
| "grad_norm": 0.3976131081581116, |
| "learning_rate": 0.0003217882764654418, |
| "loss": 3.174, |
| "step": 79600 |
| }, |
| { |
| "epoch": 23.21484375, |
| "grad_norm": 0.409440279006958, |
| "learning_rate": 0.0003216132983377077, |
| "loss": 3.1926, |
| "step": 79650 |
| }, |
| { |
| "epoch": 23.22941930970149, |
| "grad_norm": 0.43629515171051025, |
| "learning_rate": 0.0003214383202099737, |
| "loss": 3.173, |
| "step": 79700 |
| }, |
| { |
| "epoch": 23.243994869402986, |
| "grad_norm": 0.40185999870300293, |
| "learning_rate": 0.0003212633420822397, |
| "loss": 3.1817, |
| "step": 79750 |
| }, |
| { |
| "epoch": 23.258570429104477, |
| "grad_norm": 0.39592161774635315, |
| "learning_rate": 0.0003210883639545056, |
| "loss": 3.1945, |
| "step": 79800 |
| }, |
| { |
| "epoch": 23.27314598880597, |
| "grad_norm": 0.4166530668735504, |
| "learning_rate": 0.0003209133858267716, |
| "loss": 3.1855, |
| "step": 79850 |
| }, |
| { |
| "epoch": 23.287721548507463, |
| "grad_norm": 0.4228636920452118, |
| "learning_rate": 0.0003207384076990376, |
| "loss": 3.1971, |
| "step": 79900 |
| }, |
| { |
| "epoch": 23.302297108208954, |
| "grad_norm": 0.41409003734588623, |
| "learning_rate": 0.0003205634295713036, |
| "loss": 3.1935, |
| "step": 79950 |
| }, |
| { |
| "epoch": 23.31687266791045, |
| "grad_norm": 0.41710057854652405, |
| "learning_rate": 0.0003203884514435695, |
| "loss": 3.1818, |
| "step": 80000 |
| }, |
| { |
| "epoch": 23.31687266791045, |
| "eval_accuracy": 0.3723583067034179, |
| "eval_loss": 3.553128957748413, |
| "eval_runtime": 179.3491, |
| "eval_samples_per_second": 92.707, |
| "eval_steps_per_second": 5.799, |
| "step": 80000 |
| }, |
| { |
| "epoch": 23.33144822761194, |
| "grad_norm": 0.42076575756073, |
| "learning_rate": 0.0003202134733158355, |
| "loss": 3.1417, |
| "step": 80050 |
| }, |
| { |
| "epoch": 23.346023787313435, |
| "grad_norm": 0.3984379470348358, |
| "learning_rate": 0.0003200384951881015, |
| "loss": 3.1554, |
| "step": 80100 |
| }, |
| { |
| "epoch": 23.360599347014926, |
| "grad_norm": 0.42404845356941223, |
| "learning_rate": 0.00031986351706036743, |
| "loss": 3.1558, |
| "step": 80150 |
| }, |
| { |
| "epoch": 23.375174906716417, |
| "grad_norm": 0.4230729937553406, |
| "learning_rate": 0.0003196885389326334, |
| "loss": 3.1552, |
| "step": 80200 |
| }, |
| { |
| "epoch": 23.38975046641791, |
| "grad_norm": 0.4097963273525238, |
| "learning_rate": 0.0003195135608048994, |
| "loss": 3.1626, |
| "step": 80250 |
| }, |
| { |
| "epoch": 23.404326026119403, |
| "grad_norm": 0.4160501956939697, |
| "learning_rate": 0.0003193385826771653, |
| "loss": 3.1572, |
| "step": 80300 |
| }, |
| { |
| "epoch": 23.418901585820894, |
| "grad_norm": 0.39919206500053406, |
| "learning_rate": 0.00031916360454943126, |
| "loss": 3.1587, |
| "step": 80350 |
| }, |
| { |
| "epoch": 23.43347714552239, |
| "grad_norm": 0.3985087275505066, |
| "learning_rate": 0.00031898862642169726, |
| "loss": 3.1623, |
| "step": 80400 |
| }, |
| { |
| "epoch": 23.44805270522388, |
| "grad_norm": 0.40297672152519226, |
| "learning_rate": 0.00031881364829396326, |
| "loss": 3.176, |
| "step": 80450 |
| }, |
| { |
| "epoch": 23.462628264925375, |
| "grad_norm": 0.42020031809806824, |
| "learning_rate": 0.0003186386701662292, |
| "loss": 3.1747, |
| "step": 80500 |
| }, |
| { |
| "epoch": 23.477203824626866, |
| "grad_norm": 0.45471012592315674, |
| "learning_rate": 0.00031846369203849514, |
| "loss": 3.1711, |
| "step": 80550 |
| }, |
| { |
| "epoch": 23.491779384328357, |
| "grad_norm": 0.4380848705768585, |
| "learning_rate": 0.00031828871391076114, |
| "loss": 3.18, |
| "step": 80600 |
| }, |
| { |
| "epoch": 23.50635494402985, |
| "grad_norm": 0.4077417254447937, |
| "learning_rate": 0.0003181137357830271, |
| "loss": 3.1749, |
| "step": 80650 |
| }, |
| { |
| "epoch": 23.520930503731343, |
| "grad_norm": 0.41786298155784607, |
| "learning_rate": 0.0003179387576552931, |
| "loss": 3.1713, |
| "step": 80700 |
| }, |
| { |
| "epoch": 23.535506063432837, |
| "grad_norm": 0.4201817512512207, |
| "learning_rate": 0.000317763779527559, |
| "loss": 3.1766, |
| "step": 80750 |
| }, |
| { |
| "epoch": 23.55008162313433, |
| "grad_norm": 0.3985823690891266, |
| "learning_rate": 0.00031758880139982497, |
| "loss": 3.1784, |
| "step": 80800 |
| }, |
| { |
| "epoch": 23.56465718283582, |
| "grad_norm": 0.4040225148200989, |
| "learning_rate": 0.00031741382327209097, |
| "loss": 3.1852, |
| "step": 80850 |
| }, |
| { |
| "epoch": 23.579232742537314, |
| "grad_norm": 0.4432525634765625, |
| "learning_rate": 0.00031723884514435696, |
| "loss": 3.1815, |
| "step": 80900 |
| }, |
| { |
| "epoch": 23.593808302238806, |
| "grad_norm": 0.40925121307373047, |
| "learning_rate": 0.00031706386701662285, |
| "loss": 3.1782, |
| "step": 80950 |
| }, |
| { |
| "epoch": 23.608383861940297, |
| "grad_norm": 0.40709611773490906, |
| "learning_rate": 0.00031688888888888885, |
| "loss": 3.1943, |
| "step": 81000 |
| }, |
| { |
| "epoch": 23.608383861940297, |
| "eval_accuracy": 0.372661964830504, |
| "eval_loss": 3.5582470893859863, |
| "eval_runtime": 82.6312, |
| "eval_samples_per_second": 201.219, |
| "eval_steps_per_second": 12.586, |
| "step": 81000 |
| }, |
| { |
| "epoch": 23.62295942164179, |
| "grad_norm": 0.3898576498031616, |
| "learning_rate": 0.00031671391076115485, |
| "loss": 3.186, |
| "step": 81050 |
| }, |
| { |
| "epoch": 23.637534981343283, |
| "grad_norm": 0.4021594524383545, |
| "learning_rate": 0.00031653893263342085, |
| "loss": 3.2017, |
| "step": 81100 |
| }, |
| { |
| "epoch": 23.652110541044777, |
| "grad_norm": 0.42967769503593445, |
| "learning_rate": 0.00031636395450568674, |
| "loss": 3.2032, |
| "step": 81150 |
| }, |
| { |
| "epoch": 23.66668610074627, |
| "grad_norm": 0.4002310037612915, |
| "learning_rate": 0.00031618897637795273, |
| "loss": 3.197, |
| "step": 81200 |
| }, |
| { |
| "epoch": 23.68126166044776, |
| "grad_norm": 0.4102286398410797, |
| "learning_rate": 0.00031601399825021873, |
| "loss": 3.194, |
| "step": 81250 |
| }, |
| { |
| "epoch": 23.695837220149254, |
| "grad_norm": 0.3905355632305145, |
| "learning_rate": 0.0003158390201224846, |
| "loss": 3.2018, |
| "step": 81300 |
| }, |
| { |
| "epoch": 23.710412779850746, |
| "grad_norm": 0.4037608802318573, |
| "learning_rate": 0.0003156640419947506, |
| "loss": 3.1943, |
| "step": 81350 |
| }, |
| { |
| "epoch": 23.72498833955224, |
| "grad_norm": 0.44810715317726135, |
| "learning_rate": 0.0003154890638670166, |
| "loss": 3.1964, |
| "step": 81400 |
| }, |
| { |
| "epoch": 23.73956389925373, |
| "grad_norm": 0.4065055549144745, |
| "learning_rate": 0.00031531408573928256, |
| "loss": 3.2001, |
| "step": 81450 |
| }, |
| { |
| "epoch": 23.754139458955223, |
| "grad_norm": 0.39197850227355957, |
| "learning_rate": 0.0003151391076115485, |
| "loss": 3.1975, |
| "step": 81500 |
| }, |
| { |
| "epoch": 23.768715018656717, |
| "grad_norm": 0.4424740970134735, |
| "learning_rate": 0.0003149641294838145, |
| "loss": 3.1983, |
| "step": 81550 |
| }, |
| { |
| "epoch": 23.78329057835821, |
| "grad_norm": 0.42625612020492554, |
| "learning_rate": 0.00031478915135608044, |
| "loss": 3.2098, |
| "step": 81600 |
| }, |
| { |
| "epoch": 23.797866138059703, |
| "grad_norm": 0.39148834347724915, |
| "learning_rate": 0.00031461417322834644, |
| "loss": 3.204, |
| "step": 81650 |
| }, |
| { |
| "epoch": 23.812441697761194, |
| "grad_norm": 0.42868733406066895, |
| "learning_rate": 0.0003144391951006124, |
| "loss": 3.219, |
| "step": 81700 |
| }, |
| { |
| "epoch": 23.827017257462686, |
| "grad_norm": 0.43013501167297363, |
| "learning_rate": 0.0003142642169728784, |
| "loss": 3.2065, |
| "step": 81750 |
| }, |
| { |
| "epoch": 23.84159281716418, |
| "grad_norm": 0.40283554792404175, |
| "learning_rate": 0.0003140892388451443, |
| "loss": 3.2081, |
| "step": 81800 |
| }, |
| { |
| "epoch": 23.85616837686567, |
| "grad_norm": 0.421036034822464, |
| "learning_rate": 0.0003139142607174103, |
| "loss": 3.2085, |
| "step": 81850 |
| }, |
| { |
| "epoch": 23.870743936567163, |
| "grad_norm": 0.40139567852020264, |
| "learning_rate": 0.00031373928258967627, |
| "loss": 3.2001, |
| "step": 81900 |
| }, |
| { |
| "epoch": 23.885319496268657, |
| "grad_norm": 0.4077606797218323, |
| "learning_rate": 0.0003135643044619422, |
| "loss": 3.2076, |
| "step": 81950 |
| }, |
| { |
| "epoch": 23.89989505597015, |
| "grad_norm": 0.40206170082092285, |
| "learning_rate": 0.0003133893263342082, |
| "loss": 3.1961, |
| "step": 82000 |
| }, |
| { |
| "epoch": 23.89989505597015, |
| "eval_accuracy": 0.3726993924601216, |
| "eval_loss": 3.550748109817505, |
| "eval_runtime": 81.036, |
| "eval_samples_per_second": 205.18, |
| "eval_steps_per_second": 12.834, |
| "step": 82000 |
| }, |
| { |
| "epoch": 23.914470615671643, |
| "grad_norm": 0.43046948313713074, |
| "learning_rate": 0.00031321434820647415, |
| "loss": 3.2226, |
| "step": 82050 |
| }, |
| { |
| "epoch": 23.929046175373134, |
| "grad_norm": 0.39953580498695374, |
| "learning_rate": 0.0003130393700787401, |
| "loss": 3.2032, |
| "step": 82100 |
| }, |
| { |
| "epoch": 23.943621735074625, |
| "grad_norm": 0.3996121883392334, |
| "learning_rate": 0.0003128643919510061, |
| "loss": 3.2241, |
| "step": 82150 |
| }, |
| { |
| "epoch": 23.95819729477612, |
| "grad_norm": 0.38362568616867065, |
| "learning_rate": 0.0003126894138232721, |
| "loss": 3.211, |
| "step": 82200 |
| }, |
| { |
| "epoch": 23.97277285447761, |
| "grad_norm": 0.3902480900287628, |
| "learning_rate": 0.000312514435695538, |
| "loss": 3.2343, |
| "step": 82250 |
| }, |
| { |
| "epoch": 23.987348414179106, |
| "grad_norm": 0.37919190526008606, |
| "learning_rate": 0.000312339457567804, |
| "loss": 3.2164, |
| "step": 82300 |
| }, |
| { |
| "epoch": 24.00204057835821, |
| "grad_norm": 0.4392635226249695, |
| "learning_rate": 0.00031216447944007, |
| "loss": 3.2676, |
| "step": 82350 |
| }, |
| { |
| "epoch": 24.016616138059703, |
| "grad_norm": 0.43110543489456177, |
| "learning_rate": 0.000311989501312336, |
| "loss": 3.1351, |
| "step": 82400 |
| }, |
| { |
| "epoch": 24.031191697761194, |
| "grad_norm": 0.4461026191711426, |
| "learning_rate": 0.00031181452318460186, |
| "loss": 3.1418, |
| "step": 82450 |
| }, |
| { |
| "epoch": 24.045767257462686, |
| "grad_norm": 0.4295453429222107, |
| "learning_rate": 0.00031163954505686786, |
| "loss": 3.1488, |
| "step": 82500 |
| }, |
| { |
| "epoch": 24.06034281716418, |
| "grad_norm": 0.4268800616264343, |
| "learning_rate": 0.00031146456692913386, |
| "loss": 3.1561, |
| "step": 82550 |
| }, |
| { |
| "epoch": 24.07491837686567, |
| "grad_norm": 0.4661655128002167, |
| "learning_rate": 0.0003112895888013998, |
| "loss": 3.155, |
| "step": 82600 |
| }, |
| { |
| "epoch": 24.089493936567163, |
| "grad_norm": 0.4239647388458252, |
| "learning_rate": 0.00031111461067366575, |
| "loss": 3.1525, |
| "step": 82650 |
| }, |
| { |
| "epoch": 24.104069496268657, |
| "grad_norm": 0.4007156789302826, |
| "learning_rate": 0.00031093963254593174, |
| "loss": 3.1583, |
| "step": 82700 |
| }, |
| { |
| "epoch": 24.11864505597015, |
| "grad_norm": 0.4243454039096832, |
| "learning_rate": 0.0003107646544181977, |
| "loss": 3.148, |
| "step": 82750 |
| }, |
| { |
| "epoch": 24.133220615671643, |
| "grad_norm": 0.43287578225135803, |
| "learning_rate": 0.0003105896762904637, |
| "loss": 3.1769, |
| "step": 82800 |
| }, |
| { |
| "epoch": 24.147796175373134, |
| "grad_norm": 0.4111781418323517, |
| "learning_rate": 0.00031041469816272963, |
| "loss": 3.1748, |
| "step": 82850 |
| }, |
| { |
| "epoch": 24.162371735074625, |
| "grad_norm": 0.3968662917613983, |
| "learning_rate": 0.00031023972003499557, |
| "loss": 3.1692, |
| "step": 82900 |
| }, |
| { |
| "epoch": 24.17694729477612, |
| "grad_norm": 0.40891578793525696, |
| "learning_rate": 0.00031006474190726157, |
| "loss": 3.1799, |
| "step": 82950 |
| }, |
| { |
| "epoch": 24.19152285447761, |
| "grad_norm": 0.4242601990699768, |
| "learning_rate": 0.0003098897637795275, |
| "loss": 3.1603, |
| "step": 83000 |
| }, |
| { |
| "epoch": 24.19152285447761, |
| "eval_accuracy": 0.3723005175017128, |
| "eval_loss": 3.563079357147217, |
| "eval_runtime": 80.9798, |
| "eval_samples_per_second": 205.323, |
| "eval_steps_per_second": 12.843, |
| "step": 83000 |
| }, |
| { |
| "epoch": 24.206098414179106, |
| "grad_norm": 0.44086217880249023, |
| "learning_rate": 0.0003097147856517935, |
| "loss": 3.1747, |
| "step": 83050 |
| }, |
| { |
| "epoch": 24.220673973880597, |
| "grad_norm": 0.4039420485496521, |
| "learning_rate": 0.00030953980752405945, |
| "loss": 3.1845, |
| "step": 83100 |
| }, |
| { |
| "epoch": 24.23524953358209, |
| "grad_norm": 0.4216541051864624, |
| "learning_rate": 0.00030936482939632545, |
| "loss": 3.176, |
| "step": 83150 |
| }, |
| { |
| "epoch": 24.249825093283583, |
| "grad_norm": 0.41587021946907043, |
| "learning_rate": 0.0003091898512685914, |
| "loss": 3.1717, |
| "step": 83200 |
| }, |
| { |
| "epoch": 24.264400652985074, |
| "grad_norm": 0.404705286026001, |
| "learning_rate": 0.00030901487314085734, |
| "loss": 3.1731, |
| "step": 83250 |
| }, |
| { |
| "epoch": 24.278976212686565, |
| "grad_norm": 0.42235511541366577, |
| "learning_rate": 0.00030883989501312334, |
| "loss": 3.1763, |
| "step": 83300 |
| }, |
| { |
| "epoch": 24.29355177238806, |
| "grad_norm": 0.47101321816444397, |
| "learning_rate": 0.00030866491688538933, |
| "loss": 3.1887, |
| "step": 83350 |
| }, |
| { |
| "epoch": 24.30812733208955, |
| "grad_norm": 0.3986760675907135, |
| "learning_rate": 0.0003084899387576552, |
| "loss": 3.1907, |
| "step": 83400 |
| }, |
| { |
| "epoch": 24.322702891791046, |
| "grad_norm": 0.41290879249572754, |
| "learning_rate": 0.0003083149606299212, |
| "loss": 3.1921, |
| "step": 83450 |
| }, |
| { |
| "epoch": 24.337278451492537, |
| "grad_norm": 0.4050918519496918, |
| "learning_rate": 0.0003081399825021872, |
| "loss": 3.1869, |
| "step": 83500 |
| }, |
| { |
| "epoch": 24.35185401119403, |
| "grad_norm": 0.41474130749702454, |
| "learning_rate": 0.0003079650043744532, |
| "loss": 3.1767, |
| "step": 83550 |
| }, |
| { |
| "epoch": 24.366429570895523, |
| "grad_norm": 0.3922690451145172, |
| "learning_rate": 0.0003077900262467191, |
| "loss": 3.1937, |
| "step": 83600 |
| }, |
| { |
| "epoch": 24.381005130597014, |
| "grad_norm": 0.3993853032588959, |
| "learning_rate": 0.0003076150481189851, |
| "loss": 3.199, |
| "step": 83650 |
| }, |
| { |
| "epoch": 24.39558069029851, |
| "grad_norm": 0.41509875655174255, |
| "learning_rate": 0.0003074400699912511, |
| "loss": 3.1946, |
| "step": 83700 |
| }, |
| { |
| "epoch": 24.41015625, |
| "grad_norm": 0.40810710191726685, |
| "learning_rate": 0.00030726509186351704, |
| "loss": 3.19, |
| "step": 83750 |
| }, |
| { |
| "epoch": 24.42473180970149, |
| "grad_norm": 0.43359851837158203, |
| "learning_rate": 0.000307090113735783, |
| "loss": 3.1959, |
| "step": 83800 |
| }, |
| { |
| "epoch": 24.439307369402986, |
| "grad_norm": 0.4185155928134918, |
| "learning_rate": 0.000306915135608049, |
| "loss": 3.1878, |
| "step": 83850 |
| }, |
| { |
| "epoch": 24.453882929104477, |
| "grad_norm": 0.41894349455833435, |
| "learning_rate": 0.00030674015748031493, |
| "loss": 3.1987, |
| "step": 83900 |
| }, |
| { |
| "epoch": 24.46845848880597, |
| "grad_norm": 0.4216829538345337, |
| "learning_rate": 0.00030656517935258087, |
| "loss": 3.2036, |
| "step": 83950 |
| }, |
| { |
| "epoch": 24.483034048507463, |
| "grad_norm": 0.4279070496559143, |
| "learning_rate": 0.00030639020122484687, |
| "loss": 3.1921, |
| "step": 84000 |
| }, |
| { |
| "epoch": 24.483034048507463, |
| "eval_accuracy": 0.3727687159627781, |
| "eval_loss": 3.5538084506988525, |
| "eval_runtime": 82.2654, |
| "eval_samples_per_second": 202.114, |
| "eval_steps_per_second": 12.642, |
| "step": 84000 |
| }, |
| { |
| "epoch": 24.497609608208954, |
| "grad_norm": 0.3983027935028076, |
| "learning_rate": 0.0003062152230971128, |
| "loss": 3.208, |
| "step": 84050 |
| }, |
| { |
| "epoch": 24.51218516791045, |
| "grad_norm": 0.44559016823768616, |
| "learning_rate": 0.0003060402449693788, |
| "loss": 3.2071, |
| "step": 84100 |
| }, |
| { |
| "epoch": 24.52676072761194, |
| "grad_norm": 0.40929609537124634, |
| "learning_rate": 0.00030586526684164475, |
| "loss": 3.2088, |
| "step": 84150 |
| }, |
| { |
| "epoch": 24.541336287313435, |
| "grad_norm": 0.42126762866973877, |
| "learning_rate": 0.0003056902887139107, |
| "loss": 3.2002, |
| "step": 84200 |
| }, |
| { |
| "epoch": 24.555911847014926, |
| "grad_norm": 0.4303717017173767, |
| "learning_rate": 0.0003055153105861767, |
| "loss": 3.2064, |
| "step": 84250 |
| }, |
| { |
| "epoch": 24.570487406716417, |
| "grad_norm": 0.39315250515937805, |
| "learning_rate": 0.0003053403324584427, |
| "loss": 3.2003, |
| "step": 84300 |
| }, |
| { |
| "epoch": 24.58506296641791, |
| "grad_norm": 0.4045599699020386, |
| "learning_rate": 0.00030516535433070864, |
| "loss": 3.2225, |
| "step": 84350 |
| }, |
| { |
| "epoch": 24.599638526119403, |
| "grad_norm": 0.44561147689819336, |
| "learning_rate": 0.0003049903762029746, |
| "loss": 3.2121, |
| "step": 84400 |
| }, |
| { |
| "epoch": 24.614214085820894, |
| "grad_norm": 0.38626208901405334, |
| "learning_rate": 0.0003048153980752406, |
| "loss": 3.2035, |
| "step": 84450 |
| }, |
| { |
| "epoch": 24.62878964552239, |
| "grad_norm": 0.40461352467536926, |
| "learning_rate": 0.0003046404199475066, |
| "loss": 3.2219, |
| "step": 84500 |
| }, |
| { |
| "epoch": 24.64336520522388, |
| "grad_norm": 0.4504241943359375, |
| "learning_rate": 0.00030446544181977247, |
| "loss": 3.2145, |
| "step": 84550 |
| }, |
| { |
| "epoch": 24.657940764925375, |
| "grad_norm": 0.40991654992103577, |
| "learning_rate": 0.00030429046369203846, |
| "loss": 3.2162, |
| "step": 84600 |
| }, |
| { |
| "epoch": 24.672516324626866, |
| "grad_norm": 0.45852404832839966, |
| "learning_rate": 0.00030411548556430446, |
| "loss": 3.2053, |
| "step": 84650 |
| }, |
| { |
| "epoch": 24.687091884328357, |
| "grad_norm": 0.39378029108047485, |
| "learning_rate": 0.00030394050743657046, |
| "loss": 3.2044, |
| "step": 84700 |
| }, |
| { |
| "epoch": 24.70166744402985, |
| "grad_norm": 0.4019394814968109, |
| "learning_rate": 0.00030376552930883635, |
| "loss": 3.2135, |
| "step": 84750 |
| }, |
| { |
| "epoch": 24.716243003731343, |
| "grad_norm": 0.40645793080329895, |
| "learning_rate": 0.00030359055118110235, |
| "loss": 3.2192, |
| "step": 84800 |
| }, |
| { |
| "epoch": 24.730818563432837, |
| "grad_norm": 0.43606048822402954, |
| "learning_rate": 0.00030341557305336834, |
| "loss": 3.2137, |
| "step": 84850 |
| }, |
| { |
| "epoch": 24.74539412313433, |
| "grad_norm": 0.4315530061721802, |
| "learning_rate": 0.00030324059492563423, |
| "loss": 3.2111, |
| "step": 84900 |
| }, |
| { |
| "epoch": 24.75996968283582, |
| "grad_norm": 0.4235876500606537, |
| "learning_rate": 0.00030306561679790023, |
| "loss": 3.2289, |
| "step": 84950 |
| }, |
| { |
| "epoch": 24.774545242537314, |
| "grad_norm": 0.40020737051963806, |
| "learning_rate": 0.00030289063867016623, |
| "loss": 3.2218, |
| "step": 85000 |
| }, |
| { |
| "epoch": 24.774545242537314, |
| "eval_accuracy": 0.37339298057753184, |
| "eval_loss": 3.542487621307373, |
| "eval_runtime": 82.3315, |
| "eval_samples_per_second": 201.952, |
| "eval_steps_per_second": 12.632, |
| "step": 85000 |
| }, |
| { |
| "epoch": 24.789120802238806, |
| "grad_norm": 0.44500306248664856, |
| "learning_rate": 0.00030271566054243217, |
| "loss": 3.2194, |
| "step": 85050 |
| }, |
| { |
| "epoch": 24.803696361940297, |
| "grad_norm": 0.3997686207294464, |
| "learning_rate": 0.0003025406824146981, |
| "loss": 3.2125, |
| "step": 85100 |
| }, |
| { |
| "epoch": 24.81827192164179, |
| "grad_norm": 0.4181106686592102, |
| "learning_rate": 0.0003023657042869641, |
| "loss": 3.2152, |
| "step": 85150 |
| }, |
| { |
| "epoch": 24.832847481343283, |
| "grad_norm": 0.40245386958122253, |
| "learning_rate": 0.00030219072615923006, |
| "loss": 3.2156, |
| "step": 85200 |
| }, |
| { |
| "epoch": 24.847423041044777, |
| "grad_norm": 0.4032931625843048, |
| "learning_rate": 0.00030201574803149605, |
| "loss": 3.2211, |
| "step": 85250 |
| }, |
| { |
| "epoch": 24.86199860074627, |
| "grad_norm": 0.45445653796195984, |
| "learning_rate": 0.000301840769903762, |
| "loss": 3.2241, |
| "step": 85300 |
| }, |
| { |
| "epoch": 24.87657416044776, |
| "grad_norm": 0.41785797476768494, |
| "learning_rate": 0.00030166579177602794, |
| "loss": 3.2274, |
| "step": 85350 |
| }, |
| { |
| "epoch": 24.891149720149254, |
| "grad_norm": 0.42762765288352966, |
| "learning_rate": 0.00030149081364829394, |
| "loss": 3.2243, |
| "step": 85400 |
| }, |
| { |
| "epoch": 24.905725279850746, |
| "grad_norm": 0.4247051179409027, |
| "learning_rate": 0.00030131583552055994, |
| "loss": 3.2184, |
| "step": 85450 |
| }, |
| { |
| "epoch": 24.92030083955224, |
| "grad_norm": 0.4020553231239319, |
| "learning_rate": 0.0003011408573928258, |
| "loss": 3.219, |
| "step": 85500 |
| }, |
| { |
| "epoch": 24.93487639925373, |
| "grad_norm": 0.41240084171295166, |
| "learning_rate": 0.0003009658792650918, |
| "loss": 3.225, |
| "step": 85550 |
| }, |
| { |
| "epoch": 24.949451958955223, |
| "grad_norm": 0.4297904968261719, |
| "learning_rate": 0.0003007909011373578, |
| "loss": 3.2173, |
| "step": 85600 |
| }, |
| { |
| "epoch": 24.964027518656717, |
| "grad_norm": 0.41779932379722595, |
| "learning_rate": 0.00030061592300962376, |
| "loss": 3.2068, |
| "step": 85650 |
| }, |
| { |
| "epoch": 24.97860307835821, |
| "grad_norm": 0.40487775206565857, |
| "learning_rate": 0.0003004409448818897, |
| "loss": 3.2201, |
| "step": 85700 |
| }, |
| { |
| "epoch": 24.993178638059703, |
| "grad_norm": 0.3969780206680298, |
| "learning_rate": 0.0003002659667541557, |
| "loss": 3.2145, |
| "step": 85750 |
| }, |
| { |
| "epoch": 25.007579291044777, |
| "grad_norm": 0.44225025177001953, |
| "learning_rate": 0.0003000909886264217, |
| "loss": 3.1819, |
| "step": 85800 |
| }, |
| { |
| "epoch": 25.02215485074627, |
| "grad_norm": 0.42038342356681824, |
| "learning_rate": 0.00029991601049868765, |
| "loss": 3.1194, |
| "step": 85850 |
| }, |
| { |
| "epoch": 25.03673041044776, |
| "grad_norm": 0.42238423228263855, |
| "learning_rate": 0.0002997410323709536, |
| "loss": 3.1306, |
| "step": 85900 |
| }, |
| { |
| "epoch": 25.051305970149254, |
| "grad_norm": 0.4110691249370575, |
| "learning_rate": 0.0002995660542432196, |
| "loss": 3.1498, |
| "step": 85950 |
| }, |
| { |
| "epoch": 25.065881529850746, |
| "grad_norm": 0.38763201236724854, |
| "learning_rate": 0.00029939107611548553, |
| "loss": 3.1492, |
| "step": 86000 |
| }, |
| { |
| "epoch": 25.065881529850746, |
| "eval_accuracy": 0.3726318344116924, |
| "eval_loss": 3.5576071739196777, |
| "eval_runtime": 82.1603, |
| "eval_samples_per_second": 202.373, |
| "eval_steps_per_second": 12.658, |
| "step": 86000 |
| }, |
| { |
| "epoch": 25.08045708955224, |
| "grad_norm": 0.4149741232395172, |
| "learning_rate": 0.00029921609798775153, |
| "loss": 3.1375, |
| "step": 86050 |
| }, |
| { |
| "epoch": 25.09503264925373, |
| "grad_norm": 0.4215436279773712, |
| "learning_rate": 0.00029904111986001747, |
| "loss": 3.136, |
| "step": 86100 |
| }, |
| { |
| "epoch": 25.109608208955223, |
| "grad_norm": 0.41278237104415894, |
| "learning_rate": 0.00029886614173228347, |
| "loss": 3.1427, |
| "step": 86150 |
| }, |
| { |
| "epoch": 25.124183768656717, |
| "grad_norm": 0.40497392416000366, |
| "learning_rate": 0.0002986911636045494, |
| "loss": 3.1452, |
| "step": 86200 |
| }, |
| { |
| "epoch": 25.13875932835821, |
| "grad_norm": 0.42836251854896545, |
| "learning_rate": 0.0002985161854768154, |
| "loss": 3.1472, |
| "step": 86250 |
| }, |
| { |
| "epoch": 25.153334888059703, |
| "grad_norm": 0.43723875284194946, |
| "learning_rate": 0.00029834120734908135, |
| "loss": 3.1573, |
| "step": 86300 |
| }, |
| { |
| "epoch": 25.167910447761194, |
| "grad_norm": 0.4214979410171509, |
| "learning_rate": 0.0002981662292213473, |
| "loss": 3.1555, |
| "step": 86350 |
| }, |
| { |
| "epoch": 25.182486007462686, |
| "grad_norm": 0.4392107129096985, |
| "learning_rate": 0.0002979912510936133, |
| "loss": 3.1658, |
| "step": 86400 |
| }, |
| { |
| "epoch": 25.19706156716418, |
| "grad_norm": 0.481117308139801, |
| "learning_rate": 0.00029781627296587924, |
| "loss": 3.166, |
| "step": 86450 |
| }, |
| { |
| "epoch": 25.21163712686567, |
| "grad_norm": 0.43255847692489624, |
| "learning_rate": 0.0002976412948381452, |
| "loss": 3.1608, |
| "step": 86500 |
| }, |
| { |
| "epoch": 25.226212686567163, |
| "grad_norm": 0.4154725968837738, |
| "learning_rate": 0.0002974663167104112, |
| "loss": 3.166, |
| "step": 86550 |
| }, |
| { |
| "epoch": 25.240788246268657, |
| "grad_norm": 0.4099873900413513, |
| "learning_rate": 0.0002972913385826771, |
| "loss": 3.1486, |
| "step": 86600 |
| }, |
| { |
| "epoch": 25.25536380597015, |
| "grad_norm": 0.404623806476593, |
| "learning_rate": 0.00029711636045494307, |
| "loss": 3.1691, |
| "step": 86650 |
| }, |
| { |
| "epoch": 25.269939365671643, |
| "grad_norm": 0.4402453303337097, |
| "learning_rate": 0.00029694138232720906, |
| "loss": 3.1663, |
| "step": 86700 |
| }, |
| { |
| "epoch": 25.284514925373134, |
| "grad_norm": 0.4077349305152893, |
| "learning_rate": 0.000296766404199475, |
| "loss": 3.1617, |
| "step": 86750 |
| }, |
| { |
| "epoch": 25.299090485074625, |
| "grad_norm": 0.42283713817596436, |
| "learning_rate": 0.000296591426071741, |
| "loss": 3.1748, |
| "step": 86800 |
| }, |
| { |
| "epoch": 25.31366604477612, |
| "grad_norm": 0.4206119477748871, |
| "learning_rate": 0.00029641644794400695, |
| "loss": 3.1683, |
| "step": 86850 |
| }, |
| { |
| "epoch": 25.32824160447761, |
| "grad_norm": 0.4248672127723694, |
| "learning_rate": 0.00029624146981627295, |
| "loss": 3.1759, |
| "step": 86900 |
| }, |
| { |
| "epoch": 25.342817164179106, |
| "grad_norm": 0.4154689311981201, |
| "learning_rate": 0.0002960664916885389, |
| "loss": 3.1812, |
| "step": 86950 |
| }, |
| { |
| "epoch": 25.357392723880597, |
| "grad_norm": 0.4337415099143982, |
| "learning_rate": 0.0002958915135608049, |
| "loss": 3.1824, |
| "step": 87000 |
| }, |
| { |
| "epoch": 25.357392723880597, |
| "eval_accuracy": 0.37313086947326024, |
| "eval_loss": 3.550499200820923, |
| "eval_runtime": 82.2769, |
| "eval_samples_per_second": 202.086, |
| "eval_steps_per_second": 12.64, |
| "step": 87000 |
| }, |
| { |
| "epoch": 25.37196828358209, |
| "grad_norm": 0.39931657910346985, |
| "learning_rate": 0.00029571653543307083, |
| "loss": 3.1751, |
| "step": 87050 |
| }, |
| { |
| "epoch": 25.386543843283583, |
| "grad_norm": 0.429696649312973, |
| "learning_rate": 0.00029554155730533683, |
| "loss": 3.1885, |
| "step": 87100 |
| }, |
| { |
| "epoch": 25.401119402985074, |
| "grad_norm": 0.41605037450790405, |
| "learning_rate": 0.0002953665791776028, |
| "loss": 3.1804, |
| "step": 87150 |
| }, |
| { |
| "epoch": 25.415694962686565, |
| "grad_norm": 0.4128526449203491, |
| "learning_rate": 0.00029519160104986877, |
| "loss": 3.1737, |
| "step": 87200 |
| }, |
| { |
| "epoch": 25.43027052238806, |
| "grad_norm": 0.4425789713859558, |
| "learning_rate": 0.0002950166229221347, |
| "loss": 3.1946, |
| "step": 87250 |
| }, |
| { |
| "epoch": 25.44484608208955, |
| "grad_norm": 0.4459783732891083, |
| "learning_rate": 0.0002948416447944007, |
| "loss": 3.1741, |
| "step": 87300 |
| }, |
| { |
| "epoch": 25.459421641791046, |
| "grad_norm": 0.44101038575172424, |
| "learning_rate": 0.00029466666666666666, |
| "loss": 3.1864, |
| "step": 87350 |
| }, |
| { |
| "epoch": 25.473997201492537, |
| "grad_norm": 0.4281041920185089, |
| "learning_rate": 0.0002944916885389326, |
| "loss": 3.1938, |
| "step": 87400 |
| }, |
| { |
| "epoch": 25.48857276119403, |
| "grad_norm": 0.4300467073917389, |
| "learning_rate": 0.0002943167104111986, |
| "loss": 3.1894, |
| "step": 87450 |
| }, |
| { |
| "epoch": 25.503148320895523, |
| "grad_norm": 0.4109787940979004, |
| "learning_rate": 0.00029414173228346454, |
| "loss": 3.206, |
| "step": 87500 |
| }, |
| { |
| "epoch": 25.517723880597014, |
| "grad_norm": 0.4145699143409729, |
| "learning_rate": 0.00029396675415573054, |
| "loss": 3.1949, |
| "step": 87550 |
| }, |
| { |
| "epoch": 25.53229944029851, |
| "grad_norm": 0.4070979654788971, |
| "learning_rate": 0.0002937917760279965, |
| "loss": 3.189, |
| "step": 87600 |
| }, |
| { |
| "epoch": 25.546875, |
| "grad_norm": 0.43455639481544495, |
| "learning_rate": 0.0002936167979002624, |
| "loss": 3.1924, |
| "step": 87650 |
| }, |
| { |
| "epoch": 25.56145055970149, |
| "grad_norm": 0.4693121910095215, |
| "learning_rate": 0.0002934418197725284, |
| "loss": 3.1972, |
| "step": 87700 |
| }, |
| { |
| "epoch": 25.576026119402986, |
| "grad_norm": 0.40905654430389404, |
| "learning_rate": 0.00029326684164479437, |
| "loss": 3.1879, |
| "step": 87750 |
| }, |
| { |
| "epoch": 25.590601679104477, |
| "grad_norm": 0.399454265832901, |
| "learning_rate": 0.0002930918635170603, |
| "loss": 3.2004, |
| "step": 87800 |
| }, |
| { |
| "epoch": 25.60517723880597, |
| "grad_norm": 0.4239746034145355, |
| "learning_rate": 0.0002929168853893263, |
| "loss": 3.2113, |
| "step": 87850 |
| }, |
| { |
| "epoch": 25.619752798507463, |
| "grad_norm": 0.4122619330883026, |
| "learning_rate": 0.00029274190726159225, |
| "loss": 3.2005, |
| "step": 87900 |
| }, |
| { |
| "epoch": 25.634328358208954, |
| "grad_norm": 0.42600637674331665, |
| "learning_rate": 0.00029256692913385825, |
| "loss": 3.1965, |
| "step": 87950 |
| }, |
| { |
| "epoch": 25.64890391791045, |
| "grad_norm": 0.43111228942871094, |
| "learning_rate": 0.0002923919510061242, |
| "loss": 3.201, |
| "step": 88000 |
| }, |
| { |
| "epoch": 25.64890391791045, |
| "eval_accuracy": 0.37345865547478535, |
| "eval_loss": 3.546783924102783, |
| "eval_runtime": 82.2087, |
| "eval_samples_per_second": 202.254, |
| "eval_steps_per_second": 12.651, |
| "step": 88000 |
| }, |
| { |
| "epoch": 25.66347947761194, |
| "grad_norm": 0.4582037329673767, |
| "learning_rate": 0.0002922169728783902, |
| "loss": 3.2048, |
| "step": 88050 |
| }, |
| { |
| "epoch": 25.678055037313435, |
| "grad_norm": 0.4343770742416382, |
| "learning_rate": 0.00029204199475065613, |
| "loss": 3.1926, |
| "step": 88100 |
| }, |
| { |
| "epoch": 25.692630597014926, |
| "grad_norm": 0.4335322678089142, |
| "learning_rate": 0.00029186701662292213, |
| "loss": 3.1958, |
| "step": 88150 |
| }, |
| { |
| "epoch": 25.707206156716417, |
| "grad_norm": 0.41262751817703247, |
| "learning_rate": 0.0002916920384951881, |
| "loss": 3.2129, |
| "step": 88200 |
| }, |
| { |
| "epoch": 25.72178171641791, |
| "grad_norm": 0.42446255683898926, |
| "learning_rate": 0.00029151706036745407, |
| "loss": 3.2049, |
| "step": 88250 |
| }, |
| { |
| "epoch": 25.736357276119403, |
| "grad_norm": 0.4038170278072357, |
| "learning_rate": 0.00029134208223972, |
| "loss": 3.2032, |
| "step": 88300 |
| }, |
| { |
| "epoch": 25.750932835820894, |
| "grad_norm": 0.4331628382205963, |
| "learning_rate": 0.00029116710411198596, |
| "loss": 3.2015, |
| "step": 88350 |
| }, |
| { |
| "epoch": 25.76550839552239, |
| "grad_norm": 0.42481672763824463, |
| "learning_rate": 0.00029099212598425196, |
| "loss": 3.211, |
| "step": 88400 |
| }, |
| { |
| "epoch": 25.78008395522388, |
| "grad_norm": 0.4009307622909546, |
| "learning_rate": 0.0002908171478565179, |
| "loss": 3.2169, |
| "step": 88450 |
| }, |
| { |
| "epoch": 25.794659514925375, |
| "grad_norm": 0.4292212724685669, |
| "learning_rate": 0.0002906421697287839, |
| "loss": 3.2051, |
| "step": 88500 |
| }, |
| { |
| "epoch": 25.809235074626866, |
| "grad_norm": 0.4238194525241852, |
| "learning_rate": 0.00029046719160104984, |
| "loss": 3.2081, |
| "step": 88550 |
| }, |
| { |
| "epoch": 25.823810634328357, |
| "grad_norm": 0.45732471346855164, |
| "learning_rate": 0.00029029221347331584, |
| "loss": 3.2074, |
| "step": 88600 |
| }, |
| { |
| "epoch": 25.83838619402985, |
| "grad_norm": 0.41441765427589417, |
| "learning_rate": 0.0002901172353455818, |
| "loss": 3.2065, |
| "step": 88650 |
| }, |
| { |
| "epoch": 25.852961753731343, |
| "grad_norm": 0.40707236528396606, |
| "learning_rate": 0.0002899422572178477, |
| "loss": 3.2126, |
| "step": 88700 |
| }, |
| { |
| "epoch": 25.867537313432837, |
| "grad_norm": 0.4202680289745331, |
| "learning_rate": 0.0002897672790901137, |
| "loss": 3.2131, |
| "step": 88750 |
| }, |
| { |
| "epoch": 25.88211287313433, |
| "grad_norm": 0.41881847381591797, |
| "learning_rate": 0.00028959230096237967, |
| "loss": 3.2088, |
| "step": 88800 |
| }, |
| { |
| "epoch": 25.89668843283582, |
| "grad_norm": 0.39432790875434875, |
| "learning_rate": 0.00028941732283464566, |
| "loss": 3.2116, |
| "step": 88850 |
| }, |
| { |
| "epoch": 25.911263992537314, |
| "grad_norm": 0.41786453127861023, |
| "learning_rate": 0.0002892423447069116, |
| "loss": 3.2152, |
| "step": 88900 |
| }, |
| { |
| "epoch": 25.925839552238806, |
| "grad_norm": 0.4230845272541046, |
| "learning_rate": 0.00028906736657917755, |
| "loss": 3.2125, |
| "step": 88950 |
| }, |
| { |
| "epoch": 25.940415111940297, |
| "grad_norm": 0.42243847250938416, |
| "learning_rate": 0.00028889238845144355, |
| "loss": 3.2114, |
| "step": 89000 |
| }, |
| { |
| "epoch": 25.940415111940297, |
| "eval_accuracy": 0.3738018597765618, |
| "eval_loss": 3.5390872955322266, |
| "eval_runtime": 82.2336, |
| "eval_samples_per_second": 202.192, |
| "eval_steps_per_second": 12.647, |
| "step": 89000 |
| }, |
| { |
| "epoch": 25.95499067164179, |
| "grad_norm": 0.42381802201271057, |
| "learning_rate": 0.0002887174103237095, |
| "loss": 3.2058, |
| "step": 89050 |
| }, |
| { |
| "epoch": 25.969566231343283, |
| "grad_norm": 0.45048123598098755, |
| "learning_rate": 0.0002885424321959755, |
| "loss": 3.2144, |
| "step": 89100 |
| }, |
| { |
| "epoch": 25.984141791044777, |
| "grad_norm": 0.40106144547462463, |
| "learning_rate": 0.00028836745406824143, |
| "loss": 3.2167, |
| "step": 89150 |
| }, |
| { |
| "epoch": 25.99871735074627, |
| "grad_norm": 0.44207993149757385, |
| "learning_rate": 0.00028819247594050743, |
| "loss": 3.2194, |
| "step": 89200 |
| }, |
| { |
| "epoch": 26.013118003731343, |
| "grad_norm": 0.440739244222641, |
| "learning_rate": 0.0002880174978127734, |
| "loss": 3.1396, |
| "step": 89250 |
| }, |
| { |
| "epoch": 26.027693563432837, |
| "grad_norm": 0.49055179953575134, |
| "learning_rate": 0.0002878425196850393, |
| "loss": 3.1256, |
| "step": 89300 |
| }, |
| { |
| "epoch": 26.04226912313433, |
| "grad_norm": 0.41953858733177185, |
| "learning_rate": 0.0002876675415573053, |
| "loss": 3.1163, |
| "step": 89350 |
| }, |
| { |
| "epoch": 26.05684468283582, |
| "grad_norm": 0.4146621525287628, |
| "learning_rate": 0.00028749256342957126, |
| "loss": 3.1292, |
| "step": 89400 |
| }, |
| { |
| "epoch": 26.071420242537314, |
| "grad_norm": 0.42104142904281616, |
| "learning_rate": 0.00028731758530183726, |
| "loss": 3.1225, |
| "step": 89450 |
| }, |
| { |
| "epoch": 26.085995802238806, |
| "grad_norm": 0.4284878373146057, |
| "learning_rate": 0.0002871426071741032, |
| "loss": 3.1216, |
| "step": 89500 |
| }, |
| { |
| "epoch": 26.100571361940297, |
| "grad_norm": 0.4166851341724396, |
| "learning_rate": 0.0002869676290463692, |
| "loss": 3.1399, |
| "step": 89550 |
| }, |
| { |
| "epoch": 26.11514692164179, |
| "grad_norm": 0.4328397214412689, |
| "learning_rate": 0.00028679265091863514, |
| "loss": 3.1384, |
| "step": 89600 |
| }, |
| { |
| "epoch": 26.129722481343283, |
| "grad_norm": 0.4359130859375, |
| "learning_rate": 0.00028661767279090114, |
| "loss": 3.1342, |
| "step": 89650 |
| }, |
| { |
| "epoch": 26.144298041044777, |
| "grad_norm": 0.4311699867248535, |
| "learning_rate": 0.0002864426946631671, |
| "loss": 3.14, |
| "step": 89700 |
| }, |
| { |
| "epoch": 26.15887360074627, |
| "grad_norm": 0.41819536685943604, |
| "learning_rate": 0.0002862677165354331, |
| "loss": 3.1558, |
| "step": 89750 |
| }, |
| { |
| "epoch": 26.17344916044776, |
| "grad_norm": 0.44823819398880005, |
| "learning_rate": 0.000286092738407699, |
| "loss": 3.1426, |
| "step": 89800 |
| }, |
| { |
| "epoch": 26.188024720149254, |
| "grad_norm": 0.4144200384616852, |
| "learning_rate": 0.00028591776027996497, |
| "loss": 3.1473, |
| "step": 89850 |
| }, |
| { |
| "epoch": 26.202600279850746, |
| "grad_norm": 0.4270482659339905, |
| "learning_rate": 0.00028574278215223097, |
| "loss": 3.1601, |
| "step": 89900 |
| }, |
| { |
| "epoch": 26.21717583955224, |
| "grad_norm": 0.4241775870323181, |
| "learning_rate": 0.0002855678040244969, |
| "loss": 3.1421, |
| "step": 89950 |
| }, |
| { |
| "epoch": 26.23175139925373, |
| "grad_norm": 0.4102860987186432, |
| "learning_rate": 0.00028539282589676285, |
| "loss": 3.1724, |
| "step": 90000 |
| }, |
| { |
| "epoch": 26.23175139925373, |
| "eval_accuracy": 0.37293608102352094, |
| "eval_loss": 3.5586137771606445, |
| "eval_runtime": 82.1555, |
| "eval_samples_per_second": 202.384, |
| "eval_steps_per_second": 12.659, |
| "step": 90000 |
| }, |
| { |
| "epoch": 26.246326958955223, |
| "grad_norm": 0.4413115084171295, |
| "learning_rate": 0.00028521784776902885, |
| "loss": 3.1418, |
| "step": 90050 |
| }, |
| { |
| "epoch": 26.260902518656717, |
| "grad_norm": 0.4471030533313751, |
| "learning_rate": 0.0002850428696412948, |
| "loss": 3.1503, |
| "step": 90100 |
| }, |
| { |
| "epoch": 26.27547807835821, |
| "grad_norm": 0.41441798210144043, |
| "learning_rate": 0.00028486789151356074, |
| "loss": 3.1568, |
| "step": 90150 |
| }, |
| { |
| "epoch": 26.290053638059703, |
| "grad_norm": 0.40715059638023376, |
| "learning_rate": 0.00028469291338582673, |
| "loss": 3.1696, |
| "step": 90200 |
| }, |
| { |
| "epoch": 26.304629197761194, |
| "grad_norm": 0.4507633149623871, |
| "learning_rate": 0.0002845179352580927, |
| "loss": 3.166, |
| "step": 90250 |
| }, |
| { |
| "epoch": 26.319204757462686, |
| "grad_norm": 0.4515264332294464, |
| "learning_rate": 0.0002843429571303587, |
| "loss": 3.1736, |
| "step": 90300 |
| }, |
| { |
| "epoch": 26.33378031716418, |
| "grad_norm": 0.45111826062202454, |
| "learning_rate": 0.0002841679790026246, |
| "loss": 3.1645, |
| "step": 90350 |
| }, |
| { |
| "epoch": 26.34835587686567, |
| "grad_norm": 0.4295303225517273, |
| "learning_rate": 0.0002839930008748906, |
| "loss": 3.1699, |
| "step": 90400 |
| }, |
| { |
| "epoch": 26.362931436567163, |
| "grad_norm": 0.4211674928665161, |
| "learning_rate": 0.00028381802274715656, |
| "loss": 3.175, |
| "step": 90450 |
| }, |
| { |
| "epoch": 26.377506996268657, |
| "grad_norm": 0.4354601800441742, |
| "learning_rate": 0.00028364304461942256, |
| "loss": 3.1635, |
| "step": 90500 |
| }, |
| { |
| "epoch": 26.39208255597015, |
| "grad_norm": 0.44711926579475403, |
| "learning_rate": 0.0002834680664916885, |
| "loss": 3.1796, |
| "step": 90550 |
| }, |
| { |
| "epoch": 26.406658115671643, |
| "grad_norm": 0.43272531032562256, |
| "learning_rate": 0.0002832930883639545, |
| "loss": 3.161, |
| "step": 90600 |
| }, |
| { |
| "epoch": 26.421233675373134, |
| "grad_norm": 0.4213297963142395, |
| "learning_rate": 0.00028311811023622044, |
| "loss": 3.1676, |
| "step": 90650 |
| }, |
| { |
| "epoch": 26.435809235074625, |
| "grad_norm": 0.4430896043777466, |
| "learning_rate": 0.00028294313210848644, |
| "loss": 3.185, |
| "step": 90700 |
| }, |
| { |
| "epoch": 26.45038479477612, |
| "grad_norm": 0.4318443834781647, |
| "learning_rate": 0.0002827681539807524, |
| "loss": 3.1677, |
| "step": 90750 |
| }, |
| { |
| "epoch": 26.46496035447761, |
| "grad_norm": 0.42491278052330017, |
| "learning_rate": 0.0002825931758530184, |
| "loss": 3.1867, |
| "step": 90800 |
| }, |
| { |
| "epoch": 26.479535914179106, |
| "grad_norm": 0.42508071660995483, |
| "learning_rate": 0.0002824181977252843, |
| "loss": 3.1799, |
| "step": 90850 |
| }, |
| { |
| "epoch": 26.494111473880597, |
| "grad_norm": 0.46238982677459717, |
| "learning_rate": 0.0002822432195975503, |
| "loss": 3.1668, |
| "step": 90900 |
| }, |
| { |
| "epoch": 26.50868703358209, |
| "grad_norm": 0.43022239208221436, |
| "learning_rate": 0.00028206824146981627, |
| "loss": 3.1945, |
| "step": 90950 |
| }, |
| { |
| "epoch": 26.523262593283583, |
| "grad_norm": 0.44237610697746277, |
| "learning_rate": 0.0002818932633420822, |
| "loss": 3.1831, |
| "step": 91000 |
| }, |
| { |
| "epoch": 26.523262593283583, |
| "eval_accuracy": 0.3733205969542148, |
| "eval_loss": 3.5528080463409424, |
| "eval_runtime": 82.3087, |
| "eval_samples_per_second": 202.008, |
| "eval_steps_per_second": 12.635, |
| "step": 91000 |
| }, |
| { |
| "epoch": 26.537838152985074, |
| "grad_norm": 0.4143082797527313, |
| "learning_rate": 0.0002817182852143482, |
| "loss": 3.1941, |
| "step": 91050 |
| }, |
| { |
| "epoch": 26.552413712686565, |
| "grad_norm": 0.4099169373512268, |
| "learning_rate": 0.00028154330708661415, |
| "loss": 3.1927, |
| "step": 91100 |
| }, |
| { |
| "epoch": 26.56698927238806, |
| "grad_norm": 0.4147176742553711, |
| "learning_rate": 0.0002813683289588801, |
| "loss": 3.1883, |
| "step": 91150 |
| }, |
| { |
| "epoch": 26.58156483208955, |
| "grad_norm": 0.4103856682777405, |
| "learning_rate": 0.0002811933508311461, |
| "loss": 3.196, |
| "step": 91200 |
| }, |
| { |
| "epoch": 26.596140391791046, |
| "grad_norm": 0.4222527742385864, |
| "learning_rate": 0.00028101837270341204, |
| "loss": 3.1797, |
| "step": 91250 |
| }, |
| { |
| "epoch": 26.610715951492537, |
| "grad_norm": 0.41271039843559265, |
| "learning_rate": 0.000280843394575678, |
| "loss": 3.196, |
| "step": 91300 |
| }, |
| { |
| "epoch": 26.62529151119403, |
| "grad_norm": 0.42893654108047485, |
| "learning_rate": 0.000280668416447944, |
| "loss": 3.1862, |
| "step": 91350 |
| }, |
| { |
| "epoch": 26.639867070895523, |
| "grad_norm": 0.4422917068004608, |
| "learning_rate": 0.0002804934383202099, |
| "loss": 3.1866, |
| "step": 91400 |
| }, |
| { |
| "epoch": 26.654442630597014, |
| "grad_norm": 0.4190319776535034, |
| "learning_rate": 0.0002803184601924759, |
| "loss": 3.1852, |
| "step": 91450 |
| }, |
| { |
| "epoch": 26.66901819029851, |
| "grad_norm": 0.4198403060436249, |
| "learning_rate": 0.00028014348206474186, |
| "loss": 3.198, |
| "step": 91500 |
| }, |
| { |
| "epoch": 26.68359375, |
| "grad_norm": 0.4143039286136627, |
| "learning_rate": 0.00027996850393700786, |
| "loss": 3.1877, |
| "step": 91550 |
| }, |
| { |
| "epoch": 26.69816930970149, |
| "grad_norm": 0.4511352479457855, |
| "learning_rate": 0.0002797935258092738, |
| "loss": 3.1815, |
| "step": 91600 |
| }, |
| { |
| "epoch": 26.712744869402986, |
| "grad_norm": 0.427504301071167, |
| "learning_rate": 0.0002796185476815398, |
| "loss": 3.1959, |
| "step": 91650 |
| }, |
| { |
| "epoch": 26.727320429104477, |
| "grad_norm": 0.45200663805007935, |
| "learning_rate": 0.00027944356955380574, |
| "loss": 3.1907, |
| "step": 91700 |
| }, |
| { |
| "epoch": 26.74189598880597, |
| "grad_norm": 0.4165610671043396, |
| "learning_rate": 0.00027926859142607174, |
| "loss": 3.1929, |
| "step": 91750 |
| }, |
| { |
| "epoch": 26.756471548507463, |
| "grad_norm": 0.4394925832748413, |
| "learning_rate": 0.0002790936132983377, |
| "loss": 3.1997, |
| "step": 91800 |
| }, |
| { |
| "epoch": 26.771047108208954, |
| "grad_norm": 0.39829355478286743, |
| "learning_rate": 0.0002789186351706037, |
| "loss": 3.1901, |
| "step": 91850 |
| }, |
| { |
| "epoch": 26.78562266791045, |
| "grad_norm": 0.4587573707103729, |
| "learning_rate": 0.0002787436570428696, |
| "loss": 3.2027, |
| "step": 91900 |
| }, |
| { |
| "epoch": 26.80019822761194, |
| "grad_norm": 0.44898635149002075, |
| "learning_rate": 0.00027856867891513557, |
| "loss": 3.1916, |
| "step": 91950 |
| }, |
| { |
| "epoch": 26.814773787313435, |
| "grad_norm": 0.4305431544780731, |
| "learning_rate": 0.00027839370078740157, |
| "loss": 3.2083, |
| "step": 92000 |
| }, |
| { |
| "epoch": 26.814773787313435, |
| "eval_accuracy": 0.37361601629490715, |
| "eval_loss": 3.544487953186035, |
| "eval_runtime": 82.1477, |
| "eval_samples_per_second": 202.404, |
| "eval_steps_per_second": 12.66, |
| "step": 92000 |
| }, |
| { |
| "epoch": 26.829349347014926, |
| "grad_norm": 0.44197413325309753, |
| "learning_rate": 0.0002782187226596675, |
| "loss": 3.2056, |
| "step": 92050 |
| }, |
| { |
| "epoch": 26.843924906716417, |
| "grad_norm": 0.40853598713874817, |
| "learning_rate": 0.0002780437445319335, |
| "loss": 3.2028, |
| "step": 92100 |
| }, |
| { |
| "epoch": 26.85850046641791, |
| "grad_norm": 0.42029932141304016, |
| "learning_rate": 0.00027786876640419945, |
| "loss": 3.1965, |
| "step": 92150 |
| }, |
| { |
| "epoch": 26.873076026119403, |
| "grad_norm": 0.44294652342796326, |
| "learning_rate": 0.00027769378827646545, |
| "loss": 3.2025, |
| "step": 92200 |
| }, |
| { |
| "epoch": 26.887651585820894, |
| "grad_norm": 0.44561460614204407, |
| "learning_rate": 0.0002775188101487314, |
| "loss": 3.2205, |
| "step": 92250 |
| }, |
| { |
| "epoch": 26.90222714552239, |
| "grad_norm": 0.3962169289588928, |
| "learning_rate": 0.00027734383202099734, |
| "loss": 3.1954, |
| "step": 92300 |
| }, |
| { |
| "epoch": 26.91680270522388, |
| "grad_norm": 0.41185733675956726, |
| "learning_rate": 0.00027716885389326333, |
| "loss": 3.2069, |
| "step": 92350 |
| }, |
| { |
| "epoch": 26.931378264925375, |
| "grad_norm": 0.4333236515522003, |
| "learning_rate": 0.0002769938757655293, |
| "loss": 3.1982, |
| "step": 92400 |
| }, |
| { |
| "epoch": 26.945953824626866, |
| "grad_norm": 0.4122789204120636, |
| "learning_rate": 0.0002768188976377952, |
| "loss": 3.2089, |
| "step": 92450 |
| }, |
| { |
| "epoch": 26.960529384328357, |
| "grad_norm": 0.4543864130973816, |
| "learning_rate": 0.0002766439195100612, |
| "loss": 3.2081, |
| "step": 92500 |
| }, |
| { |
| "epoch": 26.97510494402985, |
| "grad_norm": 0.39395999908447266, |
| "learning_rate": 0.00027646894138232716, |
| "loss": 3.21, |
| "step": 92550 |
| }, |
| { |
| "epoch": 26.989680503731343, |
| "grad_norm": 0.42809590697288513, |
| "learning_rate": 0.00027629396325459316, |
| "loss": 3.2076, |
| "step": 92600 |
| }, |
| { |
| "epoch": 27.004081156716417, |
| "grad_norm": 0.420581579208374, |
| "learning_rate": 0.0002761189851268591, |
| "loss": 3.1817, |
| "step": 92650 |
| }, |
| { |
| "epoch": 27.01865671641791, |
| "grad_norm": 0.4725584387779236, |
| "learning_rate": 0.0002759440069991251, |
| "loss": 3.0986, |
| "step": 92700 |
| }, |
| { |
| "epoch": 27.033232276119403, |
| "grad_norm": 0.43019962310791016, |
| "learning_rate": 0.00027576902887139105, |
| "loss": 3.1164, |
| "step": 92750 |
| }, |
| { |
| "epoch": 27.047807835820894, |
| "grad_norm": 0.44071322679519653, |
| "learning_rate": 0.000275594050743657, |
| "loss": 3.1244, |
| "step": 92800 |
| }, |
| { |
| "epoch": 27.06238339552239, |
| "grad_norm": 0.4143284857273102, |
| "learning_rate": 0.000275419072615923, |
| "loss": 3.1148, |
| "step": 92850 |
| }, |
| { |
| "epoch": 27.07695895522388, |
| "grad_norm": 0.43484529852867126, |
| "learning_rate": 0.00027524409448818893, |
| "loss": 3.1153, |
| "step": 92900 |
| }, |
| { |
| "epoch": 27.091534514925375, |
| "grad_norm": 0.4603308439254761, |
| "learning_rate": 0.00027506911636045493, |
| "loss": 3.1139, |
| "step": 92950 |
| }, |
| { |
| "epoch": 27.106110074626866, |
| "grad_norm": 0.4302753210067749, |
| "learning_rate": 0.00027489413823272087, |
| "loss": 3.1286, |
| "step": 93000 |
| }, |
| { |
| "epoch": 27.106110074626866, |
| "eval_accuracy": 0.37318289152448975, |
| "eval_loss": 3.5575826168060303, |
| "eval_runtime": 82.136, |
| "eval_samples_per_second": 202.433, |
| "eval_steps_per_second": 12.662, |
| "step": 93000 |
| }, |
| { |
| "epoch": 27.120685634328357, |
| "grad_norm": 0.4867883622646332, |
| "learning_rate": 0.00027471916010498687, |
| "loss": 3.1335, |
| "step": 93050 |
| }, |
| { |
| "epoch": 27.13526119402985, |
| "grad_norm": 0.4239311218261719, |
| "learning_rate": 0.0002745441819772528, |
| "loss": 3.1383, |
| "step": 93100 |
| }, |
| { |
| "epoch": 27.149836753731343, |
| "grad_norm": 0.44329917430877686, |
| "learning_rate": 0.0002743692038495188, |
| "loss": 3.1343, |
| "step": 93150 |
| }, |
| { |
| "epoch": 27.164412313432837, |
| "grad_norm": 0.47699853777885437, |
| "learning_rate": 0.00027419422572178475, |
| "loss": 3.1357, |
| "step": 93200 |
| }, |
| { |
| "epoch": 27.17898787313433, |
| "grad_norm": 0.4764823317527771, |
| "learning_rate": 0.00027401924759405075, |
| "loss": 3.1396, |
| "step": 93250 |
| }, |
| { |
| "epoch": 27.19356343283582, |
| "grad_norm": 0.4707601070404053, |
| "learning_rate": 0.0002738442694663167, |
| "loss": 3.1536, |
| "step": 93300 |
| }, |
| { |
| "epoch": 27.208138992537314, |
| "grad_norm": 0.42642828822135925, |
| "learning_rate": 0.00027366929133858264, |
| "loss": 3.1399, |
| "step": 93350 |
| }, |
| { |
| "epoch": 27.222714552238806, |
| "grad_norm": 0.4552763104438782, |
| "learning_rate": 0.00027349431321084864, |
| "loss": 3.1456, |
| "step": 93400 |
| }, |
| { |
| "epoch": 27.237290111940297, |
| "grad_norm": 0.4463469684123993, |
| "learning_rate": 0.0002733193350831146, |
| "loss": 3.1482, |
| "step": 93450 |
| }, |
| { |
| "epoch": 27.25186567164179, |
| "grad_norm": 0.47506290674209595, |
| "learning_rate": 0.0002731443569553806, |
| "loss": 3.1453, |
| "step": 93500 |
| }, |
| { |
| "epoch": 27.266441231343283, |
| "grad_norm": 0.4418078660964966, |
| "learning_rate": 0.0002729693788276465, |
| "loss": 3.1503, |
| "step": 93550 |
| }, |
| { |
| "epoch": 27.281016791044777, |
| "grad_norm": 0.4288392663002014, |
| "learning_rate": 0.00027279440069991246, |
| "loss": 3.1563, |
| "step": 93600 |
| }, |
| { |
| "epoch": 27.29559235074627, |
| "grad_norm": 0.4385755956172943, |
| "learning_rate": 0.00027261942257217846, |
| "loss": 3.1549, |
| "step": 93650 |
| }, |
| { |
| "epoch": 27.31016791044776, |
| "grad_norm": 0.4316536486148834, |
| "learning_rate": 0.0002724444444444444, |
| "loss": 3.163, |
| "step": 93700 |
| }, |
| { |
| "epoch": 27.324743470149254, |
| "grad_norm": 0.4343269467353821, |
| "learning_rate": 0.00027226946631671035, |
| "loss": 3.1476, |
| "step": 93750 |
| }, |
| { |
| "epoch": 27.339319029850746, |
| "grad_norm": 0.45259353518486023, |
| "learning_rate": 0.00027209448818897635, |
| "loss": 3.1591, |
| "step": 93800 |
| }, |
| { |
| "epoch": 27.35389458955224, |
| "grad_norm": 0.44386130571365356, |
| "learning_rate": 0.0002719195100612423, |
| "loss": 3.1696, |
| "step": 93850 |
| }, |
| { |
| "epoch": 27.36847014925373, |
| "grad_norm": 0.43997707962989807, |
| "learning_rate": 0.0002717445319335083, |
| "loss": 3.1696, |
| "step": 93900 |
| }, |
| { |
| "epoch": 27.383045708955223, |
| "grad_norm": 0.4574432671070099, |
| "learning_rate": 0.00027156955380577423, |
| "loss": 3.1684, |
| "step": 93950 |
| }, |
| { |
| "epoch": 27.397621268656717, |
| "grad_norm": 0.41324666142463684, |
| "learning_rate": 0.00027139457567804023, |
| "loss": 3.1577, |
| "step": 94000 |
| }, |
| { |
| "epoch": 27.397621268656717, |
| "eval_accuracy": 0.37325468666306433, |
| "eval_loss": 3.5531723499298096, |
| "eval_runtime": 81.4446, |
| "eval_samples_per_second": 204.151, |
| "eval_steps_per_second": 12.769, |
| "step": 94000 |
| }, |
| { |
| "epoch": 27.41219682835821, |
| "grad_norm": 0.4798290431499481, |
| "learning_rate": 0.00027121959755030617, |
| "loss": 3.1687, |
| "step": 94050 |
| }, |
| { |
| "epoch": 27.426772388059703, |
| "grad_norm": 0.4669507145881653, |
| "learning_rate": 0.00027104461942257217, |
| "loss": 3.1611, |
| "step": 94100 |
| }, |
| { |
| "epoch": 27.441347947761194, |
| "grad_norm": 0.6186783909797668, |
| "learning_rate": 0.0002708696412948381, |
| "loss": 3.1633, |
| "step": 94150 |
| }, |
| { |
| "epoch": 27.455923507462686, |
| "grad_norm": 0.44731244444847107, |
| "learning_rate": 0.0002706946631671041, |
| "loss": 3.1642, |
| "step": 94200 |
| }, |
| { |
| "epoch": 27.47049906716418, |
| "grad_norm": 0.4521823227405548, |
| "learning_rate": 0.00027051968503937005, |
| "loss": 3.1794, |
| "step": 94250 |
| }, |
| { |
| "epoch": 27.48507462686567, |
| "grad_norm": 0.4513274133205414, |
| "learning_rate": 0.00027034470691163605, |
| "loss": 3.1605, |
| "step": 94300 |
| }, |
| { |
| "epoch": 27.499650186567163, |
| "grad_norm": 0.44645029306411743, |
| "learning_rate": 0.000270169728783902, |
| "loss": 3.1658, |
| "step": 94350 |
| }, |
| { |
| "epoch": 27.514225746268657, |
| "grad_norm": 0.42641523480415344, |
| "learning_rate": 0.000269994750656168, |
| "loss": 3.1763, |
| "step": 94400 |
| }, |
| { |
| "epoch": 27.52880130597015, |
| "grad_norm": 0.48925453424453735, |
| "learning_rate": 0.00026981977252843394, |
| "loss": 3.1741, |
| "step": 94450 |
| }, |
| { |
| "epoch": 27.543376865671643, |
| "grad_norm": 0.40931984782218933, |
| "learning_rate": 0.0002696447944006999, |
| "loss": 3.1692, |
| "step": 94500 |
| }, |
| { |
| "epoch": 27.557952425373134, |
| "grad_norm": 0.4484209716320038, |
| "learning_rate": 0.0002694698162729659, |
| "loss": 3.1979, |
| "step": 94550 |
| }, |
| { |
| "epoch": 27.572527985074625, |
| "grad_norm": 0.4323810636997223, |
| "learning_rate": 0.0002692948381452318, |
| "loss": 3.1729, |
| "step": 94600 |
| }, |
| { |
| "epoch": 27.58710354477612, |
| "grad_norm": 0.43009236454963684, |
| "learning_rate": 0.00026911986001749776, |
| "loss": 3.1841, |
| "step": 94650 |
| }, |
| { |
| "epoch": 27.60167910447761, |
| "grad_norm": 0.4245539605617523, |
| "learning_rate": 0.00026894488188976376, |
| "loss": 3.1874, |
| "step": 94700 |
| }, |
| { |
| "epoch": 27.616254664179106, |
| "grad_norm": 0.42876526713371277, |
| "learning_rate": 0.0002687699037620297, |
| "loss": 3.184, |
| "step": 94750 |
| }, |
| { |
| "epoch": 27.630830223880597, |
| "grad_norm": 0.4382181167602539, |
| "learning_rate": 0.0002685949256342957, |
| "loss": 3.182, |
| "step": 94800 |
| }, |
| { |
| "epoch": 27.64540578358209, |
| "grad_norm": 0.4394516050815582, |
| "learning_rate": 0.00026841994750656165, |
| "loss": 3.1818, |
| "step": 94850 |
| }, |
| { |
| "epoch": 27.659981343283583, |
| "grad_norm": 0.4382858872413635, |
| "learning_rate": 0.0002682449693788276, |
| "loss": 3.1715, |
| "step": 94900 |
| }, |
| { |
| "epoch": 27.674556902985074, |
| "grad_norm": 0.4355790317058563, |
| "learning_rate": 0.0002680699912510936, |
| "loss": 3.188, |
| "step": 94950 |
| }, |
| { |
| "epoch": 27.689132462686565, |
| "grad_norm": 0.44806766510009766, |
| "learning_rate": 0.00026789501312335953, |
| "loss": 3.1856, |
| "step": 95000 |
| }, |
| { |
| "epoch": 27.689132462686565, |
| "eval_accuracy": 0.37393956520628685, |
| "eval_loss": 3.54496693611145, |
| "eval_runtime": 81.364, |
| "eval_samples_per_second": 204.353, |
| "eval_steps_per_second": 12.782, |
| "step": 95000 |
| }, |
| { |
| "epoch": 27.70370802238806, |
| "grad_norm": 0.4786415696144104, |
| "learning_rate": 0.00026772003499562553, |
| "loss": 3.1954, |
| "step": 95050 |
| }, |
| { |
| "epoch": 27.71828358208955, |
| "grad_norm": 0.43282392621040344, |
| "learning_rate": 0.0002675450568678915, |
| "loss": 3.1868, |
| "step": 95100 |
| }, |
| { |
| "epoch": 27.732859141791046, |
| "grad_norm": 0.4446009695529938, |
| "learning_rate": 0.00026737007874015747, |
| "loss": 3.1852, |
| "step": 95150 |
| }, |
| { |
| "epoch": 27.747434701492537, |
| "grad_norm": 0.4616422951221466, |
| "learning_rate": 0.0002671951006124234, |
| "loss": 3.1974, |
| "step": 95200 |
| }, |
| { |
| "epoch": 27.76201026119403, |
| "grad_norm": 0.40891557931900024, |
| "learning_rate": 0.0002670201224846894, |
| "loss": 3.1842, |
| "step": 95250 |
| }, |
| { |
| "epoch": 27.776585820895523, |
| "grad_norm": 0.42072343826293945, |
| "learning_rate": 0.00026684514435695536, |
| "loss": 3.1819, |
| "step": 95300 |
| }, |
| { |
| "epoch": 27.791161380597014, |
| "grad_norm": 0.4344806671142578, |
| "learning_rate": 0.00026667016622922135, |
| "loss": 3.1801, |
| "step": 95350 |
| }, |
| { |
| "epoch": 27.80573694029851, |
| "grad_norm": 0.44849181175231934, |
| "learning_rate": 0.0002664951881014873, |
| "loss": 3.1914, |
| "step": 95400 |
| }, |
| { |
| "epoch": 27.8203125, |
| "grad_norm": 0.4324977695941925, |
| "learning_rate": 0.0002663202099737533, |
| "loss": 3.1815, |
| "step": 95450 |
| }, |
| { |
| "epoch": 27.83488805970149, |
| "grad_norm": 0.4600885510444641, |
| "learning_rate": 0.00026614523184601924, |
| "loss": 3.198, |
| "step": 95500 |
| }, |
| { |
| "epoch": 27.849463619402986, |
| "grad_norm": 0.43553200364112854, |
| "learning_rate": 0.0002659702537182852, |
| "loss": 3.1871, |
| "step": 95550 |
| }, |
| { |
| "epoch": 27.864039179104477, |
| "grad_norm": 0.4483141601085663, |
| "learning_rate": 0.0002657952755905512, |
| "loss": 3.2024, |
| "step": 95600 |
| }, |
| { |
| "epoch": 27.87861473880597, |
| "grad_norm": 0.4489576518535614, |
| "learning_rate": 0.0002656202974628171, |
| "loss": 3.1831, |
| "step": 95650 |
| }, |
| { |
| "epoch": 27.893190298507463, |
| "grad_norm": 0.4157513380050659, |
| "learning_rate": 0.0002654453193350831, |
| "loss": 3.1921, |
| "step": 95700 |
| }, |
| { |
| "epoch": 27.907765858208954, |
| "grad_norm": 0.41209346055984497, |
| "learning_rate": 0.00026527034120734906, |
| "loss": 3.1953, |
| "step": 95750 |
| }, |
| { |
| "epoch": 27.92234141791045, |
| "grad_norm": 0.4335402250289917, |
| "learning_rate": 0.000265095363079615, |
| "loss": 3.1873, |
| "step": 95800 |
| }, |
| { |
| "epoch": 27.93691697761194, |
| "grad_norm": 0.4793190658092499, |
| "learning_rate": 0.000264920384951881, |
| "loss": 3.1879, |
| "step": 95850 |
| }, |
| { |
| "epoch": 27.951492537313435, |
| "grad_norm": 0.43012735247612, |
| "learning_rate": 0.00026474540682414695, |
| "loss": 3.1829, |
| "step": 95900 |
| }, |
| { |
| "epoch": 27.966068097014926, |
| "grad_norm": 0.47469058632850647, |
| "learning_rate": 0.0002645704286964129, |
| "loss": 3.2055, |
| "step": 95950 |
| }, |
| { |
| "epoch": 27.980643656716417, |
| "grad_norm": 0.4229576289653778, |
| "learning_rate": 0.0002643954505686789, |
| "loss": 3.211, |
| "step": 96000 |
| }, |
| { |
| "epoch": 27.980643656716417, |
| "eval_accuracy": 0.37441765021102474, |
| "eval_loss": 3.5370290279388428, |
| "eval_runtime": 81.4563, |
| "eval_samples_per_second": 204.122, |
| "eval_steps_per_second": 12.768, |
| "step": 96000 |
| }, |
| { |
| "epoch": 27.99521921641791, |
| "grad_norm": 0.42650726437568665, |
| "learning_rate": 0.00026422047244094483, |
| "loss": 3.2048, |
| "step": 96050 |
| }, |
| { |
| "epoch": 28.009619869402986, |
| "grad_norm": 0.4413430094718933, |
| "learning_rate": 0.00026404549431321083, |
| "loss": 3.13, |
| "step": 96100 |
| }, |
| { |
| "epoch": 28.024195429104477, |
| "grad_norm": 0.4401211738586426, |
| "learning_rate": 0.0002638705161854768, |
| "loss": 3.1019, |
| "step": 96150 |
| }, |
| { |
| "epoch": 28.03877098880597, |
| "grad_norm": 0.4243675470352173, |
| "learning_rate": 0.00026369553805774277, |
| "loss": 3.109, |
| "step": 96200 |
| }, |
| { |
| "epoch": 28.053346548507463, |
| "grad_norm": 0.44328224658966064, |
| "learning_rate": 0.0002635205599300087, |
| "loss": 3.1023, |
| "step": 96250 |
| }, |
| { |
| "epoch": 28.067922108208954, |
| "grad_norm": 0.4328586459159851, |
| "learning_rate": 0.0002633455818022747, |
| "loss": 3.1122, |
| "step": 96300 |
| }, |
| { |
| "epoch": 28.08249766791045, |
| "grad_norm": 0.4224425256252289, |
| "learning_rate": 0.00026317060367454066, |
| "loss": 3.1039, |
| "step": 96350 |
| }, |
| { |
| "epoch": 28.09707322761194, |
| "grad_norm": 0.43421924114227295, |
| "learning_rate": 0.0002629956255468066, |
| "loss": 3.1199, |
| "step": 96400 |
| }, |
| { |
| "epoch": 28.11164878731343, |
| "grad_norm": 0.43485110998153687, |
| "learning_rate": 0.0002628206474190726, |
| "loss": 3.1217, |
| "step": 96450 |
| }, |
| { |
| "epoch": 28.126224347014926, |
| "grad_norm": 0.4528997838497162, |
| "learning_rate": 0.00026264566929133854, |
| "loss": 3.1155, |
| "step": 96500 |
| }, |
| { |
| "epoch": 28.140799906716417, |
| "grad_norm": 0.4375012516975403, |
| "learning_rate": 0.00026247069116360454, |
| "loss": 3.1368, |
| "step": 96550 |
| }, |
| { |
| "epoch": 28.15537546641791, |
| "grad_norm": 0.4785500764846802, |
| "learning_rate": 0.0002622957130358705, |
| "loss": 3.1282, |
| "step": 96600 |
| }, |
| { |
| "epoch": 28.169951026119403, |
| "grad_norm": 0.4440264403820038, |
| "learning_rate": 0.0002621207349081365, |
| "loss": 3.1369, |
| "step": 96650 |
| }, |
| { |
| "epoch": 28.184526585820894, |
| "grad_norm": 0.4505453109741211, |
| "learning_rate": 0.0002619457567804024, |
| "loss": 3.1321, |
| "step": 96700 |
| }, |
| { |
| "epoch": 28.19910214552239, |
| "grad_norm": 0.42353829741477966, |
| "learning_rate": 0.0002617707786526684, |
| "loss": 3.1444, |
| "step": 96750 |
| }, |
| { |
| "epoch": 28.21367770522388, |
| "grad_norm": 0.4531719386577606, |
| "learning_rate": 0.00026159580052493436, |
| "loss": 3.1437, |
| "step": 96800 |
| }, |
| { |
| "epoch": 28.228253264925375, |
| "grad_norm": 0.4176923632621765, |
| "learning_rate": 0.00026142082239720036, |
| "loss": 3.1305, |
| "step": 96850 |
| }, |
| { |
| "epoch": 28.242828824626866, |
| "grad_norm": 0.4476938247680664, |
| "learning_rate": 0.0002612458442694663, |
| "loss": 3.1518, |
| "step": 96900 |
| }, |
| { |
| "epoch": 28.257404384328357, |
| "grad_norm": 0.4284321665763855, |
| "learning_rate": 0.00026107086614173225, |
| "loss": 3.1459, |
| "step": 96950 |
| }, |
| { |
| "epoch": 28.27197994402985, |
| "grad_norm": 0.4615371823310852, |
| "learning_rate": 0.00026089588801399825, |
| "loss": 3.1377, |
| "step": 97000 |
| }, |
| { |
| "epoch": 28.27197994402985, |
| "eval_accuracy": 0.37340392639374076, |
| "eval_loss": 3.5557925701141357, |
| "eval_runtime": 81.4099, |
| "eval_samples_per_second": 204.238, |
| "eval_steps_per_second": 12.775, |
| "step": 97000 |
| }, |
| { |
| "epoch": 28.286555503731343, |
| "grad_norm": 0.4477929472923279, |
| "learning_rate": 0.0002607209098862642, |
| "loss": 3.1388, |
| "step": 97050 |
| }, |
| { |
| "epoch": 28.301131063432837, |
| "grad_norm": 0.46377018094062805, |
| "learning_rate": 0.00026054593175853013, |
| "loss": 3.1423, |
| "step": 97100 |
| }, |
| { |
| "epoch": 28.31570662313433, |
| "grad_norm": 0.4375309944152832, |
| "learning_rate": 0.00026037095363079613, |
| "loss": 3.1502, |
| "step": 97150 |
| }, |
| { |
| "epoch": 28.33028218283582, |
| "grad_norm": 0.42167800664901733, |
| "learning_rate": 0.0002601959755030621, |
| "loss": 3.1636, |
| "step": 97200 |
| }, |
| { |
| "epoch": 28.344857742537314, |
| "grad_norm": 0.4603038430213928, |
| "learning_rate": 0.00026002099737532807, |
| "loss": 3.1435, |
| "step": 97250 |
| }, |
| { |
| "epoch": 28.359433302238806, |
| "grad_norm": 0.46064409613609314, |
| "learning_rate": 0.000259846019247594, |
| "loss": 3.1646, |
| "step": 97300 |
| }, |
| { |
| "epoch": 28.374008861940297, |
| "grad_norm": 0.4541272819042206, |
| "learning_rate": 0.00025967104111985996, |
| "loss": 3.1524, |
| "step": 97350 |
| }, |
| { |
| "epoch": 28.38858442164179, |
| "grad_norm": 0.45432737469673157, |
| "learning_rate": 0.00025949606299212596, |
| "loss": 3.1483, |
| "step": 97400 |
| }, |
| { |
| "epoch": 28.403159981343283, |
| "grad_norm": 0.4208422899246216, |
| "learning_rate": 0.0002593210848643919, |
| "loss": 3.148, |
| "step": 97450 |
| }, |
| { |
| "epoch": 28.417735541044777, |
| "grad_norm": 0.44372475147247314, |
| "learning_rate": 0.0002591461067366579, |
| "loss": 3.1474, |
| "step": 97500 |
| }, |
| { |
| "epoch": 28.43231110074627, |
| "grad_norm": 0.4374051094055176, |
| "learning_rate": 0.00025897112860892384, |
| "loss": 3.163, |
| "step": 97550 |
| }, |
| { |
| "epoch": 28.44688666044776, |
| "grad_norm": 0.44096410274505615, |
| "learning_rate": 0.00025879615048118984, |
| "loss": 3.1587, |
| "step": 97600 |
| }, |
| { |
| "epoch": 28.461462220149254, |
| "grad_norm": 0.47338542342185974, |
| "learning_rate": 0.0002586211723534558, |
| "loss": 3.1599, |
| "step": 97650 |
| }, |
| { |
| "epoch": 28.476037779850746, |
| "grad_norm": 0.4479065239429474, |
| "learning_rate": 0.0002584461942257218, |
| "loss": 3.1591, |
| "step": 97700 |
| }, |
| { |
| "epoch": 28.49061333955224, |
| "grad_norm": 0.46635714173316956, |
| "learning_rate": 0.0002582712160979877, |
| "loss": 3.1686, |
| "step": 97750 |
| }, |
| { |
| "epoch": 28.50518889925373, |
| "grad_norm": 0.4383736550807953, |
| "learning_rate": 0.0002580962379702537, |
| "loss": 3.1631, |
| "step": 97800 |
| }, |
| { |
| "epoch": 28.519764458955223, |
| "grad_norm": 0.46040958166122437, |
| "learning_rate": 0.00025792125984251967, |
| "loss": 3.1537, |
| "step": 97850 |
| }, |
| { |
| "epoch": 28.534340018656717, |
| "grad_norm": 0.43736448884010315, |
| "learning_rate": 0.00025774628171478566, |
| "loss": 3.1609, |
| "step": 97900 |
| }, |
| { |
| "epoch": 28.54891557835821, |
| "grad_norm": 0.4490288496017456, |
| "learning_rate": 0.0002575713035870516, |
| "loss": 3.1711, |
| "step": 97950 |
| }, |
| { |
| "epoch": 28.563491138059703, |
| "grad_norm": 0.450336754322052, |
| "learning_rate": 0.00025739632545931755, |
| "loss": 3.1703, |
| "step": 98000 |
| }, |
| { |
| "epoch": 28.563491138059703, |
| "eval_accuracy": 0.37376537372253205, |
| "eval_loss": 3.5499653816223145, |
| "eval_runtime": 81.4058, |
| "eval_samples_per_second": 204.248, |
| "eval_steps_per_second": 12.776, |
| "step": 98000 |
| }, |
| { |
| "epoch": 28.578066697761194, |
| "grad_norm": 0.47243165969848633, |
| "learning_rate": 0.00025722134733158355, |
| "loss": 3.1704, |
| "step": 98050 |
| }, |
| { |
| "epoch": 28.592642257462686, |
| "grad_norm": 0.5251398682594299, |
| "learning_rate": 0.0002570463692038495, |
| "loss": 3.1747, |
| "step": 98100 |
| }, |
| { |
| "epoch": 28.60721781716418, |
| "grad_norm": 0.46638402342796326, |
| "learning_rate": 0.0002568713910761155, |
| "loss": 3.1742, |
| "step": 98150 |
| }, |
| { |
| "epoch": 28.62179337686567, |
| "grad_norm": 0.4190913140773773, |
| "learning_rate": 0.00025669641294838143, |
| "loss": 3.1682, |
| "step": 98200 |
| }, |
| { |
| "epoch": 28.636368936567163, |
| "grad_norm": 0.47515663504600525, |
| "learning_rate": 0.0002565214348206474, |
| "loss": 3.1723, |
| "step": 98250 |
| }, |
| { |
| "epoch": 28.650944496268657, |
| "grad_norm": 0.4323152005672455, |
| "learning_rate": 0.0002563464566929134, |
| "loss": 3.1577, |
| "step": 98300 |
| }, |
| { |
| "epoch": 28.66552005597015, |
| "grad_norm": 0.4200900197029114, |
| "learning_rate": 0.0002561714785651793, |
| "loss": 3.1871, |
| "step": 98350 |
| }, |
| { |
| "epoch": 28.680095615671643, |
| "grad_norm": 0.45298445224761963, |
| "learning_rate": 0.00025599650043744526, |
| "loss": 3.1659, |
| "step": 98400 |
| }, |
| { |
| "epoch": 28.694671175373134, |
| "grad_norm": 0.4306495785713196, |
| "learning_rate": 0.00025582152230971126, |
| "loss": 3.1648, |
| "step": 98450 |
| }, |
| { |
| "epoch": 28.709246735074625, |
| "grad_norm": 0.4497992992401123, |
| "learning_rate": 0.0002556465441819772, |
| "loss": 3.1659, |
| "step": 98500 |
| }, |
| { |
| "epoch": 28.72382229477612, |
| "grad_norm": 0.4279501438140869, |
| "learning_rate": 0.0002554715660542432, |
| "loss": 3.1657, |
| "step": 98550 |
| }, |
| { |
| "epoch": 28.73839785447761, |
| "grad_norm": 0.438489705324173, |
| "learning_rate": 0.00025529658792650914, |
| "loss": 3.1907, |
| "step": 98600 |
| }, |
| { |
| "epoch": 28.752973414179106, |
| "grad_norm": 0.42497625946998596, |
| "learning_rate": 0.00025512160979877514, |
| "loss": 3.1752, |
| "step": 98650 |
| }, |
| { |
| "epoch": 28.767548973880597, |
| "grad_norm": 0.46575412154197693, |
| "learning_rate": 0.0002549466316710411, |
| "loss": 3.177, |
| "step": 98700 |
| }, |
| { |
| "epoch": 28.78212453358209, |
| "grad_norm": 0.4322008788585663, |
| "learning_rate": 0.0002547716535433071, |
| "loss": 3.1806, |
| "step": 98750 |
| }, |
| { |
| "epoch": 28.796700093283583, |
| "grad_norm": 0.45955249667167664, |
| "learning_rate": 0.000254596675415573, |
| "loss": 3.1863, |
| "step": 98800 |
| }, |
| { |
| "epoch": 28.811275652985074, |
| "grad_norm": 0.44759657979011536, |
| "learning_rate": 0.000254421697287839, |
| "loss": 3.1943, |
| "step": 98850 |
| }, |
| { |
| "epoch": 28.825851212686565, |
| "grad_norm": 0.4237118065357208, |
| "learning_rate": 0.00025424671916010497, |
| "loss": 3.1861, |
| "step": 98900 |
| }, |
| { |
| "epoch": 28.84042677238806, |
| "grad_norm": 0.4191685914993286, |
| "learning_rate": 0.00025407174103237096, |
| "loss": 3.1886, |
| "step": 98950 |
| }, |
| { |
| "epoch": 28.85500233208955, |
| "grad_norm": 0.45205187797546387, |
| "learning_rate": 0.0002538967629046369, |
| "loss": 3.1761, |
| "step": 99000 |
| }, |
| { |
| "epoch": 28.85500233208955, |
| "eval_accuracy": 0.37389130945740884, |
| "eval_loss": 3.5445897579193115, |
| "eval_runtime": 81.4173, |
| "eval_samples_per_second": 204.22, |
| "eval_steps_per_second": 12.774, |
| "step": 99000 |
| }, |
| { |
| "epoch": 28.869577891791046, |
| "grad_norm": 0.4248482584953308, |
| "learning_rate": 0.0002537217847769029, |
| "loss": 3.1808, |
| "step": 99050 |
| }, |
| { |
| "epoch": 28.884153451492537, |
| "grad_norm": 0.44186240434646606, |
| "learning_rate": 0.00025354680664916885, |
| "loss": 3.1812, |
| "step": 99100 |
| }, |
| { |
| "epoch": 28.89872901119403, |
| "grad_norm": 0.46802234649658203, |
| "learning_rate": 0.0002533718285214348, |
| "loss": 3.1865, |
| "step": 99150 |
| }, |
| { |
| "epoch": 28.913304570895523, |
| "grad_norm": 0.4645196795463562, |
| "learning_rate": 0.0002531968503937008, |
| "loss": 3.1839, |
| "step": 99200 |
| }, |
| { |
| "epoch": 28.927880130597014, |
| "grad_norm": 0.4409431219100952, |
| "learning_rate": 0.00025302187226596673, |
| "loss": 3.1905, |
| "step": 99250 |
| }, |
| { |
| "epoch": 28.94245569029851, |
| "grad_norm": 0.45034196972846985, |
| "learning_rate": 0.0002528468941382327, |
| "loss": 3.188, |
| "step": 99300 |
| }, |
| { |
| "epoch": 28.95703125, |
| "grad_norm": 0.4407288730144501, |
| "learning_rate": 0.0002526719160104987, |
| "loss": 3.1961, |
| "step": 99350 |
| }, |
| { |
| "epoch": 28.97160680970149, |
| "grad_norm": 0.42619070410728455, |
| "learning_rate": 0.0002524969378827646, |
| "loss": 3.1995, |
| "step": 99400 |
| }, |
| { |
| "epoch": 28.986182369402986, |
| "grad_norm": 0.4555329382419586, |
| "learning_rate": 0.0002523219597550306, |
| "loss": 3.1866, |
| "step": 99450 |
| }, |
| { |
| "epoch": 29.00058302238806, |
| "grad_norm": 0.46135783195495605, |
| "learning_rate": 0.00025214698162729656, |
| "loss": 3.1917, |
| "step": 99500 |
| }, |
| { |
| "epoch": 29.01515858208955, |
| "grad_norm": 0.444700688123703, |
| "learning_rate": 0.0002519720034995625, |
| "loss": 3.0955, |
| "step": 99550 |
| }, |
| { |
| "epoch": 29.029734141791046, |
| "grad_norm": 0.44991371035575867, |
| "learning_rate": 0.0002517970253718285, |
| "loss": 3.0921, |
| "step": 99600 |
| }, |
| { |
| "epoch": 29.044309701492537, |
| "grad_norm": 0.4495794475078583, |
| "learning_rate": 0.00025162204724409444, |
| "loss": 3.105, |
| "step": 99650 |
| }, |
| { |
| "epoch": 29.05888526119403, |
| "grad_norm": 0.4699917435646057, |
| "learning_rate": 0.00025144706911636044, |
| "loss": 3.0964, |
| "step": 99700 |
| }, |
| { |
| "epoch": 29.073460820895523, |
| "grad_norm": 0.4349089562892914, |
| "learning_rate": 0.0002512720909886264, |
| "loss": 3.1254, |
| "step": 99750 |
| }, |
| { |
| "epoch": 29.088036380597014, |
| "grad_norm": 0.4257027208805084, |
| "learning_rate": 0.0002510971128608924, |
| "loss": 3.1008, |
| "step": 99800 |
| }, |
| { |
| "epoch": 29.10261194029851, |
| "grad_norm": 0.43259721994400024, |
| "learning_rate": 0.0002509221347331583, |
| "loss": 3.1116, |
| "step": 99850 |
| }, |
| { |
| "epoch": 29.1171875, |
| "grad_norm": 0.4446471631526947, |
| "learning_rate": 0.0002507471566054243, |
| "loss": 3.1186, |
| "step": 99900 |
| }, |
| { |
| "epoch": 29.13176305970149, |
| "grad_norm": 0.4739801585674286, |
| "learning_rate": 0.00025057217847769027, |
| "loss": 3.1133, |
| "step": 99950 |
| }, |
| { |
| "epoch": 29.146338619402986, |
| "grad_norm": 0.4532054662704468, |
| "learning_rate": 0.0002503972003499562, |
| "loss": 3.1259, |
| "step": 100000 |
| }, |
| { |
| "epoch": 29.146338619402986, |
| "eval_accuracy": 0.37309544269176687, |
| "eval_loss": 3.5599021911621094, |
| "eval_runtime": 81.5499, |
| "eval_samples_per_second": 203.887, |
| "eval_steps_per_second": 12.753, |
| "step": 100000 |
| }, |
| { |
| "epoch": 29.160914179104477, |
| "grad_norm": 0.44886332750320435, |
| "learning_rate": 0.0002502222222222222, |
| "loss": 3.1373, |
| "step": 100050 |
| }, |
| { |
| "epoch": 29.17548973880597, |
| "grad_norm": 0.4415283203125, |
| "learning_rate": 0.00025004724409448815, |
| "loss": 3.1204, |
| "step": 100100 |
| }, |
| { |
| "epoch": 29.190065298507463, |
| "grad_norm": 0.41481301188468933, |
| "learning_rate": 0.00024987226596675415, |
| "loss": 3.1203, |
| "step": 100150 |
| }, |
| { |
| "epoch": 29.204640858208954, |
| "grad_norm": 0.469487726688385, |
| "learning_rate": 0.0002496972878390201, |
| "loss": 3.1224, |
| "step": 100200 |
| }, |
| { |
| "epoch": 29.21921641791045, |
| "grad_norm": 0.46850454807281494, |
| "learning_rate": 0.0002495223097112861, |
| "loss": 3.1309, |
| "step": 100250 |
| }, |
| { |
| "epoch": 29.23379197761194, |
| "grad_norm": 0.4412026107311249, |
| "learning_rate": 0.00024934733158355203, |
| "loss": 3.1222, |
| "step": 100300 |
| }, |
| { |
| "epoch": 29.24836753731343, |
| "grad_norm": 0.43051907420158386, |
| "learning_rate": 0.00024917235345581803, |
| "loss": 3.1292, |
| "step": 100350 |
| }, |
| { |
| "epoch": 29.262943097014926, |
| "grad_norm": 0.4601272642612457, |
| "learning_rate": 0.000248997375328084, |
| "loss": 3.1311, |
| "step": 100400 |
| }, |
| { |
| "epoch": 29.277518656716417, |
| "grad_norm": 0.4913773834705353, |
| "learning_rate": 0.0002488223972003499, |
| "loss": 3.1256, |
| "step": 100450 |
| }, |
| { |
| "epoch": 29.29209421641791, |
| "grad_norm": 0.4545249938964844, |
| "learning_rate": 0.0002486474190726159, |
| "loss": 3.1333, |
| "step": 100500 |
| }, |
| { |
| "epoch": 29.306669776119403, |
| "grad_norm": 0.439251571893692, |
| "learning_rate": 0.00024847244094488186, |
| "loss": 3.1338, |
| "step": 100550 |
| }, |
| { |
| "epoch": 29.321245335820894, |
| "grad_norm": 0.4394617974758148, |
| "learning_rate": 0.0002482974628171478, |
| "loss": 3.1343, |
| "step": 100600 |
| }, |
| { |
| "epoch": 29.33582089552239, |
| "grad_norm": 0.4542093873023987, |
| "learning_rate": 0.0002481224846894138, |
| "loss": 3.151, |
| "step": 100650 |
| }, |
| { |
| "epoch": 29.35039645522388, |
| "grad_norm": 0.45754221081733704, |
| "learning_rate": 0.00024794750656167975, |
| "loss": 3.1493, |
| "step": 100700 |
| }, |
| { |
| "epoch": 29.364972014925375, |
| "grad_norm": 0.4488230049610138, |
| "learning_rate": 0.00024777252843394574, |
| "loss": 3.1521, |
| "step": 100750 |
| }, |
| { |
| "epoch": 29.379547574626866, |
| "grad_norm": 0.45819759368896484, |
| "learning_rate": 0.0002475975503062117, |
| "loss": 3.1485, |
| "step": 100800 |
| }, |
| { |
| "epoch": 29.394123134328357, |
| "grad_norm": 0.4417685270309448, |
| "learning_rate": 0.0002474225721784777, |
| "loss": 3.147, |
| "step": 100850 |
| }, |
| { |
| "epoch": 29.40869869402985, |
| "grad_norm": 0.4662354588508606, |
| "learning_rate": 0.00024724759405074363, |
| "loss": 3.1439, |
| "step": 100900 |
| }, |
| { |
| "epoch": 29.423274253731343, |
| "grad_norm": 0.46339571475982666, |
| "learning_rate": 0.00024707261592300957, |
| "loss": 3.1516, |
| "step": 100950 |
| }, |
| { |
| "epoch": 29.437849813432837, |
| "grad_norm": 0.45555153489112854, |
| "learning_rate": 0.00024689763779527557, |
| "loss": 3.1412, |
| "step": 101000 |
| }, |
| { |
| "epoch": 29.437849813432837, |
| "eval_accuracy": 0.3736002449038104, |
| "eval_loss": 3.554413080215454, |
| "eval_runtime": 81.1357, |
| "eval_samples_per_second": 204.928, |
| "eval_steps_per_second": 12.818, |
| "step": 101000 |
| }, |
| { |
| "epoch": 29.45242537313433, |
| "grad_norm": 0.4727424681186676, |
| "learning_rate": 0.0002467226596675415, |
| "loss": 3.1543, |
| "step": 101050 |
| }, |
| { |
| "epoch": 29.46700093283582, |
| "grad_norm": 0.43927642703056335, |
| "learning_rate": 0.0002465476815398075, |
| "loss": 3.1567, |
| "step": 101100 |
| }, |
| { |
| "epoch": 29.481576492537314, |
| "grad_norm": 0.4496012330055237, |
| "learning_rate": 0.00024637270341207345, |
| "loss": 3.1537, |
| "step": 101150 |
| }, |
| { |
| "epoch": 29.496152052238806, |
| "grad_norm": 0.49772119522094727, |
| "learning_rate": 0.00024619772528433945, |
| "loss": 3.1574, |
| "step": 101200 |
| }, |
| { |
| "epoch": 29.510727611940297, |
| "grad_norm": 0.44245609641075134, |
| "learning_rate": 0.0002460227471566054, |
| "loss": 3.1533, |
| "step": 101250 |
| }, |
| { |
| "epoch": 29.52530317164179, |
| "grad_norm": 0.4604763090610504, |
| "learning_rate": 0.0002458477690288714, |
| "loss": 3.1584, |
| "step": 101300 |
| }, |
| { |
| "epoch": 29.539878731343283, |
| "grad_norm": 0.4395926296710968, |
| "learning_rate": 0.00024567279090113734, |
| "loss": 3.1496, |
| "step": 101350 |
| }, |
| { |
| "epoch": 29.554454291044777, |
| "grad_norm": 0.4405112862586975, |
| "learning_rate": 0.00024549781277340333, |
| "loss": 3.1496, |
| "step": 101400 |
| }, |
| { |
| "epoch": 29.56902985074627, |
| "grad_norm": 0.4369037449359894, |
| "learning_rate": 0.0002453228346456693, |
| "loss": 3.1631, |
| "step": 101450 |
| }, |
| { |
| "epoch": 29.58360541044776, |
| "grad_norm": 0.4483332931995392, |
| "learning_rate": 0.0002451478565179353, |
| "loss": 3.1673, |
| "step": 101500 |
| }, |
| { |
| "epoch": 29.598180970149254, |
| "grad_norm": 0.4601835310459137, |
| "learning_rate": 0.0002449728783902012, |
| "loss": 3.1583, |
| "step": 101550 |
| }, |
| { |
| "epoch": 29.612756529850746, |
| "grad_norm": 0.4867148697376251, |
| "learning_rate": 0.00024479790026246716, |
| "loss": 3.1646, |
| "step": 101600 |
| }, |
| { |
| "epoch": 29.62733208955224, |
| "grad_norm": 0.42792022228240967, |
| "learning_rate": 0.00024462292213473316, |
| "loss": 3.1623, |
| "step": 101650 |
| }, |
| { |
| "epoch": 29.64190764925373, |
| "grad_norm": 0.4424441158771515, |
| "learning_rate": 0.0002444479440069991, |
| "loss": 3.1556, |
| "step": 101700 |
| }, |
| { |
| "epoch": 29.656483208955223, |
| "grad_norm": 0.46660882234573364, |
| "learning_rate": 0.00024427296587926505, |
| "loss": 3.1703, |
| "step": 101750 |
| }, |
| { |
| "epoch": 29.671058768656717, |
| "grad_norm": 0.4472818374633789, |
| "learning_rate": 0.00024409798775153102, |
| "loss": 3.1639, |
| "step": 101800 |
| }, |
| { |
| "epoch": 29.68563432835821, |
| "grad_norm": 0.4481259286403656, |
| "learning_rate": 0.00024392300962379701, |
| "loss": 3.1763, |
| "step": 101850 |
| }, |
| { |
| "epoch": 29.700209888059703, |
| "grad_norm": 0.4559357762336731, |
| "learning_rate": 0.00024374803149606296, |
| "loss": 3.1714, |
| "step": 101900 |
| }, |
| { |
| "epoch": 29.714785447761194, |
| "grad_norm": 0.4513476490974426, |
| "learning_rate": 0.00024357305336832893, |
| "loss": 3.1579, |
| "step": 101950 |
| }, |
| { |
| "epoch": 29.729361007462686, |
| "grad_norm": 0.43404948711395264, |
| "learning_rate": 0.0002433980752405949, |
| "loss": 3.1537, |
| "step": 102000 |
| }, |
| { |
| "epoch": 29.729361007462686, |
| "eval_accuracy": 0.37413376517128377, |
| "eval_loss": 3.5457191467285156, |
| "eval_runtime": 113.0692, |
| "eval_samples_per_second": 147.052, |
| "eval_steps_per_second": 9.198, |
| "step": 102000 |
| }, |
| { |
| "epoch": 29.74393656716418, |
| "grad_norm": 0.44890788197517395, |
| "learning_rate": 0.00024322309711286087, |
| "loss": 3.1716, |
| "step": 102050 |
| }, |
| { |
| "epoch": 29.75851212686567, |
| "grad_norm": 0.44202056527137756, |
| "learning_rate": 0.00024304811898512684, |
| "loss": 3.1689, |
| "step": 102100 |
| }, |
| { |
| "epoch": 29.773087686567163, |
| "grad_norm": 0.46225395798683167, |
| "learning_rate": 0.0002428731408573928, |
| "loss": 3.1651, |
| "step": 102150 |
| }, |
| { |
| "epoch": 29.787663246268657, |
| "grad_norm": 0.42368146777153015, |
| "learning_rate": 0.00024269816272965875, |
| "loss": 3.1662, |
| "step": 102200 |
| }, |
| { |
| "epoch": 29.80223880597015, |
| "grad_norm": 0.44670891761779785, |
| "learning_rate": 0.00024252318460192475, |
| "loss": 3.1722, |
| "step": 102250 |
| }, |
| { |
| "epoch": 29.816814365671643, |
| "grad_norm": 0.44104140996932983, |
| "learning_rate": 0.0002423482064741907, |
| "loss": 3.1713, |
| "step": 102300 |
| }, |
| { |
| "epoch": 29.831389925373134, |
| "grad_norm": 0.43549516797065735, |
| "learning_rate": 0.0002421732283464567, |
| "loss": 3.182, |
| "step": 102350 |
| }, |
| { |
| "epoch": 29.845965485074625, |
| "grad_norm": 0.4488188624382019, |
| "learning_rate": 0.00024199825021872264, |
| "loss": 3.1851, |
| "step": 102400 |
| }, |
| { |
| "epoch": 29.86054104477612, |
| "grad_norm": 0.46105653047561646, |
| "learning_rate": 0.0002418232720909886, |
| "loss": 3.1763, |
| "step": 102450 |
| }, |
| { |
| "epoch": 29.87511660447761, |
| "grad_norm": 0.44759291410446167, |
| "learning_rate": 0.00024164829396325458, |
| "loss": 3.1744, |
| "step": 102500 |
| }, |
| { |
| "epoch": 29.889692164179106, |
| "grad_norm": 0.45466265082359314, |
| "learning_rate": 0.00024147331583552055, |
| "loss": 3.1666, |
| "step": 102550 |
| }, |
| { |
| "epoch": 29.904267723880597, |
| "grad_norm": 0.48013535141944885, |
| "learning_rate": 0.0002412983377077865, |
| "loss": 3.1687, |
| "step": 102600 |
| }, |
| { |
| "epoch": 29.91884328358209, |
| "grad_norm": 0.4551558196544647, |
| "learning_rate": 0.0002411233595800525, |
| "loss": 3.1802, |
| "step": 102650 |
| }, |
| { |
| "epoch": 29.933418843283583, |
| "grad_norm": 0.46371686458587646, |
| "learning_rate": 0.00024094838145231843, |
| "loss": 3.1774, |
| "step": 102700 |
| }, |
| { |
| "epoch": 29.947994402985074, |
| "grad_norm": 0.4950931668281555, |
| "learning_rate": 0.0002407734033245844, |
| "loss": 3.1714, |
| "step": 102750 |
| }, |
| { |
| "epoch": 29.962569962686565, |
| "grad_norm": 0.45876121520996094, |
| "learning_rate": 0.00024059842519685037, |
| "loss": 3.1855, |
| "step": 102800 |
| }, |
| { |
| "epoch": 29.97714552238806, |
| "grad_norm": 0.4418525695800781, |
| "learning_rate": 0.00024042344706911632, |
| "loss": 3.1874, |
| "step": 102850 |
| }, |
| { |
| "epoch": 29.99172108208955, |
| "grad_norm": 0.4665625989437103, |
| "learning_rate": 0.00024024846894138232, |
| "loss": 3.1899, |
| "step": 102900 |
| }, |
| { |
| "epoch": 30.006121735074625, |
| "grad_norm": 0.4641040861606598, |
| "learning_rate": 0.00024007349081364826, |
| "loss": 3.1525, |
| "step": 102950 |
| }, |
| { |
| "epoch": 30.02069729477612, |
| "grad_norm": 0.4659494459629059, |
| "learning_rate": 0.00023989851268591426, |
| "loss": 3.0842, |
| "step": 103000 |
| }, |
| { |
| "epoch": 30.02069729477612, |
| "eval_accuracy": 0.3736993457344331, |
| "eval_loss": 3.555461883544922, |
| "eval_runtime": 80.9964, |
| "eval_samples_per_second": 205.281, |
| "eval_steps_per_second": 12.84, |
| "step": 103000 |
| }, |
| { |
| "epoch": 30.03527285447761, |
| "grad_norm": 0.46675172448158264, |
| "learning_rate": 0.0002397235345581802, |
| "loss": 3.0937, |
| "step": 103050 |
| }, |
| { |
| "epoch": 30.049848414179106, |
| "grad_norm": 0.4507794678211212, |
| "learning_rate": 0.00023954855643044617, |
| "loss": 3.0836, |
| "step": 103100 |
| }, |
| { |
| "epoch": 30.064423973880597, |
| "grad_norm": 0.45438939332962036, |
| "learning_rate": 0.00023937357830271214, |
| "loss": 3.0987, |
| "step": 103150 |
| }, |
| { |
| "epoch": 30.07899953358209, |
| "grad_norm": 0.44462865591049194, |
| "learning_rate": 0.0002391986001749781, |
| "loss": 3.0901, |
| "step": 103200 |
| }, |
| { |
| "epoch": 30.093575093283583, |
| "grad_norm": 0.46132996678352356, |
| "learning_rate": 0.00023902362204724406, |
| "loss": 3.1067, |
| "step": 103250 |
| }, |
| { |
| "epoch": 30.108150652985074, |
| "grad_norm": 0.45434698462486267, |
| "learning_rate": 0.00023884864391951005, |
| "loss": 3.1135, |
| "step": 103300 |
| }, |
| { |
| "epoch": 30.12272621268657, |
| "grad_norm": 0.46210694313049316, |
| "learning_rate": 0.000238673665791776, |
| "loss": 3.1125, |
| "step": 103350 |
| }, |
| { |
| "epoch": 30.13730177238806, |
| "grad_norm": 0.4344474673271179, |
| "learning_rate": 0.000238498687664042, |
| "loss": 3.105, |
| "step": 103400 |
| }, |
| { |
| "epoch": 30.15187733208955, |
| "grad_norm": 0.4553607404232025, |
| "learning_rate": 0.00023832370953630794, |
| "loss": 3.1198, |
| "step": 103450 |
| }, |
| { |
| "epoch": 30.166452891791046, |
| "grad_norm": 0.4717164933681488, |
| "learning_rate": 0.00023814873140857394, |
| "loss": 3.1242, |
| "step": 103500 |
| }, |
| { |
| "epoch": 30.181028451492537, |
| "grad_norm": 0.45845305919647217, |
| "learning_rate": 0.00023797375328083988, |
| "loss": 3.117, |
| "step": 103550 |
| }, |
| { |
| "epoch": 30.19560401119403, |
| "grad_norm": 0.45949000120162964, |
| "learning_rate": 0.00023779877515310582, |
| "loss": 3.1081, |
| "step": 103600 |
| }, |
| { |
| "epoch": 30.210179570895523, |
| "grad_norm": 0.4732789695262909, |
| "learning_rate": 0.00023762379702537182, |
| "loss": 3.1121, |
| "step": 103650 |
| }, |
| { |
| "epoch": 30.224755130597014, |
| "grad_norm": 0.4642253518104553, |
| "learning_rate": 0.00023744881889763776, |
| "loss": 3.1267, |
| "step": 103700 |
| }, |
| { |
| "epoch": 30.23933069029851, |
| "grad_norm": 0.47492390871047974, |
| "learning_rate": 0.00023727384076990373, |
| "loss": 3.1203, |
| "step": 103750 |
| }, |
| { |
| "epoch": 30.25390625, |
| "grad_norm": 0.47091248631477356, |
| "learning_rate": 0.0002370988626421697, |
| "loss": 3.1275, |
| "step": 103800 |
| }, |
| { |
| "epoch": 30.26848180970149, |
| "grad_norm": 0.4485725164413452, |
| "learning_rate": 0.00023692388451443568, |
| "loss": 3.1242, |
| "step": 103850 |
| }, |
| { |
| "epoch": 30.283057369402986, |
| "grad_norm": 0.43855518102645874, |
| "learning_rate": 0.00023674890638670162, |
| "loss": 3.1274, |
| "step": 103900 |
| }, |
| { |
| "epoch": 30.297632929104477, |
| "grad_norm": 0.46146199107170105, |
| "learning_rate": 0.00023657392825896762, |
| "loss": 3.1225, |
| "step": 103950 |
| }, |
| { |
| "epoch": 30.31220848880597, |
| "grad_norm": 0.45113933086395264, |
| "learning_rate": 0.00023639895013123356, |
| "loss": 3.1355, |
| "step": 104000 |
| }, |
| { |
| "epoch": 30.31220848880597, |
| "eval_accuracy": 0.373506322738921, |
| "eval_loss": 3.556199550628662, |
| "eval_runtime": 80.9658, |
| "eval_samples_per_second": 205.358, |
| "eval_steps_per_second": 12.845, |
| "step": 104000 |
| }, |
| { |
| "epoch": 30.326784048507463, |
| "grad_norm": 0.46137019991874695, |
| "learning_rate": 0.00023622397200349956, |
| "loss": 3.1415, |
| "step": 104050 |
| }, |
| { |
| "epoch": 30.341359608208954, |
| "grad_norm": 0.4407903254032135, |
| "learning_rate": 0.0002360489938757655, |
| "loss": 3.1313, |
| "step": 104100 |
| }, |
| { |
| "epoch": 30.35593516791045, |
| "grad_norm": 0.47344106435775757, |
| "learning_rate": 0.0002358740157480315, |
| "loss": 3.1358, |
| "step": 104150 |
| }, |
| { |
| "epoch": 30.37051072761194, |
| "grad_norm": 0.49034038186073303, |
| "learning_rate": 0.00023569903762029744, |
| "loss": 3.1396, |
| "step": 104200 |
| }, |
| { |
| "epoch": 30.385086287313435, |
| "grad_norm": 0.4473479092121124, |
| "learning_rate": 0.0002355240594925634, |
| "loss": 3.1308, |
| "step": 104250 |
| }, |
| { |
| "epoch": 30.399661847014926, |
| "grad_norm": 0.4641791582107544, |
| "learning_rate": 0.00023534908136482938, |
| "loss": 3.1368, |
| "step": 104300 |
| }, |
| { |
| "epoch": 30.414237406716417, |
| "grad_norm": 0.4826582670211792, |
| "learning_rate": 0.00023517410323709535, |
| "loss": 3.135, |
| "step": 104350 |
| }, |
| { |
| "epoch": 30.42881296641791, |
| "grad_norm": 0.4261675775051117, |
| "learning_rate": 0.0002349991251093613, |
| "loss": 3.1401, |
| "step": 104400 |
| }, |
| { |
| "epoch": 30.443388526119403, |
| "grad_norm": 0.4646788239479065, |
| "learning_rate": 0.0002348241469816273, |
| "loss": 3.1331, |
| "step": 104450 |
| }, |
| { |
| "epoch": 30.457964085820894, |
| "grad_norm": 0.4778652787208557, |
| "learning_rate": 0.00023464916885389324, |
| "loss": 3.14, |
| "step": 104500 |
| }, |
| { |
| "epoch": 30.47253964552239, |
| "grad_norm": 0.4688096046447754, |
| "learning_rate": 0.00023447419072615918, |
| "loss": 3.1459, |
| "step": 104550 |
| }, |
| { |
| "epoch": 30.48711520522388, |
| "grad_norm": 0.4571461081504822, |
| "learning_rate": 0.00023429921259842518, |
| "loss": 3.1536, |
| "step": 104600 |
| }, |
| { |
| "epoch": 30.501690764925375, |
| "grad_norm": 0.46317562460899353, |
| "learning_rate": 0.00023412423447069112, |
| "loss": 3.1514, |
| "step": 104650 |
| }, |
| { |
| "epoch": 30.516266324626866, |
| "grad_norm": 0.440745085477829, |
| "learning_rate": 0.00023394925634295712, |
| "loss": 3.151, |
| "step": 104700 |
| }, |
| { |
| "epoch": 30.530841884328357, |
| "grad_norm": 0.4459381103515625, |
| "learning_rate": 0.00023377427821522306, |
| "loss": 3.1491, |
| "step": 104750 |
| }, |
| { |
| "epoch": 30.54541744402985, |
| "grad_norm": 0.43897759914398193, |
| "learning_rate": 0.00023359930008748906, |
| "loss": 3.1535, |
| "step": 104800 |
| }, |
| { |
| "epoch": 30.559993003731343, |
| "grad_norm": 0.45239999890327454, |
| "learning_rate": 0.000233424321959755, |
| "loss": 3.1449, |
| "step": 104850 |
| }, |
| { |
| "epoch": 30.574568563432837, |
| "grad_norm": 0.46996092796325684, |
| "learning_rate": 0.00023324934383202098, |
| "loss": 3.1482, |
| "step": 104900 |
| }, |
| { |
| "epoch": 30.58914412313433, |
| "grad_norm": 0.4657279849052429, |
| "learning_rate": 0.00023307436570428695, |
| "loss": 3.1442, |
| "step": 104950 |
| }, |
| { |
| "epoch": 30.60371968283582, |
| "grad_norm": 0.4449421763420105, |
| "learning_rate": 0.00023289938757655292, |
| "loss": 3.1478, |
| "step": 105000 |
| }, |
| { |
| "epoch": 30.60371968283582, |
| "eval_accuracy": 0.37399841368052833, |
| "eval_loss": 3.5493507385253906, |
| "eval_runtime": 81.065, |
| "eval_samples_per_second": 205.107, |
| "eval_steps_per_second": 12.829, |
| "step": 105000 |
| }, |
| { |
| "epoch": 30.618295242537314, |
| "grad_norm": 0.4685562252998352, |
| "learning_rate": 0.00023272440944881886, |
| "loss": 3.1475, |
| "step": 105050 |
| }, |
| { |
| "epoch": 30.632870802238806, |
| "grad_norm": 0.46623244881629944, |
| "learning_rate": 0.00023254943132108486, |
| "loss": 3.1542, |
| "step": 105100 |
| }, |
| { |
| "epoch": 30.647446361940297, |
| "grad_norm": 0.4504511058330536, |
| "learning_rate": 0.0002323744531933508, |
| "loss": 3.1613, |
| "step": 105150 |
| }, |
| { |
| "epoch": 30.66202192164179, |
| "grad_norm": 0.4688693881034851, |
| "learning_rate": 0.0002321994750656168, |
| "loss": 3.166, |
| "step": 105200 |
| }, |
| { |
| "epoch": 30.676597481343283, |
| "grad_norm": 0.4453798830509186, |
| "learning_rate": 0.00023202449693788274, |
| "loss": 3.162, |
| "step": 105250 |
| }, |
| { |
| "epoch": 30.691173041044777, |
| "grad_norm": 0.4550257921218872, |
| "learning_rate": 0.00023184951881014871, |
| "loss": 3.1528, |
| "step": 105300 |
| }, |
| { |
| "epoch": 30.70574860074627, |
| "grad_norm": 0.4807048439979553, |
| "learning_rate": 0.00023167454068241468, |
| "loss": 3.1511, |
| "step": 105350 |
| }, |
| { |
| "epoch": 30.72032416044776, |
| "grad_norm": 0.44397810101509094, |
| "learning_rate": 0.00023149956255468063, |
| "loss": 3.1635, |
| "step": 105400 |
| }, |
| { |
| "epoch": 30.734899720149254, |
| "grad_norm": 0.4527873992919922, |
| "learning_rate": 0.00023132458442694663, |
| "loss": 3.1681, |
| "step": 105450 |
| }, |
| { |
| "epoch": 30.749475279850746, |
| "grad_norm": 0.44800546765327454, |
| "learning_rate": 0.00023114960629921257, |
| "loss": 3.1506, |
| "step": 105500 |
| }, |
| { |
| "epoch": 30.76405083955224, |
| "grad_norm": 0.4325464367866516, |
| "learning_rate": 0.00023097462817147854, |
| "loss": 3.1631, |
| "step": 105550 |
| }, |
| { |
| "epoch": 30.77862639925373, |
| "grad_norm": 0.4772892892360687, |
| "learning_rate": 0.0002307996500437445, |
| "loss": 3.1606, |
| "step": 105600 |
| }, |
| { |
| "epoch": 30.793201958955223, |
| "grad_norm": 0.46441707015037537, |
| "learning_rate": 0.00023062467191601048, |
| "loss": 3.1652, |
| "step": 105650 |
| }, |
| { |
| "epoch": 30.807777518656717, |
| "grad_norm": 0.5042896270751953, |
| "learning_rate": 0.00023044969378827642, |
| "loss": 3.1667, |
| "step": 105700 |
| }, |
| { |
| "epoch": 30.82235307835821, |
| "grad_norm": 0.47036123275756836, |
| "learning_rate": 0.00023027471566054242, |
| "loss": 3.164, |
| "step": 105750 |
| }, |
| { |
| "epoch": 30.836928638059703, |
| "grad_norm": 0.45908039808273315, |
| "learning_rate": 0.00023009973753280837, |
| "loss": 3.174, |
| "step": 105800 |
| }, |
| { |
| "epoch": 30.851504197761194, |
| "grad_norm": 0.46668148040771484, |
| "learning_rate": 0.00022992475940507436, |
| "loss": 3.1733, |
| "step": 105850 |
| }, |
| { |
| "epoch": 30.866079757462686, |
| "grad_norm": 0.4751679301261902, |
| "learning_rate": 0.0002297497812773403, |
| "loss": 3.1681, |
| "step": 105900 |
| }, |
| { |
| "epoch": 30.88065531716418, |
| "grad_norm": 0.46648067235946655, |
| "learning_rate": 0.00022957480314960628, |
| "loss": 3.167, |
| "step": 105950 |
| }, |
| { |
| "epoch": 30.89523087686567, |
| "grad_norm": 0.4647907018661499, |
| "learning_rate": 0.00022939982502187225, |
| "loss": 3.166, |
| "step": 106000 |
| }, |
| { |
| "epoch": 30.89523087686567, |
| "eval_accuracy": 0.37426323181461507, |
| "eval_loss": 3.54299259185791, |
| "eval_runtime": 80.981, |
| "eval_samples_per_second": 205.32, |
| "eval_steps_per_second": 12.843, |
| "step": 106000 |
| }, |
| { |
| "epoch": 30.909806436567163, |
| "grad_norm": 0.5013905763626099, |
| "learning_rate": 0.00022922484689413822, |
| "loss": 3.1729, |
| "step": 106050 |
| }, |
| { |
| "epoch": 30.924381996268657, |
| "grad_norm": 0.43408867716789246, |
| "learning_rate": 0.0002290498687664042, |
| "loss": 3.1664, |
| "step": 106100 |
| }, |
| { |
| "epoch": 30.93895755597015, |
| "grad_norm": 0.476228266954422, |
| "learning_rate": 0.00022887489063867016, |
| "loss": 3.1778, |
| "step": 106150 |
| }, |
| { |
| "epoch": 30.953533115671643, |
| "grad_norm": 0.4531143009662628, |
| "learning_rate": 0.0002286999125109361, |
| "loss": 3.1769, |
| "step": 106200 |
| }, |
| { |
| "epoch": 30.968108675373134, |
| "grad_norm": 0.4749349057674408, |
| "learning_rate": 0.0002285249343832021, |
| "loss": 3.1645, |
| "step": 106250 |
| }, |
| { |
| "epoch": 30.982684235074625, |
| "grad_norm": 0.454899400472641, |
| "learning_rate": 0.00022834995625546804, |
| "loss": 3.1611, |
| "step": 106300 |
| }, |
| { |
| "epoch": 30.99725979477612, |
| "grad_norm": 0.4563228487968445, |
| "learning_rate": 0.000228174978127734, |
| "loss": 3.1717, |
| "step": 106350 |
| }, |
| { |
| "epoch": 31.011660447761194, |
| "grad_norm": 0.4625491499900818, |
| "learning_rate": 0.00022799999999999999, |
| "loss": 3.102, |
| "step": 106400 |
| }, |
| { |
| "epoch": 31.026236007462686, |
| "grad_norm": 0.4476287364959717, |
| "learning_rate": 0.00022782502187226593, |
| "loss": 3.0799, |
| "step": 106450 |
| }, |
| { |
| "epoch": 31.04081156716418, |
| "grad_norm": 0.4789726436138153, |
| "learning_rate": 0.00022765004374453193, |
| "loss": 3.0748, |
| "step": 106500 |
| }, |
| { |
| "epoch": 31.05538712686567, |
| "grad_norm": 0.45199573040008545, |
| "learning_rate": 0.00022747506561679787, |
| "loss": 3.0959, |
| "step": 106550 |
| }, |
| { |
| "epoch": 31.069962686567163, |
| "grad_norm": 0.4647940993309021, |
| "learning_rate": 0.00022730008748906384, |
| "loss": 3.1029, |
| "step": 106600 |
| }, |
| { |
| "epoch": 31.084538246268657, |
| "grad_norm": 0.48996156454086304, |
| "learning_rate": 0.0002271251093613298, |
| "loss": 3.0849, |
| "step": 106650 |
| }, |
| { |
| "epoch": 31.09911380597015, |
| "grad_norm": 0.45512181520462036, |
| "learning_rate": 0.00022695013123359578, |
| "loss": 3.0943, |
| "step": 106700 |
| }, |
| { |
| "epoch": 31.113689365671643, |
| "grad_norm": 0.4582662582397461, |
| "learning_rate": 0.00022677515310586175, |
| "loss": 3.0874, |
| "step": 106750 |
| }, |
| { |
| "epoch": 31.128264925373134, |
| "grad_norm": 0.45606929063796997, |
| "learning_rate": 0.00022660017497812772, |
| "loss": 3.1053, |
| "step": 106800 |
| }, |
| { |
| "epoch": 31.142840485074625, |
| "grad_norm": 0.4730152189731598, |
| "learning_rate": 0.00022642519685039367, |
| "loss": 3.0969, |
| "step": 106850 |
| }, |
| { |
| "epoch": 31.15741604477612, |
| "grad_norm": 0.5055202841758728, |
| "learning_rate": 0.00022625021872265966, |
| "loss": 3.1064, |
| "step": 106900 |
| }, |
| { |
| "epoch": 31.17199160447761, |
| "grad_norm": 0.4526134729385376, |
| "learning_rate": 0.0002260752405949256, |
| "loss": 3.1017, |
| "step": 106950 |
| }, |
| { |
| "epoch": 31.186567164179106, |
| "grad_norm": 0.47562670707702637, |
| "learning_rate": 0.0002259002624671916, |
| "loss": 3.113, |
| "step": 107000 |
| }, |
| { |
| "epoch": 31.186567164179106, |
| "eval_accuracy": 0.3738705947944758, |
| "eval_loss": 3.557934284210205, |
| "eval_runtime": 80.9833, |
| "eval_samples_per_second": 205.314, |
| "eval_steps_per_second": 12.842, |
| "step": 107000 |
| }, |
| { |
| "epoch": 31.201142723880597, |
| "grad_norm": 0.4674347937107086, |
| "learning_rate": 0.00022572528433945755, |
| "loss": 3.1066, |
| "step": 107050 |
| }, |
| { |
| "epoch": 31.21571828358209, |
| "grad_norm": 0.44288942217826843, |
| "learning_rate": 0.00022555030621172352, |
| "loss": 3.109, |
| "step": 107100 |
| }, |
| { |
| "epoch": 31.230293843283583, |
| "grad_norm": 0.48940151929855347, |
| "learning_rate": 0.0002253753280839895, |
| "loss": 3.1168, |
| "step": 107150 |
| }, |
| { |
| "epoch": 31.244869402985074, |
| "grad_norm": 0.45776811242103577, |
| "learning_rate": 0.00022520034995625543, |
| "loss": 3.1157, |
| "step": 107200 |
| }, |
| { |
| "epoch": 31.259444962686565, |
| "grad_norm": 0.4868299067020416, |
| "learning_rate": 0.0002250253718285214, |
| "loss": 3.113, |
| "step": 107250 |
| }, |
| { |
| "epoch": 31.27402052238806, |
| "grad_norm": 0.4617083966732025, |
| "learning_rate": 0.00022485039370078737, |
| "loss": 3.1225, |
| "step": 107300 |
| }, |
| { |
| "epoch": 31.28859608208955, |
| "grad_norm": 0.4782308340072632, |
| "learning_rate": 0.00022467541557305335, |
| "loss": 3.1277, |
| "step": 107350 |
| }, |
| { |
| "epoch": 31.303171641791046, |
| "grad_norm": 0.4509190320968628, |
| "learning_rate": 0.00022450043744531932, |
| "loss": 3.126, |
| "step": 107400 |
| }, |
| { |
| "epoch": 31.317747201492537, |
| "grad_norm": 0.45488241314888, |
| "learning_rate": 0.0002243254593175853, |
| "loss": 3.1286, |
| "step": 107450 |
| }, |
| { |
| "epoch": 31.33232276119403, |
| "grad_norm": 0.47506406903266907, |
| "learning_rate": 0.00022415048118985123, |
| "loss": 3.1146, |
| "step": 107500 |
| }, |
| { |
| "epoch": 31.346898320895523, |
| "grad_norm": 0.4671951234340668, |
| "learning_rate": 0.00022397550306211723, |
| "loss": 3.1226, |
| "step": 107550 |
| }, |
| { |
| "epoch": 31.361473880597014, |
| "grad_norm": 0.4664361774921417, |
| "learning_rate": 0.00022380052493438317, |
| "loss": 3.1306, |
| "step": 107600 |
| }, |
| { |
| "epoch": 31.37604944029851, |
| "grad_norm": 0.49000778794288635, |
| "learning_rate": 0.00022362554680664917, |
| "loss": 3.1322, |
| "step": 107650 |
| }, |
| { |
| "epoch": 31.390625, |
| "grad_norm": 0.49472030997276306, |
| "learning_rate": 0.0002234505686789151, |
| "loss": 3.1277, |
| "step": 107700 |
| }, |
| { |
| "epoch": 31.40520055970149, |
| "grad_norm": 0.4811898171901703, |
| "learning_rate": 0.00022327559055118108, |
| "loss": 3.1323, |
| "step": 107750 |
| }, |
| { |
| "epoch": 31.419776119402986, |
| "grad_norm": 0.4730585515499115, |
| "learning_rate": 0.00022310061242344705, |
| "loss": 3.1362, |
| "step": 107800 |
| }, |
| { |
| "epoch": 31.434351679104477, |
| "grad_norm": 0.4924510419368744, |
| "learning_rate": 0.00022292563429571302, |
| "loss": 3.1341, |
| "step": 107850 |
| }, |
| { |
| "epoch": 31.44892723880597, |
| "grad_norm": 0.45863619446754456, |
| "learning_rate": 0.00022275065616797897, |
| "loss": 3.1376, |
| "step": 107900 |
| }, |
| { |
| "epoch": 31.463502798507463, |
| "grad_norm": 0.4936143457889557, |
| "learning_rate": 0.00022257567804024497, |
| "loss": 3.1326, |
| "step": 107950 |
| }, |
| { |
| "epoch": 31.478078358208954, |
| "grad_norm": 0.4574803411960602, |
| "learning_rate": 0.0002224006999125109, |
| "loss": 3.1249, |
| "step": 108000 |
| }, |
| { |
| "epoch": 31.478078358208954, |
| "eval_accuracy": 0.3737095853689511, |
| "eval_loss": 3.5542073249816895, |
| "eval_runtime": 80.9813, |
| "eval_samples_per_second": 205.319, |
| "eval_steps_per_second": 12.842, |
| "step": 108000 |
| }, |
| { |
| "epoch": 31.49265391791045, |
| "grad_norm": 0.4639244079589844, |
| "learning_rate": 0.0002222257217847769, |
| "loss": 3.1312, |
| "step": 108050 |
| }, |
| { |
| "epoch": 31.50722947761194, |
| "grad_norm": 0.48924142122268677, |
| "learning_rate": 0.00022205074365704285, |
| "loss": 3.1482, |
| "step": 108100 |
| }, |
| { |
| "epoch": 31.521805037313435, |
| "grad_norm": 0.4623509347438812, |
| "learning_rate": 0.0002218757655293088, |
| "loss": 3.146, |
| "step": 108150 |
| }, |
| { |
| "epoch": 31.536380597014926, |
| "grad_norm": 0.46016931533813477, |
| "learning_rate": 0.0002217007874015748, |
| "loss": 3.147, |
| "step": 108200 |
| }, |
| { |
| "epoch": 31.550956156716417, |
| "grad_norm": 0.4692668318748474, |
| "learning_rate": 0.00022152580927384073, |
| "loss": 3.1318, |
| "step": 108250 |
| }, |
| { |
| "epoch": 31.56553171641791, |
| "grad_norm": 0.4955618977546692, |
| "learning_rate": 0.00022135083114610673, |
| "loss": 3.1512, |
| "step": 108300 |
| }, |
| { |
| "epoch": 31.580107276119403, |
| "grad_norm": 0.45453202724456787, |
| "learning_rate": 0.00022117585301837268, |
| "loss": 3.1565, |
| "step": 108350 |
| }, |
| { |
| "epoch": 31.594682835820894, |
| "grad_norm": 0.4675719141960144, |
| "learning_rate": 0.00022100087489063865, |
| "loss": 3.1458, |
| "step": 108400 |
| }, |
| { |
| "epoch": 31.60925839552239, |
| "grad_norm": 0.4647286832332611, |
| "learning_rate": 0.00022082589676290462, |
| "loss": 3.1483, |
| "step": 108450 |
| }, |
| { |
| "epoch": 31.62383395522388, |
| "grad_norm": 0.46763309836387634, |
| "learning_rate": 0.0002206509186351706, |
| "loss": 3.1558, |
| "step": 108500 |
| }, |
| { |
| "epoch": 31.638409514925375, |
| "grad_norm": 0.4608073830604553, |
| "learning_rate": 0.00022047594050743653, |
| "loss": 3.1411, |
| "step": 108550 |
| }, |
| { |
| "epoch": 31.652985074626866, |
| "grad_norm": 0.4496559202671051, |
| "learning_rate": 0.00022030096237970253, |
| "loss": 3.149, |
| "step": 108600 |
| }, |
| { |
| "epoch": 31.667560634328357, |
| "grad_norm": 0.4729684293270111, |
| "learning_rate": 0.00022012598425196847, |
| "loss": 3.1499, |
| "step": 108650 |
| }, |
| { |
| "epoch": 31.68213619402985, |
| "grad_norm": 0.48932141065597534, |
| "learning_rate": 0.00021995100612423447, |
| "loss": 3.1466, |
| "step": 108700 |
| }, |
| { |
| "epoch": 31.696711753731343, |
| "grad_norm": 0.46674734354019165, |
| "learning_rate": 0.0002197760279965004, |
| "loss": 3.1553, |
| "step": 108750 |
| }, |
| { |
| "epoch": 31.711287313432837, |
| "grad_norm": 0.45925435423851013, |
| "learning_rate": 0.0002196010498687664, |
| "loss": 3.1536, |
| "step": 108800 |
| }, |
| { |
| "epoch": 31.72586287313433, |
| "grad_norm": 0.4495401382446289, |
| "learning_rate": 0.00021942607174103235, |
| "loss": 3.15, |
| "step": 108850 |
| }, |
| { |
| "epoch": 31.74043843283582, |
| "grad_norm": 0.4811021387577057, |
| "learning_rate": 0.00021925109361329833, |
| "loss": 3.1417, |
| "step": 108900 |
| }, |
| { |
| "epoch": 31.755013992537314, |
| "grad_norm": 0.469148725271225, |
| "learning_rate": 0.0002190761154855643, |
| "loss": 3.1587, |
| "step": 108950 |
| }, |
| { |
| "epoch": 31.769589552238806, |
| "grad_norm": 0.5063664317131042, |
| "learning_rate": 0.00021890113735783024, |
| "loss": 3.1627, |
| "step": 109000 |
| }, |
| { |
| "epoch": 31.769589552238806, |
| "eval_accuracy": 0.37450910073999605, |
| "eval_loss": 3.544800043106079, |
| "eval_runtime": 81.0093, |
| "eval_samples_per_second": 205.248, |
| "eval_steps_per_second": 12.838, |
| "step": 109000 |
| }, |
| { |
| "epoch": 31.784165111940297, |
| "grad_norm": 0.4680740237236023, |
| "learning_rate": 0.0002187261592300962, |
| "loss": 3.1437, |
| "step": 109050 |
| }, |
| { |
| "epoch": 31.79874067164179, |
| "grad_norm": 0.465648353099823, |
| "learning_rate": 0.00021855118110236218, |
| "loss": 3.1471, |
| "step": 109100 |
| }, |
| { |
| "epoch": 31.813316231343283, |
| "grad_norm": 0.46208086609840393, |
| "learning_rate": 0.00021837620297462815, |
| "loss": 3.1582, |
| "step": 109150 |
| }, |
| { |
| "epoch": 31.827891791044777, |
| "grad_norm": 0.4603714942932129, |
| "learning_rate": 0.0002182012248468941, |
| "loss": 3.1493, |
| "step": 109200 |
| }, |
| { |
| "epoch": 31.84246735074627, |
| "grad_norm": 0.4754786491394043, |
| "learning_rate": 0.0002180262467191601, |
| "loss": 3.1541, |
| "step": 109250 |
| }, |
| { |
| "epoch": 31.85704291044776, |
| "grad_norm": 0.47437232732772827, |
| "learning_rate": 0.00021785126859142604, |
| "loss": 3.1483, |
| "step": 109300 |
| }, |
| { |
| "epoch": 31.871618470149254, |
| "grad_norm": 0.4557444453239441, |
| "learning_rate": 0.00021767629046369203, |
| "loss": 3.1499, |
| "step": 109350 |
| }, |
| { |
| "epoch": 31.886194029850746, |
| "grad_norm": 0.4662182033061981, |
| "learning_rate": 0.00021750131233595798, |
| "loss": 3.1529, |
| "step": 109400 |
| }, |
| { |
| "epoch": 31.90076958955224, |
| "grad_norm": 0.5064869523048401, |
| "learning_rate": 0.00021732633420822397, |
| "loss": 3.151, |
| "step": 109450 |
| }, |
| { |
| "epoch": 31.91534514925373, |
| "grad_norm": 0.47745054960250854, |
| "learning_rate": 0.00021715135608048992, |
| "loss": 3.1649, |
| "step": 109500 |
| }, |
| { |
| "epoch": 31.929920708955223, |
| "grad_norm": 0.45016202330589294, |
| "learning_rate": 0.0002169763779527559, |
| "loss": 3.1732, |
| "step": 109550 |
| }, |
| { |
| "epoch": 31.944496268656717, |
| "grad_norm": 0.46144726872444153, |
| "learning_rate": 0.00021680139982502186, |
| "loss": 3.1816, |
| "step": 109600 |
| }, |
| { |
| "epoch": 31.95907182835821, |
| "grad_norm": 0.4760236144065857, |
| "learning_rate": 0.00021662642169728783, |
| "loss": 3.1569, |
| "step": 109650 |
| }, |
| { |
| "epoch": 31.973647388059703, |
| "grad_norm": 0.4745504558086395, |
| "learning_rate": 0.00021645144356955377, |
| "loss": 3.16, |
| "step": 109700 |
| }, |
| { |
| "epoch": 31.988222947761194, |
| "grad_norm": 0.43345993757247925, |
| "learning_rate": 0.00021627646544181977, |
| "loss": 3.1708, |
| "step": 109750 |
| }, |
| { |
| "epoch": 32.00262360074627, |
| "grad_norm": 0.48590293526649475, |
| "learning_rate": 0.00021610148731408571, |
| "loss": 3.1383, |
| "step": 109800 |
| }, |
| { |
| "epoch": 32.01719916044776, |
| "grad_norm": 0.4533041715621948, |
| "learning_rate": 0.00021592650918635166, |
| "loss": 3.0848, |
| "step": 109850 |
| }, |
| { |
| "epoch": 32.03177472014925, |
| "grad_norm": 0.4639461636543274, |
| "learning_rate": 0.00021575153105861766, |
| "loss": 3.079, |
| "step": 109900 |
| }, |
| { |
| "epoch": 32.04635027985075, |
| "grad_norm": 0.5041998624801636, |
| "learning_rate": 0.0002155765529308836, |
| "loss": 3.0844, |
| "step": 109950 |
| }, |
| { |
| "epoch": 32.06092583955224, |
| "grad_norm": 0.45482882857322693, |
| "learning_rate": 0.0002154015748031496, |
| "loss": 3.0785, |
| "step": 110000 |
| }, |
| { |
| "epoch": 32.06092583955224, |
| "eval_accuracy": 0.3737993881406436, |
| "eval_loss": 3.5565266609191895, |
| "eval_runtime": 80.9883, |
| "eval_samples_per_second": 205.301, |
| "eval_steps_per_second": 12.841, |
| "step": 110000 |
| }, |
| { |
| "epoch": 32.07550139925373, |
| "grad_norm": 0.4785085618495941, |
| "learning_rate": 0.00021522659667541554, |
| "loss": 3.0837, |
| "step": 110050 |
| }, |
| { |
| "epoch": 32.09007695895522, |
| "grad_norm": 0.46124953031539917, |
| "learning_rate": 0.00021505161854768154, |
| "loss": 3.073, |
| "step": 110100 |
| }, |
| { |
| "epoch": 32.104652518656714, |
| "grad_norm": 0.45453038811683655, |
| "learning_rate": 0.00021487664041994748, |
| "loss": 3.0919, |
| "step": 110150 |
| }, |
| { |
| "epoch": 32.11922807835821, |
| "grad_norm": 0.47057774662971497, |
| "learning_rate": 0.00021470166229221345, |
| "loss": 3.1097, |
| "step": 110200 |
| }, |
| { |
| "epoch": 32.1338036380597, |
| "grad_norm": 0.4835250675678253, |
| "learning_rate": 0.00021452668416447942, |
| "loss": 3.0993, |
| "step": 110250 |
| }, |
| { |
| "epoch": 32.148379197761194, |
| "grad_norm": 0.4598732888698578, |
| "learning_rate": 0.0002143517060367454, |
| "loss": 3.0939, |
| "step": 110300 |
| }, |
| { |
| "epoch": 32.162954757462686, |
| "grad_norm": 0.47464901208877563, |
| "learning_rate": 0.00021417672790901134, |
| "loss": 3.0838, |
| "step": 110350 |
| }, |
| { |
| "epoch": 32.17753031716418, |
| "grad_norm": 0.4422462582588196, |
| "learning_rate": 0.00021400174978127733, |
| "loss": 3.1083, |
| "step": 110400 |
| }, |
| { |
| "epoch": 32.192105876865675, |
| "grad_norm": 0.4594365656375885, |
| "learning_rate": 0.00021382677165354328, |
| "loss": 3.1046, |
| "step": 110450 |
| }, |
| { |
| "epoch": 32.206681436567166, |
| "grad_norm": 0.49147263169288635, |
| "learning_rate": 0.00021365179352580928, |
| "loss": 3.1058, |
| "step": 110500 |
| }, |
| { |
| "epoch": 32.22125699626866, |
| "grad_norm": 0.46081140637397766, |
| "learning_rate": 0.00021347681539807522, |
| "loss": 3.1097, |
| "step": 110550 |
| }, |
| { |
| "epoch": 32.23583255597015, |
| "grad_norm": 0.4752926528453827, |
| "learning_rate": 0.0002133018372703412, |
| "loss": 3.1038, |
| "step": 110600 |
| }, |
| { |
| "epoch": 32.25040811567164, |
| "grad_norm": 0.4841436743736267, |
| "learning_rate": 0.00021312685914260716, |
| "loss": 3.1024, |
| "step": 110650 |
| }, |
| { |
| "epoch": 32.26498367537314, |
| "grad_norm": 0.49166467785835266, |
| "learning_rate": 0.00021295188101487313, |
| "loss": 3.1066, |
| "step": 110700 |
| }, |
| { |
| "epoch": 32.27955923507463, |
| "grad_norm": 0.48016849160194397, |
| "learning_rate": 0.0002127769028871391, |
| "loss": 3.1129, |
| "step": 110750 |
| }, |
| { |
| "epoch": 32.29413479477612, |
| "grad_norm": 0.47091442346572876, |
| "learning_rate": 0.00021260192475940504, |
| "loss": 3.1195, |
| "step": 110800 |
| }, |
| { |
| "epoch": 32.30871035447761, |
| "grad_norm": 0.49015673995018005, |
| "learning_rate": 0.00021242694663167102, |
| "loss": 3.116, |
| "step": 110850 |
| }, |
| { |
| "epoch": 32.3232859141791, |
| "grad_norm": 0.4439619481563568, |
| "learning_rate": 0.00021225196850393699, |
| "loss": 3.1255, |
| "step": 110900 |
| }, |
| { |
| "epoch": 32.337861473880594, |
| "grad_norm": 0.4515083134174347, |
| "learning_rate": 0.00021207699037620296, |
| "loss": 3.1216, |
| "step": 110950 |
| }, |
| { |
| "epoch": 32.35243703358209, |
| "grad_norm": 0.47670596837997437, |
| "learning_rate": 0.0002119020122484689, |
| "loss": 3.1107, |
| "step": 111000 |
| }, |
| { |
| "epoch": 32.35243703358209, |
| "eval_accuracy": 0.37408480324071486, |
| "eval_loss": 3.5553367137908936, |
| "eval_runtime": 81.0462, |
| "eval_samples_per_second": 205.155, |
| "eval_steps_per_second": 12.832, |
| "step": 111000 |
| }, |
| { |
| "epoch": 32.36701259328358, |
| "grad_norm": 0.5058426260948181, |
| "learning_rate": 0.0002117270341207349, |
| "loss": 3.1131, |
| "step": 111050 |
| }, |
| { |
| "epoch": 32.381588152985074, |
| "grad_norm": 0.47986745834350586, |
| "learning_rate": 0.00021155205599300084, |
| "loss": 3.1077, |
| "step": 111100 |
| }, |
| { |
| "epoch": 32.396163712686565, |
| "grad_norm": 0.46111610531806946, |
| "learning_rate": 0.00021137707786526684, |
| "loss": 3.1129, |
| "step": 111150 |
| }, |
| { |
| "epoch": 32.41073927238806, |
| "grad_norm": 0.439547061920166, |
| "learning_rate": 0.00021120209973753278, |
| "loss": 3.1247, |
| "step": 111200 |
| }, |
| { |
| "epoch": 32.425314832089555, |
| "grad_norm": 0.466490238904953, |
| "learning_rate": 0.00021102712160979875, |
| "loss": 3.1274, |
| "step": 111250 |
| }, |
| { |
| "epoch": 32.439890391791046, |
| "grad_norm": 0.48718830943107605, |
| "learning_rate": 0.00021085214348206472, |
| "loss": 3.1277, |
| "step": 111300 |
| }, |
| { |
| "epoch": 32.45446595149254, |
| "grad_norm": 0.47072693705558777, |
| "learning_rate": 0.0002106771653543307, |
| "loss": 3.1258, |
| "step": 111350 |
| }, |
| { |
| "epoch": 32.46904151119403, |
| "grad_norm": 0.47657322883605957, |
| "learning_rate": 0.00021050218722659666, |
| "loss": 3.1353, |
| "step": 111400 |
| }, |
| { |
| "epoch": 32.48361707089552, |
| "grad_norm": 0.49400582909584045, |
| "learning_rate": 0.00021032720909886264, |
| "loss": 3.1181, |
| "step": 111450 |
| }, |
| { |
| "epoch": 32.49819263059702, |
| "grad_norm": 0.47606533765792847, |
| "learning_rate": 0.00021015223097112858, |
| "loss": 3.1306, |
| "step": 111500 |
| }, |
| { |
| "epoch": 32.51276819029851, |
| "grad_norm": 0.4946952164173126, |
| "learning_rate": 0.00020997725284339458, |
| "loss": 3.1323, |
| "step": 111550 |
| }, |
| { |
| "epoch": 32.52734375, |
| "grad_norm": 0.45804789662361145, |
| "learning_rate": 0.00020980227471566052, |
| "loss": 3.121, |
| "step": 111600 |
| }, |
| { |
| "epoch": 32.54191930970149, |
| "grad_norm": 0.4503360986709595, |
| "learning_rate": 0.00020962729658792646, |
| "loss": 3.1391, |
| "step": 111650 |
| }, |
| { |
| "epoch": 32.55649486940298, |
| "grad_norm": 0.49889054894447327, |
| "learning_rate": 0.00020945231846019246, |
| "loss": 3.1246, |
| "step": 111700 |
| }, |
| { |
| "epoch": 32.57107042910448, |
| "grad_norm": 0.46974316239356995, |
| "learning_rate": 0.0002092773403324584, |
| "loss": 3.1442, |
| "step": 111750 |
| }, |
| { |
| "epoch": 32.58564598880597, |
| "grad_norm": 0.46693581342697144, |
| "learning_rate": 0.0002091023622047244, |
| "loss": 3.139, |
| "step": 111800 |
| }, |
| { |
| "epoch": 32.60022154850746, |
| "grad_norm": 0.46260032057762146, |
| "learning_rate": 0.00020892738407699035, |
| "loss": 3.1357, |
| "step": 111850 |
| }, |
| { |
| "epoch": 32.614797108208954, |
| "grad_norm": 0.47752782702445984, |
| "learning_rate": 0.00020875240594925632, |
| "loss": 3.1336, |
| "step": 111900 |
| }, |
| { |
| "epoch": 32.629372667910445, |
| "grad_norm": 0.5107750296592712, |
| "learning_rate": 0.0002085774278215223, |
| "loss": 3.1347, |
| "step": 111950 |
| }, |
| { |
| "epoch": 32.64394822761194, |
| "grad_norm": 0.4821491539478302, |
| "learning_rate": 0.00020840244969378826, |
| "loss": 3.1327, |
| "step": 112000 |
| }, |
| { |
| "epoch": 32.64394822761194, |
| "eval_accuracy": 0.3742285112148126, |
| "eval_loss": 3.548354387283325, |
| "eval_runtime": 80.9257, |
| "eval_samples_per_second": 205.46, |
| "eval_steps_per_second": 12.851, |
| "step": 112000 |
| }, |
| { |
| "epoch": 32.658523787313435, |
| "grad_norm": 0.4635300934314728, |
| "learning_rate": 0.00020822747156605423, |
| "loss": 3.1473, |
| "step": 112050 |
| }, |
| { |
| "epoch": 32.673099347014926, |
| "grad_norm": 0.4636472761631012, |
| "learning_rate": 0.0002080524934383202, |
| "loss": 3.1355, |
| "step": 112100 |
| }, |
| { |
| "epoch": 32.68767490671642, |
| "grad_norm": 0.49074453115463257, |
| "learning_rate": 0.00020787751531058614, |
| "loss": 3.1478, |
| "step": 112150 |
| }, |
| { |
| "epoch": 32.70225046641791, |
| "grad_norm": 0.4680987000465393, |
| "learning_rate": 0.00020770253718285214, |
| "loss": 3.1419, |
| "step": 112200 |
| }, |
| { |
| "epoch": 32.716826026119406, |
| "grad_norm": 0.46409615874290466, |
| "learning_rate": 0.00020752755905511808, |
| "loss": 3.1408, |
| "step": 112250 |
| }, |
| { |
| "epoch": 32.7314015858209, |
| "grad_norm": 0.528548538684845, |
| "learning_rate": 0.00020735258092738408, |
| "loss": 3.1428, |
| "step": 112300 |
| }, |
| { |
| "epoch": 32.74597714552239, |
| "grad_norm": 0.4838976263999939, |
| "learning_rate": 0.00020717760279965002, |
| "loss": 3.1356, |
| "step": 112350 |
| }, |
| { |
| "epoch": 32.76055270522388, |
| "grad_norm": 0.4824446737766266, |
| "learning_rate": 0.000207002624671916, |
| "loss": 3.138, |
| "step": 112400 |
| }, |
| { |
| "epoch": 32.77512826492537, |
| "grad_norm": 0.48765483498573303, |
| "learning_rate": 0.00020682764654418197, |
| "loss": 3.15, |
| "step": 112450 |
| }, |
| { |
| "epoch": 32.78970382462687, |
| "grad_norm": 0.47936880588531494, |
| "learning_rate": 0.00020665266841644794, |
| "loss": 3.1503, |
| "step": 112500 |
| }, |
| { |
| "epoch": 32.80427938432836, |
| "grad_norm": 0.48564252257347107, |
| "learning_rate": 0.00020647769028871388, |
| "loss": 3.1397, |
| "step": 112550 |
| }, |
| { |
| "epoch": 32.81885494402985, |
| "grad_norm": 0.4732897877693176, |
| "learning_rate": 0.00020630271216097985, |
| "loss": 3.1484, |
| "step": 112600 |
| }, |
| { |
| "epoch": 32.83343050373134, |
| "grad_norm": 0.474088579416275, |
| "learning_rate": 0.00020612773403324582, |
| "loss": 3.1595, |
| "step": 112650 |
| }, |
| { |
| "epoch": 32.848006063432834, |
| "grad_norm": 0.4689086079597473, |
| "learning_rate": 0.0002059527559055118, |
| "loss": 3.1495, |
| "step": 112700 |
| }, |
| { |
| "epoch": 32.862581623134325, |
| "grad_norm": 0.463306725025177, |
| "learning_rate": 0.00020577777777777776, |
| "loss": 3.1528, |
| "step": 112750 |
| }, |
| { |
| "epoch": 32.87715718283582, |
| "grad_norm": 0.4703214466571808, |
| "learning_rate": 0.0002056027996500437, |
| "loss": 3.1478, |
| "step": 112800 |
| }, |
| { |
| "epoch": 32.891732742537314, |
| "grad_norm": 0.4781394302845001, |
| "learning_rate": 0.0002054278215223097, |
| "loss": 3.1519, |
| "step": 112850 |
| }, |
| { |
| "epoch": 32.906308302238806, |
| "grad_norm": 0.4915687143802643, |
| "learning_rate": 0.00020525284339457565, |
| "loss": 3.1538, |
| "step": 112900 |
| }, |
| { |
| "epoch": 32.9208838619403, |
| "grad_norm": 0.45098525285720825, |
| "learning_rate": 0.00020507786526684164, |
| "loss": 3.1422, |
| "step": 112950 |
| }, |
| { |
| "epoch": 32.93545942164179, |
| "grad_norm": 0.492906779050827, |
| "learning_rate": 0.0002049028871391076, |
| "loss": 3.1514, |
| "step": 113000 |
| }, |
| { |
| "epoch": 32.93545942164179, |
| "eval_accuracy": 0.37444248426715465, |
| "eval_loss": 3.546422004699707, |
| "eval_runtime": 80.9923, |
| "eval_samples_per_second": 205.291, |
| "eval_steps_per_second": 12.841, |
| "step": 113000 |
| }, |
| { |
| "epoch": 32.950034981343286, |
| "grad_norm": 0.46875256299972534, |
| "learning_rate": 0.00020472790901137356, |
| "loss": 3.1516, |
| "step": 113050 |
| }, |
| { |
| "epoch": 32.96461054104478, |
| "grad_norm": 0.45780235528945923, |
| "learning_rate": 0.00020455293088363953, |
| "loss": 3.1617, |
| "step": 113100 |
| }, |
| { |
| "epoch": 32.97918610074627, |
| "grad_norm": 0.466714471578598, |
| "learning_rate": 0.0002043779527559055, |
| "loss": 3.1532, |
| "step": 113150 |
| }, |
| { |
| "epoch": 32.99376166044776, |
| "grad_norm": 0.47221630811691284, |
| "learning_rate": 0.00020420297462817144, |
| "loss": 3.1665, |
| "step": 113200 |
| }, |
| { |
| "epoch": 33.008162313432834, |
| "grad_norm": 0.46910685300827026, |
| "learning_rate": 0.00020402799650043744, |
| "loss": 3.0957, |
| "step": 113250 |
| }, |
| { |
| "epoch": 33.022737873134325, |
| "grad_norm": 0.47130286693573, |
| "learning_rate": 0.00020385301837270338, |
| "loss": 3.0662, |
| "step": 113300 |
| }, |
| { |
| "epoch": 33.03731343283582, |
| "grad_norm": 0.48261669278144836, |
| "learning_rate": 0.00020367804024496938, |
| "loss": 3.0748, |
| "step": 113350 |
| }, |
| { |
| "epoch": 33.051888992537314, |
| "grad_norm": 0.46619370579719543, |
| "learning_rate": 0.00020350306211723533, |
| "loss": 3.0783, |
| "step": 113400 |
| }, |
| { |
| "epoch": 33.066464552238806, |
| "grad_norm": 0.4570696949958801, |
| "learning_rate": 0.00020332808398950127, |
| "loss": 3.0846, |
| "step": 113450 |
| }, |
| { |
| "epoch": 33.0810401119403, |
| "grad_norm": 0.4632129967212677, |
| "learning_rate": 0.00020315310586176727, |
| "loss": 3.0783, |
| "step": 113500 |
| }, |
| { |
| "epoch": 33.09561567164179, |
| "grad_norm": 0.46962153911590576, |
| "learning_rate": 0.0002029781277340332, |
| "loss": 3.0921, |
| "step": 113550 |
| }, |
| { |
| "epoch": 33.110191231343286, |
| "grad_norm": 0.4779681861400604, |
| "learning_rate": 0.0002028031496062992, |
| "loss": 3.0828, |
| "step": 113600 |
| }, |
| { |
| "epoch": 33.12476679104478, |
| "grad_norm": 0.48863279819488525, |
| "learning_rate": 0.00020262817147856515, |
| "loss": 3.0816, |
| "step": 113650 |
| }, |
| { |
| "epoch": 33.13934235074627, |
| "grad_norm": 0.4736406207084656, |
| "learning_rate": 0.00020245319335083112, |
| "loss": 3.0919, |
| "step": 113700 |
| }, |
| { |
| "epoch": 33.15391791044776, |
| "grad_norm": 0.49636760354042053, |
| "learning_rate": 0.0002022782152230971, |
| "loss": 3.0978, |
| "step": 113750 |
| }, |
| { |
| "epoch": 33.16849347014925, |
| "grad_norm": 0.48014184832572937, |
| "learning_rate": 0.00020210323709536306, |
| "loss": 3.0944, |
| "step": 113800 |
| }, |
| { |
| "epoch": 33.18306902985075, |
| "grad_norm": 0.4676609933376312, |
| "learning_rate": 0.000201928258967629, |
| "loss": 3.0811, |
| "step": 113850 |
| }, |
| { |
| "epoch": 33.19764458955224, |
| "grad_norm": 0.46274760365486145, |
| "learning_rate": 0.000201753280839895, |
| "loss": 3.1013, |
| "step": 113900 |
| }, |
| { |
| "epoch": 33.21222014925373, |
| "grad_norm": 0.47959813475608826, |
| "learning_rate": 0.00020157830271216095, |
| "loss": 3.1003, |
| "step": 113950 |
| }, |
| { |
| "epoch": 33.22679570895522, |
| "grad_norm": 0.49425938725471497, |
| "learning_rate": 0.00020140332458442695, |
| "loss": 3.1022, |
| "step": 114000 |
| }, |
| { |
| "epoch": 33.22679570895522, |
| "eval_accuracy": 0.3739919403483618, |
| "eval_loss": 3.5567288398742676, |
| "eval_runtime": 81.0289, |
| "eval_samples_per_second": 205.198, |
| "eval_steps_per_second": 12.835, |
| "step": 114000 |
| }, |
| { |
| "epoch": 33.241371268656714, |
| "grad_norm": 0.49249497056007385, |
| "learning_rate": 0.0002012283464566929, |
| "loss": 3.0883, |
| "step": 114050 |
| }, |
| { |
| "epoch": 33.25594682835821, |
| "grad_norm": 0.5082933902740479, |
| "learning_rate": 0.0002010533683289589, |
| "loss": 3.0939, |
| "step": 114100 |
| }, |
| { |
| "epoch": 33.2705223880597, |
| "grad_norm": 0.46832650899887085, |
| "learning_rate": 0.00020087839020122483, |
| "loss": 3.1014, |
| "step": 114150 |
| }, |
| { |
| "epoch": 33.285097947761194, |
| "grad_norm": 0.4847492575645447, |
| "learning_rate": 0.0002007034120734908, |
| "loss": 3.1044, |
| "step": 114200 |
| }, |
| { |
| "epoch": 33.299673507462686, |
| "grad_norm": 0.46897026896476746, |
| "learning_rate": 0.00020052843394575677, |
| "loss": 3.099, |
| "step": 114250 |
| }, |
| { |
| "epoch": 33.31424906716418, |
| "grad_norm": 0.48055335879325867, |
| "learning_rate": 0.00020035345581802274, |
| "loss": 3.106, |
| "step": 114300 |
| }, |
| { |
| "epoch": 33.328824626865675, |
| "grad_norm": 0.5022363066673279, |
| "learning_rate": 0.00020017847769028869, |
| "loss": 3.1086, |
| "step": 114350 |
| }, |
| { |
| "epoch": 33.343400186567166, |
| "grad_norm": 0.4915386140346527, |
| "learning_rate": 0.00020000349956255466, |
| "loss": 3.1036, |
| "step": 114400 |
| }, |
| { |
| "epoch": 33.35797574626866, |
| "grad_norm": 0.4777482748031616, |
| "learning_rate": 0.00019982852143482063, |
| "loss": 3.1066, |
| "step": 114450 |
| }, |
| { |
| "epoch": 33.37255130597015, |
| "grad_norm": 0.46391403675079346, |
| "learning_rate": 0.00019965354330708657, |
| "loss": 3.1072, |
| "step": 114500 |
| }, |
| { |
| "epoch": 33.38712686567164, |
| "grad_norm": 0.48125559091567993, |
| "learning_rate": 0.00019947856517935257, |
| "loss": 3.1137, |
| "step": 114550 |
| }, |
| { |
| "epoch": 33.40170242537314, |
| "grad_norm": 0.4890342354774475, |
| "learning_rate": 0.0001993035870516185, |
| "loss": 3.1129, |
| "step": 114600 |
| }, |
| { |
| "epoch": 33.41627798507463, |
| "grad_norm": 0.4634007513523102, |
| "learning_rate": 0.0001991286089238845, |
| "loss": 3.1203, |
| "step": 114650 |
| }, |
| { |
| "epoch": 33.43085354477612, |
| "grad_norm": 0.5099166035652161, |
| "learning_rate": 0.00019895363079615045, |
| "loss": 3.1126, |
| "step": 114700 |
| }, |
| { |
| "epoch": 33.44542910447761, |
| "grad_norm": 0.48662975430488586, |
| "learning_rate": 0.00019877865266841645, |
| "loss": 3.1255, |
| "step": 114750 |
| }, |
| { |
| "epoch": 33.4600046641791, |
| "grad_norm": 0.46207714080810547, |
| "learning_rate": 0.0001986036745406824, |
| "loss": 3.1308, |
| "step": 114800 |
| }, |
| { |
| "epoch": 33.474580223880594, |
| "grad_norm": 0.4985235035419464, |
| "learning_rate": 0.00019842869641294836, |
| "loss": 3.125, |
| "step": 114850 |
| }, |
| { |
| "epoch": 33.48915578358209, |
| "grad_norm": 0.511412501335144, |
| "learning_rate": 0.00019825371828521433, |
| "loss": 3.116, |
| "step": 114900 |
| }, |
| { |
| "epoch": 33.50373134328358, |
| "grad_norm": 0.47149890661239624, |
| "learning_rate": 0.0001980787401574803, |
| "loss": 3.1232, |
| "step": 114950 |
| }, |
| { |
| "epoch": 33.518306902985074, |
| "grad_norm": 0.48918309807777405, |
| "learning_rate": 0.00019790376202974625, |
| "loss": 3.1056, |
| "step": 115000 |
| }, |
| { |
| "epoch": 33.518306902985074, |
| "eval_accuracy": 0.37393838823680203, |
| "eval_loss": 3.5551340579986572, |
| "eval_runtime": 81.0751, |
| "eval_samples_per_second": 205.082, |
| "eval_steps_per_second": 12.828, |
| "step": 115000 |
| }, |
| { |
| "epoch": 33.532882462686565, |
| "grad_norm": 0.48534640669822693, |
| "learning_rate": 0.00019772878390201225, |
| "loss": 3.1205, |
| "step": 115050 |
| }, |
| { |
| "epoch": 33.54745802238806, |
| "grad_norm": 0.46837061643600464, |
| "learning_rate": 0.0001975538057742782, |
| "loss": 3.1311, |
| "step": 115100 |
| }, |
| { |
| "epoch": 33.562033582089555, |
| "grad_norm": 0.4587228000164032, |
| "learning_rate": 0.0001973788276465442, |
| "loss": 3.1203, |
| "step": 115150 |
| }, |
| { |
| "epoch": 33.576609141791046, |
| "grad_norm": 0.4965645670890808, |
| "learning_rate": 0.00019720384951881013, |
| "loss": 3.1255, |
| "step": 115200 |
| }, |
| { |
| "epoch": 33.59118470149254, |
| "grad_norm": 0.48364168405532837, |
| "learning_rate": 0.00019702887139107607, |
| "loss": 3.134, |
| "step": 115250 |
| }, |
| { |
| "epoch": 33.60576026119403, |
| "grad_norm": 0.46811044216156006, |
| "learning_rate": 0.00019685389326334207, |
| "loss": 3.1181, |
| "step": 115300 |
| }, |
| { |
| "epoch": 33.62033582089552, |
| "grad_norm": 0.48604077100753784, |
| "learning_rate": 0.00019667891513560802, |
| "loss": 3.1307, |
| "step": 115350 |
| }, |
| { |
| "epoch": 33.63491138059702, |
| "grad_norm": 0.4842829406261444, |
| "learning_rate": 0.00019650393700787401, |
| "loss": 3.1237, |
| "step": 115400 |
| }, |
| { |
| "epoch": 33.64948694029851, |
| "grad_norm": 0.45538443326950073, |
| "learning_rate": 0.00019632895888013996, |
| "loss": 3.1286, |
| "step": 115450 |
| }, |
| { |
| "epoch": 33.6640625, |
| "grad_norm": 0.49470046162605286, |
| "learning_rate": 0.00019615398075240593, |
| "loss": 3.1251, |
| "step": 115500 |
| }, |
| { |
| "epoch": 33.67863805970149, |
| "grad_norm": 0.4743928015232086, |
| "learning_rate": 0.0001959790026246719, |
| "loss": 3.1342, |
| "step": 115550 |
| }, |
| { |
| "epoch": 33.69321361940298, |
| "grad_norm": 0.4796803295612335, |
| "learning_rate": 0.00019580402449693787, |
| "loss": 3.1392, |
| "step": 115600 |
| }, |
| { |
| "epoch": 33.70778917910448, |
| "grad_norm": 0.5133796334266663, |
| "learning_rate": 0.0001956290463692038, |
| "loss": 3.1432, |
| "step": 115650 |
| }, |
| { |
| "epoch": 33.72236473880597, |
| "grad_norm": 0.5081549882888794, |
| "learning_rate": 0.0001954540682414698, |
| "loss": 3.1458, |
| "step": 115700 |
| }, |
| { |
| "epoch": 33.73694029850746, |
| "grad_norm": 0.4586086571216583, |
| "learning_rate": 0.00019527909011373575, |
| "loss": 3.1322, |
| "step": 115750 |
| }, |
| { |
| "epoch": 33.751515858208954, |
| "grad_norm": 0.48370683193206787, |
| "learning_rate": 0.00019510411198600175, |
| "loss": 3.1351, |
| "step": 115800 |
| }, |
| { |
| "epoch": 33.766091417910445, |
| "grad_norm": 0.4725615382194519, |
| "learning_rate": 0.0001949291338582677, |
| "loss": 3.1196, |
| "step": 115850 |
| }, |
| { |
| "epoch": 33.78066697761194, |
| "grad_norm": 0.4887787103652954, |
| "learning_rate": 0.00019475415573053367, |
| "loss": 3.1356, |
| "step": 115900 |
| }, |
| { |
| "epoch": 33.795242537313435, |
| "grad_norm": 0.4757724702358246, |
| "learning_rate": 0.00019457917760279964, |
| "loss": 3.1503, |
| "step": 115950 |
| }, |
| { |
| "epoch": 33.809818097014926, |
| "grad_norm": 0.45303794741630554, |
| "learning_rate": 0.0001944041994750656, |
| "loss": 3.1411, |
| "step": 116000 |
| }, |
| { |
| "epoch": 33.809818097014926, |
| "eval_accuracy": 0.37475544045317094, |
| "eval_loss": 3.541987657546997, |
| "eval_runtime": 81.014, |
| "eval_samples_per_second": 205.236, |
| "eval_steps_per_second": 12.837, |
| "step": 116000 |
| }, |
| { |
| "epoch": 33.809818097014926, |
| "step": 116000, |
| "total_flos": 2.424310847373312e+18, |
| "train_loss": 0.9799438093777361, |
| "train_runtime": 26721.6103, |
| "train_samples_per_second": 513.483, |
| "train_steps_per_second": 6.42 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 171550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 20, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 14 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.424310847373312e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|