diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18912 @@ +{ + "best_metric": 1.00823629, + "best_model_checkpoint": "/mnt/bn/haiyang-dataset-lq/medical/outputclass/qwen2-vl-2b-instruct/v5-20241113-121646/checkpoint-9430", + "epoch": 10.0, + "eval_steps": 10000, + "global_step": 9430, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "acc": 0.91473788, + "epoch": 0.0010604453870625664, + "grad_norm": 13.708544731140137, + "learning_rate": 0.0, + "loss": 0.25200555, + "memory(GiB)": 22.83, + "step": 1, + "train_speed(iter/s)": 0.097186 + }, + { + "acc": 0.91393471, + "epoch": 0.005302226935312832, + "grad_norm": 11.364119529724121, + "learning_rate": 2.6140058561220443e-06, + "loss": 0.25148368, + "memory(GiB)": 26.31, + "step": 5, + "train_speed(iter/s)": 0.291554 + }, + { + "acc": 0.93851728, + "epoch": 0.010604453870625663, + "grad_norm": 8.12688159942627, + "learning_rate": 3.73979690102038e-06, + "loss": 0.1808401, + "memory(GiB)": 26.31, + "step": 10, + "train_speed(iter/s)": 0.389217 + }, + { + "acc": 0.95571146, + "epoch": 0.015906680805938492, + "grad_norm": 4.948174476623535, + "learning_rate": 4.398342445933593e-06, + "loss": 0.13787798, + "memory(GiB)": 26.31, + "step": 15, + "train_speed(iter/s)": 0.438702 + }, + { + "acc": 0.96221857, + "epoch": 0.021208907741251327, + "grad_norm": 2.2628302574157715, + "learning_rate": 4.865587945918714e-06, + "loss": 0.11728274, + "memory(GiB)": 26.31, + "step": 20, + "train_speed(iter/s)": 0.46779 + }, + { + "acc": 0.96878824, + "epoch": 0.026511134676564158, + "grad_norm": 1.461674451828003, + "learning_rate": 5.2280117122440885e-06, + "loss": 0.08844265, + "memory(GiB)": 26.31, + "step": 25, + "train_speed(iter/s)": 0.4881 + }, + { + "acc": 0.97463093, + "epoch": 0.031813361611876985, + "grad_norm": 1.4498542547225952, + "learning_rate": 5.524133490831929e-06, + "loss": 0.07447982, + "memory(GiB)": 26.31, + "step": 30, + "train_speed(iter/s)": 0.501732 + }, + { + "acc": 0.97848167, + "epoch": 0.03711558854718982, + "grad_norm": 1.6475602388381958, + "learning_rate": 5.774500887225759e-06, + "loss": 0.06788331, + "memory(GiB)": 26.31, + "step": 35, + "train_speed(iter/s)": 0.512 + }, + { + "acc": 0.97805767, + "epoch": 0.042417815482502653, + "grad_norm": 1.8446458578109741, + "learning_rate": 5.99137899081705e-06, + "loss": 0.06491201, + "memory(GiB)": 26.31, + "step": 40, + "train_speed(iter/s)": 0.520488 + }, + { + "acc": 0.97699718, + "epoch": 0.04772004241781548, + "grad_norm": 1.640552043914795, + "learning_rate": 6.182679035745141e-06, + "loss": 0.06084052, + "memory(GiB)": 26.31, + "step": 45, + "train_speed(iter/s)": 0.527455 + }, + { + "acc": 0.98118534, + "epoch": 0.053022269353128315, + "grad_norm": 1.425261378288269, + "learning_rate": 6.353802757142424e-06, + "loss": 0.05228539, + "memory(GiB)": 26.31, + "step": 50, + "train_speed(iter/s)": 0.532614 + }, + { + "acc": 0.98107777, + "epoch": 0.05832449628844114, + "grad_norm": 1.407269835472107, + "learning_rate": 6.508602992822067e-06, + "loss": 0.05452412, + "memory(GiB)": 26.31, + "step": 55, + "train_speed(iter/s)": 0.536881 + }, + { + "acc": 0.98615017, + "epoch": 0.06362672322375397, + "grad_norm": 1.4538545608520508, + "learning_rate": 6.649924535730263e-06, + "loss": 0.0399454, + "memory(GiB)": 26.31, + "step": 60, + "train_speed(iter/s)": 0.541033 + }, + { + "acc": 0.98120651, + "epoch": 0.0689289501590668, + "grad_norm": 2.002455711364746, + "learning_rate": 6.7799277529914054e-06, + "loss": 0.05406591, + "memory(GiB)": 26.31, + "step": 65, + "train_speed(iter/s)": 0.544033 + }, + { + "acc": 0.98403168, + "epoch": 0.07423117709437964, + "grad_norm": 1.1853930950164795, + "learning_rate": 6.900291932124094e-06, + "loss": 0.04809659, + "memory(GiB)": 26.31, + "step": 70, + "train_speed(iter/s)": 0.546646 + }, + { + "acc": 0.98352365, + "epoch": 0.07953340402969247, + "grad_norm": 1.3366847038269043, + "learning_rate": 7.012348302055637e-06, + "loss": 0.05014178, + "memory(GiB)": 26.31, + "step": 75, + "train_speed(iter/s)": 0.549242 + }, + { + "acc": 0.98384953, + "epoch": 0.08483563096500531, + "grad_norm": 1.5215312242507935, + "learning_rate": 7.117170035715385e-06, + "loss": 0.04720557, + "memory(GiB)": 26.31, + "step": 80, + "train_speed(iter/s)": 0.551303 + }, + { + "acc": 0.98184252, + "epoch": 0.09013785790031813, + "grad_norm": 1.1831401586532593, + "learning_rate": 7.215634919156383e-06, + "loss": 0.04520521, + "memory(GiB)": 26.31, + "step": 85, + "train_speed(iter/s)": 0.55303 + }, + { + "acc": 0.98633242, + "epoch": 0.09544008483563096, + "grad_norm": 0.7181859612464905, + "learning_rate": 7.308470080643477e-06, + "loss": 0.03698838, + "memory(GiB)": 26.31, + "step": 90, + "train_speed(iter/s)": 0.554866 + }, + { + "acc": 0.98487539, + "epoch": 0.1007423117709438, + "grad_norm": 0.8339945077896118, + "learning_rate": 7.396284610134086e-06, + "loss": 0.04118965, + "memory(GiB)": 26.31, + "step": 95, + "train_speed(iter/s)": 0.55628 + }, + { + "acc": 0.98523464, + "epoch": 0.10604453870625663, + "grad_norm": 1.019827961921692, + "learning_rate": 7.47959380204076e-06, + "loss": 0.04264031, + "memory(GiB)": 26.31, + "step": 100, + "train_speed(iter/s)": 0.557577 + }, + { + "acc": 0.98460913, + "epoch": 0.11134676564156946, + "grad_norm": 1.2514287233352661, + "learning_rate": 7.558837477037307e-06, + "loss": 0.04326624, + "memory(GiB)": 26.31, + "step": 105, + "train_speed(iter/s)": 0.558938 + }, + { + "acc": 0.98482342, + "epoch": 0.11664899257688228, + "grad_norm": 1.3463575839996338, + "learning_rate": 7.634394037720401e-06, + "loss": 0.04483083, + "memory(GiB)": 26.31, + "step": 110, + "train_speed(iter/s)": 0.560167 + }, + { + "acc": 0.98519001, + "epoch": 0.12195121951219512, + "grad_norm": 1.130424976348877, + "learning_rate": 7.706591397293826e-06, + "loss": 0.04231756, + "memory(GiB)": 26.31, + "step": 115, + "train_speed(iter/s)": 0.561101 + }, + { + "acc": 0.98767662, + "epoch": 0.12725344644750794, + "grad_norm": 1.0245696306228638, + "learning_rate": 7.775715580628599e-06, + "loss": 0.03614664, + "memory(GiB)": 26.31, + "step": 120, + "train_speed(iter/s)": 0.561979 + }, + { + "acc": 0.98472729, + "epoch": 0.1325556733828208, + "grad_norm": 1.5159035921096802, + "learning_rate": 7.842017568366133e-06, + "loss": 0.04319248, + "memory(GiB)": 26.31, + "step": 125, + "train_speed(iter/s)": 0.562763 + }, + { + "acc": 0.98606205, + "epoch": 0.1378579003181336, + "grad_norm": 1.2546530961990356, + "learning_rate": 7.90571879788974e-06, + "loss": 0.04082851, + "memory(GiB)": 26.31, + "step": 130, + "train_speed(iter/s)": 0.56355 + }, + { + "acc": 0.98801918, + "epoch": 0.14316012725344646, + "grad_norm": 0.9412282109260559, + "learning_rate": 7.96701562555669e-06, + "loss": 0.03775344, + "memory(GiB)": 26.31, + "step": 135, + "train_speed(iter/s)": 0.56423 + }, + { + "acc": 0.98443604, + "epoch": 0.14846235418875928, + "grad_norm": 1.3335416316986084, + "learning_rate": 8.02608297702243e-06, + "loss": 0.04015646, + "memory(GiB)": 26.31, + "step": 140, + "train_speed(iter/s)": 0.564868 + }, + { + "acc": 0.98708563, + "epoch": 0.1537645811240721, + "grad_norm": 1.6027027368545532, + "learning_rate": 8.083077356722968e-06, + "loss": 0.04044313, + "memory(GiB)": 26.31, + "step": 145, + "train_speed(iter/s)": 0.565465 + }, + { + "acc": 0.98780003, + "epoch": 0.15906680805938495, + "grad_norm": 1.1246064901351929, + "learning_rate": 8.138139346953973e-06, + "loss": 0.03837634, + "memory(GiB)": 26.31, + "step": 150, + "train_speed(iter/s)": 0.566141 + }, + { + "acc": 0.98864136, + "epoch": 0.16436903499469777, + "grad_norm": 1.0349862575531006, + "learning_rate": 8.191395697023962e-06, + "loss": 0.03336571, + "memory(GiB)": 26.31, + "step": 155, + "train_speed(iter/s)": 0.566841 + }, + { + "acc": 0.98549376, + "epoch": 0.16967126193001061, + "grad_norm": 1.4518648386001587, + "learning_rate": 8.24296108061372e-06, + "loss": 0.04370693, + "memory(GiB)": 26.31, + "step": 160, + "train_speed(iter/s)": 0.567431 + }, + { + "acc": 0.98450546, + "epoch": 0.17497348886532343, + "grad_norm": 1.8398545980453491, + "learning_rate": 8.292939582633615e-06, + "loss": 0.04571437, + "memory(GiB)": 26.31, + "step": 165, + "train_speed(iter/s)": 0.567932 + }, + { + "acc": 0.98669834, + "epoch": 0.18027571580063625, + "grad_norm": 1.194319248199463, + "learning_rate": 8.341425964054718e-06, + "loss": 0.03776242, + "memory(GiB)": 26.31, + "step": 170, + "train_speed(iter/s)": 0.568358 + }, + { + "acc": 0.9872467, + "epoch": 0.1855779427359491, + "grad_norm": 1.2717541456222534, + "learning_rate": 8.388506743347804e-06, + "loss": 0.03852434, + "memory(GiB)": 26.31, + "step": 175, + "train_speed(iter/s)": 0.568766 + }, + { + "acc": 0.98648386, + "epoch": 0.19088016967126192, + "grad_norm": 1.1042882204055786, + "learning_rate": 8.434261125541812e-06, + "loss": 0.0422026, + "memory(GiB)": 26.31, + "step": 180, + "train_speed(iter/s)": 0.569335 + }, + { + "acc": 0.98854618, + "epoch": 0.19618239660657477, + "grad_norm": 1.5004727840423584, + "learning_rate": 8.478761803962609e-06, + "loss": 0.03735459, + "memory(GiB)": 26.31, + "step": 185, + "train_speed(iter/s)": 0.56973 + }, + { + "acc": 0.98710842, + "epoch": 0.2014846235418876, + "grad_norm": 0.9797543883323669, + "learning_rate": 8.52207565503242e-06, + "loss": 0.03711139, + "memory(GiB)": 26.31, + "step": 190, + "train_speed(iter/s)": 0.570088 + }, + { + "acc": 0.98806124, + "epoch": 0.2067868504772004, + "grad_norm": 0.6779474020004272, + "learning_rate": 8.564264342802955e-06, + "loss": 0.03549838, + "memory(GiB)": 26.31, + "step": 195, + "train_speed(iter/s)": 0.570432 + }, + { + "acc": 0.9885643, + "epoch": 0.21208907741251326, + "grad_norm": 0.9963794350624084, + "learning_rate": 8.605384846939095e-06, + "loss": 0.0340589, + "memory(GiB)": 26.31, + "step": 200, + "train_speed(iter/s)": 0.570722 + }, + { + "acc": 0.98800755, + "epoch": 0.21739130434782608, + "grad_norm": 0.8053348660469055, + "learning_rate": 8.645489925498207e-06, + "loss": 0.03213402, + "memory(GiB)": 26.31, + "step": 205, + "train_speed(iter/s)": 0.571029 + }, + { + "acc": 0.98844814, + "epoch": 0.22269353128313893, + "grad_norm": 0.9053013920783997, + "learning_rate": 8.684628521935642e-06, + "loss": 0.03458236, + "memory(GiB)": 26.31, + "step": 210, + "train_speed(iter/s)": 0.571289 + }, + { + "acc": 0.98876429, + "epoch": 0.22799575821845175, + "grad_norm": 0.9718145728111267, + "learning_rate": 8.722846124213128e-06, + "loss": 0.03245977, + "memory(GiB)": 26.31, + "step": 215, + "train_speed(iter/s)": 0.571635 + }, + { + "acc": 0.9877038, + "epoch": 0.23329798515376457, + "grad_norm": 0.8481668829917908, + "learning_rate": 8.760185082618737e-06, + "loss": 0.03817012, + "memory(GiB)": 26.31, + "step": 220, + "train_speed(iter/s)": 0.571873 + }, + { + "acc": 0.98829918, + "epoch": 0.23860021208907742, + "grad_norm": 0.8947212100028992, + "learning_rate": 8.796684891867187e-06, + "loss": 0.03497087, + "memory(GiB)": 26.31, + "step": 225, + "train_speed(iter/s)": 0.572187 + }, + { + "acc": 0.98882122, + "epoch": 0.24390243902439024, + "grad_norm": 0.9148950576782227, + "learning_rate": 8.83238244219216e-06, + "loss": 0.03352001, + "memory(GiB)": 26.31, + "step": 230, + "train_speed(iter/s)": 0.572426 + }, + { + "acc": 0.9899559, + "epoch": 0.2492046659597031, + "grad_norm": 0.8286107182502747, + "learning_rate": 8.867312243432854e-06, + "loss": 0.02834421, + "memory(GiB)": 26.31, + "step": 235, + "train_speed(iter/s)": 0.572807 + }, + { + "acc": 0.98945627, + "epoch": 0.2545068928950159, + "grad_norm": 0.7633819580078125, + "learning_rate": 8.901506625526934e-06, + "loss": 0.03042876, + "memory(GiB)": 26.31, + "step": 240, + "train_speed(iter/s)": 0.573113 + }, + { + "acc": 0.98813782, + "epoch": 0.2598091198303287, + "grad_norm": 1.1705169677734375, + "learning_rate": 8.934995918329474e-06, + "loss": 0.03317465, + "memory(GiB)": 26.31, + "step": 245, + "train_speed(iter/s)": 0.573356 + }, + { + "acc": 0.98917551, + "epoch": 0.2651113467656416, + "grad_norm": 0.8647633194923401, + "learning_rate": 8.967808613264469e-06, + "loss": 0.03340689, + "memory(GiB)": 26.31, + "step": 250, + "train_speed(iter/s)": 0.573591 + }, + { + "acc": 0.98857431, + "epoch": 0.2704135737009544, + "grad_norm": 1.1097129583358765, + "learning_rate": 8.999971508967931e-06, + "loss": 0.03664353, + "memory(GiB)": 26.31, + "step": 255, + "train_speed(iter/s)": 0.573787 + }, + { + "acc": 0.98884392, + "epoch": 0.2757158006362672, + "grad_norm": 0.6874321103096008, + "learning_rate": 9.031509842788075e-06, + "loss": 0.02958034, + "memory(GiB)": 26.31, + "step": 260, + "train_speed(iter/s)": 0.573963 + }, + { + "acc": 0.9894619, + "epoch": 0.28101802757158006, + "grad_norm": 1.0628961324691772, + "learning_rate": 9.062447409759295e-06, + "loss": 0.02991056, + "memory(GiB)": 26.31, + "step": 265, + "train_speed(iter/s)": 0.574138 + }, + { + "acc": 0.98855505, + "epoch": 0.2863202545068929, + "grad_norm": 1.0344219207763672, + "learning_rate": 9.092806670455026e-06, + "loss": 0.03874294, + "memory(GiB)": 26.31, + "step": 270, + "train_speed(iter/s)": 0.574331 + }, + { + "acc": 0.98721523, + "epoch": 0.2916224814422057, + "grad_norm": 1.0998539924621582, + "learning_rate": 9.122608848944111e-06, + "loss": 0.03713042, + "memory(GiB)": 26.31, + "step": 275, + "train_speed(iter/s)": 0.574549 + }, + { + "acc": 0.98913212, + "epoch": 0.29692470837751855, + "grad_norm": 0.7507591247558594, + "learning_rate": 9.151874021920764e-06, + "loss": 0.03179551, + "memory(GiB)": 26.31, + "step": 280, + "train_speed(iter/s)": 0.57478 + }, + { + "acc": 0.98866091, + "epoch": 0.3022269353128314, + "grad_norm": 0.7731961607933044, + "learning_rate": 9.180621199945635e-06, + "loss": 0.03505718, + "memory(GiB)": 26.31, + "step": 285, + "train_speed(iter/s)": 0.574926 + }, + { + "acc": 0.9891468, + "epoch": 0.3075291622481442, + "grad_norm": 1.2755072116851807, + "learning_rate": 9.208868401621302e-06, + "loss": 0.0319423, + "memory(GiB)": 26.31, + "step": 290, + "train_speed(iter/s)": 0.575064 + }, + { + "acc": 0.98575668, + "epoch": 0.31283138918345704, + "grad_norm": 1.1947649717330933, + "learning_rate": 9.236632721427041e-06, + "loss": 0.04200953, + "memory(GiB)": 26.31, + "step": 295, + "train_speed(iter/s)": 0.575254 + }, + { + "acc": 0.98714046, + "epoch": 0.3181336161187699, + "grad_norm": 0.9724829196929932, + "learning_rate": 9.263930391852308e-06, + "loss": 0.03815084, + "memory(GiB)": 26.31, + "step": 300, + "train_speed(iter/s)": 0.575407 + }, + { + "acc": 0.99066544, + "epoch": 0.32343584305408274, + "grad_norm": 1.0233286619186401, + "learning_rate": 9.290776840394537e-06, + "loss": 0.0282487, + "memory(GiB)": 26.31, + "step": 305, + "train_speed(iter/s)": 0.575641 + }, + { + "acc": 0.989046, + "epoch": 0.32873806998939553, + "grad_norm": 0.9235794544219971, + "learning_rate": 9.317186741922297e-06, + "loss": 0.02938327, + "memory(GiB)": 26.31, + "step": 310, + "train_speed(iter/s)": 0.575756 + }, + { + "acc": 0.98994274, + "epoch": 0.3340402969247084, + "grad_norm": 0.9956262707710266, + "learning_rate": 9.343174066848856e-06, + "loss": 0.02921431, + "memory(GiB)": 26.31, + "step": 315, + "train_speed(iter/s)": 0.575954 + }, + { + "acc": 0.98916187, + "epoch": 0.33934252386002123, + "grad_norm": 1.35161292552948, + "learning_rate": 9.368752125512056e-06, + "loss": 0.0287899, + "memory(GiB)": 26.31, + "step": 320, + "train_speed(iter/s)": 0.57606 + }, + { + "acc": 0.98746319, + "epoch": 0.344644750795334, + "grad_norm": 1.062467098236084, + "learning_rate": 9.39393360911345e-06, + "loss": 0.03575181, + "memory(GiB)": 26.31, + "step": 325, + "train_speed(iter/s)": 0.576176 + }, + { + "acc": 0.98824463, + "epoch": 0.34994697773064687, + "grad_norm": 0.9505136609077454, + "learning_rate": 9.41873062753195e-06, + "loss": 0.03047816, + "memory(GiB)": 26.31, + "step": 330, + "train_speed(iter/s)": 0.576314 + }, + { + "acc": 0.9914609, + "epoch": 0.3552492046659597, + "grad_norm": 0.632502019405365, + "learning_rate": 9.443154744293996e-06, + "loss": 0.0260011, + "memory(GiB)": 26.31, + "step": 335, + "train_speed(iter/s)": 0.576483 + }, + { + "acc": 0.98916855, + "epoch": 0.3605514316012725, + "grad_norm": 1.034669280052185, + "learning_rate": 9.467217008953053e-06, + "loss": 0.03403499, + "memory(GiB)": 26.31, + "step": 340, + "train_speed(iter/s)": 0.57658 + }, + { + "acc": 0.98895273, + "epoch": 0.36585365853658536, + "grad_norm": 0.952576756477356, + "learning_rate": 9.490927987105374e-06, + "loss": 0.0317608, + "memory(GiB)": 26.31, + "step": 345, + "train_speed(iter/s)": 0.576739 + }, + { + "acc": 0.98866873, + "epoch": 0.3711558854718982, + "grad_norm": 0.8197150230407715, + "learning_rate": 9.514297788246138e-06, + "loss": 0.03127484, + "memory(GiB)": 26.31, + "step": 350, + "train_speed(iter/s)": 0.576816 + }, + { + "acc": 0.99059963, + "epoch": 0.37645811240721105, + "grad_norm": 0.825374960899353, + "learning_rate": 9.537336091649749e-06, + "loss": 0.02805768, + "memory(GiB)": 26.31, + "step": 355, + "train_speed(iter/s)": 0.576892 + }, + { + "acc": 0.98838682, + "epoch": 0.38176033934252385, + "grad_norm": 0.8780854940414429, + "learning_rate": 9.560052170440148e-06, + "loss": 0.03393146, + "memory(GiB)": 26.31, + "step": 360, + "train_speed(iter/s)": 0.576976 + }, + { + "acc": 0.99057932, + "epoch": 0.3870625662778367, + "grad_norm": 0.5594467520713806, + "learning_rate": 9.582454914000955e-06, + "loss": 0.02473587, + "memory(GiB)": 26.31, + "step": 365, + "train_speed(iter/s)": 0.577044 + }, + { + "acc": 0.99060621, + "epoch": 0.39236479321314954, + "grad_norm": 0.9039445519447327, + "learning_rate": 9.604552848860942e-06, + "loss": 0.02671352, + "memory(GiB)": 26.31, + "step": 370, + "train_speed(iter/s)": 0.577136 + }, + { + "acc": 0.98958836, + "epoch": 0.39766702014846234, + "grad_norm": 1.2218246459960938, + "learning_rate": 9.626354158177683e-06, + "loss": 0.03268912, + "memory(GiB)": 26.31, + "step": 375, + "train_speed(iter/s)": 0.577268 + }, + { + "acc": 0.98990612, + "epoch": 0.4029692470837752, + "grad_norm": 1.1614571809768677, + "learning_rate": 9.647866699930756e-06, + "loss": 0.02865369, + "memory(GiB)": 26.31, + "step": 380, + "train_speed(iter/s)": 0.57737 + }, + { + "acc": 0.99105186, + "epoch": 0.40827147401908803, + "grad_norm": 0.7430466413497925, + "learning_rate": 9.669098023925782e-06, + "loss": 0.02913875, + "memory(GiB)": 26.31, + "step": 385, + "train_speed(iter/s)": 0.577492 + }, + { + "acc": 0.9908083, + "epoch": 0.4135737009544008, + "grad_norm": 0.6815354228019714, + "learning_rate": 9.690055387701289e-06, + "loss": 0.02702239, + "memory(GiB)": 26.31, + "step": 390, + "train_speed(iter/s)": 0.577623 + }, + { + "acc": 0.98966265, + "epoch": 0.4188759278897137, + "grad_norm": 0.7089868783950806, + "learning_rate": 9.710745771422355e-06, + "loss": 0.02905208, + "memory(GiB)": 26.31, + "step": 395, + "train_speed(iter/s)": 0.577742 + }, + { + "acc": 0.99098186, + "epoch": 0.4241781548250265, + "grad_norm": 0.6910232901573181, + "learning_rate": 9.731175891837428e-06, + "loss": 0.02598582, + "memory(GiB)": 26.31, + "step": 400, + "train_speed(iter/s)": 0.577853 + }, + { + "acc": 0.98882151, + "epoch": 0.42948038176033937, + "grad_norm": 1.0936506986618042, + "learning_rate": 9.751352215368239e-06, + "loss": 0.03153276, + "memory(GiB)": 26.31, + "step": 405, + "train_speed(iter/s)": 0.577953 + }, + { + "acc": 0.99092979, + "epoch": 0.43478260869565216, + "grad_norm": 0.7421805262565613, + "learning_rate": 9.771280970396543e-06, + "loss": 0.02831423, + "memory(GiB)": 26.31, + "step": 410, + "train_speed(iter/s)": 0.57803 + }, + { + "acc": 0.98938055, + "epoch": 0.440084835630965, + "grad_norm": 0.9013299345970154, + "learning_rate": 9.790968158806186e-06, + "loss": 0.02966608, + "memory(GiB)": 26.31, + "step": 415, + "train_speed(iter/s)": 0.578126 + }, + { + "acc": 0.98750973, + "epoch": 0.44538706256627786, + "grad_norm": 0.8295337557792664, + "learning_rate": 9.810419566833978e-06, + "loss": 0.0346707, + "memory(GiB)": 26.31, + "step": 420, + "train_speed(iter/s)": 0.578189 + }, + { + "acc": 0.98876076, + "epoch": 0.45068928950159065, + "grad_norm": 0.7757723927497864, + "learning_rate": 9.829640775278427e-06, + "loss": 0.0301866, + "memory(GiB)": 26.31, + "step": 425, + "train_speed(iter/s)": 0.578228 + }, + { + "acc": 0.99074221, + "epoch": 0.4559915164369035, + "grad_norm": 0.9902310967445374, + "learning_rate": 9.848637169111462e-06, + "loss": 0.02895352, + "memory(GiB)": 26.31, + "step": 430, + "train_speed(iter/s)": 0.578321 + }, + { + "acc": 0.98980846, + "epoch": 0.46129374337221635, + "grad_norm": 0.916560709476471, + "learning_rate": 9.867413946534518e-06, + "loss": 0.030442, + "memory(GiB)": 26.31, + "step": 435, + "train_speed(iter/s)": 0.578384 + }, + { + "acc": 0.9902401, + "epoch": 0.46659597030752914, + "grad_norm": 0.7796283960342407, + "learning_rate": 9.885976127517072e-06, + "loss": 0.02879334, + "memory(GiB)": 26.31, + "step": 440, + "train_speed(iter/s)": 0.578533 + }, + { + "acc": 0.99136429, + "epoch": 0.471898197242842, + "grad_norm": 0.7088432908058167, + "learning_rate": 9.904328561852786e-06, + "loss": 0.0259961, + "memory(GiB)": 26.31, + "step": 445, + "train_speed(iter/s)": 0.578598 + }, + { + "acc": 0.99091301, + "epoch": 0.47720042417815484, + "grad_norm": 0.6826531887054443, + "learning_rate": 9.922475936765522e-06, + "loss": 0.02690938, + "memory(GiB)": 26.31, + "step": 450, + "train_speed(iter/s)": 0.578648 + }, + { + "acc": 0.99094028, + "epoch": 0.48250265111346763, + "grad_norm": 0.753305971622467, + "learning_rate": 9.94042278409512e-06, + "loss": 0.0271632, + "memory(GiB)": 26.31, + "step": 455, + "train_speed(iter/s)": 0.578709 + }, + { + "acc": 0.99208088, + "epoch": 0.4878048780487805, + "grad_norm": 0.7565575838088989, + "learning_rate": 9.958173487090496e-06, + "loss": 0.02474675, + "memory(GiB)": 26.31, + "step": 460, + "train_speed(iter/s)": 0.578762 + }, + { + "acc": 0.9922308, + "epoch": 0.4931071049840933, + "grad_norm": 0.5841274261474609, + "learning_rate": 9.97573228683551e-06, + "loss": 0.02355065, + "memory(GiB)": 26.31, + "step": 465, + "train_speed(iter/s)": 0.578809 + }, + { + "acc": 0.99055576, + "epoch": 0.4984093319194062, + "grad_norm": 0.6141323447227478, + "learning_rate": 9.99310328833119e-06, + "loss": 0.02669106, + "memory(GiB)": 26.31, + "step": 470, + "train_speed(iter/s)": 0.578891 + }, + { + "acc": 0.98941097, + "epoch": 0.503711558854719, + "grad_norm": 0.8238556385040283, + "learning_rate": 9.99999723295211e-06, + "loss": 0.02945265, + "memory(GiB)": 26.31, + "step": 475, + "train_speed(iter/s)": 0.578983 + }, + { + "acc": 0.99244938, + "epoch": 0.5090137857900318, + "grad_norm": 0.6722385287284851, + "learning_rate": 9.999980323226098e-06, + "loss": 0.02124254, + "memory(GiB)": 26.31, + "step": 480, + "train_speed(iter/s)": 0.579061 + }, + { + "acc": 0.99193668, + "epoch": 0.5143160127253447, + "grad_norm": 1.1237002611160278, + "learning_rate": 9.999948041074835e-06, + "loss": 0.02350252, + "memory(GiB)": 26.31, + "step": 485, + "train_speed(iter/s)": 0.579118 + }, + { + "acc": 0.99161243, + "epoch": 0.5196182396606575, + "grad_norm": 0.8993910551071167, + "learning_rate": 9.99990038659758e-06, + "loss": 0.02440767, + "memory(GiB)": 26.31, + "step": 490, + "train_speed(iter/s)": 0.579193 + }, + { + "acc": 0.99120369, + "epoch": 0.5249204665959704, + "grad_norm": 0.9947673678398132, + "learning_rate": 9.999837359940859e-06, + "loss": 0.02465511, + "memory(GiB)": 26.31, + "step": 495, + "train_speed(iter/s)": 0.579226 + }, + { + "acc": 0.99091749, + "epoch": 0.5302226935312832, + "grad_norm": 1.0027878284454346, + "learning_rate": 9.999758961298472e-06, + "loss": 0.02779818, + "memory(GiB)": 26.31, + "step": 500, + "train_speed(iter/s)": 0.579296 + }, + { + "acc": 0.99098568, + "epoch": 0.5355249204665959, + "grad_norm": 0.8108927607536316, + "learning_rate": 9.999665190911476e-06, + "loss": 0.02721919, + "memory(GiB)": 26.31, + "step": 505, + "train_speed(iter/s)": 0.579374 + }, + { + "acc": 0.99209585, + "epoch": 0.5408271474019088, + "grad_norm": 0.4550963044166565, + "learning_rate": 9.999556049068198e-06, + "loss": 0.02487307, + "memory(GiB)": 26.31, + "step": 510, + "train_speed(iter/s)": 0.579448 + }, + { + "acc": 0.99234419, + "epoch": 0.5461293743372216, + "grad_norm": 0.5903543829917908, + "learning_rate": 9.999431536104226e-06, + "loss": 0.02234256, + "memory(GiB)": 26.31, + "step": 515, + "train_speed(iter/s)": 0.579509 + }, + { + "acc": 0.99038715, + "epoch": 0.5514316012725344, + "grad_norm": 0.7893358469009399, + "learning_rate": 9.999291652402414e-06, + "loss": 0.02926616, + "memory(GiB)": 26.31, + "step": 520, + "train_speed(iter/s)": 0.579546 + }, + { + "acc": 0.99203892, + "epoch": 0.5567338282078473, + "grad_norm": 0.8482871055603027, + "learning_rate": 9.999136398392877e-06, + "loss": 0.02198004, + "memory(GiB)": 26.31, + "step": 525, + "train_speed(iter/s)": 0.579597 + }, + { + "acc": 0.99342957, + "epoch": 0.5620360551431601, + "grad_norm": 0.5417046546936035, + "learning_rate": 9.998965774552995e-06, + "loss": 0.02237912, + "memory(GiB)": 26.31, + "step": 530, + "train_speed(iter/s)": 0.579673 + }, + { + "acc": 0.99193487, + "epoch": 0.5673382820784729, + "grad_norm": 0.5985880494117737, + "learning_rate": 9.998779781407395e-06, + "loss": 0.02462097, + "memory(GiB)": 26.31, + "step": 535, + "train_speed(iter/s)": 0.579695 + }, + { + "acc": 0.99162283, + "epoch": 0.5726405090137858, + "grad_norm": 0.7096518278121948, + "learning_rate": 9.998578419527974e-06, + "loss": 0.02340071, + "memory(GiB)": 26.31, + "step": 540, + "train_speed(iter/s)": 0.579769 + }, + { + "acc": 0.98985367, + "epoch": 0.5779427359490986, + "grad_norm": 1.0494897365570068, + "learning_rate": 9.998361689533882e-06, + "loss": 0.02991374, + "memory(GiB)": 26.31, + "step": 545, + "train_speed(iter/s)": 0.57983 + }, + { + "acc": 0.9917799, + "epoch": 0.5832449628844114, + "grad_norm": 0.8652428388595581, + "learning_rate": 9.998129592091518e-06, + "loss": 0.02398509, + "memory(GiB)": 26.31, + "step": 550, + "train_speed(iter/s)": 0.579848 + }, + { + "acc": 0.99160995, + "epoch": 0.5885471898197243, + "grad_norm": 0.9238954186439514, + "learning_rate": 9.99788212791454e-06, + "loss": 0.026893, + "memory(GiB)": 26.31, + "step": 555, + "train_speed(iter/s)": 0.57992 + }, + { + "acc": 0.98957453, + "epoch": 0.5938494167550371, + "grad_norm": 1.0519745349884033, + "learning_rate": 9.997619297763849e-06, + "loss": 0.03056088, + "memory(GiB)": 26.31, + "step": 560, + "train_speed(iter/s)": 0.579957 + }, + { + "acc": 0.99082956, + "epoch": 0.5991516436903499, + "grad_norm": 0.6974701285362244, + "learning_rate": 9.997341102447595e-06, + "loss": 0.02576918, + "memory(GiB)": 26.31, + "step": 565, + "train_speed(iter/s)": 0.580013 + }, + { + "acc": 0.99181623, + "epoch": 0.6044538706256628, + "grad_norm": 0.857319176197052, + "learning_rate": 9.997047542821179e-06, + "loss": 0.02497994, + "memory(GiB)": 26.31, + "step": 570, + "train_speed(iter/s)": 0.58011 + }, + { + "acc": 0.99321194, + "epoch": 0.6097560975609756, + "grad_norm": 0.6084680557250977, + "learning_rate": 9.996738619787236e-06, + "loss": 0.02231183, + "memory(GiB)": 26.31, + "step": 575, + "train_speed(iter/s)": 0.580164 + }, + { + "acc": 0.99226446, + "epoch": 0.6150583244962884, + "grad_norm": 0.6827586889266968, + "learning_rate": 9.996414334295644e-06, + "loss": 0.02310744, + "memory(GiB)": 26.31, + "step": 580, + "train_speed(iter/s)": 0.58019 + }, + { + "acc": 0.9927886, + "epoch": 0.6203605514316013, + "grad_norm": 0.4883098006248474, + "learning_rate": 9.99607468734352e-06, + "loss": 0.02214252, + "memory(GiB)": 26.31, + "step": 585, + "train_speed(iter/s)": 0.580207 + }, + { + "acc": 0.99254446, + "epoch": 0.6256627783669141, + "grad_norm": 0.8108164668083191, + "learning_rate": 9.995719679975209e-06, + "loss": 0.0238646, + "memory(GiB)": 26.31, + "step": 590, + "train_speed(iter/s)": 0.580266 + }, + { + "acc": 0.99094467, + "epoch": 0.630965005302227, + "grad_norm": 0.665600061416626, + "learning_rate": 9.995349313282291e-06, + "loss": 0.02722621, + "memory(GiB)": 26.31, + "step": 595, + "train_speed(iter/s)": 0.5803 + }, + { + "acc": 0.99339981, + "epoch": 0.6362672322375398, + "grad_norm": 0.5218392014503479, + "learning_rate": 9.994963588403572e-06, + "loss": 0.02030847, + "memory(GiB)": 26.31, + "step": 600, + "train_speed(iter/s)": 0.580317 + }, + { + "acc": 0.99143753, + "epoch": 0.6415694591728526, + "grad_norm": 0.6035894155502319, + "learning_rate": 9.99456250652508e-06, + "loss": 0.02466365, + "memory(GiB)": 26.31, + "step": 605, + "train_speed(iter/s)": 0.580325 + }, + { + "acc": 0.99280758, + "epoch": 0.6468716861081655, + "grad_norm": 0.6592385768890381, + "learning_rate": 9.994146068880061e-06, + "loss": 0.02078036, + "memory(GiB)": 26.31, + "step": 610, + "train_speed(iter/s)": 0.580376 + }, + { + "acc": 0.99108791, + "epoch": 0.6521739130434783, + "grad_norm": 0.6236558556556702, + "learning_rate": 9.993714276748982e-06, + "loss": 0.02436172, + "memory(GiB)": 26.31, + "step": 615, + "train_speed(iter/s)": 0.580431 + }, + { + "acc": 0.9920083, + "epoch": 0.6574761399787911, + "grad_norm": 0.47285163402557373, + "learning_rate": 9.993267131459518e-06, + "loss": 0.02266829, + "memory(GiB)": 26.31, + "step": 620, + "train_speed(iter/s)": 0.580493 + }, + { + "acc": 0.99268894, + "epoch": 0.662778366914104, + "grad_norm": 0.7881173491477966, + "learning_rate": 9.992804634386555e-06, + "loss": 0.01983478, + "memory(GiB)": 26.31, + "step": 625, + "train_speed(iter/s)": 0.580548 + }, + { + "acc": 0.99176426, + "epoch": 0.6680805938494168, + "grad_norm": 0.8369566798210144, + "learning_rate": 9.992326786952182e-06, + "loss": 0.0240339, + "memory(GiB)": 26.31, + "step": 630, + "train_speed(iter/s)": 0.58057 + }, + { + "acc": 0.99282837, + "epoch": 0.6733828207847296, + "grad_norm": 0.9245002269744873, + "learning_rate": 9.991833590625683e-06, + "loss": 0.02064004, + "memory(GiB)": 26.31, + "step": 635, + "train_speed(iter/s)": 0.58059 + }, + { + "acc": 0.99282904, + "epoch": 0.6786850477200425, + "grad_norm": 0.6197252869606018, + "learning_rate": 9.991325046923544e-06, + "loss": 0.02151935, + "memory(GiB)": 26.31, + "step": 640, + "train_speed(iter/s)": 0.580615 + }, + { + "acc": 0.99258642, + "epoch": 0.6839872746553552, + "grad_norm": 0.6860659122467041, + "learning_rate": 9.990801157409434e-06, + "loss": 0.02141871, + "memory(GiB)": 26.31, + "step": 645, + "train_speed(iter/s)": 0.580643 + }, + { + "acc": 0.99289961, + "epoch": 0.689289501590668, + "grad_norm": 0.6437894105911255, + "learning_rate": 9.990261923694215e-06, + "loss": 0.02073102, + "memory(GiB)": 26.31, + "step": 650, + "train_speed(iter/s)": 0.58073 + }, + { + "acc": 0.99290047, + "epoch": 0.694591728525981, + "grad_norm": 0.5864006876945496, + "learning_rate": 9.989707347435921e-06, + "loss": 0.01984381, + "memory(GiB)": 26.31, + "step": 655, + "train_speed(iter/s)": 0.580734 + }, + { + "acc": 0.99156342, + "epoch": 0.6998939554612937, + "grad_norm": 0.7833142280578613, + "learning_rate": 9.98913743033977e-06, + "loss": 0.02350827, + "memory(GiB)": 26.31, + "step": 660, + "train_speed(iter/s)": 0.58077 + }, + { + "acc": 0.99425297, + "epoch": 0.7051961823966065, + "grad_norm": 0.7044891119003296, + "learning_rate": 9.988552174158141e-06, + "loss": 0.01863719, + "memory(GiB)": 26.31, + "step": 665, + "train_speed(iter/s)": 0.580791 + }, + { + "acc": 0.99157066, + "epoch": 0.7104984093319194, + "grad_norm": 0.7697265148162842, + "learning_rate": 9.987951580690585e-06, + "loss": 0.02315737, + "memory(GiB)": 26.31, + "step": 670, + "train_speed(iter/s)": 0.580838 + }, + { + "acc": 0.99336653, + "epoch": 0.7158006362672322, + "grad_norm": 0.626921534538269, + "learning_rate": 9.987335651783809e-06, + "loss": 0.01875867, + "memory(GiB)": 26.31, + "step": 675, + "train_speed(iter/s)": 0.580889 + }, + { + "acc": 0.99421749, + "epoch": 0.721102863202545, + "grad_norm": 0.6633844375610352, + "learning_rate": 9.986704389331675e-06, + "loss": 0.01869861, + "memory(GiB)": 26.31, + "step": 680, + "train_speed(iter/s)": 0.58092 + }, + { + "acc": 0.99101582, + "epoch": 0.7264050901378579, + "grad_norm": 0.7727330327033997, + "learning_rate": 9.986057795275192e-06, + "loss": 0.02501288, + "memory(GiB)": 26.31, + "step": 685, + "train_speed(iter/s)": 0.580941 + }, + { + "acc": 0.99241905, + "epoch": 0.7317073170731707, + "grad_norm": 0.5293135643005371, + "learning_rate": 9.98539587160251e-06, + "loss": 0.02406199, + "memory(GiB)": 26.31, + "step": 690, + "train_speed(iter/s)": 0.580949 + }, + { + "acc": 0.99096756, + "epoch": 0.7370095440084835, + "grad_norm": 0.7288177013397217, + "learning_rate": 9.984718620348913e-06, + "loss": 0.0244829, + "memory(GiB)": 26.31, + "step": 695, + "train_speed(iter/s)": 0.580994 + }, + { + "acc": 0.99141579, + "epoch": 0.7423117709437964, + "grad_norm": 0.7569153308868408, + "learning_rate": 9.984026043596819e-06, + "loss": 0.02278415, + "memory(GiB)": 26.31, + "step": 700, + "train_speed(iter/s)": 0.581045 + }, + { + "acc": 0.9932703, + "epoch": 0.7476139978791092, + "grad_norm": 0.6304537057876587, + "learning_rate": 9.983318143475762e-06, + "loss": 0.01978764, + "memory(GiB)": 26.31, + "step": 705, + "train_speed(iter/s)": 0.58107 + }, + { + "acc": 0.99194145, + "epoch": 0.7529162248144221, + "grad_norm": 0.5985568165779114, + "learning_rate": 9.982594922162403e-06, + "loss": 0.02452516, + "memory(GiB)": 26.31, + "step": 710, + "train_speed(iter/s)": 0.581118 + }, + { + "acc": 0.99371014, + "epoch": 0.7582184517497349, + "grad_norm": 0.6595653891563416, + "learning_rate": 9.981856381880504e-06, + "loss": 0.01932598, + "memory(GiB)": 26.31, + "step": 715, + "train_speed(iter/s)": 0.581125 + }, + { + "acc": 0.99341488, + "epoch": 0.7635206786850477, + "grad_norm": 0.7101230025291443, + "learning_rate": 9.981102524900929e-06, + "loss": 0.02069145, + "memory(GiB)": 26.31, + "step": 720, + "train_speed(iter/s)": 0.581142 + }, + { + "acc": 0.99183006, + "epoch": 0.7688229056203606, + "grad_norm": 0.6250572800636292, + "learning_rate": 9.98033335354164e-06, + "loss": 0.02185034, + "memory(GiB)": 26.31, + "step": 725, + "train_speed(iter/s)": 0.58116 + }, + { + "acc": 0.99280148, + "epoch": 0.7741251325556734, + "grad_norm": 0.6395146250724792, + "learning_rate": 9.979548870167695e-06, + "loss": 0.02104771, + "memory(GiB)": 26.31, + "step": 730, + "train_speed(iter/s)": 0.581173 + }, + { + "acc": 0.99531593, + "epoch": 0.7794273594909862, + "grad_norm": 0.5320297479629517, + "learning_rate": 9.978749077191223e-06, + "loss": 0.01694808, + "memory(GiB)": 26.31, + "step": 735, + "train_speed(iter/s)": 0.581213 + }, + { + "acc": 0.99354649, + "epoch": 0.7847295864262991, + "grad_norm": 0.6778517961502075, + "learning_rate": 9.977933977071433e-06, + "loss": 0.01976324, + "memory(GiB)": 26.31, + "step": 740, + "train_speed(iter/s)": 0.581244 + }, + { + "acc": 0.99129896, + "epoch": 0.7900318133616119, + "grad_norm": 0.8144289255142212, + "learning_rate": 9.977103572314595e-06, + "loss": 0.02646843, + "memory(GiB)": 26.31, + "step": 745, + "train_speed(iter/s)": 0.581258 + }, + { + "acc": 0.99141903, + "epoch": 0.7953340402969247, + "grad_norm": 0.6856284141540527, + "learning_rate": 9.976257865474044e-06, + "loss": 0.02530013, + "memory(GiB)": 26.31, + "step": 750, + "train_speed(iter/s)": 0.581283 + }, + { + "acc": 0.99295807, + "epoch": 0.8006362672322376, + "grad_norm": 0.5783557295799255, + "learning_rate": 9.975396859150165e-06, + "loss": 0.02089001, + "memory(GiB)": 26.31, + "step": 755, + "train_speed(iter/s)": 0.581298 + }, + { + "acc": 0.99298162, + "epoch": 0.8059384941675504, + "grad_norm": 0.681499719619751, + "learning_rate": 9.97452055599038e-06, + "loss": 0.02061177, + "memory(GiB)": 26.31, + "step": 760, + "train_speed(iter/s)": 0.581304 + }, + { + "acc": 0.99296398, + "epoch": 0.8112407211028632, + "grad_norm": 0.7560820579528809, + "learning_rate": 9.973628958689153e-06, + "loss": 0.0210947, + "memory(GiB)": 26.31, + "step": 765, + "train_speed(iter/s)": 0.581369 + }, + { + "acc": 0.99206982, + "epoch": 0.8165429480381761, + "grad_norm": 1.0038448572158813, + "learning_rate": 9.972722069987973e-06, + "loss": 0.02418317, + "memory(GiB)": 26.31, + "step": 770, + "train_speed(iter/s)": 0.581388 + }, + { + "acc": 0.99350224, + "epoch": 0.8218451749734889, + "grad_norm": 0.678901195526123, + "learning_rate": 9.971799892675342e-06, + "loss": 0.0195865, + "memory(GiB)": 26.31, + "step": 775, + "train_speed(iter/s)": 0.581427 + }, + { + "acc": 0.99451637, + "epoch": 0.8271474019088016, + "grad_norm": 0.688715934753418, + "learning_rate": 9.970862429586775e-06, + "loss": 0.01755214, + "memory(GiB)": 26.31, + "step": 780, + "train_speed(iter/s)": 0.581463 + }, + { + "acc": 0.99366951, + "epoch": 0.8324496288441146, + "grad_norm": 0.7547446489334106, + "learning_rate": 9.969909683604791e-06, + "loss": 0.01924557, + "memory(GiB)": 26.31, + "step": 785, + "train_speed(iter/s)": 0.581476 + }, + { + "acc": 0.99350204, + "epoch": 0.8377518557794273, + "grad_norm": 0.5072101950645447, + "learning_rate": 9.968941657658897e-06, + "loss": 0.01881273, + "memory(GiB)": 26.31, + "step": 790, + "train_speed(iter/s)": 0.581485 + }, + { + "acc": 0.99326372, + "epoch": 0.8430540827147401, + "grad_norm": 0.5862351059913635, + "learning_rate": 9.96795835472558e-06, + "loss": 0.01942059, + "memory(GiB)": 26.31, + "step": 795, + "train_speed(iter/s)": 0.581506 + }, + { + "acc": 0.99427719, + "epoch": 0.848356309650053, + "grad_norm": 0.6744386553764343, + "learning_rate": 9.96695977782831e-06, + "loss": 0.01748537, + "memory(GiB)": 26.31, + "step": 800, + "train_speed(iter/s)": 0.581514 + }, + { + "acc": 0.99367943, + "epoch": 0.8536585365853658, + "grad_norm": 0.45461753010749817, + "learning_rate": 9.965945930037511e-06, + "loss": 0.01755228, + "memory(GiB)": 26.31, + "step": 805, + "train_speed(iter/s)": 0.581548 + }, + { + "acc": 0.9931777, + "epoch": 0.8589607635206787, + "grad_norm": 0.6794818043708801, + "learning_rate": 9.96491681447057e-06, + "loss": 0.02256282, + "memory(GiB)": 26.31, + "step": 810, + "train_speed(iter/s)": 0.581579 + }, + { + "acc": 0.99288502, + "epoch": 0.8642629904559915, + "grad_norm": 0.6164844036102295, + "learning_rate": 9.963872434291817e-06, + "loss": 0.02206081, + "memory(GiB)": 26.31, + "step": 815, + "train_speed(iter/s)": 0.581636 + }, + { + "acc": 0.99430141, + "epoch": 0.8695652173913043, + "grad_norm": 0.5261210799217224, + "learning_rate": 9.962812792712513e-06, + "loss": 0.01857843, + "memory(GiB)": 26.31, + "step": 820, + "train_speed(iter/s)": 0.581676 + }, + { + "acc": 0.99293432, + "epoch": 0.8748674443266172, + "grad_norm": 0.610313892364502, + "learning_rate": 9.96173789299085e-06, + "loss": 0.01938998, + "memory(GiB)": 26.31, + "step": 825, + "train_speed(iter/s)": 0.581676 + }, + { + "acc": 0.9944231, + "epoch": 0.88016967126193, + "grad_norm": 0.6256194710731506, + "learning_rate": 9.960647738431939e-06, + "loss": 0.01734181, + "memory(GiB)": 26.31, + "step": 830, + "train_speed(iter/s)": 0.581682 + }, + { + "acc": 0.99155655, + "epoch": 0.8854718981972428, + "grad_norm": 1.561480164527893, + "learning_rate": 9.959542332387785e-06, + "loss": 0.02328325, + "memory(GiB)": 26.31, + "step": 835, + "train_speed(iter/s)": 0.581692 + }, + { + "acc": 0.99236937, + "epoch": 0.8907741251325557, + "grad_norm": 0.8149193525314331, + "learning_rate": 9.958421678257304e-06, + "loss": 0.02260731, + "memory(GiB)": 26.31, + "step": 840, + "train_speed(iter/s)": 0.581707 + }, + { + "acc": 0.99417439, + "epoch": 0.8960763520678685, + "grad_norm": 0.5183711647987366, + "learning_rate": 9.95728577948628e-06, + "loss": 0.01935203, + "memory(GiB)": 26.31, + "step": 845, + "train_speed(iter/s)": 0.581739 + }, + { + "acc": 0.99458427, + "epoch": 0.9013785790031813, + "grad_norm": 0.5166410207748413, + "learning_rate": 9.956134639567388e-06, + "loss": 0.01900076, + "memory(GiB)": 26.31, + "step": 850, + "train_speed(iter/s)": 0.581748 + }, + { + "acc": 0.99384928, + "epoch": 0.9066808059384942, + "grad_norm": 0.7617852687835693, + "learning_rate": 9.954968262040152e-06, + "loss": 0.02062955, + "memory(GiB)": 26.31, + "step": 855, + "train_speed(iter/s)": 0.581761 + }, + { + "acc": 0.99329472, + "epoch": 0.911983032873807, + "grad_norm": 0.5276218056678772, + "learning_rate": 9.953786650490957e-06, + "loss": 0.01935146, + "memory(GiB)": 26.31, + "step": 860, + "train_speed(iter/s)": 0.581771 + }, + { + "acc": 0.99199495, + "epoch": 0.9172852598091198, + "grad_norm": 1.0328503847122192, + "learning_rate": 9.952589808553028e-06, + "loss": 0.02057788, + "memory(GiB)": 26.31, + "step": 865, + "train_speed(iter/s)": 0.5818 + }, + { + "acc": 0.99147606, + "epoch": 0.9225874867444327, + "grad_norm": 0.6767083406448364, + "learning_rate": 9.951377739906422e-06, + "loss": 0.02531357, + "memory(GiB)": 26.31, + "step": 870, + "train_speed(iter/s)": 0.581807 + }, + { + "acc": 0.99108963, + "epoch": 0.9278897136797455, + "grad_norm": 0.8326674103736877, + "learning_rate": 9.95015044827801e-06, + "loss": 0.02297047, + "memory(GiB)": 26.31, + "step": 875, + "train_speed(iter/s)": 0.581807 + }, + { + "acc": 0.99454174, + "epoch": 0.9331919406150583, + "grad_norm": 0.5634021162986755, + "learning_rate": 9.948907937441476e-06, + "loss": 0.01865164, + "memory(GiB)": 26.31, + "step": 880, + "train_speed(iter/s)": 0.581816 + }, + { + "acc": 0.99318018, + "epoch": 0.9384941675503712, + "grad_norm": 0.5036848783493042, + "learning_rate": 9.947650211217297e-06, + "loss": 0.02226395, + "memory(GiB)": 26.31, + "step": 885, + "train_speed(iter/s)": 0.581851 + }, + { + "acc": 0.99398918, + "epoch": 0.943796394485684, + "grad_norm": 0.5968469977378845, + "learning_rate": 9.946377273472736e-06, + "loss": 0.01695271, + "memory(GiB)": 26.31, + "step": 890, + "train_speed(iter/s)": 0.581867 + }, + { + "acc": 0.99425325, + "epoch": 0.9490986214209968, + "grad_norm": 0.7831963300704956, + "learning_rate": 9.945089128121828e-06, + "loss": 0.01738757, + "memory(GiB)": 26.31, + "step": 895, + "train_speed(iter/s)": 0.581866 + }, + { + "acc": 0.99221354, + "epoch": 0.9544008483563097, + "grad_norm": 0.950468122959137, + "learning_rate": 9.943785779125367e-06, + "loss": 0.02397622, + "memory(GiB)": 26.31, + "step": 900, + "train_speed(iter/s)": 0.581868 + }, + { + "acc": 0.99435406, + "epoch": 0.9597030752916225, + "grad_norm": 0.6189500093460083, + "learning_rate": 9.942467230490899e-06, + "loss": 0.01756911, + "memory(GiB)": 26.31, + "step": 905, + "train_speed(iter/s)": 0.581878 + }, + { + "acc": 0.99479847, + "epoch": 0.9650053022269353, + "grad_norm": 0.6108078956604004, + "learning_rate": 9.941133486272702e-06, + "loss": 0.01728452, + "memory(GiB)": 26.31, + "step": 910, + "train_speed(iter/s)": 0.581885 + }, + { + "acc": 0.99450102, + "epoch": 0.9703075291622482, + "grad_norm": 0.6411594748497009, + "learning_rate": 9.939784550571779e-06, + "loss": 0.017101, + "memory(GiB)": 26.31, + "step": 915, + "train_speed(iter/s)": 0.581895 + }, + { + "acc": 0.99435349, + "epoch": 0.975609756097561, + "grad_norm": 0.9291826486587524, + "learning_rate": 9.938420427535842e-06, + "loss": 0.01866589, + "memory(GiB)": 26.31, + "step": 920, + "train_speed(iter/s)": 0.581901 + }, + { + "acc": 0.99345455, + "epoch": 0.9809119830328739, + "grad_norm": 0.9071804881095886, + "learning_rate": 9.937041121359307e-06, + "loss": 0.01875674, + "memory(GiB)": 26.31, + "step": 925, + "train_speed(iter/s)": 0.581908 + }, + { + "acc": 0.99373541, + "epoch": 0.9862142099681867, + "grad_norm": 0.8860246539115906, + "learning_rate": 9.935646636283267e-06, + "loss": 0.01871233, + "memory(GiB)": 26.31, + "step": 930, + "train_speed(iter/s)": 0.581913 + }, + { + "acc": 0.99323978, + "epoch": 0.9915164369034994, + "grad_norm": 0.9965890645980835, + "learning_rate": 9.934236976595492e-06, + "loss": 0.01928719, + "memory(GiB)": 26.31, + "step": 935, + "train_speed(iter/s)": 0.581928 + }, + { + "acc": 0.99340267, + "epoch": 0.9968186638388123, + "grad_norm": 0.6819751858711243, + "learning_rate": 9.932812146630413e-06, + "loss": 0.02095537, + "memory(GiB)": 26.31, + "step": 940, + "train_speed(iter/s)": 0.581931 + }, + { + "acc": 0.99613075, + "epoch": 1.002120890774125, + "grad_norm": 0.4798663854598999, + "learning_rate": 9.9313721507691e-06, + "loss": 0.0142905, + "memory(GiB)": 26.31, + "step": 945, + "train_speed(iter/s)": 0.581588 + }, + { + "acc": 0.99497509, + "epoch": 1.007423117709438, + "grad_norm": 0.6852810382843018, + "learning_rate": 9.92991699343926e-06, + "loss": 0.01566033, + "memory(GiB)": 26.31, + "step": 950, + "train_speed(iter/s)": 0.581615 + }, + { + "acc": 0.99398909, + "epoch": 1.0127253446447508, + "grad_norm": 0.5741665959358215, + "learning_rate": 9.92844667911522e-06, + "loss": 0.02031869, + "memory(GiB)": 26.31, + "step": 955, + "train_speed(iter/s)": 0.58163 + }, + { + "acc": 0.99311733, + "epoch": 1.0180275715800637, + "grad_norm": 0.5430195331573486, + "learning_rate": 9.926961212317905e-06, + "loss": 0.01968945, + "memory(GiB)": 26.31, + "step": 960, + "train_speed(iter/s)": 0.581642 + }, + { + "acc": 0.99392338, + "epoch": 1.0233297985153764, + "grad_norm": 0.8401893973350525, + "learning_rate": 9.92546059761484e-06, + "loss": 0.01952956, + "memory(GiB)": 26.31, + "step": 965, + "train_speed(iter/s)": 0.581654 + }, + { + "acc": 0.99450817, + "epoch": 1.0286320254506893, + "grad_norm": 0.4842678904533386, + "learning_rate": 9.923944839620118e-06, + "loss": 0.01760198, + "memory(GiB)": 26.31, + "step": 970, + "train_speed(iter/s)": 0.581667 + }, + { + "acc": 0.99244471, + "epoch": 1.0339342523860022, + "grad_norm": 0.6450461745262146, + "learning_rate": 9.922413942994401e-06, + "loss": 0.02271796, + "memory(GiB)": 26.31, + "step": 975, + "train_speed(iter/s)": 0.581675 + }, + { + "acc": 0.99532566, + "epoch": 1.039236479321315, + "grad_norm": 0.4986313581466675, + "learning_rate": 9.920867912444895e-06, + "loss": 0.01437508, + "memory(GiB)": 26.31, + "step": 980, + "train_speed(iter/s)": 0.581702 + }, + { + "acc": 0.99303761, + "epoch": 1.0445387062566278, + "grad_norm": 0.6543801426887512, + "learning_rate": 9.919306752725346e-06, + "loss": 0.02089486, + "memory(GiB)": 26.31, + "step": 985, + "train_speed(iter/s)": 0.581733 + }, + { + "acc": 0.9940731, + "epoch": 1.0498409331919407, + "grad_norm": 0.6764442920684814, + "learning_rate": 9.917730468636012e-06, + "loss": 0.02023424, + "memory(GiB)": 26.31, + "step": 990, + "train_speed(iter/s)": 0.58174 + }, + { + "acc": 0.99587164, + "epoch": 1.0551431601272534, + "grad_norm": 0.44897225499153137, + "learning_rate": 9.916139065023656e-06, + "loss": 0.0132737, + "memory(GiB)": 26.31, + "step": 995, + "train_speed(iter/s)": 0.581747 + }, + { + "acc": 0.99454727, + "epoch": 1.0604453870625663, + "grad_norm": 0.6791054010391235, + "learning_rate": 9.914532546781538e-06, + "loss": 0.01728434, + "memory(GiB)": 26.31, + "step": 1000, + "train_speed(iter/s)": 0.581774 + }, + { + "acc": 0.99382992, + "epoch": 1.0657476139978792, + "grad_norm": 0.4228080213069916, + "learning_rate": 9.912910918849386e-06, + "loss": 0.01803771, + "memory(GiB)": 26.31, + "step": 1005, + "train_speed(iter/s)": 0.58178 + }, + { + "acc": 0.99441757, + "epoch": 1.0710498409331919, + "grad_norm": 0.5636529326438904, + "learning_rate": 9.911274186213388e-06, + "loss": 0.0167443, + "memory(GiB)": 26.31, + "step": 1010, + "train_speed(iter/s)": 0.581806 + }, + { + "acc": 0.99366875, + "epoch": 1.0763520678685048, + "grad_norm": 0.6800065636634827, + "learning_rate": 9.909622353906179e-06, + "loss": 0.01885929, + "memory(GiB)": 26.31, + "step": 1015, + "train_speed(iter/s)": 0.581833 + }, + { + "acc": 0.99449482, + "epoch": 1.0816542948038177, + "grad_norm": 0.5581928491592407, + "learning_rate": 9.90795542700682e-06, + "loss": 0.01485443, + "memory(GiB)": 26.31, + "step": 1020, + "train_speed(iter/s)": 0.581848 + }, + { + "acc": 0.99461403, + "epoch": 1.0869565217391304, + "grad_norm": 0.7231691479682922, + "learning_rate": 9.906273410640785e-06, + "loss": 0.01806219, + "memory(GiB)": 26.31, + "step": 1025, + "train_speed(iter/s)": 0.581856 + }, + { + "acc": 0.99244728, + "epoch": 1.0922587486744433, + "grad_norm": 0.9682743549346924, + "learning_rate": 9.904576309979945e-06, + "loss": 0.02174127, + "memory(GiB)": 26.31, + "step": 1030, + "train_speed(iter/s)": 0.581873 + }, + { + "acc": 0.9933219, + "epoch": 1.0975609756097562, + "grad_norm": 0.752882719039917, + "learning_rate": 9.902864130242557e-06, + "loss": 0.02086806, + "memory(GiB)": 26.31, + "step": 1035, + "train_speed(iter/s)": 0.581897 + }, + { + "acc": 0.99312601, + "epoch": 1.1028632025450689, + "grad_norm": 0.7803872227668762, + "learning_rate": 9.901136876693233e-06, + "loss": 0.02002691, + "memory(GiB)": 26.31, + "step": 1040, + "train_speed(iter/s)": 0.581919 + }, + { + "acc": 0.99472342, + "epoch": 1.1081654294803818, + "grad_norm": 0.6384313106536865, + "learning_rate": 9.899394554642943e-06, + "loss": 0.01663477, + "memory(GiB)": 26.31, + "step": 1045, + "train_speed(iter/s)": 0.581947 + }, + { + "acc": 0.99375353, + "epoch": 1.1134676564156947, + "grad_norm": 0.68864506483078, + "learning_rate": 9.897637169448988e-06, + "loss": 0.01638348, + "memory(GiB)": 26.31, + "step": 1050, + "train_speed(iter/s)": 0.581979 + }, + { + "acc": 0.99457264, + "epoch": 1.1187698833510074, + "grad_norm": 0.4632731080055237, + "learning_rate": 9.895864726514983e-06, + "loss": 0.0175137, + "memory(GiB)": 26.31, + "step": 1055, + "train_speed(iter/s)": 0.58199 + }, + { + "acc": 0.99362888, + "epoch": 1.1240721102863203, + "grad_norm": 0.6945028305053711, + "learning_rate": 9.894077231290846e-06, + "loss": 0.01872665, + "memory(GiB)": 26.31, + "step": 1060, + "train_speed(iter/s)": 0.582022 + }, + { + "acc": 0.9930809, + "epoch": 1.1293743372216332, + "grad_norm": 0.6852729320526123, + "learning_rate": 9.892274689272772e-06, + "loss": 0.01868084, + "memory(GiB)": 26.31, + "step": 1065, + "train_speed(iter/s)": 0.582048 + }, + { + "acc": 0.99468307, + "epoch": 1.1346765641569458, + "grad_norm": 0.6329808831214905, + "learning_rate": 9.890457106003228e-06, + "loss": 0.01552589, + "memory(GiB)": 26.31, + "step": 1070, + "train_speed(iter/s)": 0.582056 + }, + { + "acc": 0.9942112, + "epoch": 1.1399787910922587, + "grad_norm": 0.587793231010437, + "learning_rate": 9.888624487070926e-06, + "loss": 0.0155957, + "memory(GiB)": 26.31, + "step": 1075, + "train_speed(iter/s)": 0.58206 + }, + { + "acc": 0.9943779, + "epoch": 1.1452810180275717, + "grad_norm": 0.8524949550628662, + "learning_rate": 9.886776838110811e-06, + "loss": 0.01897177, + "memory(GiB)": 26.31, + "step": 1080, + "train_speed(iter/s)": 0.582084 + }, + { + "acc": 0.99492826, + "epoch": 1.1505832449628843, + "grad_norm": 0.48728036880493164, + "learning_rate": 9.884914164804047e-06, + "loss": 0.01589368, + "memory(GiB)": 26.31, + "step": 1085, + "train_speed(iter/s)": 0.582097 + }, + { + "acc": 0.99354877, + "epoch": 1.1558854718981972, + "grad_norm": 0.4839894771575928, + "learning_rate": 9.883036472877983e-06, + "loss": 0.01641577, + "memory(GiB)": 26.31, + "step": 1090, + "train_speed(iter/s)": 0.5821 + }, + { + "acc": 0.9963026, + "epoch": 1.1611876988335101, + "grad_norm": 0.568143367767334, + "learning_rate": 9.881143768106162e-06, + "loss": 0.01254884, + "memory(GiB)": 26.31, + "step": 1095, + "train_speed(iter/s)": 0.582105 + }, + { + "acc": 0.99448452, + "epoch": 1.1664899257688228, + "grad_norm": 0.46727454662323, + "learning_rate": 9.879236056308277e-06, + "loss": 0.01576549, + "memory(GiB)": 26.31, + "step": 1100, + "train_speed(iter/s)": 0.582132 + }, + { + "acc": 0.99468699, + "epoch": 1.1717921527041357, + "grad_norm": 0.6475072503089905, + "learning_rate": 9.877313343350169e-06, + "loss": 0.01640804, + "memory(GiB)": 26.31, + "step": 1105, + "train_speed(iter/s)": 0.582153 + }, + { + "acc": 0.99498844, + "epoch": 1.1770943796394486, + "grad_norm": 0.7032859921455383, + "learning_rate": 9.875375635143809e-06, + "loss": 0.01677711, + "memory(GiB)": 26.31, + "step": 1110, + "train_speed(iter/s)": 0.582153 + }, + { + "acc": 0.99531984, + "epoch": 1.1823966065747613, + "grad_norm": 0.5358213782310486, + "learning_rate": 9.873422937647266e-06, + "loss": 0.0163477, + "memory(GiB)": 26.31, + "step": 1115, + "train_speed(iter/s)": 0.582182 + }, + { + "acc": 0.99534531, + "epoch": 1.1876988335100742, + "grad_norm": 0.7569906711578369, + "learning_rate": 9.871455256864705e-06, + "loss": 0.01311936, + "memory(GiB)": 26.31, + "step": 1120, + "train_speed(iter/s)": 0.58219 + }, + { + "acc": 0.99419842, + "epoch": 1.1930010604453871, + "grad_norm": 0.8755477666854858, + "learning_rate": 9.869472598846362e-06, + "loss": 0.01503182, + "memory(GiB)": 26.31, + "step": 1125, + "train_speed(iter/s)": 0.582198 + }, + { + "acc": 0.99561043, + "epoch": 1.1983032873806998, + "grad_norm": 0.6605581641197205, + "learning_rate": 9.86747496968852e-06, + "loss": 0.0120577, + "memory(GiB)": 26.31, + "step": 1130, + "train_speed(iter/s)": 0.58222 + }, + { + "acc": 0.99585924, + "epoch": 1.2036055143160127, + "grad_norm": 0.3325146436691284, + "learning_rate": 9.8654623755335e-06, + "loss": 0.01220306, + "memory(GiB)": 26.31, + "step": 1135, + "train_speed(iter/s)": 0.582233 + }, + { + "acc": 0.99469805, + "epoch": 1.2089077412513256, + "grad_norm": 0.49594518542289734, + "learning_rate": 9.863434822569637e-06, + "loss": 0.0143767, + "memory(GiB)": 26.31, + "step": 1140, + "train_speed(iter/s)": 0.582272 + }, + { + "acc": 0.99370193, + "epoch": 1.2142099681866383, + "grad_norm": 1.0842325687408447, + "learning_rate": 9.861392317031256e-06, + "loss": 0.01859829, + "memory(GiB)": 26.31, + "step": 1145, + "train_speed(iter/s)": 0.582293 + }, + { + "acc": 0.99477673, + "epoch": 1.2195121951219512, + "grad_norm": 0.6422792673110962, + "learning_rate": 9.85933486519867e-06, + "loss": 0.01558067, + "memory(GiB)": 26.31, + "step": 1150, + "train_speed(iter/s)": 0.582306 + }, + { + "acc": 0.9952467, + "epoch": 1.224814422057264, + "grad_norm": 0.3615686297416687, + "learning_rate": 9.857262473398134e-06, + "loss": 0.01394599, + "memory(GiB)": 26.31, + "step": 1155, + "train_speed(iter/s)": 0.582312 + }, + { + "acc": 0.99529438, + "epoch": 1.2301166489925768, + "grad_norm": 0.6838765740394592, + "learning_rate": 9.855175148001852e-06, + "loss": 0.01169058, + "memory(GiB)": 26.31, + "step": 1160, + "train_speed(iter/s)": 0.582335 + }, + { + "acc": 0.99526882, + "epoch": 1.2354188759278897, + "grad_norm": 0.6930041313171387, + "learning_rate": 9.853072895427938e-06, + "loss": 0.01289481, + "memory(GiB)": 26.31, + "step": 1165, + "train_speed(iter/s)": 0.582338 + }, + { + "acc": 0.99560337, + "epoch": 1.2407211028632026, + "grad_norm": 0.8965883255004883, + "learning_rate": 9.850955722140412e-06, + "loss": 0.01668053, + "memory(GiB)": 26.31, + "step": 1170, + "train_speed(iter/s)": 0.582371 + }, + { + "acc": 0.99516726, + "epoch": 1.2460233297985153, + "grad_norm": 0.8131476640701294, + "learning_rate": 9.848823634649169e-06, + "loss": 0.01300157, + "memory(GiB)": 26.31, + "step": 1175, + "train_speed(iter/s)": 0.582381 + }, + { + "acc": 0.99540739, + "epoch": 1.2513255567338282, + "grad_norm": 0.5490795373916626, + "learning_rate": 9.846676639509958e-06, + "loss": 0.01537903, + "memory(GiB)": 26.31, + "step": 1180, + "train_speed(iter/s)": 0.582374 + }, + { + "acc": 0.99454231, + "epoch": 1.256627783669141, + "grad_norm": 0.6122965812683105, + "learning_rate": 9.84451474332437e-06, + "loss": 0.01733764, + "memory(GiB)": 26.31, + "step": 1185, + "train_speed(iter/s)": 0.582379 + }, + { + "acc": 0.99378347, + "epoch": 1.2619300106044538, + "grad_norm": 0.6917413473129272, + "learning_rate": 9.842337952739813e-06, + "loss": 0.01898844, + "memory(GiB)": 26.31, + "step": 1190, + "train_speed(iter/s)": 0.58238 + }, + { + "acc": 0.99473305, + "epoch": 1.2672322375397667, + "grad_norm": 0.9137494564056396, + "learning_rate": 9.840146274449497e-06, + "loss": 0.01613488, + "memory(GiB)": 26.31, + "step": 1195, + "train_speed(iter/s)": 0.582386 + }, + { + "acc": 0.99495955, + "epoch": 1.2725344644750796, + "grad_norm": 0.492419570684433, + "learning_rate": 9.8379397151924e-06, + "loss": 0.01467134, + "memory(GiB)": 26.31, + "step": 1200, + "train_speed(iter/s)": 0.58241 + }, + { + "acc": 0.99571934, + "epoch": 1.2778366914103922, + "grad_norm": 0.6594977378845215, + "learning_rate": 9.835718281753262e-06, + "loss": 0.01280947, + "memory(GiB)": 26.31, + "step": 1205, + "train_speed(iter/s)": 0.58241 + }, + { + "acc": 0.99385624, + "epoch": 1.2831389183457051, + "grad_norm": 0.6085644364356995, + "learning_rate": 9.833481980962557e-06, + "loss": 0.01779164, + "memory(GiB)": 26.31, + "step": 1210, + "train_speed(iter/s)": 0.582412 + }, + { + "acc": 0.99450674, + "epoch": 1.288441145281018, + "grad_norm": 0.9028270840644836, + "learning_rate": 9.831230819696474e-06, + "loss": 0.01454319, + "memory(GiB)": 26.31, + "step": 1215, + "train_speed(iter/s)": 0.582434 + }, + { + "acc": 0.99494953, + "epoch": 1.2937433722163307, + "grad_norm": 0.6843631863594055, + "learning_rate": 9.828964804876893e-06, + "loss": 0.01487939, + "memory(GiB)": 26.31, + "step": 1220, + "train_speed(iter/s)": 0.582442 + }, + { + "acc": 0.99503832, + "epoch": 1.2990455991516436, + "grad_norm": 0.6850240230560303, + "learning_rate": 9.826683943471366e-06, + "loss": 0.01760384, + "memory(GiB)": 26.31, + "step": 1225, + "train_speed(iter/s)": 0.582446 + }, + { + "acc": 0.99546528, + "epoch": 1.3043478260869565, + "grad_norm": 0.7692059874534607, + "learning_rate": 9.824388242493098e-06, + "loss": 0.01505646, + "memory(GiB)": 26.31, + "step": 1230, + "train_speed(iter/s)": 0.582466 + }, + { + "acc": 0.99555531, + "epoch": 1.3096500530222692, + "grad_norm": 0.767185389995575, + "learning_rate": 9.82207770900092e-06, + "loss": 0.01339123, + "memory(GiB)": 26.31, + "step": 1235, + "train_speed(iter/s)": 0.582466 + }, + { + "acc": 0.99524298, + "epoch": 1.3149522799575821, + "grad_norm": 0.6795545220375061, + "learning_rate": 9.81975235009927e-06, + "loss": 0.01468817, + "memory(GiB)": 26.31, + "step": 1240, + "train_speed(iter/s)": 0.582484 + }, + { + "acc": 0.99549789, + "epoch": 1.320254506892895, + "grad_norm": 0.7110230922698975, + "learning_rate": 9.817412172938176e-06, + "loss": 0.01409793, + "memory(GiB)": 26.31, + "step": 1245, + "train_speed(iter/s)": 0.582489 + }, + { + "acc": 0.99565172, + "epoch": 1.3255567338282077, + "grad_norm": 0.5482975840568542, + "learning_rate": 9.815057184713223e-06, + "loss": 0.01393813, + "memory(GiB)": 26.31, + "step": 1250, + "train_speed(iter/s)": 0.582524 + }, + { + "acc": 0.99554529, + "epoch": 1.3308589607635206, + "grad_norm": 0.7029886245727539, + "learning_rate": 9.81268739266554e-06, + "loss": 0.01505921, + "memory(GiB)": 26.31, + "step": 1255, + "train_speed(iter/s)": 0.582526 + }, + { + "acc": 0.99403896, + "epoch": 1.3361611876988335, + "grad_norm": 0.9024369716644287, + "learning_rate": 9.810302804081772e-06, + "loss": 0.01528788, + "memory(GiB)": 26.31, + "step": 1260, + "train_speed(iter/s)": 0.582528 + }, + { + "acc": 0.99561853, + "epoch": 1.3414634146341464, + "grad_norm": 0.6582544445991516, + "learning_rate": 9.807903426294067e-06, + "loss": 0.01325701, + "memory(GiB)": 26.31, + "step": 1265, + "train_speed(iter/s)": 0.58253 + }, + { + "acc": 0.99610195, + "epoch": 1.346765641569459, + "grad_norm": 0.7300844192504883, + "learning_rate": 9.805489266680042e-06, + "loss": 0.01429899, + "memory(GiB)": 26.31, + "step": 1270, + "train_speed(iter/s)": 0.582526 + }, + { + "acc": 0.9962471, + "epoch": 1.352067868504772, + "grad_norm": 0.7025005221366882, + "learning_rate": 9.803060332662764e-06, + "loss": 0.01404113, + "memory(GiB)": 26.31, + "step": 1275, + "train_speed(iter/s)": 0.582544 + }, + { + "acc": 0.9944252, + "epoch": 1.357370095440085, + "grad_norm": 0.6766318678855896, + "learning_rate": 9.800616631710734e-06, + "loss": 0.01653153, + "memory(GiB)": 26.31, + "step": 1280, + "train_speed(iter/s)": 0.582545 + }, + { + "acc": 0.99595785, + "epoch": 1.3626723223753976, + "grad_norm": 0.738859236240387, + "learning_rate": 9.798158171337852e-06, + "loss": 0.01370402, + "memory(GiB)": 26.31, + "step": 1285, + "train_speed(iter/s)": 0.58255 + }, + { + "acc": 0.99550438, + "epoch": 1.3679745493107105, + "grad_norm": 0.5170459747314453, + "learning_rate": 9.795684959103405e-06, + "loss": 0.01245071, + "memory(GiB)": 26.31, + "step": 1290, + "train_speed(iter/s)": 0.582555 + }, + { + "acc": 0.99478798, + "epoch": 1.3732767762460234, + "grad_norm": 0.719942569732666, + "learning_rate": 9.793197002612038e-06, + "loss": 0.01572569, + "memory(GiB)": 26.31, + "step": 1295, + "train_speed(iter/s)": 0.582558 + }, + { + "acc": 0.9956728, + "epoch": 1.378579003181336, + "grad_norm": 0.553420901298523, + "learning_rate": 9.790694309513728e-06, + "loss": 0.0132667, + "memory(GiB)": 26.31, + "step": 1300, + "train_speed(iter/s)": 0.582572 + }, + { + "acc": 0.99500141, + "epoch": 1.383881230116649, + "grad_norm": 0.6300088763237, + "learning_rate": 9.788176887503771e-06, + "loss": 0.0131365, + "memory(GiB)": 26.31, + "step": 1305, + "train_speed(iter/s)": 0.582576 + }, + { + "acc": 0.99426498, + "epoch": 1.389183457051962, + "grad_norm": 0.5392923951148987, + "learning_rate": 9.785644744322745e-06, + "loss": 0.0165678, + "memory(GiB)": 26.31, + "step": 1310, + "train_speed(iter/s)": 0.582594 + }, + { + "acc": 0.99493856, + "epoch": 1.3944856839872748, + "grad_norm": 0.5953570604324341, + "learning_rate": 9.783097887756497e-06, + "loss": 0.01508854, + "memory(GiB)": 26.31, + "step": 1315, + "train_speed(iter/s)": 0.582621 + }, + { + "acc": 0.99446144, + "epoch": 1.3997879109225875, + "grad_norm": 0.6170870065689087, + "learning_rate": 9.780536325636113e-06, + "loss": 0.01575189, + "memory(GiB)": 26.31, + "step": 1320, + "train_speed(iter/s)": 0.582638 + }, + { + "acc": 0.99507885, + "epoch": 1.4050901378579004, + "grad_norm": 0.5795525908470154, + "learning_rate": 9.777960065837898e-06, + "loss": 0.01650278, + "memory(GiB)": 26.31, + "step": 1325, + "train_speed(iter/s)": 0.582647 + }, + { + "acc": 0.9949317, + "epoch": 1.4103923647932133, + "grad_norm": 0.986248254776001, + "learning_rate": 9.775369116283346e-06, + "loss": 0.01463662, + "memory(GiB)": 26.31, + "step": 1330, + "train_speed(iter/s)": 0.582653 + }, + { + "acc": 0.99627342, + "epoch": 1.415694591728526, + "grad_norm": 0.671968400478363, + "learning_rate": 9.772763484939118e-06, + "loss": 0.01474574, + "memory(GiB)": 26.31, + "step": 1335, + "train_speed(iter/s)": 0.582666 + }, + { + "acc": 0.99520779, + "epoch": 1.4209968186638389, + "grad_norm": 0.8155642151832581, + "learning_rate": 9.770143179817025e-06, + "loss": 0.01464276, + "memory(GiB)": 26.31, + "step": 1340, + "train_speed(iter/s)": 0.582666 + }, + { + "acc": 0.99607487, + "epoch": 1.4262990455991518, + "grad_norm": 0.46176499128341675, + "learning_rate": 9.767508208973993e-06, + "loss": 0.01274185, + "memory(GiB)": 26.31, + "step": 1345, + "train_speed(iter/s)": 0.582676 + }, + { + "acc": 0.99446783, + "epoch": 1.4316012725344645, + "grad_norm": 1.2167030572891235, + "learning_rate": 9.76485858051204e-06, + "loss": 0.01575762, + "memory(GiB)": 26.31, + "step": 1350, + "train_speed(iter/s)": 0.582677 + }, + { + "acc": 0.99542255, + "epoch": 1.4369034994697774, + "grad_norm": 0.3225151300430298, + "learning_rate": 9.762194302578258e-06, + "loss": 0.01336584, + "memory(GiB)": 26.31, + "step": 1355, + "train_speed(iter/s)": 0.58268 + }, + { + "acc": 0.99579563, + "epoch": 1.4422057264050903, + "grad_norm": 0.6517772078514099, + "learning_rate": 9.759515383364782e-06, + "loss": 0.01390352, + "memory(GiB)": 26.31, + "step": 1360, + "train_speed(iter/s)": 0.582687 + }, + { + "acc": 0.99631348, + "epoch": 1.447507953340403, + "grad_norm": 0.5232001543045044, + "learning_rate": 9.756821831108764e-06, + "loss": 0.01418541, + "memory(GiB)": 26.31, + "step": 1365, + "train_speed(iter/s)": 0.582704 + }, + { + "acc": 0.99409437, + "epoch": 1.4528101802757158, + "grad_norm": 0.4395295977592468, + "learning_rate": 9.75411365409235e-06, + "loss": 0.01683905, + "memory(GiB)": 26.31, + "step": 1370, + "train_speed(iter/s)": 0.582716 + }, + { + "acc": 0.99526634, + "epoch": 1.4581124072110287, + "grad_norm": 0.6830112338066101, + "learning_rate": 9.751390860642655e-06, + "loss": 0.01343714, + "memory(GiB)": 26.31, + "step": 1375, + "train_speed(iter/s)": 0.58273 + }, + { + "acc": 0.99617195, + "epoch": 1.4634146341463414, + "grad_norm": 0.3659854829311371, + "learning_rate": 9.748653459131741e-06, + "loss": 0.01204016, + "memory(GiB)": 26.31, + "step": 1380, + "train_speed(iter/s)": 0.582732 + }, + { + "acc": 0.99550266, + "epoch": 1.4687168610816543, + "grad_norm": 0.6669169068336487, + "learning_rate": 9.745901457976578e-06, + "loss": 0.01441504, + "memory(GiB)": 26.31, + "step": 1385, + "train_speed(iter/s)": 0.582737 + }, + { + "acc": 0.99465714, + "epoch": 1.4740190880169672, + "grad_norm": 0.7347469925880432, + "learning_rate": 9.743134865639034e-06, + "loss": 0.01655078, + "memory(GiB)": 26.31, + "step": 1390, + "train_speed(iter/s)": 0.582751 + }, + { + "acc": 0.99624844, + "epoch": 1.47932131495228, + "grad_norm": 0.9294884204864502, + "learning_rate": 9.74035369062584e-06, + "loss": 0.01310423, + "memory(GiB)": 26.31, + "step": 1395, + "train_speed(iter/s)": 0.582783 + }, + { + "acc": 0.99600592, + "epoch": 1.4846235418875928, + "grad_norm": 0.4734921455383301, + "learning_rate": 9.737557941488565e-06, + "loss": 0.01109402, + "memory(GiB)": 26.31, + "step": 1400, + "train_speed(iter/s)": 0.582791 + }, + { + "acc": 0.99517355, + "epoch": 1.4899257688229057, + "grad_norm": 0.7194894552230835, + "learning_rate": 9.73474762682359e-06, + "loss": 0.01321665, + "memory(GiB)": 26.31, + "step": 1405, + "train_speed(iter/s)": 0.582798 + }, + { + "acc": 0.99582138, + "epoch": 1.4952279957582184, + "grad_norm": 0.8972120881080627, + "learning_rate": 9.73192275527209e-06, + "loss": 0.01335516, + "memory(GiB)": 26.31, + "step": 1410, + "train_speed(iter/s)": 0.5828 + }, + { + "acc": 0.99488239, + "epoch": 1.5005302226935313, + "grad_norm": 0.7436479330062866, + "learning_rate": 9.729083335519984e-06, + "loss": 0.01636965, + "memory(GiB)": 26.31, + "step": 1415, + "train_speed(iter/s)": 0.582804 + }, + { + "acc": 0.99691572, + "epoch": 1.5058324496288442, + "grad_norm": 0.36048823595046997, + "learning_rate": 9.72622937629794e-06, + "loss": 0.01027709, + "memory(GiB)": 26.31, + "step": 1420, + "train_speed(iter/s)": 0.582826 + }, + { + "acc": 0.99606276, + "epoch": 1.511134676564157, + "grad_norm": 0.4885866641998291, + "learning_rate": 9.723360886381322e-06, + "loss": 0.01266043, + "memory(GiB)": 26.31, + "step": 1425, + "train_speed(iter/s)": 0.582831 + }, + { + "acc": 0.99566917, + "epoch": 1.5164369034994698, + "grad_norm": 0.5471593141555786, + "learning_rate": 9.720477874590176e-06, + "loss": 0.01241938, + "memory(GiB)": 26.31, + "step": 1430, + "train_speed(iter/s)": 0.582847 + }, + { + "acc": 0.99473476, + "epoch": 1.5217391304347827, + "grad_norm": 0.8035010099411011, + "learning_rate": 9.717580349789203e-06, + "loss": 0.01485032, + "memory(GiB)": 26.31, + "step": 1435, + "train_speed(iter/s)": 0.582848 + }, + { + "acc": 0.9948205, + "epoch": 1.5270413573700954, + "grad_norm": 0.811253547668457, + "learning_rate": 9.714668320887722e-06, + "loss": 0.01394672, + "memory(GiB)": 26.31, + "step": 1440, + "train_speed(iter/s)": 0.582855 + }, + { + "acc": 0.99572573, + "epoch": 1.5323435843054083, + "grad_norm": 0.8021854162216187, + "learning_rate": 9.711741796839656e-06, + "loss": 0.01230425, + "memory(GiB)": 26.31, + "step": 1445, + "train_speed(iter/s)": 0.582857 + }, + { + "acc": 0.99498787, + "epoch": 1.5376458112407212, + "grad_norm": 0.7353776097297668, + "learning_rate": 9.70880078664349e-06, + "loss": 0.01455704, + "memory(GiB)": 26.31, + "step": 1450, + "train_speed(iter/s)": 0.582861 + }, + { + "acc": 0.99538994, + "epoch": 1.5429480381760339, + "grad_norm": 0.5929186344146729, + "learning_rate": 9.705845299342261e-06, + "loss": 0.01419341, + "memory(GiB)": 26.31, + "step": 1455, + "train_speed(iter/s)": 0.582874 + }, + { + "acc": 0.99502916, + "epoch": 1.5482502651113468, + "grad_norm": 0.7457852959632874, + "learning_rate": 9.70287534402351e-06, + "loss": 0.01404118, + "memory(GiB)": 26.31, + "step": 1460, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99454365, + "epoch": 1.5535524920466597, + "grad_norm": 0.7059531211853027, + "learning_rate": 9.699890929819277e-06, + "loss": 0.01576061, + "memory(GiB)": 26.31, + "step": 1465, + "train_speed(iter/s)": 0.582906 + }, + { + "acc": 0.99530487, + "epoch": 1.5588547189819724, + "grad_norm": 0.594176709651947, + "learning_rate": 9.696892065906045e-06, + "loss": 0.01272766, + "memory(GiB)": 26.31, + "step": 1470, + "train_speed(iter/s)": 0.582924 + }, + { + "acc": 0.99706202, + "epoch": 1.5641569459172853, + "grad_norm": 0.45875370502471924, + "learning_rate": 9.693878761504738e-06, + "loss": 0.01053843, + "memory(GiB)": 26.31, + "step": 1475, + "train_speed(iter/s)": 0.58292 + }, + { + "acc": 0.99492579, + "epoch": 1.5694591728525982, + "grad_norm": 0.6784182786941528, + "learning_rate": 9.690851025880677e-06, + "loss": 0.01531172, + "memory(GiB)": 26.31, + "step": 1480, + "train_speed(iter/s)": 0.582924 + }, + { + "acc": 0.99470387, + "epoch": 1.5747613997879109, + "grad_norm": 0.7049443125724792, + "learning_rate": 9.687808868343558e-06, + "loss": 0.01651053, + "memory(GiB)": 26.31, + "step": 1485, + "train_speed(iter/s)": 0.582923 + }, + { + "acc": 0.99574337, + "epoch": 1.5800636267232238, + "grad_norm": 0.8059265613555908, + "learning_rate": 9.684752298247424e-06, + "loss": 0.01243878, + "memory(GiB)": 26.31, + "step": 1490, + "train_speed(iter/s)": 0.582924 + }, + { + "acc": 0.99399509, + "epoch": 1.5853658536585367, + "grad_norm": 0.9278422594070435, + "learning_rate": 9.681681324990627e-06, + "loss": 0.01400151, + "memory(GiB)": 26.31, + "step": 1495, + "train_speed(iter/s)": 0.582942 + }, + { + "acc": 0.99648762, + "epoch": 1.5906680805938493, + "grad_norm": 0.5391660928726196, + "learning_rate": 9.678595958015809e-06, + "loss": 0.01022374, + "memory(GiB)": 26.31, + "step": 1500, + "train_speed(iter/s)": 0.582956 + }, + { + "acc": 0.99651642, + "epoch": 1.5959703075291622, + "grad_norm": 0.7209609746932983, + "learning_rate": 9.675496206809875e-06, + "loss": 0.01008506, + "memory(GiB)": 26.31, + "step": 1505, + "train_speed(iter/s)": 0.582961 + }, + { + "acc": 0.99613867, + "epoch": 1.6012725344644752, + "grad_norm": 0.7365092039108276, + "learning_rate": 9.672382080903952e-06, + "loss": 0.01454763, + "memory(GiB)": 26.31, + "step": 1510, + "train_speed(iter/s)": 0.582962 + }, + { + "acc": 0.99595547, + "epoch": 1.6065747613997878, + "grad_norm": 0.7338438630104065, + "learning_rate": 9.669253589873369e-06, + "loss": 0.01647771, + "memory(GiB)": 26.31, + "step": 1515, + "train_speed(iter/s)": 0.582973 + }, + { + "acc": 0.99618711, + "epoch": 1.6118769883351007, + "grad_norm": 0.26463937759399414, + "learning_rate": 9.666110743337625e-06, + "loss": 0.01331544, + "memory(GiB)": 26.31, + "step": 1520, + "train_speed(iter/s)": 0.582974 + }, + { + "acc": 0.99598131, + "epoch": 1.6171792152704136, + "grad_norm": 0.49993637204170227, + "learning_rate": 9.662953550960357e-06, + "loss": 0.01459299, + "memory(GiB)": 26.31, + "step": 1525, + "train_speed(iter/s)": 0.582978 + }, + { + "acc": 0.99672394, + "epoch": 1.6224814422057263, + "grad_norm": 0.5060617923736572, + "learning_rate": 9.659782022449317e-06, + "loss": 0.01016317, + "memory(GiB)": 26.31, + "step": 1530, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99469109, + "epoch": 1.6277836691410392, + "grad_norm": 0.4732325077056885, + "learning_rate": 9.656596167556335e-06, + "loss": 0.01715919, + "memory(GiB)": 26.31, + "step": 1535, + "train_speed(iter/s)": 0.583017 + }, + { + "acc": 0.99556866, + "epoch": 1.6330858960763521, + "grad_norm": 0.7603917717933655, + "learning_rate": 9.653395996077293e-06, + "loss": 0.01328594, + "memory(GiB)": 26.31, + "step": 1540, + "train_speed(iter/s)": 0.583006 + }, + { + "acc": 0.99626541, + "epoch": 1.6383881230116648, + "grad_norm": 0.781606912612915, + "learning_rate": 9.650181517852092e-06, + "loss": 0.0128001, + "memory(GiB)": 26.31, + "step": 1545, + "train_speed(iter/s)": 0.582998 + }, + { + "acc": 0.99559555, + "epoch": 1.6436903499469777, + "grad_norm": 0.42572420835494995, + "learning_rate": 9.646952742764624e-06, + "loss": 0.01503129, + "memory(GiB)": 26.31, + "step": 1550, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99584656, + "epoch": 1.6489925768822906, + "grad_norm": 0.5383741855621338, + "learning_rate": 9.643709680742746e-06, + "loss": 0.01389532, + "memory(GiB)": 26.31, + "step": 1555, + "train_speed(iter/s)": 0.582991 + }, + { + "acc": 0.99579649, + "epoch": 1.6542948038176033, + "grad_norm": 0.6257635951042175, + "learning_rate": 9.640452341758233e-06, + "loss": 0.01323799, + "memory(GiB)": 26.31, + "step": 1560, + "train_speed(iter/s)": 0.582982 + }, + { + "acc": 0.99618187, + "epoch": 1.6595970307529162, + "grad_norm": 0.5762199759483337, + "learning_rate": 9.637180735826771e-06, + "loss": 0.01221062, + "memory(GiB)": 26.31, + "step": 1565, + "train_speed(iter/s)": 0.582976 + }, + { + "acc": 0.99726114, + "epoch": 1.664899257688229, + "grad_norm": 0.4736613929271698, + "learning_rate": 9.633894873007907e-06, + "loss": 0.00984205, + "memory(GiB)": 26.31, + "step": 1570, + "train_speed(iter/s)": 0.58297 + }, + { + "acc": 0.99648628, + "epoch": 1.6702014846235418, + "grad_norm": 0.7545337080955505, + "learning_rate": 9.63059476340503e-06, + "loss": 0.01090518, + "memory(GiB)": 26.31, + "step": 1575, + "train_speed(iter/s)": 0.582973 + }, + { + "acc": 0.99693813, + "epoch": 1.6755037115588547, + "grad_norm": 0.45600301027297974, + "learning_rate": 9.62728041716533e-06, + "loss": 0.0102604, + "memory(GiB)": 26.31, + "step": 1580, + "train_speed(iter/s)": 0.582965 + }, + { + "acc": 0.99703884, + "epoch": 1.6808059384941676, + "grad_norm": 0.5730128884315491, + "learning_rate": 9.623951844479772e-06, + "loss": 0.00968609, + "memory(GiB)": 26.31, + "step": 1585, + "train_speed(iter/s)": 0.582958 + }, + { + "acc": 0.99681854, + "epoch": 1.6861081654294803, + "grad_norm": 0.6165497303009033, + "learning_rate": 9.620609055583071e-06, + "loss": 0.01058234, + "memory(GiB)": 26.31, + "step": 1590, + "train_speed(iter/s)": 0.582963 + }, + { + "acc": 0.99705038, + "epoch": 1.6914103923647932, + "grad_norm": 0.5090743899345398, + "learning_rate": 9.617252060753647e-06, + "loss": 0.01027542, + "memory(GiB)": 26.31, + "step": 1595, + "train_speed(iter/s)": 0.582956 + }, + { + "acc": 0.99618645, + "epoch": 1.696712619300106, + "grad_norm": 0.7163627743721008, + "learning_rate": 9.613880870313604e-06, + "loss": 0.01235223, + "memory(GiB)": 26.31, + "step": 1600, + "train_speed(iter/s)": 0.582941 + }, + { + "acc": 0.997223, + "epoch": 1.7020148462354188, + "grad_norm": 0.5650729537010193, + "learning_rate": 9.610495494628696e-06, + "loss": 0.00963061, + "memory(GiB)": 26.31, + "step": 1605, + "train_speed(iter/s)": 0.582941 + }, + { + "acc": 0.99562531, + "epoch": 1.7073170731707317, + "grad_norm": 0.64713054895401, + "learning_rate": 9.60709594410829e-06, + "loss": 0.01414991, + "memory(GiB)": 26.31, + "step": 1610, + "train_speed(iter/s)": 0.582936 + }, + { + "acc": 0.996521, + "epoch": 1.7126193001060446, + "grad_norm": 0.7104651927947998, + "learning_rate": 9.603682229205338e-06, + "loss": 0.01142905, + "memory(GiB)": 26.31, + "step": 1615, + "train_speed(iter/s)": 0.582941 + }, + { + "acc": 0.99511213, + "epoch": 1.7179215270413573, + "grad_norm": 1.0390896797180176, + "learning_rate": 9.600254360416347e-06, + "loss": 0.0135205, + "memory(GiB)": 26.31, + "step": 1620, + "train_speed(iter/s)": 0.582944 + }, + { + "acc": 0.99557877, + "epoch": 1.7232237539766702, + "grad_norm": 0.7820794582366943, + "learning_rate": 9.596812348281348e-06, + "loss": 0.01177459, + "memory(GiB)": 26.31, + "step": 1625, + "train_speed(iter/s)": 0.582937 + }, + { + "acc": 0.99583693, + "epoch": 1.728525980911983, + "grad_norm": 0.8768166899681091, + "learning_rate": 9.59335620338385e-06, + "loss": 0.01301484, + "memory(GiB)": 26.31, + "step": 1630, + "train_speed(iter/s)": 0.582927 + }, + { + "acc": 0.99763937, + "epoch": 1.7338282078472957, + "grad_norm": 0.7672966718673706, + "learning_rate": 9.589885936350828e-06, + "loss": 0.00735577, + "memory(GiB)": 26.31, + "step": 1635, + "train_speed(iter/s)": 0.582922 + }, + { + "acc": 0.99700527, + "epoch": 1.7391304347826086, + "grad_norm": 0.5951688289642334, + "learning_rate": 9.586401557852673e-06, + "loss": 0.00939723, + "memory(GiB)": 26.31, + "step": 1640, + "train_speed(iter/s)": 0.582916 + }, + { + "acc": 0.99769516, + "epoch": 1.7444326617179216, + "grad_norm": 0.5226224660873413, + "learning_rate": 9.58290307860317e-06, + "loss": 0.00775344, + "memory(GiB)": 26.31, + "step": 1645, + "train_speed(iter/s)": 0.582912 + }, + { + "acc": 0.99687271, + "epoch": 1.7497348886532342, + "grad_norm": 0.6737238168716431, + "learning_rate": 9.579390509359456e-06, + "loss": 0.00943484, + "memory(GiB)": 26.31, + "step": 1650, + "train_speed(iter/s)": 0.582911 + }, + { + "acc": 0.99588261, + "epoch": 1.7550371155885471, + "grad_norm": 0.8879348635673523, + "learning_rate": 9.575863860921995e-06, + "loss": 0.01161455, + "memory(GiB)": 26.31, + "step": 1655, + "train_speed(iter/s)": 0.582904 + }, + { + "acc": 0.99591579, + "epoch": 1.76033934252386, + "grad_norm": 0.8889970183372498, + "learning_rate": 9.572323144134546e-06, + "loss": 0.01313305, + "memory(GiB)": 26.31, + "step": 1660, + "train_speed(iter/s)": 0.582896 + }, + { + "acc": 0.99647074, + "epoch": 1.7656415694591727, + "grad_norm": 0.691448986530304, + "learning_rate": 9.568768369884119e-06, + "loss": 0.01091637, + "memory(GiB)": 26.31, + "step": 1665, + "train_speed(iter/s)": 0.58289 + }, + { + "acc": 0.9959466, + "epoch": 1.7709437963944858, + "grad_norm": 0.752933144569397, + "learning_rate": 9.565199549100948e-06, + "loss": 0.01170037, + "memory(GiB)": 26.31, + "step": 1670, + "train_speed(iter/s)": 0.58289 + }, + { + "acc": 0.99625635, + "epoch": 1.7762460233297985, + "grad_norm": 0.7446765899658203, + "learning_rate": 9.561616692758463e-06, + "loss": 0.01202907, + "memory(GiB)": 26.31, + "step": 1675, + "train_speed(iter/s)": 0.582879 + }, + { + "acc": 0.996422, + "epoch": 1.7815482502651112, + "grad_norm": 0.6502768397331238, + "learning_rate": 9.558019811873248e-06, + "loss": 0.01007521, + "memory(GiB)": 26.31, + "step": 1680, + "train_speed(iter/s)": 0.582874 + }, + { + "acc": 0.99729595, + "epoch": 1.7868504772004243, + "grad_norm": 0.6278144121170044, + "learning_rate": 9.554408917505007e-06, + "loss": 0.00926385, + "memory(GiB)": 26.31, + "step": 1685, + "train_speed(iter/s)": 0.582863 + }, + { + "acc": 0.99589462, + "epoch": 1.792152704135737, + "grad_norm": 0.8441500663757324, + "learning_rate": 9.550784020756535e-06, + "loss": 0.01101158, + "memory(GiB)": 26.31, + "step": 1690, + "train_speed(iter/s)": 0.582853 + }, + { + "acc": 0.99660549, + "epoch": 1.7974549310710497, + "grad_norm": 0.6982602477073669, + "learning_rate": 9.54714513277368e-06, + "loss": 0.00878714, + "memory(GiB)": 26.31, + "step": 1695, + "train_speed(iter/s)": 0.582859 + }, + { + "acc": 0.99487324, + "epoch": 1.8027571580063628, + "grad_norm": 0.4580039083957672, + "learning_rate": 9.543492264745314e-06, + "loss": 0.0145345, + "memory(GiB)": 26.31, + "step": 1700, + "train_speed(iter/s)": 0.582852 + }, + { + "acc": 0.9955162, + "epoch": 1.8080593849416755, + "grad_norm": 0.47468751668930054, + "learning_rate": 9.539825427903293e-06, + "loss": 0.01137172, + "memory(GiB)": 26.31, + "step": 1705, + "train_speed(iter/s)": 0.582862 + }, + { + "acc": 0.99595766, + "epoch": 1.8133616118769882, + "grad_norm": 0.8541509509086609, + "learning_rate": 9.536144633522422e-06, + "loss": 0.01074421, + "memory(GiB)": 26.31, + "step": 1710, + "train_speed(iter/s)": 0.582867 + }, + { + "acc": 0.99637671, + "epoch": 1.8186638388123013, + "grad_norm": 0.4724918305873871, + "learning_rate": 9.532449892920423e-06, + "loss": 0.00988533, + "memory(GiB)": 26.31, + "step": 1715, + "train_speed(iter/s)": 0.582875 + }, + { + "acc": 0.99641857, + "epoch": 1.823966065747614, + "grad_norm": 0.584002673625946, + "learning_rate": 9.528741217457906e-06, + "loss": 0.0138078, + "memory(GiB)": 26.31, + "step": 1720, + "train_speed(iter/s)": 0.582865 + }, + { + "acc": 0.99653225, + "epoch": 1.8292682926829267, + "grad_norm": 0.477457195520401, + "learning_rate": 9.525018618538319e-06, + "loss": 0.00891152, + "memory(GiB)": 26.31, + "step": 1725, + "train_speed(iter/s)": 0.582862 + }, + { + "acc": 0.99620132, + "epoch": 1.8345705196182398, + "grad_norm": 0.57017982006073, + "learning_rate": 9.52128210760793e-06, + "loss": 0.0105824, + "memory(GiB)": 26.31, + "step": 1730, + "train_speed(iter/s)": 0.582852 + }, + { + "acc": 0.99704952, + "epoch": 1.8398727465535525, + "grad_norm": 0.32321634888648987, + "learning_rate": 9.51753169615578e-06, + "loss": 0.00954672, + "memory(GiB)": 26.31, + "step": 1735, + "train_speed(iter/s)": 0.582854 + }, + { + "acc": 0.99546967, + "epoch": 1.8451749734888652, + "grad_norm": 0.6538419127464294, + "learning_rate": 9.513767395713647e-06, + "loss": 0.01392893, + "memory(GiB)": 26.31, + "step": 1740, + "train_speed(iter/s)": 0.582844 + }, + { + "acc": 0.99725323, + "epoch": 1.8504772004241783, + "grad_norm": 0.5553358793258667, + "learning_rate": 9.509989217856022e-06, + "loss": 0.00883438, + "memory(GiB)": 26.31, + "step": 1745, + "train_speed(iter/s)": 0.582835 + }, + { + "acc": 0.99713745, + "epoch": 1.855779427359491, + "grad_norm": 0.5093122124671936, + "learning_rate": 9.506197174200066e-06, + "loss": 0.01078411, + "memory(GiB)": 26.31, + "step": 1750, + "train_speed(iter/s)": 0.582837 + }, + { + "acc": 0.99583797, + "epoch": 1.8610816542948037, + "grad_norm": 0.6216493844985962, + "learning_rate": 9.502391276405571e-06, + "loss": 0.01271906, + "memory(GiB)": 26.31, + "step": 1755, + "train_speed(iter/s)": 0.582826 + }, + { + "acc": 0.99700794, + "epoch": 1.8663838812301168, + "grad_norm": 0.9486095309257507, + "learning_rate": 9.498571536174927e-06, + "loss": 0.01122559, + "memory(GiB)": 26.31, + "step": 1760, + "train_speed(iter/s)": 0.582819 + }, + { + "acc": 0.99467316, + "epoch": 1.8716861081654295, + "grad_norm": 0.5736285448074341, + "learning_rate": 9.494737965253094e-06, + "loss": 0.01514575, + "memory(GiB)": 26.31, + "step": 1765, + "train_speed(iter/s)": 0.582812 + }, + { + "acc": 0.99623184, + "epoch": 1.8769883351007424, + "grad_norm": 0.7306117415428162, + "learning_rate": 9.490890575427549e-06, + "loss": 0.01168928, + "memory(GiB)": 26.31, + "step": 1770, + "train_speed(iter/s)": 0.582805 + }, + { + "acc": 0.99540997, + "epoch": 1.8822905620360553, + "grad_norm": 0.7581856846809387, + "learning_rate": 9.487029378528265e-06, + "loss": 0.01336176, + "memory(GiB)": 26.31, + "step": 1775, + "train_speed(iter/s)": 0.582793 + }, + { + "acc": 0.99567566, + "epoch": 1.887592788971368, + "grad_norm": 0.6911209225654602, + "learning_rate": 9.483154386427669e-06, + "loss": 0.01130526, + "memory(GiB)": 26.31, + "step": 1780, + "train_speed(iter/s)": 0.582791 + }, + { + "acc": 0.99613075, + "epoch": 1.8928950159066809, + "grad_norm": 0.535045325756073, + "learning_rate": 9.479265611040605e-06, + "loss": 0.01026231, + "memory(GiB)": 26.31, + "step": 1785, + "train_speed(iter/s)": 0.582792 + }, + { + "acc": 0.99551563, + "epoch": 1.8981972428419938, + "grad_norm": 0.626434326171875, + "learning_rate": 9.475363064324295e-06, + "loss": 0.01337331, + "memory(GiB)": 26.31, + "step": 1790, + "train_speed(iter/s)": 0.582783 + }, + { + "acc": 0.99523849, + "epoch": 1.9034994697773064, + "grad_norm": 0.62039715051651, + "learning_rate": 9.47144675827831e-06, + "loss": 0.01312898, + "memory(GiB)": 26.31, + "step": 1795, + "train_speed(iter/s)": 0.582786 + }, + { + "acc": 0.99659777, + "epoch": 1.9088016967126193, + "grad_norm": 0.5358000993728638, + "learning_rate": 9.467516704944526e-06, + "loss": 0.01025775, + "memory(GiB)": 26.31, + "step": 1800, + "train_speed(iter/s)": 0.582775 + }, + { + "acc": 0.99596386, + "epoch": 1.9141039236479322, + "grad_norm": 0.5743092894554138, + "learning_rate": 9.46357291640709e-06, + "loss": 0.01595644, + "memory(GiB)": 26.31, + "step": 1805, + "train_speed(iter/s)": 0.582773 + }, + { + "acc": 0.99663105, + "epoch": 1.919406150583245, + "grad_norm": 0.8163368105888367, + "learning_rate": 9.459615404792381e-06, + "loss": 0.01093026, + "memory(GiB)": 26.31, + "step": 1810, + "train_speed(iter/s)": 0.582761 + }, + { + "acc": 0.99598207, + "epoch": 1.9247083775185578, + "grad_norm": 0.8356859087944031, + "learning_rate": 9.45564418226897e-06, + "loss": 0.0133225, + "memory(GiB)": 26.31, + "step": 1815, + "train_speed(iter/s)": 0.582743 + }, + { + "acc": 0.99689674, + "epoch": 1.9300106044538707, + "grad_norm": 0.6908077597618103, + "learning_rate": 9.451659261047595e-06, + "loss": 0.00967747, + "memory(GiB)": 26.31, + "step": 1820, + "train_speed(iter/s)": 0.582732 + }, + { + "acc": 0.99649096, + "epoch": 1.9353128313891834, + "grad_norm": 0.3842003345489502, + "learning_rate": 9.447660653381107e-06, + "loss": 0.01140719, + "memory(GiB)": 26.31, + "step": 1825, + "train_speed(iter/s)": 0.582738 + }, + { + "acc": 0.99603977, + "epoch": 1.9406150583244963, + "grad_norm": 0.95283043384552, + "learning_rate": 9.443648371564445e-06, + "loss": 0.01253378, + "memory(GiB)": 26.31, + "step": 1830, + "train_speed(iter/s)": 0.582727 + }, + { + "acc": 0.99565907, + "epoch": 1.9459172852598092, + "grad_norm": 0.5776175856590271, + "learning_rate": 9.439622427934594e-06, + "loss": 0.01431614, + "memory(GiB)": 26.31, + "step": 1835, + "train_speed(iter/s)": 0.582718 + }, + { + "acc": 0.99552002, + "epoch": 1.951219512195122, + "grad_norm": 0.516409158706665, + "learning_rate": 9.435582834870539e-06, + "loss": 0.01414626, + "memory(GiB)": 26.31, + "step": 1840, + "train_speed(iter/s)": 0.582723 + }, + { + "acc": 0.99604492, + "epoch": 1.9565217391304348, + "grad_norm": 0.5727446675300598, + "learning_rate": 9.431529604793246e-06, + "loss": 0.01338579, + "memory(GiB)": 26.31, + "step": 1845, + "train_speed(iter/s)": 0.582705 + }, + { + "acc": 0.99671364, + "epoch": 1.9618239660657477, + "grad_norm": 0.5135867595672607, + "learning_rate": 9.4274627501656e-06, + "loss": 0.01212858, + "memory(GiB)": 26.31, + "step": 1850, + "train_speed(iter/s)": 0.582702 + }, + { + "acc": 0.99795332, + "epoch": 1.9671261930010604, + "grad_norm": 0.27841368317604065, + "learning_rate": 9.423382283492386e-06, + "loss": 0.00771015, + "memory(GiB)": 26.31, + "step": 1855, + "train_speed(iter/s)": 0.582688 + }, + { + "acc": 0.99660807, + "epoch": 1.9724284199363733, + "grad_norm": 0.5688284039497375, + "learning_rate": 9.419288217320243e-06, + "loss": 0.01114456, + "memory(GiB)": 26.31, + "step": 1860, + "train_speed(iter/s)": 0.582679 + }, + { + "acc": 0.99710732, + "epoch": 1.9777306468716862, + "grad_norm": 0.4894907772541046, + "learning_rate": 9.415180564237623e-06, + "loss": 0.00835708, + "memory(GiB)": 26.31, + "step": 1865, + "train_speed(iter/s)": 0.582673 + }, + { + "acc": 0.99726276, + "epoch": 1.9830328738069989, + "grad_norm": 0.8209954500198364, + "learning_rate": 9.411059336874757e-06, + "loss": 0.01041235, + "memory(GiB)": 26.31, + "step": 1870, + "train_speed(iter/s)": 0.582657 + }, + { + "acc": 0.99661264, + "epoch": 1.9883351007423118, + "grad_norm": 0.5984225869178772, + "learning_rate": 9.406924547903615e-06, + "loss": 0.01018832, + "memory(GiB)": 26.31, + "step": 1875, + "train_speed(iter/s)": 0.582656 + }, + { + "acc": 0.99723349, + "epoch": 1.9936373276776247, + "grad_norm": 0.7700032591819763, + "learning_rate": 9.402776210037867e-06, + "loss": 0.00970466, + "memory(GiB)": 26.31, + "step": 1880, + "train_speed(iter/s)": 0.582653 + }, + { + "acc": 0.99703884, + "epoch": 1.9989395546129374, + "grad_norm": 0.49653443694114685, + "learning_rate": 9.398614336032837e-06, + "loss": 0.00890055, + "memory(GiB)": 26.31, + "step": 1885, + "train_speed(iter/s)": 0.582645 + }, + { + "acc": 0.9972353, + "epoch": 2.00424178154825, + "grad_norm": 0.6007420420646667, + "learning_rate": 9.394438938685476e-06, + "loss": 0.00938985, + "memory(GiB)": 26.31, + "step": 1890, + "train_speed(iter/s)": 0.582456 + }, + { + "acc": 0.99653854, + "epoch": 2.009544008483563, + "grad_norm": 0.6522446274757385, + "learning_rate": 9.390250030834319e-06, + "loss": 0.0111994, + "memory(GiB)": 26.31, + "step": 1895, + "train_speed(iter/s)": 0.582457 + }, + { + "acc": 0.99756927, + "epoch": 2.014846235418876, + "grad_norm": 0.6940723061561584, + "learning_rate": 9.386047625359436e-06, + "loss": 0.00796779, + "memory(GiB)": 26.31, + "step": 1900, + "train_speed(iter/s)": 0.582464 + }, + { + "acc": 0.99708109, + "epoch": 2.0201484623541885, + "grad_norm": 0.5657925009727478, + "learning_rate": 9.3818317351824e-06, + "loss": 0.00889682, + "memory(GiB)": 26.31, + "step": 1905, + "train_speed(iter/s)": 0.582458 + }, + { + "acc": 0.99729185, + "epoch": 2.0254506892895017, + "grad_norm": 0.793789267539978, + "learning_rate": 9.377602373266253e-06, + "loss": 0.01040011, + "memory(GiB)": 26.31, + "step": 1910, + "train_speed(iter/s)": 0.582452 + }, + { + "acc": 0.99636059, + "epoch": 2.0307529162248144, + "grad_norm": 0.7435290813446045, + "learning_rate": 9.373359552615459e-06, + "loss": 0.009646, + "memory(GiB)": 26.31, + "step": 1915, + "train_speed(iter/s)": 0.582452 + }, + { + "acc": 0.99647522, + "epoch": 2.0360551431601275, + "grad_norm": 1.047269344329834, + "learning_rate": 9.369103286275857e-06, + "loss": 0.00896999, + "memory(GiB)": 26.31, + "step": 1920, + "train_speed(iter/s)": 0.582465 + }, + { + "acc": 0.99617004, + "epoch": 2.04135737009544, + "grad_norm": 0.503805935382843, + "learning_rate": 9.36483358733464e-06, + "loss": 0.01182005, + "memory(GiB)": 26.31, + "step": 1925, + "train_speed(iter/s)": 0.582473 + }, + { + "acc": 0.99728832, + "epoch": 2.046659597030753, + "grad_norm": 0.503113865852356, + "learning_rate": 9.360550468920292e-06, + "loss": 0.0090973, + "memory(GiB)": 26.31, + "step": 1930, + "train_speed(iter/s)": 0.582476 + }, + { + "acc": 0.99628162, + "epoch": 2.0519618239660655, + "grad_norm": 0.5395364761352539, + "learning_rate": 9.35625394420257e-06, + "loss": 0.01315861, + "memory(GiB)": 26.31, + "step": 1935, + "train_speed(iter/s)": 0.582478 + }, + { + "acc": 0.99678946, + "epoch": 2.0572640509013786, + "grad_norm": 0.8396658301353455, + "learning_rate": 9.351944026392447e-06, + "loss": 0.00879585, + "memory(GiB)": 26.31, + "step": 1940, + "train_speed(iter/s)": 0.582479 + }, + { + "acc": 0.99789181, + "epoch": 2.0625662778366913, + "grad_norm": 0.5394271016120911, + "learning_rate": 9.347620728742079e-06, + "loss": 0.00796552, + "memory(GiB)": 26.31, + "step": 1945, + "train_speed(iter/s)": 0.582487 + }, + { + "acc": 0.99663525, + "epoch": 2.0678685047720045, + "grad_norm": 0.47398996353149414, + "learning_rate": 9.343284064544758e-06, + "loss": 0.00746697, + "memory(GiB)": 26.31, + "step": 1950, + "train_speed(iter/s)": 0.582499 + }, + { + "acc": 0.99564629, + "epoch": 2.073170731707317, + "grad_norm": 0.7614686489105225, + "learning_rate": 9.338934047134884e-06, + "loss": 0.01439559, + "memory(GiB)": 26.31, + "step": 1955, + "train_speed(iter/s)": 0.582505 + }, + { + "acc": 0.99717464, + "epoch": 2.07847295864263, + "grad_norm": 0.4342188537120819, + "learning_rate": 9.334570689887908e-06, + "loss": 0.00976835, + "memory(GiB)": 26.31, + "step": 1960, + "train_speed(iter/s)": 0.582507 + }, + { + "acc": 0.99687204, + "epoch": 2.083775185577943, + "grad_norm": 0.42526519298553467, + "learning_rate": 9.330194006220301e-06, + "loss": 0.00783934, + "memory(GiB)": 26.31, + "step": 1965, + "train_speed(iter/s)": 0.582505 + }, + { + "acc": 0.99659252, + "epoch": 2.0890774125132556, + "grad_norm": 0.707332968711853, + "learning_rate": 9.325804009589513e-06, + "loss": 0.0122927, + "memory(GiB)": 26.31, + "step": 1970, + "train_speed(iter/s)": 0.582508 + }, + { + "acc": 0.9970993, + "epoch": 2.0943796394485683, + "grad_norm": 0.43426114320755005, + "learning_rate": 9.32140071349392e-06, + "loss": 0.00998023, + "memory(GiB)": 26.31, + "step": 1975, + "train_speed(iter/s)": 0.58251 + }, + { + "acc": 0.99600496, + "epoch": 2.0996818663838814, + "grad_norm": 0.5607444047927856, + "learning_rate": 9.316984131472804e-06, + "loss": 0.01285059, + "memory(GiB)": 26.31, + "step": 1980, + "train_speed(iter/s)": 0.58253 + }, + { + "acc": 0.99762526, + "epoch": 2.104984093319194, + "grad_norm": 0.4278039336204529, + "learning_rate": 9.312554277106289e-06, + "loss": 0.00749103, + "memory(GiB)": 26.31, + "step": 1985, + "train_speed(iter/s)": 0.582541 + }, + { + "acc": 0.99604216, + "epoch": 2.110286320254507, + "grad_norm": 0.5797408223152161, + "learning_rate": 9.30811116401531e-06, + "loss": 0.01161444, + "memory(GiB)": 26.31, + "step": 1990, + "train_speed(iter/s)": 0.582544 + }, + { + "acc": 0.9973856, + "epoch": 2.11558854718982, + "grad_norm": 0.6868969202041626, + "learning_rate": 9.303654805861572e-06, + "loss": 0.00661921, + "memory(GiB)": 26.31, + "step": 1995, + "train_speed(iter/s)": 0.582553 + }, + { + "acc": 0.99702368, + "epoch": 2.1208907741251326, + "grad_norm": 0.42404189705848694, + "learning_rate": 9.29918521634751e-06, + "loss": 0.008734, + "memory(GiB)": 26.31, + "step": 2000, + "train_speed(iter/s)": 0.582562 + }, + { + "acc": 0.99814854, + "epoch": 2.1261930010604453, + "grad_norm": 0.5553742051124573, + "learning_rate": 9.294702409216235e-06, + "loss": 0.00553756, + "memory(GiB)": 26.31, + "step": 2005, + "train_speed(iter/s)": 0.582563 + }, + { + "acc": 0.99660997, + "epoch": 2.1314952279957584, + "grad_norm": 0.7405137419700623, + "learning_rate": 9.290206398251501e-06, + "loss": 0.01015682, + "memory(GiB)": 26.31, + "step": 2010, + "train_speed(iter/s)": 0.582562 + }, + { + "acc": 0.99767761, + "epoch": 2.136797454931071, + "grad_norm": 1.012142300605774, + "learning_rate": 9.285697197277668e-06, + "loss": 0.00823362, + "memory(GiB)": 26.31, + "step": 2015, + "train_speed(iter/s)": 0.582563 + }, + { + "acc": 0.99828796, + "epoch": 2.1420996818663838, + "grad_norm": 0.6939220428466797, + "learning_rate": 9.281174820159645e-06, + "loss": 0.00619173, + "memory(GiB)": 26.31, + "step": 2020, + "train_speed(iter/s)": 0.582563 + }, + { + "acc": 0.99645348, + "epoch": 2.147401908801697, + "grad_norm": 0.8721736073493958, + "learning_rate": 9.276639280802859e-06, + "loss": 0.01071875, + "memory(GiB)": 26.31, + "step": 2025, + "train_speed(iter/s)": 0.582584 + }, + { + "acc": 0.9964325, + "epoch": 2.1527041357370096, + "grad_norm": 0.6826593279838562, + "learning_rate": 9.272090593153209e-06, + "loss": 0.01154263, + "memory(GiB)": 26.31, + "step": 2030, + "train_speed(iter/s)": 0.582597 + }, + { + "acc": 0.99698286, + "epoch": 2.1580063626723223, + "grad_norm": 0.5387014150619507, + "learning_rate": 9.267528771197019e-06, + "loss": 0.01052478, + "memory(GiB)": 26.31, + "step": 2035, + "train_speed(iter/s)": 0.582604 + }, + { + "acc": 0.99636288, + "epoch": 2.1633085896076354, + "grad_norm": 0.8106936812400818, + "learning_rate": 9.262953828961e-06, + "loss": 0.01369351, + "memory(GiB)": 26.31, + "step": 2040, + "train_speed(iter/s)": 0.582603 + }, + { + "acc": 0.99709806, + "epoch": 2.168610816542948, + "grad_norm": 0.48297539353370667, + "learning_rate": 9.258365780512206e-06, + "loss": 0.00937909, + "memory(GiB)": 26.31, + "step": 2045, + "train_speed(iter/s)": 0.582612 + }, + { + "acc": 0.99722157, + "epoch": 2.1739130434782608, + "grad_norm": 0.6017871499061584, + "learning_rate": 9.25376463995799e-06, + "loss": 0.00824606, + "memory(GiB)": 26.31, + "step": 2050, + "train_speed(iter/s)": 0.58261 + }, + { + "acc": 0.99659395, + "epoch": 2.179215270413574, + "grad_norm": 0.6640163064002991, + "learning_rate": 9.249150421445962e-06, + "loss": 0.01057127, + "memory(GiB)": 26.31, + "step": 2055, + "train_speed(iter/s)": 0.582614 + }, + { + "acc": 0.99606657, + "epoch": 2.1845174973488866, + "grad_norm": 0.718140184879303, + "learning_rate": 9.24452313916394e-06, + "loss": 0.01253057, + "memory(GiB)": 26.31, + "step": 2060, + "train_speed(iter/s)": 0.582622 + }, + { + "acc": 0.9973835, + "epoch": 2.1898197242841992, + "grad_norm": 0.20768648386001587, + "learning_rate": 9.239882807339911e-06, + "loss": 0.00806569, + "memory(GiB)": 26.31, + "step": 2065, + "train_speed(iter/s)": 0.582621 + }, + { + "acc": 0.9973218, + "epoch": 2.1951219512195124, + "grad_norm": 0.37041446566581726, + "learning_rate": 9.235229440241993e-06, + "loss": 0.00846872, + "memory(GiB)": 26.31, + "step": 2070, + "train_speed(iter/s)": 0.582633 + }, + { + "acc": 0.99718323, + "epoch": 2.200424178154825, + "grad_norm": 0.31240540742874146, + "learning_rate": 9.23056305217838e-06, + "loss": 0.00764921, + "memory(GiB)": 26.31, + "step": 2075, + "train_speed(iter/s)": 0.582634 + }, + { + "acc": 0.99602003, + "epoch": 2.2057264050901377, + "grad_norm": 0.5981764197349548, + "learning_rate": 9.2258836574973e-06, + "loss": 0.01258934, + "memory(GiB)": 26.31, + "step": 2080, + "train_speed(iter/s)": 0.582646 + }, + { + "acc": 0.99759808, + "epoch": 2.211028632025451, + "grad_norm": 0.5735823512077332, + "learning_rate": 9.221191270586983e-06, + "loss": 0.00761086, + "memory(GiB)": 26.31, + "step": 2085, + "train_speed(iter/s)": 0.582657 + }, + { + "acc": 0.99688892, + "epoch": 2.2163308589607635, + "grad_norm": 0.4943540692329407, + "learning_rate": 9.216485905875599e-06, + "loss": 0.00940942, + "memory(GiB)": 26.31, + "step": 2090, + "train_speed(iter/s)": 0.582656 + }, + { + "acc": 0.99643354, + "epoch": 2.221633085896076, + "grad_norm": 0.5726579427719116, + "learning_rate": 9.211767577831225e-06, + "loss": 0.01135425, + "memory(GiB)": 26.31, + "step": 2095, + "train_speed(iter/s)": 0.582674 + }, + { + "acc": 0.99656467, + "epoch": 2.2269353128313893, + "grad_norm": 0.6457775831222534, + "learning_rate": 9.207036300961796e-06, + "loss": 0.0108216, + "memory(GiB)": 26.31, + "step": 2100, + "train_speed(iter/s)": 0.582683 + }, + { + "acc": 0.99748325, + "epoch": 2.232237539766702, + "grad_norm": 0.8732754588127136, + "learning_rate": 9.202292089815069e-06, + "loss": 0.00855574, + "memory(GiB)": 26.31, + "step": 2105, + "train_speed(iter/s)": 0.582682 + }, + { + "acc": 0.99805489, + "epoch": 2.2375397667020147, + "grad_norm": 0.48590216040611267, + "learning_rate": 9.197534958978562e-06, + "loss": 0.00908014, + "memory(GiB)": 26.31, + "step": 2110, + "train_speed(iter/s)": 0.582684 + }, + { + "acc": 0.99843454, + "epoch": 2.242841993637328, + "grad_norm": 0.4169938564300537, + "learning_rate": 9.192764923079526e-06, + "loss": 0.0069149, + "memory(GiB)": 26.31, + "step": 2115, + "train_speed(iter/s)": 0.582693 + }, + { + "acc": 0.99805841, + "epoch": 2.2481442205726405, + "grad_norm": 0.6880154609680176, + "learning_rate": 9.187981996784886e-06, + "loss": 0.00760795, + "memory(GiB)": 26.31, + "step": 2120, + "train_speed(iter/s)": 0.582693 + }, + { + "acc": 0.99637814, + "epoch": 2.253446447507953, + "grad_norm": 0.6809533834457397, + "learning_rate": 9.18318619480121e-06, + "loss": 0.00879649, + "memory(GiB)": 26.31, + "step": 2125, + "train_speed(iter/s)": 0.582695 + }, + { + "acc": 0.99643812, + "epoch": 2.2587486744432663, + "grad_norm": 0.7628903388977051, + "learning_rate": 9.17837753187465e-06, + "loss": 0.0110072, + "memory(GiB)": 26.31, + "step": 2130, + "train_speed(iter/s)": 0.582694 + }, + { + "acc": 0.99702005, + "epoch": 2.264050901378579, + "grad_norm": 0.49134159088134766, + "learning_rate": 9.173556022790905e-06, + "loss": 0.01013456, + "memory(GiB)": 26.31, + "step": 2135, + "train_speed(iter/s)": 0.582705 + }, + { + "acc": 0.99699507, + "epoch": 2.2693531283138917, + "grad_norm": 0.553747832775116, + "learning_rate": 9.168721682375173e-06, + "loss": 0.00931663, + "memory(GiB)": 26.31, + "step": 2140, + "train_speed(iter/s)": 0.582705 + }, + { + "acc": 0.99674129, + "epoch": 2.274655355249205, + "grad_norm": 0.662503719329834, + "learning_rate": 9.163874525492108e-06, + "loss": 0.00786655, + "memory(GiB)": 26.31, + "step": 2145, + "train_speed(iter/s)": 0.582705 + }, + { + "acc": 0.9980854, + "epoch": 2.2799575821845175, + "grad_norm": 0.28207457065582275, + "learning_rate": 9.15901456704577e-06, + "loss": 0.00622527, + "memory(GiB)": 26.31, + "step": 2150, + "train_speed(iter/s)": 0.582702 + }, + { + "acc": 0.99700918, + "epoch": 2.28525980911983, + "grad_norm": 0.6629775166511536, + "learning_rate": 9.15414182197958e-06, + "loss": 0.00998119, + "memory(GiB)": 26.31, + "step": 2155, + "train_speed(iter/s)": 0.582698 + }, + { + "acc": 0.9979825, + "epoch": 2.2905620360551433, + "grad_norm": 0.43215715885162354, + "learning_rate": 9.149256305276277e-06, + "loss": 0.00786603, + "memory(GiB)": 26.31, + "step": 2160, + "train_speed(iter/s)": 0.582692 + }, + { + "acc": 0.99758244, + "epoch": 2.295864262990456, + "grad_norm": 0.487210750579834, + "learning_rate": 9.144358031957872e-06, + "loss": 0.00788484, + "memory(GiB)": 26.31, + "step": 2165, + "train_speed(iter/s)": 0.582701 + }, + { + "acc": 0.99730835, + "epoch": 2.3011664899257687, + "grad_norm": 0.3774445652961731, + "learning_rate": 9.1394470170856e-06, + "loss": 0.00904835, + "memory(GiB)": 26.31, + "step": 2170, + "train_speed(iter/s)": 0.5827 + }, + { + "acc": 0.99750452, + "epoch": 2.306468716861082, + "grad_norm": 0.7599917650222778, + "learning_rate": 9.134523275759872e-06, + "loss": 0.00717712, + "memory(GiB)": 26.31, + "step": 2175, + "train_speed(iter/s)": 0.582711 + }, + { + "acc": 0.99696083, + "epoch": 2.3117709437963945, + "grad_norm": 0.6776115894317627, + "learning_rate": 9.129586823120224e-06, + "loss": 0.00911687, + "memory(GiB)": 26.31, + "step": 2180, + "train_speed(iter/s)": 0.582718 + }, + { + "acc": 0.99689713, + "epoch": 2.317073170731707, + "grad_norm": 0.5783346891403198, + "learning_rate": 9.124637674345291e-06, + "loss": 0.00991484, + "memory(GiB)": 26.31, + "step": 2185, + "train_speed(iter/s)": 0.582721 + }, + { + "acc": 0.99779062, + "epoch": 2.3223753976670203, + "grad_norm": 0.6449788212776184, + "learning_rate": 9.119675844652737e-06, + "loss": 0.0080985, + "memory(GiB)": 26.31, + "step": 2190, + "train_speed(iter/s)": 0.582731 + }, + { + "acc": 0.99645386, + "epoch": 2.327677624602333, + "grad_norm": 0.7729847431182861, + "learning_rate": 9.114701349299217e-06, + "loss": 0.01042995, + "memory(GiB)": 26.31, + "step": 2195, + "train_speed(iter/s)": 0.582731 + }, + { + "acc": 0.99811287, + "epoch": 2.3329798515376456, + "grad_norm": 0.5190842747688293, + "learning_rate": 9.109714203580334e-06, + "loss": 0.00516622, + "memory(GiB)": 26.31, + "step": 2200, + "train_speed(iter/s)": 0.582741 + }, + { + "acc": 0.99637775, + "epoch": 2.3382820784729588, + "grad_norm": 0.48969125747680664, + "learning_rate": 9.104714422830586e-06, + "loss": 0.00895324, + "memory(GiB)": 26.31, + "step": 2205, + "train_speed(iter/s)": 0.582743 + }, + { + "acc": 0.99677792, + "epoch": 2.3435843054082715, + "grad_norm": 0.6211754083633423, + "learning_rate": 9.09970202242332e-06, + "loss": 0.0104282, + "memory(GiB)": 26.31, + "step": 2210, + "train_speed(iter/s)": 0.582743 + }, + { + "acc": 0.99684505, + "epoch": 2.348886532343584, + "grad_norm": 0.6650230884552002, + "learning_rate": 9.094677017770692e-06, + "loss": 0.01132647, + "memory(GiB)": 26.31, + "step": 2215, + "train_speed(iter/s)": 0.58276 + }, + { + "acc": 0.99717751, + "epoch": 2.3541887592788973, + "grad_norm": 0.4855519235134125, + "learning_rate": 9.089639424323608e-06, + "loss": 0.00853827, + "memory(GiB)": 26.31, + "step": 2220, + "train_speed(iter/s)": 0.582764 + }, + { + "acc": 0.99725456, + "epoch": 2.35949098621421, + "grad_norm": 0.8926551938056946, + "learning_rate": 9.084589257571682e-06, + "loss": 0.00890769, + "memory(GiB)": 26.31, + "step": 2225, + "train_speed(iter/s)": 0.582764 + }, + { + "acc": 0.99793701, + "epoch": 2.3647932131495226, + "grad_norm": 0.8140351176261902, + "learning_rate": 9.079526533043192e-06, + "loss": 0.00827439, + "memory(GiB)": 26.31, + "step": 2230, + "train_speed(iter/s)": 0.582764 + }, + { + "acc": 0.99684944, + "epoch": 2.3700954400848357, + "grad_norm": 0.7877880334854126, + "learning_rate": 9.074451266305026e-06, + "loss": 0.00902928, + "memory(GiB)": 26.31, + "step": 2235, + "train_speed(iter/s)": 0.582768 + }, + { + "acc": 0.99745779, + "epoch": 2.3753976670201484, + "grad_norm": 0.4703558087348938, + "learning_rate": 9.069363472962639e-06, + "loss": 0.00938336, + "memory(GiB)": 26.31, + "step": 2240, + "train_speed(iter/s)": 0.582771 + }, + { + "acc": 0.99804792, + "epoch": 2.380699893955461, + "grad_norm": 0.49165722727775574, + "learning_rate": 9.064263168659999e-06, + "loss": 0.00661491, + "memory(GiB)": 26.31, + "step": 2245, + "train_speed(iter/s)": 0.582767 + }, + { + "acc": 0.99777336, + "epoch": 2.3860021208907742, + "grad_norm": 0.34549641609191895, + "learning_rate": 9.059150369079548e-06, + "loss": 0.00687485, + "memory(GiB)": 26.31, + "step": 2250, + "train_speed(iter/s)": 0.582768 + }, + { + "acc": 0.99720001, + "epoch": 2.391304347826087, + "grad_norm": 0.45710110664367676, + "learning_rate": 9.054025089942143e-06, + "loss": 0.00874545, + "memory(GiB)": 26.31, + "step": 2255, + "train_speed(iter/s)": 0.582772 + }, + { + "acc": 0.99818325, + "epoch": 2.3966065747613996, + "grad_norm": 0.3455301523208618, + "learning_rate": 9.048887347007018e-06, + "loss": 0.00510659, + "memory(GiB)": 26.31, + "step": 2260, + "train_speed(iter/s)": 0.582786 + }, + { + "acc": 0.99830542, + "epoch": 2.4019088016967127, + "grad_norm": 0.621752142906189, + "learning_rate": 9.043737156071728e-06, + "loss": 0.00655494, + "memory(GiB)": 26.31, + "step": 2265, + "train_speed(iter/s)": 0.582785 + }, + { + "acc": 0.99765148, + "epoch": 2.4072110286320254, + "grad_norm": 0.7850625514984131, + "learning_rate": 9.038574532972108e-06, + "loss": 0.00760993, + "memory(GiB)": 26.31, + "step": 2270, + "train_speed(iter/s)": 0.582794 + }, + { + "acc": 0.99838095, + "epoch": 2.412513255567338, + "grad_norm": 0.8043009638786316, + "learning_rate": 9.033399493582208e-06, + "loss": 0.0075691, + "memory(GiB)": 26.31, + "step": 2275, + "train_speed(iter/s)": 0.582803 + }, + { + "acc": 0.99808836, + "epoch": 2.417815482502651, + "grad_norm": 0.609122097492218, + "learning_rate": 9.028212053814269e-06, + "loss": 0.00771583, + "memory(GiB)": 26.31, + "step": 2280, + "train_speed(iter/s)": 0.582776 + }, + { + "acc": 0.99779768, + "epoch": 2.423117709437964, + "grad_norm": 0.6378943920135498, + "learning_rate": 9.023012229618651e-06, + "loss": 0.00826254, + "memory(GiB)": 26.31, + "step": 2285, + "train_speed(iter/s)": 0.582742 + }, + { + "acc": 0.99719963, + "epoch": 2.4284199363732766, + "grad_norm": 0.5774968266487122, + "learning_rate": 9.0178000369838e-06, + "loss": 0.00730962, + "memory(GiB)": 26.31, + "step": 2290, + "train_speed(iter/s)": 0.582725 + }, + { + "acc": 0.99702272, + "epoch": 2.4337221633085897, + "grad_norm": 0.7756372094154358, + "learning_rate": 9.012575491936189e-06, + "loss": 0.01053988, + "memory(GiB)": 26.31, + "step": 2295, + "train_speed(iter/s)": 0.582741 + }, + { + "acc": 0.99702606, + "epoch": 2.4390243902439024, + "grad_norm": 0.4530414342880249, + "learning_rate": 9.007338610540274e-06, + "loss": 0.00954269, + "memory(GiB)": 26.31, + "step": 2300, + "train_speed(iter/s)": 0.582737 + }, + { + "acc": 0.99730387, + "epoch": 2.4443266171792155, + "grad_norm": 0.4954255223274231, + "learning_rate": 9.00208940889844e-06, + "loss": 0.00952832, + "memory(GiB)": 26.31, + "step": 2305, + "train_speed(iter/s)": 0.582736 + }, + { + "acc": 0.99814606, + "epoch": 2.449628844114528, + "grad_norm": 0.5628678202629089, + "learning_rate": 8.996827903150959e-06, + "loss": 0.00628898, + "memory(GiB)": 26.31, + "step": 2310, + "train_speed(iter/s)": 0.582742 + }, + { + "acc": 0.99818916, + "epoch": 2.454931071049841, + "grad_norm": 0.5463048815727234, + "learning_rate": 8.991554109475933e-06, + "loss": 0.00808192, + "memory(GiB)": 26.31, + "step": 2315, + "train_speed(iter/s)": 0.582743 + }, + { + "acc": 0.99799252, + "epoch": 2.4602332979851536, + "grad_norm": 0.40346306562423706, + "learning_rate": 8.986268044089247e-06, + "loss": 0.00596498, + "memory(GiB)": 26.31, + "step": 2320, + "train_speed(iter/s)": 0.58275 + }, + { + "acc": 0.99781971, + "epoch": 2.4655355249204667, + "grad_norm": 0.6184868812561035, + "learning_rate": 8.980969723244518e-06, + "loss": 0.00875518, + "memory(GiB)": 26.31, + "step": 2325, + "train_speed(iter/s)": 0.58275 + }, + { + "acc": 0.99754887, + "epoch": 2.4708377518557794, + "grad_norm": 0.45224523544311523, + "learning_rate": 8.97565916323305e-06, + "loss": 0.00927275, + "memory(GiB)": 26.31, + "step": 2330, + "train_speed(iter/s)": 0.582754 + }, + { + "acc": 0.99780216, + "epoch": 2.4761399787910925, + "grad_norm": 0.8157827258110046, + "learning_rate": 8.970336380383773e-06, + "loss": 0.00661554, + "memory(GiB)": 26.31, + "step": 2335, + "train_speed(iter/s)": 0.582754 + }, + { + "acc": 0.99679346, + "epoch": 2.481442205726405, + "grad_norm": 0.8614205121994019, + "learning_rate": 8.965001391063212e-06, + "loss": 0.01007259, + "memory(GiB)": 26.31, + "step": 2340, + "train_speed(iter/s)": 0.582759 + }, + { + "acc": 0.99660053, + "epoch": 2.486744432661718, + "grad_norm": 0.7390111684799194, + "learning_rate": 8.95965421167541e-06, + "loss": 0.00984244, + "memory(GiB)": 26.31, + "step": 2345, + "train_speed(iter/s)": 0.58276 + }, + { + "acc": 0.99715099, + "epoch": 2.4920466595970305, + "grad_norm": 0.7102832794189453, + "learning_rate": 8.9542948586619e-06, + "loss": 0.00887956, + "memory(GiB)": 26.31, + "step": 2350, + "train_speed(iter/s)": 0.582762 + }, + { + "acc": 0.99650688, + "epoch": 2.4973488865323437, + "grad_norm": 0.7877767086029053, + "learning_rate": 8.948923348501646e-06, + "loss": 0.01136077, + "memory(GiB)": 26.31, + "step": 2355, + "train_speed(iter/s)": 0.582769 + }, + { + "acc": 0.9968586, + "epoch": 2.5026511134676563, + "grad_norm": 0.6240387558937073, + "learning_rate": 8.943539697710996e-06, + "loss": 0.00829753, + "memory(GiB)": 26.31, + "step": 2360, + "train_speed(iter/s)": 0.582782 + }, + { + "acc": 0.99727345, + "epoch": 2.5079533404029695, + "grad_norm": 0.6722913980484009, + "learning_rate": 8.93814392284362e-06, + "loss": 0.0081228, + "memory(GiB)": 26.31, + "step": 2365, + "train_speed(iter/s)": 0.58278 + }, + { + "acc": 0.99726486, + "epoch": 2.513255567338282, + "grad_norm": 0.6057406067848206, + "learning_rate": 8.932736040490472e-06, + "loss": 0.00857747, + "memory(GiB)": 26.31, + "step": 2370, + "train_speed(iter/s)": 0.582779 + }, + { + "acc": 0.99796257, + "epoch": 2.518557794273595, + "grad_norm": 0.32492902874946594, + "learning_rate": 8.927316067279736e-06, + "loss": 0.00711781, + "memory(GiB)": 26.31, + "step": 2375, + "train_speed(iter/s)": 0.582783 + }, + { + "acc": 0.99653759, + "epoch": 2.5238600212089075, + "grad_norm": 0.4648723304271698, + "learning_rate": 8.921884019876768e-06, + "loss": 0.01062218, + "memory(GiB)": 26.31, + "step": 2380, + "train_speed(iter/s)": 0.582786 + }, + { + "acc": 0.99777451, + "epoch": 2.5291622481442206, + "grad_norm": 0.44860148429870605, + "learning_rate": 8.916439914984055e-06, + "loss": 0.00815452, + "memory(GiB)": 26.31, + "step": 2385, + "train_speed(iter/s)": 0.582782 + }, + { + "acc": 0.99838314, + "epoch": 2.5344644750795333, + "grad_norm": 0.6119086146354675, + "learning_rate": 8.910983769341154e-06, + "loss": 0.00666094, + "memory(GiB)": 26.31, + "step": 2390, + "train_speed(iter/s)": 0.582789 + }, + { + "acc": 0.99723606, + "epoch": 2.5397667020148464, + "grad_norm": 0.46494340896606445, + "learning_rate": 8.905515599724649e-06, + "loss": 0.00702635, + "memory(GiB)": 26.31, + "step": 2395, + "train_speed(iter/s)": 0.58279 + }, + { + "acc": 0.99754772, + "epoch": 2.545068928950159, + "grad_norm": 0.5167229771614075, + "learning_rate": 8.90003542294809e-06, + "loss": 0.00611775, + "memory(GiB)": 26.31, + "step": 2400, + "train_speed(iter/s)": 0.582791 + }, + { + "acc": 0.99728813, + "epoch": 2.550371155885472, + "grad_norm": 0.5445597767829895, + "learning_rate": 8.894543255861953e-06, + "loss": 0.00897106, + "memory(GiB)": 26.31, + "step": 2405, + "train_speed(iter/s)": 0.582791 + }, + { + "acc": 0.99713316, + "epoch": 2.5556733828207845, + "grad_norm": 0.5003531575202942, + "learning_rate": 8.889039115353577e-06, + "loss": 0.00824212, + "memory(GiB)": 26.31, + "step": 2410, + "train_speed(iter/s)": 0.582792 + }, + { + "acc": 0.99826508, + "epoch": 2.5609756097560976, + "grad_norm": 0.6428782343864441, + "learning_rate": 8.883523018347122e-06, + "loss": 0.00542104, + "memory(GiB)": 26.31, + "step": 2415, + "train_speed(iter/s)": 0.582795 + }, + { + "acc": 0.99676142, + "epoch": 2.5662778366914103, + "grad_norm": 1.0514658689498901, + "learning_rate": 8.877994981803503e-06, + "loss": 0.01014628, + "memory(GiB)": 26.31, + "step": 2420, + "train_speed(iter/s)": 0.582802 + }, + { + "acc": 0.99781837, + "epoch": 2.5715800636267234, + "grad_norm": 0.49720627069473267, + "learning_rate": 8.872455022720356e-06, + "loss": 0.00676076, + "memory(GiB)": 26.31, + "step": 2425, + "train_speed(iter/s)": 0.582817 + }, + { + "acc": 0.99802704, + "epoch": 2.576882290562036, + "grad_norm": 0.5802571773529053, + "learning_rate": 8.866903158131972e-06, + "loss": 0.00726605, + "memory(GiB)": 26.31, + "step": 2430, + "train_speed(iter/s)": 0.582818 + }, + { + "acc": 0.99644756, + "epoch": 2.582184517497349, + "grad_norm": 1.120002031326294, + "learning_rate": 8.861339405109253e-06, + "loss": 0.01294475, + "memory(GiB)": 26.31, + "step": 2435, + "train_speed(iter/s)": 0.582841 + }, + { + "acc": 0.99776955, + "epoch": 2.5874867444326615, + "grad_norm": 0.4678545892238617, + "learning_rate": 8.855763780759646e-06, + "loss": 0.00850418, + "memory(GiB)": 26.31, + "step": 2440, + "train_speed(iter/s)": 0.582849 + }, + { + "acc": 0.99819298, + "epoch": 2.5927889713679746, + "grad_norm": 0.4603610932826996, + "learning_rate": 8.850176302227113e-06, + "loss": 0.0065477, + "memory(GiB)": 26.31, + "step": 2445, + "train_speed(iter/s)": 0.582848 + }, + { + "acc": 0.99753914, + "epoch": 2.5980911983032873, + "grad_norm": 0.7381535172462463, + "learning_rate": 8.844576986692056e-06, + "loss": 0.00721828, + "memory(GiB)": 26.31, + "step": 2450, + "train_speed(iter/s)": 0.58285 + }, + { + "acc": 0.99687433, + "epoch": 2.6033934252386004, + "grad_norm": 0.42761534452438354, + "learning_rate": 8.838965851371274e-06, + "loss": 0.00944657, + "memory(GiB)": 26.31, + "step": 2455, + "train_speed(iter/s)": 0.582857 + }, + { + "acc": 0.99773045, + "epoch": 2.608695652173913, + "grad_norm": 0.5802911520004272, + "learning_rate": 8.833342913517916e-06, + "loss": 0.00601513, + "memory(GiB)": 26.31, + "step": 2460, + "train_speed(iter/s)": 0.582836 + }, + { + "acc": 0.99647751, + "epoch": 2.6139978791092258, + "grad_norm": 0.5661634802818298, + "learning_rate": 8.827708190421416e-06, + "loss": 0.01108431, + "memory(GiB)": 26.31, + "step": 2465, + "train_speed(iter/s)": 0.582849 + }, + { + "acc": 0.99830036, + "epoch": 2.6193001060445384, + "grad_norm": 0.5017287731170654, + "learning_rate": 8.822061699407447e-06, + "loss": 0.00618156, + "memory(GiB)": 26.31, + "step": 2470, + "train_speed(iter/s)": 0.582828 + }, + { + "acc": 0.9973937, + "epoch": 2.6246023329798516, + "grad_norm": 0.5929028391838074, + "learning_rate": 8.816403457837865e-06, + "loss": 0.0077717, + "memory(GiB)": 26.31, + "step": 2475, + "train_speed(iter/s)": 0.582848 + }, + { + "acc": 0.99757099, + "epoch": 2.6299045599151643, + "grad_norm": 0.6684390902519226, + "learning_rate": 8.810733483110656e-06, + "loss": 0.00653151, + "memory(GiB)": 26.31, + "step": 2480, + "train_speed(iter/s)": 0.582856 + }, + { + "acc": 0.99810505, + "epoch": 2.6352067868504774, + "grad_norm": 0.3855116069316864, + "learning_rate": 8.805051792659887e-06, + "loss": 0.00685595, + "memory(GiB)": 26.31, + "step": 2485, + "train_speed(iter/s)": 0.582866 + }, + { + "acc": 0.99766197, + "epoch": 2.64050901378579, + "grad_norm": 0.641624927520752, + "learning_rate": 8.799358403955646e-06, + "loss": 0.00616185, + "memory(GiB)": 26.31, + "step": 2490, + "train_speed(iter/s)": 0.582875 + }, + { + "acc": 0.99728374, + "epoch": 2.6458112407211027, + "grad_norm": 0.594316840171814, + "learning_rate": 8.79365333450399e-06, + "loss": 0.00788632, + "memory(GiB)": 26.31, + "step": 2495, + "train_speed(iter/s)": 0.582888 + }, + { + "acc": 0.99828377, + "epoch": 2.6511134676564154, + "grad_norm": 0.34747976064682007, + "learning_rate": 8.787936601846892e-06, + "loss": 0.00582717, + "memory(GiB)": 26.31, + "step": 2500, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99804268, + "epoch": 2.6564156945917285, + "grad_norm": 0.47376132011413574, + "learning_rate": 8.78220822356219e-06, + "loss": 0.00711892, + "memory(GiB)": 26.31, + "step": 2505, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.9984189, + "epoch": 2.6617179215270412, + "grad_norm": 0.38001590967178345, + "learning_rate": 8.776468217263526e-06, + "loss": 0.00497809, + "memory(GiB)": 26.31, + "step": 2510, + "train_speed(iter/s)": 0.582885 + }, + { + "acc": 0.99746351, + "epoch": 2.6670201484623544, + "grad_norm": 0.9638261198997498, + "learning_rate": 8.770716600600301e-06, + "loss": 0.00803462, + "memory(GiB)": 26.31, + "step": 2515, + "train_speed(iter/s)": 0.582892 + }, + { + "acc": 0.99772987, + "epoch": 2.672322375397667, + "grad_norm": 0.3004850149154663, + "learning_rate": 8.764953391257611e-06, + "loss": 0.00633329, + "memory(GiB)": 26.31, + "step": 2520, + "train_speed(iter/s)": 0.582891 + }, + { + "acc": 0.99645672, + "epoch": 2.6776246023329797, + "grad_norm": 1.1040234565734863, + "learning_rate": 8.759178606956197e-06, + "loss": 0.00878785, + "memory(GiB)": 26.31, + "step": 2525, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99747276, + "epoch": 2.682926829268293, + "grad_norm": 0.582634687423706, + "learning_rate": 8.753392265452395e-06, + "loss": 0.00888529, + "memory(GiB)": 26.31, + "step": 2530, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99669046, + "epoch": 2.6882290562036055, + "grad_norm": 0.5359108448028564, + "learning_rate": 8.747594384538073e-06, + "loss": 0.00974076, + "memory(GiB)": 26.31, + "step": 2535, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99788017, + "epoch": 2.693531283138918, + "grad_norm": 1.008933663368225, + "learning_rate": 8.741784982040583e-06, + "loss": 0.0074301, + "memory(GiB)": 26.31, + "step": 2540, + "train_speed(iter/s)": 0.582892 + }, + { + "acc": 0.99824905, + "epoch": 2.6988335100742313, + "grad_norm": 0.43502724170684814, + "learning_rate": 8.735964075822702e-06, + "loss": 0.00669004, + "memory(GiB)": 26.31, + "step": 2545, + "train_speed(iter/s)": 0.582879 + }, + { + "acc": 0.99731178, + "epoch": 2.704135737009544, + "grad_norm": 0.8386071920394897, + "learning_rate": 8.730131683782583e-06, + "loss": 0.00993623, + "memory(GiB)": 26.31, + "step": 2550, + "train_speed(iter/s)": 0.582877 + }, + { + "acc": 0.99741039, + "epoch": 2.7094379639448567, + "grad_norm": 0.5602560639381409, + "learning_rate": 8.724287823853687e-06, + "loss": 0.00661452, + "memory(GiB)": 26.31, + "step": 2555, + "train_speed(iter/s)": 0.582883 + }, + { + "acc": 0.99765863, + "epoch": 2.71474019088017, + "grad_norm": 0.7029976844787598, + "learning_rate": 8.718432514004743e-06, + "loss": 0.00765093, + "memory(GiB)": 26.31, + "step": 2560, + "train_speed(iter/s)": 0.582883 + }, + { + "acc": 0.99744787, + "epoch": 2.7200424178154825, + "grad_norm": 0.8403060436248779, + "learning_rate": 8.712565772239685e-06, + "loss": 0.00870134, + "memory(GiB)": 26.31, + "step": 2565, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99806385, + "epoch": 2.725344644750795, + "grad_norm": 0.7096578478813171, + "learning_rate": 8.706687616597599e-06, + "loss": 0.00648894, + "memory(GiB)": 26.31, + "step": 2570, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99784031, + "epoch": 2.7306468716861083, + "grad_norm": 0.9655284285545349, + "learning_rate": 8.700798065152664e-06, + "loss": 0.00564081, + "memory(GiB)": 26.31, + "step": 2575, + "train_speed(iter/s)": 0.582883 + }, + { + "acc": 0.99837542, + "epoch": 2.735949098621421, + "grad_norm": 0.5595547556877136, + "learning_rate": 8.694897136014102e-06, + "loss": 0.00597643, + "memory(GiB)": 26.31, + "step": 2580, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99757414, + "epoch": 2.7412513255567337, + "grad_norm": 0.845004141330719, + "learning_rate": 8.688984847326113e-06, + "loss": 0.00632632, + "memory(GiB)": 26.31, + "step": 2585, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99758339, + "epoch": 2.746553552492047, + "grad_norm": 0.8130578994750977, + "learning_rate": 8.683061217267834e-06, + "loss": 0.00771255, + "memory(GiB)": 26.31, + "step": 2590, + "train_speed(iter/s)": 0.582894 + }, + { + "acc": 0.99802599, + "epoch": 2.7518557794273595, + "grad_norm": 0.8608774542808533, + "learning_rate": 8.677126264053266e-06, + "loss": 0.00774147, + "memory(GiB)": 26.31, + "step": 2595, + "train_speed(iter/s)": 0.582895 + }, + { + "acc": 0.99850788, + "epoch": 2.757158006362672, + "grad_norm": 0.6176683902740479, + "learning_rate": 8.671180005931236e-06, + "loss": 0.0063642, + "memory(GiB)": 26.31, + "step": 2600, + "train_speed(iter/s)": 0.582892 + }, + { + "acc": 0.99837418, + "epoch": 2.7624602332979853, + "grad_norm": 0.29822659492492676, + "learning_rate": 8.665222461185323e-06, + "loss": 0.00637889, + "memory(GiB)": 26.31, + "step": 2605, + "train_speed(iter/s)": 0.582891 + }, + { + "acc": 0.99730186, + "epoch": 2.767762460233298, + "grad_norm": 0.8909950256347656, + "learning_rate": 8.659253648133812e-06, + "loss": 0.00681949, + "memory(GiB)": 26.31, + "step": 2610, + "train_speed(iter/s)": 0.582888 + }, + { + "acc": 0.99875526, + "epoch": 2.7730646871686107, + "grad_norm": 0.3553677797317505, + "learning_rate": 8.653273585129638e-06, + "loss": 0.00568229, + "memory(GiB)": 26.31, + "step": 2615, + "train_speed(iter/s)": 0.582887 + }, + { + "acc": 0.99821167, + "epoch": 2.778366914103924, + "grad_norm": 0.4157305657863617, + "learning_rate": 8.647282290560328e-06, + "loss": 0.00531367, + "memory(GiB)": 26.31, + "step": 2620, + "train_speed(iter/s)": 0.582894 + }, + { + "acc": 0.99853363, + "epoch": 2.7836691410392365, + "grad_norm": 0.48346978425979614, + "learning_rate": 8.64127978284794e-06, + "loss": 0.00515425, + "memory(GiB)": 26.31, + "step": 2625, + "train_speed(iter/s)": 0.582895 + }, + { + "acc": 0.99797735, + "epoch": 2.7889713679745496, + "grad_norm": 0.824645459651947, + "learning_rate": 8.635266080449015e-06, + "loss": 0.00621112, + "memory(GiB)": 26.31, + "step": 2630, + "train_speed(iter/s)": 0.582901 + }, + { + "acc": 0.99723949, + "epoch": 2.7942735949098623, + "grad_norm": 0.4443477690219879, + "learning_rate": 8.62924120185451e-06, + "loss": 0.00894453, + "memory(GiB)": 26.31, + "step": 2635, + "train_speed(iter/s)": 0.582915 + }, + { + "acc": 0.99760208, + "epoch": 2.799575821845175, + "grad_norm": 0.6249599456787109, + "learning_rate": 8.623205165589752e-06, + "loss": 0.00710995, + "memory(GiB)": 26.31, + "step": 2640, + "train_speed(iter/s)": 0.582927 + }, + { + "acc": 0.99724007, + "epoch": 2.8048780487804876, + "grad_norm": 0.8389932513237, + "learning_rate": 8.61715799021437e-06, + "loss": 0.00801658, + "memory(GiB)": 26.31, + "step": 2645, + "train_speed(iter/s)": 0.582928 + }, + { + "acc": 0.99735107, + "epoch": 2.8101802757158008, + "grad_norm": 0.6563589572906494, + "learning_rate": 8.61109969432225e-06, + "loss": 0.00741932, + "memory(GiB)": 26.31, + "step": 2650, + "train_speed(iter/s)": 0.582929 + }, + { + "acc": 0.9972703, + "epoch": 2.8154825026511134, + "grad_norm": 0.9069436192512512, + "learning_rate": 8.60503029654147e-06, + "loss": 0.00955604, + "memory(GiB)": 26.31, + "step": 2655, + "train_speed(iter/s)": 0.582935 + }, + { + "acc": 0.99771433, + "epoch": 2.8207847295864266, + "grad_norm": 0.540453314781189, + "learning_rate": 8.598949815534237e-06, + "loss": 0.00701306, + "memory(GiB)": 26.31, + "step": 2660, + "train_speed(iter/s)": 0.582935 + }, + { + "acc": 0.99766121, + "epoch": 2.8260869565217392, + "grad_norm": 0.49637776613235474, + "learning_rate": 8.592858269996845e-06, + "loss": 0.00793933, + "memory(GiB)": 26.31, + "step": 2665, + "train_speed(iter/s)": 0.582938 + }, + { + "acc": 0.99823942, + "epoch": 2.831389183457052, + "grad_norm": 0.752679169178009, + "learning_rate": 8.58675567865961e-06, + "loss": 0.0062173, + "memory(GiB)": 26.31, + "step": 2670, + "train_speed(iter/s)": 0.58296 + }, + { + "acc": 0.99808493, + "epoch": 2.8366914103923646, + "grad_norm": 0.2555808424949646, + "learning_rate": 8.580642060286801e-06, + "loss": 0.00525451, + "memory(GiB)": 26.31, + "step": 2675, + "train_speed(iter/s)": 0.58296 + }, + { + "acc": 0.99891806, + "epoch": 2.8419936373276777, + "grad_norm": 0.4761360287666321, + "learning_rate": 8.574517433676606e-06, + "loss": 0.00387856, + "memory(GiB)": 26.31, + "step": 2680, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99823341, + "epoch": 2.8472958642629904, + "grad_norm": 0.39361944794654846, + "learning_rate": 8.56838181766105e-06, + "loss": 0.00706662, + "memory(GiB)": 26.31, + "step": 2685, + "train_speed(iter/s)": 0.582979 + }, + { + "acc": 0.99847803, + "epoch": 2.8525980911983035, + "grad_norm": 0.6813247799873352, + "learning_rate": 8.56223523110596e-06, + "loss": 0.00404909, + "memory(GiB)": 26.31, + "step": 2690, + "train_speed(iter/s)": 0.582982 + }, + { + "acc": 0.99783859, + "epoch": 2.8579003181336162, + "grad_norm": 0.539756178855896, + "learning_rate": 8.556077692910884e-06, + "loss": 0.00924466, + "memory(GiB)": 26.31, + "step": 2695, + "train_speed(iter/s)": 0.582983 + }, + { + "acc": 0.99754581, + "epoch": 2.863202545068929, + "grad_norm": 0.3688136041164398, + "learning_rate": 8.549909222009049e-06, + "loss": 0.00816878, + "memory(GiB)": 26.31, + "step": 2700, + "train_speed(iter/s)": 0.582981 + }, + { + "acc": 0.99778786, + "epoch": 2.8685047720042416, + "grad_norm": 0.6642614006996155, + "learning_rate": 8.543729837367299e-06, + "loss": 0.00844958, + "memory(GiB)": 26.31, + "step": 2705, + "train_speed(iter/s)": 0.58298 + }, + { + "acc": 0.99753714, + "epoch": 2.8738069989395547, + "grad_norm": 0.40582075715065, + "learning_rate": 8.537539557986036e-06, + "loss": 0.00675502, + "memory(GiB)": 26.31, + "step": 2710, + "train_speed(iter/s)": 0.582986 + }, + { + "acc": 0.9975502, + "epoch": 2.8791092258748674, + "grad_norm": 0.6373099684715271, + "learning_rate": 8.531338402899158e-06, + "loss": 0.00913531, + "memory(GiB)": 26.31, + "step": 2715, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.997229, + "epoch": 2.8844114528101805, + "grad_norm": 0.6850215792655945, + "learning_rate": 8.525126391174008e-06, + "loss": 0.00866958, + "memory(GiB)": 26.31, + "step": 2720, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99879761, + "epoch": 2.889713679745493, + "grad_norm": 0.2462194859981537, + "learning_rate": 8.518903541911302e-06, + "loss": 0.00522173, + "memory(GiB)": 26.31, + "step": 2725, + "train_speed(iter/s)": 0.582995 + }, + { + "acc": 0.99818954, + "epoch": 2.895015906680806, + "grad_norm": 0.6007863879203796, + "learning_rate": 8.512669874245093e-06, + "loss": 0.00626441, + "memory(GiB)": 26.31, + "step": 2730, + "train_speed(iter/s)": 0.583001 + }, + { + "acc": 0.99893417, + "epoch": 2.9003181336161186, + "grad_norm": 0.24022957682609558, + "learning_rate": 8.506425407342687e-06, + "loss": 0.0040527, + "memory(GiB)": 26.31, + "step": 2735, + "train_speed(iter/s)": 0.583006 + }, + { + "acc": 0.9976409, + "epoch": 2.9056203605514317, + "grad_norm": 0.7551782131195068, + "learning_rate": 8.500170160404601e-06, + "loss": 0.00866469, + "memory(GiB)": 26.31, + "step": 2740, + "train_speed(iter/s)": 0.583005 + }, + { + "acc": 0.99791851, + "epoch": 2.9109225874867444, + "grad_norm": 0.48232194781303406, + "learning_rate": 8.493904152664496e-06, + "loss": 0.00649021, + "memory(GiB)": 26.31, + "step": 2745, + "train_speed(iter/s)": 0.583006 + }, + { + "acc": 0.99849854, + "epoch": 2.9162248144220575, + "grad_norm": 0.17249707877635956, + "learning_rate": 8.487627403389123e-06, + "loss": 0.005985, + "memory(GiB)": 26.31, + "step": 2750, + "train_speed(iter/s)": 0.583012 + }, + { + "acc": 0.9975729, + "epoch": 2.92152704135737, + "grad_norm": 0.4319506287574768, + "learning_rate": 8.48133993187826e-06, + "loss": 0.0076484, + "memory(GiB)": 26.31, + "step": 2755, + "train_speed(iter/s)": 0.583011 + }, + { + "acc": 0.99815025, + "epoch": 2.926829268292683, + "grad_norm": 0.5459288358688354, + "learning_rate": 8.475041757464654e-06, + "loss": 0.00624922, + "memory(GiB)": 26.31, + "step": 2760, + "train_speed(iter/s)": 0.583013 + }, + { + "acc": 0.99748049, + "epoch": 2.9321314952279955, + "grad_norm": 0.5239591002464294, + "learning_rate": 8.468732899513958e-06, + "loss": 0.00663889, + "memory(GiB)": 26.31, + "step": 2765, + "train_speed(iter/s)": 0.583014 + }, + { + "acc": 0.9986208, + "epoch": 2.9374337221633087, + "grad_norm": 0.29796159267425537, + "learning_rate": 8.462413377424682e-06, + "loss": 0.0047507, + "memory(GiB)": 26.31, + "step": 2770, + "train_speed(iter/s)": 0.583011 + }, + { + "acc": 0.99765453, + "epoch": 2.9427359490986214, + "grad_norm": 0.6221350431442261, + "learning_rate": 8.456083210628117e-06, + "loss": 0.00811146, + "memory(GiB)": 26.31, + "step": 2775, + "train_speed(iter/s)": 0.583018 + }, + { + "acc": 0.99878178, + "epoch": 2.9480381760339345, + "grad_norm": 0.45186954736709595, + "learning_rate": 8.449742418588293e-06, + "loss": 0.00400441, + "memory(GiB)": 26.31, + "step": 2780, + "train_speed(iter/s)": 0.583017 + }, + { + "acc": 0.9988843, + "epoch": 2.953340402969247, + "grad_norm": 0.4978770315647125, + "learning_rate": 8.443391020801904e-06, + "loss": 0.00453811, + "memory(GiB)": 26.31, + "step": 2785, + "train_speed(iter/s)": 0.583021 + }, + { + "acc": 0.99816494, + "epoch": 2.95864262990456, + "grad_norm": 0.46046286821365356, + "learning_rate": 8.437029036798259e-06, + "loss": 0.00635334, + "memory(GiB)": 26.31, + "step": 2790, + "train_speed(iter/s)": 0.583016 + }, + { + "acc": 0.99850636, + "epoch": 2.9639448568398725, + "grad_norm": 0.6356995105743408, + "learning_rate": 8.430656486139217e-06, + "loss": 0.00549962, + "memory(GiB)": 26.31, + "step": 2795, + "train_speed(iter/s)": 0.583014 + }, + { + "acc": 0.99839563, + "epoch": 2.9692470837751856, + "grad_norm": 0.6807392239570618, + "learning_rate": 8.424273388419122e-06, + "loss": 0.0061514, + "memory(GiB)": 26.31, + "step": 2800, + "train_speed(iter/s)": 0.583014 + }, + { + "acc": 0.99671831, + "epoch": 2.9745493107104983, + "grad_norm": 0.6449872255325317, + "learning_rate": 8.417879763264759e-06, + "loss": 0.01174504, + "memory(GiB)": 26.31, + "step": 2805, + "train_speed(iter/s)": 0.58302 + }, + { + "acc": 0.99840851, + "epoch": 2.9798515376458115, + "grad_norm": 0.5310406684875488, + "learning_rate": 8.411475630335267e-06, + "loss": 0.00404135, + "memory(GiB)": 26.31, + "step": 2810, + "train_speed(iter/s)": 0.583024 + }, + { + "acc": 0.99735947, + "epoch": 2.985153764581124, + "grad_norm": 0.4129229784011841, + "learning_rate": 8.405061009322113e-06, + "loss": 0.00868261, + "memory(GiB)": 26.31, + "step": 2815, + "train_speed(iter/s)": 0.583022 + }, + { + "acc": 0.99885445, + "epoch": 2.990455991516437, + "grad_norm": 0.4066607654094696, + "learning_rate": 8.398635919948998e-06, + "loss": 0.0049273, + "memory(GiB)": 26.31, + "step": 2820, + "train_speed(iter/s)": 0.583025 + }, + { + "acc": 0.99865417, + "epoch": 2.9957582184517495, + "grad_norm": 0.45932242274284363, + "learning_rate": 8.392200381971819e-06, + "loss": 0.00448716, + "memory(GiB)": 26.31, + "step": 2825, + "train_speed(iter/s)": 0.583027 + }, + { + "acc": 0.99809284, + "epoch": 3.0010604453870626, + "grad_norm": 0.6505632400512695, + "learning_rate": 8.385754415178594e-06, + "loss": 0.00660937, + "memory(GiB)": 26.31, + "step": 2830, + "train_speed(iter/s)": 0.582904 + }, + { + "acc": 0.99857645, + "epoch": 3.0063626723223753, + "grad_norm": 0.5474786758422852, + "learning_rate": 8.379298039389418e-06, + "loss": 0.00656885, + "memory(GiB)": 26.31, + "step": 2835, + "train_speed(iter/s)": 0.582903 + }, + { + "acc": 0.99797611, + "epoch": 3.0116648992576884, + "grad_norm": 0.5049321055412292, + "learning_rate": 8.372831274456378e-06, + "loss": 0.0067019, + "memory(GiB)": 26.31, + "step": 2840, + "train_speed(iter/s)": 0.582908 + }, + { + "acc": 0.9989069, + "epoch": 3.016967126193001, + "grad_norm": 0.25424474477767944, + "learning_rate": 8.366354140263519e-06, + "loss": 0.00447071, + "memory(GiB)": 26.31, + "step": 2845, + "train_speed(iter/s)": 0.582914 + }, + { + "acc": 0.99727268, + "epoch": 3.022269353128314, + "grad_norm": 0.8304399847984314, + "learning_rate": 8.35986665672676e-06, + "loss": 0.00797994, + "memory(GiB)": 26.31, + "step": 2850, + "train_speed(iter/s)": 0.582914 + }, + { + "acc": 0.99812622, + "epoch": 3.027571580063627, + "grad_norm": 0.5393794775009155, + "learning_rate": 8.353368843793847e-06, + "loss": 0.00611412, + "memory(GiB)": 26.31, + "step": 2855, + "train_speed(iter/s)": 0.582917 + }, + { + "acc": 0.99841595, + "epoch": 3.0328738069989396, + "grad_norm": 0.5000630021095276, + "learning_rate": 8.346860721444284e-06, + "loss": 0.00692058, + "memory(GiB)": 26.31, + "step": 2860, + "train_speed(iter/s)": 0.582916 + }, + { + "acc": 0.9972909, + "epoch": 3.0381760339342523, + "grad_norm": 0.6896098256111145, + "learning_rate": 8.340342309689274e-06, + "loss": 0.0062426, + "memory(GiB)": 26.31, + "step": 2865, + "train_speed(iter/s)": 0.582925 + }, + { + "acc": 0.99858389, + "epoch": 3.0434782608695654, + "grad_norm": 0.49193787574768066, + "learning_rate": 8.333813628571665e-06, + "loss": 0.00430128, + "memory(GiB)": 26.31, + "step": 2870, + "train_speed(iter/s)": 0.582926 + }, + { + "acc": 0.99836273, + "epoch": 3.048780487804878, + "grad_norm": 0.20785479247570038, + "learning_rate": 8.32727469816587e-06, + "loss": 0.00575757, + "memory(GiB)": 26.31, + "step": 2875, + "train_speed(iter/s)": 0.582938 + }, + { + "acc": 0.99837456, + "epoch": 3.0540827147401908, + "grad_norm": 0.4574993848800659, + "learning_rate": 8.320725538577825e-06, + "loss": 0.00428646, + "memory(GiB)": 26.31, + "step": 2880, + "train_speed(iter/s)": 0.58294 + }, + { + "acc": 0.99787979, + "epoch": 3.059384941675504, + "grad_norm": 0.3935782015323639, + "learning_rate": 8.314166169944919e-06, + "loss": 0.0048808, + "memory(GiB)": 26.31, + "step": 2885, + "train_speed(iter/s)": 0.582947 + }, + { + "acc": 0.99847889, + "epoch": 3.0646871686108166, + "grad_norm": 0.34742140769958496, + "learning_rate": 8.307596612435925e-06, + "loss": 0.00527749, + "memory(GiB)": 26.31, + "step": 2890, + "train_speed(iter/s)": 0.582951 + }, + { + "acc": 0.99888916, + "epoch": 3.0699893955461293, + "grad_norm": 0.1643511801958084, + "learning_rate": 8.30101688625095e-06, + "loss": 0.00485554, + "memory(GiB)": 26.31, + "step": 2895, + "train_speed(iter/s)": 0.582947 + }, + { + "acc": 0.99863644, + "epoch": 3.0752916224814424, + "grad_norm": 0.4972369074821472, + "learning_rate": 8.294427011621367e-06, + "loss": 0.0042267, + "memory(GiB)": 26.31, + "step": 2900, + "train_speed(iter/s)": 0.582946 + }, + { + "acc": 0.99793358, + "epoch": 3.080593849416755, + "grad_norm": 0.8058770895004272, + "learning_rate": 8.287827008809755e-06, + "loss": 0.00536377, + "memory(GiB)": 26.31, + "step": 2905, + "train_speed(iter/s)": 0.582953 + }, + { + "acc": 0.99707813, + "epoch": 3.0858960763520678, + "grad_norm": 0.595598578453064, + "learning_rate": 8.281216898109827e-06, + "loss": 0.00957358, + "memory(GiB)": 26.31, + "step": 2910, + "train_speed(iter/s)": 0.582953 + }, + { + "acc": 0.99796486, + "epoch": 3.091198303287381, + "grad_norm": 0.6424652934074402, + "learning_rate": 8.27459669984639e-06, + "loss": 0.00617009, + "memory(GiB)": 26.31, + "step": 2915, + "train_speed(iter/s)": 0.582958 + }, + { + "acc": 0.99840441, + "epoch": 3.0965005302226936, + "grad_norm": 0.7321480512619019, + "learning_rate": 8.267966434375255e-06, + "loss": 0.00683057, + "memory(GiB)": 26.31, + "step": 2920, + "train_speed(iter/s)": 0.582957 + }, + { + "acc": 0.99913998, + "epoch": 3.1018027571580062, + "grad_norm": 0.8098109364509583, + "learning_rate": 8.261326122083194e-06, + "loss": 0.00538251, + "memory(GiB)": 26.31, + "step": 2925, + "train_speed(iter/s)": 0.582962 + }, + { + "acc": 0.99811344, + "epoch": 3.1071049840933194, + "grad_norm": 0.4597850441932678, + "learning_rate": 8.25467578338787e-06, + "loss": 0.00507367, + "memory(GiB)": 26.31, + "step": 2930, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99837627, + "epoch": 3.112407211028632, + "grad_norm": 0.2444171905517578, + "learning_rate": 8.248015438737775e-06, + "loss": 0.00495638, + "memory(GiB)": 26.31, + "step": 2935, + "train_speed(iter/s)": 0.582967 + }, + { + "acc": 0.99865885, + "epoch": 3.1177094379639447, + "grad_norm": 0.6311067938804626, + "learning_rate": 8.241345108612172e-06, + "loss": 0.00445099, + "memory(GiB)": 26.31, + "step": 2940, + "train_speed(iter/s)": 0.582975 + }, + { + "acc": 0.99781256, + "epoch": 3.123011664899258, + "grad_norm": 0.39378607273101807, + "learning_rate": 8.234664813521014e-06, + "loss": 0.00721694, + "memory(GiB)": 26.31, + "step": 2945, + "train_speed(iter/s)": 0.582972 + }, + { + "acc": 0.99817371, + "epoch": 3.1283138918345705, + "grad_norm": 0.5058181285858154, + "learning_rate": 8.227974574004911e-06, + "loss": 0.00749256, + "memory(GiB)": 26.31, + "step": 2950, + "train_speed(iter/s)": 0.582973 + }, + { + "acc": 0.99721832, + "epoch": 3.133616118769883, + "grad_norm": 0.814859926700592, + "learning_rate": 8.22127441063504e-06, + "loss": 0.00681448, + "memory(GiB)": 26.31, + "step": 2955, + "train_speed(iter/s)": 0.582983 + }, + { + "acc": 0.99774599, + "epoch": 3.1389183457051963, + "grad_norm": 0.5003991723060608, + "learning_rate": 8.214564344013093e-06, + "loss": 0.00531913, + "memory(GiB)": 26.31, + "step": 2960, + "train_speed(iter/s)": 0.582988 + }, + { + "acc": 0.99780588, + "epoch": 3.144220572640509, + "grad_norm": 0.5367461442947388, + "learning_rate": 8.207844394771218e-06, + "loss": 0.00721164, + "memory(GiB)": 26.31, + "step": 2965, + "train_speed(iter/s)": 0.58299 + }, + { + "acc": 0.99879112, + "epoch": 3.1495227995758217, + "grad_norm": 0.35926058888435364, + "learning_rate": 8.20111458357194e-06, + "loss": 0.00350981, + "memory(GiB)": 26.31, + "step": 2970, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.9974081, + "epoch": 3.154825026511135, + "grad_norm": 1.1875823736190796, + "learning_rate": 8.194374931108117e-06, + "loss": 0.00708345, + "memory(GiB)": 26.31, + "step": 2975, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99869471, + "epoch": 3.1601272534464475, + "grad_norm": 0.48193642497062683, + "learning_rate": 8.187625458102865e-06, + "loss": 0.00425645, + "memory(GiB)": 26.31, + "step": 2980, + "train_speed(iter/s)": 0.582988 + }, + { + "acc": 0.99819031, + "epoch": 3.16542948038176, + "grad_norm": 0.7630764842033386, + "learning_rate": 8.180866185309493e-06, + "loss": 0.00596784, + "memory(GiB)": 26.31, + "step": 2985, + "train_speed(iter/s)": 0.582988 + }, + { + "acc": 0.99840889, + "epoch": 3.1707317073170733, + "grad_norm": 0.7666534781455994, + "learning_rate": 8.174097133511444e-06, + "loss": 0.00508381, + "memory(GiB)": 26.31, + "step": 2990, + "train_speed(iter/s)": 0.582986 + }, + { + "acc": 0.99853182, + "epoch": 3.176033934252386, + "grad_norm": 0.497490793466568, + "learning_rate": 8.167318323522232e-06, + "loss": 0.00554109, + "memory(GiB)": 26.31, + "step": 2995, + "train_speed(iter/s)": 0.582988 + }, + { + "acc": 0.99774895, + "epoch": 3.1813361611876987, + "grad_norm": 0.9739230275154114, + "learning_rate": 8.160529776185369e-06, + "loss": 0.00726523, + "memory(GiB)": 26.31, + "step": 3000, + "train_speed(iter/s)": 0.582985 + }, + { + "acc": 0.99790821, + "epoch": 3.186638388123012, + "grad_norm": 0.8522117137908936, + "learning_rate": 8.153731512374317e-06, + "loss": 0.00599418, + "memory(GiB)": 26.31, + "step": 3005, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99876814, + "epoch": 3.1919406150583245, + "grad_norm": 0.3050813674926758, + "learning_rate": 8.146923552992406e-06, + "loss": 0.00465048, + "memory(GiB)": 26.31, + "step": 3010, + "train_speed(iter/s)": 0.582994 + }, + { + "acc": 0.99889717, + "epoch": 3.197242841993637, + "grad_norm": 0.42212408781051636, + "learning_rate": 8.14010591897278e-06, + "loss": 0.00312484, + "memory(GiB)": 26.31, + "step": 3015, + "train_speed(iter/s)": 0.582998 + }, + { + "acc": 0.99857616, + "epoch": 3.2025450689289503, + "grad_norm": 0.4540407061576843, + "learning_rate": 8.133278631278335e-06, + "loss": 0.00428031, + "memory(GiB)": 26.31, + "step": 3020, + "train_speed(iter/s)": 0.582996 + }, + { + "acc": 0.99821129, + "epoch": 3.207847295864263, + "grad_norm": 0.46233507990837097, + "learning_rate": 8.126441710901645e-06, + "loss": 0.00802353, + "memory(GiB)": 26.31, + "step": 3025, + "train_speed(iter/s)": 0.583002 + }, + { + "acc": 0.99873466, + "epoch": 3.2131495227995757, + "grad_norm": 0.3596537411212921, + "learning_rate": 8.119595178864904e-06, + "loss": 0.00341255, + "memory(GiB)": 26.31, + "step": 3030, + "train_speed(iter/s)": 0.583002 + }, + { + "acc": 0.99799576, + "epoch": 3.218451749734889, + "grad_norm": 0.576862633228302, + "learning_rate": 8.112739056219863e-06, + "loss": 0.00627927, + "memory(GiB)": 26.31, + "step": 3035, + "train_speed(iter/s)": 0.583007 + }, + { + "acc": 0.99877367, + "epoch": 3.2237539766702015, + "grad_norm": 0.6581805348396301, + "learning_rate": 8.105873364047757e-06, + "loss": 0.00428308, + "memory(GiB)": 26.31, + "step": 3040, + "train_speed(iter/s)": 0.583009 + }, + { + "acc": 0.9980196, + "epoch": 3.229056203605514, + "grad_norm": 0.5988194942474365, + "learning_rate": 8.098998123459246e-06, + "loss": 0.00572769, + "memory(GiB)": 26.31, + "step": 3045, + "train_speed(iter/s)": 0.583008 + }, + { + "acc": 0.9987751, + "epoch": 3.2343584305408273, + "grad_norm": 0.5027371644973755, + "learning_rate": 8.092113355594356e-06, + "loss": 0.00393928, + "memory(GiB)": 26.31, + "step": 3050, + "train_speed(iter/s)": 0.583009 + }, + { + "acc": 0.99803228, + "epoch": 3.23966065747614, + "grad_norm": 0.9007562398910522, + "learning_rate": 8.085219081622403e-06, + "loss": 0.0059559, + "memory(GiB)": 26.31, + "step": 3055, + "train_speed(iter/s)": 0.583009 + }, + { + "acc": 0.99805775, + "epoch": 3.2449628844114526, + "grad_norm": 1.047590970993042, + "learning_rate": 8.078315322741928e-06, + "loss": 0.0064345, + "memory(GiB)": 26.31, + "step": 3060, + "train_speed(iter/s)": 0.58301 + }, + { + "acc": 0.99793072, + "epoch": 3.2502651113467658, + "grad_norm": 0.828264057636261, + "learning_rate": 8.071402100180646e-06, + "loss": 0.00672631, + "memory(GiB)": 26.31, + "step": 3065, + "train_speed(iter/s)": 0.583012 + }, + { + "acc": 0.99873915, + "epoch": 3.2555673382820784, + "grad_norm": 0.2712744176387787, + "learning_rate": 8.064479435195362e-06, + "loss": 0.00426545, + "memory(GiB)": 26.31, + "step": 3070, + "train_speed(iter/s)": 0.58301 + }, + { + "acc": 0.99843769, + "epoch": 3.260869565217391, + "grad_norm": 0.7335013151168823, + "learning_rate": 8.05754734907192e-06, + "loss": 0.00619958, + "memory(GiB)": 26.31, + "step": 3075, + "train_speed(iter/s)": 0.583015 + }, + { + "acc": 0.99847326, + "epoch": 3.2661717921527043, + "grad_norm": 0.544634997844696, + "learning_rate": 8.050605863125132e-06, + "loss": 0.00407071, + "memory(GiB)": 26.31, + "step": 3080, + "train_speed(iter/s)": 0.583014 + }, + { + "acc": 0.99833088, + "epoch": 3.271474019088017, + "grad_norm": 0.6793374419212341, + "learning_rate": 8.04365499869871e-06, + "loss": 0.00461064, + "memory(GiB)": 26.31, + "step": 3085, + "train_speed(iter/s)": 0.583018 + }, + { + "acc": 0.99863014, + "epoch": 3.2767762460233296, + "grad_norm": 0.5401763319969177, + "learning_rate": 8.036694777165202e-06, + "loss": 0.00503164, + "memory(GiB)": 26.31, + "step": 3090, + "train_speed(iter/s)": 0.583023 + }, + { + "acc": 0.99816036, + "epoch": 3.2820784729586427, + "grad_norm": 0.3845391571521759, + "learning_rate": 8.029725219925932e-06, + "loss": 0.00504886, + "memory(GiB)": 26.31, + "step": 3095, + "train_speed(iter/s)": 0.583034 + }, + { + "acc": 0.99808779, + "epoch": 3.2873806998939554, + "grad_norm": 0.49690601229667664, + "learning_rate": 8.022746348410924e-06, + "loss": 0.00739293, + "memory(GiB)": 26.31, + "step": 3100, + "train_speed(iter/s)": 0.58304 + }, + { + "acc": 0.99887943, + "epoch": 3.292682926829268, + "grad_norm": 0.667151927947998, + "learning_rate": 8.015758184078849e-06, + "loss": 0.00365211, + "memory(GiB)": 26.31, + "step": 3105, + "train_speed(iter/s)": 0.583039 + }, + { + "acc": 0.99844265, + "epoch": 3.2979851537645812, + "grad_norm": 0.32358503341674805, + "learning_rate": 8.008760748416942e-06, + "loss": 0.00485204, + "memory(GiB)": 26.31, + "step": 3110, + "train_speed(iter/s)": 0.583039 + }, + { + "acc": 0.99788647, + "epoch": 3.303287380699894, + "grad_norm": 0.7840277552604675, + "learning_rate": 8.001754062940956e-06, + "loss": 0.00626139, + "memory(GiB)": 26.31, + "step": 3115, + "train_speed(iter/s)": 0.583038 + }, + { + "acc": 0.99826546, + "epoch": 3.3085896076352066, + "grad_norm": 0.38156384229660034, + "learning_rate": 7.994738149195074e-06, + "loss": 0.00547561, + "memory(GiB)": 26.31, + "step": 3120, + "train_speed(iter/s)": 0.583036 + }, + { + "acc": 0.99824371, + "epoch": 3.3138918345705197, + "grad_norm": 0.6840994358062744, + "learning_rate": 7.987713028751866e-06, + "loss": 0.00518895, + "memory(GiB)": 26.31, + "step": 3125, + "train_speed(iter/s)": 0.583035 + }, + { + "acc": 0.99864225, + "epoch": 3.3191940615058324, + "grad_norm": 0.4946794807910919, + "learning_rate": 7.9806787232122e-06, + "loss": 0.00464142, + "memory(GiB)": 26.31, + "step": 3130, + "train_speed(iter/s)": 0.583034 + }, + { + "acc": 0.99752941, + "epoch": 3.3244962884411455, + "grad_norm": 0.7485939860343933, + "learning_rate": 7.973635254205194e-06, + "loss": 0.00718605, + "memory(GiB)": 26.31, + "step": 3135, + "train_speed(iter/s)": 0.583046 + }, + { + "acc": 0.99850063, + "epoch": 3.329798515376458, + "grad_norm": 0.5883116722106934, + "learning_rate": 7.96658264338814e-06, + "loss": 0.00474916, + "memory(GiB)": 26.31, + "step": 3140, + "train_speed(iter/s)": 0.583044 + }, + { + "acc": 0.99846611, + "epoch": 3.335100742311771, + "grad_norm": 0.5782555937767029, + "learning_rate": 7.959520912446434e-06, + "loss": 0.00569647, + "memory(GiB)": 26.31, + "step": 3145, + "train_speed(iter/s)": 0.583046 + }, + { + "acc": 0.99788933, + "epoch": 3.3404029692470836, + "grad_norm": 0.5309669375419617, + "learning_rate": 7.952450083093521e-06, + "loss": 0.00625908, + "memory(GiB)": 26.31, + "step": 3150, + "train_speed(iter/s)": 0.583047 + }, + { + "acc": 0.99900951, + "epoch": 3.3457051961823967, + "grad_norm": 0.366931676864624, + "learning_rate": 7.945370177070823e-06, + "loss": 0.00502579, + "memory(GiB)": 26.31, + "step": 3155, + "train_speed(iter/s)": 0.583052 + }, + { + "acc": 0.99902477, + "epoch": 3.3510074231177094, + "grad_norm": 0.38507696986198425, + "learning_rate": 7.938281216147664e-06, + "loss": 0.00419005, + "memory(GiB)": 26.31, + "step": 3160, + "train_speed(iter/s)": 0.583057 + }, + { + "acc": 0.99903908, + "epoch": 3.3563096500530225, + "grad_norm": 0.18632544577121735, + "learning_rate": 7.931183222121217e-06, + "loss": 0.00402411, + "memory(GiB)": 26.31, + "step": 3165, + "train_speed(iter/s)": 0.583059 + }, + { + "acc": 0.99872265, + "epoch": 3.361611876988335, + "grad_norm": 0.5425890684127808, + "learning_rate": 7.924076216816423e-06, + "loss": 0.00426805, + "memory(GiB)": 26.31, + "step": 3170, + "train_speed(iter/s)": 0.58307 + }, + { + "acc": 0.99785099, + "epoch": 3.366914103923648, + "grad_norm": 0.5266686081886292, + "learning_rate": 7.916960222085938e-06, + "loss": 0.00595398, + "memory(GiB)": 26.31, + "step": 3175, + "train_speed(iter/s)": 0.583065 + }, + { + "acc": 0.99811115, + "epoch": 3.3722163308589606, + "grad_norm": 0.8457557559013367, + "learning_rate": 7.909835259810054e-06, + "loss": 0.00490667, + "memory(GiB)": 26.31, + "step": 3180, + "train_speed(iter/s)": 0.583063 + }, + { + "acc": 0.99839916, + "epoch": 3.3775185577942737, + "grad_norm": 0.762008786201477, + "learning_rate": 7.90270135189664e-06, + "loss": 0.00429381, + "memory(GiB)": 26.31, + "step": 3185, + "train_speed(iter/s)": 0.583065 + }, + { + "acc": 0.9985775, + "epoch": 3.3828207847295864, + "grad_norm": 0.6448890566825867, + "learning_rate": 7.895558520281066e-06, + "loss": 0.0047732, + "memory(GiB)": 26.31, + "step": 3190, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.99828854, + "epoch": 3.3881230116648995, + "grad_norm": 0.6847289204597473, + "learning_rate": 7.888406786926148e-06, + "loss": 0.00535885, + "memory(GiB)": 26.31, + "step": 3195, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.99830017, + "epoch": 3.393425238600212, + "grad_norm": 0.4629160165786743, + "learning_rate": 7.881246173822066e-06, + "loss": 0.00592602, + "memory(GiB)": 26.31, + "step": 3200, + "train_speed(iter/s)": 0.583058 + }, + { + "acc": 0.99878588, + "epoch": 3.398727465535525, + "grad_norm": 0.4068206250667572, + "learning_rate": 7.874076702986305e-06, + "loss": 0.00484697, + "memory(GiB)": 26.31, + "step": 3205, + "train_speed(iter/s)": 0.583052 + }, + { + "acc": 0.99862919, + "epoch": 3.4040296924708375, + "grad_norm": 0.5081034898757935, + "learning_rate": 7.866898396463588e-06, + "loss": 0.00621317, + "memory(GiB)": 26.31, + "step": 3210, + "train_speed(iter/s)": 0.583047 + }, + { + "acc": 0.99848499, + "epoch": 3.4093319194061507, + "grad_norm": 0.20022617280483246, + "learning_rate": 7.859711276325807e-06, + "loss": 0.00451258, + "memory(GiB)": 26.31, + "step": 3215, + "train_speed(iter/s)": 0.583041 + }, + { + "acc": 0.99856482, + "epoch": 3.4146341463414633, + "grad_norm": 0.7532142400741577, + "learning_rate": 7.85251536467195e-06, + "loss": 0.00636707, + "memory(GiB)": 26.31, + "step": 3220, + "train_speed(iter/s)": 0.583045 + }, + { + "acc": 0.99817324, + "epoch": 3.4199363732767765, + "grad_norm": 0.6302582621574402, + "learning_rate": 7.845310683628044e-06, + "loss": 0.00610769, + "memory(GiB)": 26.31, + "step": 3225, + "train_speed(iter/s)": 0.583041 + }, + { + "acc": 0.99841137, + "epoch": 3.425238600212089, + "grad_norm": 0.32306987047195435, + "learning_rate": 7.83809725534707e-06, + "loss": 0.00437971, + "memory(GiB)": 26.31, + "step": 3230, + "train_speed(iter/s)": 0.583039 + }, + { + "acc": 0.99868917, + "epoch": 3.430540827147402, + "grad_norm": 0.7746492028236389, + "learning_rate": 7.830875102008913e-06, + "loss": 0.00378766, + "memory(GiB)": 26.31, + "step": 3235, + "train_speed(iter/s)": 0.583036 + }, + { + "acc": 0.99835024, + "epoch": 3.4358430540827145, + "grad_norm": 0.8032320141792297, + "learning_rate": 7.823644245820282e-06, + "loss": 0.00470252, + "memory(GiB)": 26.31, + "step": 3240, + "train_speed(iter/s)": 0.583033 + }, + { + "acc": 0.99794064, + "epoch": 3.4411452810180276, + "grad_norm": 0.5391832590103149, + "learning_rate": 7.81640470901465e-06, + "loss": 0.00547033, + "memory(GiB)": 26.31, + "step": 3245, + "train_speed(iter/s)": 0.583029 + }, + { + "acc": 0.99810715, + "epoch": 3.4464475079533403, + "grad_norm": 0.7630506753921509, + "learning_rate": 7.80915651385218e-06, + "loss": 0.00489411, + "memory(GiB)": 26.31, + "step": 3250, + "train_speed(iter/s)": 0.583026 + }, + { + "acc": 0.99926071, + "epoch": 3.4517497348886534, + "grad_norm": 0.25475117564201355, + "learning_rate": 7.801899682619649e-06, + "loss": 0.00258712, + "memory(GiB)": 26.31, + "step": 3255, + "train_speed(iter/s)": 0.583022 + }, + { + "acc": 0.99808798, + "epoch": 3.457051961823966, + "grad_norm": 0.8293874263763428, + "learning_rate": 7.794634237630399e-06, + "loss": 0.00512287, + "memory(GiB)": 26.31, + "step": 3260, + "train_speed(iter/s)": 0.583018 + }, + { + "acc": 0.99783936, + "epoch": 3.462354188759279, + "grad_norm": 0.6153422594070435, + "learning_rate": 7.787360201224255e-06, + "loss": 0.00503631, + "memory(GiB)": 26.31, + "step": 3265, + "train_speed(iter/s)": 0.583018 + }, + { + "acc": 0.99905224, + "epoch": 3.4676564156945915, + "grad_norm": 0.44570717215538025, + "learning_rate": 7.780077595767458e-06, + "loss": 0.00378264, + "memory(GiB)": 26.31, + "step": 3270, + "train_speed(iter/s)": 0.583013 + }, + { + "acc": 0.99846916, + "epoch": 3.4729586426299046, + "grad_norm": 0.2163510024547577, + "learning_rate": 7.772786443652594e-06, + "loss": 0.00520158, + "memory(GiB)": 26.31, + "step": 3275, + "train_speed(iter/s)": 0.58301 + }, + { + "acc": 0.99852314, + "epoch": 3.4782608695652173, + "grad_norm": 0.646382749080658, + "learning_rate": 7.765486767298536e-06, + "loss": 0.00608887, + "memory(GiB)": 26.31, + "step": 3280, + "train_speed(iter/s)": 0.583007 + }, + { + "acc": 0.99863243, + "epoch": 3.4835630965005304, + "grad_norm": 0.19896253943443298, + "learning_rate": 7.758178589150358e-06, + "loss": 0.00385373, + "memory(GiB)": 26.31, + "step": 3285, + "train_speed(iter/s)": 0.583011 + }, + { + "acc": 0.99773331, + "epoch": 3.488865323435843, + "grad_norm": 0.7520610690116882, + "learning_rate": 7.750861931679285e-06, + "loss": 0.00724002, + "memory(GiB)": 26.31, + "step": 3290, + "train_speed(iter/s)": 0.583006 + }, + { + "acc": 0.99894562, + "epoch": 3.494167550371156, + "grad_norm": 0.26213565468788147, + "learning_rate": 7.743536817382603e-06, + "loss": 0.00309457, + "memory(GiB)": 26.31, + "step": 3295, + "train_speed(iter/s)": 0.583003 + }, + { + "acc": 0.99864101, + "epoch": 3.499469777306469, + "grad_norm": 0.6381789445877075, + "learning_rate": 7.73620326878361e-06, + "loss": 0.00587287, + "memory(GiB)": 26.31, + "step": 3300, + "train_speed(iter/s)": 0.582999 + }, + { + "acc": 0.99802217, + "epoch": 3.5047720042417816, + "grad_norm": 0.4187053442001343, + "learning_rate": 7.728861308431538e-06, + "loss": 0.00545754, + "memory(GiB)": 26.31, + "step": 3305, + "train_speed(iter/s)": 0.582994 + }, + { + "acc": 0.9990015, + "epoch": 3.5100742311770943, + "grad_norm": 0.3954903781414032, + "learning_rate": 7.721510958901476e-06, + "loss": 0.0044807, + "memory(GiB)": 26.31, + "step": 3310, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99838123, + "epoch": 3.5153764581124074, + "grad_norm": 0.635510265827179, + "learning_rate": 7.714152242794319e-06, + "loss": 0.00571715, + "memory(GiB)": 26.31, + "step": 3315, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99914103, + "epoch": 3.52067868504772, + "grad_norm": 0.6559579968452454, + "learning_rate": 7.706785182736675e-06, + "loss": 0.00398727, + "memory(GiB)": 26.31, + "step": 3320, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99828911, + "epoch": 3.5259809119830328, + "grad_norm": 0.6975615620613098, + "learning_rate": 7.699409801380816e-06, + "loss": 0.00605518, + "memory(GiB)": 26.31, + "step": 3325, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.99820814, + "epoch": 3.5312831389183454, + "grad_norm": 0.4516032338142395, + "learning_rate": 7.692026121404602e-06, + "loss": 0.00592186, + "memory(GiB)": 26.31, + "step": 3330, + "train_speed(iter/s)": 0.582983 + }, + { + "acc": 0.99784756, + "epoch": 3.5365853658536586, + "grad_norm": 0.6378289461135864, + "learning_rate": 7.684634165511404e-06, + "loss": 0.0053414, + "memory(GiB)": 26.31, + "step": 3335, + "train_speed(iter/s)": 0.58299 + }, + { + "acc": 0.99784336, + "epoch": 3.5418875927889713, + "grad_norm": 0.6353384852409363, + "learning_rate": 7.677233956430041e-06, + "loss": 0.00759888, + "memory(GiB)": 26.31, + "step": 3340, + "train_speed(iter/s)": 0.582989 + }, + { + "acc": 0.9990695, + "epoch": 3.5471898197242844, + "grad_norm": 0.4579092264175415, + "learning_rate": 7.669825516914713e-06, + "loss": 0.00361843, + "memory(GiB)": 26.31, + "step": 3345, + "train_speed(iter/s)": 0.582995 + }, + { + "acc": 0.99890776, + "epoch": 3.552492046659597, + "grad_norm": 0.6212195754051208, + "learning_rate": 7.662408869744921e-06, + "loss": 0.00401128, + "memory(GiB)": 26.31, + "step": 3350, + "train_speed(iter/s)": 0.582995 + }, + { + "acc": 0.99883776, + "epoch": 3.5577942735949097, + "grad_norm": 0.2507479786872864, + "learning_rate": 7.65498403772541e-06, + "loss": 0.00377725, + "memory(GiB)": 26.31, + "step": 3355, + "train_speed(iter/s)": 0.582991 + }, + { + "acc": 0.99893551, + "epoch": 3.5630965005302224, + "grad_norm": 0.6308388113975525, + "learning_rate": 7.647551043686084e-06, + "loss": 0.00425741, + "memory(GiB)": 26.31, + "step": 3360, + "train_speed(iter/s)": 0.582986 + }, + { + "acc": 0.99891872, + "epoch": 3.5683987274655355, + "grad_norm": 0.3604690134525299, + "learning_rate": 7.640109910481947e-06, + "loss": 0.00557023, + "memory(GiB)": 26.31, + "step": 3365, + "train_speed(iter/s)": 0.582982 + }, + { + "acc": 0.99903145, + "epoch": 3.5737009544008482, + "grad_norm": 0.19093440473079681, + "learning_rate": 7.632660660993036e-06, + "loss": 0.00356767, + "memory(GiB)": 26.31, + "step": 3370, + "train_speed(iter/s)": 0.582979 + }, + { + "acc": 0.99842091, + "epoch": 3.5790031813361614, + "grad_norm": 0.8038215637207031, + "learning_rate": 7.625203318124332e-06, + "loss": 0.00609244, + "memory(GiB)": 26.31, + "step": 3375, + "train_speed(iter/s)": 0.582973 + }, + { + "acc": 0.99883862, + "epoch": 3.584305408271474, + "grad_norm": 0.42334866523742676, + "learning_rate": 7.617737904805709e-06, + "loss": 0.00335372, + "memory(GiB)": 26.31, + "step": 3380, + "train_speed(iter/s)": 0.582972 + }, + { + "acc": 0.99890652, + "epoch": 3.5896076352067867, + "grad_norm": 0.5734203457832336, + "learning_rate": 7.610264443991855e-06, + "loss": 0.0062226, + "memory(GiB)": 26.31, + "step": 3385, + "train_speed(iter/s)": 0.582974 + }, + { + "acc": 0.99906788, + "epoch": 3.5949098621421, + "grad_norm": 0.5392136573791504, + "learning_rate": 7.6027829586622016e-06, + "loss": 0.00349057, + "memory(GiB)": 26.31, + "step": 3390, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99890661, + "epoch": 3.6002120890774125, + "grad_norm": 0.44584783911705017, + "learning_rate": 7.5952934718208525e-06, + "loss": 0.00261679, + "memory(GiB)": 26.31, + "step": 3395, + "train_speed(iter/s)": 0.582966 + }, + { + "acc": 0.99926147, + "epoch": 3.605514316012725, + "grad_norm": 0.3416913151741028, + "learning_rate": 7.587796006496522e-06, + "loss": 0.00221901, + "memory(GiB)": 26.31, + "step": 3400, + "train_speed(iter/s)": 0.582958 + }, + { + "acc": 0.99889278, + "epoch": 3.6108165429480383, + "grad_norm": 0.5704333186149597, + "learning_rate": 7.580290585742445e-06, + "loss": 0.0049392, + "memory(GiB)": 26.31, + "step": 3405, + "train_speed(iter/s)": 0.582953 + }, + { + "acc": 0.99948616, + "epoch": 3.616118769883351, + "grad_norm": 0.42870983481407166, + "learning_rate": 7.572777232636328e-06, + "loss": 0.00195065, + "memory(GiB)": 26.31, + "step": 3410, + "train_speed(iter/s)": 0.582951 + }, + { + "acc": 0.99882116, + "epoch": 3.6214209968186637, + "grad_norm": 0.202960804104805, + "learning_rate": 7.565255970280263e-06, + "loss": 0.00449147, + "memory(GiB)": 26.31, + "step": 3415, + "train_speed(iter/s)": 0.582946 + }, + { + "acc": 0.99910755, + "epoch": 3.626723223753977, + "grad_norm": 0.6247968077659607, + "learning_rate": 7.557726821800661e-06, + "loss": 0.00416288, + "memory(GiB)": 26.31, + "step": 3420, + "train_speed(iter/s)": 0.582942 + }, + { + "acc": 0.99842358, + "epoch": 3.6320254506892895, + "grad_norm": 0.5606263875961304, + "learning_rate": 7.550189810348183e-06, + "loss": 0.0057487, + "memory(GiB)": 26.31, + "step": 3425, + "train_speed(iter/s)": 0.582939 + }, + { + "acc": 0.99884071, + "epoch": 3.637327677624602, + "grad_norm": 0.521111786365509, + "learning_rate": 7.542644959097668e-06, + "loss": 0.00436895, + "memory(GiB)": 26.31, + "step": 3430, + "train_speed(iter/s)": 0.582937 + }, + { + "acc": 0.99864883, + "epoch": 3.6426299045599153, + "grad_norm": 0.42435264587402344, + "learning_rate": 7.535092291248058e-06, + "loss": 0.00424842, + "memory(GiB)": 26.31, + "step": 3435, + "train_speed(iter/s)": 0.582939 + }, + { + "acc": 0.99865971, + "epoch": 3.647932131495228, + "grad_norm": 0.5255548357963562, + "learning_rate": 7.5275318300223345e-06, + "loss": 0.00307308, + "memory(GiB)": 26.31, + "step": 3440, + "train_speed(iter/s)": 0.582934 + }, + { + "acc": 0.99876957, + "epoch": 3.6532343584305407, + "grad_norm": 0.34655559062957764, + "learning_rate": 7.519963598667434e-06, + "loss": 0.00385971, + "memory(GiB)": 26.31, + "step": 3445, + "train_speed(iter/s)": 0.582934 + }, + { + "acc": 0.99846992, + "epoch": 3.658536585365854, + "grad_norm": 0.617057740688324, + "learning_rate": 7.5123876204541925e-06, + "loss": 0.00614815, + "memory(GiB)": 26.31, + "step": 3450, + "train_speed(iter/s)": 0.582928 + }, + { + "acc": 0.99899044, + "epoch": 3.6638388123011665, + "grad_norm": 0.5251142978668213, + "learning_rate": 7.504803918677261e-06, + "loss": 0.00470486, + "memory(GiB)": 26.31, + "step": 3455, + "train_speed(iter/s)": 0.582919 + }, + { + "acc": 0.99908333, + "epoch": 3.669141039236479, + "grad_norm": 0.7263888120651245, + "learning_rate": 7.497212516655043e-06, + "loss": 0.00408497, + "memory(GiB)": 26.31, + "step": 3460, + "train_speed(iter/s)": 0.582912 + }, + { + "acc": 0.99742146, + "epoch": 3.6744432661717923, + "grad_norm": 0.6099832653999329, + "learning_rate": 7.489613437729614e-06, + "loss": 0.00595545, + "memory(GiB)": 26.31, + "step": 3465, + "train_speed(iter/s)": 0.582906 + }, + { + "acc": 0.99899769, + "epoch": 3.679745493107105, + "grad_norm": 1.0556585788726807, + "learning_rate": 7.482006705266659e-06, + "loss": 0.00584042, + "memory(GiB)": 26.31, + "step": 3470, + "train_speed(iter/s)": 0.582903 + }, + { + "acc": 0.99856606, + "epoch": 3.6850477200424177, + "grad_norm": 0.09941625595092773, + "learning_rate": 7.474392342655393e-06, + "loss": 0.00341795, + "memory(GiB)": 26.31, + "step": 3475, + "train_speed(iter/s)": 0.582897 + }, + { + "acc": 0.99857359, + "epoch": 3.6903499469777308, + "grad_norm": 0.6893133521080017, + "learning_rate": 7.466770373308494e-06, + "loss": 0.00646025, + "memory(GiB)": 26.31, + "step": 3480, + "train_speed(iter/s)": 0.582891 + }, + { + "acc": 0.99913788, + "epoch": 3.6956521739130435, + "grad_norm": 0.5789036750793457, + "learning_rate": 7.459140820662029e-06, + "loss": 0.00307661, + "memory(GiB)": 26.31, + "step": 3485, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99864273, + "epoch": 3.7009544008483566, + "grad_norm": 0.5417762398719788, + "learning_rate": 7.451503708175382e-06, + "loss": 0.00463357, + "memory(GiB)": 26.31, + "step": 3490, + "train_speed(iter/s)": 0.582884 + }, + { + "acc": 0.99816399, + "epoch": 3.7062566277836693, + "grad_norm": 0.8491963148117065, + "learning_rate": 7.4438590593311795e-06, + "loss": 0.00563908, + "memory(GiB)": 26.31, + "step": 3495, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99839602, + "epoch": 3.711558854718982, + "grad_norm": 0.42541468143463135, + "learning_rate": 7.436206897635227e-06, + "loss": 0.00373492, + "memory(GiB)": 26.31, + "step": 3500, + "train_speed(iter/s)": 0.582891 + }, + { + "acc": 0.99912691, + "epoch": 3.7168610816542946, + "grad_norm": 0.6051958799362183, + "learning_rate": 7.428547246616425e-06, + "loss": 0.00373506, + "memory(GiB)": 26.31, + "step": 3505, + "train_speed(iter/s)": 0.58289 + }, + { + "acc": 0.99895039, + "epoch": 3.7221633085896078, + "grad_norm": 0.6306867003440857, + "learning_rate": 7.420880129826703e-06, + "loss": 0.00296825, + "memory(GiB)": 26.31, + "step": 3510, + "train_speed(iter/s)": 0.582881 + }, + { + "acc": 0.99890804, + "epoch": 3.7274655355249204, + "grad_norm": 0.2302398830652237, + "learning_rate": 7.413205570840947e-06, + "loss": 0.00509791, + "memory(GiB)": 26.31, + "step": 3515, + "train_speed(iter/s)": 0.58288 + }, + { + "acc": 0.99802408, + "epoch": 3.7327677624602336, + "grad_norm": 0.7267009615898132, + "learning_rate": 7.405523593256929e-06, + "loss": 0.00455983, + "memory(GiB)": 26.31, + "step": 3520, + "train_speed(iter/s)": 0.582875 + }, + { + "acc": 0.99777699, + "epoch": 3.7380699893955462, + "grad_norm": 0.44353216886520386, + "learning_rate": 7.397834220695225e-06, + "loss": 0.00718109, + "memory(GiB)": 26.31, + "step": 3525, + "train_speed(iter/s)": 0.582872 + }, + { + "acc": 0.99837513, + "epoch": 3.743372216330859, + "grad_norm": 0.31030791997909546, + "learning_rate": 7.390137476799156e-06, + "loss": 0.00507879, + "memory(GiB)": 26.31, + "step": 3530, + "train_speed(iter/s)": 0.582872 + }, + { + "acc": 0.99864407, + "epoch": 3.7486744432661716, + "grad_norm": 0.37967705726623535, + "learning_rate": 7.382433385234707e-06, + "loss": 0.00317531, + "memory(GiB)": 26.31, + "step": 3535, + "train_speed(iter/s)": 0.582872 + }, + { + "acc": 0.99891911, + "epoch": 3.7539766702014847, + "grad_norm": 0.437358558177948, + "learning_rate": 7.374721969690455e-06, + "loss": 0.00412428, + "memory(GiB)": 26.31, + "step": 3540, + "train_speed(iter/s)": 0.582873 + }, + { + "acc": 0.99910431, + "epoch": 3.7592788971367974, + "grad_norm": 0.7306728363037109, + "learning_rate": 7.367003253877494e-06, + "loss": 0.00288328, + "memory(GiB)": 26.31, + "step": 3545, + "train_speed(iter/s)": 0.582878 + }, + { + "acc": 0.99876919, + "epoch": 3.7645811240721105, + "grad_norm": 0.45699283480644226, + "learning_rate": 7.359277261529366e-06, + "loss": 0.00326591, + "memory(GiB)": 26.31, + "step": 3550, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99903383, + "epoch": 3.7698833510074232, + "grad_norm": 0.35999491810798645, + "learning_rate": 7.35154401640199e-06, + "loss": 0.00265577, + "memory(GiB)": 26.31, + "step": 3555, + "train_speed(iter/s)": 0.582879 + }, + { + "acc": 0.99812126, + "epoch": 3.775185577942736, + "grad_norm": 0.6905408501625061, + "learning_rate": 7.343803542273583e-06, + "loss": 0.00630668, + "memory(GiB)": 26.31, + "step": 3560, + "train_speed(iter/s)": 0.582879 + }, + { + "acc": 0.99930096, + "epoch": 3.7804878048780486, + "grad_norm": 0.20918644964694977, + "learning_rate": 7.336055862944592e-06, + "loss": 0.00258163, + "memory(GiB)": 26.31, + "step": 3565, + "train_speed(iter/s)": 0.582885 + }, + { + "acc": 0.99900961, + "epoch": 3.7857900318133617, + "grad_norm": 0.3650825023651123, + "learning_rate": 7.328301002237616e-06, + "loss": 0.00405125, + "memory(GiB)": 26.31, + "step": 3570, + "train_speed(iter/s)": 0.582894 + }, + { + "acc": 0.99857883, + "epoch": 3.7910922587486744, + "grad_norm": 0.4976001977920532, + "learning_rate": 7.320538983997338e-06, + "loss": 0.00360008, + "memory(GiB)": 26.31, + "step": 3575, + "train_speed(iter/s)": 0.582893 + }, + { + "acc": 0.99873447, + "epoch": 3.7963944856839875, + "grad_norm": 0.16031405329704285, + "learning_rate": 7.312769832090447e-06, + "loss": 0.00385965, + "memory(GiB)": 26.31, + "step": 3580, + "train_speed(iter/s)": 0.582893 + }, + { + "acc": 0.99866476, + "epoch": 3.8016967126193, + "grad_norm": 0.4230116307735443, + "learning_rate": 7.304993570405567e-06, + "loss": 0.0040944, + "memory(GiB)": 26.31, + "step": 3585, + "train_speed(iter/s)": 0.582891 + }, + { + "acc": 0.99939251, + "epoch": 3.806998939554613, + "grad_norm": 0.18150892853736877, + "learning_rate": 7.297210222853182e-06, + "loss": 0.00284926, + "memory(GiB)": 26.31, + "step": 3590, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99903259, + "epoch": 3.8123011664899256, + "grad_norm": 0.49581795930862427, + "learning_rate": 7.2894198133655665e-06, + "loss": 0.00506911, + "memory(GiB)": 26.31, + "step": 3595, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99930048, + "epoch": 3.8176033934252387, + "grad_norm": 0.1171208992600441, + "learning_rate": 7.28162236589671e-06, + "loss": 0.00261142, + "memory(GiB)": 26.31, + "step": 3600, + "train_speed(iter/s)": 0.582888 + }, + { + "acc": 0.99935799, + "epoch": 3.8229056203605514, + "grad_norm": 0.319844126701355, + "learning_rate": 7.273817904422237e-06, + "loss": 0.00172885, + "memory(GiB)": 26.31, + "step": 3605, + "train_speed(iter/s)": 0.582886 + }, + { + "acc": 0.99837723, + "epoch": 3.8282078472958645, + "grad_norm": 0.5585457682609558, + "learning_rate": 7.266006452939342e-06, + "loss": 0.0053467, + "memory(GiB)": 26.31, + "step": 3610, + "train_speed(iter/s)": 0.582884 + }, + { + "acc": 0.99833784, + "epoch": 3.833510074231177, + "grad_norm": 0.700864851474762, + "learning_rate": 7.258188035466714e-06, + "loss": 0.00526347, + "memory(GiB)": 26.31, + "step": 3615, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99890614, + "epoch": 3.83881230116649, + "grad_norm": 0.5112093687057495, + "learning_rate": 7.250362676044458e-06, + "loss": 0.00356812, + "memory(GiB)": 26.31, + "step": 3620, + "train_speed(iter/s)": 0.582879 + }, + { + "acc": 0.99928646, + "epoch": 3.8441145281018025, + "grad_norm": 0.23124846816062927, + "learning_rate": 7.2425303987340236e-06, + "loss": 0.00238931, + "memory(GiB)": 26.31, + "step": 3625, + "train_speed(iter/s)": 0.582878 + }, + { + "acc": 0.99836931, + "epoch": 3.8494167550371157, + "grad_norm": 0.7917247414588928, + "learning_rate": 7.234691227618136e-06, + "loss": 0.00518584, + "memory(GiB)": 26.31, + "step": 3630, + "train_speed(iter/s)": 0.582878 + }, + { + "acc": 0.99892216, + "epoch": 3.8547189819724283, + "grad_norm": 0.30003005266189575, + "learning_rate": 7.226845186800714e-06, + "loss": 0.00322571, + "memory(GiB)": 26.31, + "step": 3635, + "train_speed(iter/s)": 0.582877 + }, + { + "acc": 0.99873142, + "epoch": 3.8600212089077415, + "grad_norm": 0.6209852695465088, + "learning_rate": 7.218992300406802e-06, + "loss": 0.00309149, + "memory(GiB)": 26.31, + "step": 3640, + "train_speed(iter/s)": 0.582878 + }, + { + "acc": 0.99855528, + "epoch": 3.865323435843054, + "grad_norm": 0.4968811571598053, + "learning_rate": 7.211132592582487e-06, + "loss": 0.00370005, + "memory(GiB)": 26.31, + "step": 3645, + "train_speed(iter/s)": 0.582878 + }, + { + "acc": 0.99924831, + "epoch": 3.870625662778367, + "grad_norm": 0.22578194737434387, + "learning_rate": 7.2032660874948405e-06, + "loss": 0.00231544, + "memory(GiB)": 26.31, + "step": 3650, + "train_speed(iter/s)": 0.582882 + }, + { + "acc": 0.99914141, + "epoch": 3.8759278897136795, + "grad_norm": 0.361869752407074, + "learning_rate": 7.195392809331824e-06, + "loss": 0.0024379, + "memory(GiB)": 26.31, + "step": 3655, + "train_speed(iter/s)": 0.582881 + }, + { + "acc": 0.9986701, + "epoch": 3.8812301166489926, + "grad_norm": 0.6052594184875488, + "learning_rate": 7.1875127823022326e-06, + "loss": 0.00489041, + "memory(GiB)": 26.31, + "step": 3660, + "train_speed(iter/s)": 0.582889 + }, + { + "acc": 0.99880333, + "epoch": 3.8865323435843053, + "grad_norm": 0.34441888332366943, + "learning_rate": 7.179626030635611e-06, + "loss": 0.00405798, + "memory(GiB)": 26.31, + "step": 3665, + "train_speed(iter/s)": 0.582888 + }, + { + "acc": 0.99863358, + "epoch": 3.8918345705196185, + "grad_norm": 0.7074710726737976, + "learning_rate": 7.171732578582176e-06, + "loss": 0.0041889, + "memory(GiB)": 26.31, + "step": 3670, + "train_speed(iter/s)": 0.582887 + }, + { + "acc": 0.99890327, + "epoch": 3.897136797454931, + "grad_norm": 0.5451133847236633, + "learning_rate": 7.163832450412752e-06, + "loss": 0.00401347, + "memory(GiB)": 26.31, + "step": 3675, + "train_speed(iter/s)": 0.582885 + }, + { + "acc": 0.99922581, + "epoch": 3.902439024390244, + "grad_norm": 0.23822365701198578, + "learning_rate": 7.155925670418691e-06, + "loss": 0.00292925, + "memory(GiB)": 26.31, + "step": 3680, + "train_speed(iter/s)": 0.582892 + }, + { + "acc": 0.99865017, + "epoch": 3.9077412513255565, + "grad_norm": 0.6244868040084839, + "learning_rate": 7.148012262911795e-06, + "loss": 0.00549463, + "memory(GiB)": 26.31, + "step": 3685, + "train_speed(iter/s)": 0.58289 + }, + { + "acc": 0.99927311, + "epoch": 3.9130434782608696, + "grad_norm": 0.47247928380966187, + "learning_rate": 7.140092252224247e-06, + "loss": 0.00199212, + "memory(GiB)": 26.31, + "step": 3690, + "train_speed(iter/s)": 0.582894 + }, + { + "acc": 0.99855614, + "epoch": 3.9183457051961823, + "grad_norm": 0.6953317523002625, + "learning_rate": 7.1321656627085315e-06, + "loss": 0.00421569, + "memory(GiB)": 26.31, + "step": 3695, + "train_speed(iter/s)": 0.582895 + }, + { + "acc": 0.99920654, + "epoch": 3.9236479321314954, + "grad_norm": 0.7520684599876404, + "learning_rate": 7.124232518737365e-06, + "loss": 0.00460742, + "memory(GiB)": 26.31, + "step": 3700, + "train_speed(iter/s)": 0.582895 + }, + { + "acc": 0.99842453, + "epoch": 3.928950159066808, + "grad_norm": 0.3329322040081024, + "learning_rate": 7.116292844703613e-06, + "loss": 0.00535532, + "memory(GiB)": 26.31, + "step": 3705, + "train_speed(iter/s)": 0.582895 + }, + { + "acc": 0.99893045, + "epoch": 3.934252386002121, + "grad_norm": 0.5064216256141663, + "learning_rate": 7.108346665020224e-06, + "loss": 0.00310686, + "memory(GiB)": 26.31, + "step": 3710, + "train_speed(iter/s)": 0.582899 + }, + { + "acc": 0.99881392, + "epoch": 3.9395546129374335, + "grad_norm": 0.5223778486251831, + "learning_rate": 7.100394004120146e-06, + "loss": 0.00423855, + "memory(GiB)": 26.31, + "step": 3715, + "train_speed(iter/s)": 0.582897 + }, + { + "acc": 0.99847584, + "epoch": 3.9448568398727466, + "grad_norm": 0.4084107279777527, + "learning_rate": 7.092434886456258e-06, + "loss": 0.00451448, + "memory(GiB)": 26.31, + "step": 3720, + "train_speed(iter/s)": 0.582896 + }, + { + "acc": 0.99924583, + "epoch": 3.9501590668080593, + "grad_norm": 0.4136032164096832, + "learning_rate": 7.084469336501293e-06, + "loss": 0.00311526, + "memory(GiB)": 26.31, + "step": 3725, + "train_speed(iter/s)": 0.582899 + }, + { + "acc": 0.9987318, + "epoch": 3.9554612937433724, + "grad_norm": 0.0378059521317482, + "learning_rate": 7.076497378747761e-06, + "loss": 0.00337362, + "memory(GiB)": 26.31, + "step": 3730, + "train_speed(iter/s)": 0.582897 + }, + { + "acc": 0.99868851, + "epoch": 3.960763520678685, + "grad_norm": 0.6353269219398499, + "learning_rate": 7.068519037707873e-06, + "loss": 0.00564696, + "memory(GiB)": 26.31, + "step": 3735, + "train_speed(iter/s)": 0.582897 + }, + { + "acc": 0.99876595, + "epoch": 3.9660657476139978, + "grad_norm": 0.6448261141777039, + "learning_rate": 7.060534337913472e-06, + "loss": 0.00475402, + "memory(GiB)": 26.31, + "step": 3740, + "train_speed(iter/s)": 0.582898 + }, + { + "acc": 0.99950418, + "epoch": 3.9713679745493105, + "grad_norm": 0.48298442363739014, + "learning_rate": 7.052543303915944e-06, + "loss": 0.00231354, + "memory(GiB)": 26.31, + "step": 3745, + "train_speed(iter/s)": 0.582907 + }, + { + "acc": 0.99877338, + "epoch": 3.9766702014846236, + "grad_norm": 0.329344242811203, + "learning_rate": 7.044545960286163e-06, + "loss": 0.00474007, + "memory(GiB)": 26.31, + "step": 3750, + "train_speed(iter/s)": 0.58291 + }, + { + "acc": 0.99898186, + "epoch": 3.9819724284199363, + "grad_norm": 0.2393208146095276, + "learning_rate": 7.036542331614395e-06, + "loss": 0.00327823, + "memory(GiB)": 26.31, + "step": 3755, + "train_speed(iter/s)": 0.58291 + }, + { + "acc": 0.99976311, + "epoch": 3.9872746553552494, + "grad_norm": 0.32558730244636536, + "learning_rate": 7.028532442510238e-06, + "loss": 0.00244776, + "memory(GiB)": 26.31, + "step": 3760, + "train_speed(iter/s)": 0.582914 + }, + { + "acc": 0.99965019, + "epoch": 3.992576882290562, + "grad_norm": 0.2150496244430542, + "learning_rate": 7.020516317602532e-06, + "loss": 0.00221206, + "memory(GiB)": 26.31, + "step": 3765, + "train_speed(iter/s)": 0.582919 + }, + { + "acc": 0.99868097, + "epoch": 3.9978791092258747, + "grad_norm": 1.0313560962677002, + "learning_rate": 7.0124939815392985e-06, + "loss": 0.00410159, + "memory(GiB)": 26.31, + "step": 3770, + "train_speed(iter/s)": 0.582924 + }, + { + "acc": 0.99927502, + "epoch": 4.003181336161187, + "grad_norm": 0.49767929315567017, + "learning_rate": 7.0044654589876526e-06, + "loss": 0.00273472, + "memory(GiB)": 26.31, + "step": 3775, + "train_speed(iter/s)": 0.58284 + }, + { + "acc": 0.99961834, + "epoch": 4.0084835630965, + "grad_norm": 0.1090165302157402, + "learning_rate": 6.996430774633731e-06, + "loss": 0.00123068, + "memory(GiB)": 26.31, + "step": 3780, + "train_speed(iter/s)": 0.582849 + }, + { + "acc": 0.99917088, + "epoch": 4.013785790031814, + "grad_norm": 0.4388119578361511, + "learning_rate": 6.988389953182618e-06, + "loss": 0.00246109, + "memory(GiB)": 26.31, + "step": 3785, + "train_speed(iter/s)": 0.582852 + }, + { + "acc": 0.99895935, + "epoch": 4.019088016967126, + "grad_norm": 0.7294406294822693, + "learning_rate": 6.980343019358272e-06, + "loss": 0.00368474, + "memory(GiB)": 26.31, + "step": 3790, + "train_speed(iter/s)": 0.582858 + }, + { + "acc": 0.99912395, + "epoch": 4.024390243902439, + "grad_norm": 0.7474674582481384, + "learning_rate": 6.9722899979034404e-06, + "loss": 0.0029763, + "memory(GiB)": 26.31, + "step": 3795, + "train_speed(iter/s)": 0.58286 + }, + { + "acc": 0.99937649, + "epoch": 4.029692470837752, + "grad_norm": 0.351416677236557, + "learning_rate": 6.964230913579589e-06, + "loss": 0.00383181, + "memory(GiB)": 26.31, + "step": 3800, + "train_speed(iter/s)": 0.582865 + }, + { + "acc": 0.99912262, + "epoch": 4.034994697773064, + "grad_norm": 0.2787763774394989, + "learning_rate": 6.956165791166834e-06, + "loss": 0.00277095, + "memory(GiB)": 26.31, + "step": 3805, + "train_speed(iter/s)": 0.582864 + }, + { + "acc": 0.99912815, + "epoch": 4.040296924708377, + "grad_norm": 0.9426448941230774, + "learning_rate": 6.948094655463843e-06, + "loss": 0.00392118, + "memory(GiB)": 26.31, + "step": 3810, + "train_speed(iter/s)": 0.58287 + }, + { + "acc": 0.99938164, + "epoch": 4.045599151643691, + "grad_norm": 0.4804195165634155, + "learning_rate": 6.940017531287786e-06, + "loss": 0.00344517, + "memory(GiB)": 26.31, + "step": 3815, + "train_speed(iter/s)": 0.582869 + }, + { + "acc": 0.99925079, + "epoch": 4.050901378579003, + "grad_norm": 0.6506812572479248, + "learning_rate": 6.9319344434742395e-06, + "loss": 0.00181158, + "memory(GiB)": 26.31, + "step": 3820, + "train_speed(iter/s)": 0.582868 + }, + { + "acc": 0.99804354, + "epoch": 4.056203605514316, + "grad_norm": 0.4788365662097931, + "learning_rate": 6.923845416877123e-06, + "loss": 0.0051, + "memory(GiB)": 26.31, + "step": 3825, + "train_speed(iter/s)": 0.58287 + }, + { + "acc": 0.99923143, + "epoch": 4.061505832449629, + "grad_norm": 0.34085676074028015, + "learning_rate": 6.91575047636861e-06, + "loss": 0.00306839, + "memory(GiB)": 26.31, + "step": 3830, + "train_speed(iter/s)": 0.58287 + }, + { + "acc": 0.99936924, + "epoch": 4.066808059384941, + "grad_norm": 0.35691240429878235, + "learning_rate": 6.907649646839062e-06, + "loss": 0.00194274, + "memory(GiB)": 26.31, + "step": 3835, + "train_speed(iter/s)": 0.58287 + }, + { + "acc": 0.99891167, + "epoch": 4.072110286320255, + "grad_norm": 0.3848298192024231, + "learning_rate": 6.899542953196948e-06, + "loss": 0.00519823, + "memory(GiB)": 26.31, + "step": 3840, + "train_speed(iter/s)": 0.582869 + }, + { + "acc": 0.99913979, + "epoch": 4.077412513255568, + "grad_norm": 0.2372390478849411, + "learning_rate": 6.891430420368765e-06, + "loss": 0.00238386, + "memory(GiB)": 26.31, + "step": 3845, + "train_speed(iter/s)": 0.582872 + }, + { + "acc": 0.99897785, + "epoch": 4.08271474019088, + "grad_norm": 0.5104010105133057, + "learning_rate": 6.883312073298965e-06, + "loss": 0.00465745, + "memory(GiB)": 26.31, + "step": 3850, + "train_speed(iter/s)": 0.582872 + }, + { + "acc": 0.99932671, + "epoch": 4.088016967126193, + "grad_norm": 0.13594026863574982, + "learning_rate": 6.875187936949884e-06, + "loss": 0.00181529, + "memory(GiB)": 26.31, + "step": 3855, + "train_speed(iter/s)": 0.582873 + }, + { + "acc": 0.99900427, + "epoch": 4.093319194061506, + "grad_norm": 0.45077431201934814, + "learning_rate": 6.867058036301653e-06, + "loss": 0.00304938, + "memory(GiB)": 26.31, + "step": 3860, + "train_speed(iter/s)": 0.582874 + }, + { + "acc": 0.99903927, + "epoch": 4.098621420996818, + "grad_norm": 0.2113030105829239, + "learning_rate": 6.858922396352126e-06, + "loss": 0.00292636, + "memory(GiB)": 26.31, + "step": 3865, + "train_speed(iter/s)": 0.582874 + }, + { + "acc": 0.99841099, + "epoch": 4.103923647932131, + "grad_norm": 0.6499300003051758, + "learning_rate": 6.850781042116808e-06, + "loss": 0.0049599, + "memory(GiB)": 26.31, + "step": 3870, + "train_speed(iter/s)": 0.582876 + }, + { + "acc": 0.99916124, + "epoch": 4.109225874867445, + "grad_norm": 0.3014342486858368, + "learning_rate": 6.842633998628772e-06, + "loss": 0.00299525, + "memory(GiB)": 26.31, + "step": 3875, + "train_speed(iter/s)": 0.582881 + }, + { + "acc": 0.99861727, + "epoch": 4.114528101802757, + "grad_norm": 1.1793856620788574, + "learning_rate": 6.834481290938586e-06, + "loss": 0.00433057, + "memory(GiB)": 26.31, + "step": 3880, + "train_speed(iter/s)": 0.582885 + }, + { + "acc": 0.99974747, + "epoch": 4.11983032873807, + "grad_norm": 0.1994122564792633, + "learning_rate": 6.8263229441142296e-06, + "loss": 0.00139203, + "memory(GiB)": 26.31, + "step": 3885, + "train_speed(iter/s)": 0.582885 + }, + { + "acc": 0.99805698, + "epoch": 4.125132555673383, + "grad_norm": 0.5237070322036743, + "learning_rate": 6.818158983241031e-06, + "loss": 0.00496773, + "memory(GiB)": 26.31, + "step": 3890, + "train_speed(iter/s)": 0.582884 + }, + { + "acc": 0.9987545, + "epoch": 4.130434782608695, + "grad_norm": 0.7213680148124695, + "learning_rate": 6.809989433421572e-06, + "loss": 0.00347864, + "memory(GiB)": 26.31, + "step": 3895, + "train_speed(iter/s)": 0.582884 + }, + { + "acc": 0.99951344, + "epoch": 4.135737009544009, + "grad_norm": 0.4179275929927826, + "learning_rate": 6.801814319775623e-06, + "loss": 0.00191055, + "memory(GiB)": 26.31, + "step": 3900, + "train_speed(iter/s)": 0.582894 + }, + { + "acc": 0.99886799, + "epoch": 4.141039236479322, + "grad_norm": 0.49606838822364807, + "learning_rate": 6.79363366744006e-06, + "loss": 0.00437418, + "memory(GiB)": 26.31, + "step": 3905, + "train_speed(iter/s)": 0.582903 + }, + { + "acc": 0.99911499, + "epoch": 4.146341463414634, + "grad_norm": 0.3049047589302063, + "learning_rate": 6.785447501568789e-06, + "loss": 0.00369115, + "memory(GiB)": 26.31, + "step": 3910, + "train_speed(iter/s)": 0.582902 + }, + { + "acc": 0.99878025, + "epoch": 4.151643690349947, + "grad_norm": 0.4471993148326874, + "learning_rate": 6.777255847332676e-06, + "loss": 0.00471104, + "memory(GiB)": 26.31, + "step": 3915, + "train_speed(iter/s)": 0.582901 + }, + { + "acc": 0.99911194, + "epoch": 4.15694591728526, + "grad_norm": 0.6976534128189087, + "learning_rate": 6.769058729919454e-06, + "loss": 0.00338649, + "memory(GiB)": 26.31, + "step": 3920, + "train_speed(iter/s)": 0.582899 + }, + { + "acc": 0.99886684, + "epoch": 4.162248144220572, + "grad_norm": 0.44167786836624146, + "learning_rate": 6.76085617453366e-06, + "loss": 0.0030509, + "memory(GiB)": 26.31, + "step": 3925, + "train_speed(iter/s)": 0.582903 + }, + { + "acc": 0.99937372, + "epoch": 4.167550371155886, + "grad_norm": 0.6355365514755249, + "learning_rate": 6.752648206396546e-06, + "loss": 0.00211098, + "memory(GiB)": 26.31, + "step": 3930, + "train_speed(iter/s)": 0.582904 + }, + { + "acc": 0.99952736, + "epoch": 4.172852598091199, + "grad_norm": 0.11797591298818588, + "learning_rate": 6.744434850746011e-06, + "loss": 0.00186485, + "memory(GiB)": 26.31, + "step": 3935, + "train_speed(iter/s)": 0.582909 + }, + { + "acc": 0.99925127, + "epoch": 4.178154825026511, + "grad_norm": 0.496011883020401, + "learning_rate": 6.736216132836522e-06, + "loss": 0.00295694, + "memory(GiB)": 26.31, + "step": 3940, + "train_speed(iter/s)": 0.582911 + }, + { + "acc": 0.99831095, + "epoch": 4.183457051961824, + "grad_norm": 0.7491662502288818, + "learning_rate": 6.727992077939027e-06, + "loss": 0.00389315, + "memory(GiB)": 26.31, + "step": 3945, + "train_speed(iter/s)": 0.582912 + }, + { + "acc": 0.99911022, + "epoch": 4.188759278897137, + "grad_norm": 0.3298349678516388, + "learning_rate": 6.7197627113408905e-06, + "loss": 0.00260169, + "memory(GiB)": 26.31, + "step": 3950, + "train_speed(iter/s)": 0.582917 + }, + { + "acc": 0.99928799, + "epoch": 4.194061505832449, + "grad_norm": 0.5814478993415833, + "learning_rate": 6.711528058345805e-06, + "loss": 0.0016897, + "memory(GiB)": 26.31, + "step": 3955, + "train_speed(iter/s)": 0.582918 + }, + { + "acc": 0.99962626, + "epoch": 4.199363732767763, + "grad_norm": 0.3780122697353363, + "learning_rate": 6.703288144273724e-06, + "loss": 0.00158315, + "memory(GiB)": 26.31, + "step": 3960, + "train_speed(iter/s)": 0.582919 + }, + { + "acc": 0.99910164, + "epoch": 4.2046659597030756, + "grad_norm": 0.5787876844406128, + "learning_rate": 6.695042994460768e-06, + "loss": 0.00327033, + "memory(GiB)": 26.31, + "step": 3965, + "train_speed(iter/s)": 0.582919 + }, + { + "acc": 0.99942017, + "epoch": 4.209968186638388, + "grad_norm": 0.538512110710144, + "learning_rate": 6.686792634259165e-06, + "loss": 0.00221283, + "memory(GiB)": 26.31, + "step": 3970, + "train_speed(iter/s)": 0.58292 + }, + { + "acc": 0.99916143, + "epoch": 4.215270413573701, + "grad_norm": 0.29662081599235535, + "learning_rate": 6.678537089037162e-06, + "loss": 0.00368981, + "memory(GiB)": 26.31, + "step": 3975, + "train_speed(iter/s)": 0.582921 + }, + { + "acc": 0.9993372, + "epoch": 4.220572640509014, + "grad_norm": 0.1427513062953949, + "learning_rate": 6.670276384178945e-06, + "loss": 0.00276616, + "memory(GiB)": 26.31, + "step": 3980, + "train_speed(iter/s)": 0.582921 + }, + { + "acc": 0.99876518, + "epoch": 4.225874867444326, + "grad_norm": 0.7543923854827881, + "learning_rate": 6.6620105450845664e-06, + "loss": 0.0031005, + "memory(GiB)": 26.31, + "step": 3985, + "train_speed(iter/s)": 0.58292 + }, + { + "acc": 0.99929838, + "epoch": 4.23117709437964, + "grad_norm": 0.41538140177726746, + "learning_rate": 6.653739597169871e-06, + "loss": 0.00201908, + "memory(GiB)": 26.31, + "step": 3990, + "train_speed(iter/s)": 0.582925 + }, + { + "acc": 0.99893169, + "epoch": 4.2364793213149525, + "grad_norm": 0.7107354402542114, + "learning_rate": 6.645463565866404e-06, + "loss": 0.00467964, + "memory(GiB)": 26.31, + "step": 3995, + "train_speed(iter/s)": 0.582923 + }, + { + "acc": 0.99891777, + "epoch": 4.241781548250265, + "grad_norm": 0.6780441999435425, + "learning_rate": 6.637182476621346e-06, + "loss": 0.00379675, + "memory(GiB)": 26.31, + "step": 4000, + "train_speed(iter/s)": 0.582929 + }, + { + "acc": 0.99919319, + "epoch": 4.247083775185578, + "grad_norm": 0.27641725540161133, + "learning_rate": 6.628896354897429e-06, + "loss": 0.00266308, + "memory(GiB)": 26.31, + "step": 4005, + "train_speed(iter/s)": 0.582929 + }, + { + "acc": 0.99924898, + "epoch": 4.252386002120891, + "grad_norm": 0.40583816170692444, + "learning_rate": 6.620605226172858e-06, + "loss": 0.00284122, + "memory(GiB)": 26.31, + "step": 4010, + "train_speed(iter/s)": 0.582927 + }, + { + "acc": 0.99864264, + "epoch": 4.257688229056203, + "grad_norm": 0.5590219497680664, + "learning_rate": 6.6123091159412335e-06, + "loss": 0.0070276, + "memory(GiB)": 26.31, + "step": 4015, + "train_speed(iter/s)": 0.582926 + }, + { + "acc": 0.99863548, + "epoch": 4.262990455991517, + "grad_norm": 0.7009803056716919, + "learning_rate": 6.604008049711474e-06, + "loss": 0.00366253, + "memory(GiB)": 26.31, + "step": 4020, + "train_speed(iter/s)": 0.582925 + }, + { + "acc": 0.99885464, + "epoch": 4.2682926829268295, + "grad_norm": 0.28384023904800415, + "learning_rate": 6.595702053007738e-06, + "loss": 0.00409203, + "memory(GiB)": 26.31, + "step": 4025, + "train_speed(iter/s)": 0.582926 + }, + { + "acc": 0.99863319, + "epoch": 4.273594909862142, + "grad_norm": 0.22047458589076996, + "learning_rate": 6.5873911513693415e-06, + "loss": 0.00477225, + "memory(GiB)": 26.31, + "step": 4030, + "train_speed(iter/s)": 0.582931 + }, + { + "acc": 0.99915199, + "epoch": 4.278897136797455, + "grad_norm": 0.4781891107559204, + "learning_rate": 6.5790753703506814e-06, + "loss": 0.00297477, + "memory(GiB)": 26.31, + "step": 4035, + "train_speed(iter/s)": 0.58293 + }, + { + "acc": 0.99888287, + "epoch": 4.2841993637327676, + "grad_norm": 0.12410692870616913, + "learning_rate": 6.570754735521163e-06, + "loss": 0.00334938, + "memory(GiB)": 26.31, + "step": 4040, + "train_speed(iter/s)": 0.58293 + }, + { + "acc": 0.99848261, + "epoch": 4.28950159066808, + "grad_norm": 0.6257500648498535, + "learning_rate": 6.56242927246511e-06, + "loss": 0.00495994, + "memory(GiB)": 26.31, + "step": 4045, + "train_speed(iter/s)": 0.582935 + }, + { + "acc": 0.99935036, + "epoch": 4.294803817603394, + "grad_norm": 0.5982515811920166, + "learning_rate": 6.554099006781696e-06, + "loss": 0.00224422, + "memory(GiB)": 26.31, + "step": 4050, + "train_speed(iter/s)": 0.582936 + }, + { + "acc": 0.99870186, + "epoch": 4.3001060445387065, + "grad_norm": 0.45738568902015686, + "learning_rate": 6.545763964084861e-06, + "loss": 0.00415319, + "memory(GiB)": 26.31, + "step": 4055, + "train_speed(iter/s)": 0.582941 + }, + { + "acc": 0.99977303, + "epoch": 4.305408271474019, + "grad_norm": 0.1528371125459671, + "learning_rate": 6.537424170003233e-06, + "loss": 0.00109178, + "memory(GiB)": 26.31, + "step": 4060, + "train_speed(iter/s)": 0.582939 + }, + { + "acc": 0.99936209, + "epoch": 4.310710498409332, + "grad_norm": 0.30072876811027527, + "learning_rate": 6.529079650180048e-06, + "loss": 0.00162833, + "memory(GiB)": 26.31, + "step": 4065, + "train_speed(iter/s)": 0.582937 + }, + { + "acc": 0.99911661, + "epoch": 4.3160127253446445, + "grad_norm": 0.7277874946594238, + "learning_rate": 6.5207304302730755e-06, + "loss": 0.00278707, + "memory(GiB)": 26.31, + "step": 4070, + "train_speed(iter/s)": 0.582938 + }, + { + "acc": 0.99898167, + "epoch": 4.321314952279957, + "grad_norm": 0.5926617980003357, + "learning_rate": 6.512376535954534e-06, + "loss": 0.00526775, + "memory(GiB)": 26.31, + "step": 4075, + "train_speed(iter/s)": 0.582938 + }, + { + "acc": 0.9995492, + "epoch": 4.326617179215271, + "grad_norm": 0.23842601478099823, + "learning_rate": 6.504017992911017e-06, + "loss": 0.00162778, + "memory(GiB)": 26.31, + "step": 4080, + "train_speed(iter/s)": 0.582937 + }, + { + "acc": 0.99949112, + "epoch": 4.3319194061505835, + "grad_norm": 0.2551683187484741, + "learning_rate": 6.495654826843414e-06, + "loss": 0.00149569, + "memory(GiB)": 26.31, + "step": 4085, + "train_speed(iter/s)": 0.582941 + }, + { + "acc": 0.99977875, + "epoch": 4.337221633085896, + "grad_norm": 0.04848746582865715, + "learning_rate": 6.487287063466824e-06, + "loss": 0.00142655, + "memory(GiB)": 26.31, + "step": 4090, + "train_speed(iter/s)": 0.582945 + }, + { + "acc": 0.99942417, + "epoch": 4.342523860021209, + "grad_norm": 0.15790040791034698, + "learning_rate": 6.478914728510485e-06, + "loss": 0.00233415, + "memory(GiB)": 26.31, + "step": 4095, + "train_speed(iter/s)": 0.582945 + }, + { + "acc": 0.99927073, + "epoch": 4.3478260869565215, + "grad_norm": 0.49188557267189026, + "learning_rate": 6.470537847717692e-06, + "loss": 0.00392356, + "memory(GiB)": 26.31, + "step": 4100, + "train_speed(iter/s)": 0.582944 + }, + { + "acc": 0.99959507, + "epoch": 4.353128313891834, + "grad_norm": 0.23772254586219788, + "learning_rate": 6.462156446845715e-06, + "loss": 0.00239973, + "memory(GiB)": 26.31, + "step": 4105, + "train_speed(iter/s)": 0.582947 + }, + { + "acc": 0.99922543, + "epoch": 4.358430540827148, + "grad_norm": 0.16435517370700836, + "learning_rate": 6.453770551665727e-06, + "loss": 0.00276972, + "memory(GiB)": 26.31, + "step": 4110, + "train_speed(iter/s)": 0.582947 + }, + { + "acc": 0.99857063, + "epoch": 4.36373276776246, + "grad_norm": 0.5997322201728821, + "learning_rate": 6.445380187962715e-06, + "loss": 0.00485413, + "memory(GiB)": 26.31, + "step": 4115, + "train_speed(iter/s)": 0.582955 + }, + { + "acc": 0.99888248, + "epoch": 4.369034994697773, + "grad_norm": 0.6237581372261047, + "learning_rate": 6.43698538153541e-06, + "loss": 0.00486804, + "memory(GiB)": 26.31, + "step": 4120, + "train_speed(iter/s)": 0.582954 + }, + { + "acc": 0.99929352, + "epoch": 4.374337221633086, + "grad_norm": 0.49355337023735046, + "learning_rate": 6.4285861581962005e-06, + "loss": 0.00364117, + "memory(GiB)": 26.31, + "step": 4125, + "train_speed(iter/s)": 0.582954 + }, + { + "acc": 0.99813728, + "epoch": 4.3796394485683985, + "grad_norm": 0.6591293215751648, + "learning_rate": 6.4201825437710565e-06, + "loss": 0.00500336, + "memory(GiB)": 26.31, + "step": 4130, + "train_speed(iter/s)": 0.582953 + }, + { + "acc": 0.9992382, + "epoch": 4.384941675503711, + "grad_norm": 0.1336752027273178, + "learning_rate": 6.411774564099454e-06, + "loss": 0.00252956, + "memory(GiB)": 26.31, + "step": 4135, + "train_speed(iter/s)": 0.582955 + }, + { + "acc": 0.99899483, + "epoch": 4.390243902439025, + "grad_norm": 0.30952557921409607, + "learning_rate": 6.403362245034283e-06, + "loss": 0.00370638, + "memory(GiB)": 26.31, + "step": 4140, + "train_speed(iter/s)": 0.58296 + }, + { + "acc": 0.9987875, + "epoch": 4.395546129374337, + "grad_norm": 0.29703906178474426, + "learning_rate": 6.3949456124417855e-06, + "loss": 0.00523653, + "memory(GiB)": 26.31, + "step": 4145, + "train_speed(iter/s)": 0.58296 + }, + { + "acc": 0.99912205, + "epoch": 4.40084835630965, + "grad_norm": 0.33913636207580566, + "learning_rate": 6.386524692201459e-06, + "loss": 0.00239438, + "memory(GiB)": 26.31, + "step": 4150, + "train_speed(iter/s)": 0.582965 + }, + { + "acc": 0.99977617, + "epoch": 4.406150583244963, + "grad_norm": 0.3019471764564514, + "learning_rate": 6.378099510205991e-06, + "loss": 0.00104185, + "memory(GiB)": 26.31, + "step": 4155, + "train_speed(iter/s)": 0.582965 + }, + { + "acc": 0.99841003, + "epoch": 4.4114528101802755, + "grad_norm": 0.7617125511169434, + "learning_rate": 6.369670092361169e-06, + "loss": 0.00464066, + "memory(GiB)": 26.31, + "step": 4160, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99932842, + "epoch": 4.416755037115588, + "grad_norm": 0.7266931533813477, + "learning_rate": 6.361236464585805e-06, + "loss": 0.00196429, + "memory(GiB)": 26.31, + "step": 4165, + "train_speed(iter/s)": 0.582969 + }, + { + "acc": 0.99902029, + "epoch": 4.422057264050902, + "grad_norm": 0.5494599938392639, + "learning_rate": 6.352798652811657e-06, + "loss": 0.00302969, + "memory(GiB)": 26.31, + "step": 4170, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99920559, + "epoch": 4.427359490986214, + "grad_norm": 0.44917190074920654, + "learning_rate": 6.3443566829833485e-06, + "loss": 0.00246186, + "memory(GiB)": 26.31, + "step": 4175, + "train_speed(iter/s)": 0.582969 + }, + { + "acc": 0.99954557, + "epoch": 4.432661717921527, + "grad_norm": 0.626193106174469, + "learning_rate": 6.335910581058287e-06, + "loss": 0.00248692, + "memory(GiB)": 26.31, + "step": 4180, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99891415, + "epoch": 4.43796394485684, + "grad_norm": 0.5783212184906006, + "learning_rate": 6.327460373006584e-06, + "loss": 0.00308748, + "memory(GiB)": 26.31, + "step": 4185, + "train_speed(iter/s)": 0.582968 + }, + { + "acc": 0.99954481, + "epoch": 4.443266171792152, + "grad_norm": 0.3852193057537079, + "learning_rate": 6.319006084810983e-06, + "loss": 0.00142698, + "memory(GiB)": 26.31, + "step": 4190, + "train_speed(iter/s)": 0.582971 + }, + { + "acc": 0.9996376, + "epoch": 4.448568398727465, + "grad_norm": 0.11862189322710037, + "learning_rate": 6.310547742466766e-06, + "loss": 0.00133829, + "memory(GiB)": 26.31, + "step": 4195, + "train_speed(iter/s)": 0.582974 + }, + { + "acc": 0.99929657, + "epoch": 4.453870625662779, + "grad_norm": 0.7278153896331787, + "learning_rate": 6.302085371981682e-06, + "loss": 0.00171, + "memory(GiB)": 26.31, + "step": 4200, + "train_speed(iter/s)": 0.582982 + }, + { + "acc": 0.99895, + "epoch": 4.459172852598091, + "grad_norm": 0.712354302406311, + "learning_rate": 6.293618999375868e-06, + "loss": 0.00472792, + "memory(GiB)": 26.31, + "step": 4205, + "train_speed(iter/s)": 0.582981 + }, + { + "acc": 0.99929352, + "epoch": 4.464475079533404, + "grad_norm": 0.6949740648269653, + "learning_rate": 6.2851486506817635e-06, + "loss": 0.00217784, + "memory(GiB)": 26.31, + "step": 4210, + "train_speed(iter/s)": 0.582979 + }, + { + "acc": 0.99937687, + "epoch": 4.469777306468717, + "grad_norm": 0.4581748843193054, + "learning_rate": 6.276674351944042e-06, + "loss": 0.00175911, + "memory(GiB)": 26.31, + "step": 4215, + "train_speed(iter/s)": 0.582981 + }, + { + "acc": 0.99987373, + "epoch": 4.475079533404029, + "grad_norm": 0.06770281493663788, + "learning_rate": 6.2681961292195105e-06, + "loss": 0.00088498, + "memory(GiB)": 26.31, + "step": 4220, + "train_speed(iter/s)": 0.582986 + }, + { + "acc": 0.99874878, + "epoch": 4.480381760339343, + "grad_norm": 0.5159828066825867, + "learning_rate": 6.25971400857705e-06, + "loss": 0.002787, + "memory(GiB)": 26.31, + "step": 4225, + "train_speed(iter/s)": 0.582986 + }, + { + "acc": 0.999405, + "epoch": 4.485683987274656, + "grad_norm": 0.49573227763175964, + "learning_rate": 6.251228016097524e-06, + "loss": 0.00399416, + "memory(GiB)": 26.31, + "step": 4230, + "train_speed(iter/s)": 0.582995 + }, + { + "acc": 0.99900513, + "epoch": 4.490986214209968, + "grad_norm": 0.26090532541275024, + "learning_rate": 6.242738177873702e-06, + "loss": 0.00458506, + "memory(GiB)": 26.31, + "step": 4235, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99961739, + "epoch": 4.496288441145281, + "grad_norm": 0.11822298914194107, + "learning_rate": 6.2342445200101755e-06, + "loss": 0.00199409, + "memory(GiB)": 26.31, + "step": 4240, + "train_speed(iter/s)": 0.582993 + }, + { + "acc": 0.99923649, + "epoch": 4.501590668080594, + "grad_norm": 0.3978080749511719, + "learning_rate": 6.2257470686232846e-06, + "loss": 0.0028736, + "memory(GiB)": 26.31, + "step": 4245, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99936199, + "epoch": 4.506892895015906, + "grad_norm": 0.2833632528781891, + "learning_rate": 6.2172458498410336e-06, + "loss": 0.0029819, + "memory(GiB)": 26.31, + "step": 4250, + "train_speed(iter/s)": 0.582992 + }, + { + "acc": 0.99923077, + "epoch": 4.512195121951219, + "grad_norm": 0.23151080310344696, + "learning_rate": 6.2087408898030075e-06, + "loss": 0.00369481, + "memory(GiB)": 26.31, + "step": 4255, + "train_speed(iter/s)": 0.582993 + }, + { + "acc": 0.99963894, + "epoch": 4.517497348886533, + "grad_norm": 0.5830540060997009, + "learning_rate": 6.200232214660299e-06, + "loss": 0.00243596, + "memory(GiB)": 26.31, + "step": 4260, + "train_speed(iter/s)": 0.582998 + }, + { + "acc": 0.99961758, + "epoch": 4.522799575821845, + "grad_norm": 0.3385343551635742, + "learning_rate": 6.191719850575419e-06, + "loss": 0.0011743, + "memory(GiB)": 26.31, + "step": 4265, + "train_speed(iter/s)": 0.582996 + }, + { + "acc": 0.99963942, + "epoch": 4.528101802757158, + "grad_norm": 0.44956743717193604, + "learning_rate": 6.183203823722227e-06, + "loss": 0.00175311, + "memory(GiB)": 26.31, + "step": 4270, + "train_speed(iter/s)": 0.583003 + }, + { + "acc": 0.99880333, + "epoch": 4.533404029692471, + "grad_norm": 0.4423983693122864, + "learning_rate": 6.174684160285838e-06, + "loss": 0.00511739, + "memory(GiB)": 26.31, + "step": 4275, + "train_speed(iter/s)": 0.583007 + }, + { + "acc": 0.99928932, + "epoch": 4.538706256627783, + "grad_norm": 0.3532782196998596, + "learning_rate": 6.166160886462556e-06, + "loss": 0.00230165, + "memory(GiB)": 26.31, + "step": 4280, + "train_speed(iter/s)": 0.583012 + }, + { + "acc": 0.99882469, + "epoch": 4.544008483563097, + "grad_norm": 0.5737372040748596, + "learning_rate": 6.157634028459782e-06, + "loss": 0.00340655, + "memory(GiB)": 26.31, + "step": 4285, + "train_speed(iter/s)": 0.583014 + }, + { + "acc": 0.99885597, + "epoch": 4.54931071049841, + "grad_norm": 1.0472294092178345, + "learning_rate": 6.149103612495937e-06, + "loss": 0.00416699, + "memory(GiB)": 26.31, + "step": 4290, + "train_speed(iter/s)": 0.583027 + }, + { + "acc": 0.99931746, + "epoch": 4.554612937433722, + "grad_norm": 0.3487650454044342, + "learning_rate": 6.1405696648003845e-06, + "loss": 0.00149525, + "memory(GiB)": 26.31, + "step": 4295, + "train_speed(iter/s)": 0.583027 + }, + { + "acc": 0.99887199, + "epoch": 4.559915164369035, + "grad_norm": 0.7196864485740662, + "learning_rate": 6.132032211613346e-06, + "loss": 0.00416826, + "memory(GiB)": 26.31, + "step": 4300, + "train_speed(iter/s)": 0.583027 + }, + { + "acc": 0.99932594, + "epoch": 4.565217391304348, + "grad_norm": 0.547074019908905, + "learning_rate": 6.123491279185825e-06, + "loss": 0.00252956, + "memory(GiB)": 26.31, + "step": 4305, + "train_speed(iter/s)": 0.583032 + }, + { + "acc": 0.99934731, + "epoch": 4.57051961823966, + "grad_norm": 0.8476079702377319, + "learning_rate": 6.1149468937795145e-06, + "loss": 0.00192753, + "memory(GiB)": 26.31, + "step": 4310, + "train_speed(iter/s)": 0.583031 + }, + { + "acc": 0.99914036, + "epoch": 4.575821845174973, + "grad_norm": 0.15940773487091064, + "learning_rate": 6.106399081666734e-06, + "loss": 0.00218152, + "memory(GiB)": 26.31, + "step": 4315, + "train_speed(iter/s)": 0.583029 + }, + { + "acc": 0.99965105, + "epoch": 4.581124072110287, + "grad_norm": 0.2619980275630951, + "learning_rate": 6.0978478691303365e-06, + "loss": 0.00189752, + "memory(GiB)": 26.31, + "step": 4320, + "train_speed(iter/s)": 0.583034 + }, + { + "acc": 0.99938726, + "epoch": 4.586426299045599, + "grad_norm": 0.09260746836662292, + "learning_rate": 6.089293282463629e-06, + "loss": 0.00186144, + "memory(GiB)": 26.31, + "step": 4325, + "train_speed(iter/s)": 0.583033 + }, + { + "acc": 0.99938898, + "epoch": 4.591728525980912, + "grad_norm": 0.5282265543937683, + "learning_rate": 6.080735347970294e-06, + "loss": 0.00238004, + "memory(GiB)": 26.31, + "step": 4330, + "train_speed(iter/s)": 0.583033 + }, + { + "acc": 0.99915371, + "epoch": 4.597030752916225, + "grad_norm": 0.27696260809898376, + "learning_rate": 6.0721740919643066e-06, + "loss": 0.00277539, + "memory(GiB)": 26.31, + "step": 4335, + "train_speed(iter/s)": 0.583041 + }, + { + "acc": 0.99952774, + "epoch": 4.602332979851537, + "grad_norm": 0.2598687410354614, + "learning_rate": 6.063609540769858e-06, + "loss": 0.00203173, + "memory(GiB)": 26.31, + "step": 4340, + "train_speed(iter/s)": 0.583044 + }, + { + "acc": 0.99864531, + "epoch": 4.607635206786851, + "grad_norm": 0.7812467813491821, + "learning_rate": 6.055041720721268e-06, + "loss": 0.00370566, + "memory(GiB)": 26.31, + "step": 4345, + "train_speed(iter/s)": 0.583048 + }, + { + "acc": 0.99851122, + "epoch": 4.612937433722164, + "grad_norm": 0.7187480330467224, + "learning_rate": 6.046470658162914e-06, + "loss": 0.00516219, + "memory(GiB)": 26.31, + "step": 4350, + "train_speed(iter/s)": 0.583056 + }, + { + "acc": 0.99863443, + "epoch": 4.618239660657476, + "grad_norm": 0.3305424153804779, + "learning_rate": 6.037896379449135e-06, + "loss": 0.00443163, + "memory(GiB)": 26.31, + "step": 4355, + "train_speed(iter/s)": 0.583057 + }, + { + "acc": 0.99921246, + "epoch": 4.623541887592789, + "grad_norm": 0.3764010965824127, + "learning_rate": 6.029318910944164e-06, + "loss": 0.00273427, + "memory(GiB)": 26.31, + "step": 4360, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.99932175, + "epoch": 4.628844114528102, + "grad_norm": 0.29550793766975403, + "learning_rate": 6.02073827902204e-06, + "loss": 0.00184609, + "memory(GiB)": 26.31, + "step": 4365, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.99910212, + "epoch": 4.634146341463414, + "grad_norm": 0.5608096122741699, + "learning_rate": 6.012154510066532e-06, + "loss": 0.00197052, + "memory(GiB)": 26.31, + "step": 4370, + "train_speed(iter/s)": 0.58306 + }, + { + "acc": 0.9996316, + "epoch": 4.639448568398727, + "grad_norm": 0.13115724921226501, + "learning_rate": 6.003567630471049e-06, + "loss": 0.00155056, + "memory(GiB)": 26.31, + "step": 4375, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.9994236, + "epoch": 4.644750795334041, + "grad_norm": 0.6535782814025879, + "learning_rate": 5.994977666638571e-06, + "loss": 0.00236299, + "memory(GiB)": 26.31, + "step": 4380, + "train_speed(iter/s)": 0.583061 + }, + { + "acc": 0.99901667, + "epoch": 4.650053022269353, + "grad_norm": 0.6454782485961914, + "learning_rate": 5.986384644981558e-06, + "loss": 0.0031538, + "memory(GiB)": 26.31, + "step": 4385, + "train_speed(iter/s)": 0.583062 + }, + { + "acc": 0.99875145, + "epoch": 4.655355249204666, + "grad_norm": 0.8551376461982727, + "learning_rate": 5.977788591921871e-06, + "loss": 0.00375093, + "memory(GiB)": 26.31, + "step": 4390, + "train_speed(iter/s)": 0.583063 + }, + { + "acc": 0.99974909, + "epoch": 4.660657476139979, + "grad_norm": 0.2662411332130432, + "learning_rate": 5.969189533890697e-06, + "loss": 0.00120749, + "memory(GiB)": 26.31, + "step": 4395, + "train_speed(iter/s)": 0.583066 + }, + { + "acc": 0.99951324, + "epoch": 4.665959703075291, + "grad_norm": 0.23411191999912262, + "learning_rate": 5.960587497328457e-06, + "loss": 0.00206366, + "memory(GiB)": 26.31, + "step": 4400, + "train_speed(iter/s)": 0.583075 + }, + { + "acc": 0.99952765, + "epoch": 4.671261930010605, + "grad_norm": 0.2640402019023895, + "learning_rate": 5.951982508684733e-06, + "loss": 0.00111532, + "memory(GiB)": 26.31, + "step": 4405, + "train_speed(iter/s)": 0.583075 + }, + { + "acc": 0.99938078, + "epoch": 4.6765641569459175, + "grad_norm": 0.5517047047615051, + "learning_rate": 5.943374594418185e-06, + "loss": 0.00198936, + "memory(GiB)": 26.31, + "step": 4410, + "train_speed(iter/s)": 0.583076 + }, + { + "acc": 0.99863434, + "epoch": 4.68186638388123, + "grad_norm": 0.8088098764419556, + "learning_rate": 5.934763780996467e-06, + "loss": 0.00394955, + "memory(GiB)": 26.31, + "step": 4415, + "train_speed(iter/s)": 0.583074 + }, + { + "acc": 0.99955444, + "epoch": 4.687168610816543, + "grad_norm": 0.11458373069763184, + "learning_rate": 5.9261500948961496e-06, + "loss": 0.00121496, + "memory(GiB)": 26.31, + "step": 4420, + "train_speed(iter/s)": 0.583075 + }, + { + "acc": 0.99962997, + "epoch": 4.692470837751856, + "grad_norm": 0.37143048644065857, + "learning_rate": 5.917533562602632e-06, + "loss": 0.00115484, + "memory(GiB)": 26.31, + "step": 4425, + "train_speed(iter/s)": 0.583082 + }, + { + "acc": 0.99890938, + "epoch": 4.697773064687168, + "grad_norm": 0.4180762767791748, + "learning_rate": 5.90891421061007e-06, + "loss": 0.00400325, + "memory(GiB)": 26.31, + "step": 4430, + "train_speed(iter/s)": 0.583081 + }, + { + "acc": 0.9990202, + "epoch": 4.703075291622481, + "grad_norm": 0.45803967118263245, + "learning_rate": 5.900292065421285e-06, + "loss": 0.00252781, + "memory(GiB)": 26.31, + "step": 4435, + "train_speed(iter/s)": 0.583081 + }, + { + "acc": 0.9988286, + "epoch": 4.7083775185577945, + "grad_norm": 0.3728193938732147, + "learning_rate": 5.8916671535476886e-06, + "loss": 0.00431239, + "memory(GiB)": 26.31, + "step": 4440, + "train_speed(iter/s)": 0.583085 + }, + { + "acc": 0.99901409, + "epoch": 4.713679745493107, + "grad_norm": 0.6913246512413025, + "learning_rate": 5.8830395015092035e-06, + "loss": 0.00349989, + "memory(GiB)": 26.31, + "step": 4445, + "train_speed(iter/s)": 0.583085 + }, + { + "acc": 0.99923077, + "epoch": 4.71898197242842, + "grad_norm": 0.8583769202232361, + "learning_rate": 5.8744091358341706e-06, + "loss": 0.00261297, + "memory(GiB)": 26.31, + "step": 4450, + "train_speed(iter/s)": 0.583084 + }, + { + "acc": 0.9992691, + "epoch": 4.724284199363733, + "grad_norm": 0.39945945143699646, + "learning_rate": 5.865776083059279e-06, + "loss": 0.00270182, + "memory(GiB)": 26.31, + "step": 4455, + "train_speed(iter/s)": 0.583084 + }, + { + "acc": 0.99868488, + "epoch": 4.729586426299045, + "grad_norm": 0.7474715709686279, + "learning_rate": 5.8571403697294805e-06, + "loss": 0.00387175, + "memory(GiB)": 26.31, + "step": 4460, + "train_speed(iter/s)": 0.583083 + }, + { + "acc": 0.99916058, + "epoch": 4.734888653234359, + "grad_norm": 0.2578558027744293, + "learning_rate": 5.848502022397904e-06, + "loss": 0.00217835, + "memory(GiB)": 26.31, + "step": 4465, + "train_speed(iter/s)": 0.583082 + }, + { + "acc": 0.9994772, + "epoch": 4.7401908801696715, + "grad_norm": 0.14401961863040924, + "learning_rate": 5.839861067625784e-06, + "loss": 0.00253854, + "memory(GiB)": 26.31, + "step": 4470, + "train_speed(iter/s)": 0.583081 + }, + { + "acc": 0.99930344, + "epoch": 4.745493107104984, + "grad_norm": 0.2320011854171753, + "learning_rate": 5.831217531982364e-06, + "loss": 0.00262179, + "memory(GiB)": 26.31, + "step": 4475, + "train_speed(iter/s)": 0.583085 + }, + { + "acc": 0.99925175, + "epoch": 4.750795334040297, + "grad_norm": 0.36902445554733276, + "learning_rate": 5.822571442044829e-06, + "loss": 0.00252635, + "memory(GiB)": 26.31, + "step": 4480, + "train_speed(iter/s)": 0.583085 + }, + { + "acc": 0.99939899, + "epoch": 4.7560975609756095, + "grad_norm": 0.5605974197387695, + "learning_rate": 5.813922824398218e-06, + "loss": 0.00225574, + "memory(GiB)": 26.31, + "step": 4485, + "train_speed(iter/s)": 0.583089 + }, + { + "acc": 0.99903069, + "epoch": 4.761399787910922, + "grad_norm": 0.5396479964256287, + "learning_rate": 5.805271705635339e-06, + "loss": 0.00240382, + "memory(GiB)": 26.31, + "step": 4490, + "train_speed(iter/s)": 0.583087 + }, + { + "acc": 0.99966717, + "epoch": 4.766702014846236, + "grad_norm": 0.5953534245491028, + "learning_rate": 5.796618112356691e-06, + "loss": 0.00141264, + "memory(GiB)": 26.31, + "step": 4495, + "train_speed(iter/s)": 0.583086 + }, + { + "acc": 0.99952297, + "epoch": 4.7720042417815485, + "grad_norm": 0.21910008788108826, + "learning_rate": 5.787962071170385e-06, + "loss": 0.00160427, + "memory(GiB)": 26.31, + "step": 4500, + "train_speed(iter/s)": 0.583087 + }, + { + "acc": 0.99864674, + "epoch": 4.777306468716861, + "grad_norm": 0.26406243443489075, + "learning_rate": 5.779303608692054e-06, + "loss": 0.00467952, + "memory(GiB)": 26.31, + "step": 4505, + "train_speed(iter/s)": 0.583088 + }, + { + "acc": 0.99888878, + "epoch": 4.782608695652174, + "grad_norm": 0.19614329934120178, + "learning_rate": 5.7706427515447794e-06, + "loss": 0.00390195, + "memory(GiB)": 26.31, + "step": 4510, + "train_speed(iter/s)": 0.583088 + }, + { + "acc": 0.9993845, + "epoch": 4.7879109225874865, + "grad_norm": 0.1621880978345871, + "learning_rate": 5.761979526359009e-06, + "loss": 0.00212824, + "memory(GiB)": 26.31, + "step": 4515, + "train_speed(iter/s)": 0.583091 + }, + { + "acc": 0.99922352, + "epoch": 4.793213149522799, + "grad_norm": 0.09368137270212173, + "learning_rate": 5.753313959772461e-06, + "loss": 0.00180674, + "memory(GiB)": 26.31, + "step": 4520, + "train_speed(iter/s)": 0.58309 + }, + { + "acc": 0.99976778, + "epoch": 4.798515376458113, + "grad_norm": 0.06735534965991974, + "learning_rate": 5.744646078430065e-06, + "loss": 0.00071397, + "memory(GiB)": 26.31, + "step": 4525, + "train_speed(iter/s)": 0.58309 + }, + { + "acc": 0.99915466, + "epoch": 4.8038176033934255, + "grad_norm": 0.1537499576807022, + "learning_rate": 5.735975908983859e-06, + "loss": 0.00218606, + "memory(GiB)": 26.31, + "step": 4530, + "train_speed(iter/s)": 0.583094 + }, + { + "acc": 0.99936771, + "epoch": 4.809119830328738, + "grad_norm": 0.4072479009628296, + "learning_rate": 5.727303478092922e-06, + "loss": 0.00248822, + "memory(GiB)": 26.31, + "step": 4535, + "train_speed(iter/s)": 0.583093 + }, + { + "acc": 1.0, + "epoch": 4.814422057264051, + "grad_norm": 0.10145048052072525, + "learning_rate": 5.718628812423285e-06, + "loss": 0.00044452, + "memory(GiB)": 26.31, + "step": 4540, + "train_speed(iter/s)": 0.583099 + }, + { + "acc": 0.99870358, + "epoch": 4.8197242841993635, + "grad_norm": 0.4591797888278961, + "learning_rate": 5.70995193864785e-06, + "loss": 0.00218927, + "memory(GiB)": 26.31, + "step": 4545, + "train_speed(iter/s)": 0.583106 + }, + { + "acc": 0.99918861, + "epoch": 4.825026511134676, + "grad_norm": 0.4732370972633362, + "learning_rate": 5.701272883446308e-06, + "loss": 0.00389646, + "memory(GiB)": 26.31, + "step": 4550, + "train_speed(iter/s)": 0.58311 + }, + { + "acc": 0.99949169, + "epoch": 4.83032873806999, + "grad_norm": 0.30895915627479553, + "learning_rate": 5.692591673505058e-06, + "loss": 0.00123058, + "memory(GiB)": 26.31, + "step": 4555, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99925365, + "epoch": 4.835630965005302, + "grad_norm": 0.2049397975206375, + "learning_rate": 5.683908335517124e-06, + "loss": 0.00311317, + "memory(GiB)": 26.31, + "step": 4560, + "train_speed(iter/s)": 0.583116 + }, + { + "acc": 0.99915085, + "epoch": 4.840933191940615, + "grad_norm": 0.27029553055763245, + "learning_rate": 5.675222896182074e-06, + "loss": 0.00321835, + "memory(GiB)": 26.31, + "step": 4565, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99948587, + "epoch": 4.846235418875928, + "grad_norm": 0.4786251187324524, + "learning_rate": 5.666535382205941e-06, + "loss": 0.00240655, + "memory(GiB)": 26.31, + "step": 4570, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99940729, + "epoch": 4.8515376458112405, + "grad_norm": 0.35688337683677673, + "learning_rate": 5.657845820301128e-06, + "loss": 0.00213064, + "memory(GiB)": 26.31, + "step": 4575, + "train_speed(iter/s)": 0.583112 + }, + { + "acc": 0.99936666, + "epoch": 4.856839872746553, + "grad_norm": 0.12073680013418198, + "learning_rate": 5.649154237186342e-06, + "loss": 0.00155683, + "memory(GiB)": 26.31, + "step": 4580, + "train_speed(iter/s)": 0.583112 + }, + { + "acc": 0.99914742, + "epoch": 4.862142099681867, + "grad_norm": 0.41659778356552124, + "learning_rate": 5.640460659586504e-06, + "loss": 0.00253949, + "memory(GiB)": 26.31, + "step": 4585, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99952326, + "epoch": 4.867444326617179, + "grad_norm": 0.6744159460067749, + "learning_rate": 5.631765114232667e-06, + "loss": 0.0022239, + "memory(GiB)": 26.31, + "step": 4590, + "train_speed(iter/s)": 0.583114 + }, + { + "acc": 0.99954004, + "epoch": 4.872746553552492, + "grad_norm": 0.874528706073761, + "learning_rate": 5.623067627861931e-06, + "loss": 0.00239744, + "memory(GiB)": 26.31, + "step": 4595, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99966507, + "epoch": 4.878048780487805, + "grad_norm": 0.06950568407773972, + "learning_rate": 5.6143682272173716e-06, + "loss": 0.00160969, + "memory(GiB)": 26.31, + "step": 4600, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99939823, + "epoch": 4.8833510074231175, + "grad_norm": 0.6629308462142944, + "learning_rate": 5.605666939047942e-06, + "loss": 0.00220318, + "memory(GiB)": 26.31, + "step": 4605, + "train_speed(iter/s)": 0.583116 + }, + { + "acc": 0.99966755, + "epoch": 4.888653234358431, + "grad_norm": 0.7309154868125916, + "learning_rate": 5.596963790108406e-06, + "loss": 0.00109128, + "memory(GiB)": 26.31, + "step": 4610, + "train_speed(iter/s)": 0.583116 + }, + { + "acc": 0.99976997, + "epoch": 4.893955461293744, + "grad_norm": 0.412337064743042, + "learning_rate": 5.588258807159247e-06, + "loss": 0.00075653, + "memory(GiB)": 26.31, + "step": 4615, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99912081, + "epoch": 4.899257688229056, + "grad_norm": 0.44406965374946594, + "learning_rate": 5.579552016966583e-06, + "loss": 0.00252374, + "memory(GiB)": 26.31, + "step": 4620, + "train_speed(iter/s)": 0.58312 + }, + { + "acc": 0.99974184, + "epoch": 4.904559915164369, + "grad_norm": 0.325088769197464, + "learning_rate": 5.570843446302096e-06, + "loss": 0.00167765, + "memory(GiB)": 26.31, + "step": 4625, + "train_speed(iter/s)": 0.583122 + }, + { + "acc": 0.99926529, + "epoch": 4.909862142099682, + "grad_norm": 0.6848169565200806, + "learning_rate": 5.562133121942941e-06, + "loss": 0.00173596, + "memory(GiB)": 26.31, + "step": 4630, + "train_speed(iter/s)": 0.583125 + }, + { + "acc": 0.99988041, + "epoch": 4.915164369034994, + "grad_norm": 0.08015554398298264, + "learning_rate": 5.5534210706716595e-06, + "loss": 0.00062716, + "memory(GiB)": 26.31, + "step": 4635, + "train_speed(iter/s)": 0.583128 + }, + { + "acc": 0.99927998, + "epoch": 4.920466595970307, + "grad_norm": 0.2661329209804535, + "learning_rate": 5.5447073192761095e-06, + "loss": 0.00297623, + "memory(GiB)": 26.31, + "step": 4640, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.99924908, + "epoch": 4.925768822905621, + "grad_norm": 1.0690864324569702, + "learning_rate": 5.5359918945493725e-06, + "loss": 0.00258645, + "memory(GiB)": 26.31, + "step": 4645, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.99939957, + "epoch": 4.931071049840933, + "grad_norm": 0.19076597690582275, + "learning_rate": 5.52727482328968e-06, + "loss": 0.00235107, + "memory(GiB)": 26.31, + "step": 4650, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.9994956, + "epoch": 4.936373276776246, + "grad_norm": 0.4487408995628357, + "learning_rate": 5.518556132300321e-06, + "loss": 0.00155169, + "memory(GiB)": 26.31, + "step": 4655, + "train_speed(iter/s)": 0.583128 + }, + { + "acc": 0.99989033, + "epoch": 4.941675503711559, + "grad_norm": 0.20197904109954834, + "learning_rate": 5.509835848389566e-06, + "loss": 0.00048707, + "memory(GiB)": 26.31, + "step": 4660, + "train_speed(iter/s)": 0.583128 + }, + { + "acc": 0.99916096, + "epoch": 4.946977730646871, + "grad_norm": 0.18906237185001373, + "learning_rate": 5.501113998370588e-06, + "loss": 0.00304128, + "memory(GiB)": 26.31, + "step": 4665, + "train_speed(iter/s)": 0.583131 + }, + { + "acc": 0.99973669, + "epoch": 4.952279957582185, + "grad_norm": 0.11769578605890274, + "learning_rate": 5.492390609061365e-06, + "loss": 0.0011104, + "memory(GiB)": 26.31, + "step": 4670, + "train_speed(iter/s)": 0.583129 + }, + { + "acc": 0.99927292, + "epoch": 4.957582184517498, + "grad_norm": 0.06326154619455338, + "learning_rate": 5.48366570728462e-06, + "loss": 0.0023735, + "memory(GiB)": 26.31, + "step": 4675, + "train_speed(iter/s)": 0.583129 + }, + { + "acc": 0.99961071, + "epoch": 4.96288441145281, + "grad_norm": 0.15000776946544647, + "learning_rate": 5.4749393198677225e-06, + "loss": 0.00089669, + "memory(GiB)": 26.31, + "step": 4680, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.99912415, + "epoch": 4.968186638388123, + "grad_norm": 0.5361363887786865, + "learning_rate": 5.466211473642606e-06, + "loss": 0.00306102, + "memory(GiB)": 26.31, + "step": 4685, + "train_speed(iter/s)": 0.583135 + }, + { + "acc": 0.99913349, + "epoch": 4.973488865323436, + "grad_norm": 0.9397711753845215, + "learning_rate": 5.457482195445693e-06, + "loss": 0.00252779, + "memory(GiB)": 26.31, + "step": 4690, + "train_speed(iter/s)": 0.583135 + }, + { + "acc": 0.99924164, + "epoch": 4.978791092258748, + "grad_norm": 0.8357404470443726, + "learning_rate": 5.44875151211781e-06, + "loss": 0.00215698, + "memory(GiB)": 26.31, + "step": 4695, + "train_speed(iter/s)": 0.583141 + }, + { + "acc": 0.99975786, + "epoch": 4.984093319194061, + "grad_norm": 0.2508851885795593, + "learning_rate": 5.440019450504101e-06, + "loss": 0.00104784, + "memory(GiB)": 26.31, + "step": 4700, + "train_speed(iter/s)": 0.583141 + }, + { + "acc": 0.99964581, + "epoch": 4.989395546129375, + "grad_norm": 0.41223564743995667, + "learning_rate": 5.431286037453949e-06, + "loss": 0.00124911, + "memory(GiB)": 26.31, + "step": 4705, + "train_speed(iter/s)": 0.583141 + }, + { + "acc": 0.99922943, + "epoch": 4.994697773064687, + "grad_norm": 0.41894757747650146, + "learning_rate": 5.422551299820895e-06, + "loss": 0.00315995, + "memory(GiB)": 26.31, + "step": 4710, + "train_speed(iter/s)": 0.583149 + }, + { + "acc": 0.99934368, + "epoch": 5.0, + "grad_norm": 0.036096345633268356, + "learning_rate": 5.4138152644625495e-06, + "loss": 0.0019122, + "memory(GiB)": 26.31, + "step": 4715, + "train_speed(iter/s)": 0.583142 + }, + { + "acc": 0.99912224, + "epoch": 5.005302226935313, + "grad_norm": 0.32283639907836914, + "learning_rate": 5.405077958240514e-06, + "loss": 0.00247847, + "memory(GiB)": 26.31, + "step": 4720, + "train_speed(iter/s)": 0.58308 + }, + { + "acc": 0.99974022, + "epoch": 5.010604453870625, + "grad_norm": 0.3302988111972809, + "learning_rate": 5.3963394080203e-06, + "loss": 0.00123621, + "memory(GiB)": 26.31, + "step": 4725, + "train_speed(iter/s)": 0.583087 + }, + { + "acc": 0.99912148, + "epoch": 5.015906680805939, + "grad_norm": 0.8059219717979431, + "learning_rate": 5.387599640671238e-06, + "loss": 0.00170408, + "memory(GiB)": 26.31, + "step": 4730, + "train_speed(iter/s)": 0.583087 + }, + { + "acc": 0.99928551, + "epoch": 5.021208907741252, + "grad_norm": 0.8436117768287659, + "learning_rate": 5.37885868306641e-06, + "loss": 0.00291735, + "memory(GiB)": 26.31, + "step": 4735, + "train_speed(iter/s)": 0.583086 + }, + { + "acc": 0.99951, + "epoch": 5.026511134676564, + "grad_norm": 0.06486281007528305, + "learning_rate": 5.370116562082551e-06, + "loss": 0.00148729, + "memory(GiB)": 26.31, + "step": 4740, + "train_speed(iter/s)": 0.583089 + }, + { + "acc": 0.99920187, + "epoch": 5.031813361611877, + "grad_norm": 0.5544983148574829, + "learning_rate": 5.361373304599975e-06, + "loss": 0.00254798, + "memory(GiB)": 26.31, + "step": 4745, + "train_speed(iter/s)": 0.583089 + }, + { + "acc": 0.99915466, + "epoch": 5.03711558854719, + "grad_norm": 0.16032570600509644, + "learning_rate": 5.352628937502491e-06, + "loss": 0.00236086, + "memory(GiB)": 26.31, + "step": 4750, + "train_speed(iter/s)": 0.583093 + }, + { + "acc": 0.9997427, + "epoch": 5.042417815482502, + "grad_norm": 0.13562826812267303, + "learning_rate": 5.343883487677319e-06, + "loss": 0.00140394, + "memory(GiB)": 26.31, + "step": 4755, + "train_speed(iter/s)": 0.583093 + }, + { + "acc": 0.99940615, + "epoch": 5.047720042417816, + "grad_norm": 0.3697991967201233, + "learning_rate": 5.335136982015008e-06, + "loss": 0.00208272, + "memory(GiB)": 26.31, + "step": 4760, + "train_speed(iter/s)": 0.583092 + }, + { + "acc": 0.99912567, + "epoch": 5.053022269353129, + "grad_norm": 0.7061968445777893, + "learning_rate": 5.326389447409356e-06, + "loss": 0.00275432, + "memory(GiB)": 26.31, + "step": 4765, + "train_speed(iter/s)": 0.583092 + }, + { + "acc": 0.99899225, + "epoch": 5.058324496288441, + "grad_norm": 0.3614329993724823, + "learning_rate": 5.31764091075732e-06, + "loss": 0.0032732, + "memory(GiB)": 26.31, + "step": 4770, + "train_speed(iter/s)": 0.583094 + }, + { + "acc": 0.99961834, + "epoch": 5.063626723223754, + "grad_norm": 0.3613954186439514, + "learning_rate": 5.308891398958944e-06, + "loss": 0.00152841, + "memory(GiB)": 26.31, + "step": 4775, + "train_speed(iter/s)": 0.583098 + }, + { + "acc": 0.99952335, + "epoch": 5.068928950159067, + "grad_norm": 0.5642781853675842, + "learning_rate": 5.300140938917265e-06, + "loss": 0.00217171, + "memory(GiB)": 26.31, + "step": 4780, + "train_speed(iter/s)": 0.583097 + }, + { + "acc": 0.99950047, + "epoch": 5.074231177094379, + "grad_norm": 0.09304577857255936, + "learning_rate": 5.29138955753824e-06, + "loss": 0.00100198, + "memory(GiB)": 26.31, + "step": 4785, + "train_speed(iter/s)": 0.583098 + }, + { + "acc": 0.99951153, + "epoch": 5.079533404029693, + "grad_norm": 0.5502454042434692, + "learning_rate": 5.282637281730657e-06, + "loss": 0.00078877, + "memory(GiB)": 26.31, + "step": 4790, + "train_speed(iter/s)": 0.583097 + }, + { + "acc": 0.99952335, + "epoch": 5.084835630965006, + "grad_norm": 0.08497211337089539, + "learning_rate": 5.273884138406053e-06, + "loss": 0.00157482, + "memory(GiB)": 26.31, + "step": 4795, + "train_speed(iter/s)": 0.583101 + }, + { + "acc": 0.99952164, + "epoch": 5.090137857900318, + "grad_norm": 0.1908387541770935, + "learning_rate": 5.265130154478633e-06, + "loss": 0.00112022, + "memory(GiB)": 26.31, + "step": 4800, + "train_speed(iter/s)": 0.583104 + }, + { + "acc": 0.9996439, + "epoch": 5.095440084835631, + "grad_norm": 0.07793429493904114, + "learning_rate": 5.25637535686519e-06, + "loss": 0.00093803, + "memory(GiB)": 26.31, + "step": 4805, + "train_speed(iter/s)": 0.583103 + }, + { + "acc": 0.99963684, + "epoch": 5.100742311770944, + "grad_norm": 0.31934216618537903, + "learning_rate": 5.247619772485013e-06, + "loss": 0.00145084, + "memory(GiB)": 26.31, + "step": 4810, + "train_speed(iter/s)": 0.583103 + }, + { + "acc": 0.99963875, + "epoch": 5.106044538706256, + "grad_norm": 0.26898130774497986, + "learning_rate": 5.238863428259817e-06, + "loss": 0.00137375, + "memory(GiB)": 26.31, + "step": 4815, + "train_speed(iter/s)": 0.583109 + }, + { + "acc": 0.99987183, + "epoch": 5.11134676564157, + "grad_norm": 0.05188106745481491, + "learning_rate": 5.230106351113646e-06, + "loss": 0.00048276, + "memory(GiB)": 26.31, + "step": 4820, + "train_speed(iter/s)": 0.583108 + }, + { + "acc": 0.99940996, + "epoch": 5.1166489925768825, + "grad_norm": 0.23561055958271027, + "learning_rate": 5.221348567972804e-06, + "loss": 0.00167051, + "memory(GiB)": 26.31, + "step": 4825, + "train_speed(iter/s)": 0.58311 + }, + { + "acc": 0.99960842, + "epoch": 5.121951219512195, + "grad_norm": 0.11288487911224365, + "learning_rate": 5.212590105765762e-06, + "loss": 0.00139646, + "memory(GiB)": 26.31, + "step": 4830, + "train_speed(iter/s)": 0.583114 + }, + { + "acc": 0.99960985, + "epoch": 5.127253446447508, + "grad_norm": 0.46378016471862793, + "learning_rate": 5.203830991423079e-06, + "loss": 0.00243313, + "memory(GiB)": 26.31, + "step": 4835, + "train_speed(iter/s)": 0.583114 + }, + { + "acc": 0.99950123, + "epoch": 5.132555673382821, + "grad_norm": 0.47227156162261963, + "learning_rate": 5.195071251877325e-06, + "loss": 0.00209684, + "memory(GiB)": 26.31, + "step": 4840, + "train_speed(iter/s)": 0.583117 + }, + { + "acc": 0.99961729, + "epoch": 5.137857900318133, + "grad_norm": 0.3798484802246094, + "learning_rate": 5.186310914062983e-06, + "loss": 0.00167997, + "memory(GiB)": 26.31, + "step": 4845, + "train_speed(iter/s)": 0.583117 + }, + { + "acc": 0.99963169, + "epoch": 5.143160127253447, + "grad_norm": 0.23786208033561707, + "learning_rate": 5.177550004916381e-06, + "loss": 0.00179124, + "memory(GiB)": 26.31, + "step": 4850, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.99936047, + "epoch": 5.1484623541887595, + "grad_norm": 0.7719597220420837, + "learning_rate": 5.168788551375607e-06, + "loss": 0.00196373, + "memory(GiB)": 26.31, + "step": 4855, + "train_speed(iter/s)": 0.583126 + }, + { + "acc": 0.99967613, + "epoch": 5.153764581124072, + "grad_norm": 0.25141432881355286, + "learning_rate": 5.160026580380412e-06, + "loss": 0.00157652, + "memory(GiB)": 26.31, + "step": 4860, + "train_speed(iter/s)": 0.583125 + }, + { + "acc": 0.99951057, + "epoch": 5.159066808059385, + "grad_norm": 0.37007245421409607, + "learning_rate": 5.15126411887215e-06, + "loss": 0.00161061, + "memory(GiB)": 26.31, + "step": 4865, + "train_speed(iter/s)": 0.583127 + }, + { + "acc": 0.99910822, + "epoch": 5.164369034994698, + "grad_norm": 0.10854408144950867, + "learning_rate": 5.142501193793677e-06, + "loss": 0.00538175, + "memory(GiB)": 26.31, + "step": 4870, + "train_speed(iter/s)": 0.583126 + }, + { + "acc": 0.99895782, + "epoch": 5.16967126193001, + "grad_norm": 0.27228420972824097, + "learning_rate": 5.133737832089277e-06, + "loss": 0.00419342, + "memory(GiB)": 26.31, + "step": 4875, + "train_speed(iter/s)": 0.583134 + }, + { + "acc": 0.99924507, + "epoch": 5.174973488865324, + "grad_norm": 0.42566487193107605, + "learning_rate": 5.124974060704574e-06, + "loss": 0.00254047, + "memory(GiB)": 26.31, + "step": 4880, + "train_speed(iter/s)": 0.583133 + }, + { + "acc": 0.99926548, + "epoch": 5.1802757158006365, + "grad_norm": 0.09999915957450867, + "learning_rate": 5.116209906586451e-06, + "loss": 0.0023471, + "memory(GiB)": 26.31, + "step": 4885, + "train_speed(iter/s)": 0.583137 + }, + { + "acc": 0.99965687, + "epoch": 5.185577942735949, + "grad_norm": 0.17713366448879242, + "learning_rate": 5.107445396682971e-06, + "loss": 0.00108484, + "memory(GiB)": 26.31, + "step": 4890, + "train_speed(iter/s)": 0.583138 + }, + { + "acc": 0.9994688, + "epoch": 5.190880169671262, + "grad_norm": 0.3907831311225891, + "learning_rate": 5.098680557943291e-06, + "loss": 0.00225697, + "memory(GiB)": 26.31, + "step": 4895, + "train_speed(iter/s)": 0.583141 + }, + { + "acc": 0.99924278, + "epoch": 5.1961823966065745, + "grad_norm": 0.45619088411331177, + "learning_rate": 5.089915417317577e-06, + "loss": 0.0022859, + "memory(GiB)": 26.31, + "step": 4900, + "train_speed(iter/s)": 0.583133 + }, + { + "acc": 0.99950504, + "epoch": 5.201484623541887, + "grad_norm": 0.26541733741760254, + "learning_rate": 5.081150001756924e-06, + "loss": 0.0016038, + "memory(GiB)": 26.31, + "step": 4905, + "train_speed(iter/s)": 0.583112 + }, + { + "acc": 0.99899044, + "epoch": 5.206786850477201, + "grad_norm": 0.39049601554870605, + "learning_rate": 5.072384338213271e-06, + "loss": 0.00281408, + "memory(GiB)": 26.31, + "step": 4910, + "train_speed(iter/s)": 0.5831 + }, + { + "acc": 0.99951391, + "epoch": 5.2120890774125135, + "grad_norm": 0.9988411068916321, + "learning_rate": 5.063618453639322e-06, + "loss": 0.00213081, + "memory(GiB)": 26.31, + "step": 4915, + "train_speed(iter/s)": 0.583079 + }, + { + "acc": 0.99915142, + "epoch": 5.217391304347826, + "grad_norm": 0.15615594387054443, + "learning_rate": 5.054852374988459e-06, + "loss": 0.0027803, + "memory(GiB)": 26.31, + "step": 4920, + "train_speed(iter/s)": 0.583078 + }, + { + "acc": 0.99976625, + "epoch": 5.222693531283139, + "grad_norm": 0.1486440747976303, + "learning_rate": 5.046086129214663e-06, + "loss": 0.00100315, + "memory(GiB)": 26.31, + "step": 4925, + "train_speed(iter/s)": 0.583081 + }, + { + "acc": 0.99928789, + "epoch": 5.2279957582184515, + "grad_norm": 0.2671545743942261, + "learning_rate": 5.037319743272424e-06, + "loss": 0.0028695, + "memory(GiB)": 26.31, + "step": 4930, + "train_speed(iter/s)": 0.583088 + }, + { + "acc": 0.99950676, + "epoch": 5.233297985153764, + "grad_norm": 0.27838829159736633, + "learning_rate": 5.028553244116671e-06, + "loss": 0.00226881, + "memory(GiB)": 26.31, + "step": 4935, + "train_speed(iter/s)": 0.583092 + }, + { + "acc": 0.99974127, + "epoch": 5.238600212089078, + "grad_norm": 0.1396360993385315, + "learning_rate": 5.01978665870267e-06, + "loss": 0.00160907, + "memory(GiB)": 26.31, + "step": 4940, + "train_speed(iter/s)": 0.583091 + }, + { + "acc": 0.99960365, + "epoch": 5.2439024390243905, + "grad_norm": 0.18549151718616486, + "learning_rate": 5.011020013985961e-06, + "loss": 0.00230109, + "memory(GiB)": 26.31, + "step": 4945, + "train_speed(iter/s)": 0.583095 + }, + { + "acc": 0.99975605, + "epoch": 5.249204665959703, + "grad_norm": 0.3893892467021942, + "learning_rate": 5.002253336922267e-06, + "loss": 0.00109208, + "memory(GiB)": 26.31, + "step": 4950, + "train_speed(iter/s)": 0.583098 + }, + { + "acc": 0.99975195, + "epoch": 5.254506892895016, + "grad_norm": 0.3478403389453888, + "learning_rate": 4.993486654467404e-06, + "loss": 0.00157792, + "memory(GiB)": 26.31, + "step": 4955, + "train_speed(iter/s)": 0.583105 + }, + { + "acc": 0.99963999, + "epoch": 5.2598091198303285, + "grad_norm": 0.4681493639945984, + "learning_rate": 4.984719993577207e-06, + "loss": 0.00123787, + "memory(GiB)": 26.31, + "step": 4960, + "train_speed(iter/s)": 0.583105 + }, + { + "acc": 0.99987803, + "epoch": 5.265111346765641, + "grad_norm": 0.10217458754777908, + "learning_rate": 4.9759533812074465e-06, + "loss": 0.00166023, + "memory(GiB)": 26.31, + "step": 4965, + "train_speed(iter/s)": 0.58311 + }, + { + "acc": 0.99974689, + "epoch": 5.270413573700955, + "grad_norm": 0.04794376716017723, + "learning_rate": 4.967186844313744e-06, + "loss": 0.00107571, + "memory(GiB)": 26.31, + "step": 4970, + "train_speed(iter/s)": 0.583114 + }, + { + "acc": 0.99917164, + "epoch": 5.275715800636267, + "grad_norm": 1.2356112003326416, + "learning_rate": 4.958420409851488e-06, + "loss": 0.00387285, + "memory(GiB)": 26.31, + "step": 4975, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99936285, + "epoch": 5.28101802757158, + "grad_norm": 0.6110440492630005, + "learning_rate": 4.94965410477575e-06, + "loss": 0.00184677, + "memory(GiB)": 26.31, + "step": 4980, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.9995368, + "epoch": 5.286320254506893, + "grad_norm": 0.2816583514213562, + "learning_rate": 4.940887956041206e-06, + "loss": 0.0014614, + "memory(GiB)": 26.31, + "step": 4985, + "train_speed(iter/s)": 0.583111 + }, + { + "acc": 1.0, + "epoch": 5.2916224814422055, + "grad_norm": 0.28331875801086426, + "learning_rate": 4.932121990602051e-06, + "loss": 0.0004526, + "memory(GiB)": 26.31, + "step": 4990, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99961424, + "epoch": 5.296924708377518, + "grad_norm": 0.41179099678993225, + "learning_rate": 4.9233562354119146e-06, + "loss": 0.00149953, + "memory(GiB)": 26.31, + "step": 4995, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99923592, + "epoch": 5.302226935312832, + "grad_norm": 1.1502220630645752, + "learning_rate": 4.914590717423784e-06, + "loss": 0.00241425, + "memory(GiB)": 26.31, + "step": 5000, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99924545, + "epoch": 5.307529162248144, + "grad_norm": 0.2299642413854599, + "learning_rate": 4.905825463589912e-06, + "loss": 0.00222843, + "memory(GiB)": 26.31, + "step": 5005, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99898901, + "epoch": 5.312831389183457, + "grad_norm": 0.696652889251709, + "learning_rate": 4.897060500861745e-06, + "loss": 0.00209142, + "memory(GiB)": 26.31, + "step": 5010, + "train_speed(iter/s)": 0.583115 + }, + { + "acc": 0.99973383, + "epoch": 5.31813361611877, + "grad_norm": 0.57330721616745, + "learning_rate": 4.888295856189828e-06, + "loss": 0.00127123, + "memory(GiB)": 26.31, + "step": 5015, + "train_speed(iter/s)": 0.583113 + }, + { + "acc": 0.99952068, + "epoch": 5.3234358430540825, + "grad_norm": 0.04265210032463074, + "learning_rate": 4.8795315565237325e-06, + "loss": 0.00123829, + "memory(GiB)": 26.31, + "step": 5020, + "train_speed(iter/s)": 0.583116 + }, + { + "acc": 0.99938927, + "epoch": 5.328738069989395, + "grad_norm": 0.05427992716431618, + "learning_rate": 4.870767628811968e-06, + "loss": 0.00237837, + "memory(GiB)": 26.31, + "step": 5025, + "train_speed(iter/s)": 0.583117 + }, + { + "acc": 0.99955378, + "epoch": 5.334040296924709, + "grad_norm": 0.4924948215484619, + "learning_rate": 4.862004100001898e-06, + "loss": 0.00116051, + "memory(GiB)": 26.31, + "step": 5030, + "train_speed(iter/s)": 0.583117 + }, + { + "acc": 0.99962788, + "epoch": 5.339342523860021, + "grad_norm": 0.5894216299057007, + "learning_rate": 4.853240997039663e-06, + "loss": 0.00151597, + "memory(GiB)": 26.31, + "step": 5035, + "train_speed(iter/s)": 0.58312 + }, + { + "acc": 0.99962692, + "epoch": 5.344644750795334, + "grad_norm": 0.6638916730880737, + "learning_rate": 4.8444783468700925e-06, + "loss": 0.00201382, + "memory(GiB)": 26.31, + "step": 5040, + "train_speed(iter/s)": 0.583123 + }, + { + "acc": 0.99963865, + "epoch": 5.349946977730647, + "grad_norm": 0.33775195479393005, + "learning_rate": 4.835716176436624e-06, + "loss": 0.00127131, + "memory(GiB)": 26.31, + "step": 5045, + "train_speed(iter/s)": 0.579829 + }, + { + "acc": 0.99938679, + "epoch": 5.355249204665959, + "grad_norm": 0.35410913825035095, + "learning_rate": 4.826954512681219e-06, + "loss": 0.00201825, + "memory(GiB)": 26.31, + "step": 5050, + "train_speed(iter/s)": 0.579835 + }, + { + "acc": 0.99975891, + "epoch": 5.360551431601272, + "grad_norm": 0.05381745472550392, + "learning_rate": 4.818193382544282e-06, + "loss": 0.00096042, + "memory(GiB)": 26.31, + "step": 5055, + "train_speed(iter/s)": 0.579845 + }, + { + "acc": 0.99938011, + "epoch": 5.365853658536586, + "grad_norm": 0.1406700760126114, + "learning_rate": 4.809432812964577e-06, + "loss": 0.00262718, + "memory(GiB)": 26.31, + "step": 5060, + "train_speed(iter/s)": 0.579849 + }, + { + "acc": 0.99934797, + "epoch": 5.371155885471898, + "grad_norm": 0.4387652575969696, + "learning_rate": 4.800672830879143e-06, + "loss": 0.00249025, + "memory(GiB)": 26.31, + "step": 5065, + "train_speed(iter/s)": 0.579852 + }, + { + "acc": 0.99944916, + "epoch": 5.376458112407211, + "grad_norm": 0.17490963637828827, + "learning_rate": 4.791913463223214e-06, + "loss": 0.00172883, + "memory(GiB)": 26.31, + "step": 5070, + "train_speed(iter/s)": 0.57986 + }, + { + "acc": 0.9992794, + "epoch": 5.381760339342524, + "grad_norm": 0.642207682132721, + "learning_rate": 4.7831547369301365e-06, + "loss": 0.00203475, + "memory(GiB)": 26.31, + "step": 5075, + "train_speed(iter/s)": 0.579867 + }, + { + "acc": 0.99916515, + "epoch": 5.387062566277836, + "grad_norm": 0.1799846887588501, + "learning_rate": 4.774396678931278e-06, + "loss": 0.00362162, + "memory(GiB)": 26.31, + "step": 5080, + "train_speed(iter/s)": 0.579871 + }, + { + "acc": 0.99951611, + "epoch": 5.392364793213149, + "grad_norm": 0.38178086280822754, + "learning_rate": 4.76563931615596e-06, + "loss": 0.00107371, + "memory(GiB)": 26.31, + "step": 5085, + "train_speed(iter/s)": 0.579875 + }, + { + "acc": 0.99987803, + "epoch": 5.397667020148463, + "grad_norm": 0.07540518790483475, + "learning_rate": 4.75688267553136e-06, + "loss": 0.00127289, + "memory(GiB)": 26.31, + "step": 5090, + "train_speed(iter/s)": 0.579882 + }, + { + "acc": 0.9998889, + "epoch": 5.402969247083775, + "grad_norm": 0.10213793814182281, + "learning_rate": 4.748126783982437e-06, + "loss": 0.00054123, + "memory(GiB)": 26.31, + "step": 5095, + "train_speed(iter/s)": 0.579889 + }, + { + "acc": 0.99975166, + "epoch": 5.408271474019088, + "grad_norm": 0.5178495049476624, + "learning_rate": 4.739371668431848e-06, + "loss": 0.00119375, + "memory(GiB)": 26.31, + "step": 5100, + "train_speed(iter/s)": 0.579894 + }, + { + "acc": 0.99971905, + "epoch": 5.413573700954401, + "grad_norm": 0.019593840464949608, + "learning_rate": 4.730617355799862e-06, + "loss": 0.00118645, + "memory(GiB)": 26.31, + "step": 5105, + "train_speed(iter/s)": 0.579899 + }, + { + "acc": 0.99951067, + "epoch": 5.418875927889713, + "grad_norm": 0.05931418016552925, + "learning_rate": 4.72186387300428e-06, + "loss": 0.00153462, + "memory(GiB)": 26.31, + "step": 5110, + "train_speed(iter/s)": 0.579902 + }, + { + "acc": 0.99974537, + "epoch": 5.424178154825027, + "grad_norm": 0.16675366461277008, + "learning_rate": 4.7131112469603526e-06, + "loss": 0.0011792, + "memory(GiB)": 26.31, + "step": 5115, + "train_speed(iter/s)": 0.579905 + }, + { + "acc": 0.9995225, + "epoch": 5.42948038176034, + "grad_norm": 0.6896766424179077, + "learning_rate": 4.704359504580694e-06, + "loss": 0.0019345, + "memory(GiB)": 26.31, + "step": 5120, + "train_speed(iter/s)": 0.579909 + }, + { + "acc": 1.0, + "epoch": 5.434782608695652, + "grad_norm": 0.04017867520451546, + "learning_rate": 4.695608672775202e-06, + "loss": 0.00042916, + "memory(GiB)": 26.31, + "step": 5125, + "train_speed(iter/s)": 0.57991 + }, + { + "acc": 0.99963627, + "epoch": 5.440084835630965, + "grad_norm": 0.3894314765930176, + "learning_rate": 4.686858778450975e-06, + "loss": 0.00213848, + "memory(GiB)": 26.31, + "step": 5130, + "train_speed(iter/s)": 0.579916 + }, + { + "acc": 0.9996685, + "epoch": 5.445387062566278, + "grad_norm": 0.4688229560852051, + "learning_rate": 4.678109848512228e-06, + "loss": 0.00064681, + "memory(GiB)": 26.31, + "step": 5135, + "train_speed(iter/s)": 0.57992 + }, + { + "acc": 0.99963751, + "epoch": 5.45068928950159, + "grad_norm": 0.12050074338912964, + "learning_rate": 4.669361909860213e-06, + "loss": 0.00217735, + "memory(GiB)": 26.31, + "step": 5140, + "train_speed(iter/s)": 0.579924 + }, + { + "acc": 0.99936428, + "epoch": 5.455991516436903, + "grad_norm": 0.4008522629737854, + "learning_rate": 4.660614989393132e-06, + "loss": 0.001995, + "memory(GiB)": 26.31, + "step": 5145, + "train_speed(iter/s)": 0.579927 + }, + { + "acc": 0.99941254, + "epoch": 5.461293743372217, + "grad_norm": 0.2535358965396881, + "learning_rate": 4.6518691140060545e-06, + "loss": 0.00125535, + "memory(GiB)": 26.31, + "step": 5150, + "train_speed(iter/s)": 0.57993 + }, + { + "acc": 0.99964075, + "epoch": 5.466595970307529, + "grad_norm": 0.4533134996891022, + "learning_rate": 4.64312431059084e-06, + "loss": 0.00111725, + "memory(GiB)": 26.31, + "step": 5155, + "train_speed(iter/s)": 0.579937 + }, + { + "acc": 0.99975224, + "epoch": 5.471898197242842, + "grad_norm": 0.364401638507843, + "learning_rate": 4.63438060603605e-06, + "loss": 0.00101442, + "memory(GiB)": 26.31, + "step": 5160, + "train_speed(iter/s)": 0.57994 + }, + { + "acc": 0.9994957, + "epoch": 5.477200424178155, + "grad_norm": 0.11963590234518051, + "learning_rate": 4.625638027226868e-06, + "loss": 0.00167611, + "memory(GiB)": 26.31, + "step": 5165, + "train_speed(iter/s)": 0.579947 + }, + { + "acc": 0.99962101, + "epoch": 5.482502651113467, + "grad_norm": 0.37187841534614563, + "learning_rate": 4.616896601045017e-06, + "loss": 0.00190198, + "memory(GiB)": 26.31, + "step": 5170, + "train_speed(iter/s)": 0.579944 + }, + { + "acc": 0.9997221, + "epoch": 5.487804878048781, + "grad_norm": 0.09011055529117584, + "learning_rate": 4.608156354368674e-06, + "loss": 0.00183465, + "memory(GiB)": 26.31, + "step": 5175, + "train_speed(iter/s)": 0.579952 + }, + { + "acc": 0.99932022, + "epoch": 5.493107104984094, + "grad_norm": 0.5692781805992126, + "learning_rate": 4.5994173140723894e-06, + "loss": 0.00170957, + "memory(GiB)": 26.31, + "step": 5180, + "train_speed(iter/s)": 0.579958 + }, + { + "acc": 0.99976263, + "epoch": 5.498409331919406, + "grad_norm": 0.2293304055929184, + "learning_rate": 4.590679507027005e-06, + "loss": 0.00127389, + "memory(GiB)": 26.31, + "step": 5185, + "train_speed(iter/s)": 0.57996 + }, + { + "acc": 0.99975605, + "epoch": 5.503711558854719, + "grad_norm": 0.2158653885126114, + "learning_rate": 4.581942960099572e-06, + "loss": 0.00080789, + "memory(GiB)": 26.31, + "step": 5190, + "train_speed(iter/s)": 0.579965 + }, + { + "acc": 0.99951315, + "epoch": 5.509013785790032, + "grad_norm": 0.07053355872631073, + "learning_rate": 4.5732077001532605e-06, + "loss": 0.00099423, + "memory(GiB)": 26.31, + "step": 5195, + "train_speed(iter/s)": 0.579968 + }, + { + "acc": 0.99961748, + "epoch": 5.514316012725344, + "grad_norm": 0.08148149400949478, + "learning_rate": 4.564473754047294e-06, + "loss": 0.00141822, + "memory(GiB)": 26.31, + "step": 5200, + "train_speed(iter/s)": 0.579971 + }, + { + "acc": 0.99975567, + "epoch": 5.519618239660657, + "grad_norm": 0.391696959733963, + "learning_rate": 4.555741148636848e-06, + "loss": 0.00058978, + "memory(GiB)": 26.31, + "step": 5205, + "train_speed(iter/s)": 0.579974 + }, + { + "acc": 0.999753, + "epoch": 5.524920466595971, + "grad_norm": 0.0765453651547432, + "learning_rate": 4.547009910772977e-06, + "loss": 0.00054346, + "memory(GiB)": 26.31, + "step": 5210, + "train_speed(iter/s)": 0.579977 + }, + { + "acc": 0.99962978, + "epoch": 5.530222693531283, + "grad_norm": 0.12428858876228333, + "learning_rate": 4.538280067302533e-06, + "loss": 0.00103908, + "memory(GiB)": 26.31, + "step": 5215, + "train_speed(iter/s)": 0.579983 + }, + { + "acc": 0.99985294, + "epoch": 5.535524920466596, + "grad_norm": 0.0501282699406147, + "learning_rate": 4.529551645068079e-06, + "loss": 0.00052765, + "memory(GiB)": 26.31, + "step": 5220, + "train_speed(iter/s)": 0.579989 + }, + { + "acc": 0.99949055, + "epoch": 5.540827147401909, + "grad_norm": 0.6878573894500732, + "learning_rate": 4.520824670907807e-06, + "loss": 0.00138112, + "memory(GiB)": 26.31, + "step": 5225, + "train_speed(iter/s)": 0.579995 + }, + { + "acc": 0.9998724, + "epoch": 5.546129374337221, + "grad_norm": 0.4195837676525116, + "learning_rate": 4.51209917165546e-06, + "loss": 0.0007006, + "memory(GiB)": 26.31, + "step": 5230, + "train_speed(iter/s)": 0.579999 + }, + { + "acc": 0.99986629, + "epoch": 5.551431601272535, + "grad_norm": 0.13989785313606262, + "learning_rate": 4.5033751741402414e-06, + "loss": 0.00037461, + "memory(GiB)": 26.31, + "step": 5235, + "train_speed(iter/s)": 0.580002 + }, + { + "acc": 0.99955978, + "epoch": 5.556733828207848, + "grad_norm": 0.35394933819770813, + "learning_rate": 4.49465270518674e-06, + "loss": 0.00158651, + "memory(GiB)": 26.31, + "step": 5240, + "train_speed(iter/s)": 0.580005 + }, + { + "acc": 0.99952345, + "epoch": 5.56203605514316, + "grad_norm": 0.49062466621398926, + "learning_rate": 4.485931791614843e-06, + "loss": 0.00145427, + "memory(GiB)": 26.31, + "step": 5245, + "train_speed(iter/s)": 0.580008 + }, + { + "acc": 0.99963207, + "epoch": 5.567338282078473, + "grad_norm": 0.08707881718873978, + "learning_rate": 4.477212460239658e-06, + "loss": 0.00094686, + "memory(GiB)": 26.31, + "step": 5250, + "train_speed(iter/s)": 0.58001 + }, + { + "acc": 0.99987745, + "epoch": 5.572640509013786, + "grad_norm": 0.4491136968135834, + "learning_rate": 4.468494737871423e-06, + "loss": 0.00051695, + "memory(GiB)": 26.31, + "step": 5255, + "train_speed(iter/s)": 0.580013 + }, + { + "acc": 0.99973965, + "epoch": 5.577942735949098, + "grad_norm": 0.38590842485427856, + "learning_rate": 4.45977865131543e-06, + "loss": 0.00096882, + "memory(GiB)": 26.31, + "step": 5260, + "train_speed(iter/s)": 0.580019 + }, + { + "acc": 0.99960003, + "epoch": 5.583244962884411, + "grad_norm": 0.10056951642036438, + "learning_rate": 4.451064227371946e-06, + "loss": 0.0011963, + "memory(GiB)": 26.31, + "step": 5265, + "train_speed(iter/s)": 0.580024 + }, + { + "acc": 0.99985027, + "epoch": 5.5885471898197245, + "grad_norm": 0.023627113550901413, + "learning_rate": 4.4423514928361204e-06, + "loss": 0.00056202, + "memory(GiB)": 26.31, + "step": 5270, + "train_speed(iter/s)": 0.580026 + }, + { + "acc": 0.99952812, + "epoch": 5.593849416755037, + "grad_norm": 0.15069565176963806, + "learning_rate": 4.433640474497909e-06, + "loss": 0.0010099, + "memory(GiB)": 26.31, + "step": 5275, + "train_speed(iter/s)": 0.580031 + }, + { + "acc": 0.99974689, + "epoch": 5.59915164369035, + "grad_norm": 0.9341526031494141, + "learning_rate": 4.424931199141993e-06, + "loss": 0.00080414, + "memory(GiB)": 26.31, + "step": 5280, + "train_speed(iter/s)": 0.580034 + }, + { + "acc": 0.99950294, + "epoch": 5.604453870625663, + "grad_norm": 0.670882523059845, + "learning_rate": 4.416223693547691e-06, + "loss": 0.00239468, + "memory(GiB)": 26.31, + "step": 5285, + "train_speed(iter/s)": 0.580036 + }, + { + "acc": 0.9994276, + "epoch": 5.609756097560975, + "grad_norm": 0.03292407467961311, + "learning_rate": 4.407517984488881e-06, + "loss": 0.00245402, + "memory(GiB)": 26.31, + "step": 5290, + "train_speed(iter/s)": 0.580039 + }, + { + "acc": 0.99988422, + "epoch": 5.615058324496289, + "grad_norm": 0.22681885957717896, + "learning_rate": 4.39881409873392e-06, + "loss": 0.00070906, + "memory(GiB)": 26.31, + "step": 5295, + "train_speed(iter/s)": 0.580041 + }, + { + "acc": 0.99941082, + "epoch": 5.6203605514316015, + "grad_norm": 0.7250832319259644, + "learning_rate": 4.390112063045555e-06, + "loss": 0.00160247, + "memory(GiB)": 26.31, + "step": 5300, + "train_speed(iter/s)": 0.580052 + }, + { + "acc": 0.99921331, + "epoch": 5.625662778366914, + "grad_norm": 1.260987639427185, + "learning_rate": 4.381411904180846e-06, + "loss": 0.00214222, + "memory(GiB)": 26.31, + "step": 5305, + "train_speed(iter/s)": 0.580058 + }, + { + "acc": 0.9991765, + "epoch": 5.630965005302227, + "grad_norm": 0.24449238181114197, + "learning_rate": 4.372713648891081e-06, + "loss": 0.00172313, + "memory(GiB)": 26.31, + "step": 5310, + "train_speed(iter/s)": 0.580061 + }, + { + "acc": 0.99954405, + "epoch": 5.63626723223754, + "grad_norm": 0.8919406533241272, + "learning_rate": 4.364017323921696e-06, + "loss": 0.0016624, + "memory(GiB)": 26.31, + "step": 5315, + "train_speed(iter/s)": 0.580063 + }, + { + "acc": 0.99960203, + "epoch": 5.641569459172852, + "grad_norm": 0.2842835485935211, + "learning_rate": 4.355322956012191e-06, + "loss": 0.0010582, + "memory(GiB)": 26.31, + "step": 5320, + "train_speed(iter/s)": 0.580066 + }, + { + "acc": 0.99978628, + "epoch": 5.646871686108166, + "grad_norm": 0.6368212699890137, + "learning_rate": 4.346630571896048e-06, + "loss": 0.00189297, + "memory(GiB)": 26.31, + "step": 5325, + "train_speed(iter/s)": 0.580071 + }, + { + "acc": 0.99936647, + "epoch": 5.6521739130434785, + "grad_norm": 0.6470702290534973, + "learning_rate": 4.337940198300652e-06, + "loss": 0.00140759, + "memory(GiB)": 26.31, + "step": 5330, + "train_speed(iter/s)": 0.580074 + }, + { + "acc": 0.99936848, + "epoch": 5.657476139978791, + "grad_norm": 0.49525970220565796, + "learning_rate": 4.329251861947202e-06, + "loss": 0.00237361, + "memory(GiB)": 26.31, + "step": 5335, + "train_speed(iter/s)": 0.580077 + }, + { + "acc": 0.99964027, + "epoch": 5.662778366914104, + "grad_norm": 0.19526098668575287, + "learning_rate": 4.320565589550637e-06, + "loss": 0.00113255, + "memory(GiB)": 26.31, + "step": 5340, + "train_speed(iter/s)": 0.580083 + }, + { + "acc": 1.0, + "epoch": 5.6680805938494165, + "grad_norm": 0.20106367766857147, + "learning_rate": 4.311881407819546e-06, + "loss": 0.00045028, + "memory(GiB)": 26.31, + "step": 5345, + "train_speed(iter/s)": 0.580089 + }, + { + "acc": 0.99977512, + "epoch": 5.673382820784729, + "grad_norm": 0.2478099912405014, + "learning_rate": 4.303199343456091e-06, + "loss": 0.00076979, + "memory(GiB)": 26.31, + "step": 5350, + "train_speed(iter/s)": 0.580095 + }, + { + "acc": 0.99937286, + "epoch": 5.678685047720043, + "grad_norm": 0.7594713568687439, + "learning_rate": 4.294519423155924e-06, + "loss": 0.00199138, + "memory(GiB)": 26.31, + "step": 5355, + "train_speed(iter/s)": 0.580108 + }, + { + "acc": 0.99938717, + "epoch": 5.6839872746553555, + "grad_norm": 0.4824899733066559, + "learning_rate": 4.285841673608106e-06, + "loss": 0.0022531, + "memory(GiB)": 26.31, + "step": 5360, + "train_speed(iter/s)": 0.580114 + }, + { + "acc": 0.99939022, + "epoch": 5.689289501590668, + "grad_norm": 0.055846281349658966, + "learning_rate": 4.2771661214950185e-06, + "loss": 0.00241133, + "memory(GiB)": 26.31, + "step": 5365, + "train_speed(iter/s)": 0.580123 + }, + { + "acc": 1.0, + "epoch": 5.694591728525981, + "grad_norm": 0.22975574433803558, + "learning_rate": 4.2684927934922925e-06, + "loss": 0.00032793, + "memory(GiB)": 26.31, + "step": 5370, + "train_speed(iter/s)": 0.580129 + }, + { + "acc": 0.99939823, + "epoch": 5.6998939554612935, + "grad_norm": 0.4694140553474426, + "learning_rate": 4.259821716268714e-06, + "loss": 0.00242309, + "memory(GiB)": 26.31, + "step": 5375, + "train_speed(iter/s)": 0.580134 + }, + { + "acc": 0.99965668, + "epoch": 5.705196182396606, + "grad_norm": 0.22332924604415894, + "learning_rate": 4.251152916486151e-06, + "loss": 0.00169533, + "memory(GiB)": 26.31, + "step": 5380, + "train_speed(iter/s)": 0.580136 + }, + { + "acc": 0.99950085, + "epoch": 5.71049840933192, + "grad_norm": 0.08591309189796448, + "learning_rate": 4.242486420799474e-06, + "loss": 0.0019324, + "memory(GiB)": 26.31, + "step": 5385, + "train_speed(iter/s)": 0.580138 + }, + { + "acc": 0.99951143, + "epoch": 5.7158006362672324, + "grad_norm": 0.6218582391738892, + "learning_rate": 4.233822255856459e-06, + "loss": 0.00133204, + "memory(GiB)": 26.31, + "step": 5390, + "train_speed(iter/s)": 0.58014 + }, + { + "acc": 0.99975815, + "epoch": 5.721102863202545, + "grad_norm": 0.1280733346939087, + "learning_rate": 4.225160448297724e-06, + "loss": 0.00122716, + "memory(GiB)": 26.31, + "step": 5395, + "train_speed(iter/s)": 0.580142 + }, + { + "acc": 0.9993741, + "epoch": 5.726405090137858, + "grad_norm": 0.5542604327201843, + "learning_rate": 4.216501024756633e-06, + "loss": 0.00164174, + "memory(GiB)": 26.31, + "step": 5400, + "train_speed(iter/s)": 0.580144 + }, + { + "acc": 0.9996562, + "epoch": 5.7317073170731705, + "grad_norm": 0.3382243812084198, + "learning_rate": 4.207844011859222e-06, + "loss": 0.00148793, + "memory(GiB)": 26.31, + "step": 5405, + "train_speed(iter/s)": 0.580147 + }, + { + "acc": 0.9995121, + "epoch": 5.737009544008483, + "grad_norm": 0.5271421074867249, + "learning_rate": 4.199189436224115e-06, + "loss": 0.0008573, + "memory(GiB)": 26.31, + "step": 5410, + "train_speed(iter/s)": 0.580148 + }, + { + "acc": 0.99942265, + "epoch": 5.742311770943797, + "grad_norm": 0.20610488951206207, + "learning_rate": 4.190537324462441e-06, + "loss": 0.00160457, + "memory(GiB)": 26.31, + "step": 5415, + "train_speed(iter/s)": 0.580149 + }, + { + "acc": 0.99972239, + "epoch": 5.747613997879109, + "grad_norm": 0.7685815095901489, + "learning_rate": 4.181887703177751e-06, + "loss": 0.00161561, + "memory(GiB)": 26.31, + "step": 5420, + "train_speed(iter/s)": 0.580155 + }, + { + "acc": 0.99925451, + "epoch": 5.752916224814422, + "grad_norm": 0.04731239378452301, + "learning_rate": 4.173240598965944e-06, + "loss": 0.00405467, + "memory(GiB)": 26.31, + "step": 5425, + "train_speed(iter/s)": 0.580157 + }, + { + "acc": 0.99961529, + "epoch": 5.758218451749735, + "grad_norm": 0.22838689386844635, + "learning_rate": 4.164596038415176e-06, + "loss": 0.0010858, + "memory(GiB)": 26.31, + "step": 5430, + "train_speed(iter/s)": 0.580164 + }, + { + "acc": 0.9991354, + "epoch": 5.7635206786850475, + "grad_norm": 0.07062011957168579, + "learning_rate": 4.155954048105779e-06, + "loss": 0.00281267, + "memory(GiB)": 26.31, + "step": 5435, + "train_speed(iter/s)": 0.580165 + }, + { + "acc": 0.99977236, + "epoch": 5.768822905620361, + "grad_norm": 0.06927081197500229, + "learning_rate": 4.1473146546101865e-06, + "loss": 0.00067332, + "memory(GiB)": 26.31, + "step": 5440, + "train_speed(iter/s)": 0.580172 + }, + { + "acc": 0.9998889, + "epoch": 5.774125132555674, + "grad_norm": 0.14817936718463898, + "learning_rate": 4.138677884492846e-06, + "loss": 0.00083515, + "memory(GiB)": 26.31, + "step": 5445, + "train_speed(iter/s)": 0.580178 + }, + { + "acc": 0.99987869, + "epoch": 5.779427359490986, + "grad_norm": 0.05728980153799057, + "learning_rate": 4.130043764310138e-06, + "loss": 0.00039365, + "memory(GiB)": 26.31, + "step": 5450, + "train_speed(iter/s)": 0.580179 + }, + { + "acc": 0.99962482, + "epoch": 5.784729586426299, + "grad_norm": 0.5075556635856628, + "learning_rate": 4.121412320610294e-06, + "loss": 0.00210712, + "memory(GiB)": 26.31, + "step": 5455, + "train_speed(iter/s)": 0.58018 + }, + { + "acc": 0.99939346, + "epoch": 5.790031813361612, + "grad_norm": 0.4643910825252533, + "learning_rate": 4.112783579933319e-06, + "loss": 0.00085811, + "memory(GiB)": 26.31, + "step": 5460, + "train_speed(iter/s)": 0.580182 + }, + { + "acc": 1.0, + "epoch": 5.7953340402969244, + "grad_norm": 0.22472567856311798, + "learning_rate": 4.1041575688109034e-06, + "loss": 0.00029708, + "memory(GiB)": 26.31, + "step": 5465, + "train_speed(iter/s)": 0.580188 + }, + { + "acc": 0.99951172, + "epoch": 5.800636267232237, + "grad_norm": 0.0859142392873764, + "learning_rate": 4.0955343137663466e-06, + "loss": 0.00106492, + "memory(GiB)": 26.31, + "step": 5470, + "train_speed(iter/s)": 0.58019 + }, + { + "acc": 0.99962921, + "epoch": 5.805938494167551, + "grad_norm": 0.0403328463435173, + "learning_rate": 4.086913841314474e-06, + "loss": 0.0008122, + "memory(GiB)": 26.31, + "step": 5475, + "train_speed(iter/s)": 0.580193 + }, + { + "acc": 0.99966984, + "epoch": 5.811240721102863, + "grad_norm": 0.1314668506383896, + "learning_rate": 4.078296177961553e-06, + "loss": 0.00078211, + "memory(GiB)": 26.31, + "step": 5480, + "train_speed(iter/s)": 0.580202 + }, + { + "acc": 0.99951582, + "epoch": 5.816542948038176, + "grad_norm": 0.12210696190595627, + "learning_rate": 4.069681350205214e-06, + "loss": 0.00171753, + "memory(GiB)": 26.31, + "step": 5485, + "train_speed(iter/s)": 0.580204 + }, + { + "acc": 0.99915504, + "epoch": 5.821845174973489, + "grad_norm": 0.7598603963851929, + "learning_rate": 4.06106938453437e-06, + "loss": 0.00366015, + "memory(GiB)": 26.31, + "step": 5490, + "train_speed(iter/s)": 0.580207 + }, + { + "acc": 0.99903126, + "epoch": 5.827147401908801, + "grad_norm": 0.2603316903114319, + "learning_rate": 4.0524603074291355e-06, + "loss": 0.00248764, + "memory(GiB)": 26.31, + "step": 5495, + "train_speed(iter/s)": 0.580214 + }, + { + "acc": 0.99948807, + "epoch": 5.832449628844115, + "grad_norm": 0.06389316916465759, + "learning_rate": 4.043854145360737e-06, + "loss": 0.00247518, + "memory(GiB)": 26.31, + "step": 5500, + "train_speed(iter/s)": 0.580215 + }, + { + "acc": 0.99949951, + "epoch": 5.837751855779428, + "grad_norm": 0.43321940302848816, + "learning_rate": 4.035250924791445e-06, + "loss": 0.00341372, + "memory(GiB)": 26.31, + "step": 5505, + "train_speed(iter/s)": 0.580221 + }, + { + "acc": 1.0, + "epoch": 5.84305408271474, + "grad_norm": 0.21487638354301453, + "learning_rate": 4.026650672174478e-06, + "loss": 0.0005853, + "memory(GiB)": 26.31, + "step": 5510, + "train_speed(iter/s)": 0.580226 + }, + { + "acc": 0.99974747, + "epoch": 5.848356309650053, + "grad_norm": 0.10891429334878922, + "learning_rate": 4.018053413953936e-06, + "loss": 0.00077117, + "memory(GiB)": 26.31, + "step": 5515, + "train_speed(iter/s)": 0.580229 + }, + { + "acc": 0.99962959, + "epoch": 5.853658536585366, + "grad_norm": 0.06120121479034424, + "learning_rate": 4.0094591765647055e-06, + "loss": 0.00084518, + "memory(GiB)": 26.31, + "step": 5520, + "train_speed(iter/s)": 0.580233 + }, + { + "acc": 0.99952469, + "epoch": 5.858960763520678, + "grad_norm": 0.7826321125030518, + "learning_rate": 4.00086798643239e-06, + "loss": 0.00178249, + "memory(GiB)": 26.31, + "step": 5525, + "train_speed(iter/s)": 0.580244 + }, + { + "acc": 0.99960136, + "epoch": 5.864262990455991, + "grad_norm": 0.0882963016629219, + "learning_rate": 3.992279869973219e-06, + "loss": 0.00086801, + "memory(GiB)": 26.31, + "step": 5530, + "train_speed(iter/s)": 0.580247 + }, + { + "acc": 0.99987049, + "epoch": 5.869565217391305, + "grad_norm": 0.08971802890300751, + "learning_rate": 3.983694853593975e-06, + "loss": 0.00069402, + "memory(GiB)": 26.31, + "step": 5535, + "train_speed(iter/s)": 0.580249 + }, + { + "acc": 0.99986706, + "epoch": 5.874867444326617, + "grad_norm": 0.09412523359060287, + "learning_rate": 3.975112963691903e-06, + "loss": 0.00077367, + "memory(GiB)": 26.31, + "step": 5540, + "train_speed(iter/s)": 0.580254 + }, + { + "acc": 0.99965477, + "epoch": 5.88016967126193, + "grad_norm": 0.44179901480674744, + "learning_rate": 3.966534226654638e-06, + "loss": 0.00075631, + "memory(GiB)": 26.31, + "step": 5545, + "train_speed(iter/s)": 0.580256 + }, + { + "acc": 0.99975748, + "epoch": 5.885471898197243, + "grad_norm": 0.07655708491802216, + "learning_rate": 3.957958668860124e-06, + "loss": 0.00042496, + "memory(GiB)": 26.31, + "step": 5550, + "train_speed(iter/s)": 0.580261 + }, + { + "acc": 1.0, + "epoch": 5.890774125132555, + "grad_norm": 0.11391156911849976, + "learning_rate": 3.9493863166765216e-06, + "loss": 0.00015957, + "memory(GiB)": 26.31, + "step": 5555, + "train_speed(iter/s)": 0.580265 + }, + { + "acc": 0.99986839, + "epoch": 5.896076352067869, + "grad_norm": 0.0224043820053339, + "learning_rate": 3.940817196462143e-06, + "loss": 0.00036973, + "memory(GiB)": 26.31, + "step": 5560, + "train_speed(iter/s)": 0.580273 + }, + { + "acc": 0.99961348, + "epoch": 5.901378579003182, + "grad_norm": 0.495105504989624, + "learning_rate": 3.932251334565355e-06, + "loss": 0.00118398, + "memory(GiB)": 26.31, + "step": 5565, + "train_speed(iter/s)": 0.580274 + }, + { + "acc": 0.99988375, + "epoch": 5.906680805938494, + "grad_norm": 0.08777466416358948, + "learning_rate": 3.923688757324512e-06, + "loss": 0.00026788, + "memory(GiB)": 26.31, + "step": 5570, + "train_speed(iter/s)": 0.580277 + }, + { + "acc": 0.99988422, + "epoch": 5.911983032873807, + "grad_norm": 0.014756974764168262, + "learning_rate": 3.915129491067865e-06, + "loss": 0.00051979, + "memory(GiB)": 26.31, + "step": 5575, + "train_speed(iter/s)": 0.580279 + }, + { + "acc": 0.9996336, + "epoch": 5.91728525980912, + "grad_norm": 0.39819061756134033, + "learning_rate": 3.906573562113485e-06, + "loss": 0.00142694, + "memory(GiB)": 26.31, + "step": 5580, + "train_speed(iter/s)": 0.580286 + }, + { + "acc": 0.99911852, + "epoch": 5.922587486744432, + "grad_norm": 0.43225204944610596, + "learning_rate": 3.898020996769183e-06, + "loss": 0.00255523, + "memory(GiB)": 26.31, + "step": 5585, + "train_speed(iter/s)": 0.58029 + }, + { + "acc": 0.99950981, + "epoch": 5.927889713679745, + "grad_norm": 0.39705580472946167, + "learning_rate": 3.8894718213324265e-06, + "loss": 0.00156185, + "memory(GiB)": 26.31, + "step": 5590, + "train_speed(iter/s)": 0.580292 + }, + { + "acc": 0.99985552, + "epoch": 5.933191940615059, + "grad_norm": 0.013175041414797306, + "learning_rate": 3.88092606209026e-06, + "loss": 0.00043316, + "memory(GiB)": 26.31, + "step": 5595, + "train_speed(iter/s)": 0.580292 + }, + { + "acc": 0.99986343, + "epoch": 5.938494167550371, + "grad_norm": 0.03439586982131004, + "learning_rate": 3.872383745319222e-06, + "loss": 0.00046334, + "memory(GiB)": 26.31, + "step": 5600, + "train_speed(iter/s)": 0.580293 + }, + { + "acc": 0.99962111, + "epoch": 5.943796394485684, + "grad_norm": 0.24579821527004242, + "learning_rate": 3.8638448972852696e-06, + "loss": 0.00096211, + "memory(GiB)": 26.31, + "step": 5605, + "train_speed(iter/s)": 0.580295 + }, + { + "acc": 0.99963531, + "epoch": 5.949098621420997, + "grad_norm": 0.3907880187034607, + "learning_rate": 3.8553095442436914e-06, + "loss": 0.00164328, + "memory(GiB)": 26.31, + "step": 5610, + "train_speed(iter/s)": 0.580297 + }, + { + "acc": 0.99963646, + "epoch": 5.954400848356309, + "grad_norm": 0.0487142838537693, + "learning_rate": 3.8467777124390305e-06, + "loss": 0.00131084, + "memory(GiB)": 26.31, + "step": 5615, + "train_speed(iter/s)": 0.580299 + }, + { + "acc": 0.99961796, + "epoch": 5.959703075291623, + "grad_norm": 0.18401463329792023, + "learning_rate": 3.838249428105002e-06, + "loss": 0.00148167, + "memory(GiB)": 26.31, + "step": 5620, + "train_speed(iter/s)": 0.580301 + }, + { + "acc": 0.9992384, + "epoch": 5.965005302226936, + "grad_norm": 0.6816220879554749, + "learning_rate": 3.829724717464415e-06, + "loss": 0.00182049, + "memory(GiB)": 26.31, + "step": 5625, + "train_speed(iter/s)": 0.580303 + }, + { + "acc": 0.99937592, + "epoch": 5.970307529162248, + "grad_norm": 0.031764183193445206, + "learning_rate": 3.82120360672909e-06, + "loss": 0.00159306, + "memory(GiB)": 26.31, + "step": 5630, + "train_speed(iter/s)": 0.580304 + }, + { + "acc": 0.99964085, + "epoch": 5.975609756097561, + "grad_norm": 0.01764695718884468, + "learning_rate": 3.812686122099777e-06, + "loss": 0.00055448, + "memory(GiB)": 26.31, + "step": 5635, + "train_speed(iter/s)": 0.580313 + }, + { + "acc": 0.99962826, + "epoch": 5.980911983032874, + "grad_norm": 0.054920535534620285, + "learning_rate": 3.8041722897660766e-06, + "loss": 0.00077166, + "memory(GiB)": 26.31, + "step": 5640, + "train_speed(iter/s)": 0.580314 + }, + { + "acc": 0.99951496, + "epoch": 5.986214209968186, + "grad_norm": 0.5747213959693909, + "learning_rate": 3.7956621359063607e-06, + "loss": 0.00069109, + "memory(GiB)": 26.31, + "step": 5645, + "train_speed(iter/s)": 0.580319 + }, + { + "acc": 0.99986773, + "epoch": 5.991516436903499, + "grad_norm": 0.014520245604217052, + "learning_rate": 3.7871556866876886e-06, + "loss": 0.00121044, + "memory(GiB)": 26.31, + "step": 5650, + "train_speed(iter/s)": 0.580322 + }, + { + "acc": 0.99964981, + "epoch": 5.996818663838813, + "grad_norm": 0.18743537366390228, + "learning_rate": 3.7786529682657307e-06, + "loss": 0.00232155, + "memory(GiB)": 26.31, + "step": 5655, + "train_speed(iter/s)": 0.580325 + }, + { + "acc": 0.99961834, + "epoch": 6.002120890774125, + "grad_norm": 0.1396491974592209, + "learning_rate": 3.7701540067846855e-06, + "loss": 0.00328599, + "memory(GiB)": 26.31, + "step": 5660, + "train_speed(iter/s)": 0.580272 + }, + { + "acc": 0.99976521, + "epoch": 6.007423117709438, + "grad_norm": 0.07440268248319626, + "learning_rate": 3.7616588283771987e-06, + "loss": 0.00044463, + "memory(GiB)": 26.31, + "step": 5665, + "train_speed(iter/s)": 0.580273 + }, + { + "acc": 0.99973955, + "epoch": 6.012725344644751, + "grad_norm": 0.12697941064834595, + "learning_rate": 3.7531674591642843e-06, + "loss": 0.00148034, + "memory(GiB)": 26.31, + "step": 5670, + "train_speed(iter/s)": 0.580275 + }, + { + "acc": 0.9996397, + "epoch": 6.018027571580063, + "grad_norm": 0.05559253692626953, + "learning_rate": 3.7446799252552435e-06, + "loss": 0.00097631, + "memory(GiB)": 26.31, + "step": 5675, + "train_speed(iter/s)": 0.580278 + }, + { + "acc": 0.99927549, + "epoch": 6.023329798515377, + "grad_norm": 0.07566984742879868, + "learning_rate": 3.736196252747585e-06, + "loss": 0.00127908, + "memory(GiB)": 26.31, + "step": 5680, + "train_speed(iter/s)": 0.58028 + }, + { + "acc": 0.99911137, + "epoch": 6.0286320254506895, + "grad_norm": 0.5210506319999695, + "learning_rate": 3.7277164677269428e-06, + "loss": 0.00300755, + "memory(GiB)": 26.31, + "step": 5685, + "train_speed(iter/s)": 0.580285 + }, + { + "acc": 0.99961376, + "epoch": 6.033934252386002, + "grad_norm": 0.026948614045977592, + "learning_rate": 3.7192405962670007e-06, + "loss": 0.00089632, + "memory(GiB)": 26.31, + "step": 5690, + "train_speed(iter/s)": 0.580288 + }, + { + "acc": 0.99959393, + "epoch": 6.039236479321315, + "grad_norm": 0.08892546594142914, + "learning_rate": 3.710768664429409e-06, + "loss": 0.00135952, + "memory(GiB)": 26.31, + "step": 5695, + "train_speed(iter/s)": 0.58029 + }, + { + "acc": 0.99962883, + "epoch": 6.044538706256628, + "grad_norm": 0.039922814816236496, + "learning_rate": 3.7023006982637e-06, + "loss": 0.0013433, + "memory(GiB)": 26.31, + "step": 5700, + "train_speed(iter/s)": 0.580292 + }, + { + "acc": 0.99964199, + "epoch": 6.04984093319194, + "grad_norm": 0.30086588859558105, + "learning_rate": 3.693836723807217e-06, + "loss": 0.00114452, + "memory(GiB)": 26.31, + "step": 5705, + "train_speed(iter/s)": 0.5803 + }, + { + "acc": 1.0, + "epoch": 6.055143160127254, + "grad_norm": 0.05265273526310921, + "learning_rate": 3.6853767670850277e-06, + "loss": 0.00043265, + "memory(GiB)": 26.31, + "step": 5710, + "train_speed(iter/s)": 0.580305 + }, + { + "acc": 0.99988737, + "epoch": 6.0604453870625665, + "grad_norm": 0.20193615555763245, + "learning_rate": 3.6769208541098445e-06, + "loss": 0.0004146, + "memory(GiB)": 26.31, + "step": 5715, + "train_speed(iter/s)": 0.58031 + }, + { + "acc": 0.99975872, + "epoch": 6.065747613997879, + "grad_norm": 0.09985252469778061, + "learning_rate": 3.6684690108819503e-06, + "loss": 0.00076501, + "memory(GiB)": 26.31, + "step": 5720, + "train_speed(iter/s)": 0.580312 + }, + { + "acc": 0.99988317, + "epoch": 6.071049840933192, + "grad_norm": 0.12704992294311523, + "learning_rate": 3.6600212633891115e-06, + "loss": 0.00077298, + "memory(GiB)": 26.31, + "step": 5725, + "train_speed(iter/s)": 0.580317 + }, + { + "acc": 0.99954891, + "epoch": 6.076352067868505, + "grad_norm": 0.24382582306861877, + "learning_rate": 3.6515776376064993e-06, + "loss": 0.00140351, + "memory(GiB)": 26.31, + "step": 5730, + "train_speed(iter/s)": 0.580321 + }, + { + "acc": 0.99947262, + "epoch": 6.081654294803817, + "grad_norm": 0.2057705819606781, + "learning_rate": 3.6431381594966132e-06, + "loss": 0.00178049, + "memory(GiB)": 26.31, + "step": 5735, + "train_speed(iter/s)": 0.580325 + }, + { + "acc": 0.99937897, + "epoch": 6.086956521739131, + "grad_norm": 0.12836192548274994, + "learning_rate": 3.634702855009202e-06, + "loss": 0.00141335, + "memory(GiB)": 26.31, + "step": 5740, + "train_speed(iter/s)": 0.580327 + }, + { + "acc": 0.99946241, + "epoch": 6.0922587486744435, + "grad_norm": 0.024506429210305214, + "learning_rate": 3.626271750081179e-06, + "loss": 0.00189124, + "memory(GiB)": 26.31, + "step": 5745, + "train_speed(iter/s)": 0.580333 + }, + { + "acc": 0.99987984, + "epoch": 6.097560975609756, + "grad_norm": 0.09324854612350464, + "learning_rate": 3.6178448706365425e-06, + "loss": 0.00046005, + "memory(GiB)": 26.31, + "step": 5750, + "train_speed(iter/s)": 0.580335 + }, + { + "acc": 0.99986706, + "epoch": 6.102863202545069, + "grad_norm": 0.2579773962497711, + "learning_rate": 3.609422242586302e-06, + "loss": 0.00109415, + "memory(GiB)": 26.31, + "step": 5755, + "train_speed(iter/s)": 0.580341 + }, + { + "acc": 0.99936991, + "epoch": 6.1081654294803815, + "grad_norm": 1.3310717344284058, + "learning_rate": 3.601003891828393e-06, + "loss": 0.00154827, + "memory(GiB)": 26.31, + "step": 5760, + "train_speed(iter/s)": 0.580343 + }, + { + "acc": 0.99950371, + "epoch": 6.113467656415694, + "grad_norm": 0.8870381712913513, + "learning_rate": 3.592589844247599e-06, + "loss": 0.00193892, + "memory(GiB)": 26.31, + "step": 5765, + "train_speed(iter/s)": 0.580345 + }, + { + "acc": 0.9996335, + "epoch": 6.118769883351008, + "grad_norm": 0.17148469388484955, + "learning_rate": 3.5841801257154724e-06, + "loss": 0.00105367, + "memory(GiB)": 26.31, + "step": 5770, + "train_speed(iter/s)": 0.580347 + }, + { + "acc": 0.99933376, + "epoch": 6.1240721102863205, + "grad_norm": 0.3207806646823883, + "learning_rate": 3.575774762090255e-06, + "loss": 0.00165185, + "memory(GiB)": 26.31, + "step": 5775, + "train_speed(iter/s)": 0.58035 + }, + { + "acc": 0.99961987, + "epoch": 6.129374337221633, + "grad_norm": 0.5090070962905884, + "learning_rate": 3.5673737792167974e-06, + "loss": 0.00113542, + "memory(GiB)": 26.31, + "step": 5780, + "train_speed(iter/s)": 0.580356 + }, + { + "acc": 0.99986839, + "epoch": 6.134676564156946, + "grad_norm": 0.870831310749054, + "learning_rate": 3.5589772029264806e-06, + "loss": 0.00054594, + "memory(GiB)": 26.31, + "step": 5785, + "train_speed(iter/s)": 0.580358 + }, + { + "acc": 0.99954042, + "epoch": 6.1399787910922585, + "grad_norm": 0.043732356280088425, + "learning_rate": 3.550585059037138e-06, + "loss": 0.00141125, + "memory(GiB)": 26.31, + "step": 5790, + "train_speed(iter/s)": 0.58036 + }, + { + "acc": 0.99966211, + "epoch": 6.145281018027571, + "grad_norm": 0.08508095890283585, + "learning_rate": 3.5421973733529703e-06, + "loss": 0.00087431, + "memory(GiB)": 26.31, + "step": 5795, + "train_speed(iter/s)": 0.580365 + }, + { + "acc": 0.99954786, + "epoch": 6.150583244962885, + "grad_norm": 0.06490044295787811, + "learning_rate": 3.5338141716644734e-06, + "loss": 0.00147838, + "memory(GiB)": 26.31, + "step": 5800, + "train_speed(iter/s)": 0.580367 + }, + { + "acc": 0.99926376, + "epoch": 6.1558854718981975, + "grad_norm": 0.5829356908798218, + "learning_rate": 3.5254354797483547e-06, + "loss": 0.00214265, + "memory(GiB)": 26.31, + "step": 5805, + "train_speed(iter/s)": 0.580374 + }, + { + "acc": 0.99986916, + "epoch": 6.16118769883351, + "grad_norm": 0.05491666868329048, + "learning_rate": 3.517061323367454e-06, + "loss": 0.00057996, + "memory(GiB)": 26.31, + "step": 5810, + "train_speed(iter/s)": 0.580377 + }, + { + "acc": 0.99975939, + "epoch": 6.166489925768823, + "grad_norm": 0.16530375182628632, + "learning_rate": 3.508691728270666e-06, + "loss": 0.00065435, + "memory(GiB)": 26.31, + "step": 5815, + "train_speed(iter/s)": 0.58038 + }, + { + "acc": 0.99928436, + "epoch": 6.1717921527041355, + "grad_norm": 0.3584500849246979, + "learning_rate": 3.500326720192862e-06, + "loss": 0.00136761, + "memory(GiB)": 26.31, + "step": 5820, + "train_speed(iter/s)": 0.580382 + }, + { + "acc": 0.99927006, + "epoch": 6.177094379639448, + "grad_norm": 0.483460396528244, + "learning_rate": 3.4919663248548074e-06, + "loss": 0.00137038, + "memory(GiB)": 26.31, + "step": 5825, + "train_speed(iter/s)": 0.580385 + }, + { + "acc": 1.0, + "epoch": 6.182396606574762, + "grad_norm": 0.40385866165161133, + "learning_rate": 3.483610567963083e-06, + "loss": 0.00039675, + "memory(GiB)": 26.31, + "step": 5830, + "train_speed(iter/s)": 0.580387 + }, + { + "acc": 0.99945812, + "epoch": 6.187698833510074, + "grad_norm": 0.3637162744998932, + "learning_rate": 3.4752594752100104e-06, + "loss": 0.00089644, + "memory(GiB)": 26.31, + "step": 5835, + "train_speed(iter/s)": 0.580392 + }, + { + "acc": 0.99965353, + "epoch": 6.193001060445387, + "grad_norm": 0.01765528880059719, + "learning_rate": 3.4669130722735677e-06, + "loss": 0.00234709, + "memory(GiB)": 26.31, + "step": 5840, + "train_speed(iter/s)": 0.580395 + }, + { + "acc": 0.99975843, + "epoch": 6.1983032873807, + "grad_norm": 0.05318576842546463, + "learning_rate": 3.4585713848173103e-06, + "loss": 0.00053864, + "memory(GiB)": 26.31, + "step": 5845, + "train_speed(iter/s)": 0.580397 + }, + { + "acc": 0.99962549, + "epoch": 6.2036055143160125, + "grad_norm": 0.1479036659002304, + "learning_rate": 3.450234438490302e-06, + "loss": 0.00229071, + "memory(GiB)": 26.31, + "step": 5850, + "train_speed(iter/s)": 0.580397 + }, + { + "acc": 0.9994688, + "epoch": 6.208907741251325, + "grad_norm": 0.29508110880851746, + "learning_rate": 3.441902258927023e-06, + "loss": 0.00189362, + "memory(GiB)": 26.31, + "step": 5855, + "train_speed(iter/s)": 0.580402 + }, + { + "acc": 0.99962225, + "epoch": 6.214209968186639, + "grad_norm": 0.07441110908985138, + "learning_rate": 3.4335748717472966e-06, + "loss": 0.00136693, + "memory(GiB)": 26.31, + "step": 5860, + "train_speed(iter/s)": 0.580404 + }, + { + "acc": 0.99952202, + "epoch": 6.219512195121951, + "grad_norm": 0.049704521894454956, + "learning_rate": 3.4252523025562127e-06, + "loss": 0.00066762, + "memory(GiB)": 26.31, + "step": 5865, + "train_speed(iter/s)": 0.580406 + }, + { + "acc": 0.99935627, + "epoch": 6.224814422057264, + "grad_norm": 0.3847588896751404, + "learning_rate": 3.4169345769440435e-06, + "loss": 0.0027355, + "memory(GiB)": 26.31, + "step": 5870, + "train_speed(iter/s)": 0.580411 + }, + { + "acc": 0.99940147, + "epoch": 6.230116648992577, + "grad_norm": 0.10670210421085358, + "learning_rate": 3.4086217204861722e-06, + "loss": 0.00174768, + "memory(GiB)": 26.31, + "step": 5875, + "train_speed(iter/s)": 0.580413 + }, + { + "acc": 0.99975185, + "epoch": 6.2354188759278895, + "grad_norm": 0.04808919504284859, + "learning_rate": 3.400313758743006e-06, + "loss": 0.00071273, + "memory(GiB)": 26.31, + "step": 5880, + "train_speed(iter/s)": 0.580414 + }, + { + "acc": 0.99987183, + "epoch": 6.240721102863202, + "grad_norm": 0.04007013142108917, + "learning_rate": 3.392010717259907e-06, + "loss": 0.00057667, + "memory(GiB)": 26.31, + "step": 5885, + "train_speed(iter/s)": 0.580419 + }, + { + "acc": 0.99977446, + "epoch": 6.246023329798516, + "grad_norm": 0.38751649856567383, + "learning_rate": 3.383712621567104e-06, + "loss": 0.0004282, + "memory(GiB)": 26.31, + "step": 5890, + "train_speed(iter/s)": 0.580424 + }, + { + "acc": 1.0, + "epoch": 6.251325556733828, + "grad_norm": 0.02446839213371277, + "learning_rate": 3.37541949717962e-06, + "loss": 0.00021479, + "memory(GiB)": 26.31, + "step": 5895, + "train_speed(iter/s)": 0.580426 + }, + { + "acc": 1.0, + "epoch": 6.256627783669141, + "grad_norm": 0.03881816565990448, + "learning_rate": 3.367131369597193e-06, + "loss": 0.00025941, + "memory(GiB)": 26.31, + "step": 5900, + "train_speed(iter/s)": 0.580428 + }, + { + "acc": 0.99954081, + "epoch": 6.261930010604454, + "grad_norm": 0.10912594944238663, + "learning_rate": 3.3588482643041955e-06, + "loss": 0.00139892, + "memory(GiB)": 26.31, + "step": 5905, + "train_speed(iter/s)": 0.580434 + }, + { + "acc": 0.9998641, + "epoch": 6.267232237539766, + "grad_norm": 0.03487817570567131, + "learning_rate": 3.3505702067695577e-06, + "loss": 0.00065392, + "memory(GiB)": 26.31, + "step": 5910, + "train_speed(iter/s)": 0.580436 + }, + { + "acc": 0.99950733, + "epoch": 6.272534464475079, + "grad_norm": 0.21163144707679749, + "learning_rate": 3.3422972224466905e-06, + "loss": 0.00111407, + "memory(GiB)": 26.31, + "step": 5915, + "train_speed(iter/s)": 0.580442 + }, + { + "acc": 1.0, + "epoch": 6.277836691410393, + "grad_norm": 0.3267722427845001, + "learning_rate": 3.334029336773403e-06, + "loss": 0.00024705, + "memory(GiB)": 26.31, + "step": 5920, + "train_speed(iter/s)": 0.580444 + }, + { + "acc": 0.9996254, + "epoch": 6.283138918345705, + "grad_norm": 0.06944040954113007, + "learning_rate": 3.32576657517183e-06, + "loss": 0.00047762, + "memory(GiB)": 26.31, + "step": 5925, + "train_speed(iter/s)": 0.580446 + }, + { + "acc": 0.99964447, + "epoch": 6.288441145281018, + "grad_norm": 0.5120355486869812, + "learning_rate": 3.3175089630483474e-06, + "loss": 0.00079916, + "memory(GiB)": 26.31, + "step": 5930, + "train_speed(iter/s)": 0.580449 + }, + { + "acc": 0.99948606, + "epoch": 6.293743372216331, + "grad_norm": 0.38711291551589966, + "learning_rate": 3.3092565257935004e-06, + "loss": 0.00121321, + "memory(GiB)": 26.31, + "step": 5935, + "train_speed(iter/s)": 0.58045 + }, + { + "acc": 0.99974937, + "epoch": 6.299045599151643, + "grad_norm": 0.2560780644416809, + "learning_rate": 3.3010092887819207e-06, + "loss": 0.00046337, + "memory(GiB)": 26.31, + "step": 5940, + "train_speed(iter/s)": 0.580453 + }, + { + "acc": 0.99935503, + "epoch": 6.304347826086957, + "grad_norm": 0.19002583622932434, + "learning_rate": 3.29276727737225e-06, + "loss": 0.00214132, + "memory(GiB)": 26.31, + "step": 5945, + "train_speed(iter/s)": 0.580458 + }, + { + "acc": 0.99951153, + "epoch": 6.30965005302227, + "grad_norm": 0.35275277495384216, + "learning_rate": 3.2845305169070658e-06, + "loss": 0.00183648, + "memory(GiB)": 26.31, + "step": 5950, + "train_speed(iter/s)": 0.580463 + }, + { + "acc": 0.99953251, + "epoch": 6.314952279957582, + "grad_norm": 0.36648836731910706, + "learning_rate": 3.2762990327127924e-06, + "loss": 0.00148667, + "memory(GiB)": 26.31, + "step": 5955, + "train_speed(iter/s)": 0.580466 + }, + { + "acc": 0.99934921, + "epoch": 6.320254506892895, + "grad_norm": 0.5325089693069458, + "learning_rate": 3.268072850099642e-06, + "loss": 0.00145414, + "memory(GiB)": 26.31, + "step": 5960, + "train_speed(iter/s)": 0.580472 + }, + { + "acc": 0.99988632, + "epoch": 6.325556733828208, + "grad_norm": 0.43265655636787415, + "learning_rate": 3.259851994361516e-06, + "loss": 0.00036228, + "memory(GiB)": 26.31, + "step": 5965, + "train_speed(iter/s)": 0.580474 + }, + { + "acc": 0.99985638, + "epoch": 6.33085896076352, + "grad_norm": 0.12824462354183197, + "learning_rate": 3.2516364907759384e-06, + "loss": 0.00056045, + "memory(GiB)": 26.31, + "step": 5970, + "train_speed(iter/s)": 0.580479 + }, + { + "acc": 1.0, + "epoch": 6.336161187698833, + "grad_norm": 0.09254786372184753, + "learning_rate": 3.24342636460398e-06, + "loss": 0.00025173, + "memory(GiB)": 26.31, + "step": 5975, + "train_speed(iter/s)": 0.580481 + }, + { + "acc": 0.99964771, + "epoch": 6.341463414634147, + "grad_norm": 0.018121229484677315, + "learning_rate": 3.2352216410901717e-06, + "loss": 0.00077881, + "memory(GiB)": 26.31, + "step": 5980, + "train_speed(iter/s)": 0.580483 + }, + { + "acc": 0.99964867, + "epoch": 6.346765641569459, + "grad_norm": 0.2775912284851074, + "learning_rate": 3.227022345462438e-06, + "loss": 0.00143707, + "memory(GiB)": 26.31, + "step": 5985, + "train_speed(iter/s)": 0.580484 + }, + { + "acc": 0.9995945, + "epoch": 6.352067868504772, + "grad_norm": 0.21149085462093353, + "learning_rate": 3.218828502932011e-06, + "loss": 0.00178856, + "memory(GiB)": 26.31, + "step": 5990, + "train_speed(iter/s)": 0.580487 + }, + { + "acc": 0.99965143, + "epoch": 6.357370095440085, + "grad_norm": 0.11793508380651474, + "learning_rate": 3.210640138693354e-06, + "loss": 0.00077981, + "memory(GiB)": 26.31, + "step": 5995, + "train_speed(iter/s)": 0.580491 + }, + { + "acc": 1.0, + "epoch": 6.362672322375397, + "grad_norm": 0.08975546807050705, + "learning_rate": 3.2024572779240894e-06, + "loss": 0.00041794, + "memory(GiB)": 26.31, + "step": 6000, + "train_speed(iter/s)": 0.580493 + }, + { + "acc": 0.99987803, + "epoch": 6.367974549310711, + "grad_norm": 0.07812675833702087, + "learning_rate": 3.1942799457849133e-06, + "loss": 0.00044167, + "memory(GiB)": 26.31, + "step": 6005, + "train_speed(iter/s)": 0.580495 + }, + { + "acc": 0.99948139, + "epoch": 6.373276776246024, + "grad_norm": 0.2446129322052002, + "learning_rate": 3.1861081674195256e-06, + "loss": 0.00090716, + "memory(GiB)": 26.31, + "step": 6010, + "train_speed(iter/s)": 0.580499 + }, + { + "acc": 0.99952393, + "epoch": 6.378579003181336, + "grad_norm": 0.06943213939666748, + "learning_rate": 3.1779419679545477e-06, + "loss": 0.00096079, + "memory(GiB)": 26.31, + "step": 6015, + "train_speed(iter/s)": 0.580503 + }, + { + "acc": 0.99987497, + "epoch": 6.383881230116649, + "grad_norm": 0.0138889504596591, + "learning_rate": 3.1697813724994486e-06, + "loss": 0.00091873, + "memory(GiB)": 26.31, + "step": 6020, + "train_speed(iter/s)": 0.580504 + }, + { + "acc": 0.99928799, + "epoch": 6.389183457051962, + "grad_norm": 0.6376550793647766, + "learning_rate": 3.161626406146464e-06, + "loss": 0.00331742, + "memory(GiB)": 26.31, + "step": 6025, + "train_speed(iter/s)": 0.580505 + }, + { + "acc": 0.99977684, + "epoch": 6.394485683987274, + "grad_norm": 0.24388806521892548, + "learning_rate": 3.153477093970523e-06, + "loss": 0.00077564, + "memory(GiB)": 26.31, + "step": 6030, + "train_speed(iter/s)": 0.580507 + }, + { + "acc": 0.9996335, + "epoch": 6.399787910922587, + "grad_norm": 0.03800429403781891, + "learning_rate": 3.1453334610291675e-06, + "loss": 0.00044972, + "memory(GiB)": 26.31, + "step": 6035, + "train_speed(iter/s)": 0.580512 + }, + { + "acc": 0.9997385, + "epoch": 6.405090137857901, + "grad_norm": 0.27016544342041016, + "learning_rate": 3.1371955323624764e-06, + "loss": 0.00117639, + "memory(GiB)": 26.31, + "step": 6040, + "train_speed(iter/s)": 0.580514 + }, + { + "acc": 0.99986916, + "epoch": 6.410392364793213, + "grad_norm": 0.4027722179889679, + "learning_rate": 3.1290633329929947e-06, + "loss": 0.00075675, + "memory(GiB)": 26.31, + "step": 6045, + "train_speed(iter/s)": 0.580516 + }, + { + "acc": 0.9998744, + "epoch": 6.415694591728526, + "grad_norm": 0.09925177693367004, + "learning_rate": 3.1209368879256437e-06, + "loss": 0.00023921, + "memory(GiB)": 26.31, + "step": 6050, + "train_speed(iter/s)": 0.580523 + }, + { + "acc": 0.9996172, + "epoch": 6.420996818663839, + "grad_norm": 0.04051406309008598, + "learning_rate": 3.112816222147655e-06, + "loss": 0.00120936, + "memory(GiB)": 26.31, + "step": 6055, + "train_speed(iter/s)": 0.580525 + }, + { + "acc": 0.99975414, + "epoch": 6.426299045599151, + "grad_norm": 0.10684379935264587, + "learning_rate": 3.1047013606284887e-06, + "loss": 0.00150528, + "memory(GiB)": 26.31, + "step": 6060, + "train_speed(iter/s)": 0.58053 + }, + { + "acc": 0.99965429, + "epoch": 6.431601272534465, + "grad_norm": 0.0392468124628067, + "learning_rate": 3.096592328319758e-06, + "loss": 0.00054083, + "memory(GiB)": 26.31, + "step": 6065, + "train_speed(iter/s)": 0.580537 + }, + { + "acc": 0.99987803, + "epoch": 6.436903499469778, + "grad_norm": 0.04810251295566559, + "learning_rate": 3.0884891501551552e-06, + "loss": 0.00025685, + "memory(GiB)": 26.31, + "step": 6070, + "train_speed(iter/s)": 0.580542 + }, + { + "acc": 0.99963684, + "epoch": 6.44220572640509, + "grad_norm": 0.06519544869661331, + "learning_rate": 3.0803918510503688e-06, + "loss": 0.00039638, + "memory(GiB)": 26.31, + "step": 6075, + "train_speed(iter/s)": 0.580553 + }, + { + "acc": 0.99988785, + "epoch": 6.447507953340403, + "grad_norm": 0.024030832573771477, + "learning_rate": 3.072300455903011e-06, + "loss": 0.00040283, + "memory(GiB)": 26.31, + "step": 6080, + "train_speed(iter/s)": 0.580555 + }, + { + "acc": 0.99975195, + "epoch": 6.452810180275716, + "grad_norm": 0.03808549419045448, + "learning_rate": 3.0642149895925435e-06, + "loss": 0.00141243, + "memory(GiB)": 26.31, + "step": 6085, + "train_speed(iter/s)": 0.580556 + }, + { + "acc": 0.99953356, + "epoch": 6.458112407211028, + "grad_norm": 0.032950401306152344, + "learning_rate": 3.056135476980193e-06, + "loss": 0.00124777, + "memory(GiB)": 26.31, + "step": 6090, + "train_speed(iter/s)": 0.580558 + }, + { + "acc": 0.99964943, + "epoch": 6.463414634146342, + "grad_norm": 0.09107203036546707, + "learning_rate": 3.0480619429088845e-06, + "loss": 0.00064693, + "memory(GiB)": 26.31, + "step": 6095, + "train_speed(iter/s)": 0.580561 + }, + { + "acc": 0.99975853, + "epoch": 6.468716861081655, + "grad_norm": 0.4441612660884857, + "learning_rate": 3.039994412203155e-06, + "loss": 0.00034327, + "memory(GiB)": 26.31, + "step": 6100, + "train_speed(iter/s)": 0.580563 + }, + { + "acc": 0.99963551, + "epoch": 6.474019088016967, + "grad_norm": 0.2487131804227829, + "learning_rate": 3.0319329096690882e-06, + "loss": 0.00101695, + "memory(GiB)": 26.31, + "step": 6105, + "train_speed(iter/s)": 0.580564 + }, + { + "acc": 0.99965181, + "epoch": 6.47932131495228, + "grad_norm": 0.5057806968688965, + "learning_rate": 3.023877460094226e-06, + "loss": 0.00179216, + "memory(GiB)": 26.31, + "step": 6110, + "train_speed(iter/s)": 0.580565 + }, + { + "acc": 0.99960289, + "epoch": 6.484623541887593, + "grad_norm": 0.1770421415567398, + "learning_rate": 3.0158280882475062e-06, + "loss": 0.00084595, + "memory(GiB)": 26.31, + "step": 6115, + "train_speed(iter/s)": 0.580567 + }, + { + "acc": 0.99987621, + "epoch": 6.489925768822905, + "grad_norm": 0.013583851978182793, + "learning_rate": 3.0077848188791724e-06, + "loss": 0.00031157, + "memory(GiB)": 26.31, + "step": 6120, + "train_speed(iter/s)": 0.580567 + }, + { + "acc": 0.99923391, + "epoch": 6.495227995758219, + "grad_norm": 0.10873568058013916, + "learning_rate": 2.999747676720706e-06, + "loss": 0.00287378, + "memory(GiB)": 26.31, + "step": 6125, + "train_speed(iter/s)": 0.580569 + }, + { + "acc": 0.9996233, + "epoch": 6.5005302226935315, + "grad_norm": 0.021153470501303673, + "learning_rate": 2.991716686484751e-06, + "loss": 0.00045944, + "memory(GiB)": 26.31, + "step": 6130, + "train_speed(iter/s)": 0.580571 + }, + { + "acc": 0.99950199, + "epoch": 6.505832449628844, + "grad_norm": 0.4688558876514435, + "learning_rate": 2.9836918728650304e-06, + "loss": 0.00141591, + "memory(GiB)": 26.31, + "step": 6135, + "train_speed(iter/s)": 0.580573 + }, + { + "acc": 0.99975605, + "epoch": 6.511134676564157, + "grad_norm": 0.511949896812439, + "learning_rate": 2.97567326053628e-06, + "loss": 0.00100874, + "memory(GiB)": 26.31, + "step": 6140, + "train_speed(iter/s)": 0.580578 + }, + { + "acc": 1.0, + "epoch": 6.51643690349947, + "grad_norm": 0.07330206036567688, + "learning_rate": 2.967660874154166e-06, + "loss": 0.00015084, + "memory(GiB)": 26.31, + "step": 6145, + "train_speed(iter/s)": 0.58058 + }, + { + "acc": 0.99946985, + "epoch": 6.521739130434782, + "grad_norm": 0.05954563617706299, + "learning_rate": 2.9596547383552127e-06, + "loss": 0.00098971, + "memory(GiB)": 26.31, + "step": 6150, + "train_speed(iter/s)": 0.580583 + }, + { + "acc": 0.9996397, + "epoch": 6.527041357370095, + "grad_norm": 0.08538658916950226, + "learning_rate": 2.9516548777567216e-06, + "loss": 0.00134288, + "memory(GiB)": 26.31, + "step": 6155, + "train_speed(iter/s)": 0.580586 + }, + { + "acc": 0.99972944, + "epoch": 6.5323435843054085, + "grad_norm": 0.03336101025342941, + "learning_rate": 2.9436613169567006e-06, + "loss": 0.00101618, + "memory(GiB)": 26.31, + "step": 6160, + "train_speed(iter/s)": 0.580587 + }, + { + "acc": 0.9998641, + "epoch": 6.537645811240721, + "grad_norm": 0.1260402351617813, + "learning_rate": 2.9356740805337897e-06, + "loss": 0.00048851, + "memory(GiB)": 26.31, + "step": 6165, + "train_speed(iter/s)": 0.58059 + }, + { + "acc": 0.99977436, + "epoch": 6.542948038176034, + "grad_norm": 0.1818159520626068, + "learning_rate": 2.9276931930471765e-06, + "loss": 0.00140418, + "memory(GiB)": 26.31, + "step": 6170, + "train_speed(iter/s)": 0.580592 + }, + { + "acc": 0.99977322, + "epoch": 6.548250265111347, + "grad_norm": 0.5192388296127319, + "learning_rate": 2.919718679036535e-06, + "loss": 0.00106864, + "memory(GiB)": 26.31, + "step": 6175, + "train_speed(iter/s)": 0.580594 + }, + { + "acc": 0.99987564, + "epoch": 6.553552492046659, + "grad_norm": 0.46558713912963867, + "learning_rate": 2.9117505630219366e-06, + "loss": 0.00072226, + "memory(GiB)": 26.31, + "step": 6180, + "train_speed(iter/s)": 0.580594 + }, + { + "acc": 0.99974155, + "epoch": 6.558854718981973, + "grad_norm": 0.03769663721323013, + "learning_rate": 2.903788869503782e-06, + "loss": 0.00071387, + "memory(GiB)": 26.31, + "step": 6185, + "train_speed(iter/s)": 0.580596 + }, + { + "acc": 1.0, + "epoch": 6.5641569459172855, + "grad_norm": 0.009774613194167614, + "learning_rate": 2.8958336229627208e-06, + "loss": 0.00032132, + "memory(GiB)": 26.31, + "step": 6190, + "train_speed(iter/s)": 0.580602 + }, + { + "acc": 0.99953594, + "epoch": 6.569459172852598, + "grad_norm": 0.061937376856803894, + "learning_rate": 2.8878848478595844e-06, + "loss": 0.00216506, + "memory(GiB)": 26.31, + "step": 6195, + "train_speed(iter/s)": 0.580604 + }, + { + "acc": 0.99976463, + "epoch": 6.574761399787911, + "grad_norm": 0.03381747752428055, + "learning_rate": 2.8799425686353022e-06, + "loss": 0.00101449, + "memory(GiB)": 26.31, + "step": 6200, + "train_speed(iter/s)": 0.580608 + }, + { + "acc": 0.99985552, + "epoch": 6.5800636267232235, + "grad_norm": 0.02245154045522213, + "learning_rate": 2.8720068097108316e-06, + "loss": 0.00081072, + "memory(GiB)": 26.31, + "step": 6205, + "train_speed(iter/s)": 0.580613 + }, + { + "acc": 0.99988422, + "epoch": 6.585365853658536, + "grad_norm": 0.040584757924079895, + "learning_rate": 2.8640775954870803e-06, + "loss": 0.00035723, + "memory(GiB)": 26.31, + "step": 6210, + "train_speed(iter/s)": 0.580614 + }, + { + "acc": 1.0, + "epoch": 6.59066808059385, + "grad_norm": 0.02291068434715271, + "learning_rate": 2.856154950344833e-06, + "loss": 0.00016179, + "memory(GiB)": 26.31, + "step": 6215, + "train_speed(iter/s)": 0.580616 + }, + { + "acc": 0.99978943, + "epoch": 6.5959703075291625, + "grad_norm": 0.16216276586055756, + "learning_rate": 2.8482388986446763e-06, + "loss": 0.00112295, + "memory(GiB)": 26.31, + "step": 6220, + "train_speed(iter/s)": 0.580618 + }, + { + "acc": 0.99913769, + "epoch": 6.601272534464475, + "grad_norm": 0.33439528942108154, + "learning_rate": 2.84032946472692e-06, + "loss": 0.00229247, + "memory(GiB)": 26.31, + "step": 6225, + "train_speed(iter/s)": 0.580622 + }, + { + "acc": 0.99950619, + "epoch": 6.606574761399788, + "grad_norm": 0.15778516232967377, + "learning_rate": 2.8324266729115323e-06, + "loss": 0.00081515, + "memory(GiB)": 26.31, + "step": 6230, + "train_speed(iter/s)": 0.580624 + }, + { + "acc": 0.99988585, + "epoch": 6.6118769883351005, + "grad_norm": 0.056077904999256134, + "learning_rate": 2.824530547498052e-06, + "loss": 0.00068123, + "memory(GiB)": 26.31, + "step": 6235, + "train_speed(iter/s)": 0.580629 + }, + { + "acc": 0.99976425, + "epoch": 6.617179215270413, + "grad_norm": 0.3225567042827606, + "learning_rate": 2.816641112765523e-06, + "loss": 0.00101443, + "memory(GiB)": 26.31, + "step": 6240, + "train_speed(iter/s)": 0.580632 + }, + { + "acc": 0.99988632, + "epoch": 6.622481442205727, + "grad_norm": 0.33098331093788147, + "learning_rate": 2.8087583929724137e-06, + "loss": 0.00050625, + "memory(GiB)": 26.31, + "step": 6245, + "train_speed(iter/s)": 0.580633 + }, + { + "acc": 0.99946451, + "epoch": 6.6277836691410394, + "grad_norm": 0.08109508454799652, + "learning_rate": 2.80088241235655e-06, + "loss": 0.00112214, + "memory(GiB)": 26.31, + "step": 6250, + "train_speed(iter/s)": 0.580634 + }, + { + "acc": 0.99975662, + "epoch": 6.633085896076352, + "grad_norm": 0.02807941474020481, + "learning_rate": 2.793013195135032e-06, + "loss": 0.00050048, + "memory(GiB)": 26.31, + "step": 6255, + "train_speed(iter/s)": 0.58064 + }, + { + "acc": 0.99951725, + "epoch": 6.638388123011665, + "grad_norm": 0.6170868277549744, + "learning_rate": 2.7851507655041646e-06, + "loss": 0.00163178, + "memory(GiB)": 26.31, + "step": 6260, + "train_speed(iter/s)": 0.580648 + }, + { + "acc": 0.99974747, + "epoch": 6.6436903499469775, + "grad_norm": 0.03456702455878258, + "learning_rate": 2.777295147639385e-06, + "loss": 0.00059733, + "memory(GiB)": 26.31, + "step": 6265, + "train_speed(iter/s)": 0.580653 + }, + { + "acc": 0.99989538, + "epoch": 6.648992576882291, + "grad_norm": 0.032612890005111694, + "learning_rate": 2.7694463656951816e-06, + "loss": 0.00089563, + "memory(GiB)": 26.31, + "step": 6270, + "train_speed(iter/s)": 0.580658 + }, + { + "acc": 0.99964666, + "epoch": 6.654294803817604, + "grad_norm": 0.13008488714694977, + "learning_rate": 2.761604443805025e-06, + "loss": 0.00074435, + "memory(GiB)": 26.31, + "step": 6275, + "train_speed(iter/s)": 0.580662 + }, + { + "acc": 1.0, + "epoch": 6.659597030752916, + "grad_norm": 0.03558952361345291, + "learning_rate": 2.7537694060812935e-06, + "loss": 0.00016105, + "memory(GiB)": 26.31, + "step": 6280, + "train_speed(iter/s)": 0.580663 + }, + { + "acc": 0.99962788, + "epoch": 6.664899257688229, + "grad_norm": 0.5213852524757385, + "learning_rate": 2.7459412766151992e-06, + "loss": 0.0011747, + "memory(GiB)": 26.31, + "step": 6285, + "train_speed(iter/s)": 0.580668 + }, + { + "acc": 0.99911413, + "epoch": 6.670201484623542, + "grad_norm": 0.12430860847234726, + "learning_rate": 2.738120079476708e-06, + "loss": 0.00155401, + "memory(GiB)": 26.31, + "step": 6290, + "train_speed(iter/s)": 0.580669 + }, + { + "acc": 0.99988937, + "epoch": 6.6755037115588545, + "grad_norm": 0.04833541810512543, + "learning_rate": 2.730305838714476e-06, + "loss": 0.00025307, + "memory(GiB)": 26.31, + "step": 6295, + "train_speed(iter/s)": 0.58067 + }, + { + "acc": 0.99987307, + "epoch": 6.680805938494167, + "grad_norm": 0.21674484014511108, + "learning_rate": 2.7224985783557656e-06, + "loss": 0.00076243, + "memory(GiB)": 26.31, + "step": 6300, + "train_speed(iter/s)": 0.580672 + }, + { + "acc": 0.99974499, + "epoch": 6.686108165429481, + "grad_norm": 0.062140047550201416, + "learning_rate": 2.7146983224063777e-06, + "loss": 0.00059126, + "memory(GiB)": 26.31, + "step": 6305, + "train_speed(iter/s)": 0.580674 + }, + { + "acc": 0.99949837, + "epoch": 6.691410392364793, + "grad_norm": 0.5021100044250488, + "learning_rate": 2.7069050948505743e-06, + "loss": 0.00240989, + "memory(GiB)": 26.31, + "step": 6310, + "train_speed(iter/s)": 0.580675 + }, + { + "acc": 0.99959183, + "epoch": 6.696712619300106, + "grad_norm": 0.02061682753264904, + "learning_rate": 2.6991189196510078e-06, + "loss": 0.00075686, + "memory(GiB)": 26.31, + "step": 6315, + "train_speed(iter/s)": 0.580677 + }, + { + "acc": 0.99961433, + "epoch": 6.702014846235419, + "grad_norm": 0.011941331438720226, + "learning_rate": 2.691339820748646e-06, + "loss": 0.00153979, + "memory(GiB)": 26.31, + "step": 6320, + "train_speed(iter/s)": 0.580678 + }, + { + "acc": 0.9996357, + "epoch": 6.7073170731707314, + "grad_norm": 0.03024384006857872, + "learning_rate": 2.683567822062698e-06, + "loss": 0.00087377, + "memory(GiB)": 26.31, + "step": 6325, + "train_speed(iter/s)": 0.58068 + }, + { + "acc": 0.99974327, + "epoch": 6.712619300106045, + "grad_norm": 0.0527118556201458, + "learning_rate": 2.6758029474905423e-06, + "loss": 0.00085505, + "memory(GiB)": 26.31, + "step": 6330, + "train_speed(iter/s)": 0.580683 + }, + { + "acc": 0.99950848, + "epoch": 6.717921527041358, + "grad_norm": 0.009388357400894165, + "learning_rate": 2.6680452209076494e-06, + "loss": 0.00144344, + "memory(GiB)": 26.31, + "step": 6335, + "train_speed(iter/s)": 0.580686 + }, + { + "acc": 0.99961433, + "epoch": 6.72322375397667, + "grad_norm": 0.1637595295906067, + "learning_rate": 2.6602946661675144e-06, + "loss": 0.00094269, + "memory(GiB)": 26.31, + "step": 6340, + "train_speed(iter/s)": 0.580692 + }, + { + "acc": 0.99936409, + "epoch": 6.728525980911983, + "grad_norm": 0.3204542100429535, + "learning_rate": 2.6525513071015786e-06, + "loss": 0.0012643, + "memory(GiB)": 26.31, + "step": 6345, + "train_speed(iter/s)": 0.580696 + }, + { + "acc": 0.99953194, + "epoch": 6.733828207847296, + "grad_norm": 0.26317375898361206, + "learning_rate": 2.6448151675191585e-06, + "loss": 0.00247378, + "memory(GiB)": 26.31, + "step": 6350, + "train_speed(iter/s)": 0.580697 + }, + { + "acc": 0.99960594, + "epoch": 6.739130434782608, + "grad_norm": 0.04262370988726616, + "learning_rate": 2.6370862712073693e-06, + "loss": 0.00100168, + "memory(GiB)": 26.31, + "step": 6355, + "train_speed(iter/s)": 0.580701 + }, + { + "acc": 0.99975929, + "epoch": 6.744432661717921, + "grad_norm": 0.030493035912513733, + "learning_rate": 2.629364641931062e-06, + "loss": 0.00062508, + "memory(GiB)": 26.31, + "step": 6360, + "train_speed(iter/s)": 0.580702 + }, + { + "acc": 0.99924755, + "epoch": 6.749734888653235, + "grad_norm": 0.13393616676330566, + "learning_rate": 2.6216503034327344e-06, + "loss": 0.00160743, + "memory(GiB)": 26.31, + "step": 6365, + "train_speed(iter/s)": 0.580705 + }, + { + "acc": 0.9998908, + "epoch": 6.755037115588547, + "grad_norm": 0.07078687846660614, + "learning_rate": 2.613943279432472e-06, + "loss": 0.00053432, + "memory(GiB)": 26.31, + "step": 6370, + "train_speed(iter/s)": 0.580709 + }, + { + "acc": 0.99951706, + "epoch": 6.76033934252386, + "grad_norm": 0.05957571789622307, + "learning_rate": 2.606243593627868e-06, + "loss": 0.00148512, + "memory(GiB)": 26.31, + "step": 6375, + "train_speed(iter/s)": 0.580711 + }, + { + "acc": 0.9995348, + "epoch": 6.765641569459173, + "grad_norm": 0.12280748784542084, + "learning_rate": 2.598551269693951e-06, + "loss": 0.00106705, + "memory(GiB)": 26.31, + "step": 6380, + "train_speed(iter/s)": 0.580712 + }, + { + "acc": 0.99965029, + "epoch": 6.770943796394485, + "grad_norm": 0.6474934816360474, + "learning_rate": 2.590866331283114e-06, + "loss": 0.00132872, + "memory(GiB)": 26.31, + "step": 6385, + "train_speed(iter/s)": 0.580718 + }, + { + "acc": 1.0, + "epoch": 6.776246023329799, + "grad_norm": 0.0352616161108017, + "learning_rate": 2.58318880202504e-06, + "loss": 0.00019549, + "memory(GiB)": 26.31, + "step": 6390, + "train_speed(iter/s)": 0.580719 + }, + { + "acc": 0.99986486, + "epoch": 6.781548250265112, + "grad_norm": 0.0694318413734436, + "learning_rate": 2.5755187055266353e-06, + "loss": 0.000316, + "memory(GiB)": 26.31, + "step": 6395, + "train_speed(iter/s)": 0.580724 + }, + { + "acc": 0.99922085, + "epoch": 6.786850477200424, + "grad_norm": 0.0368763767182827, + "learning_rate": 2.567856065371946e-06, + "loss": 0.00130442, + "memory(GiB)": 26.31, + "step": 6400, + "train_speed(iter/s)": 0.580728 + }, + { + "acc": 0.99974422, + "epoch": 6.792152704135737, + "grad_norm": 0.32792964577674866, + "learning_rate": 2.5602009051220933e-06, + "loss": 0.00090845, + "memory(GiB)": 26.31, + "step": 6405, + "train_speed(iter/s)": 0.580729 + }, + { + "acc": 0.99988422, + "epoch": 6.79745493107105, + "grad_norm": 0.10647040605545044, + "learning_rate": 2.5525532483152006e-06, + "loss": 0.00050243, + "memory(GiB)": 26.31, + "step": 6410, + "train_speed(iter/s)": 0.580731 + }, + { + "acc": 1.0, + "epoch": 6.802757158006362, + "grad_norm": 0.005562162026762962, + "learning_rate": 2.5449131184663174e-06, + "loss": 0.00012104, + "memory(GiB)": 26.31, + "step": 6415, + "train_speed(iter/s)": 0.580734 + }, + { + "acc": 0.99987679, + "epoch": 6.808059384941675, + "grad_norm": 0.031007250770926476, + "learning_rate": 2.5372805390673477e-06, + "loss": 0.0004752, + "memory(GiB)": 26.31, + "step": 6420, + "train_speed(iter/s)": 0.580736 + }, + { + "acc": 0.99961834, + "epoch": 6.813361611876989, + "grad_norm": 0.0886315107345581, + "learning_rate": 2.529655533586987e-06, + "loss": 0.0014479, + "memory(GiB)": 26.31, + "step": 6425, + "train_speed(iter/s)": 0.580737 + }, + { + "acc": 0.99987803, + "epoch": 6.818663838812301, + "grad_norm": 0.01469503901898861, + "learning_rate": 2.522038125470636e-06, + "loss": 0.00058394, + "memory(GiB)": 26.31, + "step": 6430, + "train_speed(iter/s)": 0.580739 + }, + { + "acc": 0.9995821, + "epoch": 6.823966065747614, + "grad_norm": 0.09232966601848602, + "learning_rate": 2.5144283381403357e-06, + "loss": 0.00080152, + "memory(GiB)": 26.31, + "step": 6435, + "train_speed(iter/s)": 0.580741 + }, + { + "acc": 1.0, + "epoch": 6.829268292682927, + "grad_norm": 0.032236695289611816, + "learning_rate": 2.5068261949946947e-06, + "loss": 0.00030665, + "memory(GiB)": 26.31, + "step": 6440, + "train_speed(iter/s)": 0.580742 + }, + { + "acc": 0.99977741, + "epoch": 6.834570519618239, + "grad_norm": 0.011980585753917694, + "learning_rate": 2.49923171940882e-06, + "loss": 0.00056685, + "memory(GiB)": 26.31, + "step": 6445, + "train_speed(iter/s)": 0.580747 + }, + { + "acc": 0.99964066, + "epoch": 6.839872746553553, + "grad_norm": 0.2035069316625595, + "learning_rate": 2.491644934734238e-06, + "loss": 0.00075537, + "memory(GiB)": 26.31, + "step": 6450, + "train_speed(iter/s)": 0.580748 + }, + { + "acc": 0.99988585, + "epoch": 6.845174973488866, + "grad_norm": 0.008113852702081203, + "learning_rate": 2.4840658642988314e-06, + "loss": 0.00041419, + "memory(GiB)": 26.31, + "step": 6455, + "train_speed(iter/s)": 0.58075 + }, + { + "acc": 0.99985638, + "epoch": 6.850477200424178, + "grad_norm": 0.04093657806515694, + "learning_rate": 2.476494531406759e-06, + "loss": 0.00032245, + "memory(GiB)": 26.31, + "step": 6460, + "train_speed(iter/s)": 0.580754 + }, + { + "acc": 0.99964466, + "epoch": 6.855779427359491, + "grad_norm": 0.2267787754535675, + "learning_rate": 2.468930959338392e-06, + "loss": 0.00157925, + "memory(GiB)": 26.31, + "step": 6465, + "train_speed(iter/s)": 0.580755 + }, + { + "acc": 0.99977474, + "epoch": 6.861081654294804, + "grad_norm": 0.025810925289988518, + "learning_rate": 2.4613751713502355e-06, + "loss": 0.00087813, + "memory(GiB)": 26.31, + "step": 6470, + "train_speed(iter/s)": 0.58076 + }, + { + "acc": 0.99976044, + "epoch": 6.866383881230116, + "grad_norm": 0.06630630046129227, + "learning_rate": 2.4538271906748628e-06, + "loss": 0.00129472, + "memory(GiB)": 26.31, + "step": 6475, + "train_speed(iter/s)": 0.580761 + }, + { + "acc": 0.99987984, + "epoch": 6.871686108165429, + "grad_norm": 0.162663072347641, + "learning_rate": 2.446287040520838e-06, + "loss": 0.00037377, + "memory(GiB)": 26.31, + "step": 6480, + "train_speed(iter/s)": 0.580763 + }, + { + "acc": 0.99947395, + "epoch": 6.876988335100743, + "grad_norm": 0.961998462677002, + "learning_rate": 2.4387547440726496e-06, + "loss": 0.00098506, + "memory(GiB)": 26.31, + "step": 6485, + "train_speed(iter/s)": 0.580764 + }, + { + "acc": 0.99952316, + "epoch": 6.882290562036055, + "grad_norm": 0.5905699729919434, + "learning_rate": 2.431230324490641e-06, + "loss": 0.00158523, + "memory(GiB)": 26.31, + "step": 6490, + "train_speed(iter/s)": 0.580766 + }, + { + "acc": 0.99961891, + "epoch": 6.887592788971368, + "grad_norm": 0.2418178915977478, + "learning_rate": 2.42371380491093e-06, + "loss": 0.00067505, + "memory(GiB)": 26.31, + "step": 6495, + "train_speed(iter/s)": 0.580771 + }, + { + "acc": 0.99974556, + "epoch": 6.892895015906681, + "grad_norm": 0.6824781894683838, + "learning_rate": 2.4162052084453438e-06, + "loss": 0.00131153, + "memory(GiB)": 26.31, + "step": 6500, + "train_speed(iter/s)": 0.580775 + }, + { + "acc": 0.99976358, + "epoch": 6.898197242841993, + "grad_norm": 0.055095236748456955, + "learning_rate": 2.408704558181354e-06, + "loss": 0.00070585, + "memory(GiB)": 26.31, + "step": 6505, + "train_speed(iter/s)": 0.580781 + }, + { + "acc": 0.99989271, + "epoch": 6.903499469777307, + "grad_norm": 0.035954736173152924, + "learning_rate": 2.4012118771819924e-06, + "loss": 0.00029226, + "memory(GiB)": 26.31, + "step": 6510, + "train_speed(iter/s)": 0.580784 + }, + { + "acc": 1.0, + "epoch": 6.90880169671262, + "grad_norm": 0.03536577895283699, + "learning_rate": 2.3937271884857895e-06, + "loss": 0.00017438, + "memory(GiB)": 26.31, + "step": 6515, + "train_speed(iter/s)": 0.580786 + }, + { + "acc": 0.9996439, + "epoch": 6.914103923647932, + "grad_norm": 0.025644930079579353, + "learning_rate": 2.3862505151067004e-06, + "loss": 0.00101558, + "memory(GiB)": 26.31, + "step": 6520, + "train_speed(iter/s)": 0.580788 + }, + { + "acc": 0.99974003, + "epoch": 6.919406150583245, + "grad_norm": 0.014122070744633675, + "learning_rate": 2.378781880034036e-06, + "loss": 0.00101173, + "memory(GiB)": 26.31, + "step": 6525, + "train_speed(iter/s)": 0.580792 + }, + { + "acc": 0.99975986, + "epoch": 6.924708377518558, + "grad_norm": 0.35527971386909485, + "learning_rate": 2.3713213062323886e-06, + "loss": 0.00107869, + "memory(GiB)": 26.31, + "step": 6530, + "train_speed(iter/s)": 0.580799 + }, + { + "acc": 0.99964218, + "epoch": 6.93001060445387, + "grad_norm": 0.8211327791213989, + "learning_rate": 2.363868816641566e-06, + "loss": 0.00098987, + "memory(GiB)": 26.31, + "step": 6535, + "train_speed(iter/s)": 0.580804 + }, + { + "acc": 0.99974022, + "epoch": 6.935312831389183, + "grad_norm": 0.1665266454219818, + "learning_rate": 2.3564244341765173e-06, + "loss": 0.00067698, + "memory(GiB)": 26.31, + "step": 6540, + "train_speed(iter/s)": 0.580808 + }, + { + "acc": 1.0, + "epoch": 6.9406150583244965, + "grad_norm": 0.0726097822189331, + "learning_rate": 2.3489881817272647e-06, + "loss": 0.00023324, + "memory(GiB)": 26.31, + "step": 6545, + "train_speed(iter/s)": 0.580812 + }, + { + "acc": 0.99985952, + "epoch": 6.945917285259809, + "grad_norm": 0.018996795639395714, + "learning_rate": 2.3415600821588293e-06, + "loss": 0.00049761, + "memory(GiB)": 26.31, + "step": 6550, + "train_speed(iter/s)": 0.580813 + }, + { + "acc": 0.99961214, + "epoch": 6.951219512195122, + "grad_norm": 0.3382207155227661, + "learning_rate": 2.33414015831117e-06, + "loss": 0.00079759, + "memory(GiB)": 26.31, + "step": 6555, + "train_speed(iter/s)": 0.580816 + }, + { + "acc": 1.0, + "epoch": 6.956521739130435, + "grad_norm": 0.018769921734929085, + "learning_rate": 2.3267284329991015e-06, + "loss": 0.00018237, + "memory(GiB)": 26.31, + "step": 6560, + "train_speed(iter/s)": 0.580823 + }, + { + "acc": 1.0, + "epoch": 6.961823966065747, + "grad_norm": 0.0020895751658827066, + "learning_rate": 2.3193249290122304e-06, + "loss": 4.325e-05, + "memory(GiB)": 26.31, + "step": 6565, + "train_speed(iter/s)": 0.580824 + }, + { + "acc": 0.99958591, + "epoch": 6.967126193001061, + "grad_norm": 0.030543280765414238, + "learning_rate": 2.3119296691148854e-06, + "loss": 0.00201222, + "memory(GiB)": 26.31, + "step": 6570, + "train_speed(iter/s)": 0.580829 + }, + { + "acc": 1.0, + "epoch": 6.9724284199363735, + "grad_norm": 0.012603729963302612, + "learning_rate": 2.3045426760460463e-06, + "loss": 0.00011642, + "memory(GiB)": 26.31, + "step": 6575, + "train_speed(iter/s)": 0.58083 + }, + { + "acc": 0.99963036, + "epoch": 6.977730646871686, + "grad_norm": 0.09143660217523575, + "learning_rate": 2.2971639725192722e-06, + "loss": 0.00087152, + "memory(GiB)": 26.31, + "step": 6580, + "train_speed(iter/s)": 0.580831 + }, + { + "acc": 1.0, + "epoch": 6.983032873806999, + "grad_norm": 0.02645263820886612, + "learning_rate": 2.2897935812226367e-06, + "loss": 0.00021495, + "memory(GiB)": 26.31, + "step": 6585, + "train_speed(iter/s)": 0.580836 + }, + { + "acc": 0.99941025, + "epoch": 6.988335100742312, + "grad_norm": 1.0920523405075073, + "learning_rate": 2.2824315248186522e-06, + "loss": 0.00135035, + "memory(GiB)": 26.31, + "step": 6590, + "train_speed(iter/s)": 0.580838 + }, + { + "acc": 0.99966278, + "epoch": 6.993637327677624, + "grad_norm": 0.0877826139330864, + "learning_rate": 2.2750778259442033e-06, + "loss": 0.00058898, + "memory(GiB)": 26.31, + "step": 6595, + "train_speed(iter/s)": 0.580838 + }, + { + "acc": 0.99973173, + "epoch": 6.998939554612938, + "grad_norm": 0.10175105929374695, + "learning_rate": 2.267732507210478e-06, + "loss": 0.00033401, + "memory(GiB)": 26.31, + "step": 6600, + "train_speed(iter/s)": 0.580839 + }, + { + "acc": 1.0, + "epoch": 7.0042417815482505, + "grad_norm": 0.03302436321973801, + "learning_rate": 2.2603955912028968e-06, + "loss": 0.00014786, + "memory(GiB)": 26.31, + "step": 6605, + "train_speed(iter/s)": 0.580791 + }, + { + "acc": 0.99987745, + "epoch": 7.009544008483563, + "grad_norm": 0.049822960048913956, + "learning_rate": 2.2530671004810408e-06, + "loss": 0.00034393, + "memory(GiB)": 26.31, + "step": 6610, + "train_speed(iter/s)": 0.580792 + }, + { + "acc": 0.99964399, + "epoch": 7.014846235418876, + "grad_norm": 0.014102225191891193, + "learning_rate": 2.24574705757859e-06, + "loss": 0.0008734, + "memory(GiB)": 26.31, + "step": 6615, + "train_speed(iter/s)": 0.580794 + }, + { + "acc": 0.99962807, + "epoch": 7.0201484623541885, + "grad_norm": 0.02806282229721546, + "learning_rate": 2.238435485003244e-06, + "loss": 0.00088563, + "memory(GiB)": 26.31, + "step": 6620, + "train_speed(iter/s)": 0.580795 + }, + { + "acc": 0.99974384, + "epoch": 7.025450689289501, + "grad_norm": 0.05678127333521843, + "learning_rate": 2.231132405236663e-06, + "loss": 0.00090697, + "memory(GiB)": 26.31, + "step": 6625, + "train_speed(iter/s)": 0.580799 + }, + { + "acc": 0.99975758, + "epoch": 7.030752916224815, + "grad_norm": 0.0719962865114212, + "learning_rate": 2.223837840734388e-06, + "loss": 0.00198463, + "memory(GiB)": 26.31, + "step": 6630, + "train_speed(iter/s)": 0.5808 + }, + { + "acc": 1.0, + "epoch": 7.0360551431601275, + "grad_norm": 0.022466253489255905, + "learning_rate": 2.2165518139257804e-06, + "loss": 4.929e-05, + "memory(GiB)": 26.31, + "step": 6635, + "train_speed(iter/s)": 0.580802 + }, + { + "acc": 0.9998724, + "epoch": 7.04135737009544, + "grad_norm": 0.05142033100128174, + "learning_rate": 2.2092743472139495e-06, + "loss": 0.00026469, + "memory(GiB)": 26.31, + "step": 6640, + "train_speed(iter/s)": 0.580804 + }, + { + "acc": 0.9998826, + "epoch": 7.046659597030753, + "grad_norm": 0.006165654398500919, + "learning_rate": 2.2020054629756837e-06, + "loss": 0.00029751, + "memory(GiB)": 26.31, + "step": 6645, + "train_speed(iter/s)": 0.580805 + }, + { + "acc": 0.99987984, + "epoch": 7.0519618239660655, + "grad_norm": 0.14492355287075043, + "learning_rate": 2.1947451835613813e-06, + "loss": 0.00028882, + "memory(GiB)": 26.31, + "step": 6650, + "train_speed(iter/s)": 0.580811 + }, + { + "acc": 0.99928799, + "epoch": 7.057264050901378, + "grad_norm": 0.21041271090507507, + "learning_rate": 2.1874935312949828e-06, + "loss": 0.00141331, + "memory(GiB)": 26.31, + "step": 6655, + "train_speed(iter/s)": 0.580812 + }, + { + "acc": 0.99960728, + "epoch": 7.062566277836692, + "grad_norm": 0.048662737011909485, + "learning_rate": 2.180250528473903e-06, + "loss": 0.00084743, + "memory(GiB)": 26.31, + "step": 6660, + "train_speed(iter/s)": 0.580812 + }, + { + "acc": 0.99976778, + "epoch": 7.0678685047720045, + "grad_norm": 0.033747218549251556, + "learning_rate": 2.17301619736896e-06, + "loss": 0.00140712, + "memory(GiB)": 26.31, + "step": 6665, + "train_speed(iter/s)": 0.580813 + }, + { + "acc": 0.99985123, + "epoch": 7.073170731707317, + "grad_norm": 0.030595524236559868, + "learning_rate": 2.1657905602243093e-06, + "loss": 0.00018092, + "memory(GiB)": 26.31, + "step": 6670, + "train_speed(iter/s)": 0.580819 + }, + { + "acc": 0.99975853, + "epoch": 7.07847295864263, + "grad_norm": 0.24107412993907928, + "learning_rate": 2.1585736392573724e-06, + "loss": 0.0007273, + "memory(GiB)": 26.31, + "step": 6675, + "train_speed(iter/s)": 0.58082 + }, + { + "acc": 0.99972353, + "epoch": 7.0837751855779425, + "grad_norm": 0.01268590334802866, + "learning_rate": 2.1513654566587705e-06, + "loss": 0.00061308, + "memory(GiB)": 26.31, + "step": 6680, + "train_speed(iter/s)": 0.580821 + }, + { + "acc": 0.99951534, + "epoch": 7.089077412513255, + "grad_norm": 0.0627276599407196, + "learning_rate": 2.14416603459226e-06, + "loss": 0.00072982, + "memory(GiB)": 26.31, + "step": 6685, + "train_speed(iter/s)": 0.580822 + }, + { + "acc": 0.99988317, + "epoch": 7.094379639448569, + "grad_norm": 0.3131565451622009, + "learning_rate": 2.1369753951946548e-06, + "loss": 0.00025356, + "memory(GiB)": 26.31, + "step": 6690, + "train_speed(iter/s)": 0.580825 + }, + { + "acc": 0.99987745, + "epoch": 7.099681866383881, + "grad_norm": 0.020614469423890114, + "learning_rate": 2.129793560575767e-06, + "loss": 0.00057884, + "memory(GiB)": 26.31, + "step": 6695, + "train_speed(iter/s)": 0.580828 + }, + { + "acc": 0.99987869, + "epoch": 7.104984093319194, + "grad_norm": 0.041664209216833115, + "learning_rate": 2.122620552818335e-06, + "loss": 0.00065411, + "memory(GiB)": 26.31, + "step": 6700, + "train_speed(iter/s)": 0.580832 + }, + { + "acc": 1.0, + "epoch": 7.110286320254507, + "grad_norm": 0.025744246318936348, + "learning_rate": 2.115456393977956e-06, + "loss": 8.391e-05, + "memory(GiB)": 26.31, + "step": 6705, + "train_speed(iter/s)": 0.580832 + }, + { + "acc": 0.99976645, + "epoch": 7.1155885471898195, + "grad_norm": 0.06860660761594772, + "learning_rate": 2.1083011060830183e-06, + "loss": 0.00106212, + "memory(GiB)": 26.31, + "step": 6710, + "train_speed(iter/s)": 0.580833 + }, + { + "acc": 0.99988155, + "epoch": 7.120890774125132, + "grad_norm": 0.14570750296115875, + "learning_rate": 2.101154711134634e-06, + "loss": 0.00041477, + "memory(GiB)": 26.31, + "step": 6715, + "train_speed(iter/s)": 0.580835 + }, + { + "acc": 0.99988155, + "epoch": 7.126193001060446, + "grad_norm": 0.1013423353433609, + "learning_rate": 2.0940172311065734e-06, + "loss": 0.00021523, + "memory(GiB)": 26.31, + "step": 6720, + "train_speed(iter/s)": 0.580836 + }, + { + "acc": 0.99975052, + "epoch": 7.131495227995758, + "grad_norm": 0.10225103795528412, + "learning_rate": 2.086888687945192e-06, + "loss": 0.00051952, + "memory(GiB)": 26.31, + "step": 6725, + "train_speed(iter/s)": 0.580836 + }, + { + "acc": 0.99963379, + "epoch": 7.136797454931071, + "grad_norm": 0.03145146742463112, + "learning_rate": 2.079769103569367e-06, + "loss": 0.00069922, + "memory(GiB)": 26.31, + "step": 6730, + "train_speed(iter/s)": 0.580837 + }, + { + "acc": 0.99976254, + "epoch": 7.142099681866384, + "grad_norm": 0.009951247833669186, + "learning_rate": 2.0726584998704293e-06, + "loss": 0.00049842, + "memory(GiB)": 26.31, + "step": 6735, + "train_speed(iter/s)": 0.580837 + }, + { + "acc": 1.0, + "epoch": 7.1474019088016965, + "grad_norm": 0.02770557440817356, + "learning_rate": 2.065556898712098e-06, + "loss": 6.999e-05, + "memory(GiB)": 26.31, + "step": 6740, + "train_speed(iter/s)": 0.580841 + }, + { + "acc": 0.99976578, + "epoch": 7.152704135737009, + "grad_norm": 0.15290257334709167, + "learning_rate": 2.0584643219304063e-06, + "loss": 0.00046628, + "memory(GiB)": 26.31, + "step": 6745, + "train_speed(iter/s)": 0.580842 + }, + { + "acc": 0.99974947, + "epoch": 7.158006362672323, + "grad_norm": 0.025673704221844673, + "learning_rate": 2.051380791333642e-06, + "loss": 0.00038622, + "memory(GiB)": 26.31, + "step": 6750, + "train_speed(iter/s)": 0.580842 + }, + { + "acc": 0.99988422, + "epoch": 7.163308589607635, + "grad_norm": 0.005067603662610054, + "learning_rate": 2.044306328702281e-06, + "loss": 0.00027976, + "memory(GiB)": 26.31, + "step": 6755, + "train_speed(iter/s)": 0.580845 + }, + { + "acc": 0.99974613, + "epoch": 7.168610816542948, + "grad_norm": 0.004405306186527014, + "learning_rate": 2.0372409557889127e-06, + "loss": 0.00149132, + "memory(GiB)": 26.31, + "step": 6760, + "train_speed(iter/s)": 0.580846 + }, + { + "acc": 0.99988213, + "epoch": 7.173913043478261, + "grad_norm": 0.34647825360298157, + "learning_rate": 2.030184694318177e-06, + "loss": 0.00023813, + "memory(GiB)": 26.31, + "step": 6765, + "train_speed(iter/s)": 0.58085 + }, + { + "acc": 1.0, + "epoch": 7.179215270413573, + "grad_norm": 0.1090255081653595, + "learning_rate": 2.0231375659867e-06, + "loss": 0.00013765, + "memory(GiB)": 26.31, + "step": 6770, + "train_speed(iter/s)": 0.58085 + }, + { + "acc": 1.0, + "epoch": 7.184517497348886, + "grad_norm": 0.009488901123404503, + "learning_rate": 2.0160995924630258e-06, + "loss": 0.00031856, + "memory(GiB)": 26.31, + "step": 6775, + "train_speed(iter/s)": 0.580851 + }, + { + "acc": 0.9994812, + "epoch": 7.1898197242842, + "grad_norm": 0.13912135362625122, + "learning_rate": 2.0090707953875464e-06, + "loss": 0.00101557, + "memory(GiB)": 26.31, + "step": 6780, + "train_speed(iter/s)": 0.580853 + }, + { + "acc": 1.0, + "epoch": 7.195121951219512, + "grad_norm": 0.006838640663772821, + "learning_rate": 2.00205119637244e-06, + "loss": 0.00016393, + "memory(GiB)": 26.31, + "step": 6785, + "train_speed(iter/s)": 0.580853 + }, + { + "acc": 0.99976959, + "epoch": 7.200424178154825, + "grad_norm": 1.121747612953186, + "learning_rate": 1.9950408170016023e-06, + "loss": 0.00135582, + "memory(GiB)": 26.31, + "step": 6790, + "train_speed(iter/s)": 0.580855 + }, + { + "acc": 1.0, + "epoch": 7.205726405090138, + "grad_norm": 0.003712412202730775, + "learning_rate": 1.98803967883058e-06, + "loss": 0.00012902, + "memory(GiB)": 26.31, + "step": 6795, + "train_speed(iter/s)": 0.580855 + }, + { + "acc": 0.99988689, + "epoch": 7.21102863202545, + "grad_norm": 0.007916714064776897, + "learning_rate": 1.981047803386506e-06, + "loss": 0.00045676, + "memory(GiB)": 26.31, + "step": 6800, + "train_speed(iter/s)": 0.580856 + }, + { + "acc": 0.99976072, + "epoch": 7.216330858960763, + "grad_norm": 0.017491836100816727, + "learning_rate": 1.97406521216803e-06, + "loss": 0.00086029, + "memory(GiB)": 26.31, + "step": 6805, + "train_speed(iter/s)": 0.580857 + }, + { + "acc": 0.99939404, + "epoch": 7.221633085896077, + "grad_norm": 0.07758060842752457, + "learning_rate": 1.9670919266452552e-06, + "loss": 0.00167624, + "memory(GiB)": 26.31, + "step": 6810, + "train_speed(iter/s)": 0.580857 + }, + { + "acc": 0.9996129, + "epoch": 7.226935312831389, + "grad_norm": 0.22923319041728973, + "learning_rate": 1.960127968259675e-06, + "loss": 0.00129077, + "memory(GiB)": 26.31, + "step": 6815, + "train_speed(iter/s)": 0.580859 + }, + { + "acc": 1.0, + "epoch": 7.232237539766702, + "grad_norm": 0.027276834473013878, + "learning_rate": 1.9531733584240996e-06, + "loss": 0.00020395, + "memory(GiB)": 26.31, + "step": 6820, + "train_speed(iter/s)": 0.580859 + }, + { + "acc": 0.99987497, + "epoch": 7.237539766702015, + "grad_norm": 0.010682585649192333, + "learning_rate": 1.946228118522594e-06, + "loss": 0.00020047, + "memory(GiB)": 26.31, + "step": 6825, + "train_speed(iter/s)": 0.580861 + }, + { + "acc": 0.99988422, + "epoch": 7.242841993637327, + "grad_norm": 0.05258682742714882, + "learning_rate": 1.9392922699104164e-06, + "loss": 0.00039802, + "memory(GiB)": 26.31, + "step": 6830, + "train_speed(iter/s)": 0.580862 + }, + { + "acc": 0.99986038, + "epoch": 7.248144220572641, + "grad_norm": 0.140438511967659, + "learning_rate": 1.9323658339139455e-06, + "loss": 0.00027934, + "memory(GiB)": 26.31, + "step": 6835, + "train_speed(iter/s)": 0.580864 + }, + { + "acc": 0.99959145, + "epoch": 7.253446447507954, + "grad_norm": 0.024344947189092636, + "learning_rate": 1.9254488318306183e-06, + "loss": 0.00097043, + "memory(GiB)": 26.31, + "step": 6840, + "train_speed(iter/s)": 0.580864 + }, + { + "acc": 0.99974985, + "epoch": 7.258748674443266, + "grad_norm": 0.04792139679193497, + "learning_rate": 1.918541284928866e-06, + "loss": 0.00058359, + "memory(GiB)": 26.31, + "step": 6845, + "train_speed(iter/s)": 0.580867 + }, + { + "acc": 0.99988098, + "epoch": 7.264050901378579, + "grad_norm": 0.014974468387663364, + "learning_rate": 1.911643214448044e-06, + "loss": 0.00070725, + "memory(GiB)": 26.31, + "step": 6850, + "train_speed(iter/s)": 0.580868 + }, + { + "acc": 0.99977837, + "epoch": 7.269353128313892, + "grad_norm": 0.011281152255833149, + "learning_rate": 1.9047546415983719e-06, + "loss": 0.00041005, + "memory(GiB)": 26.31, + "step": 6855, + "train_speed(iter/s)": 0.580868 + }, + { + "acc": 0.99963131, + "epoch": 7.274655355249204, + "grad_norm": 0.06311699002981186, + "learning_rate": 1.897875587560866e-06, + "loss": 0.00196712, + "memory(GiB)": 26.31, + "step": 6860, + "train_speed(iter/s)": 0.580869 + }, + { + "acc": 0.99988041, + "epoch": 7.279957582184517, + "grad_norm": 0.28648215532302856, + "learning_rate": 1.8910060734872732e-06, + "loss": 0.00027213, + "memory(GiB)": 26.31, + "step": 6865, + "train_speed(iter/s)": 0.58087 + }, + { + "acc": 0.99951057, + "epoch": 7.285259809119831, + "grad_norm": 0.06947100162506104, + "learning_rate": 1.8841461205000073e-06, + "loss": 0.00103844, + "memory(GiB)": 26.31, + "step": 6870, + "train_speed(iter/s)": 0.580872 + }, + { + "acc": 0.99936943, + "epoch": 7.290562036055143, + "grad_norm": 0.1860935091972351, + "learning_rate": 1.8772957496920822e-06, + "loss": 0.00224305, + "memory(GiB)": 26.31, + "step": 6875, + "train_speed(iter/s)": 0.580875 + }, + { + "acc": 0.99958391, + "epoch": 7.295864262990456, + "grad_norm": 0.05386153236031532, + "learning_rate": 1.8704549821270519e-06, + "loss": 0.00067724, + "memory(GiB)": 26.31, + "step": 6880, + "train_speed(iter/s)": 0.580881 + }, + { + "acc": 0.99984941, + "epoch": 7.301166489925769, + "grad_norm": 0.06292575597763062, + "learning_rate": 1.8636238388389394e-06, + "loss": 0.00104737, + "memory(GiB)": 26.31, + "step": 6885, + "train_speed(iter/s)": 0.580885 + }, + { + "acc": 0.99929142, + "epoch": 7.306468716861081, + "grad_norm": 0.14226506650447845, + "learning_rate": 1.8568023408321762e-06, + "loss": 0.00195795, + "memory(GiB)": 26.31, + "step": 6890, + "train_speed(iter/s)": 0.580884 + }, + { + "acc": 0.99963522, + "epoch": 7.311770943796395, + "grad_norm": 0.09832887351512909, + "learning_rate": 1.8499905090815348e-06, + "loss": 0.00130671, + "memory(GiB)": 26.31, + "step": 6895, + "train_speed(iter/s)": 0.580887 + }, + { + "acc": 0.99989033, + "epoch": 7.317073170731708, + "grad_norm": 0.03976750746369362, + "learning_rate": 1.8431883645320677e-06, + "loss": 0.00054767, + "memory(GiB)": 26.31, + "step": 6900, + "train_speed(iter/s)": 0.580888 + }, + { + "acc": 0.99963274, + "epoch": 7.32237539766702, + "grad_norm": 0.0048141395673155785, + "learning_rate": 1.8363959280990408e-06, + "loss": 0.00075108, + "memory(GiB)": 26.31, + "step": 6905, + "train_speed(iter/s)": 0.580889 + }, + { + "acc": 0.99974098, + "epoch": 7.327677624602333, + "grad_norm": 0.9042002558708191, + "learning_rate": 1.8296132206678684e-06, + "loss": 0.00052153, + "memory(GiB)": 26.31, + "step": 6910, + "train_speed(iter/s)": 0.580895 + }, + { + "acc": 0.99987803, + "epoch": 7.332979851537646, + "grad_norm": 0.06263940036296844, + "learning_rate": 1.8228402630940513e-06, + "loss": 0.00030076, + "memory(GiB)": 26.31, + "step": 6915, + "train_speed(iter/s)": 0.580896 + }, + { + "acc": 0.99974251, + "epoch": 7.338282078472958, + "grad_norm": 0.05377192422747612, + "learning_rate": 1.8160770762031102e-06, + "loss": 0.00032192, + "memory(GiB)": 26.31, + "step": 6920, + "train_speed(iter/s)": 0.580897 + }, + { + "acc": 0.99951305, + "epoch": 7.343584305408271, + "grad_norm": 0.3651827871799469, + "learning_rate": 1.8093236807905241e-06, + "loss": 0.00175905, + "memory(GiB)": 26.31, + "step": 6925, + "train_speed(iter/s)": 0.580899 + }, + { + "acc": 0.99974251, + "epoch": 7.348886532343585, + "grad_norm": 0.0032980055548250675, + "learning_rate": 1.8025800976216638e-06, + "loss": 0.00044379, + "memory(GiB)": 26.31, + "step": 6930, + "train_speed(iter/s)": 0.5809 + }, + { + "acc": 1.0, + "epoch": 7.354188759278897, + "grad_norm": 0.1116834357380867, + "learning_rate": 1.795846347431729e-06, + "loss": 8.933e-05, + "memory(GiB)": 26.31, + "step": 6935, + "train_speed(iter/s)": 0.580907 + }, + { + "acc": 0.99988585, + "epoch": 7.35949098621421, + "grad_norm": 0.0936645120382309, + "learning_rate": 1.789122450925689e-06, + "loss": 0.00096569, + "memory(GiB)": 26.31, + "step": 6940, + "train_speed(iter/s)": 0.58091 + }, + { + "acc": 0.99987116, + "epoch": 7.364793213149523, + "grad_norm": 0.01857062429189682, + "learning_rate": 1.782408428778208e-06, + "loss": 0.00049213, + "memory(GiB)": 26.31, + "step": 6945, + "train_speed(iter/s)": 0.580912 + }, + { + "acc": 0.99951744, + "epoch": 7.370095440084835, + "grad_norm": 0.12067139893770218, + "learning_rate": 1.7757043016335974e-06, + "loss": 0.00087103, + "memory(GiB)": 26.31, + "step": 6950, + "train_speed(iter/s)": 0.580912 + }, + { + "acc": 0.99972391, + "epoch": 7.375397667020149, + "grad_norm": 0.03747297078371048, + "learning_rate": 1.7690100901057356e-06, + "loss": 0.00047853, + "memory(GiB)": 26.31, + "step": 6955, + "train_speed(iter/s)": 0.580916 + }, + { + "acc": 1.0, + "epoch": 7.3806998939554616, + "grad_norm": 0.05759872496128082, + "learning_rate": 1.7623258147780149e-06, + "loss": 0.00017719, + "memory(GiB)": 26.31, + "step": 6960, + "train_speed(iter/s)": 0.580916 + }, + { + "acc": 0.99973459, + "epoch": 7.386002120890774, + "grad_norm": 0.012503202073276043, + "learning_rate": 1.7556514962032767e-06, + "loss": 0.0014173, + "memory(GiB)": 26.31, + "step": 6965, + "train_speed(iter/s)": 0.580917 + }, + { + "acc": 0.99985714, + "epoch": 7.391304347826087, + "grad_norm": 0.004481893964111805, + "learning_rate": 1.748987154903746e-06, + "loss": 0.00022981, + "memory(GiB)": 26.31, + "step": 6970, + "train_speed(iter/s)": 0.580917 + }, + { + "acc": 0.99988041, + "epoch": 7.3966065747614, + "grad_norm": 0.006199334282428026, + "learning_rate": 1.7423328113709714e-06, + "loss": 0.00029288, + "memory(GiB)": 26.31, + "step": 6975, + "train_speed(iter/s)": 0.580921 + }, + { + "acc": 0.99976082, + "epoch": 7.401908801696712, + "grad_norm": 0.04824783280491829, + "learning_rate": 1.735688486065758e-06, + "loss": 0.00051028, + "memory(GiB)": 26.31, + "step": 6980, + "train_speed(iter/s)": 0.580927 + }, + { + "acc": 0.99965048, + "epoch": 7.407211028632026, + "grad_norm": 0.013628070242702961, + "learning_rate": 1.7290541994181089e-06, + "loss": 0.00077747, + "memory(GiB)": 26.31, + "step": 6985, + "train_speed(iter/s)": 0.580928 + }, + { + "acc": 0.99987745, + "epoch": 7.4125132555673385, + "grad_norm": 0.16469603776931763, + "learning_rate": 1.722429971827159e-06, + "loss": 0.00054255, + "memory(GiB)": 26.31, + "step": 6990, + "train_speed(iter/s)": 0.580929 + }, + { + "acc": 0.99987049, + "epoch": 7.417815482502651, + "grad_norm": 0.020483864471316338, + "learning_rate": 1.7158158236611144e-06, + "loss": 0.00021344, + "memory(GiB)": 26.31, + "step": 6995, + "train_speed(iter/s)": 0.58093 + }, + { + "acc": 0.99958992, + "epoch": 7.423117709437964, + "grad_norm": 0.0954505056142807, + "learning_rate": 1.7092117752571875e-06, + "loss": 0.00145397, + "memory(GiB)": 26.31, + "step": 7000, + "train_speed(iter/s)": 0.58093 + }, + { + "acc": 0.99971962, + "epoch": 7.428419936373277, + "grad_norm": 0.43828457593917847, + "learning_rate": 1.7026178469215362e-06, + "loss": 0.00043246, + "memory(GiB)": 26.31, + "step": 7005, + "train_speed(iter/s)": 0.580933 + }, + { + "acc": 1.0, + "epoch": 7.433722163308589, + "grad_norm": 0.03433601185679436, + "learning_rate": 1.6960340589292051e-06, + "loss": 0.00017182, + "memory(GiB)": 26.31, + "step": 7010, + "train_speed(iter/s)": 0.580934 + }, + { + "acc": 0.99989223, + "epoch": 7.439024390243903, + "grad_norm": 0.021550146862864494, + "learning_rate": 1.689460431524054e-06, + "loss": 0.00029675, + "memory(GiB)": 26.31, + "step": 7015, + "train_speed(iter/s)": 0.580934 + }, + { + "acc": 0.99976244, + "epoch": 7.4443266171792155, + "grad_norm": 0.12215669453144073, + "learning_rate": 1.6828969849187032e-06, + "loss": 0.00055049, + "memory(GiB)": 26.31, + "step": 7020, + "train_speed(iter/s)": 0.580936 + }, + { + "acc": 1.0, + "epoch": 7.449628844114528, + "grad_norm": 0.01776804029941559, + "learning_rate": 1.6763437392944688e-06, + "loss": 0.00015911, + "memory(GiB)": 26.31, + "step": 7025, + "train_speed(iter/s)": 0.580937 + }, + { + "acc": 0.99964476, + "epoch": 7.454931071049841, + "grad_norm": 0.053600966930389404, + "learning_rate": 1.6698007148013e-06, + "loss": 0.00056505, + "memory(GiB)": 26.31, + "step": 7030, + "train_speed(iter/s)": 0.580936 + }, + { + "acc": 0.99975433, + "epoch": 7.4602332979851536, + "grad_norm": 0.01400748547166586, + "learning_rate": 1.66326793155772e-06, + "loss": 0.00049177, + "memory(GiB)": 26.31, + "step": 7035, + "train_speed(iter/s)": 0.580942 + }, + { + "acc": 1.0, + "epoch": 7.465535524920466, + "grad_norm": 0.09328199923038483, + "learning_rate": 1.6567454096507598e-06, + "loss": 0.0001565, + "memory(GiB)": 26.31, + "step": 7040, + "train_speed(iter/s)": 0.580943 + }, + { + "acc": 0.99989405, + "epoch": 7.47083775185578, + "grad_norm": 0.03550637513399124, + "learning_rate": 1.6502331691358995e-06, + "loss": 0.00025591, + "memory(GiB)": 26.31, + "step": 7045, + "train_speed(iter/s)": 0.580944 + }, + { + "acc": 0.99951725, + "epoch": 7.4761399787910925, + "grad_norm": 1.2384247779846191, + "learning_rate": 1.6437312300370084e-06, + "loss": 0.00159495, + "memory(GiB)": 26.31, + "step": 7050, + "train_speed(iter/s)": 0.580946 + }, + { + "acc": 0.99976501, + "epoch": 7.481442205726405, + "grad_norm": 0.026157498359680176, + "learning_rate": 1.6372396123462784e-06, + "loss": 0.0002734, + "memory(GiB)": 26.31, + "step": 7055, + "train_speed(iter/s)": 0.580946 + }, + { + "acc": 0.99975224, + "epoch": 7.486744432661718, + "grad_norm": 0.07278779149055481, + "learning_rate": 1.6307583360241658e-06, + "loss": 0.00074835, + "memory(GiB)": 26.31, + "step": 7060, + "train_speed(iter/s)": 0.580952 + }, + { + "acc": 1.0, + "epoch": 7.4920466595970305, + "grad_norm": 0.03025503270328045, + "learning_rate": 1.6242874209993275e-06, + "loss": 5.68e-05, + "memory(GiB)": 26.31, + "step": 7065, + "train_speed(iter/s)": 0.580955 + }, + { + "acc": 1.0, + "epoch": 7.497348886532343, + "grad_norm": 0.007420065347105265, + "learning_rate": 1.6178268871685647e-06, + "loss": 0.00010299, + "memory(GiB)": 26.31, + "step": 7070, + "train_speed(iter/s)": 0.580961 + }, + { + "acc": 1.0, + "epoch": 7.502651113467657, + "grad_norm": 0.05877010524272919, + "learning_rate": 1.611376754396754e-06, + "loss": 9.454e-05, + "memory(GiB)": 26.31, + "step": 7075, + "train_speed(iter/s)": 0.580962 + }, + { + "acc": 1.0, + "epoch": 7.5079533404029695, + "grad_norm": 0.0142194963991642, + "learning_rate": 1.604937042516797e-06, + "loss": 0.00010233, + "memory(GiB)": 26.31, + "step": 7080, + "train_speed(iter/s)": 0.580964 + }, + { + "acc": 1.0, + "epoch": 7.513255567338282, + "grad_norm": 0.0043426030315458775, + "learning_rate": 1.598507771329549e-06, + "loss": 0.00012825, + "memory(GiB)": 26.31, + "step": 7085, + "train_speed(iter/s)": 0.580964 + }, + { + "acc": 0.99961729, + "epoch": 7.518557794273595, + "grad_norm": 0.27042967081069946, + "learning_rate": 1.5920889606037612e-06, + "loss": 0.00112635, + "memory(GiB)": 26.31, + "step": 7090, + "train_speed(iter/s)": 0.580964 + }, + { + "acc": 0.99977627, + "epoch": 7.5238600212089075, + "grad_norm": 0.004301531706005335, + "learning_rate": 1.585680630076023e-06, + "loss": 0.0010411, + "memory(GiB)": 26.31, + "step": 7095, + "train_speed(iter/s)": 0.580966 + }, + { + "acc": 0.99988317, + "epoch": 7.52916224814422, + "grad_norm": 0.05325544625520706, + "learning_rate": 1.5792827994507001e-06, + "loss": 0.00031735, + "memory(GiB)": 26.31, + "step": 7100, + "train_speed(iter/s)": 0.58097 + }, + { + "acc": 0.99973316, + "epoch": 7.534464475079534, + "grad_norm": 0.02882418781518936, + "learning_rate": 1.57289548839987e-06, + "loss": 0.00033525, + "memory(GiB)": 26.31, + "step": 7105, + "train_speed(iter/s)": 0.580974 + }, + { + "acc": 0.99977551, + "epoch": 7.5397667020148464, + "grad_norm": 0.06291340291500092, + "learning_rate": 1.5665187165632675e-06, + "loss": 0.0002386, + "memory(GiB)": 26.31, + "step": 7110, + "train_speed(iter/s)": 0.580975 + }, + { + "acc": 0.99977036, + "epoch": 7.545068928950159, + "grad_norm": 0.1016281470656395, + "learning_rate": 1.5601525035482201e-06, + "loss": 0.00037299, + "memory(GiB)": 26.31, + "step": 7115, + "train_speed(iter/s)": 0.580977 + }, + { + "acc": 1.0, + "epoch": 7.550371155885472, + "grad_norm": 0.04114781692624092, + "learning_rate": 1.5537968689295879e-06, + "loss": 0.00021627, + "memory(GiB)": 26.31, + "step": 7120, + "train_speed(iter/s)": 0.580979 + }, + { + "acc": 0.99988317, + "epoch": 7.5556733828207845, + "grad_norm": 0.01582081988453865, + "learning_rate": 1.547451832249707e-06, + "loss": 0.00034413, + "memory(GiB)": 26.31, + "step": 7125, + "train_speed(iter/s)": 0.580981 + }, + { + "acc": 0.99986982, + "epoch": 7.560975609756097, + "grad_norm": 0.004217559937387705, + "learning_rate": 1.5411174130183246e-06, + "loss": 0.00028027, + "memory(GiB)": 26.31, + "step": 7130, + "train_speed(iter/s)": 0.580986 + }, + { + "acc": 0.99988041, + "epoch": 7.566277836691411, + "grad_norm": 0.03366623446345329, + "learning_rate": 1.5347936307125414e-06, + "loss": 0.0001684, + "memory(GiB)": 26.31, + "step": 7135, + "train_speed(iter/s)": 0.580991 + }, + { + "acc": 1.0, + "epoch": 7.571580063626723, + "grad_norm": 0.11424966156482697, + "learning_rate": 1.5284805047767555e-06, + "loss": 0.00016731, + "memory(GiB)": 26.31, + "step": 7140, + "train_speed(iter/s)": 0.580994 + }, + { + "acc": 0.99987049, + "epoch": 7.576882290562036, + "grad_norm": 0.0017340040067210793, + "learning_rate": 1.5221780546225942e-06, + "loss": 0.0014659, + "memory(GiB)": 26.31, + "step": 7145, + "train_speed(iter/s)": 0.581 + }, + { + "acc": 0.999755, + "epoch": 7.582184517497349, + "grad_norm": 0.008549396879971027, + "learning_rate": 1.5158862996288584e-06, + "loss": 0.00028182, + "memory(GiB)": 26.31, + "step": 7150, + "train_speed(iter/s)": 0.581002 + }, + { + "acc": 0.99944859, + "epoch": 7.5874867444326615, + "grad_norm": 0.07049953192472458, + "learning_rate": 1.509605259141469e-06, + "loss": 0.00190711, + "memory(GiB)": 26.31, + "step": 7155, + "train_speed(iter/s)": 0.581003 + }, + { + "acc": 1.0, + "epoch": 7.592788971367975, + "grad_norm": 0.0234018936753273, + "learning_rate": 1.503334952473397e-06, + "loss": 5.961e-05, + "memory(GiB)": 26.31, + "step": 7160, + "train_speed(iter/s)": 0.581003 + }, + { + "acc": 0.99917202, + "epoch": 7.598091198303288, + "grad_norm": 0.20772714912891388, + "learning_rate": 1.49707539890461e-06, + "loss": 0.00167405, + "memory(GiB)": 26.31, + "step": 7165, + "train_speed(iter/s)": 0.581009 + }, + { + "acc": 0.99976158, + "epoch": 7.6033934252386, + "grad_norm": 0.0049901618622243404, + "learning_rate": 1.490826617682013e-06, + "loss": 0.00034158, + "memory(GiB)": 26.31, + "step": 7170, + "train_speed(iter/s)": 0.58101 + }, + { + "acc": 0.99975185, + "epoch": 7.608695652173913, + "grad_norm": 0.06982958316802979, + "learning_rate": 1.4845886280193864e-06, + "loss": 0.00028914, + "memory(GiB)": 26.31, + "step": 7175, + "train_speed(iter/s)": 0.581013 + }, + { + "acc": 1.0, + "epoch": 7.613997879109226, + "grad_norm": 0.048727817833423615, + "learning_rate": 1.4783614490973306e-06, + "loss": 4.181e-05, + "memory(GiB)": 26.31, + "step": 7180, + "train_speed(iter/s)": 0.581015 + }, + { + "acc": 0.99975662, + "epoch": 7.6193001060445384, + "grad_norm": 0.12339375168085098, + "learning_rate": 1.4721451000632039e-06, + "loss": 0.00052848, + "memory(GiB)": 26.31, + "step": 7185, + "train_speed(iter/s)": 0.581018 + }, + { + "acc": 1.0, + "epoch": 7.624602332979851, + "grad_norm": 0.036420684307813644, + "learning_rate": 1.4659396000310644e-06, + "loss": 0.00010585, + "memory(GiB)": 26.31, + "step": 7190, + "train_speed(iter/s)": 0.581023 + }, + { + "acc": 0.99966774, + "epoch": 7.629904559915165, + "grad_norm": 0.37844347953796387, + "learning_rate": 1.4597449680816136e-06, + "loss": 0.00042416, + "memory(GiB)": 26.31, + "step": 7195, + "train_speed(iter/s)": 0.581026 + }, + { + "acc": 0.99973469, + "epoch": 7.635206786850477, + "grad_norm": 0.1244843453168869, + "learning_rate": 1.4535612232621336e-06, + "loss": 0.00036726, + "memory(GiB)": 26.31, + "step": 7200, + "train_speed(iter/s)": 0.581029 + }, + { + "acc": 0.99987183, + "epoch": 7.64050901378579, + "grad_norm": 0.03416428714990616, + "learning_rate": 1.4473883845864307e-06, + "loss": 0.00026054, + "memory(GiB)": 26.31, + "step": 7205, + "train_speed(iter/s)": 0.58103 + }, + { + "acc": 0.99987679, + "epoch": 7.645811240721103, + "grad_norm": 0.03423444554209709, + "learning_rate": 1.4412264710347803e-06, + "loss": 0.00099486, + "memory(GiB)": 26.31, + "step": 7210, + "train_speed(iter/s)": 0.581038 + }, + { + "acc": 0.99987431, + "epoch": 7.651113467656415, + "grad_norm": 0.030545970425009727, + "learning_rate": 1.4350755015538615e-06, + "loss": 0.00020079, + "memory(GiB)": 26.31, + "step": 7215, + "train_speed(iter/s)": 0.581038 + }, + { + "acc": 0.99946852, + "epoch": 7.656415694591729, + "grad_norm": 0.521009087562561, + "learning_rate": 1.4289354950567039e-06, + "loss": 0.0009209, + "memory(GiB)": 26.31, + "step": 7220, + "train_speed(iter/s)": 0.581041 + }, + { + "acc": 0.99974804, + "epoch": 7.661717921527042, + "grad_norm": 0.00251702475361526, + "learning_rate": 1.4228064704226276e-06, + "loss": 0.00047443, + "memory(GiB)": 26.31, + "step": 7225, + "train_speed(iter/s)": 0.581045 + }, + { + "acc": 0.99924774, + "epoch": 7.667020148462354, + "grad_norm": 1.2205266952514648, + "learning_rate": 1.4166884464971858e-06, + "loss": 0.00217944, + "memory(GiB)": 26.31, + "step": 7230, + "train_speed(iter/s)": 0.581047 + }, + { + "acc": 0.99965, + "epoch": 7.672322375397667, + "grad_norm": 0.007064941339194775, + "learning_rate": 1.4105814420921073e-06, + "loss": 0.00124084, + "memory(GiB)": 26.31, + "step": 7235, + "train_speed(iter/s)": 0.58105 + }, + { + "acc": 0.99972963, + "epoch": 7.67762460233298, + "grad_norm": 0.028828129172325134, + "learning_rate": 1.4044854759852378e-06, + "loss": 0.00059189, + "memory(GiB)": 26.31, + "step": 7240, + "train_speed(iter/s)": 0.581051 + }, + { + "acc": 0.9998641, + "epoch": 7.682926829268292, + "grad_norm": 0.01702168956398964, + "learning_rate": 1.3984005669204808e-06, + "loss": 0.00023274, + "memory(GiB)": 26.31, + "step": 7245, + "train_speed(iter/s)": 0.581055 + }, + { + "acc": 0.99950886, + "epoch": 7.688229056203605, + "grad_norm": 0.09453985095024109, + "learning_rate": 1.392326733607744e-06, + "loss": 0.00087211, + "memory(GiB)": 26.31, + "step": 7250, + "train_speed(iter/s)": 0.581057 + }, + { + "acc": 0.99919186, + "epoch": 7.693531283138919, + "grad_norm": 0.06911499798297882, + "learning_rate": 1.3862639947228785e-06, + "loss": 0.00130631, + "memory(GiB)": 26.31, + "step": 7255, + "train_speed(iter/s)": 0.581061 + }, + { + "acc": 0.99972954, + "epoch": 7.698833510074231, + "grad_norm": 0.0033358214423060417, + "learning_rate": 1.3802123689076192e-06, + "loss": 0.00047809, + "memory(GiB)": 26.31, + "step": 7260, + "train_speed(iter/s)": 0.581062 + }, + { + "acc": 0.99975901, + "epoch": 7.704135737009544, + "grad_norm": 0.046811651438474655, + "learning_rate": 1.3741718747695368e-06, + "loss": 0.00049594, + "memory(GiB)": 26.31, + "step": 7265, + "train_speed(iter/s)": 0.581065 + }, + { + "acc": 1.0, + "epoch": 7.709437963944857, + "grad_norm": 0.006222330033779144, + "learning_rate": 1.3681425308819673e-06, + "loss": 0.0001086, + "memory(GiB)": 26.31, + "step": 7270, + "train_speed(iter/s)": 0.581069 + }, + { + "acc": 0.99987497, + "epoch": 7.714740190880169, + "grad_norm": 0.4325442910194397, + "learning_rate": 1.3621243557839688e-06, + "loss": 0.00109212, + "memory(GiB)": 26.31, + "step": 7275, + "train_speed(iter/s)": 0.581071 + }, + { + "acc": 0.99974489, + "epoch": 7.720042417815483, + "grad_norm": 0.10214357823133469, + "learning_rate": 1.3561173679802524e-06, + "loss": 0.00060738, + "memory(GiB)": 26.31, + "step": 7280, + "train_speed(iter/s)": 0.581077 + }, + { + "acc": 0.99987564, + "epoch": 7.725344644750796, + "grad_norm": 0.01606924459338188, + "learning_rate": 1.3501215859411318e-06, + "loss": 0.00048148, + "memory(GiB)": 26.31, + "step": 7285, + "train_speed(iter/s)": 0.581079 + }, + { + "acc": 0.99961681, + "epoch": 7.730646871686108, + "grad_norm": 0.06415250152349472, + "learning_rate": 1.3441370281024654e-06, + "loss": 0.00113499, + "memory(GiB)": 26.31, + "step": 7290, + "train_speed(iter/s)": 0.581081 + }, + { + "acc": 1.0, + "epoch": 7.735949098621421, + "grad_norm": 0.0027941372245550156, + "learning_rate": 1.3381637128655995e-06, + "loss": 0.00013706, + "memory(GiB)": 26.31, + "step": 7295, + "train_speed(iter/s)": 0.581085 + }, + { + "acc": 0.99935102, + "epoch": 7.741251325556734, + "grad_norm": 0.05772147700190544, + "learning_rate": 1.3322016585973113e-06, + "loss": 0.00217016, + "memory(GiB)": 26.31, + "step": 7300, + "train_speed(iter/s)": 0.581086 + }, + { + "acc": 0.99988098, + "epoch": 7.746553552492046, + "grad_norm": 0.2209397852420807, + "learning_rate": 1.326250883629753e-06, + "loss": 0.00022564, + "memory(GiB)": 26.31, + "step": 7305, + "train_speed(iter/s)": 0.581088 + }, + { + "acc": 1.0, + "epoch": 7.751855779427359, + "grad_norm": 0.03734031319618225, + "learning_rate": 1.3203114062603944e-06, + "loss": 0.00019665, + "memory(GiB)": 26.31, + "step": 7310, + "train_speed(iter/s)": 0.581088 + }, + { + "acc": 0.99974689, + "epoch": 7.757158006362673, + "grad_norm": 0.24819055199623108, + "learning_rate": 1.3143832447519692e-06, + "loss": 0.00042938, + "memory(GiB)": 26.31, + "step": 7315, + "train_speed(iter/s)": 0.581089 + }, + { + "acc": 0.99972458, + "epoch": 7.762460233297985, + "grad_norm": 0.016377611085772514, + "learning_rate": 1.3084664173324144e-06, + "loss": 0.0007581, + "memory(GiB)": 26.31, + "step": 7320, + "train_speed(iter/s)": 0.581091 + }, + { + "acc": 0.99985714, + "epoch": 7.767762460233298, + "grad_norm": 0.03054504469037056, + "learning_rate": 1.30256094219482e-06, + "loss": 0.00041984, + "memory(GiB)": 26.31, + "step": 7325, + "train_speed(iter/s)": 0.581093 + }, + { + "acc": 0.99988937, + "epoch": 7.773064687168611, + "grad_norm": 0.00745094520971179, + "learning_rate": 1.2966668374973673e-06, + "loss": 0.00020874, + "memory(GiB)": 26.31, + "step": 7330, + "train_speed(iter/s)": 0.581097 + }, + { + "acc": 0.99976444, + "epoch": 7.778366914103923, + "grad_norm": 0.009208225645124912, + "learning_rate": 1.290784121363275e-06, + "loss": 0.00059624, + "memory(GiB)": 26.31, + "step": 7335, + "train_speed(iter/s)": 0.5811 + }, + { + "acc": 0.99987926, + "epoch": 7.783669141039237, + "grad_norm": 0.01450218167155981, + "learning_rate": 1.28491281188075e-06, + "loss": 0.00031939, + "memory(GiB)": 26.31, + "step": 7340, + "train_speed(iter/s)": 0.581101 + }, + { + "acc": 0.99976578, + "epoch": 7.78897136797455, + "grad_norm": 0.007167202420532703, + "learning_rate": 1.2790529271029191e-06, + "loss": 0.00062234, + "memory(GiB)": 26.31, + "step": 7345, + "train_speed(iter/s)": 0.581099 + }, + { + "acc": 0.99965649, + "epoch": 7.794273594909862, + "grad_norm": 0.3587786555290222, + "learning_rate": 1.2732044850477839e-06, + "loss": 0.00082365, + "memory(GiB)": 26.31, + "step": 7350, + "train_speed(iter/s)": 0.581101 + }, + { + "acc": 0.99975052, + "epoch": 7.799575821845175, + "grad_norm": 0.15072518587112427, + "learning_rate": 1.2673675036981609e-06, + "loss": 0.00072264, + "memory(GiB)": 26.31, + "step": 7355, + "train_speed(iter/s)": 0.581097 + }, + { + "acc": 1.0, + "epoch": 7.804878048780488, + "grad_norm": 0.005496453959494829, + "learning_rate": 1.2615420010016277e-06, + "loss": 3.213e-05, + "memory(GiB)": 26.31, + "step": 7360, + "train_speed(iter/s)": 0.581099 + }, + { + "acc": 0.99988422, + "epoch": 7.8101802757158, + "grad_norm": 0.0037899240851402283, + "learning_rate": 1.2557279948704668e-06, + "loss": 0.00021584, + "memory(GiB)": 26.31, + "step": 7365, + "train_speed(iter/s)": 0.581101 + }, + { + "acc": 0.99967022, + "epoch": 7.815482502651113, + "grad_norm": 0.002349494956433773, + "learning_rate": 1.2499255031816091e-06, + "loss": 0.00057175, + "memory(GiB)": 26.31, + "step": 7370, + "train_speed(iter/s)": 0.581103 + }, + { + "acc": 0.99985123, + "epoch": 7.820784729586427, + "grad_norm": 0.013030619360506535, + "learning_rate": 1.244134543776587e-06, + "loss": 0.00116417, + "memory(GiB)": 26.31, + "step": 7375, + "train_speed(iter/s)": 0.581104 + }, + { + "acc": 1.0, + "epoch": 7.826086956521739, + "grad_norm": 0.006604825146496296, + "learning_rate": 1.238355134461467e-06, + "loss": 0.00014518, + "memory(GiB)": 26.31, + "step": 7380, + "train_speed(iter/s)": 0.581105 + }, + { + "acc": 1.0, + "epoch": 7.831389183457052, + "grad_norm": 0.014985980466008186, + "learning_rate": 1.2325872930068038e-06, + "loss": 0.00011804, + "memory(GiB)": 26.31, + "step": 7385, + "train_speed(iter/s)": 0.58111 + }, + { + "acc": 0.99986839, + "epoch": 7.836691410392365, + "grad_norm": 0.09459065645933151, + "learning_rate": 1.2268310371475835e-06, + "loss": 0.00033445, + "memory(GiB)": 26.31, + "step": 7390, + "train_speed(iter/s)": 0.581111 + }, + { + "acc": 0.99988422, + "epoch": 7.841993637327677, + "grad_norm": 0.840636134147644, + "learning_rate": 1.2210863845831671e-06, + "loss": 0.0012121, + "memory(GiB)": 26.31, + "step": 7395, + "train_speed(iter/s)": 0.581115 + }, + { + "acc": 0.99977112, + "epoch": 7.847295864262991, + "grad_norm": 0.11717978119850159, + "learning_rate": 1.215353352977239e-06, + "loss": 0.00099637, + "memory(GiB)": 26.31, + "step": 7400, + "train_speed(iter/s)": 0.58112 + }, + { + "acc": 0.99964523, + "epoch": 7.8525980911983035, + "grad_norm": 0.04506349936127663, + "learning_rate": 1.2096319599577535e-06, + "loss": 0.00089844, + "memory(GiB)": 26.31, + "step": 7405, + "train_speed(iter/s)": 0.581127 + }, + { + "acc": 0.99959965, + "epoch": 7.857900318133616, + "grad_norm": 0.02824774570763111, + "learning_rate": 1.203922223116874e-06, + "loss": 0.00082064, + "memory(GiB)": 26.31, + "step": 7410, + "train_speed(iter/s)": 0.58113 + }, + { + "acc": 0.99965115, + "epoch": 7.863202545068929, + "grad_norm": 0.19301556050777435, + "learning_rate": 1.1982241600109274e-06, + "loss": 0.00085676, + "memory(GiB)": 26.31, + "step": 7415, + "train_speed(iter/s)": 0.581131 + }, + { + "acc": 0.99988985, + "epoch": 7.868504772004242, + "grad_norm": 0.005214956123381853, + "learning_rate": 1.1925377881603432e-06, + "loss": 0.00020933, + "memory(GiB)": 26.31, + "step": 7420, + "train_speed(iter/s)": 0.581132 + }, + { + "acc": 0.99974422, + "epoch": 7.873806998939554, + "grad_norm": 0.0082614840939641, + "learning_rate": 1.1868631250496052e-06, + "loss": 0.00044404, + "memory(GiB)": 26.31, + "step": 7425, + "train_speed(iter/s)": 0.581134 + }, + { + "acc": 0.99974804, + "epoch": 7.879109225874867, + "grad_norm": 0.037489645183086395, + "learning_rate": 1.1812001881271926e-06, + "loss": 0.00052237, + "memory(GiB)": 26.31, + "step": 7430, + "train_speed(iter/s)": 0.581136 + }, + { + "acc": 1.0, + "epoch": 7.8844114528101805, + "grad_norm": 0.03406713902950287, + "learning_rate": 1.1755489948055305e-06, + "loss": 1.791e-05, + "memory(GiB)": 26.31, + "step": 7435, + "train_speed(iter/s)": 0.581139 + }, + { + "acc": 0.99985552, + "epoch": 7.889713679745493, + "grad_norm": 0.0035832603462040424, + "learning_rate": 1.1699095624609343e-06, + "loss": 0.00037999, + "memory(GiB)": 26.31, + "step": 7440, + "train_speed(iter/s)": 0.581145 + }, + { + "acc": 1.0, + "epoch": 7.895015906680806, + "grad_norm": 0.016665274277329445, + "learning_rate": 1.1642819084335577e-06, + "loss": 4.864e-05, + "memory(GiB)": 26.31, + "step": 7445, + "train_speed(iter/s)": 0.581146 + }, + { + "acc": 0.99987803, + "epoch": 7.900318133616119, + "grad_norm": 0.021315449848771095, + "learning_rate": 1.1586660500273351e-06, + "loss": 0.00027156, + "memory(GiB)": 26.31, + "step": 7450, + "train_speed(iter/s)": 0.581149 + }, + { + "acc": 0.99988155, + "epoch": 7.905620360551431, + "grad_norm": 0.049092620611190796, + "learning_rate": 1.1530620045099361e-06, + "loss": 0.00037108, + "memory(GiB)": 26.31, + "step": 7455, + "train_speed(iter/s)": 0.58115 + }, + { + "acc": 0.99988375, + "epoch": 7.910922587486745, + "grad_norm": 0.017999274656176567, + "learning_rate": 1.1474697891127047e-06, + "loss": 0.00031416, + "memory(GiB)": 26.31, + "step": 7460, + "train_speed(iter/s)": 0.581151 + }, + { + "acc": 0.9998724, + "epoch": 7.9162248144220575, + "grad_norm": 0.00615483894944191, + "learning_rate": 1.14188942103061e-06, + "loss": 0.00021588, + "memory(GiB)": 26.31, + "step": 7465, + "train_speed(iter/s)": 0.581153 + }, + { + "acc": 0.99965563, + "epoch": 7.92152704135737, + "grad_norm": 0.0026157486718147993, + "learning_rate": 1.1363209174221953e-06, + "loss": 0.00055385, + "memory(GiB)": 26.31, + "step": 7470, + "train_speed(iter/s)": 0.581155 + }, + { + "acc": 0.99960432, + "epoch": 7.926829268292683, + "grad_norm": 0.014894828200340271, + "learning_rate": 1.130764295409521e-06, + "loss": 0.00133177, + "memory(GiB)": 26.31, + "step": 7475, + "train_speed(iter/s)": 0.581159 + }, + { + "acc": 0.999753, + "epoch": 7.9321314952279955, + "grad_norm": 0.11886921525001526, + "learning_rate": 1.1252195720781122e-06, + "loss": 0.00043521, + "memory(GiB)": 26.31, + "step": 7480, + "train_speed(iter/s)": 0.581161 + }, + { + "acc": 1.0, + "epoch": 7.937433722163308, + "grad_norm": 0.012216082774102688, + "learning_rate": 1.1196867644769127e-06, + "loss": 1.175e-05, + "memory(GiB)": 26.31, + "step": 7485, + "train_speed(iter/s)": 0.581162 + }, + { + "acc": 0.99987621, + "epoch": 7.942735949098622, + "grad_norm": 0.0035921805538237095, + "learning_rate": 1.1141658896182242e-06, + "loss": 0.0002258, + "memory(GiB)": 26.31, + "step": 7490, + "train_speed(iter/s)": 0.581163 + }, + { + "acc": 0.99988585, + "epoch": 7.9480381760339345, + "grad_norm": 0.06408660113811493, + "learning_rate": 1.1086569644776578e-06, + "loss": 0.0002532, + "memory(GiB)": 26.31, + "step": 7495, + "train_speed(iter/s)": 0.581165 + }, + { + "acc": 1.0, + "epoch": 7.953340402969247, + "grad_norm": 0.06699665635824203, + "learning_rate": 1.1031600059940816e-06, + "loss": 5.644e-05, + "memory(GiB)": 26.31, + "step": 7500, + "train_speed(iter/s)": 0.581169 + }, + { + "acc": 1.0, + "epoch": 7.95864262990456, + "grad_norm": 0.09777071326971054, + "learning_rate": 1.0976750310695696e-06, + "loss": 4.99e-05, + "memory(GiB)": 26.31, + "step": 7505, + "train_speed(iter/s)": 0.58117 + }, + { + "acc": 0.99987183, + "epoch": 7.9639448568398725, + "grad_norm": 0.04002142325043678, + "learning_rate": 1.0922020565693477e-06, + "loss": 0.00034577, + "memory(GiB)": 26.31, + "step": 7510, + "train_speed(iter/s)": 0.581168 + }, + { + "acc": 0.99974804, + "epoch": 7.969247083775185, + "grad_norm": 0.04886091500520706, + "learning_rate": 1.0867410993217438e-06, + "loss": 0.00035892, + "memory(GiB)": 26.31, + "step": 7515, + "train_speed(iter/s)": 0.581173 + }, + { + "acc": 0.99939957, + "epoch": 7.974549310710499, + "grad_norm": 0.06108390912413597, + "learning_rate": 1.0812921761181341e-06, + "loss": 0.0011682, + "memory(GiB)": 26.31, + "step": 7520, + "train_speed(iter/s)": 0.581173 + }, + { + "acc": 0.99964771, + "epoch": 7.9798515376458115, + "grad_norm": 0.04557066038250923, + "learning_rate": 1.0758553037128931e-06, + "loss": 0.00045693, + "memory(GiB)": 26.31, + "step": 7525, + "train_speed(iter/s)": 0.581175 + }, + { + "acc": 0.99987621, + "epoch": 7.985153764581124, + "grad_norm": 0.4263116121292114, + "learning_rate": 1.0704304988233402e-06, + "loss": 0.00060724, + "memory(GiB)": 26.31, + "step": 7530, + "train_speed(iter/s)": 0.58117 + }, + { + "acc": 0.99987745, + "epoch": 7.990455991516437, + "grad_norm": 0.04181910306215286, + "learning_rate": 1.0650177781296923e-06, + "loss": 0.00010795, + "memory(GiB)": 26.31, + "step": 7535, + "train_speed(iter/s)": 0.581171 + }, + { + "acc": 0.99963703, + "epoch": 7.9957582184517495, + "grad_norm": 0.001203623367473483, + "learning_rate": 1.0596171582750076e-06, + "loss": 0.00166455, + "memory(GiB)": 26.31, + "step": 7540, + "train_speed(iter/s)": 0.581161 + }, + { + "acc": 1.0, + "epoch": 8.001060445387063, + "grad_norm": 0.004303697030991316, + "learning_rate": 1.0542286558651369e-06, + "loss": 0.00017417, + "memory(GiB)": 26.31, + "step": 7545, + "train_speed(iter/s)": 0.58112 + }, + { + "acc": 0.99951143, + "epoch": 8.006362672322375, + "grad_norm": 0.03637172281742096, + "learning_rate": 1.048852287468672e-06, + "loss": 0.00085328, + "memory(GiB)": 26.31, + "step": 7550, + "train_speed(iter/s)": 0.581124 + }, + { + "acc": 0.99976511, + "epoch": 8.011664899257688, + "grad_norm": 0.018262850120663643, + "learning_rate": 1.0434880696168952e-06, + "loss": 0.00045877, + "memory(GiB)": 26.31, + "step": 7555, + "train_speed(iter/s)": 0.581125 + }, + { + "acc": 0.99960289, + "epoch": 8.016967126193, + "grad_norm": 0.08547333627939224, + "learning_rate": 1.0381360188037295e-06, + "loss": 0.00064282, + "memory(GiB)": 26.31, + "step": 7560, + "train_speed(iter/s)": 0.581126 + }, + { + "acc": 0.9999012, + "epoch": 8.022269353128314, + "grad_norm": 0.04770037904381752, + "learning_rate": 1.0327961514856845e-06, + "loss": 0.00030082, + "memory(GiB)": 26.31, + "step": 7565, + "train_speed(iter/s)": 0.581131 + }, + { + "acc": 0.9998579, + "epoch": 8.027571580063627, + "grad_norm": 0.008269469253718853, + "learning_rate": 1.0274684840818093e-06, + "loss": 0.00032678, + "memory(GiB)": 26.31, + "step": 7570, + "train_speed(iter/s)": 0.581133 + }, + { + "acc": 0.9997838, + "epoch": 8.03287380699894, + "grad_norm": 0.0030161449685692787, + "learning_rate": 1.0221530329736403e-06, + "loss": 0.00061227, + "memory(GiB)": 26.31, + "step": 7575, + "train_speed(iter/s)": 0.581134 + }, + { + "acc": 0.9998724, + "epoch": 8.038176033934253, + "grad_norm": 0.13272728025913239, + "learning_rate": 1.0168498145051508e-06, + "loss": 0.00031736, + "memory(GiB)": 26.31, + "step": 7580, + "train_speed(iter/s)": 0.581136 + }, + { + "acc": 0.99987679, + "epoch": 8.043478260869565, + "grad_norm": 0.004710217472165823, + "learning_rate": 1.0115588449827022e-06, + "loss": 0.00022688, + "memory(GiB)": 26.31, + "step": 7585, + "train_speed(iter/s)": 0.581137 + }, + { + "acc": 0.99988213, + "epoch": 8.048780487804878, + "grad_norm": 0.023875171318650246, + "learning_rate": 1.0062801406749908e-06, + "loss": 0.00036154, + "memory(GiB)": 26.31, + "step": 7590, + "train_speed(iter/s)": 0.581142 + }, + { + "acc": 1.0, + "epoch": 8.054082714740192, + "grad_norm": 0.055615752935409546, + "learning_rate": 1.0010137178130023e-06, + "loss": 3.904e-05, + "memory(GiB)": 26.31, + "step": 7595, + "train_speed(iter/s)": 0.581142 + }, + { + "acc": 0.9998724, + "epoch": 8.059384941675503, + "grad_norm": 0.013751799240708351, + "learning_rate": 9.957595925899576e-07, + "loss": 0.00014761, + "memory(GiB)": 26.31, + "step": 7600, + "train_speed(iter/s)": 0.581144 + }, + { + "acc": 0.99953594, + "epoch": 8.064687168610817, + "grad_norm": 0.13239844143390656, + "learning_rate": 9.90517781161266e-07, + "loss": 0.0006467, + "memory(GiB)": 26.31, + "step": 7605, + "train_speed(iter/s)": 0.581146 + }, + { + "acc": 0.99987803, + "epoch": 8.069989395546129, + "grad_norm": 0.0015992814442142844, + "learning_rate": 9.852882996444734e-07, + "loss": 0.00015576, + "memory(GiB)": 26.31, + "step": 7610, + "train_speed(iter/s)": 0.581147 + }, + { + "acc": 0.99988155, + "epoch": 8.075291622481442, + "grad_norm": 0.0007152906036935747, + "learning_rate": 9.800711641192137e-07, + "loss": 0.000157, + "memory(GiB)": 26.31, + "step": 7615, + "train_speed(iter/s)": 0.581148 + }, + { + "acc": 0.99987621, + "epoch": 8.080593849416754, + "grad_norm": 0.14747610688209534, + "learning_rate": 9.748663906271589e-07, + "loss": 0.00015784, + "memory(GiB)": 26.31, + "step": 7620, + "train_speed(iter/s)": 0.581152 + }, + { + "acc": 0.99974289, + "epoch": 8.085896076352068, + "grad_norm": 0.04375872015953064, + "learning_rate": 9.696739951719706e-07, + "loss": 0.0003344, + "memory(GiB)": 26.31, + "step": 7625, + "train_speed(iter/s)": 0.581152 + }, + { + "acc": 1.0, + "epoch": 8.091198303287381, + "grad_norm": 0.0009364792495034635, + "learning_rate": 9.644939937192512e-07, + "loss": 1.078e-05, + "memory(GiB)": 26.31, + "step": 7630, + "train_speed(iter/s)": 0.581156 + }, + { + "acc": 0.9997592, + "epoch": 8.096500530222693, + "grad_norm": 0.44751542806625366, + "learning_rate": 9.593264021964919e-07, + "loss": 0.00053736, + "memory(GiB)": 26.31, + "step": 7635, + "train_speed(iter/s)": 0.581158 + }, + { + "acc": 0.99986486, + "epoch": 8.101802757158007, + "grad_norm": 0.08755602687597275, + "learning_rate": 9.541712364930284e-07, + "loss": 0.00027139, + "memory(GiB)": 26.31, + "step": 7640, + "train_speed(iter/s)": 0.581162 + }, + { + "acc": 1.0, + "epoch": 8.107104984093318, + "grad_norm": 0.06107502058148384, + "learning_rate": 9.490285124599867e-07, + "loss": 9.593e-05, + "memory(GiB)": 26.31, + "step": 7645, + "train_speed(iter/s)": 0.581166 + }, + { + "acc": 0.99987621, + "epoch": 8.112407211028632, + "grad_norm": 0.0004909554845653474, + "learning_rate": 9.438982459102395e-07, + "loss": 0.00014782, + "memory(GiB)": 26.31, + "step": 7650, + "train_speed(iter/s)": 0.581168 + }, + { + "acc": 0.99988422, + "epoch": 8.117709437963946, + "grad_norm": 0.003382364520803094, + "learning_rate": 9.387804526183543e-07, + "loss": 0.00013919, + "memory(GiB)": 26.31, + "step": 7655, + "train_speed(iter/s)": 0.581171 + }, + { + "acc": 0.99941235, + "epoch": 8.123011664899257, + "grad_norm": 0.0029366735834628344, + "learning_rate": 9.336751483205435e-07, + "loss": 0.00112311, + "memory(GiB)": 26.31, + "step": 7660, + "train_speed(iter/s)": 0.581173 + }, + { + "acc": 0.99973831, + "epoch": 8.128313891834571, + "grad_norm": 0.03579862788319588, + "learning_rate": 9.285823487146234e-07, + "loss": 0.00040023, + "memory(GiB)": 26.31, + "step": 7665, + "train_speed(iter/s)": 0.581174 + }, + { + "acc": 0.99987373, + "epoch": 8.133616118769883, + "grad_norm": 0.04333464428782463, + "learning_rate": 9.235020694599566e-07, + "loss": 0.0002684, + "memory(GiB)": 26.31, + "step": 7670, + "train_speed(iter/s)": 0.581176 + }, + { + "acc": 1.0, + "epoch": 8.138918345705196, + "grad_norm": 0.022197069600224495, + "learning_rate": 9.18434326177409e-07, + "loss": 0.0001166, + "memory(GiB)": 26.31, + "step": 7675, + "train_speed(iter/s)": 0.581179 + }, + { + "acc": 0.99988155, + "epoch": 8.14422057264051, + "grad_norm": 0.01878158375620842, + "learning_rate": 9.133791344493017e-07, + "loss": 0.00013026, + "memory(GiB)": 26.31, + "step": 7680, + "train_speed(iter/s)": 0.581185 + }, + { + "acc": 0.99974842, + "epoch": 8.149522799575822, + "grad_norm": 0.03990185260772705, + "learning_rate": 9.083365098193609e-07, + "loss": 0.00041892, + "memory(GiB)": 26.31, + "step": 7685, + "train_speed(iter/s)": 0.581189 + }, + { + "acc": 0.99949589, + "epoch": 8.154825026511135, + "grad_norm": 0.003369416343048215, + "learning_rate": 9.033064677926724e-07, + "loss": 0.00066151, + "memory(GiB)": 26.31, + "step": 7690, + "train_speed(iter/s)": 0.58119 + }, + { + "acc": 1.0, + "epoch": 8.160127253446447, + "grad_norm": 0.0658353790640831, + "learning_rate": 8.982890238356318e-07, + "loss": 0.00011477, + "memory(GiB)": 26.31, + "step": 7695, + "train_speed(iter/s)": 0.581195 + }, + { + "acc": 0.99987116, + "epoch": 8.16542948038176, + "grad_norm": 0.07666454464197159, + "learning_rate": 8.932841933759011e-07, + "loss": 0.00020501, + "memory(GiB)": 26.31, + "step": 7700, + "train_speed(iter/s)": 0.581198 + }, + { + "acc": 0.99975843, + "epoch": 8.170731707317072, + "grad_norm": 0.0565459281206131, + "learning_rate": 8.882919918023548e-07, + "loss": 0.00032766, + "memory(GiB)": 26.31, + "step": 7705, + "train_speed(iter/s)": 0.5812 + }, + { + "acc": 0.99987307, + "epoch": 8.176033934252386, + "grad_norm": 0.001517447759397328, + "learning_rate": 8.833124344650383e-07, + "loss": 0.00012655, + "memory(GiB)": 26.31, + "step": 7710, + "train_speed(iter/s)": 0.581202 + }, + { + "acc": 1.0, + "epoch": 8.1813361611877, + "grad_norm": 0.008927865885198116, + "learning_rate": 8.783455366751168e-07, + "loss": 8.786e-05, + "memory(GiB)": 26.31, + "step": 7715, + "train_speed(iter/s)": 0.581204 + }, + { + "acc": 1.0, + "epoch": 8.186638388123011, + "grad_norm": 0.001537539646960795, + "learning_rate": 8.733913137048305e-07, + "loss": 7.365e-05, + "memory(GiB)": 26.31, + "step": 7720, + "train_speed(iter/s)": 0.581208 + }, + { + "acc": 0.99978895, + "epoch": 8.191940615058325, + "grad_norm": 0.0009232127922587097, + "learning_rate": 8.68449780787448e-07, + "loss": 0.00085949, + "memory(GiB)": 26.31, + "step": 7725, + "train_speed(iter/s)": 0.581209 + }, + { + "acc": 0.99941664, + "epoch": 8.197242841993637, + "grad_norm": 0.1339799463748932, + "learning_rate": 8.635209531172154e-07, + "loss": 0.00167964, + "memory(GiB)": 26.31, + "step": 7730, + "train_speed(iter/s)": 0.581211 + }, + { + "acc": 0.99975662, + "epoch": 8.20254506892895, + "grad_norm": 0.030591564252972603, + "learning_rate": 8.586048458493177e-07, + "loss": 0.00045913, + "memory(GiB)": 26.31, + "step": 7735, + "train_speed(iter/s)": 0.581213 + }, + { + "acc": 0.9998641, + "epoch": 8.207847295864262, + "grad_norm": 0.03506559878587723, + "learning_rate": 8.537014740998235e-07, + "loss": 0.00042046, + "memory(GiB)": 26.31, + "step": 7740, + "train_speed(iter/s)": 0.581217 + }, + { + "acc": 1.0, + "epoch": 8.213149522799576, + "grad_norm": 0.01826823502779007, + "learning_rate": 8.488108529456423e-07, + "loss": 3.581e-05, + "memory(GiB)": 26.31, + "step": 7745, + "train_speed(iter/s)": 0.581219 + }, + { + "acc": 0.99974556, + "epoch": 8.21845174973489, + "grad_norm": 0.11806601285934448, + "learning_rate": 8.439329974244791e-07, + "loss": 0.00072233, + "memory(GiB)": 26.31, + "step": 7750, + "train_speed(iter/s)": 0.581223 + }, + { + "acc": 1.0, + "epoch": 8.223753976670201, + "grad_norm": 0.01329426933079958, + "learning_rate": 8.390679225347866e-07, + "loss": 0.00020656, + "memory(GiB)": 26.31, + "step": 7755, + "train_speed(iter/s)": 0.581224 + }, + { + "acc": 0.99974918, + "epoch": 8.229056203605515, + "grad_norm": 0.03872642666101456, + "learning_rate": 8.342156432357194e-07, + "loss": 0.00053237, + "memory(GiB)": 26.31, + "step": 7760, + "train_speed(iter/s)": 0.581225 + }, + { + "acc": 0.99976864, + "epoch": 8.234358430540826, + "grad_norm": 0.04402993246912956, + "learning_rate": 8.293761744470884e-07, + "loss": 0.0003388, + "memory(GiB)": 26.31, + "step": 7765, + "train_speed(iter/s)": 0.581229 + }, + { + "acc": 0.99985552, + "epoch": 8.23966065747614, + "grad_norm": 0.021901650354266167, + "learning_rate": 8.245495310493146e-07, + "loss": 0.00030562, + "memory(GiB)": 26.31, + "step": 7770, + "train_speed(iter/s)": 0.58123 + }, + { + "acc": 0.99988585, + "epoch": 8.244962884411454, + "grad_norm": 0.1328391581773758, + "learning_rate": 8.197357278833834e-07, + "loss": 0.00030064, + "memory(GiB)": 26.31, + "step": 7775, + "train_speed(iter/s)": 0.581233 + }, + { + "acc": 0.99988213, + "epoch": 8.250265111346765, + "grad_norm": 0.0058752140030264854, + "learning_rate": 8.149347797507994e-07, + "loss": 0.00020896, + "memory(GiB)": 26.31, + "step": 7780, + "train_speed(iter/s)": 0.581236 + }, + { + "acc": 0.99989176, + "epoch": 8.255567338282079, + "grad_norm": 0.06424537301063538, + "learning_rate": 8.101467014135403e-07, + "loss": 0.00027083, + "memory(GiB)": 26.31, + "step": 7785, + "train_speed(iter/s)": 0.581237 + }, + { + "acc": 0.99976292, + "epoch": 8.26086956521739, + "grad_norm": 0.0293315090239048, + "learning_rate": 8.053715075940096e-07, + "loss": 0.00043081, + "memory(GiB)": 26.31, + "step": 7790, + "train_speed(iter/s)": 0.581239 + }, + { + "acc": 0.99987926, + "epoch": 8.266171792152704, + "grad_norm": 0.015792246907949448, + "learning_rate": 8.006092129749986e-07, + "loss": 0.00016041, + "memory(GiB)": 26.31, + "step": 7795, + "train_speed(iter/s)": 0.581236 + }, + { + "acc": 1.0, + "epoch": 8.271474019088018, + "grad_norm": 0.0027385330758988857, + "learning_rate": 7.958598321996309e-07, + "loss": 6.26e-06, + "memory(GiB)": 26.31, + "step": 7800, + "train_speed(iter/s)": 0.581237 + }, + { + "acc": 1.0, + "epoch": 8.27677624602333, + "grad_norm": 0.0007324932957999408, + "learning_rate": 7.91123379871324e-07, + "loss": 0.0001466, + "memory(GiB)": 26.31, + "step": 7805, + "train_speed(iter/s)": 0.58124 + }, + { + "acc": 0.99987679, + "epoch": 8.282078472958643, + "grad_norm": 0.000622512015979737, + "learning_rate": 7.863998705537454e-07, + "loss": 0.00020528, + "memory(GiB)": 26.31, + "step": 7810, + "train_speed(iter/s)": 0.581241 + }, + { + "acc": 0.99974909, + "epoch": 8.287380699893955, + "grad_norm": 0.16693612933158875, + "learning_rate": 7.816893187707619e-07, + "loss": 0.00094907, + "memory(GiB)": 26.31, + "step": 7815, + "train_speed(iter/s)": 0.581242 + }, + { + "acc": 0.99962425, + "epoch": 8.292682926829269, + "grad_norm": 0.7306221127510071, + "learning_rate": 7.769917390064011e-07, + "loss": 0.00160853, + "memory(GiB)": 26.31, + "step": 7820, + "train_speed(iter/s)": 0.581245 + }, + { + "acc": 0.99975004, + "epoch": 8.29798515376458, + "grad_norm": 0.07459704577922821, + "learning_rate": 7.72307145704802e-07, + "loss": 0.00056356, + "memory(GiB)": 26.31, + "step": 7825, + "train_speed(iter/s)": 0.581249 + }, + { + "acc": 0.99986629, + "epoch": 8.303287380699894, + "grad_norm": 0.0025041713379323483, + "learning_rate": 7.676355532701742e-07, + "loss": 0.00018201, + "memory(GiB)": 26.31, + "step": 7830, + "train_speed(iter/s)": 0.581255 + }, + { + "acc": 1.0, + "epoch": 8.308589607635207, + "grad_norm": 0.010728196240961552, + "learning_rate": 7.629769760667513e-07, + "loss": 0.00011778, + "memory(GiB)": 26.31, + "step": 7835, + "train_speed(iter/s)": 0.581257 + }, + { + "acc": 1.0, + "epoch": 8.31389183457052, + "grad_norm": 0.01707724668085575, + "learning_rate": 7.583314284187486e-07, + "loss": 0.0001733, + "memory(GiB)": 26.31, + "step": 7840, + "train_speed(iter/s)": 0.581258 + }, + { + "acc": 0.99988317, + "epoch": 8.319194061505833, + "grad_norm": 0.002590770134702325, + "learning_rate": 7.536989246103177e-07, + "loss": 0.00010814, + "memory(GiB)": 26.31, + "step": 7845, + "train_speed(iter/s)": 0.581261 + }, + { + "acc": 0.99975109, + "epoch": 8.324496288441145, + "grad_norm": 0.14999988675117493, + "learning_rate": 7.490794788855018e-07, + "loss": 0.0007674, + "memory(GiB)": 26.31, + "step": 7850, + "train_speed(iter/s)": 0.581263 + }, + { + "acc": 0.99988632, + "epoch": 8.329798515376458, + "grad_norm": 0.0026431609876453876, + "learning_rate": 7.444731054481951e-07, + "loss": 0.00010075, + "memory(GiB)": 26.31, + "step": 7855, + "train_speed(iter/s)": 0.581264 + }, + { + "acc": 0.99971752, + "epoch": 8.335100742311772, + "grad_norm": 0.049115173518657684, + "learning_rate": 7.398798184620941e-07, + "loss": 0.00072776, + "memory(GiB)": 26.31, + "step": 7860, + "train_speed(iter/s)": 0.581265 + }, + { + "acc": 0.99974127, + "epoch": 8.340402969247084, + "grad_norm": 0.06810028851032257, + "learning_rate": 7.352996320506616e-07, + "loss": 0.00034255, + "memory(GiB)": 26.31, + "step": 7865, + "train_speed(iter/s)": 0.581268 + }, + { + "acc": 0.99976759, + "epoch": 8.345705196182397, + "grad_norm": 0.0013417246518656611, + "learning_rate": 7.307325602970744e-07, + "loss": 0.0008347, + "memory(GiB)": 26.31, + "step": 7870, + "train_speed(iter/s)": 0.581269 + }, + { + "acc": 0.99987373, + "epoch": 8.351007423117709, + "grad_norm": 0.016306953504681587, + "learning_rate": 7.261786172441866e-07, + "loss": 0.0002477, + "memory(GiB)": 26.31, + "step": 7875, + "train_speed(iter/s)": 0.581272 + }, + { + "acc": 1.0, + "epoch": 8.356309650053023, + "grad_norm": 0.03384576737880707, + "learning_rate": 7.216378168944825e-07, + "loss": 0.00020672, + "memory(GiB)": 26.31, + "step": 7880, + "train_speed(iter/s)": 0.581277 + }, + { + "acc": 0.9998826, + "epoch": 8.361611876988334, + "grad_norm": 0.00972414668649435, + "learning_rate": 7.171101732100366e-07, + "loss": 0.00042422, + "memory(GiB)": 26.31, + "step": 7885, + "train_speed(iter/s)": 0.581279 + }, + { + "acc": 0.99987803, + "epoch": 8.366914103923648, + "grad_norm": 0.010824889875948429, + "learning_rate": 7.125957001124683e-07, + "loss": 0.00028841, + "memory(GiB)": 26.31, + "step": 7890, + "train_speed(iter/s)": 0.58128 + }, + { + "acc": 0.99977264, + "epoch": 8.372216330858961, + "grad_norm": 0.004371269606053829, + "learning_rate": 7.080944114829013e-07, + "loss": 0.00040134, + "memory(GiB)": 26.31, + "step": 7895, + "train_speed(iter/s)": 0.581281 + }, + { + "acc": 1.0, + "epoch": 8.377518557794273, + "grad_norm": 0.0028774836100637913, + "learning_rate": 7.036063211619177e-07, + "loss": 0.00010729, + "memory(GiB)": 26.31, + "step": 7900, + "train_speed(iter/s)": 0.581282 + }, + { + "acc": 0.99987183, + "epoch": 8.382820784729587, + "grad_norm": 0.005267132073640823, + "learning_rate": 6.991314429495186e-07, + "loss": 0.00031727, + "memory(GiB)": 26.31, + "step": 7905, + "train_speed(iter/s)": 0.581283 + }, + { + "acc": 0.99976959, + "epoch": 8.388123011664899, + "grad_norm": 0.050596244633197784, + "learning_rate": 6.946697906050808e-07, + "loss": 0.00017752, + "memory(GiB)": 26.31, + "step": 7910, + "train_speed(iter/s)": 0.581285 + }, + { + "acc": 0.9996068, + "epoch": 8.393425238600212, + "grad_norm": 0.011514488607645035, + "learning_rate": 6.902213778473115e-07, + "loss": 0.00113118, + "memory(GiB)": 26.31, + "step": 7915, + "train_speed(iter/s)": 0.581286 + }, + { + "acc": 0.99986191, + "epoch": 8.398727465535526, + "grad_norm": 0.038335733115673065, + "learning_rate": 6.857862183542143e-07, + "loss": 0.00021071, + "memory(GiB)": 26.31, + "step": 7920, + "train_speed(iter/s)": 0.581289 + }, + { + "acc": 0.99976358, + "epoch": 8.404029692470838, + "grad_norm": 0.040934812277555466, + "learning_rate": 6.813643257630354e-07, + "loss": 0.00021678, + "memory(GiB)": 26.31, + "step": 7925, + "train_speed(iter/s)": 0.581293 + }, + { + "acc": 0.99987116, + "epoch": 8.409331919406151, + "grad_norm": 0.0021664213854819536, + "learning_rate": 6.769557136702325e-07, + "loss": 0.00011409, + "memory(GiB)": 26.31, + "step": 7930, + "train_speed(iter/s)": 0.581294 + }, + { + "acc": 0.99972258, + "epoch": 8.414634146341463, + "grad_norm": 0.07637397944927216, + "learning_rate": 6.725603956314253e-07, + "loss": 0.00044861, + "memory(GiB)": 26.31, + "step": 7935, + "train_speed(iter/s)": 0.581296 + }, + { + "acc": 0.99988537, + "epoch": 8.419936373276776, + "grad_norm": 0.00367990811355412, + "learning_rate": 6.681783851613587e-07, + "loss": 0.0002019, + "memory(GiB)": 26.31, + "step": 7940, + "train_speed(iter/s)": 0.581297 + }, + { + "acc": 0.99964008, + "epoch": 8.425238600212088, + "grad_norm": 0.07514069974422455, + "learning_rate": 6.638096957338587e-07, + "loss": 0.00043779, + "memory(GiB)": 26.31, + "step": 7945, + "train_speed(iter/s)": 0.5813 + }, + { + "acc": 0.9997551, + "epoch": 8.430540827147402, + "grad_norm": 0.0004876448365394026, + "learning_rate": 6.594543407817915e-07, + "loss": 0.00077535, + "memory(GiB)": 26.31, + "step": 7950, + "train_speed(iter/s)": 0.581301 + }, + { + "acc": 0.99986038, + "epoch": 8.435843054082715, + "grad_norm": 0.05327408015727997, + "learning_rate": 6.551123336970226e-07, + "loss": 0.0002358, + "memory(GiB)": 26.31, + "step": 7955, + "train_speed(iter/s)": 0.581301 + }, + { + "acc": 0.99986629, + "epoch": 8.441145281018027, + "grad_norm": 0.003401283174753189, + "learning_rate": 6.507836878303758e-07, + "loss": 0.00015811, + "memory(GiB)": 26.31, + "step": 7960, + "train_speed(iter/s)": 0.581305 + }, + { + "acc": 1.0, + "epoch": 8.44644750795334, + "grad_norm": 0.0021104025654494762, + "learning_rate": 6.46468416491591e-07, + "loss": 4.444e-05, + "memory(GiB)": 26.31, + "step": 7965, + "train_speed(iter/s)": 0.581306 + }, + { + "acc": 0.99987803, + "epoch": 8.451749734888653, + "grad_norm": 0.001826183870434761, + "learning_rate": 6.421665329492848e-07, + "loss": 0.00017718, + "memory(GiB)": 26.31, + "step": 7970, + "train_speed(iter/s)": 0.581307 + }, + { + "acc": 0.99988098, + "epoch": 8.457051961823966, + "grad_norm": 0.025131119415163994, + "learning_rate": 6.378780504309089e-07, + "loss": 0.00017582, + "memory(GiB)": 26.31, + "step": 7975, + "train_speed(iter/s)": 0.581308 + }, + { + "acc": 0.9997613, + "epoch": 8.46235418875928, + "grad_norm": 0.06657654792070389, + "learning_rate": 6.336029821227086e-07, + "loss": 0.00028992, + "memory(GiB)": 26.31, + "step": 7980, + "train_speed(iter/s)": 0.58131 + }, + { + "acc": 0.9998724, + "epoch": 8.467656415694591, + "grad_norm": 0.002139176009222865, + "learning_rate": 6.293413411696846e-07, + "loss": 0.00022113, + "memory(GiB)": 26.31, + "step": 7985, + "train_speed(iter/s)": 0.581311 + }, + { + "acc": 0.99986038, + "epoch": 8.472958642629905, + "grad_norm": 0.001917119137942791, + "learning_rate": 6.250931406755482e-07, + "loss": 0.00025974, + "memory(GiB)": 26.31, + "step": 7990, + "train_speed(iter/s)": 0.581313 + }, + { + "acc": 1.0, + "epoch": 8.478260869565217, + "grad_norm": 0.0010230530751869082, + "learning_rate": 6.208583937026887e-07, + "loss": 0.00013286, + "memory(GiB)": 26.31, + "step": 7995, + "train_speed(iter/s)": 0.581317 + }, + { + "acc": 0.99986267, + "epoch": 8.48356309650053, + "grad_norm": 0.0006629582494497299, + "learning_rate": 6.166371132721243e-07, + "loss": 0.00013081, + "memory(GiB)": 26.31, + "step": 8000, + "train_speed(iter/s)": 0.581318 + }, + { + "acc": 1.0, + "epoch": 8.488865323435842, + "grad_norm": 0.0006745496648363769, + "learning_rate": 6.124293123634681e-07, + "loss": 5.918e-05, + "memory(GiB)": 26.31, + "step": 8005, + "train_speed(iter/s)": 0.581318 + }, + { + "acc": 0.99989834, + "epoch": 8.494167550371156, + "grad_norm": 0.0012669694842770696, + "learning_rate": 6.082350039148852e-07, + "loss": 0.00026928, + "memory(GiB)": 26.31, + "step": 8010, + "train_speed(iter/s)": 0.581319 + }, + { + "acc": 0.99975195, + "epoch": 8.49946977730647, + "grad_norm": 0.057022713124752045, + "learning_rate": 6.040542008230552e-07, + "loss": 0.00072886, + "memory(GiB)": 26.31, + "step": 8015, + "train_speed(iter/s)": 0.581322 + }, + { + "acc": 0.99977112, + "epoch": 8.504772004241781, + "grad_norm": 0.0022008903324604034, + "learning_rate": 5.998869159431307e-07, + "loss": 0.00036921, + "memory(GiB)": 26.31, + "step": 8020, + "train_speed(iter/s)": 0.581326 + }, + { + "acc": 0.99954042, + "epoch": 8.510074231177095, + "grad_norm": 0.22246907651424408, + "learning_rate": 5.957331620886968e-07, + "loss": 0.00077993, + "memory(GiB)": 26.31, + "step": 8025, + "train_speed(iter/s)": 0.581328 + }, + { + "acc": 0.99975395, + "epoch": 8.515376458112407, + "grad_norm": 0.12080390006303787, + "learning_rate": 5.915929520317385e-07, + "loss": 0.00024467, + "memory(GiB)": 26.31, + "step": 8030, + "train_speed(iter/s)": 0.581329 + }, + { + "acc": 1.0, + "epoch": 8.52067868504772, + "grad_norm": 0.04112159460783005, + "learning_rate": 5.874662985025903e-07, + "loss": 0.00021345, + "memory(GiB)": 26.31, + "step": 8035, + "train_speed(iter/s)": 0.581332 + }, + { + "acc": 0.99977589, + "epoch": 8.525980911983034, + "grad_norm": 0.00460523646324873, + "learning_rate": 5.833532141899069e-07, + "loss": 0.00028215, + "memory(GiB)": 26.31, + "step": 8040, + "train_speed(iter/s)": 0.581338 + }, + { + "acc": 0.99978123, + "epoch": 8.531283138918345, + "grad_norm": 0.07351583242416382, + "learning_rate": 5.792537117406182e-07, + "loss": 0.00062952, + "memory(GiB)": 26.31, + "step": 8045, + "train_speed(iter/s)": 0.581339 + }, + { + "acc": 1.0, + "epoch": 8.536585365853659, + "grad_norm": 0.004166380036622286, + "learning_rate": 5.751678037598939e-07, + "loss": 8.209e-05, + "memory(GiB)": 26.31, + "step": 8050, + "train_speed(iter/s)": 0.581341 + }, + { + "acc": 0.99986343, + "epoch": 8.54188759278897, + "grad_norm": 0.004580026958137751, + "learning_rate": 5.710955028111013e-07, + "loss": 0.00103224, + "memory(GiB)": 26.31, + "step": 8055, + "train_speed(iter/s)": 0.581343 + }, + { + "acc": 0.99973564, + "epoch": 8.547189819724284, + "grad_norm": 0.002784633543342352, + "learning_rate": 5.670368214157719e-07, + "loss": 0.00031335, + "memory(GiB)": 26.31, + "step": 8060, + "train_speed(iter/s)": 0.581345 + }, + { + "acc": 0.99988317, + "epoch": 8.552492046659598, + "grad_norm": 0.03273988887667656, + "learning_rate": 5.629917720535582e-07, + "loss": 0.00073676, + "memory(GiB)": 26.31, + "step": 8065, + "train_speed(iter/s)": 0.581346 + }, + { + "acc": 1.0, + "epoch": 8.55779427359491, + "grad_norm": 0.0014363267691805959, + "learning_rate": 5.589603671621957e-07, + "loss": 8.492e-05, + "memory(GiB)": 26.31, + "step": 8070, + "train_speed(iter/s)": 0.581354 + }, + { + "acc": 0.99988155, + "epoch": 8.563096500530223, + "grad_norm": 0.0011595258256420493, + "learning_rate": 5.549426191374673e-07, + "loss": 0.00026058, + "memory(GiB)": 26.31, + "step": 8075, + "train_speed(iter/s)": 0.581356 + }, + { + "acc": 0.99974566, + "epoch": 8.568398727465535, + "grad_norm": 0.4120935797691345, + "learning_rate": 5.509385403331628e-07, + "loss": 0.00089901, + "memory(GiB)": 26.31, + "step": 8080, + "train_speed(iter/s)": 0.581356 + }, + { + "acc": 0.99988785, + "epoch": 8.573700954400849, + "grad_norm": 0.03953966125845909, + "learning_rate": 5.46948143061043e-07, + "loss": 0.00022376, + "memory(GiB)": 26.31, + "step": 8085, + "train_speed(iter/s)": 0.581357 + }, + { + "acc": 0.99988422, + "epoch": 8.57900318133616, + "grad_norm": 0.04297780618071556, + "learning_rate": 5.429714395907992e-07, + "loss": 0.00016779, + "memory(GiB)": 26.31, + "step": 8090, + "train_speed(iter/s)": 0.581358 + }, + { + "acc": 0.99986973, + "epoch": 8.584305408271474, + "grad_norm": 0.11942251026630402, + "learning_rate": 5.39008442150018e-07, + "loss": 0.00034182, + "memory(GiB)": 26.31, + "step": 8095, + "train_speed(iter/s)": 0.581362 + }, + { + "acc": 0.99975233, + "epoch": 8.589607635206788, + "grad_norm": 0.4256443381309509, + "learning_rate": 5.350591629241419e-07, + "loss": 0.00112517, + "memory(GiB)": 26.31, + "step": 8100, + "train_speed(iter/s)": 0.581363 + }, + { + "acc": 0.99988317, + "epoch": 8.5949098621421, + "grad_norm": 0.03107130341231823, + "learning_rate": 5.311236140564336e-07, + "loss": 0.00030147, + "memory(GiB)": 26.31, + "step": 8105, + "train_speed(iter/s)": 0.581364 + }, + { + "acc": 1.0, + "epoch": 8.600212089077413, + "grad_norm": 0.01565447449684143, + "learning_rate": 5.272018076479365e-07, + "loss": 0.00019858, + "memory(GiB)": 26.31, + "step": 8110, + "train_speed(iter/s)": 0.581366 + }, + { + "acc": 1.0, + "epoch": 8.605514316012725, + "grad_norm": 0.01462327130138874, + "learning_rate": 5.232937557574392e-07, + "loss": 0.00015697, + "memory(GiB)": 26.31, + "step": 8115, + "train_speed(iter/s)": 0.581369 + }, + { + "acc": 1.0, + "epoch": 8.610816542948038, + "grad_norm": 0.0016528492560610175, + "learning_rate": 5.193994704014368e-07, + "loss": 4.902e-05, + "memory(GiB)": 26.31, + "step": 8120, + "train_speed(iter/s)": 0.581376 + }, + { + "acc": 0.9997674, + "epoch": 8.61611876988335, + "grad_norm": 0.010499105788767338, + "learning_rate": 5.155189635540981e-07, + "loss": 0.00049511, + "memory(GiB)": 26.31, + "step": 8125, + "train_speed(iter/s)": 0.581377 + }, + { + "acc": 1.0, + "epoch": 8.621420996818664, + "grad_norm": 0.040201202034950256, + "learning_rate": 5.116522471472227e-07, + "loss": 3.062e-05, + "memory(GiB)": 26.31, + "step": 8130, + "train_speed(iter/s)": 0.581379 + }, + { + "acc": 1.0, + "epoch": 8.626723223753977, + "grad_norm": 0.0030659495387226343, + "learning_rate": 5.07799333070206e-07, + "loss": 4.329e-05, + "memory(GiB)": 26.31, + "step": 8135, + "train_speed(iter/s)": 0.581384 + }, + { + "acc": 0.99987679, + "epoch": 8.632025450689289, + "grad_norm": 0.003144504502415657, + "learning_rate": 5.039602331700092e-07, + "loss": 0.00025904, + "memory(GiB)": 26.31, + "step": 8140, + "train_speed(iter/s)": 0.581386 + }, + { + "acc": 0.99987803, + "epoch": 8.637327677624603, + "grad_norm": 0.026025842875242233, + "learning_rate": 5.001349592511136e-07, + "loss": 0.00028486, + "memory(GiB)": 26.31, + "step": 8145, + "train_speed(iter/s)": 0.58139 + }, + { + "acc": 0.99986706, + "epoch": 8.642629904559914, + "grad_norm": 0.004663050640374422, + "learning_rate": 4.963235230754879e-07, + "loss": 0.00026084, + "memory(GiB)": 26.31, + "step": 8150, + "train_speed(iter/s)": 0.581392 + }, + { + "acc": 0.99987745, + "epoch": 8.647932131495228, + "grad_norm": 0.029145730659365654, + "learning_rate": 4.92525936362555e-07, + "loss": 0.00018182, + "memory(GiB)": 26.31, + "step": 8155, + "train_speed(iter/s)": 0.581394 + }, + { + "acc": 0.99975986, + "epoch": 8.653234358430542, + "grad_norm": 0.00190520950127393, + "learning_rate": 4.887422107891513e-07, + "loss": 0.00081231, + "memory(GiB)": 26.31, + "step": 8160, + "train_speed(iter/s)": 0.581395 + }, + { + "acc": 0.99974737, + "epoch": 8.658536585365853, + "grad_norm": 0.0602591373026371, + "learning_rate": 4.84972357989494e-07, + "loss": 0.00022456, + "memory(GiB)": 26.31, + "step": 8165, + "train_speed(iter/s)": 0.581396 + }, + { + "acc": 0.99988375, + "epoch": 8.663838812301167, + "grad_norm": 0.018995080143213272, + "learning_rate": 4.812163895551438e-07, + "loss": 0.00012022, + "memory(GiB)": 26.31, + "step": 8170, + "train_speed(iter/s)": 0.5814 + }, + { + "acc": 0.99986629, + "epoch": 8.669141039236479, + "grad_norm": 0.04597296565771103, + "learning_rate": 4.774743170349703e-07, + "loss": 0.00028186, + "memory(GiB)": 26.31, + "step": 8175, + "train_speed(iter/s)": 0.581403 + }, + { + "acc": 0.99921989, + "epoch": 8.674443266171792, + "grad_norm": 0.06943114101886749, + "learning_rate": 4.7374615193511503e-07, + "loss": 0.0026015, + "memory(GiB)": 26.31, + "step": 8180, + "train_speed(iter/s)": 0.581403 + }, + { + "acc": 1.0, + "epoch": 8.679745493107106, + "grad_norm": 0.041785068809986115, + "learning_rate": 4.7003190571895607e-07, + "loss": 9.327e-05, + "memory(GiB)": 26.31, + "step": 8185, + "train_speed(iter/s)": 0.581404 + }, + { + "acc": 0.99987984, + "epoch": 8.685047720042418, + "grad_norm": 0.027803828939795494, + "learning_rate": 4.663315898070774e-07, + "loss": 0.00023471, + "memory(GiB)": 26.31, + "step": 8190, + "train_speed(iter/s)": 0.581409 + }, + { + "acc": 0.99975643, + "epoch": 8.690349946977731, + "grad_norm": 0.0025035461876541376, + "learning_rate": 4.626452155772263e-07, + "loss": 0.00022978, + "memory(GiB)": 26.31, + "step": 8195, + "train_speed(iter/s)": 0.581412 + }, + { + "acc": 0.99966631, + "epoch": 8.695652173913043, + "grad_norm": 0.028833532705903053, + "learning_rate": 4.589727943642835e-07, + "loss": 0.00042026, + "memory(GiB)": 26.31, + "step": 8200, + "train_speed(iter/s)": 0.581415 + }, + { + "acc": 0.99961128, + "epoch": 8.700954400848357, + "grad_norm": 0.002554490463808179, + "learning_rate": 4.553143374602267e-07, + "loss": 0.00063897, + "memory(GiB)": 26.31, + "step": 8205, + "train_speed(iter/s)": 0.581416 + }, + { + "acc": 0.99988985, + "epoch": 8.706256627783668, + "grad_norm": 0.5980751514434814, + "learning_rate": 4.516698561140955e-07, + "loss": 0.00077893, + "memory(GiB)": 26.31, + "step": 8210, + "train_speed(iter/s)": 0.581417 + }, + { + "acc": 0.99973564, + "epoch": 8.711558854718982, + "grad_norm": 0.08448156714439392, + "learning_rate": 4.480393615319589e-07, + "loss": 0.00050244, + "memory(GiB)": 26.31, + "step": 8215, + "train_speed(iter/s)": 0.581419 + }, + { + "acc": 1.0, + "epoch": 8.716861081654296, + "grad_norm": 0.0047545284032821655, + "learning_rate": 4.4442286487687844e-07, + "loss": 0.00010639, + "memory(GiB)": 26.31, + "step": 8220, + "train_speed(iter/s)": 0.581419 + }, + { + "acc": 0.99964352, + "epoch": 8.722163308589607, + "grad_norm": 0.004202633630484343, + "learning_rate": 4.4082037726887366e-07, + "loss": 0.0005389, + "memory(GiB)": 26.31, + "step": 8225, + "train_speed(iter/s)": 0.58142 + }, + { + "acc": 0.99972897, + "epoch": 8.72746553552492, + "grad_norm": 0.0017853471217676997, + "learning_rate": 4.372319097848914e-07, + "loss": 0.00035583, + "memory(GiB)": 26.31, + "step": 8230, + "train_speed(iter/s)": 0.581421 + }, + { + "acc": 0.99976826, + "epoch": 8.732767762460233, + "grad_norm": 0.03493494167923927, + "learning_rate": 4.3365747345876773e-07, + "loss": 0.00036009, + "memory(GiB)": 26.31, + "step": 8235, + "train_speed(iter/s)": 0.581425 + }, + { + "acc": 0.99986839, + "epoch": 8.738069989395546, + "grad_norm": 0.160511776804924, + "learning_rate": 4.300970792811949e-07, + "loss": 0.0003888, + "memory(GiB)": 26.31, + "step": 8240, + "train_speed(iter/s)": 0.581432 + }, + { + "acc": 0.99985552, + "epoch": 8.743372216330858, + "grad_norm": 0.036365751177072525, + "learning_rate": 4.265507381996912e-07, + "loss": 0.00027258, + "memory(GiB)": 26.31, + "step": 8245, + "train_speed(iter/s)": 0.581436 + }, + { + "acc": 0.99975319, + "epoch": 8.748674443266172, + "grad_norm": 0.05677983909845352, + "learning_rate": 4.2301846111856155e-07, + "loss": 0.00044567, + "memory(GiB)": 26.31, + "step": 8250, + "train_speed(iter/s)": 0.581437 + }, + { + "acc": 0.99958639, + "epoch": 8.753976670201485, + "grad_norm": 0.04142065346240997, + "learning_rate": 4.1950025889886813e-07, + "loss": 0.00106637, + "memory(GiB)": 26.31, + "step": 8255, + "train_speed(iter/s)": 0.581441 + }, + { + "acc": 0.99974995, + "epoch": 8.759278897136797, + "grad_norm": 0.002513850573450327, + "learning_rate": 4.1599614235839595e-07, + "loss": 0.00029619, + "memory(GiB)": 26.31, + "step": 8260, + "train_speed(iter/s)": 0.581444 + }, + { + "acc": 0.99987869, + "epoch": 8.76458112407211, + "grad_norm": 0.04915174841880798, + "learning_rate": 4.1250612227161794e-07, + "loss": 0.00031457, + "memory(GiB)": 26.31, + "step": 8265, + "train_speed(iter/s)": 0.581448 + }, + { + "acc": 0.99959469, + "epoch": 8.769883351007422, + "grad_norm": 0.0011660271557047963, + "learning_rate": 4.0903020936966484e-07, + "loss": 0.0006362, + "memory(GiB)": 26.31, + "step": 8270, + "train_speed(iter/s)": 0.58145 + }, + { + "acc": 0.99973412, + "epoch": 8.775185577942736, + "grad_norm": 0.0009855523239821196, + "learning_rate": 4.0556841434028936e-07, + "loss": 0.00034017, + "memory(GiB)": 26.31, + "step": 8275, + "train_speed(iter/s)": 0.581451 + }, + { + "acc": 0.99977989, + "epoch": 8.78048780487805, + "grad_norm": 0.0027486311737447977, + "learning_rate": 4.0212074782783416e-07, + "loss": 0.00026543, + "memory(GiB)": 26.31, + "step": 8280, + "train_speed(iter/s)": 0.581452 + }, + { + "acc": 1.0, + "epoch": 8.785790031813361, + "grad_norm": 0.0027941821608692408, + "learning_rate": 3.986872204332013e-07, + "loss": 5.459e-05, + "memory(GiB)": 26.31, + "step": 8285, + "train_speed(iter/s)": 0.581454 + }, + { + "acc": 0.99988098, + "epoch": 8.791092258748675, + "grad_norm": 0.012088056653738022, + "learning_rate": 3.9526784271381666e-07, + "loss": 0.00016703, + "memory(GiB)": 26.31, + "step": 8290, + "train_speed(iter/s)": 0.581458 + }, + { + "acc": 0.99985714, + "epoch": 8.796394485683987, + "grad_norm": 0.05154626443982124, + "learning_rate": 3.9186262518359763e-07, + "loss": 0.00036198, + "memory(GiB)": 26.31, + "step": 8295, + "train_speed(iter/s)": 0.581462 + }, + { + "acc": 0.99988937, + "epoch": 8.8016967126193, + "grad_norm": 0.06838499009609222, + "learning_rate": 3.8847157831292366e-07, + "loss": 0.0004502, + "memory(GiB)": 26.31, + "step": 8300, + "train_speed(iter/s)": 0.581463 + }, + { + "acc": 0.99950447, + "epoch": 8.806998939554614, + "grad_norm": 0.30260416865348816, + "learning_rate": 3.8509471252860156e-07, + "loss": 0.00166767, + "memory(GiB)": 26.31, + "step": 8305, + "train_speed(iter/s)": 0.581468 + }, + { + "acc": 0.99987803, + "epoch": 8.812301166489926, + "grad_norm": 0.0005098844994790852, + "learning_rate": 3.8173203821383315e-07, + "loss": 0.00017671, + "memory(GiB)": 26.31, + "step": 8310, + "train_speed(iter/s)": 0.581471 + }, + { + "acc": 0.9998724, + "epoch": 8.81760339342524, + "grad_norm": 0.005667832680046558, + "learning_rate": 3.7838356570818497e-07, + "loss": 0.00022659, + "memory(GiB)": 26.31, + "step": 8315, + "train_speed(iter/s)": 0.581473 + }, + { + "acc": 0.99974613, + "epoch": 8.822905620360551, + "grad_norm": 0.0008252383559010923, + "learning_rate": 3.7504930530755664e-07, + "loss": 0.00026339, + "memory(GiB)": 26.31, + "step": 8320, + "train_speed(iter/s)": 0.581474 + }, + { + "acc": 0.99961548, + "epoch": 8.828207847295864, + "grad_norm": 0.026456259191036224, + "learning_rate": 3.7172926726414727e-07, + "loss": 0.00107916, + "memory(GiB)": 26.31, + "step": 8325, + "train_speed(iter/s)": 0.581474 + }, + { + "acc": 0.99976177, + "epoch": 8.833510074231176, + "grad_norm": 0.018122496083378792, + "learning_rate": 3.684234617864247e-07, + "loss": 0.00023061, + "memory(GiB)": 26.31, + "step": 8330, + "train_speed(iter/s)": 0.581477 + }, + { + "acc": 0.99963703, + "epoch": 8.83881230116649, + "grad_norm": 0.052434735000133514, + "learning_rate": 3.6513189903909565e-07, + "loss": 0.00043928, + "memory(GiB)": 26.31, + "step": 8335, + "train_speed(iter/s)": 0.581478 + }, + { + "acc": 0.99975014, + "epoch": 8.844114528101803, + "grad_norm": 0.06747570633888245, + "learning_rate": 3.618545891430718e-07, + "loss": 0.00037864, + "memory(GiB)": 26.31, + "step": 8340, + "train_speed(iter/s)": 0.581481 + }, + { + "acc": 0.9998848, + "epoch": 8.849416755037115, + "grad_norm": 0.003688774537295103, + "learning_rate": 3.5859154217544087e-07, + "loss": 0.00032196, + "memory(GiB)": 26.31, + "step": 8345, + "train_speed(iter/s)": 0.581482 + }, + { + "acc": 1.0, + "epoch": 8.854718981972429, + "grad_norm": 0.004484543111175299, + "learning_rate": 3.5534276816943463e-07, + "loss": 0.00030583, + "memory(GiB)": 26.31, + "step": 8350, + "train_speed(iter/s)": 0.581487 + }, + { + "acc": 1.0, + "epoch": 8.86002120890774, + "grad_norm": 0.05573554337024689, + "learning_rate": 3.5210827711439973e-07, + "loss": 7.674e-05, + "memory(GiB)": 26.31, + "step": 8355, + "train_speed(iter/s)": 0.581493 + }, + { + "acc": 0.99985876, + "epoch": 8.865323435843054, + "grad_norm": 0.13664424419403076, + "learning_rate": 3.488880789557624e-07, + "loss": 0.00028313, + "memory(GiB)": 26.31, + "step": 8360, + "train_speed(iter/s)": 0.581494 + }, + { + "acc": 0.99986773, + "epoch": 8.870625662778368, + "grad_norm": 0.04399307072162628, + "learning_rate": 3.456821835950048e-07, + "loss": 0.00018446, + "memory(GiB)": 26.31, + "step": 8365, + "train_speed(iter/s)": 0.581495 + }, + { + "acc": 0.99987803, + "epoch": 8.87592788971368, + "grad_norm": 0.002994449343532324, + "learning_rate": 3.4249060088962706e-07, + "loss": 0.0001245, + "memory(GiB)": 26.31, + "step": 8370, + "train_speed(iter/s)": 0.581498 + }, + { + "acc": 0.99988842, + "epoch": 8.881230116648993, + "grad_norm": 0.0009798618266358972, + "learning_rate": 3.393133406531237e-07, + "loss": 0.00014089, + "memory(GiB)": 26.31, + "step": 8375, + "train_speed(iter/s)": 0.5815 + }, + { + "acc": 1.0, + "epoch": 8.886532343584305, + "grad_norm": 0.0361686572432518, + "learning_rate": 3.3615041265494834e-07, + "loss": 0.00016998, + "memory(GiB)": 26.31, + "step": 8380, + "train_speed(iter/s)": 0.581501 + }, + { + "acc": 0.99961624, + "epoch": 8.891834570519618, + "grad_norm": 0.13913202285766602, + "learning_rate": 3.330018266204864e-07, + "loss": 0.00047402, + "memory(GiB)": 26.31, + "step": 8385, + "train_speed(iter/s)": 0.581501 + }, + { + "acc": 0.99974327, + "epoch": 8.89713679745493, + "grad_norm": 0.0007361209718510509, + "learning_rate": 3.298675922310256e-07, + "loss": 0.0005421, + "memory(GiB)": 26.31, + "step": 8390, + "train_speed(iter/s)": 0.581505 + }, + { + "acc": 0.99986343, + "epoch": 8.902439024390244, + "grad_norm": 0.02788899466395378, + "learning_rate": 3.2674771912372485e-07, + "loss": 0.00022882, + "memory(GiB)": 26.31, + "step": 8395, + "train_speed(iter/s)": 0.581506 + }, + { + "acc": 0.99964542, + "epoch": 8.907741251325557, + "grad_norm": 0.06532458961009979, + "learning_rate": 3.2364221689158365e-07, + "loss": 0.00044322, + "memory(GiB)": 26.31, + "step": 8400, + "train_speed(iter/s)": 0.581509 + }, + { + "acc": 0.99950171, + "epoch": 8.91304347826087, + "grad_norm": 0.49831533432006836, + "learning_rate": 3.2055109508341453e-07, + "loss": 0.00124991, + "memory(GiB)": 26.31, + "step": 8405, + "train_speed(iter/s)": 0.581512 + }, + { + "acc": 0.99976196, + "epoch": 8.918345705196183, + "grad_norm": 0.05611543357372284, + "learning_rate": 3.174743632038135e-07, + "loss": 0.00023922, + "memory(GiB)": 26.31, + "step": 8410, + "train_speed(iter/s)": 0.581514 + }, + { + "acc": 0.99985952, + "epoch": 8.923647932131495, + "grad_norm": 0.00900158379226923, + "learning_rate": 3.1441203071312993e-07, + "loss": 0.00030074, + "memory(GiB)": 26.31, + "step": 8415, + "train_speed(iter/s)": 0.581517 + }, + { + "acc": 1.0, + "epoch": 8.928950159066808, + "grad_norm": 0.005048360675573349, + "learning_rate": 3.113641070274376e-07, + "loss": 8.591e-05, + "memory(GiB)": 26.31, + "step": 8420, + "train_speed(iter/s)": 0.581523 + }, + { + "acc": 0.99986706, + "epoch": 8.934252386002122, + "grad_norm": 0.04308345913887024, + "learning_rate": 3.0833060151850695e-07, + "loss": 0.00046462, + "memory(GiB)": 26.31, + "step": 8425, + "train_speed(iter/s)": 0.581523 + }, + { + "acc": 0.99988098, + "epoch": 8.939554612937433, + "grad_norm": 0.0006353101343847811, + "learning_rate": 3.0531152351377423e-07, + "loss": 0.00011048, + "memory(GiB)": 26.31, + "step": 8430, + "train_speed(iter/s)": 0.581524 + }, + { + "acc": 0.9997304, + "epoch": 8.944856839872747, + "grad_norm": 0.08065080642700195, + "learning_rate": 3.0230688229631476e-07, + "loss": 0.00043914, + "memory(GiB)": 26.31, + "step": 8435, + "train_speed(iter/s)": 0.581527 + }, + { + "acc": 0.9997674, + "epoch": 8.950159066808059, + "grad_norm": 0.008123918436467648, + "learning_rate": 2.993166871048129e-07, + "loss": 0.00030036, + "memory(GiB)": 26.31, + "step": 8440, + "train_speed(iter/s)": 0.58153 + }, + { + "acc": 0.99975395, + "epoch": 8.955461293743372, + "grad_norm": 0.048602763563394547, + "learning_rate": 2.96340947133535e-07, + "loss": 0.00044107, + "memory(GiB)": 26.31, + "step": 8445, + "train_speed(iter/s)": 0.581531 + }, + { + "acc": 1.0, + "epoch": 8.960763520678686, + "grad_norm": 0.004590745083987713, + "learning_rate": 2.933796715323001e-07, + "loss": 0.00012758, + "memory(GiB)": 26.31, + "step": 8450, + "train_speed(iter/s)": 0.581533 + }, + { + "acc": 0.9992733, + "epoch": 8.966065747613998, + "grad_norm": 0.03488789498806, + "learning_rate": 2.9043286940645254e-07, + "loss": 0.00081013, + "memory(GiB)": 26.31, + "step": 8455, + "train_speed(iter/s)": 0.581534 + }, + { + "acc": 1.0, + "epoch": 8.971367974549311, + "grad_norm": 0.02488904632627964, + "learning_rate": 2.8750054981683154e-07, + "loss": 0.00012265, + "memory(GiB)": 26.31, + "step": 8460, + "train_speed(iter/s)": 0.581537 + }, + { + "acc": 0.99952488, + "epoch": 8.976670201484623, + "grad_norm": 0.03694002330303192, + "learning_rate": 2.845827217797496e-07, + "loss": 0.00072966, + "memory(GiB)": 26.31, + "step": 8465, + "train_speed(iter/s)": 0.58154 + }, + { + "acc": 0.99976711, + "epoch": 8.981972428419937, + "grad_norm": 0.021575380116701126, + "learning_rate": 2.816793942669559e-07, + "loss": 0.00030038, + "memory(GiB)": 26.31, + "step": 8470, + "train_speed(iter/s)": 0.581543 + }, + { + "acc": 0.99987869, + "epoch": 8.987274655355248, + "grad_norm": 0.006561917718499899, + "learning_rate": 2.7879057620561597e-07, + "loss": 0.00012396, + "memory(GiB)": 26.31, + "step": 8475, + "train_speed(iter/s)": 0.581543 + }, + { + "acc": 0.99978104, + "epoch": 8.992576882290562, + "grad_norm": 0.07010827213525772, + "learning_rate": 2.759162764782804e-07, + "loss": 0.00043721, + "memory(GiB)": 26.31, + "step": 8480, + "train_speed(iter/s)": 0.581544 + }, + { + "acc": 0.99987984, + "epoch": 8.997879109225876, + "grad_norm": 0.018172938376665115, + "learning_rate": 2.7305650392286003e-07, + "loss": 0.00020561, + "memory(GiB)": 26.31, + "step": 8485, + "train_speed(iter/s)": 0.581544 + }, + { + "acc": 1.0, + "epoch": 9.003181336161187, + "grad_norm": 0.03349972888827324, + "learning_rate": 2.7021126733259704e-07, + "loss": 0.00010355, + "memory(GiB)": 26.31, + "step": 8490, + "train_speed(iter/s)": 0.581504 + }, + { + "acc": 0.99965086, + "epoch": 9.008483563096501, + "grad_norm": 0.0025340563151985407, + "learning_rate": 2.673805754560371e-07, + "loss": 0.00051078, + "memory(GiB)": 26.31, + "step": 8495, + "train_speed(iter/s)": 0.581505 + }, + { + "acc": 0.99989128, + "epoch": 9.013785790031813, + "grad_norm": 0.036855071783065796, + "learning_rate": 2.64564436997005e-07, + "loss": 0.00042027, + "memory(GiB)": 26.31, + "step": 8500, + "train_speed(iter/s)": 0.581505 + }, + { + "acc": 0.9998724, + "epoch": 9.019088016967126, + "grad_norm": 0.03355565294623375, + "learning_rate": 2.617628606145764e-07, + "loss": 0.00017373, + "memory(GiB)": 26.31, + "step": 8505, + "train_speed(iter/s)": 0.581508 + }, + { + "acc": 0.99989882, + "epoch": 9.024390243902438, + "grad_norm": 0.0011976395035162568, + "learning_rate": 2.589758549230506e-07, + "loss": 0.00010378, + "memory(GiB)": 26.31, + "step": 8510, + "train_speed(iter/s)": 0.58151 + }, + { + "acc": 0.99986916, + "epoch": 9.029692470837752, + "grad_norm": 0.0022142117377370596, + "learning_rate": 2.562034284919272e-07, + "loss": 0.0002, + "memory(GiB)": 26.31, + "step": 8515, + "train_speed(iter/s)": 0.581514 + }, + { + "acc": 0.99988632, + "epoch": 9.034994697773065, + "grad_norm": 0.06629278510808945, + "learning_rate": 2.5344558984587513e-07, + "loss": 0.00030244, + "memory(GiB)": 26.31, + "step": 8520, + "train_speed(iter/s)": 0.581515 + }, + { + "acc": 0.99987869, + "epoch": 9.040296924708377, + "grad_norm": 0.00030782382236793637, + "learning_rate": 2.5070234746470904e-07, + "loss": 0.00025399, + "memory(GiB)": 26.31, + "step": 8525, + "train_speed(iter/s)": 0.581518 + }, + { + "acc": 1.0, + "epoch": 9.04559915164369, + "grad_norm": 0.0020412448793649673, + "learning_rate": 2.479737097833636e-07, + "loss": 0.00021156, + "memory(GiB)": 26.31, + "step": 8530, + "train_speed(iter/s)": 0.581519 + }, + { + "acc": 0.9998538, + "epoch": 9.050901378579002, + "grad_norm": 0.03479117900133133, + "learning_rate": 2.452596851918667e-07, + "loss": 0.00110229, + "memory(GiB)": 26.31, + "step": 8535, + "train_speed(iter/s)": 0.58152 + }, + { + "acc": 0.99977226, + "epoch": 9.056203605514316, + "grad_norm": 0.16863708198070526, + "learning_rate": 2.4256028203531364e-07, + "loss": 0.00090512, + "memory(GiB)": 26.31, + "step": 8540, + "train_speed(iter/s)": 0.581522 + }, + { + "acc": 0.9998724, + "epoch": 9.06150583244963, + "grad_norm": 0.001541724894195795, + "learning_rate": 2.3987550861384156e-07, + "loss": 0.00036416, + "memory(GiB)": 26.31, + "step": 8545, + "train_speed(iter/s)": 0.581523 + }, + { + "acc": 0.99988098, + "epoch": 9.066808059384941, + "grad_norm": 0.06520693749189377, + "learning_rate": 2.3720537318260462e-07, + "loss": 0.00031393, + "memory(GiB)": 26.31, + "step": 8550, + "train_speed(iter/s)": 0.581525 + }, + { + "acc": 1.0, + "epoch": 9.072110286320255, + "grad_norm": 0.002617688849568367, + "learning_rate": 2.3454988395174696e-07, + "loss": 7.239e-05, + "memory(GiB)": 26.31, + "step": 8555, + "train_speed(iter/s)": 0.581526 + }, + { + "acc": 0.9998724, + "epoch": 9.077412513255567, + "grad_norm": 0.0007258942350745201, + "learning_rate": 2.3190904908637995e-07, + "loss": 0.00016769, + "memory(GiB)": 26.31, + "step": 8560, + "train_speed(iter/s)": 0.58153 + }, + { + "acc": 1.0, + "epoch": 9.08271474019088, + "grad_norm": 0.029376430436968803, + "learning_rate": 2.2928287670655434e-07, + "loss": 7.223e-05, + "memory(GiB)": 26.31, + "step": 8565, + "train_speed(iter/s)": 0.581533 + }, + { + "acc": 0.99989033, + "epoch": 9.088016967126194, + "grad_norm": 0.03681400418281555, + "learning_rate": 2.266713748872365e-07, + "loss": 0.0001561, + "memory(GiB)": 26.31, + "step": 8570, + "train_speed(iter/s)": 0.581535 + }, + { + "acc": 1.0, + "epoch": 9.093319194061506, + "grad_norm": 0.004051406867802143, + "learning_rate": 2.2407455165828564e-07, + "loss": 0.00010099, + "memory(GiB)": 26.31, + "step": 8575, + "train_speed(iter/s)": 0.581535 + }, + { + "acc": 0.99986486, + "epoch": 9.09862142099682, + "grad_norm": 0.008981379680335522, + "learning_rate": 2.2149241500442432e-07, + "loss": 0.0002387, + "memory(GiB)": 26.31, + "step": 8580, + "train_speed(iter/s)": 0.581541 + }, + { + "acc": 0.99962225, + "epoch": 9.103923647932131, + "grad_norm": 0.004154110327363014, + "learning_rate": 2.189249728652202e-07, + "loss": 0.00068734, + "memory(GiB)": 26.31, + "step": 8585, + "train_speed(iter/s)": 0.581544 + }, + { + "acc": 1.0, + "epoch": 9.109225874867445, + "grad_norm": 0.001803599065169692, + "learning_rate": 2.163722331350555e-07, + "loss": 3.99e-06, + "memory(GiB)": 26.31, + "step": 8590, + "train_speed(iter/s)": 0.581545 + }, + { + "acc": 0.9998744, + "epoch": 9.114528101802756, + "grad_norm": 0.058594174683094025, + "learning_rate": 2.1383420366310594e-07, + "loss": 0.00020836, + "memory(GiB)": 26.31, + "step": 8595, + "train_speed(iter/s)": 0.581545 + }, + { + "acc": 0.99986115, + "epoch": 9.11983032873807, + "grad_norm": 0.05806174874305725, + "learning_rate": 2.1131089225331617e-07, + "loss": 0.00021942, + "memory(GiB)": 26.31, + "step": 8600, + "train_speed(iter/s)": 0.581546 + }, + { + "acc": 1.0, + "epoch": 9.125132555673384, + "grad_norm": 0.0015247429255396128, + "learning_rate": 2.0880230666437563e-07, + "loss": 3.77e-06, + "memory(GiB)": 26.31, + "step": 8605, + "train_speed(iter/s)": 0.581549 + }, + { + "acc": 0.99960184, + "epoch": 9.130434782608695, + "grad_norm": 0.04763953387737274, + "learning_rate": 2.0630845460969548e-07, + "loss": 0.00065606, + "memory(GiB)": 26.31, + "step": 8610, + "train_speed(iter/s)": 0.581552 + }, + { + "acc": 0.99988041, + "epoch": 9.135737009544009, + "grad_norm": 0.005552299320697784, + "learning_rate": 2.038293437573838e-07, + "loss": 0.00065457, + "memory(GiB)": 26.31, + "step": 8615, + "train_speed(iter/s)": 0.581553 + }, + { + "acc": 1.0, + "epoch": 9.14103923647932, + "grad_norm": 0.0331796295940876, + "learning_rate": 2.0136498173022114e-07, + "loss": 7.548e-05, + "memory(GiB)": 26.31, + "step": 8620, + "train_speed(iter/s)": 0.581554 + }, + { + "acc": 1.0, + "epoch": 9.146341463414634, + "grad_norm": 0.0036993902176618576, + "learning_rate": 1.9891537610563945e-07, + "loss": 8.443e-05, + "memory(GiB)": 26.31, + "step": 8625, + "train_speed(iter/s)": 0.581555 + }, + { + "acc": 0.9998826, + "epoch": 9.151643690349948, + "grad_norm": 0.0014519346877932549, + "learning_rate": 1.9648053441569815e-07, + "loss": 0.00015098, + "memory(GiB)": 26.31, + "step": 8630, + "train_speed(iter/s)": 0.581558 + }, + { + "acc": 0.99976854, + "epoch": 9.15694591728526, + "grad_norm": 0.0005271465633995831, + "learning_rate": 1.9406046414705976e-07, + "loss": 0.00032699, + "memory(GiB)": 26.31, + "step": 8635, + "train_speed(iter/s)": 0.581562 + }, + { + "acc": 0.99975309, + "epoch": 9.162248144220573, + "grad_norm": 0.06643125414848328, + "learning_rate": 1.9165517274096597e-07, + "loss": 0.00028296, + "memory(GiB)": 26.31, + "step": 8640, + "train_speed(iter/s)": 0.581565 + }, + { + "acc": 1.0, + "epoch": 9.167550371155885, + "grad_norm": 0.0011175911640748382, + "learning_rate": 1.8926466759321995e-07, + "loss": 4.757e-05, + "memory(GiB)": 26.31, + "step": 8645, + "train_speed(iter/s)": 0.581566 + }, + { + "acc": 0.99977655, + "epoch": 9.172852598091199, + "grad_norm": 0.29192134737968445, + "learning_rate": 1.8688895605415696e-07, + "loss": 0.00076506, + "memory(GiB)": 26.31, + "step": 8650, + "train_speed(iter/s)": 0.581567 + }, + { + "acc": 0.99963932, + "epoch": 9.17815482502651, + "grad_norm": 0.024805352091789246, + "learning_rate": 1.8452804542862633e-07, + "loss": 0.00051685, + "memory(GiB)": 26.31, + "step": 8655, + "train_speed(iter/s)": 0.581568 + }, + { + "acc": 0.99968348, + "epoch": 9.183457051961824, + "grad_norm": 0.0008316601160913706, + "learning_rate": 1.8218194297596643e-07, + "loss": 0.00039648, + "memory(GiB)": 26.31, + "step": 8660, + "train_speed(iter/s)": 0.581569 + }, + { + "acc": 0.99961185, + "epoch": 9.188759278897138, + "grad_norm": 0.032385896891355515, + "learning_rate": 1.798506559099847e-07, + "loss": 0.00075617, + "memory(GiB)": 26.31, + "step": 8665, + "train_speed(iter/s)": 0.581571 + }, + { + "acc": 0.99973583, + "epoch": 9.19406150583245, + "grad_norm": 0.00038688286440446973, + "learning_rate": 1.7753419139893244e-07, + "loss": 0.00191715, + "memory(GiB)": 26.31, + "step": 8670, + "train_speed(iter/s)": 0.581573 + }, + { + "acc": 0.9996172, + "epoch": 9.199363732767763, + "grad_norm": 0.11170506477355957, + "learning_rate": 1.7523255656548533e-07, + "loss": 0.00074308, + "memory(GiB)": 26.31, + "step": 8675, + "train_speed(iter/s)": 0.581575 + }, + { + "acc": 0.99962177, + "epoch": 9.204665959703075, + "grad_norm": 0.0007719770655967295, + "learning_rate": 1.7294575848672227e-07, + "loss": 0.00152528, + "memory(GiB)": 26.31, + "step": 8680, + "train_speed(iter/s)": 0.581576 + }, + { + "acc": 0.99953203, + "epoch": 9.209968186638388, + "grad_norm": 0.0024956208653748035, + "learning_rate": 1.7067380419409986e-07, + "loss": 0.0006236, + "memory(GiB)": 26.31, + "step": 8685, + "train_speed(iter/s)": 0.581577 + }, + { + "acc": 0.99964581, + "epoch": 9.215270413573702, + "grad_norm": 0.06978549808263779, + "learning_rate": 1.684167006734337e-07, + "loss": 0.00045885, + "memory(GiB)": 26.31, + "step": 8690, + "train_speed(iter/s)": 0.581578 + }, + { + "acc": 0.99962521, + "epoch": 9.220572640509014, + "grad_norm": 0.03831864148378372, + "learning_rate": 1.661744548648758e-07, + "loss": 0.00048588, + "memory(GiB)": 26.31, + "step": 8695, + "train_speed(iter/s)": 0.581585 + }, + { + "acc": 0.99987869, + "epoch": 9.225874867444327, + "grad_norm": 0.04184339568018913, + "learning_rate": 1.6394707366289395e-07, + "loss": 0.00197599, + "memory(GiB)": 26.31, + "step": 8700, + "train_speed(iter/s)": 0.581587 + }, + { + "acc": 1.0, + "epoch": 9.231177094379639, + "grad_norm": 0.004102388396859169, + "learning_rate": 1.6173456391625027e-07, + "loss": 6.415e-05, + "memory(GiB)": 26.31, + "step": 8705, + "train_speed(iter/s)": 0.58159 + }, + { + "acc": 0.99989033, + "epoch": 9.236479321314953, + "grad_norm": 0.0021884252782911062, + "learning_rate": 1.5953693242797986e-07, + "loss": 0.00030634, + "memory(GiB)": 26.31, + "step": 8710, + "train_speed(iter/s)": 0.581591 + }, + { + "acc": 0.99988785, + "epoch": 9.241781548250264, + "grad_norm": 0.030813999474048615, + "learning_rate": 1.57354185955371e-07, + "loss": 0.00035523, + "memory(GiB)": 26.31, + "step": 8715, + "train_speed(iter/s)": 0.581594 + }, + { + "acc": 0.99987745, + "epoch": 9.247083775185578, + "grad_norm": 0.001368062337860465, + "learning_rate": 1.5518633120994226e-07, + "loss": 0.00024497, + "memory(GiB)": 26.31, + "step": 8720, + "train_speed(iter/s)": 0.581596 + }, + { + "acc": 0.99973602, + "epoch": 9.252386002120891, + "grad_norm": 0.15966103971004486, + "learning_rate": 1.5303337485742323e-07, + "loss": 0.00089951, + "memory(GiB)": 26.31, + "step": 8725, + "train_speed(iter/s)": 0.581599 + }, + { + "acc": 1.0, + "epoch": 9.257688229056203, + "grad_norm": 0.012701621279120445, + "learning_rate": 1.5089532351773431e-07, + "loss": 5.587e-05, + "memory(GiB)": 26.31, + "step": 8730, + "train_speed(iter/s)": 0.581599 + }, + { + "acc": 1.0, + "epoch": 9.262990455991517, + "grad_norm": 0.029211273416876793, + "learning_rate": 1.487721837649655e-07, + "loss": 4.952e-05, + "memory(GiB)": 26.31, + "step": 8735, + "train_speed(iter/s)": 0.5816 + }, + { + "acc": 0.99989033, + "epoch": 9.268292682926829, + "grad_norm": 0.0012377180391922593, + "learning_rate": 1.466639621273562e-07, + "loss": 0.0001556, + "memory(GiB)": 26.31, + "step": 8740, + "train_speed(iter/s)": 0.5816 + }, + { + "acc": 0.9997323, + "epoch": 9.273594909862142, + "grad_norm": 0.046300843358039856, + "learning_rate": 1.445706650872768e-07, + "loss": 0.00038963, + "memory(GiB)": 26.31, + "step": 8745, + "train_speed(iter/s)": 0.581603 + }, + { + "acc": 0.99973717, + "epoch": 9.278897136797456, + "grad_norm": 0.04876074939966202, + "learning_rate": 1.4249229908120717e-07, + "loss": 0.00023545, + "memory(GiB)": 26.31, + "step": 8750, + "train_speed(iter/s)": 0.581604 + }, + { + "acc": 1.0, + "epoch": 9.284199363732768, + "grad_norm": 0.001729241805151105, + "learning_rate": 1.4042887049971513e-07, + "loss": 0.00016176, + "memory(GiB)": 26.31, + "step": 8755, + "train_speed(iter/s)": 0.581604 + }, + { + "acc": 0.99988842, + "epoch": 9.289501590668081, + "grad_norm": 0.0015717835631221533, + "learning_rate": 1.383803856874422e-07, + "loss": 0.00022297, + "memory(GiB)": 26.31, + "step": 8760, + "train_speed(iter/s)": 0.58161 + }, + { + "acc": 1.0, + "epoch": 9.294803817603393, + "grad_norm": 0.014836992137134075, + "learning_rate": 1.363468509430778e-07, + "loss": 0.0001063, + "memory(GiB)": 26.31, + "step": 8765, + "train_speed(iter/s)": 0.581611 + }, + { + "acc": 0.99987049, + "epoch": 9.300106044538706, + "grad_norm": 0.0016947545809671283, + "learning_rate": 1.3432827251934395e-07, + "loss": 0.00029463, + "memory(GiB)": 26.31, + "step": 8770, + "train_speed(iter/s)": 0.581612 + }, + { + "acc": 0.99973669, + "epoch": 9.305408271474018, + "grad_norm": 0.04517170786857605, + "learning_rate": 1.3232465662297557e-07, + "loss": 0.00024452, + "memory(GiB)": 26.31, + "step": 8775, + "train_speed(iter/s)": 0.581613 + }, + { + "acc": 0.99987564, + "epoch": 9.310710498409332, + "grad_norm": 0.0015988461673259735, + "learning_rate": 1.3033600941470023e-07, + "loss": 0.00018611, + "memory(GiB)": 26.31, + "step": 8780, + "train_speed(iter/s)": 0.581617 + }, + { + "acc": 0.99976091, + "epoch": 9.316012725344645, + "grad_norm": 0.05802205204963684, + "learning_rate": 1.2836233700921908e-07, + "loss": 0.00027926, + "memory(GiB)": 26.31, + "step": 8785, + "train_speed(iter/s)": 0.581621 + }, + { + "acc": 0.99987803, + "epoch": 9.321314952279957, + "grad_norm": 0.040856651961803436, + "learning_rate": 1.2640364547518917e-07, + "loss": 0.00032329, + "memory(GiB)": 26.31, + "step": 8790, + "train_speed(iter/s)": 0.581624 + }, + { + "acc": 0.99987497, + "epoch": 9.32661717921527, + "grad_norm": 0.0010028522228822112, + "learning_rate": 1.2445994083520462e-07, + "loss": 0.00016964, + "memory(GiB)": 26.31, + "step": 8795, + "train_speed(iter/s)": 0.581624 + }, + { + "acc": 0.99973888, + "epoch": 9.331919406150583, + "grad_norm": 0.0012929553631693125, + "learning_rate": 1.2253122906577757e-07, + "loss": 0.00022281, + "memory(GiB)": 26.31, + "step": 8800, + "train_speed(iter/s)": 0.581625 + }, + { + "acc": 0.9998641, + "epoch": 9.337221633085896, + "grad_norm": 0.0011795631144195795, + "learning_rate": 1.2061751609731894e-07, + "loss": 0.00024719, + "memory(GiB)": 26.31, + "step": 8805, + "train_speed(iter/s)": 0.581626 + }, + { + "acc": 0.99986839, + "epoch": 9.34252386002121, + "grad_norm": 0.03850088268518448, + "learning_rate": 1.1871880781412345e-07, + "loss": 0.00030572, + "memory(GiB)": 26.31, + "step": 8810, + "train_speed(iter/s)": 0.581627 + }, + { + "acc": 0.99987869, + "epoch": 9.347826086956522, + "grad_norm": 0.000750505831092596, + "learning_rate": 1.1683511005434775e-07, + "loss": 0.00012297, + "memory(GiB)": 26.31, + "step": 8815, + "train_speed(iter/s)": 0.58163 + }, + { + "acc": 0.99974213, + "epoch": 9.353128313891835, + "grad_norm": 0.004773573484271765, + "learning_rate": 1.1496642860999406e-07, + "loss": 0.00058561, + "memory(GiB)": 26.31, + "step": 8820, + "train_speed(iter/s)": 0.581633 + }, + { + "acc": 1.0, + "epoch": 9.358430540827147, + "grad_norm": 0.001007193815894425, + "learning_rate": 1.1311276922689271e-07, + "loss": 5.364e-05, + "memory(GiB)": 26.31, + "step": 8825, + "train_speed(iter/s)": 0.581633 + }, + { + "acc": 0.99976463, + "epoch": 9.36373276776246, + "grad_norm": 0.07513611763715744, + "learning_rate": 1.1127413760468455e-07, + "loss": 0.00029684, + "memory(GiB)": 26.31, + "step": 8830, + "train_speed(iter/s)": 0.581635 + }, + { + "acc": 0.99988041, + "epoch": 9.369034994697772, + "grad_norm": 0.00388423097319901, + "learning_rate": 1.0945053939680258e-07, + "loss": 0.00026647, + "memory(GiB)": 26.31, + "step": 8835, + "train_speed(iter/s)": 0.581636 + }, + { + "acc": 0.99988842, + "epoch": 9.374337221633086, + "grad_norm": 0.01094045676290989, + "learning_rate": 1.0764198021045414e-07, + "loss": 0.00116209, + "memory(GiB)": 26.31, + "step": 8840, + "train_speed(iter/s)": 0.581639 + }, + { + "acc": 0.99964161, + "epoch": 9.3796394485684, + "grad_norm": 0.04025011137127876, + "learning_rate": 1.0584846560660656e-07, + "loss": 0.00033552, + "memory(GiB)": 26.31, + "step": 8845, + "train_speed(iter/s)": 0.581644 + }, + { + "acc": 1.0, + "epoch": 9.384941675503711, + "grad_norm": 0.032477378845214844, + "learning_rate": 1.0407000109996655e-07, + "loss": 0.00027492, + "memory(GiB)": 26.31, + "step": 8850, + "train_speed(iter/s)": 0.581648 + }, + { + "acc": 0.99972134, + "epoch": 9.390243902439025, + "grad_norm": 0.055506035685539246, + "learning_rate": 1.0230659215896525e-07, + "loss": 0.00033372, + "memory(GiB)": 26.31, + "step": 8855, + "train_speed(iter/s)": 0.581651 + }, + { + "acc": 0.9998992, + "epoch": 9.395546129374337, + "grad_norm": 0.0010284942109137774, + "learning_rate": 1.0055824420573994e-07, + "loss": 0.00014935, + "memory(GiB)": 26.31, + "step": 8860, + "train_speed(iter/s)": 0.581653 + }, + { + "acc": 0.99975452, + "epoch": 9.40084835630965, + "grad_norm": 0.026280825957655907, + "learning_rate": 9.882496261611954e-08, + "loss": 0.00039164, + "memory(GiB)": 26.31, + "step": 8865, + "train_speed(iter/s)": 0.581654 + }, + { + "acc": 0.99987307, + "epoch": 9.406150583244964, + "grad_norm": 0.04926552250981331, + "learning_rate": 9.710675271960584e-08, + "loss": 0.00021097, + "memory(GiB)": 26.31, + "step": 8870, + "train_speed(iter/s)": 0.581656 + }, + { + "acc": 1.0, + "epoch": 9.411452810180275, + "grad_norm": 0.0016084155067801476, + "learning_rate": 9.54036197993589e-08, + "loss": 7.331e-05, + "memory(GiB)": 26.31, + "step": 8875, + "train_speed(iter/s)": 0.581657 + }, + { + "acc": 1.0, + "epoch": 9.416755037115589, + "grad_norm": 0.0036949021741747856, + "learning_rate": 9.371556909217946e-08, + "loss": 1.104e-05, + "memory(GiB)": 26.31, + "step": 8880, + "train_speed(iter/s)": 0.581658 + }, + { + "acc": 0.99987745, + "epoch": 9.4220572640509, + "grad_norm": 0.03400897979736328, + "learning_rate": 9.204260578849446e-08, + "loss": 0.00027076, + "memory(GiB)": 26.31, + "step": 8885, + "train_speed(iter/s)": 0.581659 + }, + { + "acc": 1.0, + "epoch": 9.427359490986214, + "grad_norm": 0.0016227929154410958, + "learning_rate": 9.038473503233809e-08, + "loss": 2.005e-05, + "memory(GiB)": 26.31, + "step": 8890, + "train_speed(iter/s)": 0.581662 + }, + { + "acc": 0.99986191, + "epoch": 9.432661717921526, + "grad_norm": 0.08077705651521683, + "learning_rate": 8.874196192133971e-08, + "loss": 0.0001798, + "memory(GiB)": 26.31, + "step": 8895, + "train_speed(iter/s)": 0.581665 + }, + { + "acc": 0.9996232, + "epoch": 9.43796394485684, + "grad_norm": 0.03813088312745094, + "learning_rate": 8.711429150670538e-08, + "loss": 0.00063143, + "memory(GiB)": 26.31, + "step": 8900, + "train_speed(iter/s)": 0.581666 + }, + { + "acc": 1.0, + "epoch": 9.443266171792153, + "grad_norm": 0.0346071831882, + "learning_rate": 8.550172879320358e-08, + "loss": 0.00033753, + "memory(GiB)": 26.31, + "step": 8905, + "train_speed(iter/s)": 0.581667 + }, + { + "acc": 0.99988155, + "epoch": 9.448568398727465, + "grad_norm": 0.0006143233040347695, + "learning_rate": 8.390427873915009e-08, + "loss": 0.00035487, + "memory(GiB)": 26.31, + "step": 8910, + "train_speed(iter/s)": 0.581669 + }, + { + "acc": 1.0, + "epoch": 9.453870625662779, + "grad_norm": 0.0009098179871216416, + "learning_rate": 8.232194625639202e-08, + "loss": 5.3e-06, + "memory(GiB)": 26.31, + "step": 8915, + "train_speed(iter/s)": 0.581671 + }, + { + "acc": 0.99975357, + "epoch": 9.45917285259809, + "grad_norm": 0.0381086990237236, + "learning_rate": 8.07547362102916e-08, + "loss": 0.00026466, + "memory(GiB)": 26.31, + "step": 8920, + "train_speed(iter/s)": 0.581675 + }, + { + "acc": 1.0, + "epoch": 9.464475079533404, + "grad_norm": 0.0034874596167355776, + "learning_rate": 7.920265341971407e-08, + "loss": 8.239e-05, + "memory(GiB)": 26.31, + "step": 8925, + "train_speed(iter/s)": 0.581675 + }, + { + "acc": 1.0, + "epoch": 9.469777306468718, + "grad_norm": 0.042031679302453995, + "learning_rate": 7.766570265701036e-08, + "loss": 0.00014838, + "memory(GiB)": 26.31, + "step": 8930, + "train_speed(iter/s)": 0.581678 + }, + { + "acc": 0.9998889, + "epoch": 9.47507953340403, + "grad_norm": 0.0011388412676751614, + "learning_rate": 7.614388864800498e-08, + "loss": 0.00030469, + "memory(GiB)": 26.31, + "step": 8935, + "train_speed(iter/s)": 0.581679 + }, + { + "acc": 0.99949665, + "epoch": 9.480381760339343, + "grad_norm": 0.000366046471754089, + "learning_rate": 7.46372160719771e-08, + "loss": 0.00050993, + "memory(GiB)": 26.31, + "step": 8940, + "train_speed(iter/s)": 0.581683 + }, + { + "acc": 0.99976463, + "epoch": 9.485683987274655, + "grad_norm": 0.08309927582740784, + "learning_rate": 7.314568956165167e-08, + "loss": 0.00032754, + "memory(GiB)": 26.31, + "step": 8945, + "train_speed(iter/s)": 0.581684 + }, + { + "acc": 0.9993681, + "epoch": 9.490986214209968, + "grad_norm": 0.0019951926078647375, + "learning_rate": 7.166931370318059e-08, + "loss": 0.0007342, + "memory(GiB)": 26.31, + "step": 8950, + "train_speed(iter/s)": 0.581686 + }, + { + "acc": 0.99973173, + "epoch": 9.496288441145282, + "grad_norm": 0.047303207218647, + "learning_rate": 7.020809303613208e-08, + "loss": 0.00031462, + "memory(GiB)": 26.31, + "step": 8955, + "train_speed(iter/s)": 0.581688 + }, + { + "acc": 1.0, + "epoch": 9.501590668080594, + "grad_norm": 0.004503290168941021, + "learning_rate": 6.8762032053473e-08, + "loss": 8.68e-06, + "memory(GiB)": 26.31, + "step": 8960, + "train_speed(iter/s)": 0.581689 + }, + { + "acc": 0.99988422, + "epoch": 9.506892895015907, + "grad_norm": 0.0026324728969484568, + "learning_rate": 6.733113520155939e-08, + "loss": 0.00016019, + "memory(GiB)": 26.31, + "step": 8965, + "train_speed(iter/s)": 0.581689 + }, + { + "acc": 1.0, + "epoch": 9.512195121951219, + "grad_norm": 0.0014660786837339401, + "learning_rate": 6.591540688011867e-08, + "loss": 0.00019902, + "memory(GiB)": 26.31, + "step": 8970, + "train_speed(iter/s)": 0.58169 + }, + { + "acc": 0.99988213, + "epoch": 9.517497348886533, + "grad_norm": 0.0004259504785295576, + "learning_rate": 6.45148514422397e-08, + "loss": 0.00029562, + "memory(GiB)": 26.31, + "step": 8975, + "train_speed(iter/s)": 0.581692 + }, + { + "acc": 0.9997159, + "epoch": 9.522799575821844, + "grad_norm": 0.00198033987544477, + "learning_rate": 6.312947319435664e-08, + "loss": 0.00066467, + "memory(GiB)": 26.31, + "step": 8980, + "train_speed(iter/s)": 0.581692 + }, + { + "acc": 1.0, + "epoch": 9.528101802757158, + "grad_norm": 0.002281604567542672, + "learning_rate": 6.175927639623681e-08, + "loss": 0.00010886, + "memory(GiB)": 26.31, + "step": 8985, + "train_speed(iter/s)": 0.581692 + }, + { + "acc": 0.99974871, + "epoch": 9.533404029692472, + "grad_norm": 0.03849175199866295, + "learning_rate": 6.040426526096728e-08, + "loss": 0.00055996, + "memory(GiB)": 26.31, + "step": 8990, + "train_speed(iter/s)": 0.581695 + }, + { + "acc": 0.9996357, + "epoch": 9.538706256627783, + "grad_norm": 0.06854744255542755, + "learning_rate": 5.906444395494326e-08, + "loss": 0.00036071, + "memory(GiB)": 26.31, + "step": 8995, + "train_speed(iter/s)": 0.581698 + }, + { + "acc": 0.99976387, + "epoch": 9.544008483563097, + "grad_norm": 0.0022573957685381174, + "learning_rate": 5.7739816597852564e-08, + "loss": 0.00025793, + "memory(GiB)": 26.31, + "step": 9000, + "train_speed(iter/s)": 0.581698 + }, + { + "acc": 0.99987116, + "epoch": 9.549310710498409, + "grad_norm": 0.0029828150290995836, + "learning_rate": 5.6430387262666145e-08, + "loss": 0.00016854, + "memory(GiB)": 26.31, + "step": 9005, + "train_speed(iter/s)": 0.581699 + }, + { + "acc": 0.99987183, + "epoch": 9.554612937433722, + "grad_norm": 0.00038223754381760955, + "learning_rate": 5.5136159975623705e-08, + "loss": 0.00025214, + "memory(GiB)": 26.31, + "step": 9010, + "train_speed(iter/s)": 0.581699 + }, + { + "acc": 0.99986115, + "epoch": 9.559915164369034, + "grad_norm": 0.0022197780199348927, + "learning_rate": 5.3857138716220866e-08, + "loss": 0.0002799, + "memory(GiB)": 26.31, + "step": 9015, + "train_speed(iter/s)": 0.581702 + }, + { + "acc": 0.99963999, + "epoch": 9.565217391304348, + "grad_norm": 0.0437530018389225, + "learning_rate": 5.2593327417198165e-08, + "loss": 0.0003854, + "memory(GiB)": 26.31, + "step": 9020, + "train_speed(iter/s)": 0.581703 + }, + { + "acc": 0.9998889, + "epoch": 9.570519618239661, + "grad_norm": 0.0011901530670002103, + "learning_rate": 5.134472996452874e-08, + "loss": 0.00021868, + "memory(GiB)": 26.31, + "step": 9025, + "train_speed(iter/s)": 0.581704 + }, + { + "acc": 0.99976788, + "epoch": 9.575821845174973, + "grad_norm": 0.025663409382104874, + "learning_rate": 5.011135019740506e-08, + "loss": 0.00028868, + "memory(GiB)": 26.31, + "step": 9030, + "train_speed(iter/s)": 0.581705 + }, + { + "acc": 0.9998908, + "epoch": 9.581124072110287, + "grad_norm": 0.046961378306150436, + "learning_rate": 4.88931919082295e-08, + "loss": 0.0001989, + "memory(GiB)": 26.31, + "step": 9035, + "train_speed(iter/s)": 0.581706 + }, + { + "acc": 1.0, + "epoch": 9.586426299045598, + "grad_norm": 0.0007390428800135851, + "learning_rate": 4.7690258842601e-08, + "loss": 6.1e-06, + "memory(GiB)": 26.31, + "step": 9040, + "train_speed(iter/s)": 0.581709 + }, + { + "acc": 1.0, + "epoch": 9.591728525980912, + "grad_norm": 0.001649084035307169, + "learning_rate": 4.65025546993034e-08, + "loss": 4.62e-06, + "memory(GiB)": 26.31, + "step": 9045, + "train_speed(iter/s)": 0.581709 + }, + { + "acc": 0.99962807, + "epoch": 9.597030752916226, + "grad_norm": 0.07276555895805359, + "learning_rate": 4.5330083130294896e-08, + "loss": 0.00061633, + "memory(GiB)": 26.31, + "step": 9050, + "train_speed(iter/s)": 0.581712 + }, + { + "acc": 0.99985876, + "epoch": 9.602332979851537, + "grad_norm": 0.00032748220837675035, + "learning_rate": 4.417284774069643e-08, + "loss": 0.00020644, + "memory(GiB)": 26.31, + "step": 9055, + "train_speed(iter/s)": 0.581715 + }, + { + "acc": 0.99963312, + "epoch": 9.607635206786851, + "grad_norm": 0.0011951240012422204, + "learning_rate": 4.3030852088781635e-08, + "loss": 0.00039923, + "memory(GiB)": 26.31, + "step": 9060, + "train_speed(iter/s)": 0.581716 + }, + { + "acc": 0.99988317, + "epoch": 9.612937433722163, + "grad_norm": 0.05354034900665283, + "learning_rate": 4.190409968596244e-08, + "loss": 0.00014953, + "memory(GiB)": 26.31, + "step": 9065, + "train_speed(iter/s)": 0.581717 + }, + { + "acc": 0.99965878, + "epoch": 9.618239660657476, + "grad_norm": 0.002356098499149084, + "learning_rate": 4.0792593996783495e-08, + "loss": 0.00033963, + "memory(GiB)": 26.31, + "step": 9070, + "train_speed(iter/s)": 0.581719 + }, + { + "acc": 0.99976301, + "epoch": 9.62354188759279, + "grad_norm": 0.037054624408483505, + "learning_rate": 3.969633843890722e-08, + "loss": 0.00020338, + "memory(GiB)": 26.31, + "step": 9075, + "train_speed(iter/s)": 0.58172 + }, + { + "acc": 0.99988213, + "epoch": 9.628844114528102, + "grad_norm": 0.0032587756868451834, + "learning_rate": 3.861533638310546e-08, + "loss": 0.00041673, + "memory(GiB)": 26.31, + "step": 9080, + "train_speed(iter/s)": 0.58172 + }, + { + "acc": 0.9996336, + "epoch": 9.634146341463415, + "grad_norm": 0.09497502446174622, + "learning_rate": 3.7549591153248365e-08, + "loss": 0.00046227, + "memory(GiB)": 26.31, + "step": 9085, + "train_speed(iter/s)": 0.58172 + }, + { + "acc": 0.99973307, + "epoch": 9.639448568398727, + "grad_norm": 0.042482633143663406, + "learning_rate": 3.649910602629388e-08, + "loss": 0.00025767, + "memory(GiB)": 26.31, + "step": 9090, + "train_speed(iter/s)": 0.581721 + }, + { + "acc": 0.99988632, + "epoch": 9.64475079533404, + "grad_norm": 0.031739819794893265, + "learning_rate": 3.546388423227937e-08, + "loss": 0.00018167, + "memory(GiB)": 26.31, + "step": 9095, + "train_speed(iter/s)": 0.581722 + }, + { + "acc": 1.0, + "epoch": 9.650053022269352, + "grad_norm": 0.09785232692956924, + "learning_rate": 3.4443928954308384e-08, + "loss": 5.175e-05, + "memory(GiB)": 26.31, + "step": 9100, + "train_speed(iter/s)": 0.581723 + }, + { + "acc": 0.99978104, + "epoch": 9.655355249204666, + "grad_norm": 0.03592614457011223, + "learning_rate": 3.343924332854555e-08, + "loss": 0.00019715, + "memory(GiB)": 26.31, + "step": 9105, + "train_speed(iter/s)": 0.581724 + }, + { + "acc": 0.9996417, + "epoch": 9.66065747613998, + "grad_norm": 0.03002362884581089, + "learning_rate": 3.244983044420168e-08, + "loss": 0.00065632, + "memory(GiB)": 26.31, + "step": 9110, + "train_speed(iter/s)": 0.581727 + }, + { + "acc": 0.99974298, + "epoch": 9.665959703075291, + "grad_norm": 0.007756541948765516, + "learning_rate": 3.1475693343528725e-08, + "loss": 0.00035167, + "memory(GiB)": 26.31, + "step": 9115, + "train_speed(iter/s)": 0.581728 + }, + { + "acc": 1.0, + "epoch": 9.671261930010605, + "grad_norm": 0.0003773514472413808, + "learning_rate": 3.051683502180812e-08, + "loss": 8.733e-05, + "memory(GiB)": 26.31, + "step": 9120, + "train_speed(iter/s)": 0.581728 + }, + { + "acc": 0.99949741, + "epoch": 9.676564156945917, + "grad_norm": 0.03288589045405388, + "learning_rate": 2.9573258427341382e-08, + "loss": 0.00087277, + "memory(GiB)": 26.31, + "step": 9125, + "train_speed(iter/s)": 0.581729 + }, + { + "acc": 1.0, + "epoch": 9.68186638388123, + "grad_norm": 0.0020175960380584, + "learning_rate": 2.864496646144231e-08, + "loss": 1.535e-05, + "memory(GiB)": 26.31, + "step": 9130, + "train_speed(iter/s)": 0.58173 + }, + { + "acc": 0.99988375, + "epoch": 9.687168610816542, + "grad_norm": 0.0019167440477758646, + "learning_rate": 2.7731961978427547e-08, + "loss": 0.00056926, + "memory(GiB)": 26.31, + "step": 9135, + "train_speed(iter/s)": 0.581732 + }, + { + "acc": 0.99974852, + "epoch": 9.692470837751856, + "grad_norm": 0.040033094584941864, + "learning_rate": 2.683424778560772e-08, + "loss": 0.00044879, + "memory(GiB)": 26.31, + "step": 9140, + "train_speed(iter/s)": 0.581737 + }, + { + "acc": 1.0, + "epoch": 9.69777306468717, + "grad_norm": 0.0005087403696961701, + "learning_rate": 2.5951826643277988e-08, + "loss": 4.28e-06, + "memory(GiB)": 26.31, + "step": 9145, + "train_speed(iter/s)": 0.581741 + }, + { + "acc": 1.0, + "epoch": 9.703075291622481, + "grad_norm": 0.040018994361162186, + "learning_rate": 2.508470126471083e-08, + "loss": 0.00033192, + "memory(GiB)": 26.31, + "step": 9150, + "train_speed(iter/s)": 0.581742 + }, + { + "acc": 0.99962864, + "epoch": 9.708377518557795, + "grad_norm": 0.15460503101348877, + "learning_rate": 2.423287431614827e-08, + "loss": 0.00039951, + "memory(GiB)": 26.31, + "step": 9155, + "train_speed(iter/s)": 0.581747 + }, + { + "acc": 1.0, + "epoch": 9.713679745493106, + "grad_norm": 0.00020370646961964667, + "learning_rate": 2.339634841679135e-08, + "loss": 5.839e-05, + "memory(GiB)": 26.31, + "step": 9160, + "train_speed(iter/s)": 0.581747 + }, + { + "acc": 0.99962616, + "epoch": 9.71898197242842, + "grad_norm": 0.03239269554615021, + "learning_rate": 2.257512613879289e-08, + "loss": 0.00045688, + "memory(GiB)": 26.31, + "step": 9165, + "train_speed(iter/s)": 0.581748 + }, + { + "acc": 1.0, + "epoch": 9.724284199363733, + "grad_norm": 0.0013463557697832584, + "learning_rate": 2.1769210007252503e-08, + "loss": 2.67e-06, + "memory(GiB)": 26.31, + "step": 9170, + "train_speed(iter/s)": 0.581748 + }, + { + "acc": 0.99976797, + "epoch": 9.729586426299045, + "grad_norm": 0.00148050079587847, + "learning_rate": 2.0978602500203827e-08, + "loss": 0.00020894, + "memory(GiB)": 26.31, + "step": 9175, + "train_speed(iter/s)": 0.581752 + }, + { + "acc": 0.99971256, + "epoch": 9.734888653234359, + "grad_norm": 0.035606566816568375, + "learning_rate": 2.020330604861065e-08, + "loss": 0.00057267, + "memory(GiB)": 26.31, + "step": 9180, + "train_speed(iter/s)": 0.581755 + }, + { + "acc": 0.99963932, + "epoch": 9.74019088016967, + "grad_norm": 0.02676430158317089, + "learning_rate": 1.9443323036358563e-08, + "loss": 0.00058691, + "memory(GiB)": 26.31, + "step": 9185, + "train_speed(iter/s)": 0.581756 + }, + { + "acc": 0.99975109, + "epoch": 9.745493107104984, + "grad_norm": 0.002146985149011016, + "learning_rate": 1.86986558002461e-08, + "loss": 0.00033436, + "memory(GiB)": 26.31, + "step": 9190, + "train_speed(iter/s)": 0.581757 + }, + { + "acc": 1.0, + "epoch": 9.750795334040298, + "grad_norm": 0.0011241419706493616, + "learning_rate": 1.7969306629980287e-08, + "loss": 0.00013526, + "memory(GiB)": 26.31, + "step": 9195, + "train_speed(iter/s)": 0.581757 + }, + { + "acc": 0.99987183, + "epoch": 9.75609756097561, + "grad_norm": 0.002333273645490408, + "learning_rate": 1.7255277768166646e-08, + "loss": 0.0001136, + "memory(GiB)": 26.31, + "step": 9200, + "train_speed(iter/s)": 0.58176 + }, + { + "acc": 0.99960842, + "epoch": 9.761399787910923, + "grad_norm": 0.003362901508808136, + "learning_rate": 1.6556571410304774e-08, + "loss": 0.00092601, + "memory(GiB)": 26.31, + "step": 9205, + "train_speed(iter/s)": 0.581765 + }, + { + "acc": 1.0, + "epoch": 9.766702014846235, + "grad_norm": 0.0012096832506358624, + "learning_rate": 1.587318970478055e-08, + "loss": 0.00019076, + "memory(GiB)": 26.31, + "step": 9210, + "train_speed(iter/s)": 0.581766 + }, + { + "acc": 1.0, + "epoch": 9.772004241781548, + "grad_norm": 0.03516737371683121, + "learning_rate": 1.5205134752858374e-08, + "loss": 0.00025258, + "memory(GiB)": 26.31, + "step": 9215, + "train_speed(iter/s)": 0.58177 + }, + { + "acc": 0.99987183, + "epoch": 9.77730646871686, + "grad_norm": 0.002632312010973692, + "learning_rate": 1.455240860867729e-08, + "loss": 0.0003727, + "memory(GiB)": 26.31, + "step": 9220, + "train_speed(iter/s)": 0.58177 + }, + { + "acc": 0.9998889, + "epoch": 9.782608695652174, + "grad_norm": 0.027906369417905807, + "learning_rate": 1.3915013279242639e-08, + "loss": 0.00029545, + "memory(GiB)": 26.31, + "step": 9225, + "train_speed(iter/s)": 0.581771 + }, + { + "acc": 0.99974747, + "epoch": 9.787910922587487, + "grad_norm": 0.07243865728378296, + "learning_rate": 1.3292950724419978e-08, + "loss": 0.00066217, + "memory(GiB)": 26.31, + "step": 9230, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 0.99987984, + "epoch": 9.7932131495228, + "grad_norm": 0.010551735758781433, + "learning_rate": 1.268622285693117e-08, + "loss": 0.00021876, + "memory(GiB)": 26.31, + "step": 9235, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 0.99988317, + "epoch": 9.798515376458113, + "grad_norm": 0.0032518194057047367, + "learning_rate": 1.2094831542344976e-08, + "loss": 0.00018491, + "memory(GiB)": 26.31, + "step": 9240, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 0.99975433, + "epoch": 9.803817603393425, + "grad_norm": 0.038194481283426285, + "learning_rate": 1.1518778599074806e-08, + "loss": 0.00021037, + "memory(GiB)": 26.31, + "step": 9245, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 0.9997674, + "epoch": 9.809119830328738, + "grad_norm": 0.01002445723861456, + "learning_rate": 1.0958065798370409e-08, + "loss": 0.00052305, + "memory(GiB)": 26.31, + "step": 9250, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 1.0, + "epoch": 9.814422057264052, + "grad_norm": 0.00044991099275648594, + "learning_rate": 1.041269486431399e-08, + "loss": 6.751e-05, + "memory(GiB)": 26.31, + "step": 9255, + "train_speed(iter/s)": 0.581772 + }, + { + "acc": 1.0, + "epoch": 9.819724284199363, + "grad_norm": 0.011789188720285892, + "learning_rate": 9.882667473815209e-09, + "loss": 1.297e-05, + "memory(GiB)": 26.31, + "step": 9260, + "train_speed(iter/s)": 0.581774 + }, + { + "acc": 0.99988985, + "epoch": 9.825026511134677, + "grad_norm": 0.0348358228802681, + "learning_rate": 9.367985256604519e-09, + "loss": 0.0001662, + "memory(GiB)": 26.31, + "step": 9265, + "train_speed(iter/s)": 0.581774 + }, + { + "acc": 1.0, + "epoch": 9.830328738069989, + "grad_norm": 0.0035749017260968685, + "learning_rate": 8.868649795228724e-09, + "loss": 9.127e-05, + "memory(GiB)": 26.31, + "step": 9270, + "train_speed(iter/s)": 0.581775 + }, + { + "acc": 0.99986839, + "epoch": 9.835630965005302, + "grad_norm": 0.020208999514579773, + "learning_rate": 8.38466262504766e-09, + "loss": 0.00017922, + "memory(GiB)": 26.31, + "step": 9275, + "train_speed(iter/s)": 0.581776 + }, + { + "acc": 1.0, + "epoch": 9.840933191940614, + "grad_norm": 0.002023870823904872, + "learning_rate": 7.916025234226407e-09, + "loss": 0.00025274, + "memory(GiB)": 26.31, + "step": 9280, + "train_speed(iter/s)": 0.581777 + }, + { + "acc": 0.99986706, + "epoch": 9.846235418875928, + "grad_norm": 0.025883223861455917, + "learning_rate": 7.462739063734198e-09, + "loss": 0.0001587, + "memory(GiB)": 26.31, + "step": 9285, + "train_speed(iter/s)": 0.581782 + }, + { + "acc": 0.99988098, + "epoch": 9.851537645811241, + "grad_norm": 0.045160189270973206, + "learning_rate": 7.024805507337186e-09, + "loss": 0.00011031, + "memory(GiB)": 26.31, + "step": 9290, + "train_speed(iter/s)": 0.581782 + }, + { + "acc": 0.99988213, + "epoch": 9.856839872746553, + "grad_norm": 0.000631249975413084, + "learning_rate": 6.602225911595128e-09, + "loss": 0.00017462, + "memory(GiB)": 26.31, + "step": 9295, + "train_speed(iter/s)": 0.581784 + }, + { + "acc": 0.99974346, + "epoch": 9.862142099681867, + "grad_norm": 0.03226780891418457, + "learning_rate": 6.1950015758580405e-09, + "loss": 0.00077184, + "memory(GiB)": 26.31, + "step": 9300, + "train_speed(iter/s)": 0.581785 + }, + { + "acc": 0.99987984, + "epoch": 9.867444326617179, + "grad_norm": 0.07820426672697067, + "learning_rate": 5.803133752260661e-09, + "loss": 0.00019142, + "memory(GiB)": 26.31, + "step": 9305, + "train_speed(iter/s)": 0.581786 + }, + { + "acc": 0.9997633, + "epoch": 9.872746553552492, + "grad_norm": 0.0021600211039185524, + "learning_rate": 5.426623645721333e-09, + "loss": 0.00034086, + "memory(GiB)": 26.31, + "step": 9310, + "train_speed(iter/s)": 0.581787 + }, + { + "acc": 0.9997406, + "epoch": 9.878048780487806, + "grad_norm": 0.05200710520148277, + "learning_rate": 5.065472413933678e-09, + "loss": 0.00040007, + "memory(GiB)": 26.31, + "step": 9315, + "train_speed(iter/s)": 0.581789 + }, + { + "acc": 0.99986706, + "epoch": 9.883351007423117, + "grad_norm": 0.0008037837687879801, + "learning_rate": 4.71968116736771e-09, + "loss": 0.00021175, + "memory(GiB)": 26.31, + "step": 9320, + "train_speed(iter/s)": 0.581789 + }, + { + "acc": 0.99984474, + "epoch": 9.888653234358431, + "grad_norm": 0.051441438496112823, + "learning_rate": 4.389250969264283e-09, + "loss": 0.00016188, + "memory(GiB)": 26.31, + "step": 9325, + "train_speed(iter/s)": 0.58179 + }, + { + "acc": 0.99963465, + "epoch": 9.893955461293743, + "grad_norm": 0.02896001748740673, + "learning_rate": 4.0741828356312046e-09, + "loss": 0.00053013, + "memory(GiB)": 26.31, + "step": 9330, + "train_speed(iter/s)": 0.581792 + }, + { + "acc": 0.99972954, + "epoch": 9.899257688229056, + "grad_norm": 0.0005594078684225678, + "learning_rate": 3.774477735241571e-09, + "loss": 0.0003093, + "memory(GiB)": 26.31, + "step": 9335, + "train_speed(iter/s)": 0.581792 + }, + { + "acc": 0.9995079, + "epoch": 9.90455991516437, + "grad_norm": 0.0392618291079998, + "learning_rate": 3.490136589629885e-09, + "loss": 0.00048772, + "memory(GiB)": 26.31, + "step": 9340, + "train_speed(iter/s)": 0.581795 + }, + { + "acc": 1.0, + "epoch": 9.909862142099682, + "grad_norm": 0.011398269794881344, + "learning_rate": 3.221160273090386e-09, + "loss": 1.085e-05, + "memory(GiB)": 26.31, + "step": 9345, + "train_speed(iter/s)": 0.581797 + }, + { + "acc": 0.99987679, + "epoch": 9.915164369034995, + "grad_norm": 0.0018169950926676393, + "learning_rate": 2.9675496126715013e-09, + "loss": 0.00024445, + "memory(GiB)": 26.31, + "step": 9350, + "train_speed(iter/s)": 0.581797 + }, + { + "acc": 1.0, + "epoch": 9.920466595970307, + "grad_norm": 0.026712248101830482, + "learning_rate": 2.7293053881769583e-09, + "loss": 0.00015502, + "memory(GiB)": 26.31, + "step": 9355, + "train_speed(iter/s)": 0.581798 + }, + { + "acc": 1.0, + "epoch": 9.92576882290562, + "grad_norm": 0.0008010675082914531, + "learning_rate": 2.5064283321618967e-09, + "loss": 7.096e-05, + "memory(GiB)": 26.31, + "step": 9360, + "train_speed(iter/s)": 0.581799 + }, + { + "acc": 0.99964323, + "epoch": 9.931071049840932, + "grad_norm": 0.0023205948527902365, + "learning_rate": 2.298919129928429e-09, + "loss": 0.00044941, + "memory(GiB)": 26.31, + "step": 9365, + "train_speed(iter/s)": 0.5818 + }, + { + "acc": 1.0, + "epoch": 9.936373276776246, + "grad_norm": 0.0038338894955813885, + "learning_rate": 2.1067784195278586e-09, + "loss": 0.00015866, + "memory(GiB)": 26.31, + "step": 9370, + "train_speed(iter/s)": 0.581804 + }, + { + "acc": 0.99964285, + "epoch": 9.94167550371156, + "grad_norm": 0.0026238136924803257, + "learning_rate": 1.9300067917551333e-09, + "loss": 0.00037828, + "memory(GiB)": 26.31, + "step": 9375, + "train_speed(iter/s)": 0.581804 + }, + { + "acc": 1.0, + "epoch": 9.946977730646871, + "grad_norm": 0.0014907962176948786, + "learning_rate": 1.7686047901482875e-09, + "loss": 7.117e-05, + "memory(GiB)": 26.31, + "step": 9380, + "train_speed(iter/s)": 0.581807 + }, + { + "acc": 0.99976768, + "epoch": 9.952279957582185, + "grad_norm": 0.04889528080821037, + "learning_rate": 1.6225729109867767e-09, + "loss": 0.00037514, + "memory(GiB)": 26.31, + "step": 9385, + "train_speed(iter/s)": 0.58181 + }, + { + "acc": 0.99988585, + "epoch": 9.957582184517497, + "grad_norm": 0.0002773120941128582, + "learning_rate": 1.491911603290369e-09, + "loss": 0.00015811, + "memory(GiB)": 26.31, + "step": 9390, + "train_speed(iter/s)": 0.581812 + }, + { + "acc": 0.99976158, + "epoch": 9.96288441145281, + "grad_norm": 0.06627703458070755, + "learning_rate": 1.3766212688169235e-09, + "loss": 0.00021021, + "memory(GiB)": 26.31, + "step": 9395, + "train_speed(iter/s)": 0.581812 + }, + { + "acc": 0.99974079, + "epoch": 9.968186638388122, + "grad_norm": 0.05650794133543968, + "learning_rate": 1.2767022620618365e-09, + "loss": 0.00020128, + "memory(GiB)": 26.31, + "step": 9400, + "train_speed(iter/s)": 0.581814 + }, + { + "acc": 0.99948587, + "epoch": 9.973488865323436, + "grad_norm": 0.030863817781209946, + "learning_rate": 1.1921548902563759e-09, + "loss": 0.00049622, + "memory(GiB)": 26.31, + "step": 9405, + "train_speed(iter/s)": 0.581818 + }, + { + "acc": 0.9998889, + "epoch": 9.97879109225875, + "grad_norm": 0.0006861954461783171, + "learning_rate": 1.1229794133676798e-09, + "loss": 0.00015753, + "memory(GiB)": 26.31, + "step": 9410, + "train_speed(iter/s)": 0.58182 + }, + { + "acc": 0.99987621, + "epoch": 9.984093319194061, + "grad_norm": 0.0052332268096506596, + "learning_rate": 1.0691760440959835e-09, + "loss": 0.00019103, + "memory(GiB)": 26.31, + "step": 9415, + "train_speed(iter/s)": 0.58182 + }, + { + "acc": 0.99984474, + "epoch": 9.989395546129375, + "grad_norm": 0.0013717457186430693, + "learning_rate": 1.0307449478762829e-09, + "loss": 0.00013517, + "memory(GiB)": 26.31, + "step": 9420, + "train_speed(iter/s)": 0.581821 + }, + { + "acc": 0.99989624, + "epoch": 9.994697773064686, + "grad_norm": 0.0009563152561895549, + "learning_rate": 1.0076862428777806e-09, + "loss": 0.0004057, + "memory(GiB)": 26.31, + "step": 9425, + "train_speed(iter/s)": 0.581826 + }, + { + "acc": 0.99975662, + "epoch": 10.0, + "grad_norm": 0.040931958705186844, + "learning_rate": 1e-09, + "loss": 0.00025084, + "memory(GiB)": 26.31, + "step": 9430, + "train_speed(iter/s)": 0.581823 + }, + { + "epoch": 10.0, + "eval_acc": 0.9468321662075603, + "eval_loss": 1.008236289024353, + "eval_runtime": 109.9561, + "eval_samples_per_second": 205.555, + "eval_steps_per_second": 0.809, + "step": 9430 + } + ], + "logging_steps": 5, + "max_steps": 9430, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 10000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.203857723149517e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}