| { |
| "best_global_step": 345, |
| "best_metric": 0.12013684, |
| "best_model_checkpoint": "/mnt/bn/wdq-base1/data/VLMs/vsa_rl/checkpoint/emovit/v2-20250701-195511/checkpoint-345", |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 345, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002902757619738752, |
| "grad_norm": 255.519775390625, |
| "learning_rate": 5.555555555555555e-08, |
| "loss": 4.085740089416504, |
| "memory(GiB)": 43.95, |
| "step": 1, |
| "token_acc": 0.5151515151515151, |
| "train_speed(iter/s)": 0.011687 |
| }, |
| { |
| "epoch": 0.01451378809869376, |
| "grad_norm": 244.64340209960938, |
| "learning_rate": 2.7777777777777776e-07, |
| "loss": 3.8799986839294434, |
| "memory(GiB)": 49.87, |
| "step": 5, |
| "token_acc": 0.5189393939393939, |
| "train_speed(iter/s)": 0.029099 |
| }, |
| { |
| "epoch": 0.02902757619738752, |
| "grad_norm": 229.68177795410156, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 3.666259765625, |
| "memory(GiB)": 49.87, |
| "step": 10, |
| "token_acc": 0.5258358662613982, |
| "train_speed(iter/s)": 0.036058 |
| }, |
| { |
| "epoch": 0.04354136429608128, |
| "grad_norm": 148.16238403320312, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 2.4450706481933593, |
| "memory(GiB)": 49.87, |
| "step": 15, |
| "token_acc": 0.5792682926829268, |
| "train_speed(iter/s)": 0.039309 |
| }, |
| { |
| "epoch": 0.05805515239477504, |
| "grad_norm": 34.427486419677734, |
| "learning_rate": 9.99907702300141e-07, |
| "loss": 0.839083480834961, |
| "memory(GiB)": 49.87, |
| "step": 20, |
| "token_acc": 0.7400611620795107, |
| "train_speed(iter/s)": 0.041029 |
| }, |
| { |
| "epoch": 0.07256894049346879, |
| "grad_norm": 19.932300567626953, |
| "learning_rate": 9.988697444723761e-07, |
| "loss": 0.378426194190979, |
| "memory(GiB)": 49.87, |
| "step": 25, |
| "token_acc": 0.9141104294478528, |
| "train_speed(iter/s)": 0.041967 |
| }, |
| { |
| "epoch": 0.08708272859216255, |
| "grad_norm": 24.542755126953125, |
| "learning_rate": 9.966808593197956e-07, |
| "loss": 0.24850196838378907, |
| "memory(GiB)": 49.87, |
| "step": 30, |
| "token_acc": 0.9134328358208955, |
| "train_speed(iter/s)": 0.042819 |
| }, |
| { |
| "epoch": 0.10159651669085631, |
| "grad_norm": 70.33385467529297, |
| "learning_rate": 9.933460967532452e-07, |
| "loss": 0.22510967254638672, |
| "memory(GiB)": 49.87, |
| "step": 35, |
| "token_acc": 0.9174311926605505, |
| "train_speed(iter/s)": 0.043434 |
| }, |
| { |
| "epoch": 0.11611030478955008, |
| "grad_norm": 20.665966033935547, |
| "learning_rate": 9.888731503027535e-07, |
| "loss": 0.21692299842834473, |
| "memory(GiB)": 49.87, |
| "step": 40, |
| "token_acc": 0.9357798165137615, |
| "train_speed(iter/s)": 0.044038 |
| }, |
| { |
| "epoch": 0.13062409288824384, |
| "grad_norm": 17.56533432006836, |
| "learning_rate": 9.83272339368022e-07, |
| "loss": 0.2063352346420288, |
| "memory(GiB)": 49.87, |
| "step": 45, |
| "token_acc": 0.9425981873111783, |
| "train_speed(iter/s)": 0.044317 |
| }, |
| { |
| "epoch": 0.14513788098693758, |
| "grad_norm": 11.449491500854492, |
| "learning_rate": 9.765565854108502e-07, |
| "loss": 0.18542113304138183, |
| "memory(GiB)": 49.87, |
| "step": 50, |
| "token_acc": 0.9415384615384615, |
| "train_speed(iter/s)": 0.044609 |
| }, |
| { |
| "epoch": 0.15965166908563136, |
| "grad_norm": 13.59149169921875, |
| "learning_rate": 9.687413821444199e-07, |
| "loss": 0.16923766136169432, |
| "memory(GiB)": 49.87, |
| "step": 55, |
| "token_acc": 0.9447852760736196, |
| "train_speed(iter/s)": 0.044808 |
| }, |
| { |
| "epoch": 0.1741654571843251, |
| "grad_norm": 12.386320114135742, |
| "learning_rate": 9.598447597882179e-07, |
| "loss": 0.1599480152130127, |
| "memory(GiB)": 49.87, |
| "step": 60, |
| "token_acc": 0.9341692789968652, |
| "train_speed(iter/s)": 0.045084 |
| }, |
| { |
| "epoch": 0.18867924528301888, |
| "grad_norm": 12.34672737121582, |
| "learning_rate": 9.498872434710622e-07, |
| "loss": 0.16525213718414306, |
| "memory(GiB)": 49.87, |
| "step": 65, |
| "token_acc": 0.9476923076923077, |
| "train_speed(iter/s)": 0.045312 |
| }, |
| { |
| "epoch": 0.20319303338171263, |
| "grad_norm": 9.779081344604492, |
| "learning_rate": 9.388918058781945e-07, |
| "loss": 0.14802794456481932, |
| "memory(GiB)": 49.87, |
| "step": 70, |
| "token_acc": 0.9444444444444444, |
| "train_speed(iter/s)": 0.045487 |
| }, |
| { |
| "epoch": 0.21770682148040638, |
| "grad_norm": 8.754064559936523, |
| "learning_rate": 9.268838142516943e-07, |
| "loss": 0.15607104301452637, |
| "memory(GiB)": 49.87, |
| "step": 75, |
| "token_acc": 0.9335347432024169, |
| "train_speed(iter/s)": 0.045649 |
| }, |
| { |
| "epoch": 0.23222060957910015, |
| "grad_norm": 11.27889347076416, |
| "learning_rate": 9.138909718664787e-07, |
| "loss": 0.13595396280288696, |
| "memory(GiB)": 49.87, |
| "step": 80, |
| "token_acc": 0.9320987654320988, |
| "train_speed(iter/s)": 0.045813 |
| }, |
| { |
| "epoch": 0.2467343976777939, |
| "grad_norm": 9.61460018157959, |
| "learning_rate": 8.999432541169144e-07, |
| "loss": 0.1399265170097351, |
| "memory(GiB)": 49.87, |
| "step": 85, |
| "token_acc": 0.9604863221884499, |
| "train_speed(iter/s)": 0.045971 |
| }, |
| { |
| "epoch": 0.2612481857764877, |
| "grad_norm": 10.189661979675293, |
| "learning_rate": 8.850728393614901e-07, |
| "loss": 0.14257581233978273, |
| "memory(GiB)": 49.87, |
| "step": 90, |
| "token_acc": 0.9613095238095238, |
| "train_speed(iter/s)": 0.0461 |
| }, |
| { |
| "epoch": 0.2757619738751814, |
| "grad_norm": 15.340847969055176, |
| "learning_rate": 8.693140346850975e-07, |
| "loss": 0.13374924659729004, |
| "memory(GiB)": 49.87, |
| "step": 95, |
| "token_acc": 0.9664634146341463, |
| "train_speed(iter/s)": 0.04616 |
| }, |
| { |
| "epoch": 0.29027576197387517, |
| "grad_norm": 9.699533462524414, |
| "learning_rate": 8.527031967501906e-07, |
| "loss": 0.12369405031204224, |
| "memory(GiB)": 49.87, |
| "step": 100, |
| "token_acc": 0.9497041420118343, |
| "train_speed(iter/s)": 0.046232 |
| }, |
| { |
| "epoch": 0.29027576197387517, |
| "eval_loss": 0.14079514145851135, |
| "eval_runtime": 29.9586, |
| "eval_samples_per_second": 14.854, |
| "eval_steps_per_second": 1.869, |
| "eval_token_acc": 0.9537385691231846, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3047895500725689, |
| "grad_norm": 9.751545906066895, |
| "learning_rate": 8.352786479194287e-07, |
| "loss": 0.12529947757720947, |
| "memory(GiB)": 49.87, |
| "step": 105, |
| "token_acc": 0.9373134328358209, |
| "train_speed(iter/s)": 0.041661 |
| }, |
| { |
| "epoch": 0.3193033381712627, |
| "grad_norm": 17.604825973510742, |
| "learning_rate": 8.170805878433099e-07, |
| "loss": 0.12675491571426392, |
| "memory(GiB)": 49.87, |
| "step": 110, |
| "token_acc": 0.9649122807017544, |
| "train_speed(iter/s)": 0.041885 |
| }, |
| { |
| "epoch": 0.33381712626995647, |
| "grad_norm": 9.811626434326172, |
| "learning_rate": 7.981510007167717e-07, |
| "loss": 0.13588259220123292, |
| "memory(GiB)": 49.87, |
| "step": 115, |
| "token_acc": 0.9520958083832335, |
| "train_speed(iter/s)": 0.042137 |
| }, |
| { |
| "epoch": 0.3483309143686502, |
| "grad_norm": 12.495682716369629, |
| "learning_rate": 7.785335584187219e-07, |
| "loss": 0.11566004753112794, |
| "memory(GiB)": 49.87, |
| "step": 120, |
| "token_acc": 0.9491017964071856, |
| "train_speed(iter/s)": 0.042342 |
| }, |
| { |
| "epoch": 0.36284470246734396, |
| "grad_norm": 11.247044563293457, |
| "learning_rate": 7.582735197579656e-07, |
| "loss": 0.13109700679779052, |
| "memory(GiB)": 49.87, |
| "step": 125, |
| "token_acc": 0.9548192771084337, |
| "train_speed(iter/s)": 0.042526 |
| }, |
| { |
| "epoch": 0.37735849056603776, |
| "grad_norm": 10.986481666564941, |
| "learning_rate": 7.374176260579745e-07, |
| "loss": 0.13217699527740479, |
| "memory(GiB)": 49.87, |
| "step": 130, |
| "token_acc": 0.9473684210526315, |
| "train_speed(iter/s)": 0.042698 |
| }, |
| { |
| "epoch": 0.3918722786647315, |
| "grad_norm": 8.799654006958008, |
| "learning_rate": 7.160139933213898e-07, |
| "loss": 0.1257235050201416, |
| "memory(GiB)": 49.87, |
| "step": 135, |
| "token_acc": 0.941358024691358, |
| "train_speed(iter/s)": 0.042868 |
| }, |
| { |
| "epoch": 0.40638606676342526, |
| "grad_norm": 11.893990516662598, |
| "learning_rate": 6.941120012230463e-07, |
| "loss": 0.12591309547424318, |
| "memory(GiB)": 49.87, |
| "step": 140, |
| "token_acc": 0.9570552147239264, |
| "train_speed(iter/s)": 0.043009 |
| }, |
| { |
| "epoch": 0.420899854862119, |
| "grad_norm": 30.306238174438477, |
| "learning_rate": 6.717621791876146e-07, |
| "loss": 0.1210709571838379, |
| "memory(GiB)": 49.87, |
| "step": 145, |
| "token_acc": 0.9305135951661632, |
| "train_speed(iter/s)": 0.043167 |
| }, |
| { |
| "epoch": 0.43541364296081275, |
| "grad_norm": 11.80178165435791, |
| "learning_rate": 6.490160898146918e-07, |
| "loss": 0.12539260387420653, |
| "memory(GiB)": 49.87, |
| "step": 150, |
| "token_acc": 0.9698795180722891, |
| "train_speed(iter/s)": 0.043309 |
| }, |
| { |
| "epoch": 0.44992743105950656, |
| "grad_norm": 12.080060005187988, |
| "learning_rate": 6.259262099202849e-07, |
| "loss": 0.1291128873825073, |
| "memory(GiB)": 49.87, |
| "step": 155, |
| "token_acc": 0.950920245398773, |
| "train_speed(iter/s)": 0.043468 |
| }, |
| { |
| "epoch": 0.4644412191582003, |
| "grad_norm": 6.417619705200195, |
| "learning_rate": 6.025458094691323e-07, |
| "loss": 0.11431529521942138, |
| "memory(GiB)": 49.87, |
| "step": 160, |
| "token_acc": 0.9542682926829268, |
| "train_speed(iter/s)": 0.043601 |
| }, |
| { |
| "epoch": 0.47895500725689405, |
| "grad_norm": 7.19518518447876, |
| "learning_rate": 5.78928828677177e-07, |
| "loss": 0.13335959911346434, |
| "memory(GiB)": 49.87, |
| "step": 165, |
| "token_acc": 0.9634146341463414, |
| "train_speed(iter/s)": 0.043742 |
| }, |
| { |
| "epoch": 0.4934687953555878, |
| "grad_norm": 6.8175740242004395, |
| "learning_rate": 5.551297535677235e-07, |
| "loss": 0.1238396167755127, |
| "memory(GiB)": 49.87, |
| "step": 170, |
| "token_acc": 0.9597523219814241, |
| "train_speed(iter/s)": 0.043871 |
| }, |
| { |
| "epoch": 0.5079825834542816, |
| "grad_norm": 11.478657722473145, |
| "learning_rate": 5.312034902683779e-07, |
| "loss": 0.13186312913894654, |
| "memory(GiB)": 49.87, |
| "step": 175, |
| "token_acc": 0.9457831325301205, |
| "train_speed(iter/s)": 0.043978 |
| }, |
| { |
| "epoch": 0.5224963715529753, |
| "grad_norm": 22.4121150970459, |
| "learning_rate": 5.072052383387786e-07, |
| "loss": 0.1387540578842163, |
| "memory(GiB)": 49.87, |
| "step": 180, |
| "token_acc": 0.9520958083832335, |
| "train_speed(iter/s)": 0.044081 |
| }, |
| { |
| "epoch": 0.5370101596516691, |
| "grad_norm": 9.121978759765625, |
| "learning_rate": 4.831903634213598e-07, |
| "loss": 0.11786762475967408, |
| "memory(GiB)": 49.87, |
| "step": 185, |
| "token_acc": 0.9663608562691132, |
| "train_speed(iter/s)": 0.044167 |
| }, |
| { |
| "epoch": 0.5515239477503628, |
| "grad_norm": 8.12821102142334, |
| "learning_rate": 4.592142695089488e-07, |
| "loss": 0.12599266767501832, |
| "memory(GiB)": 49.87, |
| "step": 190, |
| "token_acc": 0.9706744868035191, |
| "train_speed(iter/s)": 0.044264 |
| }, |
| { |
| "epoch": 0.5660377358490566, |
| "grad_norm": 7.080636978149414, |
| "learning_rate": 4.353322711238869e-07, |
| "loss": 0.11752383708953858, |
| "memory(GiB)": 49.87, |
| "step": 195, |
| "token_acc": 0.9357798165137615, |
| "train_speed(iter/s)": 0.044354 |
| }, |
| { |
| "epoch": 0.5805515239477503, |
| "grad_norm": 10.683778762817383, |
| "learning_rate": 4.115994657035658e-07, |
| "loss": 0.12174248695373535, |
| "memory(GiB)": 49.87, |
| "step": 200, |
| "token_acc": 0.9601226993865031, |
| "train_speed(iter/s)": 0.044436 |
| }, |
| { |
| "epoch": 0.5805515239477503, |
| "eval_loss": 0.12456289678812027, |
| "eval_runtime": 30.3669, |
| "eval_samples_per_second": 14.654, |
| "eval_steps_per_second": 1.844, |
| "eval_token_acc": 0.9569661108122647, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5950653120464441, |
| "grad_norm": 7.245314598083496, |
| "learning_rate": 3.8807060648679257e-07, |
| "loss": 0.12915533781051636, |
| "memory(GiB)": 49.87, |
| "step": 205, |
| "token_acc": 0.9492537313432836, |
| "train_speed(iter/s)": 0.042189 |
| }, |
| { |
| "epoch": 0.6095791001451378, |
| "grad_norm": 10.728217124938965, |
| "learning_rate": 3.64799976194246e-07, |
| "loss": 0.10868642330169678, |
| "memory(GiB)": 49.87, |
| "step": 210, |
| "token_acc": 0.9563953488372093, |
| "train_speed(iter/s)": 0.042301 |
| }, |
| { |
| "epoch": 0.6240928882438317, |
| "grad_norm": 8.868558883666992, |
| "learning_rate": 3.4184126179445096e-07, |
| "loss": 0.120005202293396, |
| "memory(GiB)": 49.87, |
| "step": 215, |
| "token_acc": 0.9554896142433235, |
| "train_speed(iter/s)": 0.042401 |
| }, |
| { |
| "epoch": 0.6386066763425254, |
| "grad_norm": 7.093891620635986, |
| "learning_rate": 3.192474306441936e-07, |
| "loss": 0.12071295976638793, |
| "memory(GiB)": 49.87, |
| "step": 220, |
| "token_acc": 0.9702380952380952, |
| "train_speed(iter/s)": 0.042514 |
| }, |
| { |
| "epoch": 0.6531204644412192, |
| "grad_norm": 5.627134323120117, |
| "learning_rate": 2.9707060828913224e-07, |
| "loss": 0.11332046985626221, |
| "memory(GiB)": 49.87, |
| "step": 225, |
| "token_acc": 0.9386503067484663, |
| "train_speed(iter/s)": 0.042613 |
| }, |
| { |
| "epoch": 0.6676342525399129, |
| "grad_norm": 9.86678695678711, |
| "learning_rate": 2.75361958206525e-07, |
| "loss": 0.12341060638427734, |
| "memory(GiB)": 49.87, |
| "step": 230, |
| "token_acc": 0.9528023598820059, |
| "train_speed(iter/s)": 0.042729 |
| }, |
| { |
| "epoch": 0.6821480406386067, |
| "grad_norm": 7.587142467498779, |
| "learning_rate": 2.5417156376751557e-07, |
| "loss": 0.11968934535980225, |
| "memory(GiB)": 49.87, |
| "step": 235, |
| "token_acc": 0.9701492537313433, |
| "train_speed(iter/s)": 0.042825 |
| }, |
| { |
| "epoch": 0.6966618287373004, |
| "grad_norm": 6.26280403137207, |
| "learning_rate": 2.3354831269130132e-07, |
| "loss": 0.1178591012954712, |
| "memory(GiB)": 49.87, |
| "step": 240, |
| "token_acc": 0.9603658536585366, |
| "train_speed(iter/s)": 0.04292 |
| }, |
| { |
| "epoch": 0.7111756168359942, |
| "grad_norm": 9.032163619995117, |
| "learning_rate": 2.1353978425775004e-07, |
| "loss": 0.12919411659240723, |
| "memory(GiB)": 49.87, |
| "step": 245, |
| "token_acc": 0.9518072289156626, |
| "train_speed(iter/s)": 0.04302 |
| }, |
| { |
| "epoch": 0.7256894049346879, |
| "grad_norm": 12.530137062072754, |
| "learning_rate": 1.9419213953868235e-07, |
| "loss": 0.10113420486450195, |
| "memory(GiB)": 49.87, |
| "step": 250, |
| "token_acc": 0.9615384615384616, |
| "train_speed(iter/s)": 0.043104 |
| }, |
| { |
| "epoch": 0.7402031930333817, |
| "grad_norm": 6.279452800750732, |
| "learning_rate": 1.7555001490105486e-07, |
| "loss": 0.11017693281173706, |
| "memory(GiB)": 49.87, |
| "step": 255, |
| "token_acc": 0.9631901840490797, |
| "train_speed(iter/s)": 0.043201 |
| }, |
| { |
| "epoch": 0.7547169811320755, |
| "grad_norm": 43.64086151123047, |
| "learning_rate": 1.57656419027747e-07, |
| "loss": 0.13607945442199706, |
| "memory(GiB)": 49.87, |
| "step": 260, |
| "token_acc": 0.9575757575757575, |
| "train_speed(iter/s)": 0.043278 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 12.17973804473877, |
| "learning_rate": 1.4055263369352672e-07, |
| "loss": 0.1254185438156128, |
| "memory(GiB)": 49.87, |
| "step": 265, |
| "token_acc": 0.9637462235649547, |
| "train_speed(iter/s)": 0.043365 |
| }, |
| { |
| "epoch": 0.783744557329463, |
| "grad_norm": 10.712772369384766, |
| "learning_rate": 1.2427811852511395e-07, |
| "loss": 0.11515959501266479, |
| "memory(GiB)": 49.87, |
| "step": 270, |
| "token_acc": 0.9667673716012085, |
| "train_speed(iter/s)": 0.043444 |
| }, |
| { |
| "epoch": 0.7982583454281568, |
| "grad_norm": 6.96333122253418, |
| "learning_rate": 1.0887041996506857e-07, |
| "loss": 0.1222030520439148, |
| "memory(GiB)": 49.87, |
| "step": 275, |
| "token_acc": 0.9635258358662614, |
| "train_speed(iter/s)": 0.043528 |
| }, |
| { |
| "epoch": 0.8127721335268505, |
| "grad_norm": 6.654318809509277, |
| "learning_rate": 9.43650846495247e-08, |
| "loss": 0.1167829990386963, |
| "memory(GiB)": 49.87, |
| "step": 280, |
| "token_acc": 0.9697885196374623, |
| "train_speed(iter/s)": 0.043596 |
| }, |
| { |
| "epoch": 0.8272859216255443, |
| "grad_norm": 9.63259220123291, |
| "learning_rate": 8.079557739962128e-08, |
| "loss": 0.11740148067474365, |
| "memory(GiB)": 49.87, |
| "step": 285, |
| "token_acc": 0.9634146341463414, |
| "train_speed(iter/s)": 0.043666 |
| }, |
| { |
| "epoch": 0.841799709724238, |
| "grad_norm": 13.925198554992676, |
| "learning_rate": 6.819320401582257e-08, |
| "loss": 0.11932878494262696, |
| "memory(GiB)": 49.87, |
| "step": 290, |
| "token_acc": 0.946875, |
| "train_speed(iter/s)": 0.043721 |
| }, |
| { |
| "epoch": 0.8563134978229318, |
| "grad_norm": 11.304753303527832, |
| "learning_rate": 5.658703905325185e-08, |
| "loss": 0.13282599449157714, |
| "memory(GiB)": 49.87, |
| "step": 295, |
| "token_acc": 0.9246987951807228, |
| "train_speed(iter/s)": 0.043794 |
| }, |
| { |
| "epoch": 0.8708272859216255, |
| "grad_norm": 6.113060474395752, |
| "learning_rate": 4.600385874466256e-08, |
| "loss": 0.10344053506851196, |
| "memory(GiB)": 49.87, |
| "step": 300, |
| "token_acc": 0.9604863221884499, |
| "train_speed(iter/s)": 0.04385 |
| }, |
| { |
| "epoch": 0.8708272859216255, |
| "eval_loss": 0.12102954089641571, |
| "eval_runtime": 30.2478, |
| "eval_samples_per_second": 14.712, |
| "eval_steps_per_second": 1.851, |
| "eval_token_acc": 0.956428187197418, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8853410740203193, |
| "grad_norm": 9.591761589050293, |
| "learning_rate": 3.646807922580097e-08, |
| "loss": 0.12371342182159424, |
| "memory(GiB)": 49.87, |
| "step": 305, |
| "token_acc": 0.9606060606060606, |
| "train_speed(iter/s)": 0.042375 |
| }, |
| { |
| "epoch": 0.8998548621190131, |
| "grad_norm": 6.957780361175537, |
| "learning_rate": 2.800170020567566e-08, |
| "loss": 0.11073534488677979, |
| "memory(GiB)": 49.87, |
| "step": 310, |
| "token_acc": 0.9457831325301205, |
| "train_speed(iter/s)": 0.042451 |
| }, |
| { |
| "epoch": 0.9143686502177069, |
| "grad_norm": 8.52478313446045, |
| "learning_rate": 2.0624254211693894e-08, |
| "loss": 0.1204371452331543, |
| "memory(GiB)": 49.87, |
| "step": 315, |
| "token_acc": 0.9355828220858896, |
| "train_speed(iter/s)": 0.042532 |
| }, |
| { |
| "epoch": 0.9288824383164006, |
| "grad_norm": 10.085247993469238, |
| "learning_rate": 1.4352761526756907e-08, |
| "loss": 0.12228823900222778, |
| "memory(GiB)": 49.87, |
| "step": 320, |
| "token_acc": 0.937888198757764, |
| "train_speed(iter/s)": 0.042597 |
| }, |
| { |
| "epoch": 0.9433962264150944, |
| "grad_norm": 13.328726768493652, |
| "learning_rate": 9.201690922279404e-09, |
| "loss": 0.1078119158744812, |
| "memory(GiB)": 49.87, |
| "step": 325, |
| "token_acc": 0.9603658536585366, |
| "train_speed(iter/s)": 0.042678 |
| }, |
| { |
| "epoch": 0.9579100145137881, |
| "grad_norm": 16.931018829345703, |
| "learning_rate": 5.182926277723821e-09, |
| "loss": 0.1230659008026123, |
| "memory(GiB)": 49.87, |
| "step": 330, |
| "token_acc": 0.939209726443769, |
| "train_speed(iter/s)": 0.042761 |
| }, |
| { |
| "epoch": 0.9724238026124818, |
| "grad_norm": 8.692817687988281, |
| "learning_rate": 2.3057391636606695e-09, |
| "loss": 0.11264588832855224, |
| "memory(GiB)": 49.87, |
| "step": 335, |
| "token_acc": 0.93993993993994, |
| "train_speed(iter/s)": 0.042831 |
| }, |
| { |
| "epoch": 0.9869375907111756, |
| "grad_norm": 7.866856098175049, |
| "learning_rate": 5.767674516083954e-10, |
| "loss": 0.1168862223625183, |
| "memory(GiB)": 49.87, |
| "step": 340, |
| "token_acc": 0.9660493827160493, |
| "train_speed(iter/s)": 0.042902 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 11.080939292907715, |
| "learning_rate": 0.0, |
| "loss": 0.12124216556549072, |
| "memory(GiB)": 49.87, |
| "step": 345, |
| "token_acc": 0.935374149659864, |
| "train_speed(iter/s)": 0.04301 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.12013684213161469, |
| "eval_runtime": 30.5384, |
| "eval_samples_per_second": 14.572, |
| "eval_steps_per_second": 1.834, |
| "eval_token_acc": 0.956428187197418, |
| "step": 345 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 345, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.752508131096658e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|