| { |
| "best_metric": 0.13022484, |
| "best_model_checkpoint": "/workspace/ms-swift/qwen_atlas/v1-20250305-211120/checkpoint-1152", |
| "epoch": 1.997939038941317, |
| "eval_steps": 100, |
| "global_step": 1152, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001735546154680551, |
| "grad_norm": 4.6760759353637695, |
| "learning_rate": 1.724137931034483e-06, |
| "loss": 1.6580421924591064, |
| "memory(GiB)": 35.37, |
| "step": 1, |
| "token_acc": 0.6974789915966386, |
| "train_speed(iter/s)": 0.008041 |
| }, |
| { |
| "epoch": 0.008677730773402755, |
| "grad_norm": 4.790174961090088, |
| "learning_rate": 8.620689655172414e-06, |
| "loss": 1.6793524026870728, |
| "memory(GiB)": 36.38, |
| "step": 5, |
| "token_acc": 0.6903924077628493, |
| "train_speed(iter/s)": 0.008451 |
| }, |
| { |
| "epoch": 0.01735546154680551, |
| "grad_norm": 4.196518421173096, |
| "learning_rate": 1.7241379310344828e-05, |
| "loss": 1.64276065826416, |
| "memory(GiB)": 39.49, |
| "step": 10, |
| "token_acc": 0.6917943452851051, |
| "train_speed(iter/s)": 0.008498 |
| }, |
| { |
| "epoch": 0.026033192320208266, |
| "grad_norm": 6.951071739196777, |
| "learning_rate": 2.5862068965517244e-05, |
| "loss": 1.35954008102417, |
| "memory(GiB)": 39.49, |
| "step": 15, |
| "token_acc": 0.7025507998270644, |
| "train_speed(iter/s)": 0.00851 |
| }, |
| { |
| "epoch": 0.03471092309361102, |
| "grad_norm": 5.639510154724121, |
| "learning_rate": 3.4482758620689657e-05, |
| "loss": 0.8004651069641113, |
| "memory(GiB)": 42.62, |
| "step": 20, |
| "token_acc": 0.7704440185273917, |
| "train_speed(iter/s)": 0.008512 |
| }, |
| { |
| "epoch": 0.04338865386701377, |
| "grad_norm": 2.946929693222046, |
| "learning_rate": 4.3103448275862066e-05, |
| "loss": 0.5983310222625733, |
| "memory(GiB)": 43.7, |
| "step": 25, |
| "token_acc": 0.8041049513330512, |
| "train_speed(iter/s)": 0.008515 |
| }, |
| { |
| "epoch": 0.05206638464041653, |
| "grad_norm": 2.4821643829345703, |
| "learning_rate": 5.172413793103449e-05, |
| "loss": 0.5139754772186279, |
| "memory(GiB)": 43.7, |
| "step": 30, |
| "token_acc": 0.8192055870798778, |
| "train_speed(iter/s)": 0.00852 |
| }, |
| { |
| "epoch": 0.060744115413819286, |
| "grad_norm": 2.31684947013855, |
| "learning_rate": 6.03448275862069e-05, |
| "loss": 0.4888479709625244, |
| "memory(GiB)": 43.7, |
| "step": 35, |
| "token_acc": 0.8198602583103959, |
| "train_speed(iter/s)": 0.008521 |
| }, |
| { |
| "epoch": 0.06942184618722204, |
| "grad_norm": 2.18639874458313, |
| "learning_rate": 6.896551724137931e-05, |
| "loss": 0.4743186473846436, |
| "memory(GiB)": 43.7, |
| "step": 40, |
| "token_acc": 0.8318414322250639, |
| "train_speed(iter/s)": 0.008525 |
| }, |
| { |
| "epoch": 0.07809957696062479, |
| "grad_norm": 2.380423069000244, |
| "learning_rate": 7.758620689655173e-05, |
| "loss": 0.4614367961883545, |
| "memory(GiB)": 43.7, |
| "step": 45, |
| "token_acc": 0.8394912427022518, |
| "train_speed(iter/s)": 0.008526 |
| }, |
| { |
| "epoch": 0.08677730773402755, |
| "grad_norm": 16.561281204223633, |
| "learning_rate": 8.620689655172413e-05, |
| "loss": 0.4493127346038818, |
| "memory(GiB)": 43.7, |
| "step": 50, |
| "token_acc": 0.8405128414519619, |
| "train_speed(iter/s)": 0.008526 |
| }, |
| { |
| "epoch": 0.09545503850743031, |
| "grad_norm": 2.035421848297119, |
| "learning_rate": 9.482758620689656e-05, |
| "loss": 0.43794918060302734, |
| "memory(GiB)": 43.7, |
| "step": 55, |
| "token_acc": 0.8461954318151101, |
| "train_speed(iter/s)": 0.008527 |
| }, |
| { |
| "epoch": 0.10413276928083307, |
| "grad_norm": 1.2835978269577026, |
| "learning_rate": 9.999917536092901e-05, |
| "loss": 0.43846426010131834, |
| "memory(GiB)": 43.7, |
| "step": 60, |
| "token_acc": 0.8478704525288376, |
| "train_speed(iter/s)": 0.008527 |
| }, |
| { |
| "epoch": 0.11281050005423582, |
| "grad_norm": 6.1280741691589355, |
| "learning_rate": 9.99898984837663e-05, |
| "loss": 0.4272454261779785, |
| "memory(GiB)": 43.7, |
| "step": 65, |
| "token_acc": 0.8532792155601994, |
| "train_speed(iter/s)": 0.008528 |
| }, |
| { |
| "epoch": 0.12148823082763857, |
| "grad_norm": 2.31329607963562, |
| "learning_rate": 9.997031584946868e-05, |
| "loss": 0.4156841278076172, |
| "memory(GiB)": 43.7, |
| "step": 70, |
| "token_acc": 0.8506665595888211, |
| "train_speed(iter/s)": 0.008529 |
| }, |
| { |
| "epoch": 0.13016596160104132, |
| "grad_norm": 4.000900745391846, |
| "learning_rate": 9.994043149512924e-05, |
| "loss": 0.40361742973327636, |
| "memory(GiB)": 43.7, |
| "step": 75, |
| "token_acc": 0.8494347592032796, |
| "train_speed(iter/s)": 0.00853 |
| }, |
| { |
| "epoch": 0.13884369237444408, |
| "grad_norm": 4.327010154724121, |
| "learning_rate": 9.99002515816106e-05, |
| "loss": 0.4148906707763672, |
| "memory(GiB)": 43.7, |
| "step": 80, |
| "token_acc": 0.8513289724269272, |
| "train_speed(iter/s)": 0.008532 |
| }, |
| { |
| "epoch": 0.14752142314784683, |
| "grad_norm": 2.917447805404663, |
| "learning_rate": 9.984978439227486e-05, |
| "loss": 0.3819650411605835, |
| "memory(GiB)": 43.7, |
| "step": 85, |
| "token_acc": 0.865501155338536, |
| "train_speed(iter/s)": 0.008532 |
| }, |
| { |
| "epoch": 0.15619915392124958, |
| "grad_norm": 2.645838499069214, |
| "learning_rate": 9.978904033127593e-05, |
| "loss": 0.3954829454421997, |
| "memory(GiB)": 43.7, |
| "step": 90, |
| "token_acc": 0.8565228073048891, |
| "train_speed(iter/s)": 0.008532 |
| }, |
| { |
| "epoch": 0.16487688469465234, |
| "grad_norm": 1.4669901132583618, |
| "learning_rate": 9.971803192141458e-05, |
| "loss": 0.36885552406311034, |
| "memory(GiB)": 43.7, |
| "step": 95, |
| "token_acc": 0.8682406076780949, |
| "train_speed(iter/s)": 0.008534 |
| }, |
| { |
| "epoch": 0.1735546154680551, |
| "grad_norm": 2.0109212398529053, |
| "learning_rate": 9.963677380155683e-05, |
| "loss": 0.3746215581893921, |
| "memory(GiB)": 43.7, |
| "step": 100, |
| "token_acc": 0.8680254347917632, |
| "train_speed(iter/s)": 0.008533 |
| }, |
| { |
| "epoch": 0.1735546154680551, |
| "eval_loss": 0.385597825050354, |
| "eval_runtime": 222.5947, |
| "eval_samples_per_second": 4.182, |
| "eval_steps_per_second": 0.84, |
| "eval_token_acc": 0.8657494207806096, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18223234624145787, |
| "grad_norm": 6.436813831329346, |
| "learning_rate": 9.954528272361607e-05, |
| "loss": 0.36356263160705565, |
| "memory(GiB)": 43.7, |
| "step": 105, |
| "token_acc": 0.8616467952308554, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 0.19091007701486062, |
| "grad_norm": 4.178765773773193, |
| "learning_rate": 9.944357754909945e-05, |
| "loss": 0.3653342485427856, |
| "memory(GiB)": 43.7, |
| "step": 110, |
| "token_acc": 0.8718457996659594, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 0.19958780778826338, |
| "grad_norm": 2.2793004512786865, |
| "learning_rate": 9.933167924521956e-05, |
| "loss": 0.36059207916259767, |
| "memory(GiB)": 43.7, |
| "step": 115, |
| "token_acc": 0.8739488320355951, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 0.20826553856166613, |
| "grad_norm": 2.0142924785614014, |
| "learning_rate": 9.920961088057184e-05, |
| "loss": 0.3489166975021362, |
| "memory(GiB)": 43.7, |
| "step": 120, |
| "token_acc": 0.8721164521567498, |
| "train_speed(iter/s)": 0.0084 |
| }, |
| { |
| "epoch": 0.21694326933506888, |
| "grad_norm": 1.6888527870178223, |
| "learning_rate": 9.907739762037879e-05, |
| "loss": 0.3451664447784424, |
| "memory(GiB)": 43.7, |
| "step": 125, |
| "token_acc": 0.8695542472666106, |
| "train_speed(iter/s)": 0.008406 |
| }, |
| { |
| "epoch": 0.22562100010847164, |
| "grad_norm": 1.9126875400543213, |
| "learning_rate": 9.893506672130211e-05, |
| "loss": 0.336126446723938, |
| "memory(GiB)": 43.7, |
| "step": 130, |
| "token_acc": 0.8788651577940708, |
| "train_speed(iter/s)": 0.008411 |
| }, |
| { |
| "epoch": 0.2342987308818744, |
| "grad_norm": 1.2295624017715454, |
| "learning_rate": 9.878264752582341e-05, |
| "loss": 0.3329684495925903, |
| "memory(GiB)": 43.7, |
| "step": 135, |
| "token_acc": 0.8763455827765405, |
| "train_speed(iter/s)": 0.008415 |
| }, |
| { |
| "epoch": 0.24297646165527714, |
| "grad_norm": 2.3171041011810303, |
| "learning_rate": 9.86201714561952e-05, |
| "loss": 0.3186241865158081, |
| "memory(GiB)": 43.7, |
| "step": 140, |
| "token_acc": 0.8794609212217936, |
| "train_speed(iter/s)": 0.00842 |
| }, |
| { |
| "epoch": 0.2516541924286799, |
| "grad_norm": 1.815627932548523, |
| "learning_rate": 9.844767200796284e-05, |
| "loss": 0.318283748626709, |
| "memory(GiB)": 43.7, |
| "step": 145, |
| "token_acc": 0.891897272546287, |
| "train_speed(iter/s)": 0.008423 |
| }, |
| { |
| "epoch": 0.26033192320208265, |
| "grad_norm": 2.60662841796875, |
| "learning_rate": 9.826518474305932e-05, |
| "loss": 0.3101086378097534, |
| "memory(GiB)": 43.7, |
| "step": 150, |
| "token_acc": 0.8821918980194208, |
| "train_speed(iter/s)": 0.008427 |
| }, |
| { |
| "epoch": 0.26900965397548543, |
| "grad_norm": 2.4237709045410156, |
| "learning_rate": 9.807274728247389e-05, |
| "loss": 0.31526162624359133, |
| "memory(GiB)": 43.7, |
| "step": 155, |
| "token_acc": 0.8851850323858245, |
| "train_speed(iter/s)": 0.008429 |
| }, |
| { |
| "epoch": 0.27768738474888816, |
| "grad_norm": 2.208160638809204, |
| "learning_rate": 9.787039929849617e-05, |
| "loss": 0.30453202724456785, |
| "memory(GiB)": 43.7, |
| "step": 160, |
| "token_acc": 0.8869576413029484, |
| "train_speed(iter/s)": 0.008433 |
| }, |
| { |
| "epoch": 0.28636511552229094, |
| "grad_norm": 2.1958048343658447, |
| "learning_rate": 9.765818250653756e-05, |
| "loss": 0.3076713800430298, |
| "memory(GiB)": 43.7, |
| "step": 165, |
| "token_acc": 0.889782004881744, |
| "train_speed(iter/s)": 0.008437 |
| }, |
| { |
| "epoch": 0.29504284629569366, |
| "grad_norm": 3.110142707824707, |
| "learning_rate": 9.743614065653119e-05, |
| "loss": 0.29779419898986814, |
| "memory(GiB)": 43.7, |
| "step": 170, |
| "token_acc": 0.8947498864058822, |
| "train_speed(iter/s)": 0.00844 |
| }, |
| { |
| "epoch": 0.30372057706909644, |
| "grad_norm": 2.8065741062164307, |
| "learning_rate": 9.720431952391271e-05, |
| "loss": 0.29869651794433594, |
| "memory(GiB)": 43.7, |
| "step": 175, |
| "token_acc": 0.886225766767064, |
| "train_speed(iter/s)": 0.008442 |
| }, |
| { |
| "epoch": 0.31239830784249917, |
| "grad_norm": 3.4282145500183105, |
| "learning_rate": 9.696276690018329e-05, |
| "loss": 0.2898601293563843, |
| "memory(GiB)": 43.7, |
| "step": 180, |
| "token_acc": 0.896655905968049, |
| "train_speed(iter/s)": 0.008445 |
| }, |
| { |
| "epoch": 0.32107603861590195, |
| "grad_norm": 1.7516893148422241, |
| "learning_rate": 9.671153258305709e-05, |
| "loss": 0.2760122060775757, |
| "memory(GiB)": 43.7, |
| "step": 185, |
| "token_acc": 0.8976162563501368, |
| "train_speed(iter/s)": 0.008447 |
| }, |
| { |
| "epoch": 0.3297537693893047, |
| "grad_norm": 3.0154619216918945, |
| "learning_rate": 9.64506683661951e-05, |
| "loss": 0.29156625270843506, |
| "memory(GiB)": 43.7, |
| "step": 190, |
| "token_acc": 0.8932791728212703, |
| "train_speed(iter/s)": 0.00845 |
| }, |
| { |
| "epoch": 0.33843150016270745, |
| "grad_norm": 3.490983724594116, |
| "learning_rate": 9.61802280285276e-05, |
| "loss": 0.2764655590057373, |
| "memory(GiB)": 43.7, |
| "step": 195, |
| "token_acc": 0.895898926293695, |
| "train_speed(iter/s)": 0.008453 |
| }, |
| { |
| "epoch": 0.3471092309361102, |
| "grad_norm": 13.885275840759277, |
| "learning_rate": 9.59002673231672e-05, |
| "loss": 0.2801302671432495, |
| "memory(GiB)": 43.7, |
| "step": 200, |
| "token_acc": 0.9076711267908618, |
| "train_speed(iter/s)": 0.008455 |
| }, |
| { |
| "epoch": 0.3471092309361102, |
| "eval_loss": 0.28404700756073, |
| "eval_runtime": 220.1539, |
| "eval_samples_per_second": 4.229, |
| "eval_steps_per_second": 0.849, |
| "eval_token_acc": 0.9015362680449118, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.35578696170951296, |
| "grad_norm": 1.6657084226608276, |
| "learning_rate": 9.561084396591494e-05, |
| "loss": 0.2741654396057129, |
| "memory(GiB)": 43.7, |
| "step": 205, |
| "token_acc": 0.8974332690967239, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 0.36446469248291574, |
| "grad_norm": 3.5704126358032227, |
| "learning_rate": 9.53120176233619e-05, |
| "loss": 0.27562870979309084, |
| "memory(GiB)": 43.7, |
| "step": 210, |
| "token_acc": 0.8987169683065681, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 0.37314242325631847, |
| "grad_norm": 1.3969208002090454, |
| "learning_rate": 9.500384990058841e-05, |
| "loss": 0.27314207553863523, |
| "memory(GiB)": 43.7, |
| "step": 215, |
| "token_acc": 0.8985029696525914, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 0.38182015402972125, |
| "grad_norm": 2.852271795272827, |
| "learning_rate": 9.468640432846378e-05, |
| "loss": 0.2624432325363159, |
| "memory(GiB)": 43.7, |
| "step": 220, |
| "token_acc": 0.9034218361270852, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 0.390497884803124, |
| "grad_norm": 2.3971970081329346, |
| "learning_rate": 9.4359746350549e-05, |
| "loss": 0.27120444774627683, |
| "memory(GiB)": 43.7, |
| "step": 225, |
| "token_acc": 0.9019728189390618, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 0.39917561557652675, |
| "grad_norm": 2.9562697410583496, |
| "learning_rate": 9.402394330960506e-05, |
| "loss": 0.2618232727050781, |
| "memory(GiB)": 43.7, |
| "step": 230, |
| "token_acc": 0.9105880494451246, |
| "train_speed(iter/s)": 0.008397 |
| }, |
| { |
| "epoch": 0.4078533463499295, |
| "grad_norm": 2.3861279487609863, |
| "learning_rate": 9.367906443370984e-05, |
| "loss": 0.2571915864944458, |
| "memory(GiB)": 43.7, |
| "step": 235, |
| "token_acc": 0.9074556213017752, |
| "train_speed(iter/s)": 0.0084 |
| }, |
| { |
| "epoch": 0.41653107712333226, |
| "grad_norm": 2.1653852462768555, |
| "learning_rate": 9.332518082198624e-05, |
| "loss": 0.26276435852050783, |
| "memory(GiB)": 43.7, |
| "step": 240, |
| "token_acc": 0.9037630249806616, |
| "train_speed(iter/s)": 0.008403 |
| }, |
| { |
| "epoch": 0.425208807896735, |
| "grad_norm": 6.575355529785156, |
| "learning_rate": 9.296236542994463e-05, |
| "loss": 0.24440584182739258, |
| "memory(GiB)": 43.7, |
| "step": 245, |
| "token_acc": 0.9097762166002289, |
| "train_speed(iter/s)": 0.008406 |
| }, |
| { |
| "epoch": 0.43388653867013777, |
| "grad_norm": 6.715099811553955, |
| "learning_rate": 9.259069305444252e-05, |
| "loss": 0.2519469499588013, |
| "memory(GiB)": 43.7, |
| "step": 250, |
| "token_acc": 0.9191672993167379, |
| "train_speed(iter/s)": 0.008408 |
| }, |
| { |
| "epoch": 0.4425642694435405, |
| "grad_norm": 2.3563008308410645, |
| "learning_rate": 9.221024031826476e-05, |
| "loss": 0.2473703384399414, |
| "memory(GiB)": 43.7, |
| "step": 255, |
| "token_acc": 0.9128261606235174, |
| "train_speed(iter/s)": 0.008411 |
| }, |
| { |
| "epoch": 0.4512420002169433, |
| "grad_norm": 3.382643461227417, |
| "learning_rate": 9.18210856543272e-05, |
| "loss": 0.236089825630188, |
| "memory(GiB)": 43.7, |
| "step": 260, |
| "token_acc": 0.9163037833086972, |
| "train_speed(iter/s)": 0.008413 |
| }, |
| { |
| "epoch": 0.459919730990346, |
| "grad_norm": 2.673158884048462, |
| "learning_rate": 9.142330928950718e-05, |
| "loss": 0.24100546836853026, |
| "memory(GiB)": 43.7, |
| "step": 265, |
| "token_acc": 0.9135581499410161, |
| "train_speed(iter/s)": 0.008416 |
| }, |
| { |
| "epoch": 0.4685974617637488, |
| "grad_norm": 11.933094024658203, |
| "learning_rate": 9.101699322810424e-05, |
| "loss": 0.23190362453460694, |
| "memory(GiB)": 43.7, |
| "step": 270, |
| "token_acc": 0.9265802143189936, |
| "train_speed(iter/s)": 0.008418 |
| }, |
| { |
| "epoch": 0.47727519253715156, |
| "grad_norm": 22.01103973388672, |
| "learning_rate": 9.060222123493441e-05, |
| "loss": 0.22824497222900392, |
| "memory(GiB)": 43.7, |
| "step": 275, |
| "token_acc": 0.9234252124870919, |
| "train_speed(iter/s)": 0.00842 |
| }, |
| { |
| "epoch": 0.4859529233105543, |
| "grad_norm": 2.3091957569122314, |
| "learning_rate": 9.017907881806146e-05, |
| "loss": 0.22891411781311036, |
| "memory(GiB)": 43.7, |
| "step": 280, |
| "token_acc": 0.9208098411071246, |
| "train_speed(iter/s)": 0.008422 |
| }, |
| { |
| "epoch": 0.49463065408395707, |
| "grad_norm": 1.878693699836731, |
| "learning_rate": 8.974765321116886e-05, |
| "loss": 0.22780225276947022, |
| "memory(GiB)": 43.7, |
| "step": 285, |
| "token_acc": 0.92338361626043, |
| "train_speed(iter/s)": 0.008424 |
| }, |
| { |
| "epoch": 0.5033083848573598, |
| "grad_norm": 4.434957981109619, |
| "learning_rate": 8.930803335557602e-05, |
| "loss": 0.22615401744842528, |
| "memory(GiB)": 43.7, |
| "step": 290, |
| "token_acc": 0.9223257075684963, |
| "train_speed(iter/s)": 0.008426 |
| }, |
| { |
| "epoch": 0.5119861156307626, |
| "grad_norm": 3.4905004501342773, |
| "learning_rate": 8.886030988190232e-05, |
| "loss": 0.22355277538299562, |
| "memory(GiB)": 43.7, |
| "step": 295, |
| "token_acc": 0.9136541664790888, |
| "train_speed(iter/s)": 0.008428 |
| }, |
| { |
| "epoch": 0.5206638464041653, |
| "grad_norm": 2.17803692817688, |
| "learning_rate": 8.840457509138307e-05, |
| "loss": 0.2240373134613037, |
| "memory(GiB)": 43.7, |
| "step": 300, |
| "token_acc": 0.9161153744376819, |
| "train_speed(iter/s)": 0.00843 |
| }, |
| { |
| "epoch": 0.5206638464041653, |
| "eval_loss": 0.22916455566883087, |
| "eval_runtime": 220.0371, |
| "eval_samples_per_second": 4.231, |
| "eval_steps_per_second": 0.85, |
| "eval_token_acc": 0.9222598467296382, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.529341577177568, |
| "grad_norm": 2.641390800476074, |
| "learning_rate": 8.794092293684098e-05, |
| "loss": 0.22017295360565187, |
| "memory(GiB)": 43.7, |
| "step": 305, |
| "token_acc": 0.9203932638956488, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 0.5380193079509709, |
| "grad_norm": 2.450665235519409, |
| "learning_rate": 8.746944900331711e-05, |
| "loss": 0.2305682897567749, |
| "memory(GiB)": 43.7, |
| "step": 310, |
| "token_acc": 0.914595610214365, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 0.5466970387243736, |
| "grad_norm": 1.7363907098770142, |
| "learning_rate": 8.699025048836541e-05, |
| "loss": 0.21577081680297852, |
| "memory(GiB)": 43.7, |
| "step": 315, |
| "token_acc": 0.9174962038130589, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 0.5553747694977763, |
| "grad_norm": 4.199025630950928, |
| "learning_rate": 8.650342618201475e-05, |
| "loss": 0.22321650981903077, |
| "memory(GiB)": 43.7, |
| "step": 320, |
| "token_acc": 0.9186493336603712, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 0.564052500271179, |
| "grad_norm": 3.0483787059783936, |
| "learning_rate": 8.60090764464027e-05, |
| "loss": 0.21764111518859863, |
| "memory(GiB)": 43.7, |
| "step": 325, |
| "token_acc": 0.9221584676398616, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 0.5727302310445819, |
| "grad_norm": 1.9495664834976196, |
| "learning_rate": 8.550730319508516e-05, |
| "loss": 0.21329159736633302, |
| "memory(GiB)": 43.7, |
| "step": 330, |
| "token_acc": 0.9290431767522497, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 0.5814079618179846, |
| "grad_norm": 1.935138463973999, |
| "learning_rate": 8.49982098720263e-05, |
| "loss": 0.21635849475860597, |
| "memory(GiB)": 43.7, |
| "step": 335, |
| "token_acc": 0.9326674848702865, |
| "train_speed(iter/s)": 0.008393 |
| }, |
| { |
| "epoch": 0.5900856925913873, |
| "grad_norm": 2.897500991821289, |
| "learning_rate": 8.448190143027269e-05, |
| "loss": 0.2165597438812256, |
| "memory(GiB)": 43.7, |
| "step": 340, |
| "token_acc": 0.9246217699825947, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 0.5987634233647902, |
| "grad_norm": 3.2862493991851807, |
| "learning_rate": 8.395848431031672e-05, |
| "loss": 0.2119969129562378, |
| "memory(GiB)": 43.7, |
| "step": 345, |
| "token_acc": 0.927697031729785, |
| "train_speed(iter/s)": 0.008397 |
| }, |
| { |
| "epoch": 0.6074411541381929, |
| "grad_norm": 2.485685110092163, |
| "learning_rate": 8.342806641815304e-05, |
| "loss": 0.19694406986236573, |
| "memory(GiB)": 43.7, |
| "step": 350, |
| "token_acc": 0.9306310071040534, |
| "train_speed(iter/s)": 0.008399 |
| }, |
| { |
| "epoch": 0.6161188849115956, |
| "grad_norm": 1.790946364402771, |
| "learning_rate": 8.289075710303305e-05, |
| "loss": 0.20862700939178466, |
| "memory(GiB)": 43.7, |
| "step": 355, |
| "token_acc": 0.9261981517300666, |
| "train_speed(iter/s)": 0.008401 |
| }, |
| { |
| "epoch": 0.6247966156849983, |
| "grad_norm": 9.109935760498047, |
| "learning_rate": 8.234666713492178e-05, |
| "loss": 0.2021815538406372, |
| "memory(GiB)": 43.7, |
| "step": 360, |
| "token_acc": 0.9324708811238552, |
| "train_speed(iter/s)": 0.008403 |
| }, |
| { |
| "epoch": 0.6334743464584012, |
| "grad_norm": 8.062849998474121, |
| "learning_rate": 8.179590868166196e-05, |
| "loss": 0.20523991584777831, |
| "memory(GiB)": 43.7, |
| "step": 365, |
| "token_acc": 0.9240613952655837, |
| "train_speed(iter/s)": 0.008404 |
| }, |
| { |
| "epoch": 0.6421520772318039, |
| "grad_norm": 2.0501291751861572, |
| "learning_rate": 8.123859528584985e-05, |
| "loss": 0.1977332353591919, |
| "memory(GiB)": 43.7, |
| "step": 370, |
| "token_acc": 0.9301781085375032, |
| "train_speed(iter/s)": 0.008406 |
| }, |
| { |
| "epoch": 0.6508298080052066, |
| "grad_norm": 6.636540412902832, |
| "learning_rate": 8.067484184142759e-05, |
| "loss": 0.19646908044815065, |
| "memory(GiB)": 43.7, |
| "step": 375, |
| "token_acc": 0.9374709076803723, |
| "train_speed(iter/s)": 0.008407 |
| }, |
| { |
| "epoch": 0.6595075387786093, |
| "grad_norm": 4.470943927764893, |
| "learning_rate": 8.010476456999712e-05, |
| "loss": 0.1992442488670349, |
| "memory(GiB)": 43.7, |
| "step": 380, |
| "token_acc": 0.9223243653537995, |
| "train_speed(iter/s)": 0.008408 |
| }, |
| { |
| "epoch": 0.6681852695520122, |
| "grad_norm": 3.5556273460388184, |
| "learning_rate": 7.952848099686025e-05, |
| "loss": 0.18782631158828736, |
| "memory(GiB)": 43.7, |
| "step": 385, |
| "token_acc": 0.9409830807473338, |
| "train_speed(iter/s)": 0.00841 |
| }, |
| { |
| "epoch": 0.6768630003254149, |
| "grad_norm": 3.291494846343994, |
| "learning_rate": 7.894610992679008e-05, |
| "loss": 0.18964465856552123, |
| "memory(GiB)": 43.7, |
| "step": 390, |
| "token_acc": 0.933142197268347, |
| "train_speed(iter/s)": 0.008411 |
| }, |
| { |
| "epoch": 0.6855407310988176, |
| "grad_norm": 2.276304006576538, |
| "learning_rate": 7.835777141953853e-05, |
| "loss": 0.1859665870666504, |
| "memory(GiB)": 43.7, |
| "step": 395, |
| "token_acc": 0.9327179666652085, |
| "train_speed(iter/s)": 0.008413 |
| }, |
| { |
| "epoch": 0.6942184618722204, |
| "grad_norm": 9.960855484008789, |
| "learning_rate": 7.776358676508522e-05, |
| "loss": 0.1905187964439392, |
| "memory(GiB)": 43.7, |
| "step": 400, |
| "token_acc": 0.93853976001627, |
| "train_speed(iter/s)": 0.008414 |
| }, |
| { |
| "epoch": 0.6942184618722204, |
| "eval_loss": 0.19546635448932648, |
| "eval_runtime": 220.2237, |
| "eval_samples_per_second": 4.228, |
| "eval_steps_per_second": 0.849, |
| "eval_token_acc": 0.933366601318838, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7028961926456232, |
| "grad_norm": 2.7447469234466553, |
| "learning_rate": 7.716367845863273e-05, |
| "loss": 0.17779254913330078, |
| "memory(GiB)": 43.7, |
| "step": 405, |
| "token_acc": 0.9323472308400552, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 0.7115739234190259, |
| "grad_norm": 2.3675718307495117, |
| "learning_rate": 7.655817017535339e-05, |
| "loss": 0.19050852060317994, |
| "memory(GiB)": 43.7, |
| "step": 410, |
| "token_acc": 0.9310586393044051, |
| "train_speed(iter/s)": 0.008379 |
| }, |
| { |
| "epoch": 0.7202516541924286, |
| "grad_norm": 3.075871706008911, |
| "learning_rate": 7.594718674489281e-05, |
| "loss": 0.19913625717163086, |
| "memory(GiB)": 43.7, |
| "step": 415, |
| "token_acc": 0.9342206089937487, |
| "train_speed(iter/s)": 0.008381 |
| }, |
| { |
| "epoch": 0.7289293849658315, |
| "grad_norm": 1.9185936450958252, |
| "learning_rate": 7.533085412563534e-05, |
| "loss": 0.19407714605331422, |
| "memory(GiB)": 43.7, |
| "step": 420, |
| "token_acc": 0.9339040074954218, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 0.7376071157392342, |
| "grad_norm": 3.576767683029175, |
| "learning_rate": 7.470929937873695e-05, |
| "loss": 0.19026898145675658, |
| "memory(GiB)": 43.7, |
| "step": 425, |
| "token_acc": 0.9307452283502857, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 0.7462848465126369, |
| "grad_norm": 18.04636573791504, |
| "learning_rate": 7.408265064193071e-05, |
| "loss": 0.1796002984046936, |
| "memory(GiB)": 43.7, |
| "step": 430, |
| "token_acc": 0.9397331762131224, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 0.7549625772860397, |
| "grad_norm": 2.2993645668029785, |
| "learning_rate": 7.345103710311031e-05, |
| "loss": 0.18258541822433472, |
| "memory(GiB)": 43.7, |
| "step": 435, |
| "token_acc": 0.9341671247938428, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 0.7636403080594425, |
| "grad_norm": 2.52563214302063, |
| "learning_rate": 7.281458897369707e-05, |
| "loss": 0.18408983945846558, |
| "memory(GiB)": 43.7, |
| "step": 440, |
| "token_acc": 0.9345260397152492, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 0.7723180388328452, |
| "grad_norm": 2.247847318649292, |
| "learning_rate": 7.217343746179601e-05, |
| "loss": 0.1789139986038208, |
| "memory(GiB)": 43.7, |
| "step": 445, |
| "token_acc": 0.9334740027072219, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 0.780995769606248, |
| "grad_norm": 2.011239767074585, |
| "learning_rate": 7.152771474514642e-05, |
| "loss": 0.183357834815979, |
| "memory(GiB)": 43.7, |
| "step": 450, |
| "token_acc": 0.9391916113998925, |
| "train_speed(iter/s)": 0.008392 |
| }, |
| { |
| "epoch": 0.7896735003796507, |
| "grad_norm": 1.3411664962768555, |
| "learning_rate": 7.087755394387251e-05, |
| "loss": 0.1795508623123169, |
| "memory(GiB)": 43.7, |
| "step": 455, |
| "token_acc": 0.9274950486523723, |
| "train_speed(iter/s)": 0.008394 |
| }, |
| { |
| "epoch": 0.7983512311530535, |
| "grad_norm": 5.725118637084961, |
| "learning_rate": 7.022308909303974e-05, |
| "loss": 0.18583301305770875, |
| "memory(GiB)": 43.7, |
| "step": 460, |
| "token_acc": 0.9388505371512602, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 0.8070289619264562, |
| "grad_norm": 5.536004066467285, |
| "learning_rate": 6.956445511502264e-05, |
| "loss": 0.17879717350006102, |
| "memory(GiB)": 43.7, |
| "step": 465, |
| "token_acc": 0.9410176125244618, |
| "train_speed(iter/s)": 0.008397 |
| }, |
| { |
| "epoch": 0.815706692699859, |
| "grad_norm": 2.3825104236602783, |
| "learning_rate": 6.890178779168963e-05, |
| "loss": 0.17840908765792846, |
| "memory(GiB)": 43.7, |
| "step": 470, |
| "token_acc": 0.9386419190454489, |
| "train_speed(iter/s)": 0.008398 |
| }, |
| { |
| "epoch": 0.8243844234732618, |
| "grad_norm": 2.428227424621582, |
| "learning_rate": 6.823522373641066e-05, |
| "loss": 0.15655564069747924, |
| "memory(GiB)": 43.7, |
| "step": 475, |
| "token_acc": 0.9488992717935766, |
| "train_speed(iter/s)": 0.008399 |
| }, |
| { |
| "epoch": 0.8330621542466645, |
| "grad_norm": 3.9280638694763184, |
| "learning_rate": 6.756490036589346e-05, |
| "loss": 0.17563689947128297, |
| "memory(GiB)": 43.7, |
| "step": 480, |
| "token_acc": 0.9412151607333717, |
| "train_speed(iter/s)": 0.008401 |
| }, |
| { |
| "epoch": 0.8417398850200672, |
| "grad_norm": 3.4658777713775635, |
| "learning_rate": 6.68909558718541e-05, |
| "loss": 0.1809309244155884, |
| "memory(GiB)": 43.7, |
| "step": 485, |
| "token_acc": 0.9326161790017212, |
| "train_speed(iter/s)": 0.008402 |
| }, |
| { |
| "epoch": 0.85041761579347, |
| "grad_norm": 3.0663387775421143, |
| "learning_rate": 6.621352919252788e-05, |
| "loss": 0.16797908544540405, |
| "memory(GiB)": 43.7, |
| "step": 490, |
| "token_acc": 0.9391401202940521, |
| "train_speed(iter/s)": 0.008403 |
| }, |
| { |
| "epoch": 0.8590953465668728, |
| "grad_norm": 2.3330888748168945, |
| "learning_rate": 6.553275998402625e-05, |
| "loss": 0.16253708600997924, |
| "memory(GiB)": 43.7, |
| "step": 495, |
| "token_acc": 0.9451398355847482, |
| "train_speed(iter/s)": 0.008405 |
| }, |
| { |
| "epoch": 0.8677730773402755, |
| "grad_norm": 5.0757317543029785, |
| "learning_rate": 6.484878859154576e-05, |
| "loss": 0.16285682916641236, |
| "memory(GiB)": 43.7, |
| "step": 500, |
| "token_acc": 0.9440452863052471, |
| "train_speed(iter/s)": 0.008406 |
| }, |
| { |
| "epoch": 0.8677730773402755, |
| "eval_loss": 0.1776481717824936, |
| "eval_runtime": 220.271, |
| "eval_samples_per_second": 4.227, |
| "eval_steps_per_second": 0.849, |
| "eval_token_acc": 0.9389627517376582, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8764508081136783, |
| "grad_norm": 35.683311462402344, |
| "learning_rate": 6.416175602043489e-05, |
| "loss": 0.16943529844284058, |
| "memory(GiB)": 43.7, |
| "step": 505, |
| "token_acc": 0.9365593665522729, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 0.885128538887081, |
| "grad_norm": 5.939138889312744, |
| "learning_rate": 6.347180390712497e-05, |
| "loss": 0.17387474775314332, |
| "memory(GiB)": 43.7, |
| "step": 510, |
| "token_acc": 0.9383922829581993, |
| "train_speed(iter/s)": 0.008378 |
| }, |
| { |
| "epoch": 0.8938062696604838, |
| "grad_norm": 2.826707601547241, |
| "learning_rate": 6.277907448993072e-05, |
| "loss": 0.16873899698257447, |
| "memory(GiB)": 43.7, |
| "step": 515, |
| "token_acc": 0.9365477021643599, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 0.9024840004338865, |
| "grad_norm": 1.4783248901367188, |
| "learning_rate": 6.208371057972694e-05, |
| "loss": 0.16192502975463868, |
| "memory(GiB)": 43.7, |
| "step": 520, |
| "token_acc": 0.9410790980672871, |
| "train_speed(iter/s)": 0.008381 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "grad_norm": 4.642916202545166, |
| "learning_rate": 6.13858555305071e-05, |
| "loss": 0.15619339942932128, |
| "memory(GiB)": 43.7, |
| "step": 525, |
| "token_acc": 0.9391466731620184, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 0.919839461980692, |
| "grad_norm": 3.962188243865967, |
| "learning_rate": 6.068565320982982e-05, |
| "loss": 0.1607579231262207, |
| "memory(GiB)": 43.7, |
| "step": 530, |
| "token_acc": 0.9430901476746749, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 0.9285171927540948, |
| "grad_norm": 4.833482265472412, |
| "learning_rate": 5.998324796915973e-05, |
| "loss": 0.15943752527236937, |
| "memory(GiB)": 43.7, |
| "step": 535, |
| "token_acc": 0.9461845102505695, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 0.9371949235274976, |
| "grad_norm": 3.3520522117614746, |
| "learning_rate": 5.9278784614108375e-05, |
| "loss": 0.1695178270339966, |
| "memory(GiB)": 43.7, |
| "step": 540, |
| "token_acc": 0.9380854742089816, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 0.9458726543009003, |
| "grad_norm": 5.150724411010742, |
| "learning_rate": 5.857240837458155e-05, |
| "loss": 0.16256927251815795, |
| "memory(GiB)": 43.7, |
| "step": 545, |
| "token_acc": 0.9492572786690434, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 0.9545503850743031, |
| "grad_norm": 2.933917284011841, |
| "learning_rate": 5.7864264874839144e-05, |
| "loss": 0.15667448043823243, |
| "memory(GiB)": 43.7, |
| "step": 550, |
| "token_acc": 0.947681539807524, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 0.9632281158477058, |
| "grad_norm": 2.804276704788208, |
| "learning_rate": 5.715450010347384e-05, |
| "loss": 0.15544140338897705, |
| "memory(GiB)": 43.7, |
| "step": 555, |
| "token_acc": 0.9417873014476174, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 0.9719058466211086, |
| "grad_norm": 3.734673500061035, |
| "learning_rate": 5.644326038331439e-05, |
| "loss": 0.1483505606651306, |
| "memory(GiB)": 43.7, |
| "step": 560, |
| "token_acc": 0.9512087329296858, |
| "train_speed(iter/s)": 0.008392 |
| }, |
| { |
| "epoch": 0.9805835773945113, |
| "grad_norm": 2.5557937622070312, |
| "learning_rate": 5.5730692341260294e-05, |
| "loss": 0.1612637758255005, |
| "memory(GiB)": 43.7, |
| "step": 565, |
| "token_acc": 0.9437612864390748, |
| "train_speed(iter/s)": 0.008394 |
| }, |
| { |
| "epoch": 0.9892613081679141, |
| "grad_norm": 4.100978851318359, |
| "learning_rate": 5.501694287805361e-05, |
| "loss": 0.16001300811767577, |
| "memory(GiB)": 43.7, |
| "step": 570, |
| "token_acc": 0.9466726417208156, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 0.9979390389413169, |
| "grad_norm": 2.458726167678833, |
| "learning_rate": 5.430215913799441e-05, |
| "loss": 0.1615024447441101, |
| "memory(GiB)": 43.7, |
| "step": 575, |
| "token_acc": 0.9395579236564958, |
| "train_speed(iter/s)": 0.008396 |
| }, |
| { |
| "epoch": 1.0052066384640417, |
| "grad_norm": 2.017334461212158, |
| "learning_rate": 5.358648847860599e-05, |
| "loss": 0.1611289620399475, |
| "memory(GiB)": 43.7, |
| "step": 580, |
| "token_acc": 0.9456555752581621, |
| "train_speed(iter/s)": 0.008408 |
| }, |
| { |
| "epoch": 1.0138843692374444, |
| "grad_norm": 2.5282626152038574, |
| "learning_rate": 5.287007844025604e-05, |
| "loss": 0.15156197547912598, |
| "memory(GiB)": 43.7, |
| "step": 585, |
| "token_acc": 0.9409188629667967, |
| "train_speed(iter/s)": 0.008409 |
| }, |
| { |
| "epoch": 1.0225621000108471, |
| "grad_norm": 2.413511037826538, |
| "learning_rate": 5.215307671574027e-05, |
| "loss": 0.1491085410118103, |
| "memory(GiB)": 43.7, |
| "step": 590, |
| "token_acc": 0.948726757418302, |
| "train_speed(iter/s)": 0.00841 |
| }, |
| { |
| "epoch": 1.0312398307842499, |
| "grad_norm": 2.100595235824585, |
| "learning_rate": 5.1435631119834526e-05, |
| "loss": 0.1547134280204773, |
| "memory(GiB)": 43.7, |
| "step": 595, |
| "token_acc": 0.9431145431145431, |
| "train_speed(iter/s)": 0.008411 |
| }, |
| { |
| "epoch": 1.0399175615576526, |
| "grad_norm": 2.0920307636260986, |
| "learning_rate": 5.071788955882171e-05, |
| "loss": 0.15303828716278076, |
| "memory(GiB)": 43.7, |
| "step": 600, |
| "token_acc": 0.9547254329480462, |
| "train_speed(iter/s)": 0.008412 |
| }, |
| { |
| "epoch": 1.0399175615576526, |
| "eval_loss": 0.15999968349933624, |
| "eval_runtime": 220.7406, |
| "eval_samples_per_second": 4.218, |
| "eval_steps_per_second": 0.847, |
| "eval_token_acc": 0.9457921939048298, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0485952923310555, |
| "grad_norm": 7.501833438873291, |
| "learning_rate": 5e-05, |
| "loss": 0.14910356998443602, |
| "memory(GiB)": 43.7, |
| "step": 605, |
| "token_acc": 0.9452935468219311, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.0572730231044583, |
| "grad_norm": 3.8511178493499756, |
| "learning_rate": 4.92821104411783e-05, |
| "loss": 0.1561565399169922, |
| "memory(GiB)": 43.7, |
| "step": 610, |
| "token_acc": 0.9424419619479839, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 1.065950753877861, |
| "grad_norm": 4.334895610809326, |
| "learning_rate": 4.856436888016549e-05, |
| "loss": 0.15020160675048827, |
| "memory(GiB)": 43.7, |
| "step": 615, |
| "token_acc": 0.9489557386472505, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.0746284846512637, |
| "grad_norm": 6.407334804534912, |
| "learning_rate": 4.784692328425974e-05, |
| "loss": 0.15203957557678222, |
| "memory(GiB)": 43.7, |
| "step": 620, |
| "token_acc": 0.9531370872521477, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 1.0833062154246664, |
| "grad_norm": 3.85739803314209, |
| "learning_rate": 4.712992155974397e-05, |
| "loss": 0.14752843379974365, |
| "memory(GiB)": 43.7, |
| "step": 625, |
| "token_acc": 0.9495189355168885, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 1.0919839461980692, |
| "grad_norm": 59.63698959350586, |
| "learning_rate": 4.6413511521394026e-05, |
| "loss": 0.14950259923934936, |
| "memory(GiB)": 43.7, |
| "step": 630, |
| "token_acc": 0.9457502223351544, |
| "train_speed(iter/s)": 0.008392 |
| }, |
| { |
| "epoch": 1.1006616769714719, |
| "grad_norm": 2.6143155097961426, |
| "learning_rate": 4.56978408620056e-05, |
| "loss": 0.14179157018661498, |
| "memory(GiB)": 43.7, |
| "step": 635, |
| "token_acc": 0.9504487580880818, |
| "train_speed(iter/s)": 0.008393 |
| }, |
| { |
| "epoch": 1.1093394077448746, |
| "grad_norm": 7.679717540740967, |
| "learning_rate": 4.4983057121946414e-05, |
| "loss": 0.13978817462921142, |
| "memory(GiB)": 43.7, |
| "step": 640, |
| "token_acc": 0.9578175752787288, |
| "train_speed(iter/s)": 0.008394 |
| }, |
| { |
| "epoch": 1.1180171385182776, |
| "grad_norm": 4.3338518142700195, |
| "learning_rate": 4.426930765873971e-05, |
| "loss": 0.12927252054214478, |
| "memory(GiB)": 43.7, |
| "step": 645, |
| "token_acc": 0.9480657805044308, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 1.1266948692916803, |
| "grad_norm": 8.904815673828125, |
| "learning_rate": 4.355673961668561e-05, |
| "loss": 0.142861807346344, |
| "memory(GiB)": 43.7, |
| "step": 650, |
| "token_acc": 0.9463022237196765, |
| "train_speed(iter/s)": 0.008396 |
| }, |
| { |
| "epoch": 1.135372600065083, |
| "grad_norm": 4.161317348480225, |
| "learning_rate": 4.2845499896526165e-05, |
| "loss": 0.14242833852767944, |
| "memory(GiB)": 43.7, |
| "step": 655, |
| "token_acc": 0.9534545454545454, |
| "train_speed(iter/s)": 0.008397 |
| }, |
| { |
| "epoch": 1.1440503308384857, |
| "grad_norm": 4.19969367980957, |
| "learning_rate": 4.213573512516086e-05, |
| "loss": 0.145144522190094, |
| "memory(GiB)": 43.7, |
| "step": 660, |
| "token_acc": 0.9491097208854667, |
| "train_speed(iter/s)": 0.008398 |
| }, |
| { |
| "epoch": 1.1527280616118885, |
| "grad_norm": 14.433186531066895, |
| "learning_rate": 4.142759162541847e-05, |
| "loss": 0.14555753469467164, |
| "memory(GiB)": 43.7, |
| "step": 665, |
| "token_acc": 0.9485169091252598, |
| "train_speed(iter/s)": 0.008398 |
| }, |
| { |
| "epoch": 1.1614057923852912, |
| "grad_norm": 2.4086694717407227, |
| "learning_rate": 4.072121538589164e-05, |
| "loss": 0.1370793342590332, |
| "memory(GiB)": 43.7, |
| "step": 670, |
| "token_acc": 0.9519276329959078, |
| "train_speed(iter/s)": 0.008399 |
| }, |
| { |
| "epoch": 1.170083523158694, |
| "grad_norm": 1.9969909191131592, |
| "learning_rate": 4.001675203084029e-05, |
| "loss": 0.13346275091171264, |
| "memory(GiB)": 43.7, |
| "step": 675, |
| "token_acc": 0.956349370225067, |
| "train_speed(iter/s)": 0.0084 |
| }, |
| { |
| "epoch": 1.1787612539320969, |
| "grad_norm": 6.479780673980713, |
| "learning_rate": 3.931434679017019e-05, |
| "loss": 0.1528043270111084, |
| "memory(GiB)": 43.7, |
| "step": 680, |
| "token_acc": 0.9442832532414543, |
| "train_speed(iter/s)": 0.008401 |
| }, |
| { |
| "epoch": 1.1874389847054996, |
| "grad_norm": 5.8431315422058105, |
| "learning_rate": 3.8614144469492914e-05, |
| "loss": 0.1413252830505371, |
| "memory(GiB)": 43.7, |
| "step": 685, |
| "token_acc": 0.9446944213883186, |
| "train_speed(iter/s)": 0.008402 |
| }, |
| { |
| "epoch": 1.1961167154789023, |
| "grad_norm": 4.603435039520264, |
| "learning_rate": 3.791628942027307e-05, |
| "loss": 0.13914390802383422, |
| "memory(GiB)": 43.7, |
| "step": 690, |
| "token_acc": 0.9466184864287768, |
| "train_speed(iter/s)": 0.008403 |
| }, |
| { |
| "epoch": 1.204794446252305, |
| "grad_norm": 3.8140857219696045, |
| "learning_rate": 3.72209255100693e-05, |
| "loss": 0.1497533917427063, |
| "memory(GiB)": 43.7, |
| "step": 695, |
| "token_acc": 0.9487390588439206, |
| "train_speed(iter/s)": 0.008404 |
| }, |
| { |
| "epoch": 1.2134721770257078, |
| "grad_norm": 3.3604190349578857, |
| "learning_rate": 3.6528196092875044e-05, |
| "loss": 0.13937609195709227, |
| "memory(GiB)": 43.7, |
| "step": 700, |
| "token_acc": 0.9550221300689821, |
| "train_speed(iter/s)": 0.008404 |
| }, |
| { |
| "epoch": 1.2134721770257078, |
| "eval_loss": 0.14972732961177826, |
| "eval_runtime": 220.5567, |
| "eval_samples_per_second": 4.221, |
| "eval_steps_per_second": 0.848, |
| "eval_token_acc": 0.9490785956157548, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2221499077991105, |
| "grad_norm": 1.7973473072052002, |
| "learning_rate": 3.5838243979565126e-05, |
| "loss": 0.14264475107192992, |
| "memory(GiB)": 43.7, |
| "step": 705, |
| "token_acc": 0.9420059621871812, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 1.2308276385725132, |
| "grad_norm": 3.916630744934082, |
| "learning_rate": 3.5151211408454276e-05, |
| "loss": 0.14307937622070313, |
| "memory(GiB)": 43.7, |
| "step": 710, |
| "token_acc": 0.9455980249526077, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 1.239505369345916, |
| "grad_norm": 1.8483539819717407, |
| "learning_rate": 3.446724001597375e-05, |
| "loss": 0.13684509992599486, |
| "memory(GiB)": 43.7, |
| "step": 715, |
| "token_acc": 0.9552911412420445, |
| "train_speed(iter/s)": 0.008385 |
| }, |
| { |
| "epoch": 1.2481831001193189, |
| "grad_norm": 4.51676607131958, |
| "learning_rate": 3.378647080747213e-05, |
| "loss": 0.1370592713356018, |
| "memory(GiB)": 43.7, |
| "step": 720, |
| "token_acc": 0.9479323691179466, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 1.2568608308927216, |
| "grad_norm": 5.6989922523498535, |
| "learning_rate": 3.31090441281459e-05, |
| "loss": 0.13757110834121705, |
| "memory(GiB)": 43.7, |
| "step": 725, |
| "token_acc": 0.9518228006061146, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.2655385616661243, |
| "grad_norm": 2.230487108230591, |
| "learning_rate": 3.2435099634106545e-05, |
| "loss": 0.14716867208480836, |
| "memory(GiB)": 43.7, |
| "step": 730, |
| "token_acc": 0.9534712267048945, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.274216292439527, |
| "grad_norm": 4.099515438079834, |
| "learning_rate": 3.176477626358935e-05, |
| "loss": 0.1300519347190857, |
| "memory(GiB)": 43.7, |
| "step": 735, |
| "token_acc": 0.9527396126469128, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 1.2828940232129298, |
| "grad_norm": 7.339303016662598, |
| "learning_rate": 3.1098212208310385e-05, |
| "loss": 0.13597266674041747, |
| "memory(GiB)": 43.7, |
| "step": 740, |
| "token_acc": 0.9525093572664708, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.2915717539863325, |
| "grad_norm": 3.044003486633301, |
| "learning_rate": 3.0435544884977368e-05, |
| "loss": 0.13851481676101685, |
| "memory(GiB)": 43.7, |
| "step": 745, |
| "token_acc": 0.955963223921636, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 1.3002494847597355, |
| "grad_norm": 2.625251054763794, |
| "learning_rate": 2.977691090696027e-05, |
| "loss": 0.13460161685943603, |
| "memory(GiB)": 43.7, |
| "step": 750, |
| "token_acc": 0.9536030797609977, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 1.3089272155331382, |
| "grad_norm": 4.502344131469727, |
| "learning_rate": 2.912244605612749e-05, |
| "loss": 0.13218367099761963, |
| "memory(GiB)": 43.7, |
| "step": 755, |
| "token_acc": 0.9520431328036323, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 1.317604946306541, |
| "grad_norm": 4.324583530426025, |
| "learning_rate": 2.8472285254853593e-05, |
| "loss": 0.13542770147323607, |
| "memory(GiB)": 43.7, |
| "step": 760, |
| "token_acc": 0.9550812224079551, |
| "train_speed(iter/s)": 0.008392 |
| }, |
| { |
| "epoch": 1.3262826770799436, |
| "grad_norm": 5.790456771850586, |
| "learning_rate": 2.7826562538204004e-05, |
| "loss": 0.1304815888404846, |
| "memory(GiB)": 43.7, |
| "step": 765, |
| "token_acc": 0.9475846709162122, |
| "train_speed(iter/s)": 0.008393 |
| }, |
| { |
| "epoch": 1.3349604078533464, |
| "grad_norm": 7.2669267654418945, |
| "learning_rate": 2.7185411026302964e-05, |
| "loss": 0.14250266551971436, |
| "memory(GiB)": 43.7, |
| "step": 770, |
| "token_acc": 0.9475920679886686, |
| "train_speed(iter/s)": 0.008394 |
| }, |
| { |
| "epoch": 1.343638138626749, |
| "grad_norm": 5.094613552093506, |
| "learning_rate": 2.654896289688972e-05, |
| "loss": 0.12874077558517455, |
| "memory(GiB)": 43.7, |
| "step": 775, |
| "token_acc": 0.9529416591449905, |
| "train_speed(iter/s)": 0.008394 |
| }, |
| { |
| "epoch": 1.3523158694001518, |
| "grad_norm": 6.989784240722656, |
| "learning_rate": 2.591734935806929e-05, |
| "loss": 0.12754837274551392, |
| "memory(GiB)": 43.7, |
| "step": 780, |
| "token_acc": 0.9427796380282287, |
| "train_speed(iter/s)": 0.008395 |
| }, |
| { |
| "epoch": 1.3609936001735545, |
| "grad_norm": 2.299795150756836, |
| "learning_rate": 2.5290700621263046e-05, |
| "loss": 0.12484978437423706, |
| "memory(GiB)": 43.7, |
| "step": 785, |
| "token_acc": 0.9552753758998168, |
| "train_speed(iter/s)": 0.008396 |
| }, |
| { |
| "epoch": 1.3696713309469573, |
| "grad_norm": 6.531172275543213, |
| "learning_rate": 2.4669145874364658e-05, |
| "loss": 0.12864662408828736, |
| "memory(GiB)": 43.7, |
| "step": 790, |
| "token_acc": 0.9534572241754562, |
| "train_speed(iter/s)": 0.008396 |
| }, |
| { |
| "epoch": 1.3783490617203602, |
| "grad_norm": 5.2566819190979, |
| "learning_rate": 2.4052813255107198e-05, |
| "loss": 0.12368242740631104, |
| "memory(GiB)": 43.7, |
| "step": 795, |
| "token_acc": 0.9542535446205171, |
| "train_speed(iter/s)": 0.008397 |
| }, |
| { |
| "epoch": 1.387026792493763, |
| "grad_norm": 2.911468029022217, |
| "learning_rate": 2.3441829824646604e-05, |
| "loss": 0.1311914801597595, |
| "memory(GiB)": 43.7, |
| "step": 800, |
| "token_acc": 0.9545532351934269, |
| "train_speed(iter/s)": 0.008398 |
| }, |
| { |
| "epoch": 1.387026792493763, |
| "eval_loss": 0.1414770781993866, |
| "eval_runtime": 220.3542, |
| "eval_samples_per_second": 4.225, |
| "eval_steps_per_second": 0.849, |
| "eval_token_acc": 0.9522295490999821, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3957045232671657, |
| "grad_norm": 43.0417594909668, |
| "learning_rate": 2.2836321541367272e-05, |
| "loss": 0.1324560523033142, |
| "memory(GiB)": 43.7, |
| "step": 805, |
| "token_acc": 0.9497365543294563, |
| "train_speed(iter/s)": 0.008379 |
| }, |
| { |
| "epoch": 1.4043822540405684, |
| "grad_norm": 3.845632791519165, |
| "learning_rate": 2.2236413234914805e-05, |
| "loss": 0.12397520542144776, |
| "memory(GiB)": 43.7, |
| "step": 810, |
| "token_acc": 0.953968391052768, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 1.413059984813971, |
| "grad_norm": 32.551578521728516, |
| "learning_rate": 2.164222858046148e-05, |
| "loss": 0.12206425666809081, |
| "memory(GiB)": 43.7, |
| "step": 815, |
| "token_acc": 0.9575527717120638, |
| "train_speed(iter/s)": 0.008381 |
| }, |
| { |
| "epoch": 1.4217377155873738, |
| "grad_norm": 6.810664653778076, |
| "learning_rate": 2.105389007320992e-05, |
| "loss": 0.12367465496063232, |
| "memory(GiB)": 43.7, |
| "step": 820, |
| "token_acc": 0.9609237622601696, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 1.4304154463607768, |
| "grad_norm": 6.067445755004883, |
| "learning_rate": 2.0471519003139762e-05, |
| "loss": 0.13576604127883912, |
| "memory(GiB)": 43.7, |
| "step": 825, |
| "token_acc": 0.9502264513472711, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 1.4390931771341795, |
| "grad_norm": 2.4773712158203125, |
| "learning_rate": 1.9895235430002894e-05, |
| "loss": 0.1232601284980774, |
| "memory(GiB)": 43.7, |
| "step": 830, |
| "token_acc": 0.9555563932448733, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 1.4477709079075822, |
| "grad_norm": 15.01673698425293, |
| "learning_rate": 1.9325158158572433e-05, |
| "loss": 0.12532531023025512, |
| "memory(GiB)": 43.7, |
| "step": 835, |
| "token_acc": 0.9559790297079138, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 1.456448638680985, |
| "grad_norm": 4.682022571563721, |
| "learning_rate": 1.876140471415016e-05, |
| "loss": 0.13699095249176024, |
| "memory(GiB)": 43.7, |
| "step": 840, |
| "token_acc": 0.9564936637339575, |
| "train_speed(iter/s)": 0.008385 |
| }, |
| { |
| "epoch": 1.4651263694543877, |
| "grad_norm": 2.494595527648926, |
| "learning_rate": 1.820409131833804e-05, |
| "loss": 0.12996993064880372, |
| "memory(GiB)": 43.7, |
| "step": 845, |
| "token_acc": 0.9557536734356943, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 1.4738041002277904, |
| "grad_norm": 5.67137336730957, |
| "learning_rate": 1.7653332865078242e-05, |
| "loss": 0.12316564321517945, |
| "memory(GiB)": 43.7, |
| "step": 850, |
| "token_acc": 0.9541896573848325, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.4824818310011931, |
| "grad_norm": 3.5610580444335938, |
| "learning_rate": 1.710924289696697e-05, |
| "loss": 0.12576040029525756, |
| "memory(GiB)": 43.7, |
| "step": 855, |
| "token_acc": 0.958788442172128, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.4911595617745959, |
| "grad_norm": 5.919505596160889, |
| "learning_rate": 1.6571933581846965e-05, |
| "loss": 0.13589413166046144, |
| "memory(GiB)": 43.7, |
| "step": 860, |
| "token_acc": 0.9557088249958451, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 1.4998372925479986, |
| "grad_norm": 2.857247829437256, |
| "learning_rate": 1.604151568968328e-05, |
| "loss": 0.13363780975341796, |
| "memory(GiB)": 43.7, |
| "step": 865, |
| "token_acc": 0.9531686859273066, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.5085150233214013, |
| "grad_norm": 4.2472734451293945, |
| "learning_rate": 1.55180985697273e-05, |
| "loss": 0.11843851804733277, |
| "memory(GiB)": 43.7, |
| "step": 870, |
| "token_acc": 0.9585031493145609, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.5171927540948043, |
| "grad_norm": 2.154675006866455, |
| "learning_rate": 1.5001790127973719e-05, |
| "loss": 0.13474491834640503, |
| "memory(GiB)": 43.7, |
| "step": 875, |
| "token_acc": 0.9471295060080107, |
| "train_speed(iter/s)": 0.00839 |
| }, |
| { |
| "epoch": 1.525870484868207, |
| "grad_norm": 4.447220325469971, |
| "learning_rate": 1.449269680491484e-05, |
| "loss": 0.12090286016464233, |
| "memory(GiB)": 43.7, |
| "step": 880, |
| "token_acc": 0.9576200325661224, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 1.5345482156416097, |
| "grad_norm": 5.527554035186768, |
| "learning_rate": 1.3990923553597312e-05, |
| "loss": 0.12562017440795897, |
| "memory(GiB)": 43.7, |
| "step": 885, |
| "token_acc": 0.9540816326530612, |
| "train_speed(iter/s)": 0.008391 |
| }, |
| { |
| "epoch": 1.5432259464150124, |
| "grad_norm": 5.432262420654297, |
| "learning_rate": 1.3496573817985264e-05, |
| "loss": 0.12086418867111207, |
| "memory(GiB)": 43.7, |
| "step": 890, |
| "token_acc": 0.9573405945087361, |
| "train_speed(iter/s)": 0.008392 |
| }, |
| { |
| "epoch": 1.5519036771884154, |
| "grad_norm": 1.7145942449569702, |
| "learning_rate": 1.30097495116346e-05, |
| "loss": 0.12666620016098024, |
| "memory(GiB)": 43.7, |
| "step": 895, |
| "token_acc": 0.9523390710725803, |
| "train_speed(iter/s)": 0.008393 |
| }, |
| { |
| "epoch": 1.560581407961818, |
| "grad_norm": 3.3511152267456055, |
| "learning_rate": 1.2530550996682905e-05, |
| "loss": 0.13257088661193847, |
| "memory(GiB)": 43.7, |
| "step": 900, |
| "token_acc": 0.9545784418356457, |
| "train_speed(iter/s)": 0.008393 |
| }, |
| { |
| "epoch": 1.560581407961818, |
| "eval_loss": 0.1344464272260666, |
| "eval_runtime": 220.4433, |
| "eval_samples_per_second": 4.223, |
| "eval_steps_per_second": 0.848, |
| "eval_token_acc": 0.9540331491712707, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5692591387352208, |
| "grad_norm": 2.53428053855896, |
| "learning_rate": 1.2059077063159035e-05, |
| "loss": 0.13223330974578856, |
| "memory(GiB)": 43.7, |
| "step": 905, |
| "token_acc": 0.9484811124360815, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.5779368695086236, |
| "grad_norm": 2.2530903816223145, |
| "learning_rate": 1.1595424908616931e-05, |
| "loss": 0.12704041004180908, |
| "memory(GiB)": 43.7, |
| "step": 910, |
| "token_acc": 0.9567981213053689, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.5866146002820263, |
| "grad_norm": 3.6312787532806396, |
| "learning_rate": 1.113969011809769e-05, |
| "loss": 0.124385404586792, |
| "memory(GiB)": 43.7, |
| "step": 915, |
| "token_acc": 0.9561269586179189, |
| "train_speed(iter/s)": 0.008378 |
| }, |
| { |
| "epoch": 1.595292331055429, |
| "grad_norm": 2.5788967609405518, |
| "learning_rate": 1.0691966644423985e-05, |
| "loss": 0.1238779067993164, |
| "memory(GiB)": 43.7, |
| "step": 920, |
| "token_acc": 0.9614115681770724, |
| "train_speed(iter/s)": 0.008379 |
| }, |
| { |
| "epoch": 1.6039700618288317, |
| "grad_norm": 3.791745185852051, |
| "learning_rate": 1.0252346788831136e-05, |
| "loss": 0.12241133451461791, |
| "memory(GiB)": 43.7, |
| "step": 925, |
| "token_acc": 0.9497440670079107, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 1.6126477926022345, |
| "grad_norm": 2.4049911499023438, |
| "learning_rate": 9.820921181938547e-06, |
| "loss": 0.12140171527862549, |
| "memory(GiB)": 43.7, |
| "step": 930, |
| "token_acc": 0.9623712869189373, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 1.6213255233756372, |
| "grad_norm": 3.9300315380096436, |
| "learning_rate": 9.39777876506559e-06, |
| "loss": 0.1210600733757019, |
| "memory(GiB)": 43.7, |
| "step": 935, |
| "token_acc": 0.9548552328862265, |
| "train_speed(iter/s)": 0.008381 |
| }, |
| { |
| "epoch": 1.63000325414904, |
| "grad_norm": 3.4940907955169678, |
| "learning_rate": 8.983006771895763e-06, |
| "loss": 0.12003093957901001, |
| "memory(GiB)": 43.7, |
| "step": 940, |
| "token_acc": 0.9573810994441013, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 1.6386809849224426, |
| "grad_norm": 3.156331777572632, |
| "learning_rate": 8.57669071049283e-06, |
| "loss": 0.13336446285247802, |
| "memory(GiB)": 43.7, |
| "step": 945, |
| "token_acc": 0.9616925789389326, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 1.6473587156958456, |
| "grad_norm": 2.2776732444763184, |
| "learning_rate": 8.1789143456728e-06, |
| "loss": 0.12087045907974243, |
| "memory(GiB)": 43.7, |
| "step": 950, |
| "token_acc": 0.9592353951890035, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 1.6560364464692483, |
| "grad_norm": 4.188504695892334, |
| "learning_rate": 7.789759681735242e-06, |
| "loss": 0.12968361377716064, |
| "memory(GiB)": 43.7, |
| "step": 955, |
| "token_acc": 0.9590592711080368, |
| "train_speed(iter/s)": 0.008383 |
| }, |
| { |
| "epoch": 1.664714177242651, |
| "grad_norm": 2.595299243927002, |
| "learning_rate": 7.409306945557487e-06, |
| "loss": 0.12285364866256714, |
| "memory(GiB)": 43.7, |
| "step": 960, |
| "token_acc": 0.9505176420874709, |
| "train_speed(iter/s)": 0.008384 |
| }, |
| { |
| "epoch": 1.6733919080160538, |
| "grad_norm": 5.3159050941467285, |
| "learning_rate": 7.03763457005539e-06, |
| "loss": 0.117578125, |
| "memory(GiB)": 43.7, |
| "step": 965, |
| "token_acc": 0.9578746014877789, |
| "train_speed(iter/s)": 0.008385 |
| }, |
| { |
| "epoch": 1.6820696387894567, |
| "grad_norm": 6.753002643585205, |
| "learning_rate": 6.674819178013769e-06, |
| "loss": 0.12883116006851197, |
| "memory(GiB)": 43.7, |
| "step": 970, |
| "token_acc": 0.9502868869623488, |
| "train_speed(iter/s)": 0.008385 |
| }, |
| { |
| "epoch": 1.6907473695628594, |
| "grad_norm": 2.9135501384735107, |
| "learning_rate": 6.32093556629017e-06, |
| "loss": 0.1296370506286621, |
| "memory(GiB)": 43.7, |
| "step": 975, |
| "token_acc": 0.9569416670212313, |
| "train_speed(iter/s)": 0.008386 |
| }, |
| { |
| "epoch": 1.6994251003362622, |
| "grad_norm": 8.138466835021973, |
| "learning_rate": 5.97605669039496e-06, |
| "loss": 0.12514114379882812, |
| "memory(GiB)": 43.7, |
| "step": 980, |
| "token_acc": 0.959045205591433, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.7081028311096649, |
| "grad_norm": 6.167930603027344, |
| "learning_rate": 5.640253649451016e-06, |
| "loss": 0.12624462842941284, |
| "memory(GiB)": 43.7, |
| "step": 985, |
| "token_acc": 0.9558782574099861, |
| "train_speed(iter/s)": 0.008387 |
| }, |
| { |
| "epoch": 1.7167805618830676, |
| "grad_norm": 3.414597749710083, |
| "learning_rate": 5.3135956715362205e-06, |
| "loss": 0.11242947578430176, |
| "memory(GiB)": 43.7, |
| "step": 990, |
| "token_acc": 0.9574127310061602, |
| "train_speed(iter/s)": 0.008388 |
| }, |
| { |
| "epoch": 1.7254582926564703, |
| "grad_norm": 5.249174118041992, |
| "learning_rate": 4.996150099411595e-06, |
| "loss": 0.1168135643005371, |
| "memory(GiB)": 43.7, |
| "step": 995, |
| "token_acc": 0.9566812923813323, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.734136023429873, |
| "grad_norm": 16.526689529418945, |
| "learning_rate": 4.687982376638101e-06, |
| "loss": 0.12106761932373047, |
| "memory(GiB)": 43.7, |
| "step": 1000, |
| "token_acc": 0.950882838633188, |
| "train_speed(iter/s)": 0.008389 |
| }, |
| { |
| "epoch": 1.734136023429873, |
| "eval_loss": 0.13192416727542877, |
| "eval_runtime": 220.4784, |
| "eval_samples_per_second": 4.223, |
| "eval_steps_per_second": 0.848, |
| "eval_token_acc": 0.9551238638388879, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7428137542032758, |
| "grad_norm": 8.070154190063477, |
| "learning_rate": 4.389156034085062e-06, |
| "loss": 0.11758378744125367, |
| "memory(GiB)": 43.7, |
| "step": 1005, |
| "token_acc": 0.9530685920577617, |
| "train_speed(iter/s)": 0.008374 |
| }, |
| { |
| "epoch": 1.7514914849766785, |
| "grad_norm": 4.704627513885498, |
| "learning_rate": 4.099732676832818e-06, |
| "loss": 0.11970211267471313, |
| "memory(GiB)": 43.7, |
| "step": 1010, |
| "token_acc": 0.9605290871561565, |
| "train_speed(iter/s)": 0.008375 |
| }, |
| { |
| "epoch": 1.7601692157500812, |
| "grad_norm": 2.2863786220550537, |
| "learning_rate": 3.8197719714724075e-06, |
| "loss": 0.13179272413253784, |
| "memory(GiB)": 43.7, |
| "step": 1015, |
| "token_acc": 0.9519187762769479, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.768846946523484, |
| "grad_norm": 2.264657735824585, |
| "learning_rate": 3.5493316338049086e-06, |
| "loss": 0.11556450128555298, |
| "memory(GiB)": 43.7, |
| "step": 1020, |
| "token_acc": 0.9577792123950936, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.777524677296887, |
| "grad_norm": 2.526029586791992, |
| "learning_rate": 3.2884674169429195e-06, |
| "loss": 0.12338602542877197, |
| "memory(GiB)": 43.7, |
| "step": 1025, |
| "token_acc": 0.9530650918866511, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.7862024080702896, |
| "grad_norm": 2.9345407485961914, |
| "learning_rate": 3.037233099816705e-06, |
| "loss": 0.11988996267318726, |
| "memory(GiB)": 43.7, |
| "step": 1030, |
| "token_acc": 0.9589889775750665, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.7948801388436924, |
| "grad_norm": 26.361116409301758, |
| "learning_rate": 2.7956804760872923e-06, |
| "loss": 0.1184123396873474, |
| "memory(GiB)": 43.7, |
| "step": 1035, |
| "token_acc": 0.9544123107388919, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.803557869617095, |
| "grad_norm": 1.9186588525772095, |
| "learning_rate": 2.563859343468822e-06, |
| "loss": 0.11901482343673705, |
| "memory(GiB)": 43.7, |
| "step": 1040, |
| "token_acc": 0.957713331308837, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.812235600390498, |
| "grad_norm": 2.4511313438415527, |
| "learning_rate": 2.3418174934624614e-06, |
| "loss": 0.11996428966522217, |
| "memory(GiB)": 43.7, |
| "step": 1045, |
| "token_acc": 0.9580075296843324, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.8209133311639008, |
| "grad_norm": 4.2686381340026855, |
| "learning_rate": 2.1296007015038366e-06, |
| "loss": 0.12237482070922852, |
| "memory(GiB)": 43.7, |
| "step": 1050, |
| "token_acc": 0.9547814096844838, |
| "train_speed(iter/s)": 0.008376 |
| }, |
| { |
| "epoch": 1.8295910619373035, |
| "grad_norm": 2.8372726440429688, |
| "learning_rate": 1.927252717526118e-06, |
| "loss": 0.11317713260650634, |
| "memory(GiB)": 43.7, |
| "step": 1055, |
| "token_acc": 0.95649260918722, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.8382687927107062, |
| "grad_norm": 2.8570916652679443, |
| "learning_rate": 1.734815256940675e-06, |
| "loss": 0.12497738599777222, |
| "memory(GiB)": 43.7, |
| "step": 1060, |
| "token_acc": 0.9487427466150871, |
| "train_speed(iter/s)": 0.008377 |
| }, |
| { |
| "epoch": 1.846946523484109, |
| "grad_norm": 3.211838722229004, |
| "learning_rate": 1.552327992037167e-06, |
| "loss": 0.1036531686782837, |
| "memory(GiB)": 43.7, |
| "step": 1065, |
| "token_acc": 0.9615865210205974, |
| "train_speed(iter/s)": 0.008378 |
| }, |
| { |
| "epoch": 1.8556242542575117, |
| "grad_norm": 3.4643843173980713, |
| "learning_rate": 1.379828543804812e-06, |
| "loss": 0.12307696342468262, |
| "memory(GiB)": 43.7, |
| "step": 1070, |
| "token_acc": 0.9607265161038623, |
| "train_speed(iter/s)": 0.008378 |
| }, |
| { |
| "epoch": 1.8643019850309144, |
| "grad_norm": 1.6095548868179321, |
| "learning_rate": 1.2173524741765917e-06, |
| "loss": 0.12708972692489623, |
| "memory(GiB)": 43.7, |
| "step": 1075, |
| "token_acc": 0.952561829999156, |
| "train_speed(iter/s)": 0.008379 |
| }, |
| { |
| "epoch": 1.872979715804317, |
| "grad_norm": 4.927311897277832, |
| "learning_rate": 1.064933278697905e-06, |
| "loss": 0.12298980951309205, |
| "memory(GiB)": 43.7, |
| "step": 1080, |
| "token_acc": 0.9553622478150781, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 1.8816574465777198, |
| "grad_norm": 9.174079895019531, |
| "learning_rate": 9.22602379621218e-07, |
| "loss": 0.11775370836257934, |
| "memory(GiB)": 43.7, |
| "step": 1085, |
| "token_acc": 0.9566113042358697, |
| "train_speed(iter/s)": 0.00838 |
| }, |
| { |
| "epoch": 1.8903351773511226, |
| "grad_norm": 2.2587075233459473, |
| "learning_rate": 7.903891194281754e-07, |
| "loss": 0.12096530199050903, |
| "memory(GiB)": 43.7, |
| "step": 1090, |
| "token_acc": 0.9606564187785626, |
| "train_speed(iter/s)": 0.008381 |
| }, |
| { |
| "epoch": 1.8990129081245253, |
| "grad_norm": 2.2280704975128174, |
| "learning_rate": 6.683207547804382e-07, |
| "loss": 0.1250510573387146, |
| "memory(GiB)": 43.7, |
| "step": 1095, |
| "token_acc": 0.9589882943143813, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 1.9076906388979282, |
| "grad_norm": 1.6023691892623901, |
| "learning_rate": 5.564224509005566e-07, |
| "loss": 0.1259409546852112, |
| "memory(GiB)": 43.7, |
| "step": 1100, |
| "token_acc": 0.9541287997492949, |
| "train_speed(iter/s)": 0.008382 |
| }, |
| { |
| "epoch": 1.9076906388979282, |
| "eval_loss": 0.13046810030937195, |
| "eval_runtime": 220.3988, |
| "eval_samples_per_second": 4.224, |
| "eval_steps_per_second": 0.848, |
| "eval_token_acc": 0.9554517911245767, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.916368369671331, |
| "grad_norm": 4.342984676361084, |
| "learning_rate": 4.5471727638394246e-07, |
| "loss": 0.11758095026016235, |
| "memory(GiB)": 43.7, |
| "step": 1105, |
| "token_acc": 0.9547268970774075, |
| "train_speed(iter/s)": 0.008369 |
| }, |
| { |
| "epoch": 1.9250461004447337, |
| "grad_norm": 1.968943476676941, |
| "learning_rate": 3.6322619844317286e-07, |
| "loss": 0.11470128297805786, |
| "memory(GiB)": 43.7, |
| "step": 1110, |
| "token_acc": 0.9598653765250316, |
| "train_speed(iter/s)": 0.008369 |
| }, |
| { |
| "epoch": 1.9337238312181364, |
| "grad_norm": 8.885481834411621, |
| "learning_rate": 2.8196807858543174e-07, |
| "loss": 0.12146815061569213, |
| "memory(GiB)": 43.7, |
| "step": 1115, |
| "token_acc": 0.957883686752047, |
| "train_speed(iter/s)": 0.00837 |
| }, |
| { |
| "epoch": 1.9424015619915393, |
| "grad_norm": 3.2066810131073, |
| "learning_rate": 2.1095966872407557e-07, |
| "loss": 0.12168284654617309, |
| "memory(GiB)": 43.7, |
| "step": 1120, |
| "token_acc": 0.9506006374111302, |
| "train_speed(iter/s)": 0.008371 |
| }, |
| { |
| "epoch": 1.951079292764942, |
| "grad_norm": 4.034703254699707, |
| "learning_rate": 1.5021560772514597e-07, |
| "loss": 0.12114499807357788, |
| "memory(GiB)": 43.7, |
| "step": 1125, |
| "token_acc": 0.9597590361445784, |
| "train_speed(iter/s)": 0.008371 |
| }, |
| { |
| "epoch": 1.9597570235383448, |
| "grad_norm": 4.031400203704834, |
| "learning_rate": 9.974841838941151e-08, |
| "loss": 0.124148690700531, |
| "memory(GiB)": 43.7, |
| "step": 1130, |
| "token_acc": 0.9580030654696737, |
| "train_speed(iter/s)": 0.008372 |
| }, |
| { |
| "epoch": 1.9684347543117475, |
| "grad_norm": 1.9329931735992432, |
| "learning_rate": 5.9568504870771704e-08, |
| "loss": 0.13172950744628906, |
| "memory(GiB)": 43.7, |
| "step": 1135, |
| "token_acc": 0.9577101598762249, |
| "train_speed(iter/s)": 0.008373 |
| }, |
| { |
| "epoch": 1.9771124850851503, |
| "grad_norm": 5.185240268707275, |
| "learning_rate": 2.9684150531317233e-08, |
| "loss": 0.1293073296546936, |
| "memory(GiB)": 43.7, |
| "step": 1140, |
| "token_acc": 0.9555131747946911, |
| "train_speed(iter/s)": 0.008373 |
| }, |
| { |
| "epoch": 1.985790215858553, |
| "grad_norm": 1.6715742349624634, |
| "learning_rate": 1.0101516233695928e-08, |
| "loss": 0.11854711771011353, |
| "memory(GiB)": 43.7, |
| "step": 1145, |
| "token_acc": 0.9571147482336105, |
| "train_speed(iter/s)": 0.008374 |
| }, |
| { |
| "epoch": 1.9944679466319557, |
| "grad_norm": 2.0126240253448486, |
| "learning_rate": 8.246390709787389e-10, |
| "loss": 0.12129169702529907, |
| "memory(GiB)": 43.7, |
| "step": 1150, |
| "token_acc": 0.9570041028217365, |
| "train_speed(iter/s)": 0.008375 |
| }, |
| { |
| "epoch": 1.997939038941317, |
| "eval_loss": 0.13022483885288239, |
| "eval_runtime": 220.4004, |
| "eval_samples_per_second": 4.224, |
| "eval_steps_per_second": 0.848, |
| "eval_token_acc": 0.9554517911245767, |
| "step": 1152 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1152, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.2250994394408157e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|