{ "best_global_step": 18000, "best_metric": 3.9484923051611727, "best_model_checkpoint": "outputs/bert-tiny-stage2-sbert/checkpoints/checkpoint-18000", "epoch": 5.0, "eval_steps": 2000, "global_step": 21140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011825922421948912, "grad_norm": 37.789451599121094, "learning_rate": 2.3173327027666118e-07, "loss": 18.0314, "step": 50 }, { "epoch": 0.023651844843897825, "grad_norm": 43.297508239746094, "learning_rate": 4.6819579096713174e-07, "loss": 17.7147, "step": 100 }, { "epoch": 0.035477767265846734, "grad_norm": 36.56124496459961, "learning_rate": 7.046583116576024e-07, "loss": 17.0007, "step": 150 }, { "epoch": 0.04730368968779565, "grad_norm": 34.428916931152344, "learning_rate": 9.411208323480729e-07, "loss": 16.0667, "step": 200 }, { "epoch": 0.05912961210974456, "grad_norm": 33.16645812988281, "learning_rate": 1.1775833530385434e-06, "loss": 14.9131, "step": 250 }, { "epoch": 0.07095553453169347, "grad_norm": 29.064250946044922, "learning_rate": 1.4140458737290142e-06, "loss": 13.9449, "step": 300 }, { "epoch": 0.08278145695364239, "grad_norm": 31.42921257019043, "learning_rate": 1.6505083944194847e-06, "loss": 12.7957, "step": 350 }, { "epoch": 0.0946073793755913, "grad_norm": 33.341365814208984, "learning_rate": 1.8869709151099552e-06, "loss": 11.7288, "step": 400 }, { "epoch": 0.10643330179754021, "grad_norm": 36.34325408935547, "learning_rate": 2.123433435800426e-06, "loss": 10.6945, "step": 450 }, { "epoch": 0.11825922421948912, "grad_norm": 39.06941604614258, "learning_rate": 2.3598959564908965e-06, "loss": 9.4743, "step": 500 }, { "epoch": 0.13008514664143803, "grad_norm": 32.969947814941406, "learning_rate": 2.596358477181367e-06, "loss": 8.6215, "step": 550 }, { "epoch": 0.14191106906338694, "grad_norm": 33.9212760925293, "learning_rate": 2.8328209978718375e-06, "loss": 7.7279, "step": 600 }, { "epoch": 0.15373699148533584, "grad_norm": 32.65876007080078, "learning_rate": 3.069283518562308e-06, "loss": 7.1892, "step": 650 }, { "epoch": 0.16556291390728478, "grad_norm": 29.210859298706055, "learning_rate": 3.3057460392527786e-06, "loss": 6.9682, "step": 700 }, { "epoch": 0.1773888363292337, "grad_norm": 29.231189727783203, "learning_rate": 3.5422085599432495e-06, "loss": 6.4781, "step": 750 }, { "epoch": 0.1892147587511826, "grad_norm": 28.949066162109375, "learning_rate": 3.77867108063372e-06, "loss": 6.1271, "step": 800 }, { "epoch": 0.2010406811731315, "grad_norm": 29.826133728027344, "learning_rate": 4.01513360132419e-06, "loss": 6.1199, "step": 850 }, { "epoch": 0.21286660359508042, "grad_norm": 27.585041046142578, "learning_rate": 4.2515961220146615e-06, "loss": 5.9544, "step": 900 }, { "epoch": 0.22469252601702933, "grad_norm": 28.10279655456543, "learning_rate": 4.488058642705131e-06, "loss": 5.8145, "step": 950 }, { "epoch": 0.23651844843897823, "grad_norm": 26.567943572998047, "learning_rate": 4.7245211633956025e-06, "loss": 5.5599, "step": 1000 }, { "epoch": 0.24834437086092714, "grad_norm": 24.42616081237793, "learning_rate": 4.960983684086072e-06, "loss": 5.2344, "step": 1050 }, { "epoch": 0.26017029328287605, "grad_norm": 25.857810974121094, "learning_rate": 5.197446204776543e-06, "loss": 5.3013, "step": 1100 }, { "epoch": 0.27199621570482496, "grad_norm": 26.047733306884766, "learning_rate": 5.433908725467014e-06, "loss": 5.0562, "step": 1150 }, { "epoch": 0.28382213812677387, "grad_norm": 26.875659942626953, "learning_rate": 5.670371246157485e-06, "loss": 4.8728, "step": 1200 }, { "epoch": 0.2956480605487228, "grad_norm": 21.9539737701416, "learning_rate": 5.906833766847954e-06, "loss": 4.7826, "step": 1250 }, { "epoch": 0.3074739829706717, "grad_norm": 23.06488609313965, "learning_rate": 6.143296287538426e-06, "loss": 4.8806, "step": 1300 }, { "epoch": 0.3192999053926206, "grad_norm": 24.24974250793457, "learning_rate": 6.379758808228896e-06, "loss": 4.6464, "step": 1350 }, { "epoch": 0.33112582781456956, "grad_norm": 22.658571243286133, "learning_rate": 6.616221328919367e-06, "loss": 4.7046, "step": 1400 }, { "epoch": 0.34295175023651847, "grad_norm": 21.927656173706055, "learning_rate": 6.852683849609837e-06, "loss": 4.5188, "step": 1450 }, { "epoch": 0.3547776726584674, "grad_norm": 24.39653778076172, "learning_rate": 7.089146370300309e-06, "loss": 4.4968, "step": 1500 }, { "epoch": 0.3666035950804163, "grad_norm": 23.591333389282227, "learning_rate": 7.325608890990778e-06, "loss": 4.4387, "step": 1550 }, { "epoch": 0.3784295175023652, "grad_norm": 24.572961807250977, "learning_rate": 7.562071411681249e-06, "loss": 4.1702, "step": 1600 }, { "epoch": 0.3902554399243141, "grad_norm": 22.61821174621582, "learning_rate": 7.79853393237172e-06, "loss": 4.2147, "step": 1650 }, { "epoch": 0.402081362346263, "grad_norm": 22.490327835083008, "learning_rate": 8.03499645306219e-06, "loss": 3.9972, "step": 1700 }, { "epoch": 0.4139072847682119, "grad_norm": 23.695873260498047, "learning_rate": 8.271458973752661e-06, "loss": 4.1279, "step": 1750 }, { "epoch": 0.42573320719016083, "grad_norm": 24.085838317871094, "learning_rate": 8.507921494443131e-06, "loss": 4.0214, "step": 1800 }, { "epoch": 0.43755912961210974, "grad_norm": 20.78253173828125, "learning_rate": 8.744384015133602e-06, "loss": 3.9161, "step": 1850 }, { "epoch": 0.44938505203405865, "grad_norm": 19.800090789794922, "learning_rate": 8.980846535824072e-06, "loss": 3.7544, "step": 1900 }, { "epoch": 0.46121097445600756, "grad_norm": 22.900514602661133, "learning_rate": 9.217309056514543e-06, "loss": 3.8246, "step": 1950 }, { "epoch": 0.47303689687795647, "grad_norm": 22.419363021850586, "learning_rate": 9.453771577205015e-06, "loss": 3.7991, "step": 2000 }, { "epoch": 0.47303689687795647, "eval_runtime": 46.7005, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 5.98806651504585, "step": 2000 }, { "epoch": 0.4848628192999054, "grad_norm": 22.308876037597656, "learning_rate": 9.690234097895484e-06, "loss": 3.8554, "step": 2050 }, { "epoch": 0.4966887417218543, "grad_norm": 23.8614501953125, "learning_rate": 9.926696618585954e-06, "loss": 3.8123, "step": 2100 }, { "epoch": 0.5085146641438032, "grad_norm": 21.00491714477539, "learning_rate": 1.0163159139276425e-05, "loss": 3.5525, "step": 2150 }, { "epoch": 0.5203405865657521, "grad_norm": 25.555097579956055, "learning_rate": 1.0399621659966897e-05, "loss": 3.5591, "step": 2200 }, { "epoch": 0.532166508987701, "grad_norm": 25.4840087890625, "learning_rate": 1.0636084180657367e-05, "loss": 3.6293, "step": 2250 }, { "epoch": 0.5439924314096499, "grad_norm": 21.117971420288086, "learning_rate": 1.0872546701347836e-05, "loss": 3.5831, "step": 2300 }, { "epoch": 0.5558183538315988, "grad_norm": 23.38995361328125, "learning_rate": 1.1109009222038308e-05, "loss": 3.6007, "step": 2350 }, { "epoch": 0.5676442762535477, "grad_norm": 22.385738372802734, "learning_rate": 1.1345471742728777e-05, "loss": 3.4225, "step": 2400 }, { "epoch": 0.5794701986754967, "grad_norm": 21.53306007385254, "learning_rate": 1.158193426341925e-05, "loss": 3.4405, "step": 2450 }, { "epoch": 0.5912961210974456, "grad_norm": 22.93678092956543, "learning_rate": 1.181839678410972e-05, "loss": 3.4002, "step": 2500 }, { "epoch": 0.6031220435193945, "grad_norm": 20.330045700073242, "learning_rate": 1.2054859304800191e-05, "loss": 3.3653, "step": 2550 }, { "epoch": 0.6149479659413434, "grad_norm": 21.98198699951172, "learning_rate": 1.2291321825490661e-05, "loss": 3.321, "step": 2600 }, { "epoch": 0.6267738883632923, "grad_norm": 18.49015998840332, "learning_rate": 1.252778434618113e-05, "loss": 3.3042, "step": 2650 }, { "epoch": 0.6385998107852412, "grad_norm": 22.69803237915039, "learning_rate": 1.2764246866871602e-05, "loss": 3.2117, "step": 2700 }, { "epoch": 0.6504257332071902, "grad_norm": 19.658132553100586, "learning_rate": 1.3000709387562072e-05, "loss": 3.3423, "step": 2750 }, { "epoch": 0.6622516556291391, "grad_norm": 20.783931732177734, "learning_rate": 1.3237171908252545e-05, "loss": 3.2494, "step": 2800 }, { "epoch": 0.674077578051088, "grad_norm": 17.039609909057617, "learning_rate": 1.3473634428943014e-05, "loss": 3.1364, "step": 2850 }, { "epoch": 0.6859035004730369, "grad_norm": 21.787738800048828, "learning_rate": 1.3710096949633484e-05, "loss": 3.1836, "step": 2900 }, { "epoch": 0.6977294228949859, "grad_norm": 20.883773803710938, "learning_rate": 1.3946559470323956e-05, "loss": 3.1268, "step": 2950 }, { "epoch": 0.7095553453169348, "grad_norm": 17.700597763061523, "learning_rate": 1.4183021991014425e-05, "loss": 3.072, "step": 3000 }, { "epoch": 0.7213812677388837, "grad_norm": 20.23262596130371, "learning_rate": 1.4419484511704895e-05, "loss": 3.0135, "step": 3050 }, { "epoch": 0.7332071901608326, "grad_norm": 19.417842864990234, "learning_rate": 1.4655947032395366e-05, "loss": 3.0607, "step": 3100 }, { "epoch": 0.7450331125827815, "grad_norm": 19.843341827392578, "learning_rate": 1.4892409553085838e-05, "loss": 3.0963, "step": 3150 }, { "epoch": 0.7568590350047304, "grad_norm": 20.248523712158203, "learning_rate": 1.5128872073776309e-05, "loss": 3.0419, "step": 3200 }, { "epoch": 0.7686849574266793, "grad_norm": 24.61260986328125, "learning_rate": 1.5365334594466777e-05, "loss": 2.9891, "step": 3250 }, { "epoch": 0.7805108798486282, "grad_norm": 16.637826919555664, "learning_rate": 1.560179711515725e-05, "loss": 2.9384, "step": 3300 }, { "epoch": 0.7923368022705771, "grad_norm": 24.341026306152344, "learning_rate": 1.583825963584772e-05, "loss": 2.8918, "step": 3350 }, { "epoch": 0.804162724692526, "grad_norm": 18.246440887451172, "learning_rate": 1.607472215653819e-05, "loss": 2.9816, "step": 3400 }, { "epoch": 0.8159886471144749, "grad_norm": 19.296022415161133, "learning_rate": 1.631118467722866e-05, "loss": 2.9664, "step": 3450 }, { "epoch": 0.8278145695364238, "grad_norm": 19.331918716430664, "learning_rate": 1.6547647197919134e-05, "loss": 2.8969, "step": 3500 }, { "epoch": 0.8396404919583728, "grad_norm": 25.586254119873047, "learning_rate": 1.6784109718609602e-05, "loss": 2.9368, "step": 3550 }, { "epoch": 0.8514664143803217, "grad_norm": 19.701223373413086, "learning_rate": 1.7020572239300073e-05, "loss": 2.8513, "step": 3600 }, { "epoch": 0.8632923368022706, "grad_norm": 16.68182945251465, "learning_rate": 1.7257034759990545e-05, "loss": 2.9808, "step": 3650 }, { "epoch": 0.8751182592242195, "grad_norm": 19.592416763305664, "learning_rate": 1.7493497280681013e-05, "loss": 2.8428, "step": 3700 }, { "epoch": 0.8869441816461684, "grad_norm": 20.324504852294922, "learning_rate": 1.7729959801371484e-05, "loss": 2.8775, "step": 3750 }, { "epoch": 0.8987701040681173, "grad_norm": 19.49851417541504, "learning_rate": 1.7966422322061955e-05, "loss": 2.739, "step": 3800 }, { "epoch": 0.9105960264900662, "grad_norm": 19.18546485900879, "learning_rate": 1.8202884842752427e-05, "loss": 2.8277, "step": 3850 }, { "epoch": 0.9224219489120151, "grad_norm": 23.6113338470459, "learning_rate": 1.8439347363442898e-05, "loss": 2.767, "step": 3900 }, { "epoch": 0.934247871333964, "grad_norm": 19.779712677001953, "learning_rate": 1.8675809884133366e-05, "loss": 2.794, "step": 3950 }, { "epoch": 0.9460737937559129, "grad_norm": 23.361425399780273, "learning_rate": 1.8912272404823837e-05, "loss": 2.7738, "step": 4000 }, { "epoch": 0.9460737937559129, "eval_runtime": 47.0317, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 4.773771009103065, "step": 4000 }, { "epoch": 0.9578997161778618, "grad_norm": 18.137535095214844, "learning_rate": 1.914873492551431e-05, "loss": 2.8568, "step": 4050 }, { "epoch": 0.9697256385998108, "grad_norm": 18.014116287231445, "learning_rate": 1.9385197446204777e-05, "loss": 2.7938, "step": 4100 }, { "epoch": 0.9815515610217597, "grad_norm": 17.168569564819336, "learning_rate": 1.9621659966895248e-05, "loss": 2.7272, "step": 4150 }, { "epoch": 0.9933774834437086, "grad_norm": 17.75269889831543, "learning_rate": 1.985812248758572e-05, "loss": 2.7079, "step": 4200 }, { "epoch": 1.0052034058656576, "grad_norm": 19.342844009399414, "learning_rate": 1.9976346756548995e-05, "loss": 2.6383, "step": 4250 }, { "epoch": 1.0170293282876064, "grad_norm": 17.54117774963379, "learning_rate": 1.9917213647921473e-05, "loss": 2.6855, "step": 4300 }, { "epoch": 1.0288552507095554, "grad_norm": 18.412206649780273, "learning_rate": 1.9858080539293952e-05, "loss": 2.6568, "step": 4350 }, { "epoch": 1.0406811731315042, "grad_norm": 18.794939041137695, "learning_rate": 1.979894743066643e-05, "loss": 2.5981, "step": 4400 }, { "epoch": 1.0525070955534532, "grad_norm": 17.26803970336914, "learning_rate": 1.973981432203891e-05, "loss": 2.6987, "step": 4450 }, { "epoch": 1.064333017975402, "grad_norm": 15.831737518310547, "learning_rate": 1.968068121341139e-05, "loss": 2.6992, "step": 4500 }, { "epoch": 1.076158940397351, "grad_norm": 16.746700286865234, "learning_rate": 1.962154810478387e-05, "loss": 2.5434, "step": 4550 }, { "epoch": 1.0879848628192998, "grad_norm": 18.824857711791992, "learning_rate": 1.956241499615635e-05, "loss": 2.5553, "step": 4600 }, { "epoch": 1.0998107852412489, "grad_norm": 16.81246566772461, "learning_rate": 1.9503281887528828e-05, "loss": 2.4978, "step": 4650 }, { "epoch": 1.1116367076631977, "grad_norm": 18.369991302490234, "learning_rate": 1.9444148778901307e-05, "loss": 2.5679, "step": 4700 }, { "epoch": 1.1234626300851467, "grad_norm": 19.55158805847168, "learning_rate": 1.938501567027379e-05, "loss": 2.4768, "step": 4750 }, { "epoch": 1.1352885525070955, "grad_norm": 20.673002243041992, "learning_rate": 1.9325882561646268e-05, "loss": 2.5578, "step": 4800 }, { "epoch": 1.1471144749290445, "grad_norm": 17.067432403564453, "learning_rate": 1.9266749453018747e-05, "loss": 2.4758, "step": 4850 }, { "epoch": 1.1589403973509933, "grad_norm": 22.328304290771484, "learning_rate": 1.9207616344391226e-05, "loss": 2.5352, "step": 4900 }, { "epoch": 1.1707663197729423, "grad_norm": 15.121694564819336, "learning_rate": 1.9148483235763708e-05, "loss": 2.5023, "step": 4950 }, { "epoch": 1.1825922421948911, "grad_norm": 15.201376914978027, "learning_rate": 1.9089350127136187e-05, "loss": 2.4713, "step": 5000 }, { "epoch": 1.1944181646168401, "grad_norm": 20.54207992553711, "learning_rate": 1.9030217018508665e-05, "loss": 2.486, "step": 5050 }, { "epoch": 1.206244087038789, "grad_norm": 16.934635162353516, "learning_rate": 1.8971083909881144e-05, "loss": 2.483, "step": 5100 }, { "epoch": 1.218070009460738, "grad_norm": 16.963790893554688, "learning_rate": 1.8911950801253623e-05, "loss": 2.4098, "step": 5150 }, { "epoch": 1.2298959318826868, "grad_norm": 16.505352020263672, "learning_rate": 1.8852817692626102e-05, "loss": 2.5061, "step": 5200 }, { "epoch": 1.2417218543046358, "grad_norm": 16.634069442749023, "learning_rate": 1.879368458399858e-05, "loss": 2.4597, "step": 5250 }, { "epoch": 1.2535477767265846, "grad_norm": 16.373046875, "learning_rate": 1.8734551475371063e-05, "loss": 2.4591, "step": 5300 }, { "epoch": 1.2653736991485336, "grad_norm": 21.308876037597656, "learning_rate": 1.867541836674354e-05, "loss": 2.3879, "step": 5350 }, { "epoch": 1.2771996215704826, "grad_norm": 20.565275192260742, "learning_rate": 1.861628525811602e-05, "loss": 2.4146, "step": 5400 }, { "epoch": 1.2890255439924314, "grad_norm": 15.853353500366211, "learning_rate": 1.85571521494885e-05, "loss": 2.3418, "step": 5450 }, { "epoch": 1.3008514664143802, "grad_norm": 13.12362003326416, "learning_rate": 1.8498019040860978e-05, "loss": 2.4307, "step": 5500 }, { "epoch": 1.3126773888363292, "grad_norm": 19.059667587280273, "learning_rate": 1.843888593223346e-05, "loss": 2.3653, "step": 5550 }, { "epoch": 1.3245033112582782, "grad_norm": 17.448827743530273, "learning_rate": 1.837975282360594e-05, "loss": 2.3995, "step": 5600 }, { "epoch": 1.336329233680227, "grad_norm": 18.326887130737305, "learning_rate": 1.8320619714978418e-05, "loss": 2.4527, "step": 5650 }, { "epoch": 1.3481551561021758, "grad_norm": 18.03122901916504, "learning_rate": 1.8261486606350896e-05, "loss": 2.4547, "step": 5700 }, { "epoch": 1.3599810785241249, "grad_norm": 18.269872665405273, "learning_rate": 1.820235349772338e-05, "loss": 2.3695, "step": 5750 }, { "epoch": 1.3718070009460739, "grad_norm": 16.90838623046875, "learning_rate": 1.8143220389095857e-05, "loss": 2.3341, "step": 5800 }, { "epoch": 1.3836329233680227, "grad_norm": 18.816362380981445, "learning_rate": 1.8084087280468336e-05, "loss": 2.2412, "step": 5850 }, { "epoch": 1.3954588457899715, "grad_norm": 17.30527687072754, "learning_rate": 1.8024954171840815e-05, "loss": 2.2695, "step": 5900 }, { "epoch": 1.4072847682119205, "grad_norm": 18.299711227416992, "learning_rate": 1.7965821063213297e-05, "loss": 2.2922, "step": 5950 }, { "epoch": 1.4191106906338695, "grad_norm": 18.047449111938477, "learning_rate": 1.7906687954585773e-05, "loss": 2.3176, "step": 6000 }, { "epoch": 1.4191106906338695, "eval_runtime": 46.9839, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 4.305679076455633, "step": 6000 }, { "epoch": 1.4309366130558183, "grad_norm": 20.608333587646484, "learning_rate": 1.784755484595825e-05, "loss": 2.281, "step": 6050 }, { "epoch": 1.4427625354777673, "grad_norm": 16.299001693725586, "learning_rate": 1.7788421737330734e-05, "loss": 2.2155, "step": 6100 }, { "epoch": 1.4545884578997161, "grad_norm": 17.70014762878418, "learning_rate": 1.7729288628703212e-05, "loss": 2.1908, "step": 6150 }, { "epoch": 1.4664143803216652, "grad_norm": 13.944992065429688, "learning_rate": 1.767015552007569e-05, "loss": 2.2071, "step": 6200 }, { "epoch": 1.478240302743614, "grad_norm": 18.37308692932129, "learning_rate": 1.761102241144817e-05, "loss": 2.2617, "step": 6250 }, { "epoch": 1.490066225165563, "grad_norm": 16.624183654785156, "learning_rate": 1.7551889302820652e-05, "loss": 2.2864, "step": 6300 }, { "epoch": 1.5018921475875118, "grad_norm": 15.490421295166016, "learning_rate": 1.749275619419313e-05, "loss": 2.2509, "step": 6350 }, { "epoch": 1.5137180700094608, "grad_norm": 15.517704010009766, "learning_rate": 1.743362308556561e-05, "loss": 2.1227, "step": 6400 }, { "epoch": 1.5255439924314098, "grad_norm": 14.78442096710205, "learning_rate": 1.737448997693809e-05, "loss": 2.1919, "step": 6450 }, { "epoch": 1.5373699148533586, "grad_norm": 19.766271591186523, "learning_rate": 1.7315356868310567e-05, "loss": 2.2072, "step": 6500 }, { "epoch": 1.5491958372753074, "grad_norm": 17.84695053100586, "learning_rate": 1.725622375968305e-05, "loss": 2.1652, "step": 6550 }, { "epoch": 1.5610217596972564, "grad_norm": 17.325145721435547, "learning_rate": 1.7197090651055528e-05, "loss": 2.2224, "step": 6600 }, { "epoch": 1.5728476821192054, "grad_norm": 19.243274688720703, "learning_rate": 1.7137957542428007e-05, "loss": 2.0715, "step": 6650 }, { "epoch": 1.5846736045411542, "grad_norm": 17.589859008789062, "learning_rate": 1.7078824433800486e-05, "loss": 2.1693, "step": 6700 }, { "epoch": 1.596499526963103, "grad_norm": 14.71687126159668, "learning_rate": 1.7019691325172968e-05, "loss": 2.1141, "step": 6750 }, { "epoch": 1.608325449385052, "grad_norm": 14.723918914794922, "learning_rate": 1.6960558216545443e-05, "loss": 2.1129, "step": 6800 }, { "epoch": 1.620151371807001, "grad_norm": 16.5570011138916, "learning_rate": 1.6901425107917922e-05, "loss": 2.1001, "step": 6850 }, { "epoch": 1.6319772942289499, "grad_norm": 17.945083618164062, "learning_rate": 1.6842291999290404e-05, "loss": 2.094, "step": 6900 }, { "epoch": 1.6438032166508987, "grad_norm": 18.704225540161133, "learning_rate": 1.6783158890662883e-05, "loss": 2.2176, "step": 6950 }, { "epoch": 1.6556291390728477, "grad_norm": 15.701910018920898, "learning_rate": 1.6724025782035362e-05, "loss": 2.109, "step": 7000 }, { "epoch": 1.6674550614947967, "grad_norm": 16.768260955810547, "learning_rate": 1.666489267340784e-05, "loss": 2.0537, "step": 7050 }, { "epoch": 1.6792809839167455, "grad_norm": 17.835603713989258, "learning_rate": 1.6605759564780323e-05, "loss": 2.0328, "step": 7100 }, { "epoch": 1.6911069063386943, "grad_norm": 18.1043701171875, "learning_rate": 1.6546626456152802e-05, "loss": 2.1541, "step": 7150 }, { "epoch": 1.7029328287606433, "grad_norm": 14.032896995544434, "learning_rate": 1.648749334752528e-05, "loss": 2.0164, "step": 7200 }, { "epoch": 1.7147587511825924, "grad_norm": 15.934415817260742, "learning_rate": 1.642836023889776e-05, "loss": 2.0225, "step": 7250 }, { "epoch": 1.7265846736045412, "grad_norm": 15.602225303649902, "learning_rate": 1.636922713027024e-05, "loss": 2.0243, "step": 7300 }, { "epoch": 1.73841059602649, "grad_norm": 15.584887504577637, "learning_rate": 1.631009402164272e-05, "loss": 2.0152, "step": 7350 }, { "epoch": 1.750236518448439, "grad_norm": 17.52799415588379, "learning_rate": 1.62509609130152e-05, "loss": 2.0455, "step": 7400 }, { "epoch": 1.762062440870388, "grad_norm": 15.92798900604248, "learning_rate": 1.6191827804387678e-05, "loss": 2.0026, "step": 7450 }, { "epoch": 1.7738883632923368, "grad_norm": 14.851804733276367, "learning_rate": 1.6132694695760157e-05, "loss": 1.9846, "step": 7500 }, { "epoch": 1.7857142857142856, "grad_norm": 15.551090240478516, "learning_rate": 1.607356158713264e-05, "loss": 1.9594, "step": 7550 }, { "epoch": 1.7975402081362346, "grad_norm": 14.651620864868164, "learning_rate": 1.6014428478505118e-05, "loss": 2.0523, "step": 7600 }, { "epoch": 1.8093661305581836, "grad_norm": 19.447086334228516, "learning_rate": 1.5955295369877596e-05, "loss": 1.9751, "step": 7650 }, { "epoch": 1.8211920529801324, "grad_norm": 14.130012512207031, "learning_rate": 1.5896162261250075e-05, "loss": 1.9898, "step": 7700 }, { "epoch": 1.8330179754020812, "grad_norm": 18.4505615234375, "learning_rate": 1.5837029152622554e-05, "loss": 1.9658, "step": 7750 }, { "epoch": 1.8448438978240302, "grad_norm": 12.992496490478516, "learning_rate": 1.5777896043995033e-05, "loss": 1.9976, "step": 7800 }, { "epoch": 1.8566698202459793, "grad_norm": 17.20708656311035, "learning_rate": 1.571876293536751e-05, "loss": 1.9939, "step": 7850 }, { "epoch": 1.868495742667928, "grad_norm": 14.438339233398438, "learning_rate": 1.5659629826739994e-05, "loss": 1.9666, "step": 7900 }, { "epoch": 1.8803216650898769, "grad_norm": 16.87125015258789, "learning_rate": 1.5600496718112473e-05, "loss": 1.9704, "step": 7950 }, { "epoch": 1.8921475875118259, "grad_norm": 17.480026245117188, "learning_rate": 1.554136360948495e-05, "loss": 1.9822, "step": 8000 }, { "epoch": 1.8921475875118259, "eval_runtime": 47.026, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 4.1330844767959665, "step": 8000 }, { "epoch": 1.903973509933775, "grad_norm": 15.649256706237793, "learning_rate": 1.548223050085743e-05, "loss": 1.8534, "step": 8050 }, { "epoch": 1.9157994323557237, "grad_norm": 17.02906608581543, "learning_rate": 1.5423097392229912e-05, "loss": 1.856, "step": 8100 }, { "epoch": 1.9276253547776725, "grad_norm": 16.321977615356445, "learning_rate": 1.536396428360239e-05, "loss": 1.9817, "step": 8150 }, { "epoch": 1.9394512771996215, "grad_norm": 21.492490768432617, "learning_rate": 1.530483117497487e-05, "loss": 1.9095, "step": 8200 }, { "epoch": 1.9512771996215705, "grad_norm": 18.752315521240234, "learning_rate": 1.524569806634735e-05, "loss": 1.9343, "step": 8250 }, { "epoch": 1.9631031220435196, "grad_norm": 17.007205963134766, "learning_rate": 1.518656495771983e-05, "loss": 1.95, "step": 8300 }, { "epoch": 1.9749290444654684, "grad_norm": 16.75872230529785, "learning_rate": 1.512743184909231e-05, "loss": 1.9981, "step": 8350 }, { "epoch": 1.9867549668874172, "grad_norm": 18.816049575805664, "learning_rate": 1.5068298740464788e-05, "loss": 1.8872, "step": 8400 }, { "epoch": 1.9985808893093662, "grad_norm": 16.992637634277344, "learning_rate": 1.5009165631837266e-05, "loss": 1.8112, "step": 8450 }, { "epoch": 2.010406811731315, "grad_norm": 16.72859001159668, "learning_rate": 1.4950032523209746e-05, "loss": 1.8451, "step": 8500 }, { "epoch": 2.0222327341532638, "grad_norm": 15.676278114318848, "learning_rate": 1.4890899414582225e-05, "loss": 1.8918, "step": 8550 }, { "epoch": 2.034058656575213, "grad_norm": 15.531780242919922, "learning_rate": 1.4831766305954705e-05, "loss": 1.7837, "step": 8600 }, { "epoch": 2.045884578997162, "grad_norm": 17.246252059936523, "learning_rate": 1.4772633197327184e-05, "loss": 1.8692, "step": 8650 }, { "epoch": 2.057710501419111, "grad_norm": 13.021443367004395, "learning_rate": 1.4713500088699663e-05, "loss": 1.8614, "step": 8700 }, { "epoch": 2.0695364238410594, "grad_norm": 15.586688041687012, "learning_rate": 1.4654366980072143e-05, "loss": 1.8677, "step": 8750 }, { "epoch": 2.0813623462630084, "grad_norm": 19.62430191040039, "learning_rate": 1.4595233871444622e-05, "loss": 1.8005, "step": 8800 }, { "epoch": 2.0931882686849574, "grad_norm": 15.454833984375, "learning_rate": 1.4536100762817103e-05, "loss": 1.8008, "step": 8850 }, { "epoch": 2.1050141911069065, "grad_norm": 16.70480728149414, "learning_rate": 1.4476967654189581e-05, "loss": 1.8207, "step": 8900 }, { "epoch": 2.116840113528855, "grad_norm": 17.584407806396484, "learning_rate": 1.4417834545562062e-05, "loss": 1.7491, "step": 8950 }, { "epoch": 2.128666035950804, "grad_norm": 17.367647171020508, "learning_rate": 1.435870143693454e-05, "loss": 1.8351, "step": 9000 }, { "epoch": 2.140491958372753, "grad_norm": 15.521934509277344, "learning_rate": 1.4299568328307021e-05, "loss": 1.7934, "step": 9050 }, { "epoch": 2.152317880794702, "grad_norm": 18.928241729736328, "learning_rate": 1.42404352196795e-05, "loss": 1.8162, "step": 9100 }, { "epoch": 2.1641438032166507, "grad_norm": 16.490169525146484, "learning_rate": 1.418130211105198e-05, "loss": 1.8496, "step": 9150 }, { "epoch": 2.1759697256385997, "grad_norm": 16.48432731628418, "learning_rate": 1.412216900242446e-05, "loss": 1.7747, "step": 9200 }, { "epoch": 2.1877956480605487, "grad_norm": 11.924939155578613, "learning_rate": 1.406303589379694e-05, "loss": 1.7665, "step": 9250 }, { "epoch": 2.1996215704824977, "grad_norm": 17.498945236206055, "learning_rate": 1.4003902785169417e-05, "loss": 1.789, "step": 9300 }, { "epoch": 2.2114474929044468, "grad_norm": 15.384320259094238, "learning_rate": 1.3944769676541896e-05, "loss": 1.8264, "step": 9350 }, { "epoch": 2.2232734153263953, "grad_norm": 13.456559181213379, "learning_rate": 1.3885636567914376e-05, "loss": 1.788, "step": 9400 }, { "epoch": 2.2350993377483444, "grad_norm": 24.769336700439453, "learning_rate": 1.3826503459286855e-05, "loss": 1.7902, "step": 9450 }, { "epoch": 2.2469252601702934, "grad_norm": 18.193721771240234, "learning_rate": 1.3767370350659335e-05, "loss": 1.8175, "step": 9500 }, { "epoch": 2.258751182592242, "grad_norm": 16.10167121887207, "learning_rate": 1.3708237242031814e-05, "loss": 1.8042, "step": 9550 }, { "epoch": 2.270577105014191, "grad_norm": 15.939582824707031, "learning_rate": 1.3649104133404295e-05, "loss": 1.7767, "step": 9600 }, { "epoch": 2.28240302743614, "grad_norm": 17.35470199584961, "learning_rate": 1.3589971024776774e-05, "loss": 1.7099, "step": 9650 }, { "epoch": 2.294228949858089, "grad_norm": 16.262712478637695, "learning_rate": 1.3530837916149252e-05, "loss": 1.7841, "step": 9700 }, { "epoch": 2.306054872280038, "grad_norm": 13.716343879699707, "learning_rate": 1.3471704807521733e-05, "loss": 1.87, "step": 9750 }, { "epoch": 2.3178807947019866, "grad_norm": 13.402505874633789, "learning_rate": 1.3412571698894212e-05, "loss": 1.7485, "step": 9800 }, { "epoch": 2.3297067171239356, "grad_norm": 14.37375259399414, "learning_rate": 1.3353438590266692e-05, "loss": 1.8367, "step": 9850 }, { "epoch": 2.3415326395458846, "grad_norm": 14.258302688598633, "learning_rate": 1.3294305481639171e-05, "loss": 1.7925, "step": 9900 }, { "epoch": 2.3533585619678337, "grad_norm": 18.176448822021484, "learning_rate": 1.3235172373011651e-05, "loss": 1.9135, "step": 9950 }, { "epoch": 2.3651844843897822, "grad_norm": 16.076860427856445, "learning_rate": 1.317603926438413e-05, "loss": 1.7746, "step": 10000 }, { "epoch": 2.3651844843897822, "eval_runtime": 46.8576, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 4.062871016729038, "step": 10000 }, { "epoch": 2.3770104068117313, "grad_norm": 14.89098072052002, "learning_rate": 1.311690615575661e-05, "loss": 1.672, "step": 10050 }, { "epoch": 2.3888363292336803, "grad_norm": 21.15306854248047, "learning_rate": 1.3057773047129088e-05, "loss": 1.7265, "step": 10100 }, { "epoch": 2.4006622516556293, "grad_norm": 13.14006519317627, "learning_rate": 1.2998639938501567e-05, "loss": 1.6875, "step": 10150 }, { "epoch": 2.412488174077578, "grad_norm": 16.71653175354004, "learning_rate": 1.2939506829874047e-05, "loss": 1.7421, "step": 10200 }, { "epoch": 2.424314096499527, "grad_norm": 19.673765182495117, "learning_rate": 1.2880373721246526e-05, "loss": 1.7447, "step": 10250 }, { "epoch": 2.436140018921476, "grad_norm": 13.806225776672363, "learning_rate": 1.2821240612619006e-05, "loss": 1.7335, "step": 10300 }, { "epoch": 2.447965941343425, "grad_norm": 17.10091209411621, "learning_rate": 1.2762107503991485e-05, "loss": 1.6583, "step": 10350 }, { "epoch": 2.4597918637653735, "grad_norm": 13.57816219329834, "learning_rate": 1.2702974395363966e-05, "loss": 1.6937, "step": 10400 }, { "epoch": 2.4716177861873225, "grad_norm": 15.529336929321289, "learning_rate": 1.2643841286736444e-05, "loss": 1.6425, "step": 10450 }, { "epoch": 2.4834437086092715, "grad_norm": 15.039297103881836, "learning_rate": 1.2584708178108925e-05, "loss": 1.7837, "step": 10500 }, { "epoch": 2.4952696310312206, "grad_norm": 18.062923431396484, "learning_rate": 1.2525575069481404e-05, "loss": 1.7589, "step": 10550 }, { "epoch": 2.507095553453169, "grad_norm": 14.291655540466309, "learning_rate": 1.2466441960853884e-05, "loss": 1.6618, "step": 10600 }, { "epoch": 2.518921475875118, "grad_norm": 15.268333435058594, "learning_rate": 1.2407308852226363e-05, "loss": 1.6107, "step": 10650 }, { "epoch": 2.530747398297067, "grad_norm": 15.746752738952637, "learning_rate": 1.2348175743598842e-05, "loss": 1.706, "step": 10700 }, { "epoch": 2.542573320719016, "grad_norm": 14.740198135375977, "learning_rate": 1.2289042634971322e-05, "loss": 1.6662, "step": 10750 }, { "epoch": 2.5543992431409652, "grad_norm": 18.715717315673828, "learning_rate": 1.2229909526343801e-05, "loss": 1.7491, "step": 10800 }, { "epoch": 2.566225165562914, "grad_norm": 13.341856956481934, "learning_rate": 1.2170776417716281e-05, "loss": 1.615, "step": 10850 }, { "epoch": 2.578051087984863, "grad_norm": 15.429610252380371, "learning_rate": 1.211164330908876e-05, "loss": 1.6314, "step": 10900 }, { "epoch": 2.589877010406812, "grad_norm": 16.15951919555664, "learning_rate": 1.2052510200461239e-05, "loss": 1.6564, "step": 10950 }, { "epoch": 2.6017029328287604, "grad_norm": 16.425504684448242, "learning_rate": 1.1993377091833718e-05, "loss": 1.6085, "step": 11000 }, { "epoch": 2.6135288552507094, "grad_norm": 19.02115249633789, "learning_rate": 1.1934243983206197e-05, "loss": 1.6969, "step": 11050 }, { "epoch": 2.6253547776726585, "grad_norm": 16.245838165283203, "learning_rate": 1.1875110874578677e-05, "loss": 1.5963, "step": 11100 }, { "epoch": 2.6371807000946075, "grad_norm": 14.986413955688477, "learning_rate": 1.1815977765951156e-05, "loss": 1.6626, "step": 11150 }, { "epoch": 2.6490066225165565, "grad_norm": 18.501134872436523, "learning_rate": 1.1756844657323636e-05, "loss": 1.715, "step": 11200 }, { "epoch": 2.660832544938505, "grad_norm": 19.390989303588867, "learning_rate": 1.1697711548696115e-05, "loss": 1.6182, "step": 11250 }, { "epoch": 2.672658467360454, "grad_norm": 16.83384132385254, "learning_rate": 1.1638578440068596e-05, "loss": 1.5667, "step": 11300 }, { "epoch": 2.684484389782403, "grad_norm": 17.595382690429688, "learning_rate": 1.1579445331441074e-05, "loss": 1.6255, "step": 11350 }, { "epoch": 2.6963103122043517, "grad_norm": 18.588014602661133, "learning_rate": 1.1520312222813555e-05, "loss": 1.6146, "step": 11400 }, { "epoch": 2.7081362346263007, "grad_norm": 18.090600967407227, "learning_rate": 1.1461179114186034e-05, "loss": 1.5807, "step": 11450 }, { "epoch": 2.7199621570482497, "grad_norm": 16.144756317138672, "learning_rate": 1.1402046005558514e-05, "loss": 1.571, "step": 11500 }, { "epoch": 2.7317880794701987, "grad_norm": 19.271270751953125, "learning_rate": 1.1342912896930993e-05, "loss": 1.611, "step": 11550 }, { "epoch": 2.7436140018921478, "grad_norm": 15.365574836730957, "learning_rate": 1.1283779788303474e-05, "loss": 1.5757, "step": 11600 }, { "epoch": 2.7554399243140963, "grad_norm": 18.699979782104492, "learning_rate": 1.1224646679675952e-05, "loss": 1.6048, "step": 11650 }, { "epoch": 2.7672658467360454, "grad_norm": 15.537507057189941, "learning_rate": 1.1165513571048431e-05, "loss": 1.5559, "step": 11700 }, { "epoch": 2.7790917691579944, "grad_norm": 15.148637771606445, "learning_rate": 1.110638046242091e-05, "loss": 1.5634, "step": 11750 }, { "epoch": 2.790917691579943, "grad_norm": 17.472370147705078, "learning_rate": 1.1047247353793389e-05, "loss": 1.5663, "step": 11800 }, { "epoch": 2.8027436140018924, "grad_norm": 16.284570693969727, "learning_rate": 1.098811424516587e-05, "loss": 1.6274, "step": 11850 }, { "epoch": 2.814569536423841, "grad_norm": 17.758365631103516, "learning_rate": 1.0928981136538348e-05, "loss": 1.5478, "step": 11900 }, { "epoch": 2.82639545884579, "grad_norm": 14.631210327148438, "learning_rate": 1.0869848027910828e-05, "loss": 1.5728, "step": 11950 }, { "epoch": 2.838221381267739, "grad_norm": 13.960256576538086, "learning_rate": 1.0810714919283307e-05, "loss": 1.5694, "step": 12000 }, { "epoch": 2.838221381267739, "eval_runtime": 47.1893, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 3.9845195254937416, "step": 12000 }, { "epoch": 2.8500473036896876, "grad_norm": 13.964838981628418, "learning_rate": 1.0751581810655786e-05, "loss": 1.5544, "step": 12050 }, { "epoch": 2.8618732261116366, "grad_norm": 20.20077133178711, "learning_rate": 1.0692448702028267e-05, "loss": 1.6172, "step": 12100 }, { "epoch": 2.8736991485335857, "grad_norm": 13.513507843017578, "learning_rate": 1.0633315593400745e-05, "loss": 1.5358, "step": 12150 }, { "epoch": 2.8855250709555347, "grad_norm": 18.936565399169922, "learning_rate": 1.0574182484773226e-05, "loss": 1.5656, "step": 12200 }, { "epoch": 2.8973509933774837, "grad_norm": 17.975814819335938, "learning_rate": 1.0515049376145705e-05, "loss": 1.5273, "step": 12250 }, { "epoch": 2.9091769157994323, "grad_norm": 18.273731231689453, "learning_rate": 1.0455916267518185e-05, "loss": 1.4981, "step": 12300 }, { "epoch": 2.9210028382213813, "grad_norm": 16.280357360839844, "learning_rate": 1.0396783158890664e-05, "loss": 1.5256, "step": 12350 }, { "epoch": 2.9328287606433303, "grad_norm": 13.220331192016602, "learning_rate": 1.0337650050263144e-05, "loss": 1.522, "step": 12400 }, { "epoch": 2.944654683065279, "grad_norm": 16.336288452148438, "learning_rate": 1.0278516941635623e-05, "loss": 1.465, "step": 12450 }, { "epoch": 2.956480605487228, "grad_norm": 20.016876220703125, "learning_rate": 1.0219383833008104e-05, "loss": 1.6151, "step": 12500 }, { "epoch": 2.968306527909177, "grad_norm": 17.370023727416992, "learning_rate": 1.0160250724380582e-05, "loss": 1.5521, "step": 12550 }, { "epoch": 2.980132450331126, "grad_norm": 18.69423484802246, "learning_rate": 1.010111761575306e-05, "loss": 1.5657, "step": 12600 }, { "epoch": 2.991958372753075, "grad_norm": 18.094669342041016, "learning_rate": 1.004198450712554e-05, "loss": 1.4935, "step": 12650 }, { "epoch": 3.0037842951750235, "grad_norm": 15.485885620117188, "learning_rate": 9.98285139849802e-06, "loss": 1.4081, "step": 12700 }, { "epoch": 3.0156102175969726, "grad_norm": 15.505888938903809, "learning_rate": 9.9237182898705e-06, "loss": 1.5243, "step": 12750 }, { "epoch": 3.0274361400189216, "grad_norm": 16.799917221069336, "learning_rate": 9.864585181242978e-06, "loss": 1.4999, "step": 12800 }, { "epoch": 3.0392620624408706, "grad_norm": 15.498218536376953, "learning_rate": 9.805452072615459e-06, "loss": 1.4139, "step": 12850 }, { "epoch": 3.051087984862819, "grad_norm": 19.318891525268555, "learning_rate": 9.746318963987937e-06, "loss": 1.5522, "step": 12900 }, { "epoch": 3.062913907284768, "grad_norm": 14.893320083618164, "learning_rate": 9.687185855360418e-06, "loss": 1.4865, "step": 12950 }, { "epoch": 3.074739829706717, "grad_norm": 18.767566680908203, "learning_rate": 9.628052746732897e-06, "loss": 1.4755, "step": 13000 }, { "epoch": 3.0865657521286662, "grad_norm": 14.623005867004395, "learning_rate": 9.568919638105375e-06, "loss": 1.4582, "step": 13050 }, { "epoch": 3.098391674550615, "grad_norm": 14.217521667480469, "learning_rate": 9.509786529477856e-06, "loss": 1.4112, "step": 13100 }, { "epoch": 3.110217596972564, "grad_norm": 13.287856101989746, "learning_rate": 9.450653420850335e-06, "loss": 1.4758, "step": 13150 }, { "epoch": 3.122043519394513, "grad_norm": 14.488649368286133, "learning_rate": 9.391520312222813e-06, "loss": 1.4388, "step": 13200 }, { "epoch": 3.133869441816462, "grad_norm": 15.88402271270752, "learning_rate": 9.332387203595294e-06, "loss": 1.4819, "step": 13250 }, { "epoch": 3.1456953642384105, "grad_norm": 13.743453025817871, "learning_rate": 9.273254094967773e-06, "loss": 1.4525, "step": 13300 }, { "epoch": 3.1575212866603595, "grad_norm": 16.949493408203125, "learning_rate": 9.214120986340253e-06, "loss": 1.4583, "step": 13350 }, { "epoch": 3.1693472090823085, "grad_norm": 15.139965057373047, "learning_rate": 9.154987877712732e-06, "loss": 1.4714, "step": 13400 }, { "epoch": 3.1811731315042575, "grad_norm": 18.97600746154785, "learning_rate": 9.095854769085213e-06, "loss": 1.4265, "step": 13450 }, { "epoch": 3.192999053926206, "grad_norm": 16.3485107421875, "learning_rate": 9.036721660457691e-06, "loss": 1.454, "step": 13500 }, { "epoch": 3.204824976348155, "grad_norm": 17.43102264404297, "learning_rate": 8.97758855183017e-06, "loss": 1.4506, "step": 13550 }, { "epoch": 3.216650898770104, "grad_norm": 15.63143253326416, "learning_rate": 8.91845544320265e-06, "loss": 1.4055, "step": 13600 }, { "epoch": 3.228476821192053, "grad_norm": 12.323601722717285, "learning_rate": 8.85932233457513e-06, "loss": 1.4729, "step": 13650 }, { "epoch": 3.2403027436140017, "grad_norm": 17.078189849853516, "learning_rate": 8.800189225947608e-06, "loss": 1.4791, "step": 13700 }, { "epoch": 3.2521286660359507, "grad_norm": 14.752788543701172, "learning_rate": 8.741056117320089e-06, "loss": 1.4962, "step": 13750 }, { "epoch": 3.2639545884578998, "grad_norm": 12.897354125976562, "learning_rate": 8.681923008692567e-06, "loss": 1.4101, "step": 13800 }, { "epoch": 3.275780510879849, "grad_norm": 12.985773086547852, "learning_rate": 8.622789900065048e-06, "loss": 1.4596, "step": 13850 }, { "epoch": 3.2876064333017974, "grad_norm": 16.538026809692383, "learning_rate": 8.563656791437527e-06, "loss": 1.4207, "step": 13900 }, { "epoch": 3.2994323557237464, "grad_norm": 20.395875930786133, "learning_rate": 8.504523682810007e-06, "loss": 1.4972, "step": 13950 }, { "epoch": 3.3112582781456954, "grad_norm": 11.993943214416504, "learning_rate": 8.445390574182486e-06, "loss": 1.4473, "step": 14000 }, { "epoch": 3.3112582781456954, "eval_runtime": 47.0766, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 3.9688115547590725, "step": 14000 }, { "epoch": 3.3230842005676444, "grad_norm": 12.915935516357422, "learning_rate": 8.386257465554965e-06, "loss": 1.46, "step": 14050 }, { "epoch": 3.334910122989593, "grad_norm": 19.571788787841797, "learning_rate": 8.327124356927444e-06, "loss": 1.5012, "step": 14100 }, { "epoch": 3.346736045411542, "grad_norm": 17.244380950927734, "learning_rate": 8.267991248299924e-06, "loss": 1.5466, "step": 14150 }, { "epoch": 3.358561967833491, "grad_norm": 18.117067337036133, "learning_rate": 8.208858139672403e-06, "loss": 1.5067, "step": 14200 }, { "epoch": 3.37038789025544, "grad_norm": 14.23071575164795, "learning_rate": 8.149725031044883e-06, "loss": 1.4413, "step": 14250 }, { "epoch": 3.3822138126773886, "grad_norm": 16.817371368408203, "learning_rate": 8.090591922417362e-06, "loss": 1.3448, "step": 14300 }, { "epoch": 3.3940397350993377, "grad_norm": 21.39740753173828, "learning_rate": 8.031458813789843e-06, "loss": 1.3792, "step": 14350 }, { "epoch": 3.4058656575212867, "grad_norm": 13.991111755371094, "learning_rate": 7.972325705162321e-06, "loss": 1.4019, "step": 14400 }, { "epoch": 3.4176915799432357, "grad_norm": 14.572546005249023, "learning_rate": 7.9131925965348e-06, "loss": 1.4771, "step": 14450 }, { "epoch": 3.4295175023651847, "grad_norm": 15.65616226196289, "learning_rate": 7.854059487907279e-06, "loss": 1.4383, "step": 14500 }, { "epoch": 3.4413434247871333, "grad_norm": 16.871171951293945, "learning_rate": 7.79492637927976e-06, "loss": 1.4172, "step": 14550 }, { "epoch": 3.4531693472090823, "grad_norm": 16.653839111328125, "learning_rate": 7.735793270652238e-06, "loss": 1.3284, "step": 14600 }, { "epoch": 3.4649952696310313, "grad_norm": 18.008516311645508, "learning_rate": 7.676660162024719e-06, "loss": 1.3867, "step": 14650 }, { "epoch": 3.47682119205298, "grad_norm": 21.629899978637695, "learning_rate": 7.6175270533971976e-06, "loss": 1.3931, "step": 14700 }, { "epoch": 3.488647114474929, "grad_norm": 15.525995254516602, "learning_rate": 7.558393944769677e-06, "loss": 1.4429, "step": 14750 }, { "epoch": 3.500473036896878, "grad_norm": 15.045352935791016, "learning_rate": 7.499260836142157e-06, "loss": 1.4665, "step": 14800 }, { "epoch": 3.512298959318827, "grad_norm": 16.258941650390625, "learning_rate": 7.440127727514636e-06, "loss": 1.3232, "step": 14850 }, { "epoch": 3.524124881740776, "grad_norm": 14.834844589233398, "learning_rate": 7.380994618887115e-06, "loss": 1.4112, "step": 14900 }, { "epoch": 3.5359508041627246, "grad_norm": 18.840707778930664, "learning_rate": 7.321861510259595e-06, "loss": 1.3916, "step": 14950 }, { "epoch": 3.5477767265846736, "grad_norm": 17.09494972229004, "learning_rate": 7.2627284016320746e-06, "loss": 1.3572, "step": 15000 }, { "epoch": 3.5596026490066226, "grad_norm": 17.76523780822754, "learning_rate": 7.203595293004554e-06, "loss": 1.4414, "step": 15050 }, { "epoch": 3.571428571428571, "grad_norm": 19.53270149230957, "learning_rate": 7.144462184377034e-06, "loss": 1.2716, "step": 15100 }, { "epoch": 3.58325449385052, "grad_norm": 18.649320602416992, "learning_rate": 7.085329075749513e-06, "loss": 1.4043, "step": 15150 }, { "epoch": 3.595080416272469, "grad_norm": 13.581181526184082, "learning_rate": 7.026195967121992e-06, "loss": 1.3686, "step": 15200 }, { "epoch": 3.6069063386944182, "grad_norm": 21.46381950378418, "learning_rate": 6.967062858494472e-06, "loss": 1.3687, "step": 15250 }, { "epoch": 3.6187322611163673, "grad_norm": 10.937467575073242, "learning_rate": 6.907929749866951e-06, "loss": 1.3183, "step": 15300 }, { "epoch": 3.630558183538316, "grad_norm": 18.974475860595703, "learning_rate": 6.84879664123943e-06, "loss": 1.3712, "step": 15350 }, { "epoch": 3.642384105960265, "grad_norm": 17.913204193115234, "learning_rate": 6.78966353261191e-06, "loss": 1.4006, "step": 15400 }, { "epoch": 3.654210028382214, "grad_norm": 14.945576667785645, "learning_rate": 6.73053042398439e-06, "loss": 1.4326, "step": 15450 }, { "epoch": 3.666035950804163, "grad_norm": 15.58818531036377, "learning_rate": 6.671397315356869e-06, "loss": 1.3116, "step": 15500 }, { "epoch": 3.677861873226112, "grad_norm": 16.57988739013672, "learning_rate": 6.612264206729349e-06, "loss": 1.2975, "step": 15550 }, { "epoch": 3.6896877956480605, "grad_norm": 13.658615112304688, "learning_rate": 6.5531310981018285e-06, "loss": 1.3709, "step": 15600 }, { "epoch": 3.7015137180700095, "grad_norm": 16.559919357299805, "learning_rate": 6.493997989474307e-06, "loss": 1.3267, "step": 15650 }, { "epoch": 3.7133396404919585, "grad_norm": 16.319732666015625, "learning_rate": 6.434864880846786e-06, "loss": 1.2947, "step": 15700 }, { "epoch": 3.725165562913907, "grad_norm": 15.4765043258667, "learning_rate": 6.375731772219266e-06, "loss": 1.3524, "step": 15750 }, { "epoch": 3.736991485335856, "grad_norm": 14.876737594604492, "learning_rate": 6.316598663591745e-06, "loss": 1.3092, "step": 15800 }, { "epoch": 3.748817407757805, "grad_norm": 13.654143333435059, "learning_rate": 6.257465554964225e-06, "loss": 1.3635, "step": 15850 }, { "epoch": 3.760643330179754, "grad_norm": 16.795425415039062, "learning_rate": 6.198332446336705e-06, "loss": 1.282, "step": 15900 }, { "epoch": 3.772469252601703, "grad_norm": 12.707657814025879, "learning_rate": 6.139199337709184e-06, "loss": 1.3122, "step": 15950 }, { "epoch": 3.7842951750236518, "grad_norm": 15.771327018737793, "learning_rate": 6.080066229081664e-06, "loss": 1.2944, "step": 16000 }, { "epoch": 3.7842951750236518, "eval_runtime": 46.7062, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 3.97229260179494, "step": 16000 }, { "epoch": 3.796121097445601, "grad_norm": 15.549239158630371, "learning_rate": 6.020933120454144e-06, "loss": 1.3878, "step": 16050 }, { "epoch": 3.80794701986755, "grad_norm": 11.558497428894043, "learning_rate": 5.9618000118266215e-06, "loss": 1.2978, "step": 16100 }, { "epoch": 3.8197729422894984, "grad_norm": 17.571189880371094, "learning_rate": 5.902666903199101e-06, "loss": 1.3128, "step": 16150 }, { "epoch": 3.8315988647114474, "grad_norm": 16.608991622924805, "learning_rate": 5.843533794571581e-06, "loss": 1.317, "step": 16200 }, { "epoch": 3.8434247871333964, "grad_norm": 17.64645004272461, "learning_rate": 5.7844006859440605e-06, "loss": 1.3225, "step": 16250 }, { "epoch": 3.8552507095553454, "grad_norm": 16.340919494628906, "learning_rate": 5.72526757731654e-06, "loss": 1.3339, "step": 16300 }, { "epoch": 3.8670766319772945, "grad_norm": 17.24504280090332, "learning_rate": 5.66613446868902e-06, "loss": 1.3137, "step": 16350 }, { "epoch": 3.878902554399243, "grad_norm": 16.168750762939453, "learning_rate": 5.607001360061499e-06, "loss": 1.3128, "step": 16400 }, { "epoch": 3.890728476821192, "grad_norm": 20.071321487426758, "learning_rate": 5.547868251433979e-06, "loss": 1.3262, "step": 16450 }, { "epoch": 3.902554399243141, "grad_norm": 19.031503677368164, "learning_rate": 5.488735142806457e-06, "loss": 1.2235, "step": 16500 }, { "epoch": 3.9143803216650896, "grad_norm": 18.36022186279297, "learning_rate": 5.429602034178937e-06, "loss": 1.2619, "step": 16550 }, { "epoch": 3.9262062440870387, "grad_norm": 17.923831939697266, "learning_rate": 5.370468925551416e-06, "loss": 1.3289, "step": 16600 }, { "epoch": 3.9380321665089877, "grad_norm": 13.550859451293945, "learning_rate": 5.311335816923896e-06, "loss": 1.2437, "step": 16650 }, { "epoch": 3.9498580889309367, "grad_norm": 12.4674654006958, "learning_rate": 5.2522027082963755e-06, "loss": 1.2886, "step": 16700 }, { "epoch": 3.9616840113528857, "grad_norm": 18.66042709350586, "learning_rate": 5.193069599668855e-06, "loss": 1.3309, "step": 16750 }, { "epoch": 3.9735099337748343, "grad_norm": 15.646864891052246, "learning_rate": 5.133936491041335e-06, "loss": 1.3457, "step": 16800 }, { "epoch": 3.9853358561967833, "grad_norm": 17.00884246826172, "learning_rate": 5.0748033824138144e-06, "loss": 1.3184, "step": 16850 }, { "epoch": 3.9971617786187323, "grad_norm": 13.399270057678223, "learning_rate": 5.015670273786294e-06, "loss": 1.2087, "step": 16900 }, { "epoch": 4.008987701040681, "grad_norm": 13.706061363220215, "learning_rate": 4.956537165158773e-06, "loss": 1.229, "step": 16950 }, { "epoch": 4.02081362346263, "grad_norm": 22.725217819213867, "learning_rate": 4.8974040565312525e-06, "loss": 1.3235, "step": 17000 }, { "epoch": 4.032639545884579, "grad_norm": 16.072246551513672, "learning_rate": 4.838270947903731e-06, "loss": 1.195, "step": 17050 }, { "epoch": 4.0444654683065275, "grad_norm": 17.015745162963867, "learning_rate": 4.779137839276211e-06, "loss": 1.2793, "step": 17100 }, { "epoch": 4.056291390728477, "grad_norm": 15.687799453735352, "learning_rate": 4.7200047306486906e-06, "loss": 1.2719, "step": 17150 }, { "epoch": 4.068117313150426, "grad_norm": 11.79020881652832, "learning_rate": 4.66087162202117e-06, "loss": 1.2701, "step": 17200 }, { "epoch": 4.079943235572375, "grad_norm": 14.385472297668457, "learning_rate": 4.601738513393649e-06, "loss": 1.2593, "step": 17250 }, { "epoch": 4.091769157994324, "grad_norm": 18.47262954711914, "learning_rate": 4.542605404766129e-06, "loss": 1.2324, "step": 17300 }, { "epoch": 4.103595080416272, "grad_norm": 22.801834106445312, "learning_rate": 4.483472296138608e-06, "loss": 1.2338, "step": 17350 }, { "epoch": 4.115421002838222, "grad_norm": 16.11665916442871, "learning_rate": 4.424339187511088e-06, "loss": 1.2338, "step": 17400 }, { "epoch": 4.12724692526017, "grad_norm": 18.113365173339844, "learning_rate": 4.365206078883567e-06, "loss": 1.2595, "step": 17450 }, { "epoch": 4.139072847682119, "grad_norm": 15.56670093536377, "learning_rate": 4.306072970256046e-06, "loss": 1.2434, "step": 17500 }, { "epoch": 4.150898770104068, "grad_norm": 18.501914978027344, "learning_rate": 4.246939861628526e-06, "loss": 1.268, "step": 17550 }, { "epoch": 4.162724692526017, "grad_norm": 16.622150421142578, "learning_rate": 4.187806753001006e-06, "loss": 1.2345, "step": 17600 }, { "epoch": 4.174550614947966, "grad_norm": 19.019207000732422, "learning_rate": 4.128673644373484e-06, "loss": 1.228, "step": 17650 }, { "epoch": 4.186376537369915, "grad_norm": 11.55809211730957, "learning_rate": 4.069540535745964e-06, "loss": 1.235, "step": 17700 }, { "epoch": 4.1982024597918635, "grad_norm": 14.763603210449219, "learning_rate": 4.010407427118444e-06, "loss": 1.2198, "step": 17750 }, { "epoch": 4.210028382213813, "grad_norm": 17.480113983154297, "learning_rate": 3.951274318490923e-06, "loss": 1.263, "step": 17800 }, { "epoch": 4.2218543046357615, "grad_norm": 17.487497329711914, "learning_rate": 3.892141209863403e-06, "loss": 1.2288, "step": 17850 }, { "epoch": 4.23368022705771, "grad_norm": 14.157654762268066, "learning_rate": 3.833008101235882e-06, "loss": 1.2251, "step": 17900 }, { "epoch": 4.2455061494796595, "grad_norm": 21.731857299804688, "learning_rate": 3.773874992608362e-06, "loss": 1.2796, "step": 17950 }, { "epoch": 4.257332071901608, "grad_norm": 17.268417358398438, "learning_rate": 3.7147418839808415e-06, "loss": 1.2934, "step": 18000 }, { "epoch": 4.257332071901608, "eval_runtime": 47.1593, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 3.9484923051611727, "step": 18000 }, { "epoch": 4.269157994323558, "grad_norm": 12.740385055541992, "learning_rate": 3.6556087753533203e-06, "loss": 1.2197, "step": 18050 }, { "epoch": 4.280983916745506, "grad_norm": 17.239517211914062, "learning_rate": 3.5964756667258e-06, "loss": 1.1908, "step": 18100 }, { "epoch": 4.292809839167455, "grad_norm": 16.485107421875, "learning_rate": 3.5373425580982795e-06, "loss": 1.2549, "step": 18150 }, { "epoch": 4.304635761589404, "grad_norm": 17.04962921142578, "learning_rate": 3.478209449470759e-06, "loss": 1.3468, "step": 18200 }, { "epoch": 4.316461684011353, "grad_norm": 14.987895965576172, "learning_rate": 3.419076340843238e-06, "loss": 1.2323, "step": 18250 }, { "epoch": 4.328287606433301, "grad_norm": 14.840313911437988, "learning_rate": 3.3599432322157176e-06, "loss": 1.2897, "step": 18300 }, { "epoch": 4.340113528855251, "grad_norm": 17.09177589416504, "learning_rate": 3.3008101235881973e-06, "loss": 1.3231, "step": 18350 }, { "epoch": 4.351939451277199, "grad_norm": 16.76932716369629, "learning_rate": 3.241677014960677e-06, "loss": 1.3587, "step": 18400 }, { "epoch": 4.363765373699149, "grad_norm": 17.611955642700195, "learning_rate": 3.1825439063331565e-06, "loss": 1.2639, "step": 18450 }, { "epoch": 4.375591296121097, "grad_norm": 11.352503776550293, "learning_rate": 3.1234107977056353e-06, "loss": 1.2244, "step": 18500 }, { "epoch": 4.387417218543046, "grad_norm": 17.059810638427734, "learning_rate": 3.064277689078115e-06, "loss": 1.1932, "step": 18550 }, { "epoch": 4.3992431409649955, "grad_norm": 15.7676420211792, "learning_rate": 3.0051445804505946e-06, "loss": 1.2072, "step": 18600 }, { "epoch": 4.411069063386944, "grad_norm": 20.51708984375, "learning_rate": 2.9460114718230742e-06, "loss": 1.2257, "step": 18650 }, { "epoch": 4.4228949858088935, "grad_norm": 9.713994979858398, "learning_rate": 2.8868783631955535e-06, "loss": 1.2368, "step": 18700 }, { "epoch": 4.434720908230842, "grad_norm": 17.381057739257812, "learning_rate": 2.8277452545680327e-06, "loss": 1.2796, "step": 18750 }, { "epoch": 4.446546830652791, "grad_norm": 17.901290893554688, "learning_rate": 2.7686121459405123e-06, "loss": 1.1617, "step": 18800 }, { "epoch": 4.45837275307474, "grad_norm": 14.669180870056152, "learning_rate": 2.709479037312992e-06, "loss": 1.238, "step": 18850 }, { "epoch": 4.470198675496689, "grad_norm": 19.362512588500977, "learning_rate": 2.650345928685471e-06, "loss": 1.1765, "step": 18900 }, { "epoch": 4.482024597918637, "grad_norm": 16.083276748657227, "learning_rate": 2.591212820057951e-06, "loss": 1.2626, "step": 18950 }, { "epoch": 4.493850520340587, "grad_norm": 12.657955169677734, "learning_rate": 2.53207971143043e-06, "loss": 1.2582, "step": 19000 }, { "epoch": 4.505676442762535, "grad_norm": 19.395004272460938, "learning_rate": 2.4729466028029097e-06, "loss": 1.2478, "step": 19050 }, { "epoch": 4.517502365184484, "grad_norm": 19.803897857666016, "learning_rate": 2.413813494175389e-06, "loss": 1.1628, "step": 19100 }, { "epoch": 4.529328287606433, "grad_norm": 18.098979949951172, "learning_rate": 2.3546803855478685e-06, "loss": 1.251, "step": 19150 }, { "epoch": 4.541154210028382, "grad_norm": 20.26512908935547, "learning_rate": 2.295547276920348e-06, "loss": 1.208, "step": 19200 }, { "epoch": 4.552980132450331, "grad_norm": 11.94166088104248, "learning_rate": 2.2364141682928274e-06, "loss": 1.2535, "step": 19250 }, { "epoch": 4.56480605487228, "grad_norm": 15.473821640014648, "learning_rate": 2.177281059665307e-06, "loss": 1.1903, "step": 19300 }, { "epoch": 4.5766319772942285, "grad_norm": 14.091665267944336, "learning_rate": 2.1181479510377862e-06, "loss": 1.1725, "step": 19350 }, { "epoch": 4.588457899716178, "grad_norm": 15.09231948852539, "learning_rate": 2.059014842410266e-06, "loss": 1.2023, "step": 19400 }, { "epoch": 4.600283822138127, "grad_norm": 19.047542572021484, "learning_rate": 1.999881733782745e-06, "loss": 1.1607, "step": 19450 }, { "epoch": 4.612109744560076, "grad_norm": 15.40837574005127, "learning_rate": 1.9407486251552247e-06, "loss": 1.2483, "step": 19500 }, { "epoch": 4.623935666982025, "grad_norm": 16.487464904785156, "learning_rate": 1.881615516527704e-06, "loss": 1.1202, "step": 19550 }, { "epoch": 4.635761589403973, "grad_norm": 18.49724006652832, "learning_rate": 1.8224824079001836e-06, "loss": 1.2428, "step": 19600 }, { "epoch": 4.647587511825923, "grad_norm": 13.098505973815918, "learning_rate": 1.7633492992726628e-06, "loss": 1.2413, "step": 19650 }, { "epoch": 4.659413434247871, "grad_norm": 14.599630355834961, "learning_rate": 1.7042161906451424e-06, "loss": 1.1916, "step": 19700 }, { "epoch": 4.671239356669821, "grad_norm": 13.156811714172363, "learning_rate": 1.6450830820176216e-06, "loss": 1.1738, "step": 19750 }, { "epoch": 4.683065279091769, "grad_norm": 12.79720687866211, "learning_rate": 1.5859499733901013e-06, "loss": 1.1718, "step": 19800 }, { "epoch": 4.694891201513718, "grad_norm": 20.443012237548828, "learning_rate": 1.5268168647625805e-06, "loss": 1.2093, "step": 19850 }, { "epoch": 4.706717123935667, "grad_norm": 14.799368858337402, "learning_rate": 1.4676837561350601e-06, "loss": 1.1457, "step": 19900 }, { "epoch": 4.718543046357616, "grad_norm": 12.656880378723145, "learning_rate": 1.4085506475075394e-06, "loss": 1.1704, "step": 19950 }, { "epoch": 4.7303689687795645, "grad_norm": 17.24571418762207, "learning_rate": 1.349417538880019e-06, "loss": 1.1767, "step": 20000 }, { "epoch": 4.7303689687795645, "eval_runtime": 47.055, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 3.9691287893885456, "step": 20000 }, { "epoch": 4.742194891201514, "grad_norm": 15.378254890441895, "learning_rate": 1.2902844302524986e-06, "loss": 1.1956, "step": 20050 }, { "epoch": 4.7540208136234625, "grad_norm": 17.969493865966797, "learning_rate": 1.2311513216249778e-06, "loss": 1.1815, "step": 20100 }, { "epoch": 4.765846736045411, "grad_norm": 18.55719757080078, "learning_rate": 1.1720182129974573e-06, "loss": 1.1376, "step": 20150 }, { "epoch": 4.7776726584673606, "grad_norm": 17.548654556274414, "learning_rate": 1.1128851043699367e-06, "loss": 1.1839, "step": 20200 }, { "epoch": 4.789498580889309, "grad_norm": 21.243549346923828, "learning_rate": 1.0537519957424163e-06, "loss": 1.1722, "step": 20250 }, { "epoch": 4.801324503311259, "grad_norm": 12.436286926269531, "learning_rate": 9.946188871148958e-07, "loss": 1.2525, "step": 20300 }, { "epoch": 4.813150425733207, "grad_norm": 12.564568519592285, "learning_rate": 9.354857784873751e-07, "loss": 1.1445, "step": 20350 }, { "epoch": 4.824976348155156, "grad_norm": 19.906652450561523, "learning_rate": 8.763526698598545e-07, "loss": 1.1819, "step": 20400 }, { "epoch": 4.836802270577105, "grad_norm": 17.435781478881836, "learning_rate": 8.17219561232334e-07, "loss": 1.213, "step": 20450 }, { "epoch": 4.848628192999054, "grad_norm": 14.816115379333496, "learning_rate": 7.580864526048135e-07, "loss": 1.1366, "step": 20500 }, { "epoch": 4.860454115421003, "grad_norm": 14.993414878845215, "learning_rate": 6.989533439772929e-07, "loss": 1.2219, "step": 20550 }, { "epoch": 4.872280037842952, "grad_norm": 17.53949737548828, "learning_rate": 6.398202353497723e-07, "loss": 1.1501, "step": 20600 }, { "epoch": 4.8841059602649, "grad_norm": 14.761266708374023, "learning_rate": 5.806871267222519e-07, "loss": 1.1949, "step": 20650 }, { "epoch": 4.89593188268685, "grad_norm": 15.1113920211792, "learning_rate": 5.215540180947313e-07, "loss": 1.1757, "step": 20700 }, { "epoch": 4.907757805108798, "grad_norm": 17.890682220458984, "learning_rate": 4.624209094672107e-07, "loss": 1.1029, "step": 20750 }, { "epoch": 4.919583727530747, "grad_norm": 16.85039710998535, "learning_rate": 4.032878008396902e-07, "loss": 1.1426, "step": 20800 }, { "epoch": 4.9314096499526965, "grad_norm": 22.656349182128906, "learning_rate": 3.441546922121696e-07, "loss": 1.2021, "step": 20850 }, { "epoch": 4.943235572374645, "grad_norm": 19.370864868164062, "learning_rate": 2.8502158358464905e-07, "loss": 1.1034, "step": 20900 }, { "epoch": 4.955061494796594, "grad_norm": 13.943963050842285, "learning_rate": 2.258884749571285e-07, "loss": 1.2271, "step": 20950 }, { "epoch": 4.966887417218543, "grad_norm": 14.200597763061523, "learning_rate": 1.6675536632960799e-07, "loss": 1.2032, "step": 21000 }, { "epoch": 4.978713339640492, "grad_norm": 12.472103118896484, "learning_rate": 1.076222577020874e-07, "loss": 1.2036, "step": 21050 }, { "epoch": 4.990539262062441, "grad_norm": 15.718477249145508, "learning_rate": 4.848914907456685e-08, "loss": 1.1324, "step": 21100 } ], "logging_steps": 50, "max_steps": 21140, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }