| { | |
| "best_metric": 0.51153946, | |
| "best_model_checkpoint": "/export/home2/zli/kc/mm_rag/Qwen2.5-72B-Instruct_lora/checkpoint-513", | |
| "epoch": 0.9995129079396006, | |
| "eval_steps": 100, | |
| "global_step": 513, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001948368241597662, | |
| "grad_norm": 0.7911198735237122, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 1.1104155778884888, | |
| "memory(GiB)": 239.17, | |
| "step": 1, | |
| "token_acc": 0.7912871287128713, | |
| "train_speed(iter/s)": 0.00699 | |
| }, | |
| { | |
| "epoch": 0.00974184120798831, | |
| "grad_norm": 1.048244833946228, | |
| "learning_rate": 1.923076923076923e-05, | |
| "loss": 1.0879460573196411, | |
| "memory(GiB)": 257.56, | |
| "step": 5, | |
| "token_acc": 0.7876893738743511, | |
| "train_speed(iter/s)": 0.006513 | |
| }, | |
| { | |
| "epoch": 0.01948368241597662, | |
| "grad_norm": 0.8897583484649658, | |
| "learning_rate": 3.846153846153846e-05, | |
| "loss": 1.179165744781494, | |
| "memory(GiB)": 267.89, | |
| "step": 10, | |
| "token_acc": 0.7578167359479486, | |
| "train_speed(iter/s)": 0.006891 | |
| }, | |
| { | |
| "epoch": 0.029225523623964928, | |
| "grad_norm": 0.3992486298084259, | |
| "learning_rate": 5.769230769230769e-05, | |
| "loss": 0.866185188293457, | |
| "memory(GiB)": 267.89, | |
| "step": 15, | |
| "token_acc": 0.7692150449713818, | |
| "train_speed(iter/s)": 0.006796 | |
| }, | |
| { | |
| "epoch": 0.03896736483195324, | |
| "grad_norm": 0.50007164478302, | |
| "learning_rate": 7.692307692307693e-05, | |
| "loss": 0.7950119972229004, | |
| "memory(GiB)": 267.89, | |
| "step": 20, | |
| "token_acc": 0.7725610358802016, | |
| "train_speed(iter/s)": 0.006865 | |
| }, | |
| { | |
| "epoch": 0.04870920603994155, | |
| "grad_norm": 0.2926434874534607, | |
| "learning_rate": 9.615384615384617e-05, | |
| "loss": 0.6839358806610107, | |
| "memory(GiB)": 267.89, | |
| "step": 25, | |
| "token_acc": 0.7947733918128655, | |
| "train_speed(iter/s)": 0.006974 | |
| }, | |
| { | |
| "epoch": 0.058451047247929856, | |
| "grad_norm": 0.4686521887779236, | |
| "learning_rate": 9.998335523311734e-05, | |
| "loss": 0.6574328422546387, | |
| "memory(GiB)": 267.89, | |
| "step": 30, | |
| "token_acc": 0.8028529939352524, | |
| "train_speed(iter/s)": 0.006948 | |
| }, | |
| { | |
| "epoch": 0.06819288845591817, | |
| "grad_norm": 0.2973114252090454, | |
| "learning_rate": 9.991575486016592e-05, | |
| "loss": 0.6306457042694091, | |
| "memory(GiB)": 267.89, | |
| "step": 35, | |
| "token_acc": 0.8055379603655778, | |
| "train_speed(iter/s)": 0.006928 | |
| }, | |
| { | |
| "epoch": 0.07793472966390648, | |
| "grad_norm": 0.2803875803947449, | |
| "learning_rate": 9.979622885143301e-05, | |
| "loss": 0.6483866691589355, | |
| "memory(GiB)": 267.89, | |
| "step": 40, | |
| "token_acc": 0.8065050756163248, | |
| "train_speed(iter/s)": 0.006967 | |
| }, | |
| { | |
| "epoch": 0.08767657087189479, | |
| "grad_norm": 0.486200213432312, | |
| "learning_rate": 9.962490154569727e-05, | |
| "loss": 0.6592767715454102, | |
| "memory(GiB)": 279.45, | |
| "step": 45, | |
| "token_acc": 0.7944840703756538, | |
| "train_speed(iter/s)": 0.006889 | |
| }, | |
| { | |
| "epoch": 0.0974184120798831, | |
| "grad_norm": 0.3190113604068756, | |
| "learning_rate": 9.940195116883755e-05, | |
| "loss": 0.6488125324249268, | |
| "memory(GiB)": 279.45, | |
| "step": 50, | |
| "token_acc": 0.7989409984871407, | |
| "train_speed(iter/s)": 0.006919 | |
| }, | |
| { | |
| "epoch": 0.1071602532878714, | |
| "grad_norm": 0.2820813059806824, | |
| "learning_rate": 9.91276096484306e-05, | |
| "loss": 0.6796814918518066, | |
| "memory(GiB)": 279.45, | |
| "step": 55, | |
| "token_acc": 0.7929543782192788, | |
| "train_speed(iter/s)": 0.006811 | |
| }, | |
| { | |
| "epoch": 0.11690209449585971, | |
| "grad_norm": 0.37464168667793274, | |
| "learning_rate": 9.880216237248481e-05, | |
| "loss": 0.7066696166992188, | |
| "memory(GiB)": 279.45, | |
| "step": 60, | |
| "token_acc": 0.7865912142720123, | |
| "train_speed(iter/s)": 0.006893 | |
| }, | |
| { | |
| "epoch": 0.12664393570384802, | |
| "grad_norm": 0.3829931318759918, | |
| "learning_rate": 9.842594789256103e-05, | |
| "loss": 0.5756209850311279, | |
| "memory(GiB)": 279.45, | |
| "step": 65, | |
| "token_acc": 0.8192581553237291, | |
| "train_speed(iter/s)": 0.0069 | |
| }, | |
| { | |
| "epoch": 0.13638577691183634, | |
| "grad_norm": 0.3420902490615845, | |
| "learning_rate": 9.799935757158891e-05, | |
| "loss": 0.7137881755828858, | |
| "memory(GiB)": 279.45, | |
| "step": 70, | |
| "token_acc": 0.7774079467443312, | |
| "train_speed(iter/s)": 0.006961 | |
| }, | |
| { | |
| "epoch": 0.14612761811982464, | |
| "grad_norm": 0.2829829454421997, | |
| "learning_rate": 9.752283517674575e-05, | |
| "loss": 0.5826292037963867, | |
| "memory(GiB)": 279.45, | |
| "step": 75, | |
| "token_acc": 0.8175895765472313, | |
| "train_speed(iter/s)": 0.006986 | |
| }, | |
| { | |
| "epoch": 0.15586945932781296, | |
| "grad_norm": 0.4237753748893738, | |
| "learning_rate": 9.699687641782067e-05, | |
| "loss": 0.5978005886077881, | |
| "memory(GiB)": 279.45, | |
| "step": 80, | |
| "token_acc": 0.8125563774129533, | |
| "train_speed(iter/s)": 0.007002 | |
| }, | |
| { | |
| "epoch": 0.16561130053580125, | |
| "grad_norm": 0.6381401419639587, | |
| "learning_rate": 9.642202843154491e-05, | |
| "loss": 0.6211316585540771, | |
| "memory(GiB)": 279.45, | |
| "step": 85, | |
| "token_acc": 0.8153757510906248, | |
| "train_speed(iter/s)": 0.007036 | |
| }, | |
| { | |
| "epoch": 0.17535314174378958, | |
| "grad_norm": 0.3631569743156433, | |
| "learning_rate": 9.579888921242439e-05, | |
| "loss": 0.6291302680969239, | |
| "memory(GiB)": 279.45, | |
| "step": 90, | |
| "token_acc": 0.8056357749190514, | |
| "train_speed(iter/s)": 0.00706 | |
| }, | |
| { | |
| "epoch": 0.1850949829517779, | |
| "grad_norm": 0.31778669357299805, | |
| "learning_rate": 9.512810699066667e-05, | |
| "loss": 0.6138756752014161, | |
| "memory(GiB)": 279.45, | |
| "step": 95, | |
| "token_acc": 0.8115504301665751, | |
| "train_speed(iter/s)": 0.007048 | |
| }, | |
| { | |
| "epoch": 0.1948368241597662, | |
| "grad_norm": 0.3409082889556885, | |
| "learning_rate": 9.441037955784944e-05, | |
| "loss": 0.640412425994873, | |
| "memory(GiB)": 279.45, | |
| "step": 100, | |
| "token_acc": 0.7977538518656023, | |
| "train_speed(iter/s)": 0.007066 | |
| }, | |
| { | |
| "epoch": 0.1948368241597662, | |
| "eval_loss": 0.6026075482368469, | |
| "eval_runtime": 26.8674, | |
| "eval_samples_per_second": 0.149, | |
| "eval_steps_per_second": 0.074, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20457866536775451, | |
| "grad_norm": 0.5213446021080017, | |
| "learning_rate": 9.364645354103206e-05, | |
| "loss": 0.6617852210998535, | |
| "memory(GiB)": 279.45, | |
| "step": 105, | |
| "token_acc": 0.7954326459818848, | |
| "train_speed(iter/s)": 0.007023 | |
| }, | |
| { | |
| "epoch": 0.2143205065757428, | |
| "grad_norm": 0.3881484270095825, | |
| "learning_rate": 9.28371236260652e-05, | |
| "loss": 0.6231249809265137, | |
| "memory(GiB)": 279.45, | |
| "step": 110, | |
| "token_acc": 0.8025908424311553, | |
| "train_speed(iter/s)": 0.007042 | |
| }, | |
| { | |
| "epoch": 0.22406234778373113, | |
| "grad_norm": 0.34954214096069336, | |
| "learning_rate": 9.198323173090663e-05, | |
| "loss": 0.6497172355651856, | |
| "memory(GiB)": 279.45, | |
| "step": 115, | |
| "token_acc": 0.794488631798827, | |
| "train_speed(iter/s)": 0.007052 | |
| }, | |
| { | |
| "epoch": 0.23380418899171942, | |
| "grad_norm": 0.3073936402797699, | |
| "learning_rate": 9.108566612980298e-05, | |
| "loss": 0.6538199424743653, | |
| "memory(GiB)": 279.45, | |
| "step": 120, | |
| "token_acc": 0.7920408356177332, | |
| "train_speed(iter/s)": 0.00707 | |
| }, | |
| { | |
| "epoch": 0.24354603019970775, | |
| "grad_norm": 0.3140193819999695, | |
| "learning_rate": 9.014536052924883e-05, | |
| "loss": 0.5965206623077393, | |
| "memory(GiB)": 279.45, | |
| "step": 125, | |
| "token_acc": 0.8178110129163834, | |
| "train_speed(iter/s)": 0.007102 | |
| }, | |
| { | |
| "epoch": 0.25328787140769604, | |
| "grad_norm": 0.9085149168968201, | |
| "learning_rate": 8.916329309668397e-05, | |
| "loss": 0.5718743324279785, | |
| "memory(GiB)": 279.45, | |
| "step": 130, | |
| "token_acc": 0.8312764304824373, | |
| "train_speed(iter/s)": 0.007101 | |
| }, | |
| { | |
| "epoch": 0.26302971261568436, | |
| "grad_norm": 0.4713858664035797, | |
| "learning_rate": 8.814048544293965e-05, | |
| "loss": 0.582437801361084, | |
| "memory(GiB)": 279.45, | |
| "step": 135, | |
| "token_acc": 0.8134561744569352, | |
| "train_speed(iter/s)": 0.007088 | |
| }, | |
| { | |
| "epoch": 0.2727715538236727, | |
| "grad_norm": 0.39269959926605225, | |
| "learning_rate": 8.707800155949217e-05, | |
| "loss": 0.5708662986755371, | |
| "memory(GiB)": 279.45, | |
| "step": 140, | |
| "token_acc": 0.8203411790110128, | |
| "train_speed(iter/s)": 0.007075 | |
| }, | |
| { | |
| "epoch": 0.282513395031661, | |
| "grad_norm": 0.38881248235702515, | |
| "learning_rate": 8.597694671162921e-05, | |
| "loss": 0.6288607597351075, | |
| "memory(GiB)": 279.45, | |
| "step": 145, | |
| "token_acc": 0.8012965630788228, | |
| "train_speed(iter/s)": 0.007075 | |
| }, | |
| { | |
| "epoch": 0.2922552362396493, | |
| "grad_norm": 1.00093412399292, | |
| "learning_rate": 8.483846628868055e-05, | |
| "loss": 0.6310084819793701, | |
| "memory(GiB)": 279.45, | |
| "step": 150, | |
| "token_acc": 0.8051169868746433, | |
| "train_speed(iter/s)": 0.007102 | |
| }, | |
| { | |
| "epoch": 0.3019970774476376, | |
| "grad_norm": 0.5331721901893616, | |
| "learning_rate": 8.366374461250916e-05, | |
| "loss": 0.5937428951263428, | |
| "memory(GiB)": 279.45, | |
| "step": 155, | |
| "token_acc": 0.8136619109381755, | |
| "train_speed(iter/s)": 0.007062 | |
| }, | |
| { | |
| "epoch": 0.3117389186556259, | |
| "grad_norm": 0.6358391642570496, | |
| "learning_rate": 8.245400370550198e-05, | |
| "loss": 0.6008272171020508, | |
| "memory(GiB)": 279.45, | |
| "step": 160, | |
| "token_acc": 0.8157670857358919, | |
| "train_speed(iter/s)": 0.007051 | |
| }, | |
| { | |
| "epoch": 0.32148075986361424, | |
| "grad_norm": 0.40399816632270813, | |
| "learning_rate": 8.121050201934235e-05, | |
| "loss": 0.6029816627502441, | |
| "memory(GiB)": 279.45, | |
| "step": 165, | |
| "token_acc": 0.8082507259437268, | |
| "train_speed(iter/s)": 0.007056 | |
| }, | |
| { | |
| "epoch": 0.3312226010716025, | |
| "grad_norm": 0.4126909673213959, | |
| "learning_rate": 7.993453312588607e-05, | |
| "loss": 0.6517924785614013, | |
| "memory(GiB)": 279.45, | |
| "step": 170, | |
| "token_acc": 0.7958906764876914, | |
| "train_speed(iter/s)": 0.007056 | |
| }, | |
| { | |
| "epoch": 0.34096444227959083, | |
| "grad_norm": 0.6618629097938538, | |
| "learning_rate": 7.862742437150336e-05, | |
| "loss": 0.6141845226287842, | |
| "memory(GiB)": 279.45, | |
| "step": 175, | |
| "token_acc": 0.8037235096058626, | |
| "train_speed(iter/s)": 0.007045 | |
| }, | |
| { | |
| "epoch": 0.35070628348757915, | |
| "grad_norm": 0.5881444215774536, | |
| "learning_rate": 7.729053549628622e-05, | |
| "loss": 0.6001346588134766, | |
| "memory(GiB)": 279.45, | |
| "step": 180, | |
| "token_acc": 0.8113048368953881, | |
| "train_speed(iter/s)": 0.007061 | |
| }, | |
| { | |
| "epoch": 0.3604481246955675, | |
| "grad_norm": 1.692840576171875, | |
| "learning_rate": 7.592525721955786e-05, | |
| "loss": 0.574149227142334, | |
| "memory(GiB)": 279.45, | |
| "step": 185, | |
| "token_acc": 0.8219587430113746, | |
| "train_speed(iter/s)": 0.007067 | |
| }, | |
| { | |
| "epoch": 0.3701899659035558, | |
| "grad_norm": 0.38788875937461853, | |
| "learning_rate": 7.45330097931553e-05, | |
| "loss": 0.6197400093078613, | |
| "memory(GiB)": 279.45, | |
| "step": 190, | |
| "token_acc": 0.8041193060422186, | |
| "train_speed(iter/s)": 0.007079 | |
| }, | |
| { | |
| "epoch": 0.37993180711154406, | |
| "grad_norm": 0.48055437207221985, | |
| "learning_rate": 7.311524152399054e-05, | |
| "loss": 0.6016897678375244, | |
| "memory(GiB)": 279.45, | |
| "step": 195, | |
| "token_acc": 0.8031721765717562, | |
| "train_speed(iter/s)": 0.007068 | |
| }, | |
| { | |
| "epoch": 0.3896736483195324, | |
| "grad_norm": 0.4017133116722107, | |
| "learning_rate": 7.167342726742685e-05, | |
| "loss": 0.5860675811767578, | |
| "memory(GiB)": 279.45, | |
| "step": 200, | |
| "token_acc": 0.8191920830629461, | |
| "train_speed(iter/s)": 0.007059 | |
| }, | |
| { | |
| "epoch": 0.3896736483195324, | |
| "eval_loss": 0.5398527979850769, | |
| "eval_runtime": 26.925, | |
| "eval_samples_per_second": 0.149, | |
| "eval_steps_per_second": 0.074, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3994154895275207, | |
| "grad_norm": 0.48819786310195923, | |
| "learning_rate": 7.020906689303766e-05, | |
| "loss": 0.60306715965271, | |
| "memory(GiB)": 279.45, | |
| "step": 205, | |
| "token_acc": 0.8102000173175167, | |
| "train_speed(iter/s)": 0.007055 | |
| }, | |
| { | |
| "epoch": 0.40915733073550903, | |
| "grad_norm": 0.5116552114486694, | |
| "learning_rate": 6.872368372434416e-05, | |
| "loss": 0.6070918083190918, | |
| "memory(GiB)": 279.45, | |
| "step": 210, | |
| "token_acc": 0.804848606780002, | |
| "train_speed(iter/s)": 0.007061 | |
| }, | |
| { | |
| "epoch": 0.4188991719434973, | |
| "grad_norm": 0.42464399337768555, | |
| "learning_rate": 6.721882295415425e-05, | |
| "loss": 0.633366060256958, | |
| "memory(GiB)": 279.45, | |
| "step": 215, | |
| "token_acc": 0.8031126254719587, | |
| "train_speed(iter/s)": 0.007047 | |
| }, | |
| { | |
| "epoch": 0.4286410131514856, | |
| "grad_norm": 0.43728524446487427, | |
| "learning_rate": 6.569605003715201e-05, | |
| "loss": 0.5457853317260742, | |
| "memory(GiB)": 279.45, | |
| "step": 220, | |
| "token_acc": 0.8268458881732474, | |
| "train_speed(iter/s)": 0.007041 | |
| }, | |
| { | |
| "epoch": 0.43838285435947394, | |
| "grad_norm": 0.526282548904419, | |
| "learning_rate": 6.41569490614092e-05, | |
| "loss": 0.5758168220520019, | |
| "memory(GiB)": 291.53, | |
| "step": 225, | |
| "token_acc": 0.8169302630500697, | |
| "train_speed(iter/s)": 0.007022 | |
| }, | |
| { | |
| "epoch": 0.44812469556746226, | |
| "grad_norm": 0.4335802495479584, | |
| "learning_rate": 6.260312110051312e-05, | |
| "loss": 0.5597887992858886, | |
| "memory(GiB)": 291.53, | |
| "step": 230, | |
| "token_acc": 0.8260340632603407, | |
| "train_speed(iter/s)": 0.007017 | |
| }, | |
| { | |
| "epoch": 0.4578665367754506, | |
| "grad_norm": 0.46290066838264465, | |
| "learning_rate": 6.103618254802511e-05, | |
| "loss": 0.5268624782562256, | |
| "memory(GiB)": 291.53, | |
| "step": 235, | |
| "token_acc": 0.8337082864641919, | |
| "train_speed(iter/s)": 0.007024 | |
| }, | |
| { | |
| "epoch": 0.46760837798343885, | |
| "grad_norm": 0.44755905866622925, | |
| "learning_rate": 5.945776343600207e-05, | |
| "loss": 0.6087980270385742, | |
| "memory(GiB)": 291.53, | |
| "step": 240, | |
| "token_acc": 0.8013279748383715, | |
| "train_speed(iter/s)": 0.007003 | |
| }, | |
| { | |
| "epoch": 0.47735021919142717, | |
| "grad_norm": 0.35961049795150757, | |
| "learning_rate": 5.7869505739330546e-05, | |
| "loss": 0.6320755958557129, | |
| "memory(GiB)": 291.53, | |
| "step": 245, | |
| "token_acc": 0.7990379032926931, | |
| "train_speed(iter/s)": 0.006988 | |
| }, | |
| { | |
| "epoch": 0.4870920603994155, | |
| "grad_norm": 0.43942877650260925, | |
| "learning_rate": 5.627306166763684e-05, | |
| "loss": 0.5911942958831787, | |
| "memory(GiB)": 291.53, | |
| "step": 250, | |
| "token_acc": 0.8108974358974359, | |
| "train_speed(iter/s)": 0.006984 | |
| }, | |
| { | |
| "epoch": 0.4968339016074038, | |
| "grad_norm": 0.3216777443885803, | |
| "learning_rate": 5.467009194655045e-05, | |
| "loss": 0.5380321025848389, | |
| "memory(GiB)": 291.53, | |
| "step": 255, | |
| "token_acc": 0.8256673332753673, | |
| "train_speed(iter/s)": 0.006954 | |
| }, | |
| { | |
| "epoch": 0.5065757428153921, | |
| "grad_norm": 0.3348521590232849, | |
| "learning_rate": 5.306226409010855e-05, | |
| "loss": 0.5896577358245849, | |
| "memory(GiB)": 291.53, | |
| "step": 260, | |
| "token_acc": 0.8136957057229658, | |
| "train_speed(iter/s)": 0.00696 | |
| }, | |
| { | |
| "epoch": 0.5163175840233805, | |
| "grad_norm": 0.4316939115524292, | |
| "learning_rate": 5.145125066609877e-05, | |
| "loss": 0.5456296920776367, | |
| "memory(GiB)": 291.53, | |
| "step": 265, | |
| "token_acc": 0.8252879627187197, | |
| "train_speed(iter/s)": 0.006968 | |
| }, | |
| { | |
| "epoch": 0.5260594252313687, | |
| "grad_norm": 0.4729405343532562, | |
| "learning_rate": 4.983872755614461e-05, | |
| "loss": 0.5579260349273681, | |
| "memory(GiB)": 291.53, | |
| "step": 270, | |
| "token_acc": 0.8236703843672228, | |
| "train_speed(iter/s)": 0.006963 | |
| }, | |
| { | |
| "epoch": 0.535801266439357, | |
| "grad_norm": 0.4944189786911011, | |
| "learning_rate": 4.8226372212343726e-05, | |
| "loss": 0.5667993545532226, | |
| "memory(GiB)": 291.53, | |
| "step": 275, | |
| "token_acc": 0.8146758992113868, | |
| "train_speed(iter/s)": 0.006964 | |
| }, | |
| { | |
| "epoch": 0.5455431076473454, | |
| "grad_norm": 0.8255349397659302, | |
| "learning_rate": 4.661586191227247e-05, | |
| "loss": 0.5635212898254395, | |
| "memory(GiB)": 291.53, | |
| "step": 280, | |
| "token_acc": 0.8212789502806103, | |
| "train_speed(iter/s)": 0.006981 | |
| }, | |
| { | |
| "epoch": 0.5552849488553336, | |
| "grad_norm": 0.4205448627471924, | |
| "learning_rate": 4.500887201417187e-05, | |
| "loss": 0.6006342887878418, | |
| "memory(GiB)": 291.53, | |
| "step": 285, | |
| "token_acc": 0.8099815913274698, | |
| "train_speed(iter/s)": 0.007001 | |
| }, | |
| { | |
| "epoch": 0.565026790063322, | |
| "grad_norm": 0.39303454756736755, | |
| "learning_rate": 4.3407074214130446e-05, | |
| "loss": 0.5548898220062256, | |
| "memory(GiB)": 291.53, | |
| "step": 290, | |
| "token_acc": 0.8264817150063052, | |
| "train_speed(iter/s)": 0.007007 | |
| }, | |
| { | |
| "epoch": 0.5747686312713103, | |
| "grad_norm": 0.5799729824066162, | |
| "learning_rate": 4.181213480707637e-05, | |
| "loss": 0.5500263214111328, | |
| "memory(GiB)": 291.53, | |
| "step": 295, | |
| "token_acc": 0.8245773863751392, | |
| "train_speed(iter/s)": 0.007016 | |
| }, | |
| { | |
| "epoch": 0.5845104724792985, | |
| "grad_norm": 0.5087621212005615, | |
| "learning_rate": 4.0225712953388494e-05, | |
| "loss": 0.5443117618560791, | |
| "memory(GiB)": 291.53, | |
| "step": 300, | |
| "token_acc": 0.8282135401133314, | |
| "train_speed(iter/s)": 0.007015 | |
| }, | |
| { | |
| "epoch": 0.5845104724792985, | |
| "eval_loss": 0.5304377675056458, | |
| "eval_runtime": 26.9276, | |
| "eval_samples_per_second": 0.149, | |
| "eval_steps_per_second": 0.074, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5942523136872869, | |
| "grad_norm": 0.5076440572738647, | |
| "learning_rate": 3.864945895292908e-05, | |
| "loss": 0.6255448818206787, | |
| "memory(GiB)": 291.53, | |
| "step": 305, | |
| "token_acc": 0.8085856491082538, | |
| "train_speed(iter/s)": 0.007003 | |
| }, | |
| { | |
| "epoch": 0.6039941548952752, | |
| "grad_norm": 0.5984292030334473, | |
| "learning_rate": 3.708501252829386e-05, | |
| "loss": 0.5672544956207275, | |
| "memory(GiB)": 291.53, | |
| "step": 310, | |
| "token_acc": 0.8221675325859084, | |
| "train_speed(iter/s)": 0.007004 | |
| }, | |
| { | |
| "epoch": 0.6137359961032636, | |
| "grad_norm": 0.4321894645690918, | |
| "learning_rate": 3.553400111906523e-05, | |
| "loss": 0.5947960376739502, | |
| "memory(GiB)": 291.53, | |
| "step": 315, | |
| "token_acc": 0.8143758155719878, | |
| "train_speed(iter/s)": 0.007012 | |
| }, | |
| { | |
| "epoch": 0.6234778373112518, | |
| "grad_norm": 0.7790753245353699, | |
| "learning_rate": 3.399803818884311e-05, | |
| "loss": 0.5950922489166259, | |
| "memory(GiB)": 291.53, | |
| "step": 320, | |
| "token_acc": 0.8138864880437914, | |
| "train_speed(iter/s)": 0.007019 | |
| }, | |
| { | |
| "epoch": 0.6332196785192401, | |
| "grad_norm": 0.37229499220848083, | |
| "learning_rate": 3.247872154681439e-05, | |
| "loss": 0.5663117408752442, | |
| "memory(GiB)": 291.53, | |
| "step": 325, | |
| "token_acc": 0.8152720218675602, | |
| "train_speed(iter/s)": 0.007009 | |
| }, | |
| { | |
| "epoch": 0.6429615197272285, | |
| "grad_norm": 0.5171226859092712, | |
| "learning_rate": 3.097763168560741e-05, | |
| "loss": 0.5201399803161622, | |
| "memory(GiB)": 291.53, | |
| "step": 330, | |
| "token_acc": 0.8316269396896496, | |
| "train_speed(iter/s)": 0.007019 | |
| }, | |
| { | |
| "epoch": 0.6527033609352167, | |
| "grad_norm": 0.579806923866272, | |
| "learning_rate": 2.949633013715982e-05, | |
| "loss": 0.576199722290039, | |
| "memory(GiB)": 291.53, | |
| "step": 335, | |
| "token_acc": 0.8226395939086294, | |
| "train_speed(iter/s)": 0.007018 | |
| }, | |
| { | |
| "epoch": 0.662445202143205, | |
| "grad_norm": 0.5151726007461548, | |
| "learning_rate": 2.8036357848311012e-05, | |
| "loss": 0.5100949287414551, | |
| "memory(GiB)": 291.53, | |
| "step": 340, | |
| "token_acc": 0.835439610037913, | |
| "train_speed(iter/s)": 0.007019 | |
| }, | |
| { | |
| "epoch": 0.6721870433511934, | |
| "grad_norm": 0.6563583612442017, | |
| "learning_rate": 2.659923357780828e-05, | |
| "loss": 0.5569330215454101, | |
| "memory(GiB)": 291.53, | |
| "step": 345, | |
| "token_acc": 0.8189111231296197, | |
| "train_speed(iter/s)": 0.007026 | |
| }, | |
| { | |
| "epoch": 0.6819288845591817, | |
| "grad_norm": 0.4978316128253937, | |
| "learning_rate": 2.518645231639457e-05, | |
| "loss": 0.5415875434875488, | |
| "memory(GiB)": 291.53, | |
| "step": 350, | |
| "token_acc": 0.8224528817263043, | |
| "train_speed(iter/s)": 0.007018 | |
| }, | |
| { | |
| "epoch": 0.69167072576717, | |
| "grad_norm": 0.3655760586261749, | |
| "learning_rate": 2.3799483731621237e-05, | |
| "loss": 0.5323990821838379, | |
| "memory(GiB)": 291.53, | |
| "step": 355, | |
| "token_acc": 0.8286348880915769, | |
| "train_speed(iter/s)": 0.007019 | |
| }, | |
| { | |
| "epoch": 0.7014125669751583, | |
| "grad_norm": 0.5881947875022888, | |
| "learning_rate": 2.2439770639003627e-05, | |
| "loss": 0.5367710590362549, | |
| "memory(GiB)": 291.53, | |
| "step": 360, | |
| "token_acc": 0.8259178446067735, | |
| "train_speed(iter/s)": 0.007019 | |
| }, | |
| { | |
| "epoch": 0.7111544081831466, | |
| "grad_norm": 0.4462352991104126, | |
| "learning_rate": 2.110872750110996e-05, | |
| "loss": 0.6012316703796386, | |
| "memory(GiB)": 291.53, | |
| "step": 365, | |
| "token_acc": 0.8113236655353286, | |
| "train_speed(iter/s)": 0.007025 | |
| }, | |
| { | |
| "epoch": 0.720896249391135, | |
| "grad_norm": 0.5265180468559265, | |
| "learning_rate": 1.980773895614481e-05, | |
| "loss": 0.5925416946411133, | |
| "memory(GiB)": 291.53, | |
| "step": 370, | |
| "token_acc": 0.8087358252834943, | |
| "train_speed(iter/s)": 0.007037 | |
| }, | |
| { | |
| "epoch": 0.7306380905991232, | |
| "grad_norm": 0.5589653253555298, | |
| "learning_rate": 1.8538158377557702e-05, | |
| "loss": 0.554245138168335, | |
| "memory(GiB)": 291.53, | |
| "step": 375, | |
| "token_acc": 0.8206760127289928, | |
| "train_speed(iter/s)": 0.007042 | |
| }, | |
| { | |
| "epoch": 0.7403799318071116, | |
| "grad_norm": 0.49020129442214966, | |
| "learning_rate": 1.7301306466175533e-05, | |
| "loss": 0.5485538482666016, | |
| "memory(GiB)": 291.53, | |
| "step": 380, | |
| "token_acc": 0.8174893357708715, | |
| "train_speed(iter/s)": 0.007043 | |
| }, | |
| { | |
| "epoch": 0.7501217730150999, | |
| "grad_norm": 0.8308594226837158, | |
| "learning_rate": 1.6098469876323093e-05, | |
| "loss": 0.5580153465270996, | |
| "memory(GiB)": 291.53, | |
| "step": 385, | |
| "token_acc": 0.8252497729336966, | |
| "train_speed(iter/s)": 0.007044 | |
| }, | |
| { | |
| "epoch": 0.7598636142230881, | |
| "grad_norm": 0.9425000548362732, | |
| "learning_rate": 1.4930899877361015e-05, | |
| "loss": 0.5678109169006348, | |
| "memory(GiB)": 291.53, | |
| "step": 390, | |
| "token_acc": 0.8189367271414387, | |
| "train_speed(iter/s)": 0.007047 | |
| }, | |
| { | |
| "epoch": 0.7696054554310765, | |
| "grad_norm": 0.46440383791923523, | |
| "learning_rate": 1.3799811052033467e-05, | |
| "loss": 0.5055109024047851, | |
| "memory(GiB)": 291.53, | |
| "step": 395, | |
| "token_acc": 0.835208145167912, | |
| "train_speed(iter/s)": 0.007044 | |
| }, | |
| { | |
| "epoch": 0.7793472966390648, | |
| "grad_norm": 0.6887357831001282, | |
| "learning_rate": 1.2706380032979691e-05, | |
| "loss": 0.6191585540771485, | |
| "memory(GiB)": 291.53, | |
| "step": 400, | |
| "token_acc": 0.801925584194163, | |
| "train_speed(iter/s)": 0.007049 | |
| }, | |
| { | |
| "epoch": 0.7793472966390648, | |
| "eval_loss": 0.5201972723007202, | |
| "eval_runtime": 26.7707, | |
| "eval_samples_per_second": 0.149, | |
| "eval_steps_per_second": 0.075, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7890891378470531, | |
| "grad_norm": 0.6512044072151184, | |
| "learning_rate": 1.1651744278723687e-05, | |
| "loss": 0.5115838527679444, | |
| "memory(GiB)": 291.53, | |
| "step": 405, | |
| "token_acc": 0.8362541073384447, | |
| "train_speed(iter/s)": 0.007043 | |
| }, | |
| { | |
| "epoch": 0.7988309790550414, | |
| "grad_norm": 0.36380794644355774, | |
| "learning_rate": 1.0637000890415388e-05, | |
| "loss": 0.5678341865539551, | |
| "memory(GiB)": 291.53, | |
| "step": 410, | |
| "token_acc": 0.8218965517241379, | |
| "train_speed(iter/s)": 0.007056 | |
| }, | |
| { | |
| "epoch": 0.8085728202630297, | |
| "grad_norm": 0.524336576461792, | |
| "learning_rate": 9.663205470554276e-06, | |
| "loss": 0.5683213233947754, | |
| "memory(GiB)": 291.54, | |
| "step": 415, | |
| "token_acc": 0.8144156614593633, | |
| "train_speed(iter/s)": 0.007057 | |
| }, | |
| { | |
| "epoch": 0.8183146614710181, | |
| "grad_norm": 0.510553777217865, | |
| "learning_rate": 8.73137102488249e-06, | |
| "loss": 0.5425020217895508, | |
| "memory(GiB)": 291.54, | |
| "step": 420, | |
| "token_acc": 0.8262775083923909, | |
| "train_speed(iter/s)": 0.007067 | |
| }, | |
| { | |
| "epoch": 0.8280565026790063, | |
| "grad_norm": 0.4342099726200104, | |
| "learning_rate": 7.842466908590006e-06, | |
| "loss": 0.4687312602996826, | |
| "memory(GiB)": 291.54, | |
| "step": 425, | |
| "token_acc": 0.8489349775784754, | |
| "train_speed(iter/s)": 0.007065 | |
| }, | |
| { | |
| "epoch": 0.8377983438869946, | |
| "grad_norm": 0.47141674160957336, | |
| "learning_rate": 6.997417817927865e-06, | |
| "loss": 0.5220559597015381, | |
| "memory(GiB)": 291.54, | |
| "step": 430, | |
| "token_acc": 0.8345159448658348, | |
| "train_speed(iter/s)": 0.007065 | |
| }, | |
| { | |
| "epoch": 0.847540185094983, | |
| "grad_norm": 0.7017420530319214, | |
| "learning_rate": 6.197102828278611e-06, | |
| "loss": 0.4900541305541992, | |
| "memory(GiB)": 291.54, | |
| "step": 435, | |
| "token_acc": 0.846220772158782, | |
| "train_speed(iter/s)": 0.007061 | |
| }, | |
| { | |
| "epoch": 0.8572820263029712, | |
| "grad_norm": 0.9358065724372864, | |
| "learning_rate": 5.442354479684558e-06, | |
| "loss": 0.5937717914581299, | |
| "memory(GiB)": 291.54, | |
| "step": 440, | |
| "token_acc": 0.8115085230071928, | |
| "train_speed(iter/s)": 0.007068 | |
| }, | |
| { | |
| "epoch": 0.8670238675109596, | |
| "grad_norm": 0.6291302442550659, | |
| "learning_rate": 4.733957910785114e-06, | |
| "loss": 0.5440679073333741, | |
| "memory(GiB)": 291.54, | |
| "step": 445, | |
| "token_acc": 0.8238650458943191, | |
| "train_speed(iter/s)": 0.007071 | |
| }, | |
| { | |
| "epoch": 0.8767657087189479, | |
| "grad_norm": 0.7625430822372437, | |
| "learning_rate": 4.072650042064174e-06, | |
| "loss": 0.5928035736083984, | |
| "memory(GiB)": 291.54, | |
| "step": 450, | |
| "token_acc": 0.8072507552870091, | |
| "train_speed(iter/s)": 0.007064 | |
| }, | |
| { | |
| "epoch": 0.8865075499269361, | |
| "grad_norm": 0.5580248832702637, | |
| "learning_rate": 3.4591188092571893e-06, | |
| "loss": 0.5281401634216308, | |
| "memory(GiB)": 291.54, | |
| "step": 455, | |
| "token_acc": 0.8284527518172378, | |
| "train_speed(iter/s)": 0.007059 | |
| }, | |
| { | |
| "epoch": 0.8962493911349245, | |
| "grad_norm": 0.6311898827552795, | |
| "learning_rate": 2.894002447715399e-06, | |
| "loss": 0.483402156829834, | |
| "memory(GiB)": 291.54, | |
| "step": 460, | |
| "token_acc": 0.846503178928247, | |
| "train_speed(iter/s)": 0.007059 | |
| }, | |
| { | |
| "epoch": 0.9059912323429128, | |
| "grad_norm": 0.47069641947746277, | |
| "learning_rate": 2.3778888284716193e-06, | |
| "loss": 0.5357548236846924, | |
| "memory(GiB)": 291.54, | |
| "step": 465, | |
| "token_acc": 0.8302510497632449, | |
| "train_speed(iter/s)": 0.007062 | |
| }, | |
| { | |
| "epoch": 0.9157330735509012, | |
| "grad_norm": 0.6504586935043335, | |
| "learning_rate": 1.9113148466983254e-06, | |
| "loss": 0.4945201873779297, | |
| "memory(GiB)": 291.54, | |
| "step": 470, | |
| "token_acc": 0.836600057366861, | |
| "train_speed(iter/s)": 0.007062 | |
| }, | |
| { | |
| "epoch": 0.9254749147588894, | |
| "grad_norm": 0.8726986646652222, | |
| "learning_rate": 1.4947658631941309e-06, | |
| "loss": 0.6114956855773925, | |
| "memory(GiB)": 291.54, | |
| "step": 475, | |
| "token_acc": 0.8079200232941861, | |
| "train_speed(iter/s)": 0.007069 | |
| }, | |
| { | |
| "epoch": 0.9352167559668777, | |
| "grad_norm": 0.4940550625324249, | |
| "learning_rate": 1.1286751994797284e-06, | |
| "loss": 0.4838570117950439, | |
| "memory(GiB)": 291.54, | |
| "step": 480, | |
| "token_acc": 0.8428108691740147, | |
| "train_speed(iter/s)": 0.007071 | |
| }, | |
| { | |
| "epoch": 0.9449585971748661, | |
| "grad_norm": 0.47085583209991455, | |
| "learning_rate": 8.134236870284861e-07, | |
| "loss": 0.5545851230621338, | |
| "memory(GiB)": 291.54, | |
| "step": 485, | |
| "token_acc": 0.8195587462791105, | |
| "train_speed(iter/s)": 0.007076 | |
| }, | |
| { | |
| "epoch": 0.9547004383828543, | |
| "grad_norm": 0.47154033184051514, | |
| "learning_rate": 5.493392711005796e-07, | |
| "loss": 0.5688926696777343, | |
| "memory(GiB)": 291.54, | |
| "step": 490, | |
| "token_acc": 0.8178977272727272, | |
| "train_speed(iter/s)": 0.007077 | |
| }, | |
| { | |
| "epoch": 0.9644422795908427, | |
| "grad_norm": 0.49407562613487244, | |
| "learning_rate": 3.366966695929119e-07, | |
| "loss": 0.5036429405212403, | |
| "memory(GiB)": 291.54, | |
| "step": 495, | |
| "token_acc": 0.8390853451207072, | |
| "train_speed(iter/s)": 0.007075 | |
| }, | |
| { | |
| "epoch": 0.974184120798831, | |
| "grad_norm": 0.5743114352226257, | |
| "learning_rate": 1.7571708725953596e-07, | |
| "loss": 0.514036750793457, | |
| "memory(GiB)": 291.54, | |
| "step": 500, | |
| "token_acc": 0.8316027940343591, | |
| "train_speed(iter/s)": 0.007081 | |
| }, | |
| { | |
| "epoch": 0.974184120798831, | |
| "eval_loss": 0.5129119753837585, | |
| "eval_runtime": 26.8989, | |
| "eval_samples_per_second": 0.149, | |
| "eval_steps_per_second": 0.074, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9839259620068193, | |
| "grad_norm": 0.5348408818244934, | |
| "learning_rate": 6.656798560001343e-08, | |
| "loss": 0.5319667816162109, | |
| "memory(GiB)": 291.54, | |
| "step": 505, | |
| "token_acc": 0.8256676557863502, | |
| "train_speed(iter/s)": 0.007071 | |
| }, | |
| { | |
| "epoch": 0.9936678032148076, | |
| "grad_norm": 0.5291311144828796, | |
| "learning_rate": 9.362908654986235e-09, | |
| "loss": 0.5844589233398437, | |
| "memory(GiB)": 291.54, | |
| "step": 510, | |
| "token_acc": 0.8063741960257271, | |
| "train_speed(iter/s)": 0.007071 | |
| }, | |
| { | |
| "epoch": 0.9995129079396006, | |
| "eval_loss": 0.5115394592285156, | |
| "eval_runtime": 26.9687, | |
| "eval_samples_per_second": 0.148, | |
| "eval_steps_per_second": 0.074, | |
| "step": 513 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 513, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.13335087128363e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |