| { |
| "best_metric": 0.7681954999183362, |
| "best_model_checkpoint": "/p/scratch/ccstdl/krishna/finetuned-cosine-loss/checkpoint-630", |
| "epoch": 0.2564102564102564, |
| "eval_steps": 10, |
| "global_step": 630, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00407000407000407, |
| "grad_norm": 1.8248943090438843, |
| "learning_rate": 3.391670058336725e-09, |
| "loss": 0.7718, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00407000407000407, |
| "eval_cos_sim": 0.2161455750465393, |
| "eval_loss": 0.7850055160735792, |
| "eval_runtime": 91.1558, |
| "eval_samples_per_second": 10.97, |
| "eval_steps_per_second": 0.351, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "grad_norm": 1.5165057182312012, |
| "learning_rate": 6.78334011667345e-09, |
| "loss": 0.7694, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "eval_cos_sim": 0.21615812182426453, |
| "eval_loss": 0.7849929766868299, |
| "eval_runtime": 88.5893, |
| "eval_samples_per_second": 11.288, |
| "eval_steps_per_second": 0.361, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "grad_norm": 1.8078064918518066, |
| "learning_rate": 1.0175010175010176e-08, |
| "loss": 0.7875, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "eval_cos_sim": 0.21617917716503143, |
| "eval_loss": 0.7849719367240614, |
| "eval_runtime": 88.6143, |
| "eval_samples_per_second": 11.285, |
| "eval_steps_per_second": 0.361, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "grad_norm": 1.8216888904571533, |
| "learning_rate": 1.35666802333469e-08, |
| "loss": 0.7742, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "eval_cos_sim": 0.21620890498161316, |
| "eval_loss": 0.7849422373984999, |
| "eval_runtime": 88.4565, |
| "eval_samples_per_second": 11.305, |
| "eval_steps_per_second": 0.362, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "grad_norm": 1.7948490381240845, |
| "learning_rate": 1.6958350291683625e-08, |
| "loss": 0.7683, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "eval_cos_sim": 0.2162467986345291, |
| "eval_loss": 0.7849043712829298, |
| "eval_runtime": 88.6129, |
| "eval_samples_per_second": 11.285, |
| "eval_steps_per_second": 0.361, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "grad_norm": 1.5213645696640015, |
| "learning_rate": 2.035002035002035e-08, |
| "loss": 0.7773, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "eval_cos_sim": 0.21629446744918823, |
| "eval_loss": 0.7848567362044996, |
| "eval_runtime": 88.6351, |
| "eval_samples_per_second": 11.282, |
| "eval_steps_per_second": 0.361, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "grad_norm": 1.816196322441101, |
| "learning_rate": 2.3741690408357078e-08, |
| "loss": 0.7677, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "eval_cos_sim": 0.21635092794895172, |
| "eval_loss": 0.7848003177856153, |
| "eval_runtime": 88.6682, |
| "eval_samples_per_second": 11.278, |
| "eval_steps_per_second": 0.361, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "grad_norm": 1.8195900917053223, |
| "learning_rate": 2.71333604666938e-08, |
| "loss": 0.78, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "eval_cos_sim": 0.21641571819782257, |
| "eval_loss": 0.7847355809425062, |
| "eval_runtime": 88.4895, |
| "eval_samples_per_second": 11.301, |
| "eval_steps_per_second": 0.362, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "grad_norm": 1.7980706691741943, |
| "learning_rate": 3.052503052503053e-08, |
| "loss": 0.7655, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "eval_cos_sim": 0.2164890468120575, |
| "eval_loss": 0.7846623053764051, |
| "eval_runtime": 88.0594, |
| "eval_samples_per_second": 11.356, |
| "eval_steps_per_second": 0.363, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "grad_norm": 1.8171244859695435, |
| "learning_rate": 3.391670058336725e-08, |
| "loss": 0.7747, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "eval_cos_sim": 0.2165709137916565, |
| "eval_loss": 0.7845804929946607, |
| "eval_runtime": 88.4211, |
| "eval_samples_per_second": 11.31, |
| "eval_steps_per_second": 0.362, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "grad_norm": 1.529422640800476, |
| "learning_rate": 3.730837064170397e-08, |
| "loss": 0.776, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "eval_cos_sim": 0.2166617214679718, |
| "eval_loss": 0.7844897389625257, |
| "eval_runtime": 88.6911, |
| "eval_samples_per_second": 11.275, |
| "eval_steps_per_second": 0.361, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "grad_norm": 1.828736424446106, |
| "learning_rate": 4.07000407000407e-08, |
| "loss": 0.7757, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "eval_cos_sim": 0.216759592294693, |
| "eval_loss": 0.7843919339393324, |
| "eval_runtime": 89.0261, |
| "eval_samples_per_second": 11.233, |
| "eval_steps_per_second": 0.359, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "grad_norm": 1.5348294973373413, |
| "learning_rate": 4.4091710758377425e-08, |
| "loss": 0.7829, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "eval_cos_sim": 0.21686479449272156, |
| "eval_loss": 0.7842868261550612, |
| "eval_runtime": 88.7305, |
| "eval_samples_per_second": 11.27, |
| "eval_steps_per_second": 0.361, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "grad_norm": 1.8242194652557373, |
| "learning_rate": 4.7483380816714155e-08, |
| "loss": 0.7794, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "eval_cos_sim": 0.21697954833507538, |
| "eval_loss": 0.7841721568320936, |
| "eval_runtime": 88.4802, |
| "eval_samples_per_second": 11.302, |
| "eval_steps_per_second": 0.362, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "grad_norm": 1.4458454847335815, |
| "learning_rate": 5.087505087505087e-08, |
| "loss": 0.7751, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "eval_cos_sim": 0.21710529923439026, |
| "eval_loss": 0.7840465169166273, |
| "eval_runtime": 89.0281, |
| "eval_samples_per_second": 11.232, |
| "eval_steps_per_second": 0.359, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "grad_norm": 1.8203269243240356, |
| "learning_rate": 5.42667209333876e-08, |
| "loss": 0.7681, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "eval_cos_sim": 0.21723878383636475, |
| "eval_loss": 0.7839131169532484, |
| "eval_runtime": 88.2381, |
| "eval_samples_per_second": 11.333, |
| "eval_steps_per_second": 0.363, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "grad_norm": 1.5368891954421997, |
| "learning_rate": 5.7658390991724324e-08, |
| "loss": 0.7704, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "eval_cos_sim": 0.21737951040267944, |
| "eval_loss": 0.783772478601808, |
| "eval_runtime": 88.519, |
| "eval_samples_per_second": 11.297, |
| "eval_steps_per_second": 0.362, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "grad_norm": 1.5333787202835083, |
| "learning_rate": 6.105006105006105e-08, |
| "loss": 0.7713, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "eval_cos_sim": 0.21752700209617615, |
| "eval_loss": 0.7836251006339735, |
| "eval_runtime": 88.1346, |
| "eval_samples_per_second": 11.346, |
| "eval_steps_per_second": 0.363, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "grad_norm": 1.8334540128707886, |
| "learning_rate": 6.444173110839778e-08, |
| "loss": 0.7818, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "eval_cos_sim": 0.2176828682422638, |
| "eval_loss": 0.7834693560813611, |
| "eval_runtime": 88.9414, |
| "eval_samples_per_second": 11.243, |
| "eval_steps_per_second": 0.36, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "grad_norm": 1.7938237190246582, |
| "learning_rate": 6.78334011667345e-08, |
| "loss": 0.7756, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "eval_cos_sim": 0.21784724295139313, |
| "eval_loss": 0.7833051133369154, |
| "eval_runtime": 88.7619, |
| "eval_samples_per_second": 11.266, |
| "eval_steps_per_second": 0.361, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 1.8335057497024536, |
| "learning_rate": 7.122507122507124e-08, |
| "loss": 0.7742, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "eval_cos_sim": 0.21802641451358795, |
| "eval_loss": 0.7831260886405653, |
| "eval_runtime": 88.6351, |
| "eval_samples_per_second": 11.282, |
| "eval_steps_per_second": 0.361, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "grad_norm": 1.7981688976287842, |
| "learning_rate": 7.461674128340795e-08, |
| "loss": 0.7744, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "eval_cos_sim": 0.21821342408657074, |
| "eval_loss": 0.7829392380927748, |
| "eval_runtime": 88.6647, |
| "eval_samples_per_second": 11.278, |
| "eval_steps_per_second": 0.361, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "grad_norm": 1.658451795578003, |
| "learning_rate": 7.800841134174468e-08, |
| "loss": 0.7703, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "eval_cos_sim": 0.21841329336166382, |
| "eval_loss": 0.7827395224784559, |
| "eval_runtime": 88.7603, |
| "eval_samples_per_second": 11.266, |
| "eval_steps_per_second": 0.361, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "grad_norm": 1.794303297996521, |
| "learning_rate": 8.14000814000814e-08, |
| "loss": 0.7663, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "eval_cos_sim": 0.21862231194972992, |
| "eval_loss": 0.782530607721681, |
| "eval_runtime": 88.7986, |
| "eval_samples_per_second": 11.261, |
| "eval_steps_per_second": 0.36, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "grad_norm": 1.7992609739303589, |
| "learning_rate": 8.479175145841813e-08, |
| "loss": 0.7736, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "eval_cos_sim": 0.21883440017700195, |
| "eval_loss": 0.7823186969970412, |
| "eval_runtime": 88.8672, |
| "eval_samples_per_second": 11.253, |
| "eval_steps_per_second": 0.36, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "grad_norm": 1.8333110809326172, |
| "learning_rate": 8.818342151675485e-08, |
| "loss": 0.7732, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "eval_cos_sim": 0.21905279159545898, |
| "eval_loss": 0.782100461981172, |
| "eval_runtime": 88.9248, |
| "eval_samples_per_second": 11.245, |
| "eval_steps_per_second": 0.36, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 1.8126780986785889, |
| "learning_rate": 9.157509157509157e-08, |
| "loss": 0.7731, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "eval_cos_sim": 0.2192821353673935, |
| "eval_loss": 0.7818713002418226, |
| "eval_runtime": 89.0894, |
| "eval_samples_per_second": 11.225, |
| "eval_steps_per_second": 0.359, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "grad_norm": 1.8034828901290894, |
| "learning_rate": 9.496676163342831e-08, |
| "loss": 0.7653, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "eval_cos_sim": 0.21952229738235474, |
| "eval_loss": 0.781631359598512, |
| "eval_runtime": 88.5405, |
| "eval_samples_per_second": 11.294, |
| "eval_steps_per_second": 0.361, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "grad_norm": 1.5344951152801514, |
| "learning_rate": 9.835843169176503e-08, |
| "loss": 0.7647, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "eval_cos_sim": 0.21977314352989197, |
| "eval_loss": 0.7813806886886304, |
| "eval_runtime": 89.249, |
| "eval_samples_per_second": 11.205, |
| "eval_steps_per_second": 0.359, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "grad_norm": 1.5414931774139404, |
| "learning_rate": 1.0175010175010174e-07, |
| "loss": 0.7778, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "eval_cos_sim": 0.2200283408164978, |
| "eval_loss": 0.7811256456588453, |
| "eval_runtime": 88.215, |
| "eval_samples_per_second": 11.336, |
| "eval_steps_per_second": 0.363, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "grad_norm": 1.507014513015747, |
| "learning_rate": 1.0514177180843848e-07, |
| "loss": 0.7792, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "eval_cos_sim": 0.22028397023677826, |
| "eval_loss": 0.7808702659820265, |
| "eval_runtime": 89.0436, |
| "eval_samples_per_second": 11.23, |
| "eval_steps_per_second": 0.359, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "grad_norm": 1.8126572370529175, |
| "learning_rate": 1.085334418667752e-07, |
| "loss": 0.7647, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "eval_cos_sim": 0.22054480016231537, |
| "eval_loss": 0.7806096720908827, |
| "eval_runtime": 89.0304, |
| "eval_samples_per_second": 11.232, |
| "eval_steps_per_second": 0.359, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "grad_norm": 1.8201279640197754, |
| "learning_rate": 1.1192511192511194e-07, |
| "loss": 0.7791, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "eval_cos_sim": 0.22081315517425537, |
| "eval_loss": 0.7803415045951552, |
| "eval_runtime": 89.0654, |
| "eval_samples_per_second": 11.228, |
| "eval_steps_per_second": 0.359, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "grad_norm": 1.7973226308822632, |
| "learning_rate": 1.1531678198344865e-07, |
| "loss": 0.7672, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "eval_cos_sim": 0.22110143303871155, |
| "eval_loss": 0.7800534692023939, |
| "eval_runtime": 88.2286, |
| "eval_samples_per_second": 11.334, |
| "eval_steps_per_second": 0.363, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "grad_norm": 1.8215441703796387, |
| "learning_rate": 1.1870845204178537e-07, |
| "loss": 0.7699, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "eval_cos_sim": 0.22139768302440643, |
| "eval_loss": 0.7797573957656568, |
| "eval_runtime": 88.8585, |
| "eval_samples_per_second": 11.254, |
| "eval_steps_per_second": 0.36, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 1.8159250020980835, |
| "learning_rate": 1.221001221001221e-07, |
| "loss": 0.7712, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "eval_cos_sim": 0.2217043787240982, |
| "eval_loss": 0.7794509191726393, |
| "eval_runtime": 88.9767, |
| "eval_samples_per_second": 11.239, |
| "eval_steps_per_second": 0.36, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "grad_norm": 1.7892403602600098, |
| "learning_rate": 1.2549179215845883e-07, |
| "loss": 0.7594, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "eval_cos_sim": 0.22202090919017792, |
| "eval_loss": 0.7791346249793715, |
| "eval_runtime": 88.4113, |
| "eval_samples_per_second": 11.311, |
| "eval_steps_per_second": 0.362, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "grad_norm": 1.499679446220398, |
| "learning_rate": 1.2888346221679555e-07, |
| "loss": 0.7702, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "eval_cos_sim": 0.22234594821929932, |
| "eval_loss": 0.7788098197196669, |
| "eval_runtime": 89.1927, |
| "eval_samples_per_second": 11.212, |
| "eval_steps_per_second": 0.359, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 1.827500343322754, |
| "learning_rate": 1.3227513227513228e-07, |
| "loss": 0.7578, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "eval_cos_sim": 0.22267462313175201, |
| "eval_loss": 0.7784813823913282, |
| "eval_runtime": 89.2341, |
| "eval_samples_per_second": 11.206, |
| "eval_steps_per_second": 0.359, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "grad_norm": 1.8155056238174438, |
| "learning_rate": 1.35666802333469e-07, |
| "loss": 0.7663, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "eval_cos_sim": 0.2230025976896286, |
| "eval_loss": 0.7781536517356581, |
| "eval_runtime": 89.0874, |
| "eval_samples_per_second": 11.225, |
| "eval_steps_per_second": 0.359, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "grad_norm": 1.5147600173950195, |
| "learning_rate": 1.3905847239180572e-07, |
| "loss": 0.7685, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "eval_cos_sim": 0.2233392745256424, |
| "eval_loss": 0.7778171930526442, |
| "eval_runtime": 89.0688, |
| "eval_samples_per_second": 11.227, |
| "eval_steps_per_second": 0.359, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 1.5169967412948608, |
| "learning_rate": 1.4245014245014247e-07, |
| "loss": 0.7614, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "eval_cos_sim": 0.2236924022436142, |
| "eval_loss": 0.7774643163894361, |
| "eval_runtime": 88.9798, |
| "eval_samples_per_second": 11.239, |
| "eval_steps_per_second": 0.36, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "grad_norm": 1.5205143690109253, |
| "learning_rate": 1.4584181250847917e-07, |
| "loss": 0.7646, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "eval_cos_sim": 0.224058136343956, |
| "eval_loss": 0.7770988393043227, |
| "eval_runtime": 89.0477, |
| "eval_samples_per_second": 11.23, |
| "eval_steps_per_second": 0.359, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "grad_norm": 1.5187054872512817, |
| "learning_rate": 1.492334825668159e-07, |
| "loss": 0.775, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "eval_cos_sim": 0.22443027794361115, |
| "eval_loss": 0.7767269988273329, |
| "eval_runtime": 89.3697, |
| "eval_samples_per_second": 11.189, |
| "eval_steps_per_second": 0.358, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "grad_norm": 1.8240047693252563, |
| "learning_rate": 1.5262515262515264e-07, |
| "loss": 0.7622, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "eval_cos_sim": 0.22480851411819458, |
| "eval_loss": 0.7763490748618788, |
| "eval_runtime": 89.2014, |
| "eval_samples_per_second": 11.211, |
| "eval_steps_per_second": 0.359, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "grad_norm": 1.8208777904510498, |
| "learning_rate": 1.5601682268348936e-07, |
| "loss": 0.7563, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "eval_cos_sim": 0.2251981645822525, |
| "eval_loss": 0.7759597497199721, |
| "eval_runtime": 89.202, |
| "eval_samples_per_second": 11.211, |
| "eval_steps_per_second": 0.359, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "grad_norm": 1.523047924041748, |
| "learning_rate": 1.594084927418261e-07, |
| "loss": 0.7613, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "eval_cos_sim": 0.2256050556898117, |
| "eval_loss": 0.775553204080934, |
| "eval_runtime": 88.9659, |
| "eval_samples_per_second": 11.24, |
| "eval_steps_per_second": 0.36, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "grad_norm": 1.7896461486816406, |
| "learning_rate": 1.628001628001628e-07, |
| "loss": 0.7571, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "eval_cos_sim": 0.2260132133960724, |
| "eval_loss": 0.7751453356956189, |
| "eval_runtime": 89.3097, |
| "eval_samples_per_second": 11.197, |
| "eval_steps_per_second": 0.358, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "grad_norm": 1.802852749824524, |
| "learning_rate": 1.6619183285849953e-07, |
| "loss": 0.7666, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "eval_cos_sim": 0.2264304757118225, |
| "eval_loss": 0.7747283859466261, |
| "eval_runtime": 89.0947, |
| "eval_samples_per_second": 11.224, |
| "eval_steps_per_second": 0.359, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "grad_norm": 1.821495532989502, |
| "learning_rate": 1.6958350291683626e-07, |
| "loss": 0.7624, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "eval_cos_sim": 0.22685033082962036, |
| "eval_loss": 0.7743088460181898, |
| "eval_runtime": 89.2762, |
| "eval_samples_per_second": 11.201, |
| "eval_steps_per_second": 0.358, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "grad_norm": 1.545665979385376, |
| "learning_rate": 1.7297517297517298e-07, |
| "loss": 0.7694, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "eval_cos_sim": 0.22728146612644196, |
| "eval_loss": 0.7738780179237074, |
| "eval_runtime": 88.7471, |
| "eval_samples_per_second": 11.268, |
| "eval_steps_per_second": 0.361, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "grad_norm": 1.7993839979171753, |
| "learning_rate": 1.763668430335097e-07, |
| "loss": 0.7601, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "eval_cos_sim": 0.2277180701494217, |
| "eval_loss": 0.7734417824958509, |
| "eval_runtime": 89.3566, |
| "eval_samples_per_second": 11.191, |
| "eval_steps_per_second": 0.358, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "grad_norm": 1.7981739044189453, |
| "learning_rate": 1.7975851309184642e-07, |
| "loss": 0.7572, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "eval_cos_sim": 0.22816509008407593, |
| "eval_loss": 0.7729950847839063, |
| "eval_runtime": 89.1907, |
| "eval_samples_per_second": 11.212, |
| "eval_steps_per_second": 0.359, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 1.4281421899795532, |
| "learning_rate": 1.8315018315018315e-07, |
| "loss": 0.7713, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "eval_cos_sim": 0.2286159247159958, |
| "eval_loss": 0.7725445466254895, |
| "eval_runtime": 88.9936, |
| "eval_samples_per_second": 11.237, |
| "eval_steps_per_second": 0.36, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "grad_norm": 1.800079107284546, |
| "learning_rate": 1.865418532085199e-07, |
| "loss": 0.7637, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "eval_cos_sim": 0.22906461358070374, |
| "eval_loss": 0.772096163771028, |
| "eval_runtime": 88.9615, |
| "eval_samples_per_second": 11.241, |
| "eval_steps_per_second": 0.36, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "grad_norm": 1.5776735544204712, |
| "learning_rate": 1.8993352326685662e-07, |
| "loss": 0.7699, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "eval_cos_sim": 0.2295254021883011, |
| "eval_loss": 0.771635666391725, |
| "eval_runtime": 88.9524, |
| "eval_samples_per_second": 11.242, |
| "eval_steps_per_second": 0.36, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "grad_norm": 1.8183437585830688, |
| "learning_rate": 1.9332519332519332e-07, |
| "loss": 0.7634, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "eval_cos_sim": 0.22998037934303284, |
| "eval_loss": 0.7711810879920667, |
| "eval_runtime": 88.9474, |
| "eval_samples_per_second": 11.243, |
| "eval_steps_per_second": 0.36, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "grad_norm": 1.8223985433578491, |
| "learning_rate": 1.9671686338353007e-07, |
| "loss": 0.7772, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "eval_cos_sim": 0.23044081032276154, |
| "eval_loss": 0.7707209568237012, |
| "eval_runtime": 88.7661, |
| "eval_samples_per_second": 11.266, |
| "eval_steps_per_second": 0.36, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "grad_norm": 1.7877051830291748, |
| "learning_rate": 2.001085334418668e-07, |
| "loss": 0.7534, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "eval_cos_sim": 0.23091764748096466, |
| "eval_loss": 0.770244472048158, |
| "eval_runtime": 89.1681, |
| "eval_samples_per_second": 11.215, |
| "eval_steps_per_second": 0.359, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "grad_norm": 1.6694035530090332, |
| "learning_rate": 2.0350020350020349e-07, |
| "loss": 0.764, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "eval_cos_sim": 0.23141798377037048, |
| "eval_loss": 0.769744483969087, |
| "eval_runtime": 89.1824, |
| "eval_samples_per_second": 11.213, |
| "eval_steps_per_second": 0.359, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "grad_norm": 1.5185260772705078, |
| "learning_rate": 2.0689187355854024e-07, |
| "loss": 0.7641, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "eval_cos_sim": 0.2319241613149643, |
| "eval_loss": 0.7692386307929701, |
| "eval_runtime": 89.2776, |
| "eval_samples_per_second": 11.201, |
| "eval_steps_per_second": 0.358, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "grad_norm": 1.8109288215637207, |
| "learning_rate": 2.1028354361687696e-07, |
| "loss": 0.7554, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "eval_cos_sim": 0.23244358599185944, |
| "eval_loss": 0.7687196593497938, |
| "eval_runtime": 89.197, |
| "eval_samples_per_second": 11.211, |
| "eval_steps_per_second": 0.359, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 1.8006572723388672, |
| "learning_rate": 2.136752136752137e-07, |
| "loss": 0.7547, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "eval_cos_sim": 0.23296819627285004, |
| "eval_loss": 0.7681954999183362, |
| "eval_runtime": 89.2344, |
| "eval_samples_per_second": 11.206, |
| "eval_steps_per_second": 0.359, |
| "step": 630 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1474200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 600, |
| "save_steps": 10, |
| "total_flos": 0.0, |
| "train_batch_size": 160, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|