| { |
| "best_metric": 0.7333984103416151, |
| "best_model_checkpoint": "/p/scratch/ccstdl/krishna/finetuned-cosine-loss/checkpoint-660", |
| "epoch": 0.2686202686202686, |
| "eval_steps": 10, |
| "global_step": 660, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00407000407000407, |
| "grad_norm": 1.8124009370803833, |
| "learning_rate": 3.391670058336725e-09, |
| "loss": 0.732, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00407000407000407, |
| "eval_cos_sim": 0.25005149841308594, |
| "eval_loss": 0.7511245851730055, |
| "eval_runtime": 89.901, |
| "eval_samples_per_second": 11.123, |
| "eval_steps_per_second": 0.356, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "grad_norm": 1.8144397735595703, |
| "learning_rate": 6.78334011667345e-09, |
| "loss": 0.7341, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "eval_cos_sim": 0.25006377696990967, |
| "eval_loss": 0.7511123490546888, |
| "eval_runtime": 88.402, |
| "eval_samples_per_second": 11.312, |
| "eval_steps_per_second": 0.362, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "grad_norm": 1.7918556928634644, |
| "learning_rate": 1.0175010175010176e-08, |
| "loss": 0.7425, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "eval_cos_sim": 0.2500843405723572, |
| "eval_loss": 0.7510917806838697, |
| "eval_runtime": 88.6959, |
| "eval_samples_per_second": 11.274, |
| "eval_steps_per_second": 0.361, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "grad_norm": 1.804418683052063, |
| "learning_rate": 1.35666802333469e-08, |
| "loss": 0.7437, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "eval_cos_sim": 0.25011318922042847, |
| "eval_loss": 0.7510629492019362, |
| "eval_runtime": 88.1716, |
| "eval_samples_per_second": 11.342, |
| "eval_steps_per_second": 0.363, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "grad_norm": 1.794533133506775, |
| "learning_rate": 1.6958350291683625e-08, |
| "loss": 0.7319, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "eval_cos_sim": 0.2501494884490967, |
| "eval_loss": 0.7510266695235914, |
| "eval_runtime": 87.5335, |
| "eval_samples_per_second": 11.424, |
| "eval_steps_per_second": 0.366, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "grad_norm": 1.5170952081680298, |
| "learning_rate": 2.035002035002035e-08, |
| "loss": 0.7504, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "eval_cos_sim": 0.25019460916519165, |
| "eval_loss": 0.7509815736030286, |
| "eval_runtime": 88.3799, |
| "eval_samples_per_second": 11.315, |
| "eval_steps_per_second": 0.362, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "grad_norm": 1.5275336503982544, |
| "learning_rate": 2.3741690408357078e-08, |
| "loss": 0.7458, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "eval_cos_sim": 0.2502490282058716, |
| "eval_loss": 0.7509272141669935, |
| "eval_runtime": 88.4986, |
| "eval_samples_per_second": 11.3, |
| "eval_steps_per_second": 0.362, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "grad_norm": 1.7890334129333496, |
| "learning_rate": 2.71333604666938e-08, |
| "loss": 0.764, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "eval_cos_sim": 0.2503115236759186, |
| "eval_loss": 0.7508647556518263, |
| "eval_runtime": 88.1347, |
| "eval_samples_per_second": 11.346, |
| "eval_steps_per_second": 0.363, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "grad_norm": 1.8015758991241455, |
| "learning_rate": 3.052503052503053e-08, |
| "loss": 0.7331, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "eval_cos_sim": 0.2503812611103058, |
| "eval_loss": 0.7507950768684095, |
| "eval_runtime": 88.5092, |
| "eval_samples_per_second": 11.298, |
| "eval_steps_per_second": 0.362, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "grad_norm": 1.786906361579895, |
| "learning_rate": 3.391670058336725e-08, |
| "loss": 0.7403, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "eval_cos_sim": 0.25045979022979736, |
| "eval_loss": 0.750716588518495, |
| "eval_runtime": 87.8285, |
| "eval_samples_per_second": 11.386, |
| "eval_steps_per_second": 0.364, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "grad_norm": 1.5222004652023315, |
| "learning_rate": 3.730837064170397e-08, |
| "loss": 0.7402, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "eval_cos_sim": 0.250546395778656, |
| "eval_loss": 0.7506300463889783, |
| "eval_runtime": 88.743, |
| "eval_samples_per_second": 11.268, |
| "eval_steps_per_second": 0.361, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "grad_norm": 1.8194061517715454, |
| "learning_rate": 4.07000407000407e-08, |
| "loss": 0.7417, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "eval_cos_sim": 0.2506391704082489, |
| "eval_loss": 0.7505373349403089, |
| "eval_runtime": 88.5658, |
| "eval_samples_per_second": 11.291, |
| "eval_steps_per_second": 0.361, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "grad_norm": 1.8265488147735596, |
| "learning_rate": 4.4091710758377425e-08, |
| "loss": 0.7297, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "eval_cos_sim": 0.2507420480251312, |
| "eval_loss": 0.7504345479224866, |
| "eval_runtime": 88.6555, |
| "eval_samples_per_second": 11.28, |
| "eval_steps_per_second": 0.361, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "grad_norm": 1.7994428873062134, |
| "learning_rate": 4.7483380816714155e-08, |
| "loss": 0.7484, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "eval_cos_sim": 0.2508507966995239, |
| "eval_loss": 0.7503258481239027, |
| "eval_runtime": 89.045, |
| "eval_samples_per_second": 11.23, |
| "eval_steps_per_second": 0.359, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "grad_norm": 1.5162811279296875, |
| "learning_rate": 5.087505087505087e-08, |
| "loss": 0.7453, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "eval_cos_sim": 0.2509710192680359, |
| "eval_loss": 0.7502057595466322, |
| "eval_runtime": 88.4537, |
| "eval_samples_per_second": 11.305, |
| "eval_steps_per_second": 0.362, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "grad_norm": 1.5280661582946777, |
| "learning_rate": 5.42667209333876e-08, |
| "loss": 0.7404, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "eval_cos_sim": 0.2511015236377716, |
| "eval_loss": 0.7500753340934462, |
| "eval_runtime": 87.8406, |
| "eval_samples_per_second": 11.384, |
| "eval_steps_per_second": 0.364, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "grad_norm": 1.5292084217071533, |
| "learning_rate": 5.7658390991724324e-08, |
| "loss": 0.7317, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "eval_cos_sim": 0.2512374520301819, |
| "eval_loss": 0.7499395051215834, |
| "eval_runtime": 88.4647, |
| "eval_samples_per_second": 11.304, |
| "eval_steps_per_second": 0.362, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "grad_norm": 1.5250757932662964, |
| "learning_rate": 6.105006105006105e-08, |
| "loss": 0.7437, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "eval_cos_sim": 0.25138044357299805, |
| "eval_loss": 0.7497965970252699, |
| "eval_runtime": 88.2947, |
| "eval_samples_per_second": 11.326, |
| "eval_steps_per_second": 0.362, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "grad_norm": 1.8300116062164307, |
| "learning_rate": 6.444173110839778e-08, |
| "loss": 0.7374, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "eval_cos_sim": 0.25153324007987976, |
| "eval_loss": 0.7496438927863782, |
| "eval_runtime": 88.4194, |
| "eval_samples_per_second": 11.31, |
| "eval_steps_per_second": 0.362, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "grad_norm": 1.7857962846755981, |
| "learning_rate": 6.78334011667345e-08, |
| "loss": 0.7294, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "eval_cos_sim": 0.2516980767250061, |
| "eval_loss": 0.7494791708205885, |
| "eval_runtime": 88.859, |
| "eval_samples_per_second": 11.254, |
| "eval_steps_per_second": 0.36, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 1.8195607662200928, |
| "learning_rate": 7.122507122507124e-08, |
| "loss": 0.7441, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "eval_cos_sim": 0.25187191367149353, |
| "eval_loss": 0.7493054814552015, |
| "eval_runtime": 88.3989, |
| "eval_samples_per_second": 11.312, |
| "eval_steps_per_second": 0.362, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "grad_norm": 1.7800198793411255, |
| "learning_rate": 7.461674128340795e-08, |
| "loss": 0.7395, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "eval_cos_sim": 0.2520497441291809, |
| "eval_loss": 0.7491277885650343, |
| "eval_runtime": 88.4463, |
| "eval_samples_per_second": 11.306, |
| "eval_steps_per_second": 0.362, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "grad_norm": 1.5236977338790894, |
| "learning_rate": 7.800841134174468e-08, |
| "loss": 0.7402, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "eval_cos_sim": 0.25224006175994873, |
| "eval_loss": 0.7489376101707167, |
| "eval_runtime": 88.645, |
| "eval_samples_per_second": 11.281, |
| "eval_steps_per_second": 0.361, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "grad_norm": 1.79439377784729, |
| "learning_rate": 8.14000814000814e-08, |
| "loss": 0.7432, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "eval_cos_sim": 0.25243115425109863, |
| "eval_loss": 0.7487466225837416, |
| "eval_runtime": 88.5544, |
| "eval_samples_per_second": 11.292, |
| "eval_steps_per_second": 0.361, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "grad_norm": 1.8155500888824463, |
| "learning_rate": 8.479175145841813e-08, |
| "loss": 0.7358, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "eval_cos_sim": 0.25262758135795593, |
| "eval_loss": 0.7485503211234754, |
| "eval_runtime": 88.6192, |
| "eval_samples_per_second": 11.284, |
| "eval_steps_per_second": 0.361, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "grad_norm": 1.8234913349151611, |
| "learning_rate": 8.818342151675485e-08, |
| "loss": 0.7418, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "eval_cos_sim": 0.25283777713775635, |
| "eval_loss": 0.7483402905677503, |
| "eval_runtime": 89.1169, |
| "eval_samples_per_second": 11.221, |
| "eval_steps_per_second": 0.359, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 1.808308482170105, |
| "learning_rate": 9.157509157509157e-08, |
| "loss": 0.7298, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "eval_cos_sim": 0.2530568242073059, |
| "eval_loss": 0.7481214027618116, |
| "eval_runtime": 88.155, |
| "eval_samples_per_second": 11.344, |
| "eval_steps_per_second": 0.363, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "grad_norm": 1.7929309606552124, |
| "learning_rate": 9.496676163342831e-08, |
| "loss": 0.7257, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "eval_cos_sim": 0.25328928232192993, |
| "eval_loss": 0.747889132521028, |
| "eval_runtime": 89.0622, |
| "eval_samples_per_second": 11.228, |
| "eval_steps_per_second": 0.359, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "grad_norm": 1.4342882633209229, |
| "learning_rate": 9.835843169176503e-08, |
| "loss": 0.7451, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "eval_cos_sim": 0.2535253167152405, |
| "eval_loss": 0.7476532855247205, |
| "eval_runtime": 89.396, |
| "eval_samples_per_second": 11.186, |
| "eval_steps_per_second": 0.358, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "grad_norm": 1.7873544692993164, |
| "learning_rate": 1.0175010175010174e-07, |
| "loss": 0.7289, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "eval_cos_sim": 0.25376906991004944, |
| "eval_loss": 0.7474096789573378, |
| "eval_runtime": 88.7445, |
| "eval_samples_per_second": 11.268, |
| "eval_steps_per_second": 0.361, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "grad_norm": 1.4993253946304321, |
| "learning_rate": 1.0514177180843848e-07, |
| "loss": 0.7498, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "eval_cos_sim": 0.2540150582790375, |
| "eval_loss": 0.747163932344789, |
| "eval_runtime": 88.8446, |
| "eval_samples_per_second": 11.256, |
| "eval_steps_per_second": 0.36, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "grad_norm": 1.5209181308746338, |
| "learning_rate": 1.085334418667752e-07, |
| "loss": 0.7332, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "eval_cos_sim": 0.25427138805389404, |
| "eval_loss": 0.7469077882980055, |
| "eval_runtime": 89.1231, |
| "eval_samples_per_second": 11.22, |
| "eval_steps_per_second": 0.359, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "grad_norm": 1.8111993074417114, |
| "learning_rate": 1.1192511192511194e-07, |
| "loss": 0.7377, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "eval_cos_sim": 0.2545357644557953, |
| "eval_loss": 0.7466436038230604, |
| "eval_runtime": 88.9406, |
| "eval_samples_per_second": 11.243, |
| "eval_steps_per_second": 0.36, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "grad_norm": 1.5225635766983032, |
| "learning_rate": 1.1531678198344865e-07, |
| "loss": 0.7411, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "eval_cos_sim": 0.25481417775154114, |
| "eval_loss": 0.7463653974746413, |
| "eval_runtime": 88.552, |
| "eval_samples_per_second": 11.293, |
| "eval_steps_per_second": 0.361, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "grad_norm": 1.81794273853302, |
| "learning_rate": 1.1870845204178537e-07, |
| "loss": 0.7482, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "eval_cos_sim": 0.2550930678844452, |
| "eval_loss": 0.7460866894935316, |
| "eval_runtime": 88.1664, |
| "eval_samples_per_second": 11.342, |
| "eval_steps_per_second": 0.363, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 1.4908636808395386, |
| "learning_rate": 1.221001221001221e-07, |
| "loss": 0.7319, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "eval_cos_sim": 0.2553824186325073, |
| "eval_loss": 0.7457975163673108, |
| "eval_runtime": 88.5321, |
| "eval_samples_per_second": 11.295, |
| "eval_steps_per_second": 0.361, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "grad_norm": 1.7756201028823853, |
| "learning_rate": 1.2549179215845883e-07, |
| "loss": 0.7245, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "eval_cos_sim": 0.25568509101867676, |
| "eval_loss": 0.7454950423453993, |
| "eval_runtime": 88.9525, |
| "eval_samples_per_second": 11.242, |
| "eval_steps_per_second": 0.36, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "grad_norm": 1.5107450485229492, |
| "learning_rate": 1.2888346221679555e-07, |
| "loss": 0.7453, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "eval_cos_sim": 0.25599703192710876, |
| "eval_loss": 0.7451833567832655, |
| "eval_runtime": 88.5223, |
| "eval_samples_per_second": 11.297, |
| "eval_steps_per_second": 0.361, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 1.8164764642715454, |
| "learning_rate": 1.3227513227513228e-07, |
| "loss": 0.7226, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "eval_cos_sim": 0.25630947947502136, |
| "eval_loss": 0.7448710847114272, |
| "eval_runtime": 89.3118, |
| "eval_samples_per_second": 11.197, |
| "eval_steps_per_second": 0.358, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "grad_norm": 1.500030279159546, |
| "learning_rate": 1.35666802333469e-07, |
| "loss": 0.7354, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "eval_cos_sim": 0.2566259205341339, |
| "eval_loss": 0.7445548601363844, |
| "eval_runtime": 89.5491, |
| "eval_samples_per_second": 11.167, |
| "eval_steps_per_second": 0.357, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "grad_norm": 1.4817250967025757, |
| "learning_rate": 1.3905847239180572e-07, |
| "loss": 0.7507, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "eval_cos_sim": 0.25694403052330017, |
| "eval_loss": 0.744236885568971, |
| "eval_runtime": 88.9828, |
| "eval_samples_per_second": 11.238, |
| "eval_steps_per_second": 0.36, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 1.510607123374939, |
| "learning_rate": 1.4245014245014247e-07, |
| "loss": 0.7244, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "eval_cos_sim": 0.2572762966156006, |
| "eval_loss": 0.7439048562263196, |
| "eval_runtime": 88.9759, |
| "eval_samples_per_second": 11.239, |
| "eval_steps_per_second": 0.36, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "grad_norm": 1.7870216369628906, |
| "learning_rate": 1.4584181250847917e-07, |
| "loss": 0.7468, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "eval_cos_sim": 0.2576209008693695, |
| "eval_loss": 0.7435604424689954, |
| "eval_runtime": 89.244, |
| "eval_samples_per_second": 11.205, |
| "eval_steps_per_second": 0.359, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "grad_norm": 1.7750380039215088, |
| "learning_rate": 1.492334825668159e-07, |
| "loss": 0.7291, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "eval_cos_sim": 0.2579769194126129, |
| "eval_loss": 0.7432047133659071, |
| "eval_runtime": 88.5309, |
| "eval_samples_per_second": 11.295, |
| "eval_steps_per_second": 0.361, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "grad_norm": 1.7759586572647095, |
| "learning_rate": 1.5262515262515264e-07, |
| "loss": 0.7293, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "eval_cos_sim": 0.25834715366363525, |
| "eval_loss": 0.7428347926353163, |
| "eval_runtime": 89.2461, |
| "eval_samples_per_second": 11.205, |
| "eval_steps_per_second": 0.359, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "grad_norm": 1.8177165985107422, |
| "learning_rate": 1.5601682268348936e-07, |
| "loss": 0.7326, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "eval_cos_sim": 0.25872257351875305, |
| "eval_loss": 0.7424596538757032, |
| "eval_runtime": 89.1253, |
| "eval_samples_per_second": 11.22, |
| "eval_steps_per_second": 0.359, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "grad_norm": 1.5271542072296143, |
| "learning_rate": 1.594084927418261e-07, |
| "loss": 0.7358, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "eval_cos_sim": 0.25911861658096313, |
| "eval_loss": 0.742063922426576, |
| "eval_runtime": 88.958, |
| "eval_samples_per_second": 11.241, |
| "eval_steps_per_second": 0.36, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "grad_norm": 1.519902229309082, |
| "learning_rate": 1.628001628001628e-07, |
| "loss": 0.7568, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "eval_cos_sim": 0.2595009207725525, |
| "eval_loss": 0.7416818804954237, |
| "eval_runtime": 89.0027, |
| "eval_samples_per_second": 11.236, |
| "eval_steps_per_second": 0.36, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "grad_norm": 1.5114340782165527, |
| "learning_rate": 1.6619183285849953e-07, |
| "loss": 0.7242, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "eval_cos_sim": 0.2598978579044342, |
| "eval_loss": 0.7412852273200696, |
| "eval_runtime": 89.2389, |
| "eval_samples_per_second": 11.206, |
| "eval_steps_per_second": 0.359, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "grad_norm": 1.78502357006073, |
| "learning_rate": 1.6958350291683626e-07, |
| "loss": 0.7339, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "eval_cos_sim": 0.26030153036117554, |
| "eval_loss": 0.7408818502639478, |
| "eval_runtime": 88.3154, |
| "eval_samples_per_second": 11.323, |
| "eval_steps_per_second": 0.362, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "grad_norm": 1.540235161781311, |
| "learning_rate": 1.7297517297517298e-07, |
| "loss": 0.7322, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "eval_cos_sim": 0.26071667671203613, |
| "eval_loss": 0.7404670024131483, |
| "eval_runtime": 88.9616, |
| "eval_samples_per_second": 11.241, |
| "eval_steps_per_second": 0.36, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "grad_norm": 1.8035300970077515, |
| "learning_rate": 1.763668430335097e-07, |
| "loss": 0.721, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "eval_cos_sim": 0.26114267110824585, |
| "eval_loss": 0.7400413398956007, |
| "eval_runtime": 89.0108, |
| "eval_samples_per_second": 11.235, |
| "eval_steps_per_second": 0.36, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "grad_norm": 1.5290201902389526, |
| "learning_rate": 1.7975851309184642e-07, |
| "loss": 0.7263, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "eval_cos_sim": 0.2615705728530884, |
| "eval_loss": 0.7396137051795667, |
| "eval_runtime": 89.0214, |
| "eval_samples_per_second": 11.233, |
| "eval_steps_per_second": 0.359, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 1.5043171644210815, |
| "learning_rate": 1.8315018315018315e-07, |
| "loss": 0.7215, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "eval_cos_sim": 0.26200759410858154, |
| "eval_loss": 0.7391770148490614, |
| "eval_runtime": 89.0696, |
| "eval_samples_per_second": 11.227, |
| "eval_steps_per_second": 0.359, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "grad_norm": 1.8029693365097046, |
| "learning_rate": 1.865418532085199e-07, |
| "loss": 0.7221, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "eval_cos_sim": 0.26245614886283875, |
| "eval_loss": 0.7387287416671461, |
| "eval_runtime": 89.6121, |
| "eval_samples_per_second": 11.159, |
| "eval_steps_per_second": 0.357, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "grad_norm": 1.492769479751587, |
| "learning_rate": 1.8993352326685662e-07, |
| "loss": 0.7335, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "eval_cos_sim": 0.2629122734069824, |
| "eval_loss": 0.738272936842317, |
| "eval_runtime": 89.4796, |
| "eval_samples_per_second": 11.176, |
| "eval_steps_per_second": 0.358, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "grad_norm": 1.4421427249908447, |
| "learning_rate": 1.9332519332519332e-07, |
| "loss": 0.7335, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "eval_cos_sim": 0.2633700668811798, |
| "eval_loss": 0.7378155255531019, |
| "eval_runtime": 89.3151, |
| "eval_samples_per_second": 11.196, |
| "eval_steps_per_second": 0.358, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "grad_norm": 1.8092644214630127, |
| "learning_rate": 1.9671686338353007e-07, |
| "loss": 0.7406, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "eval_cos_sim": 0.26382094621658325, |
| "eval_loss": 0.7373649797652906, |
| "eval_runtime": 89.6512, |
| "eval_samples_per_second": 11.154, |
| "eval_steps_per_second": 0.357, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "grad_norm": 1.7754801511764526, |
| "learning_rate": 2.001085334418668e-07, |
| "loss": 0.7369, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "eval_cos_sim": 0.2642780840396881, |
| "eval_loss": 0.7369081225608534, |
| "eval_runtime": 89.4603, |
| "eval_samples_per_second": 11.178, |
| "eval_steps_per_second": 0.358, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "grad_norm": 1.8073511123657227, |
| "learning_rate": 2.0350020350020349e-07, |
| "loss": 0.723, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "eval_cos_sim": 0.2647515833377838, |
| "eval_loss": 0.7364349565719313, |
| "eval_runtime": 89.5556, |
| "eval_samples_per_second": 11.166, |
| "eval_steps_per_second": 0.357, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "grad_norm": 1.5272294282913208, |
| "learning_rate": 2.0689187355854024e-07, |
| "loss": 0.7257, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "eval_cos_sim": 0.2652308940887451, |
| "eval_loss": 0.7359559812759108, |
| "eval_runtime": 90.0107, |
| "eval_samples_per_second": 11.11, |
| "eval_steps_per_second": 0.356, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "grad_norm": 1.5300447940826416, |
| "learning_rate": 2.1028354361687696e-07, |
| "loss": 0.7159, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "eval_cos_sim": 0.26572421193122864, |
| "eval_loss": 0.7354630170081801, |
| "eval_runtime": 89.5462, |
| "eval_samples_per_second": 11.167, |
| "eval_steps_per_second": 0.357, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 1.7922492027282715, |
| "learning_rate": 2.136752136752137e-07, |
| "loss": 0.7201, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "eval_cos_sim": 0.2662273347377777, |
| "eval_loss": 0.7349601850722974, |
| "eval_runtime": 89.2139, |
| "eval_samples_per_second": 11.209, |
| "eval_steps_per_second": 0.359, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.26048026048026046, |
| "grad_norm": 1.800451636314392, |
| "learning_rate": 2.170668837335504e-07, |
| "loss": 0.7122, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.26048026048026046, |
| "eval_cos_sim": 0.2667379379272461, |
| "eval_loss": 0.7344499821876234, |
| "eval_runtime": 88.9978, |
| "eval_samples_per_second": 11.236, |
| "eval_steps_per_second": 0.36, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "grad_norm": 1.5168377161026, |
| "learning_rate": 2.2045855379188713e-07, |
| "loss": 0.7292, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "eval_cos_sim": 0.2672579288482666, |
| "eval_loss": 0.7339303827499097, |
| "eval_runtime": 89.422, |
| "eval_samples_per_second": 11.183, |
| "eval_steps_per_second": 0.358, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2686202686202686, |
| "grad_norm": 1.7991094589233398, |
| "learning_rate": 2.2385022385022388e-07, |
| "loss": 0.7102, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2686202686202686, |
| "eval_cos_sim": 0.267790287733078, |
| "eval_loss": 0.7333984103416151, |
| "eval_runtime": 89.5146, |
| "eval_samples_per_second": 11.171, |
| "eval_steps_per_second": 0.357, |
| "step": 660 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1474200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 600, |
| "save_steps": 10, |
| "total_flos": 0.0, |
| "train_batch_size": 160, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|