| { |
| "best_metric": 0.854606050035829, |
| "best_model_checkpoint": "/p/scratch/ccstdl/krishna/finetuned-cosine-loss/checkpoint-650", |
| "epoch": 0.26455026455026454, |
| "eval_steps": 10, |
| "global_step": 650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00407000407000407, |
| "grad_norm": 1.8341377973556519, |
| "learning_rate": 3.391670058336725e-09, |
| "loss": 0.8563, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00407000407000407, |
| "eval_cos_sim": 0.1275755614042282, |
| "eval_loss": 0.8735055446838087, |
| "eval_runtime": 91.5265, |
| "eval_samples_per_second": 10.926, |
| "eval_steps_per_second": 0.35, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "grad_norm": 1.5246553421020508, |
| "learning_rate": 6.78334011667345e-09, |
| "loss": 0.8643, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00814000814000814, |
| "eval_cos_sim": 0.12758877873420715, |
| "eval_loss": 0.8734923420165723, |
| "eval_runtime": 88.663, |
| "eval_samples_per_second": 11.279, |
| "eval_steps_per_second": 0.361, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "grad_norm": 1.8004460334777832, |
| "learning_rate": 1.0175010175010176e-08, |
| "loss": 0.8665, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01221001221001221, |
| "eval_cos_sim": 0.12761123478412628, |
| "eval_loss": 0.8734698925231642, |
| "eval_runtime": 88.8085, |
| "eval_samples_per_second": 11.26, |
| "eval_steps_per_second": 0.36, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "grad_norm": 1.8268524408340454, |
| "learning_rate": 1.35666802333469e-08, |
| "loss": 0.8661, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01628001628001628, |
| "eval_cos_sim": 0.1276426911354065, |
| "eval_loss": 0.8734384694312758, |
| "eval_runtime": 88.8002, |
| "eval_samples_per_second": 11.261, |
| "eval_steps_per_second": 0.36, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "grad_norm": 1.5149165391921997, |
| "learning_rate": 1.6958350291683625e-08, |
| "loss": 0.8613, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02035002035002035, |
| "eval_cos_sim": 0.1276828944683075, |
| "eval_loss": 0.8733982930396742, |
| "eval_runtime": 89.0846, |
| "eval_samples_per_second": 11.225, |
| "eval_steps_per_second": 0.359, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "grad_norm": 1.4543992280960083, |
| "learning_rate": 2.035002035002035e-08, |
| "loss": 0.8729, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02442002442002442, |
| "eval_cos_sim": 0.12773165106773376, |
| "eval_loss": 0.8733495784019178, |
| "eval_runtime": 88.8621, |
| "eval_samples_per_second": 11.253, |
| "eval_steps_per_second": 0.36, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "grad_norm": 1.8213750123977661, |
| "learning_rate": 2.3741690408357078e-08, |
| "loss": 0.8632, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02849002849002849, |
| "eval_cos_sim": 0.12778951227664948, |
| "eval_loss": 0.8732917666648573, |
| "eval_runtime": 88.7523, |
| "eval_samples_per_second": 11.267, |
| "eval_steps_per_second": 0.361, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "grad_norm": 1.8100404739379883, |
| "learning_rate": 2.71333604666938e-08, |
| "loss": 0.866, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03256003256003256, |
| "eval_cos_sim": 0.12785714864730835, |
| "eval_loss": 0.8732241764281935, |
| "eval_runtime": 88.4393, |
| "eval_samples_per_second": 11.307, |
| "eval_steps_per_second": 0.362, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "grad_norm": 1.8266569375991821, |
| "learning_rate": 3.052503052503053e-08, |
| "loss": 0.8614, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03663003663003663, |
| "eval_cos_sim": 0.12793460488319397, |
| "eval_loss": 0.8731467800353712, |
| "eval_runtime": 89.2974, |
| "eval_samples_per_second": 11.199, |
| "eval_steps_per_second": 0.358, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "grad_norm": 1.8240498304367065, |
| "learning_rate": 3.391670058336725e-08, |
| "loss": 0.8622, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0407000407000407, |
| "eval_cos_sim": 0.12802155315876007, |
| "eval_loss": 0.8730598721717543, |
| "eval_runtime": 89.1998, |
| "eval_samples_per_second": 11.211, |
| "eval_steps_per_second": 0.359, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "grad_norm": 1.5381144285202026, |
| "learning_rate": 3.730837064170397e-08, |
| "loss": 0.8598, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04477004477004477, |
| "eval_cos_sim": 0.12811602652072906, |
| "eval_loss": 0.8729654574607557, |
| "eval_runtime": 88.7617, |
| "eval_samples_per_second": 11.266, |
| "eval_steps_per_second": 0.361, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "grad_norm": 1.8371686935424805, |
| "learning_rate": 4.07000407000407e-08, |
| "loss": 0.859, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04884004884004884, |
| "eval_cos_sim": 0.12821921706199646, |
| "eval_loss": 0.8728623566840834, |
| "eval_runtime": 89.6925, |
| "eval_samples_per_second": 11.149, |
| "eval_steps_per_second": 0.357, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "grad_norm": 1.8292527198791504, |
| "learning_rate": 4.4091710758377425e-08, |
| "loss": 0.8673, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05291005291005291, |
| "eval_cos_sim": 0.12833045423030853, |
| "eval_loss": 0.872751211664552, |
| "eval_runtime": 88.9497, |
| "eval_samples_per_second": 11.242, |
| "eval_steps_per_second": 0.36, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "grad_norm": 1.547228455543518, |
| "learning_rate": 4.7483380816714155e-08, |
| "loss": 0.8694, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05698005698005698, |
| "eval_cos_sim": 0.12844981253147125, |
| "eval_loss": 0.8726319508765882, |
| "eval_runtime": 89.5439, |
| "eval_samples_per_second": 11.168, |
| "eval_steps_per_second": 0.357, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "grad_norm": 1.5469759702682495, |
| "learning_rate": 5.087505087505087e-08, |
| "loss": 0.8695, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06105006105006105, |
| "eval_cos_sim": 0.1285790205001831, |
| "eval_loss": 0.8725028333877272, |
| "eval_runtime": 88.9224, |
| "eval_samples_per_second": 11.246, |
| "eval_steps_per_second": 0.36, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "grad_norm": 1.5482810735702515, |
| "learning_rate": 5.42667209333876e-08, |
| "loss": 0.8647, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06512006512006512, |
| "eval_cos_sim": 0.12871865928173065, |
| "eval_loss": 0.8723633003448195, |
| "eval_runtime": 88.7738, |
| "eval_samples_per_second": 11.265, |
| "eval_steps_per_second": 0.36, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "grad_norm": 1.5481818914413452, |
| "learning_rate": 5.7658390991724324e-08, |
| "loss": 0.8601, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06919006919006919, |
| "eval_cos_sim": 0.12886543571949005, |
| "eval_loss": 0.8722166295264906, |
| "eval_runtime": 88.67, |
| "eval_samples_per_second": 11.278, |
| "eval_steps_per_second": 0.361, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "grad_norm": 1.5428705215454102, |
| "learning_rate": 6.105006105006105e-08, |
| "loss": 0.8647, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07326007326007326, |
| "eval_cos_sim": 0.12902309000492096, |
| "eval_loss": 0.8720590691779798, |
| "eval_runtime": 89.4286, |
| "eval_samples_per_second": 11.182, |
| "eval_steps_per_second": 0.358, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "grad_norm": 1.8401004076004028, |
| "learning_rate": 6.444173110839778e-08, |
| "loss": 0.8605, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07733007733007732, |
| "eval_cos_sim": 0.1291920691728592, |
| "eval_loss": 0.8718901972984022, |
| "eval_runtime": 89.319, |
| "eval_samples_per_second": 11.196, |
| "eval_steps_per_second": 0.358, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "grad_norm": 1.8132197856903076, |
| "learning_rate": 6.78334011667345e-08, |
| "loss": 0.8649, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0814000814000814, |
| "eval_cos_sim": 0.12936964631080627, |
| "eval_loss": 0.8717127638076491, |
| "eval_runtime": 88.8517, |
| "eval_samples_per_second": 11.255, |
| "eval_steps_per_second": 0.36, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 1.841541051864624, |
| "learning_rate": 7.122507122507124e-08, |
| "loss": 0.8648, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "eval_cos_sim": 0.12955836951732635, |
| "eval_loss": 0.8715242018912978, |
| "eval_runtime": 88.9573, |
| "eval_samples_per_second": 11.241, |
| "eval_steps_per_second": 0.36, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "grad_norm": 1.7975077629089355, |
| "learning_rate": 7.461674128340795e-08, |
| "loss": 0.8593, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08954008954008955, |
| "eval_cos_sim": 0.12975716590881348, |
| "eval_loss": 0.8713255877708143, |
| "eval_runtime": 89.0134, |
| "eval_samples_per_second": 11.234, |
| "eval_steps_per_second": 0.359, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "grad_norm": 1.6715312004089355, |
| "learning_rate": 7.800841134174468e-08, |
| "loss": 0.8635, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0936100936100936, |
| "eval_cos_sim": 0.12996670603752136, |
| "eval_loss": 0.8711161980842298, |
| "eval_runtime": 89.3295, |
| "eval_samples_per_second": 11.195, |
| "eval_steps_per_second": 0.358, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "grad_norm": 1.8103832006454468, |
| "learning_rate": 8.14000814000814e-08, |
| "loss": 0.8609, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09768009768009768, |
| "eval_cos_sim": 0.13018178939819336, |
| "eval_loss": 0.8709012541984266, |
| "eval_runtime": 89.2626, |
| "eval_samples_per_second": 11.203, |
| "eval_steps_per_second": 0.358, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "grad_norm": 1.8056939840316772, |
| "learning_rate": 8.479175145841813e-08, |
| "loss": 0.8639, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10175010175010175, |
| "eval_cos_sim": 0.13040043413639069, |
| "eval_loss": 0.8706827788566297, |
| "eval_runtime": 89.3651, |
| "eval_samples_per_second": 11.19, |
| "eval_steps_per_second": 0.358, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "grad_norm": 1.8392592668533325, |
| "learning_rate": 8.818342151675485e-08, |
| "loss": 0.8607, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10582010582010581, |
| "eval_cos_sim": 0.1306302398443222, |
| "eval_loss": 0.8704531607841199, |
| "eval_runtime": 89.3602, |
| "eval_samples_per_second": 11.191, |
| "eval_steps_per_second": 0.358, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "grad_norm": 1.8241254091262817, |
| "learning_rate": 9.157509157509157e-08, |
| "loss": 0.8601, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10989010989010989, |
| "eval_cos_sim": 0.13087190687656403, |
| "eval_loss": 0.8702116866325087, |
| "eval_runtime": 89.3533, |
| "eval_samples_per_second": 11.192, |
| "eval_steps_per_second": 0.358, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "grad_norm": 1.5199859142303467, |
| "learning_rate": 9.496676163342831e-08, |
| "loss": 0.862, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11396011396011396, |
| "eval_cos_sim": 0.1311246156692505, |
| "eval_loss": 0.8699591713164991, |
| "eval_runtime": 89.8365, |
| "eval_samples_per_second": 11.131, |
| "eval_steps_per_second": 0.356, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "grad_norm": 1.539494276046753, |
| "learning_rate": 9.835843169176503e-08, |
| "loss": 0.8646, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11803011803011804, |
| "eval_cos_sim": 0.13138911128044128, |
| "eval_loss": 0.8696949014877028, |
| "eval_runtime": 89.1002, |
| "eval_samples_per_second": 11.223, |
| "eval_steps_per_second": 0.359, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "grad_norm": 1.8310585021972656, |
| "learning_rate": 1.0175010175010174e-07, |
| "loss": 0.8678, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1221001221001221, |
| "eval_cos_sim": 0.13165612518787384, |
| "eval_loss": 0.8694280743812269, |
| "eval_runtime": 89.8617, |
| "eval_samples_per_second": 11.128, |
| "eval_steps_per_second": 0.356, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "grad_norm": 1.5015047788619995, |
| "learning_rate": 1.0514177180843848e-07, |
| "loss": 0.8646, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12617012617012616, |
| "eval_cos_sim": 0.13192161917686462, |
| "eval_loss": 0.8691628270362562, |
| "eval_runtime": 89.425, |
| "eval_samples_per_second": 11.183, |
| "eval_steps_per_second": 0.358, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "grad_norm": 1.8223438262939453, |
| "learning_rate": 1.085334418667752e-07, |
| "loss": 0.8574, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.13024013024013023, |
| "eval_cos_sim": 0.13219775259494781, |
| "eval_loss": 0.8688869447921461, |
| "eval_runtime": 89.6601, |
| "eval_samples_per_second": 11.153, |
| "eval_steps_per_second": 0.357, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "grad_norm": 1.834613561630249, |
| "learning_rate": 1.1192511192511194e-07, |
| "loss": 0.8609, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1343101343101343, |
| "eval_cos_sim": 0.13248248398303986, |
| "eval_loss": 0.868602445146913, |
| "eval_runtime": 89.6443, |
| "eval_samples_per_second": 11.155, |
| "eval_steps_per_second": 0.357, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "grad_norm": 1.8023933172225952, |
| "learning_rate": 1.1531678198344865e-07, |
| "loss": 0.8587, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13838013838013838, |
| "eval_cos_sim": 0.13278011977672577, |
| "eval_loss": 0.8683050141547864, |
| "eval_runtime": 89.9517, |
| "eval_samples_per_second": 11.117, |
| "eval_steps_per_second": 0.356, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "grad_norm": 1.821526288986206, |
| "learning_rate": 1.1870845204178537e-07, |
| "loss": 0.8598, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14245014245014245, |
| "eval_cos_sim": 0.1330917626619339, |
| "eval_loss": 0.867993585607881, |
| "eval_runtime": 90.2991, |
| "eval_samples_per_second": 11.074, |
| "eval_steps_per_second": 0.354, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "grad_norm": 1.801190733909607, |
| "learning_rate": 1.221001221001221e-07, |
| "loss": 0.8525, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14652014652014653, |
| "eval_cos_sim": 0.13341833651065826, |
| "eval_loss": 0.8676672454093641, |
| "eval_runtime": 90.0422, |
| "eval_samples_per_second": 11.106, |
| "eval_steps_per_second": 0.355, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "grad_norm": 1.5358909368515015, |
| "learning_rate": 1.2549179215845883e-07, |
| "loss": 0.8575, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1505901505901506, |
| "eval_cos_sim": 0.13375183939933777, |
| "eval_loss": 0.8673340068076796, |
| "eval_runtime": 90.0945, |
| "eval_samples_per_second": 11.099, |
| "eval_steps_per_second": 0.355, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "grad_norm": 1.7882535457611084, |
| "learning_rate": 1.2888346221679555e-07, |
| "loss": 0.8521, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15466015466015465, |
| "eval_cos_sim": 0.13408976793289185, |
| "eval_loss": 0.8669963665221876, |
| "eval_runtime": 90.0493, |
| "eval_samples_per_second": 11.105, |
| "eval_steps_per_second": 0.355, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 1.839800477027893, |
| "learning_rate": 1.3227513227513228e-07, |
| "loss": 0.8558, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "eval_cos_sim": 0.13443315029144287, |
| "eval_loss": 0.8666532302116102, |
| "eval_runtime": 89.9978, |
| "eval_samples_per_second": 11.111, |
| "eval_steps_per_second": 0.356, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "grad_norm": 1.8280110359191895, |
| "learning_rate": 1.35666802333469e-07, |
| "loss": 0.8539, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1628001628001628, |
| "eval_cos_sim": 0.13477978110313416, |
| "eval_loss": 0.8663068804954237, |
| "eval_runtime": 90.1723, |
| "eval_samples_per_second": 11.09, |
| "eval_steps_per_second": 0.355, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "grad_norm": 1.5013295412063599, |
| "learning_rate": 1.3905847239180572e-07, |
| "loss": 0.8631, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16687016687016687, |
| "eval_cos_sim": 0.13513372838497162, |
| "eval_loss": 0.8659531727050489, |
| "eval_runtime": 90.4867, |
| "eval_samples_per_second": 11.051, |
| "eval_steps_per_second": 0.354, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 1.7966923713684082, |
| "learning_rate": 1.4245014245014247e-07, |
| "loss": 0.8533, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "eval_cos_sim": 0.13550296425819397, |
| "eval_loss": 0.8655842075561232, |
| "eval_runtime": 89.6565, |
| "eval_samples_per_second": 11.154, |
| "eval_steps_per_second": 0.357, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "grad_norm": 1.8006147146224976, |
| "learning_rate": 1.4584181250847917e-07, |
| "loss": 0.8549, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.17501017501017502, |
| "eval_cos_sim": 0.1358877271413803, |
| "eval_loss": 0.8651996951316542, |
| "eval_runtime": 91.2533, |
| "eval_samples_per_second": 10.959, |
| "eval_steps_per_second": 0.351, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "grad_norm": 1.7972980737686157, |
| "learning_rate": 1.492334825668159e-07, |
| "loss": 0.8568, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1790801790801791, |
| "eval_cos_sim": 0.136278435587883, |
| "eval_loss": 0.8648093180869764, |
| "eval_runtime": 90.2011, |
| "eval_samples_per_second": 11.086, |
| "eval_steps_per_second": 0.355, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "grad_norm": 1.8002476692199707, |
| "learning_rate": 1.5262515262515264e-07, |
| "loss": 0.8593, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.18315018315018314, |
| "eval_cos_sim": 0.1366884559392929, |
| "eval_loss": 0.8643996544097609, |
| "eval_runtime": 90.0768, |
| "eval_samples_per_second": 11.102, |
| "eval_steps_per_second": 0.355, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "grad_norm": 1.8119208812713623, |
| "learning_rate": 1.5601682268348936e-07, |
| "loss": 0.8497, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1872201872201872, |
| "eval_cos_sim": 0.1371019184589386, |
| "eval_loss": 0.8639865331863111, |
| "eval_runtime": 90.2856, |
| "eval_samples_per_second": 11.076, |
| "eval_steps_per_second": 0.354, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "grad_norm": 1.5471041202545166, |
| "learning_rate": 1.594084927418261e-07, |
| "loss": 0.857, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19129019129019129, |
| "eval_cos_sim": 0.13752888143062592, |
| "eval_loss": 0.8635599189017957, |
| "eval_runtime": 90.6564, |
| "eval_samples_per_second": 11.031, |
| "eval_steps_per_second": 0.353, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "grad_norm": 1.5276424884796143, |
| "learning_rate": 1.628001628001628e-07, |
| "loss": 0.8553, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19536019536019536, |
| "eval_cos_sim": 0.13795536756515503, |
| "eval_loss": 0.8631337700103467, |
| "eval_runtime": 89.9592, |
| "eval_samples_per_second": 11.116, |
| "eval_steps_per_second": 0.356, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "grad_norm": 1.8085660934448242, |
| "learning_rate": 1.6619183285849953e-07, |
| "loss": 0.8559, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.19943019943019943, |
| "eval_cos_sim": 0.13838599622249603, |
| "eval_loss": 0.8627034917091078, |
| "eval_runtime": 89.645, |
| "eval_samples_per_second": 11.155, |
| "eval_steps_per_second": 0.357, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "grad_norm": 1.8282767534255981, |
| "learning_rate": 1.6958350291683626e-07, |
| "loss": 0.8533, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2035002035002035, |
| "eval_cos_sim": 0.13883180916309357, |
| "eval_loss": 0.8622579980109877, |
| "eval_runtime": 90.3692, |
| "eval_samples_per_second": 11.066, |
| "eval_steps_per_second": 0.354, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "grad_norm": 1.5567238330841064, |
| "learning_rate": 1.7297517297517298e-07, |
| "loss": 0.8525, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.20757020757020758, |
| "eval_cos_sim": 0.1392912119626999, |
| "eval_loss": 0.8617989306663221, |
| "eval_runtime": 89.9834, |
| "eval_samples_per_second": 11.113, |
| "eval_steps_per_second": 0.356, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "grad_norm": 1.5459282398223877, |
| "learning_rate": 1.763668430335097e-07, |
| "loss": 0.853, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.21164021164021163, |
| "eval_cos_sim": 0.13975809514522552, |
| "eval_loss": 0.8613324156020826, |
| "eval_runtime": 90.0086, |
| "eval_samples_per_second": 11.11, |
| "eval_steps_per_second": 0.356, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "grad_norm": 1.799883484840393, |
| "learning_rate": 1.7975851309184642e-07, |
| "loss": 0.8512, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2157102157102157, |
| "eval_cos_sim": 0.14022615551948547, |
| "eval_loss": 0.8608647189353651, |
| "eval_runtime": 90.0398, |
| "eval_samples_per_second": 11.106, |
| "eval_steps_per_second": 0.355, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "grad_norm": 1.5823848247528076, |
| "learning_rate": 1.8315018315018315e-07, |
| "loss": 0.86, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21978021978021978, |
| "eval_cos_sim": 0.14070864021778107, |
| "eval_loss": 0.8603825893615431, |
| "eval_runtime": 90.1006, |
| "eval_samples_per_second": 11.099, |
| "eval_steps_per_second": 0.355, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "grad_norm": 1.8258429765701294, |
| "learning_rate": 1.865418532085199e-07, |
| "loss": 0.8504, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22385022385022385, |
| "eval_cos_sim": 0.14118121564388275, |
| "eval_loss": 0.8599103837226576, |
| "eval_runtime": 90.1395, |
| "eval_samples_per_second": 11.094, |
| "eval_steps_per_second": 0.355, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "grad_norm": 1.4464470148086548, |
| "learning_rate": 1.8993352326685662e-07, |
| "loss": 0.8574, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22792022792022792, |
| "eval_cos_sim": 0.1416817009449005, |
| "eval_loss": 0.8594102664207166, |
| "eval_runtime": 90.7409, |
| "eval_samples_per_second": 11.02, |
| "eval_steps_per_second": 0.353, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "grad_norm": 1.8329437971115112, |
| "learning_rate": 1.9332519332519332e-07, |
| "loss": 0.855, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.231990231990232, |
| "eval_cos_sim": 0.14218488335609436, |
| "eval_loss": 0.8589075465415663, |
| "eval_runtime": 90.6138, |
| "eval_samples_per_second": 11.036, |
| "eval_steps_per_second": 0.353, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "grad_norm": 1.8315315246582031, |
| "learning_rate": 1.9671686338353007e-07, |
| "loss": 0.8553, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.23606023606023607, |
| "eval_cos_sim": 0.14268743991851807, |
| "eval_loss": 0.8584054036353773, |
| "eval_runtime": 90.2957, |
| "eval_samples_per_second": 11.075, |
| "eval_steps_per_second": 0.354, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "grad_norm": 1.5344057083129883, |
| "learning_rate": 2.001085334418668e-07, |
| "loss": 0.8448, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.24013024013024012, |
| "eval_cos_sim": 0.14320193231105804, |
| "eval_loss": 0.8578913531516736, |
| "eval_runtime": 90.9648, |
| "eval_samples_per_second": 10.993, |
| "eval_steps_per_second": 0.352, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "grad_norm": 1.6833467483520508, |
| "learning_rate": 2.0350020350020349e-07, |
| "loss": 0.8521, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "eval_cos_sim": 0.14373008906841278, |
| "eval_loss": 0.8573636312698072, |
| "eval_runtime": 90.5973, |
| "eval_samples_per_second": 11.038, |
| "eval_steps_per_second": 0.353, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "grad_norm": 1.808730959892273, |
| "learning_rate": 2.0689187355854024e-07, |
| "loss": 0.844, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24827024827024827, |
| "eval_cos_sim": 0.14426389336585999, |
| "eval_loss": 0.8568301882957167, |
| "eval_runtime": 90.4286, |
| "eval_samples_per_second": 11.058, |
| "eval_steps_per_second": 0.354, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "grad_norm": 1.844853162765503, |
| "learning_rate": 2.1028354361687696e-07, |
| "loss": 0.8461, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2523402523402523, |
| "eval_cos_sim": 0.14481142163276672, |
| "eval_loss": 0.8562831025336928, |
| "eval_runtime": 90.4232, |
| "eval_samples_per_second": 11.059, |
| "eval_steps_per_second": 0.354, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 1.4428536891937256, |
| "learning_rate": 2.136752136752137e-07, |
| "loss": 0.8521, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "eval_cos_sim": 0.14536888897418976, |
| "eval_loss": 0.8557261476730055, |
| "eval_runtime": 90.6076, |
| "eval_samples_per_second": 11.037, |
| "eval_steps_per_second": 0.353, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.26048026048026046, |
| "grad_norm": 1.8345551490783691, |
| "learning_rate": 2.170668837335504e-07, |
| "loss": 0.8396, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.26048026048026046, |
| "eval_cos_sim": 0.14592213928699493, |
| "eval_loss": 0.8551734004234022, |
| "eval_runtime": 90.9039, |
| "eval_samples_per_second": 11.001, |
| "eval_steps_per_second": 0.352, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "grad_norm": 1.5427323579788208, |
| "learning_rate": 2.2045855379188713e-07, |
| "loss": 0.8471, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.26455026455026454, |
| "eval_cos_sim": 0.1464899480342865, |
| "eval_loss": 0.854606050035829, |
| "eval_runtime": 89.9594, |
| "eval_samples_per_second": 11.116, |
| "eval_steps_per_second": 0.356, |
| "step": 650 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1474200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 600, |
| "save_steps": 10, |
| "total_flos": 0.0, |
| "train_batch_size": 160, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|