| { | |
| "best_metric": 0.914588417551393, | |
| "best_model_checkpoint": "/p/scratch/ccstdl/krishna/finetuned-cosine-loss/checkpoint-650", | |
| "epoch": 0.26455026455026454, | |
| "eval_steps": 10, | |
| "global_step": 650, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00407000407000407, | |
| "grad_norm": 1.8234177827835083, | |
| "learning_rate": 3.391670058336725e-09, | |
| "loss": 0.9272, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00407000407000407, | |
| "eval_cos_sim": 0.0673384740948677, | |
| "eval_loss": 0.9336964531158155, | |
| "eval_runtime": 91.7154, | |
| "eval_samples_per_second": 10.903, | |
| "eval_steps_per_second": 0.349, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00814000814000814, | |
| "grad_norm": 1.7962757349014282, | |
| "learning_rate": 6.78334011667345e-09, | |
| "loss": 0.9259, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.00814000814000814, | |
| "eval_cos_sim": 0.06735177338123322, | |
| "eval_loss": 0.9336831488822646, | |
| "eval_runtime": 89.447, | |
| "eval_samples_per_second": 11.18, | |
| "eval_steps_per_second": 0.358, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 1.784166693687439, | |
| "learning_rate": 1.0175010175010176e-08, | |
| "loss": 0.9296, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "eval_cos_sim": 0.0673740804195404, | |
| "eval_loss": 0.9336608658050245, | |
| "eval_runtime": 89.5427, | |
| "eval_samples_per_second": 11.168, | |
| "eval_steps_per_second": 0.357, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01628001628001628, | |
| "grad_norm": 1.8226275444030762, | |
| "learning_rate": 1.35666802333469e-08, | |
| "loss": 0.9259, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01628001628001628, | |
| "eval_cos_sim": 0.06740561872720718, | |
| "eval_loss": 0.93362934305417, | |
| "eval_runtime": 89.3449, | |
| "eval_samples_per_second": 11.193, | |
| "eval_steps_per_second": 0.358, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02035002035002035, | |
| "grad_norm": 1.7831834554672241, | |
| "learning_rate": 1.6958350291683625e-08, | |
| "loss": 0.9251, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02035002035002035, | |
| "eval_cos_sim": 0.06744563579559326, | |
| "eval_loss": 0.933589364073152, | |
| "eval_runtime": 89.6651, | |
| "eval_samples_per_second": 11.153, | |
| "eval_steps_per_second": 0.357, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 1.5201727151870728, | |
| "learning_rate": 2.035002035002035e-08, | |
| "loss": 0.9276, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "eval_cos_sim": 0.06749571859836578, | |
| "eval_loss": 0.9335393262122816, | |
| "eval_runtime": 89.3495, | |
| "eval_samples_per_second": 11.192, | |
| "eval_steps_per_second": 0.358, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02849002849002849, | |
| "grad_norm": 1.813641905784607, | |
| "learning_rate": 2.3741690408357078e-08, | |
| "loss": 0.9303, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02849002849002849, | |
| "eval_cos_sim": 0.06755577772855759, | |
| "eval_loss": 0.9334793086265272, | |
| "eval_runtime": 89.5359, | |
| "eval_samples_per_second": 11.169, | |
| "eval_steps_per_second": 0.357, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03256003256003256, | |
| "grad_norm": 1.8243420124053955, | |
| "learning_rate": 2.71333604666938e-08, | |
| "loss": 0.9288, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03256003256003256, | |
| "eval_cos_sim": 0.06762455403804779, | |
| "eval_loss": 0.9334105701659864, | |
| "eval_runtime": 89.5324, | |
| "eval_samples_per_second": 11.169, | |
| "eval_steps_per_second": 0.357, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 1.7983627319335938, | |
| "learning_rate": 3.052503052503053e-08, | |
| "loss": 0.9243, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "eval_cos_sim": 0.06770093739032745, | |
| "eval_loss": 0.9333342409347242, | |
| "eval_runtime": 89.987, | |
| "eval_samples_per_second": 11.113, | |
| "eval_steps_per_second": 0.356, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0407000407000407, | |
| "grad_norm": 1.8116332292556763, | |
| "learning_rate": 3.391670058336725e-08, | |
| "loss": 0.9289, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0407000407000407, | |
| "eval_cos_sim": 0.06778644770383835, | |
| "eval_loss": 0.9332487797950453, | |
| "eval_runtime": 89.222, | |
| "eval_samples_per_second": 11.208, | |
| "eval_steps_per_second": 0.359, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04477004477004477, | |
| "grad_norm": 1.4440973997116089, | |
| "learning_rate": 3.730837064170397e-08, | |
| "loss": 0.9274, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04477004477004477, | |
| "eval_cos_sim": 0.06788003444671631, | |
| "eval_loss": 0.933155252001161, | |
| "eval_runtime": 89.3804, | |
| "eval_samples_per_second": 11.188, | |
| "eval_steps_per_second": 0.358, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 1.8082635402679443, | |
| "learning_rate": 4.07000407000407e-08, | |
| "loss": 0.9267, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "eval_cos_sim": 0.06798040121793747, | |
| "eval_loss": 0.9330549512122815, | |
| "eval_runtime": 89.9274, | |
| "eval_samples_per_second": 11.12, | |
| "eval_steps_per_second": 0.356, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05291005291005291, | |
| "grad_norm": 1.8302645683288574, | |
| "learning_rate": 4.4091710758377425e-08, | |
| "loss": 0.9244, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05291005291005291, | |
| "eval_cos_sim": 0.06809354573488235, | |
| "eval_loss": 0.932941900274629, | |
| "eval_runtime": 89.8842, | |
| "eval_samples_per_second": 11.125, | |
| "eval_steps_per_second": 0.356, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05698005698005698, | |
| "grad_norm": 1.5440356731414795, | |
| "learning_rate": 4.7483380816714155e-08, | |
| "loss": 0.9316, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05698005698005698, | |
| "eval_cos_sim": 0.06821373105049133, | |
| "eval_loss": 0.9328218155120558, | |
| "eval_runtime": 89.7159, | |
| "eval_samples_per_second": 11.146, | |
| "eval_steps_per_second": 0.357, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 1.5421152114868164, | |
| "learning_rate": 5.087505087505087e-08, | |
| "loss": 0.927, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "eval_cos_sim": 0.06834477931261063, | |
| "eval_loss": 0.9326908560012526, | |
| "eval_runtime": 89.7787, | |
| "eval_samples_per_second": 11.138, | |
| "eval_steps_per_second": 0.356, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06512006512006512, | |
| "grad_norm": 1.7998121976852417, | |
| "learning_rate": 5.42667209333876e-08, | |
| "loss": 0.9252, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06512006512006512, | |
| "eval_cos_sim": 0.06848659366369247, | |
| "eval_loss": 0.9325491523956007, | |
| "eval_runtime": 89.6926, | |
| "eval_samples_per_second": 11.149, | |
| "eval_steps_per_second": 0.357, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06919006919006919, | |
| "grad_norm": 1.542074203491211, | |
| "learning_rate": 5.7658390991724324e-08, | |
| "loss": 0.9252, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06919006919006919, | |
| "eval_cos_sim": 0.0686374306678772, | |
| "eval_loss": 0.9323984141563123, | |
| "eval_runtime": 89.453, | |
| "eval_samples_per_second": 11.179, | |
| "eval_steps_per_second": 0.358, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 1.5359669923782349, | |
| "learning_rate": 6.105006105006105e-08, | |
| "loss": 0.9267, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "eval_cos_sim": 0.0687984898686409, | |
| "eval_loss": 0.9322374482368178, | |
| "eval_runtime": 89.6161, | |
| "eval_samples_per_second": 11.159, | |
| "eval_steps_per_second": 0.357, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07733007733007732, | |
| "grad_norm": 1.8200968503952026, | |
| "learning_rate": 6.444173110839778e-08, | |
| "loss": 0.9235, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07733007733007732, | |
| "eval_cos_sim": 0.06896985322237015, | |
| "eval_loss": 0.9320661959861464, | |
| "eval_runtime": 89.8737, | |
| "eval_samples_per_second": 11.127, | |
| "eval_steps_per_second": 0.356, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0814000814000814, | |
| "grad_norm": 1.8174818754196167, | |
| "learning_rate": 6.78334011667345e-08, | |
| "loss": 0.9248, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0814000814000814, | |
| "eval_cos_sim": 0.06914974004030228, | |
| "eval_loss": 0.9318864355300611, | |
| "eval_runtime": 89.8925, | |
| "eval_samples_per_second": 11.124, | |
| "eval_steps_per_second": 0.356, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 1.8348275423049927, | |
| "learning_rate": 7.122507122507124e-08, | |
| "loss": 0.9234, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "eval_cos_sim": 0.06933821737766266, | |
| "eval_loss": 0.9316981101249403, | |
| "eval_runtime": 89.537, | |
| "eval_samples_per_second": 11.169, | |
| "eval_steps_per_second": 0.357, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08954008954008955, | |
| "grad_norm": 1.8035074472427368, | |
| "learning_rate": 7.461674128340795e-08, | |
| "loss": 0.9222, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08954008954008955, | |
| "eval_cos_sim": 0.06953444331884384, | |
| "eval_loss": 0.9315020323013014, | |
| "eval_runtime": 89.8235, | |
| "eval_samples_per_second": 11.133, | |
| "eval_steps_per_second": 0.356, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0936100936100936, | |
| "grad_norm": 1.8179575204849243, | |
| "learning_rate": 7.800841134174468e-08, | |
| "loss": 0.9286, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0936100936100936, | |
| "eval_cos_sim": 0.06974231451749802, | |
| "eval_loss": 0.9312943387244886, | |
| "eval_runtime": 89.2117, | |
| "eval_samples_per_second": 11.209, | |
| "eval_steps_per_second": 0.359, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 1.7966115474700928, | |
| "learning_rate": 8.14000814000814e-08, | |
| "loss": 0.9255, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "eval_cos_sim": 0.06995726376771927, | |
| "eval_loss": 0.931079523107881, | |
| "eval_runtime": 89.804, | |
| "eval_samples_per_second": 11.135, | |
| "eval_steps_per_second": 0.356, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10175010175010175, | |
| "grad_norm": 1.797987461090088, | |
| "learning_rate": 8.479175145841813e-08, | |
| "loss": 0.9262, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10175010175010175, | |
| "eval_cos_sim": 0.07017555087804794, | |
| "eval_loss": 0.9308614025329298, | |
| "eval_runtime": 89.3997, | |
| "eval_samples_per_second": 11.186, | |
| "eval_steps_per_second": 0.358, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10582010582010581, | |
| "grad_norm": 1.8064875602722168, | |
| "learning_rate": 8.818342151675485e-08, | |
| "loss": 0.9273, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10582010582010581, | |
| "eval_cos_sim": 0.0703999400138855, | |
| "eval_loss": 0.9306371965621656, | |
| "eval_runtime": 89.9472, | |
| "eval_samples_per_second": 11.118, | |
| "eval_steps_per_second": 0.356, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 1.8176881074905396, | |
| "learning_rate": 9.157509157509157e-08, | |
| "loss": 0.9206, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "eval_cos_sim": 0.07064417004585266, | |
| "eval_loss": 0.9303931417678541, | |
| "eval_runtime": 89.7373, | |
| "eval_samples_per_second": 11.144, | |
| "eval_steps_per_second": 0.357, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11396011396011396, | |
| "grad_norm": 1.5151400566101074, | |
| "learning_rate": 9.496676163342831e-08, | |
| "loss": 0.9211, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11396011396011396, | |
| "eval_cos_sim": 0.0709003284573555, | |
| "eval_loss": 0.9301371760581678, | |
| "eval_runtime": 89.9622, | |
| "eval_samples_per_second": 11.116, | |
| "eval_steps_per_second": 0.356, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11803011803011804, | |
| "grad_norm": 1.7980380058288574, | |
| "learning_rate": 9.835843169176503e-08, | |
| "loss": 0.9203, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11803011803011804, | |
| "eval_cos_sim": 0.07116622477769852, | |
| "eval_loss": 0.9298714599822706, | |
| "eval_runtime": 90.2995, | |
| "eval_samples_per_second": 11.074, | |
| "eval_steps_per_second": 0.354, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 1.5477901697158813, | |
| "learning_rate": 1.0175010175010174e-07, | |
| "loss": 0.9216, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "eval_cos_sim": 0.07143931835889816, | |
| "eval_loss": 0.9295985355590528, | |
| "eval_runtime": 90.1186, | |
| "eval_samples_per_second": 11.096, | |
| "eval_steps_per_second": 0.355, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12617012617012616, | |
| "grad_norm": 1.7951384782791138, | |
| "learning_rate": 1.0514177180843848e-07, | |
| "loss": 0.9261, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12617012617012616, | |
| "eval_cos_sim": 0.07170899212360382, | |
| "eval_loss": 0.9293290882323927, | |
| "eval_runtime": 90.3148, | |
| "eval_samples_per_second": 11.072, | |
| "eval_steps_per_second": 0.354, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.13024013024013023, | |
| "grad_norm": 1.7869328260421753, | |
| "learning_rate": 1.085334418667752e-07, | |
| "loss": 0.9189, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.13024013024013023, | |
| "eval_cos_sim": 0.07198921591043472, | |
| "eval_loss": 0.9290490951751417, | |
| "eval_runtime": 89.77, | |
| "eval_samples_per_second": 11.14, | |
| "eval_steps_per_second": 0.356, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 1.5212459564208984, | |
| "learning_rate": 1.1192511192511194e-07, | |
| "loss": 0.9254, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "eval_cos_sim": 0.07227852195501328, | |
| "eval_loss": 0.9287599940513319, | |
| "eval_runtime": 90.335, | |
| "eval_samples_per_second": 11.07, | |
| "eval_steps_per_second": 0.354, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13838013838013838, | |
| "grad_norm": 1.8259695768356323, | |
| "learning_rate": 1.1531678198344865e-07, | |
| "loss": 0.924, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.13838013838013838, | |
| "eval_cos_sim": 0.072585329413414, | |
| "eval_loss": 0.928453390142793, | |
| "eval_runtime": 90.0139, | |
| "eval_samples_per_second": 11.109, | |
| "eval_steps_per_second": 0.356, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14245014245014245, | |
| "grad_norm": 1.810530424118042, | |
| "learning_rate": 1.1870845204178537e-07, | |
| "loss": 0.9223, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14245014245014245, | |
| "eval_cos_sim": 0.07290184497833252, | |
| "eval_loss": 0.92813706495511, | |
| "eval_runtime": 89.979, | |
| "eval_samples_per_second": 11.114, | |
| "eval_steps_per_second": 0.356, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 1.819162368774414, | |
| "learning_rate": 1.221001221001221e-07, | |
| "loss": 0.9227, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "eval_cos_sim": 0.07322434335947037, | |
| "eval_loss": 0.9278148160194105, | |
| "eval_runtime": 90.5195, | |
| "eval_samples_per_second": 11.047, | |
| "eval_steps_per_second": 0.354, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1505901505901506, | |
| "grad_norm": 1.7979774475097656, | |
| "learning_rate": 1.2549179215845883e-07, | |
| "loss": 0.9185, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1505901505901506, | |
| "eval_cos_sim": 0.07355932146310806, | |
| "eval_loss": 0.9274800749038404, | |
| "eval_runtime": 90.4314, | |
| "eval_samples_per_second": 11.058, | |
| "eval_steps_per_second": 0.354, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.15466015466015465, | |
| "grad_norm": 1.784081220626831, | |
| "learning_rate": 1.2888346221679555e-07, | |
| "loss": 0.9175, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15466015466015465, | |
| "eval_cos_sim": 0.07390536367893219, | |
| "eval_loss": 0.9271342835639662, | |
| "eval_runtime": 90.2731, | |
| "eval_samples_per_second": 11.077, | |
| "eval_steps_per_second": 0.354, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 1.8276127576828003, | |
| "learning_rate": 1.3227513227513228e-07, | |
| "loss": 0.9165, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "eval_cos_sim": 0.07425787299871445, | |
| "eval_loss": 0.926782009146089, | |
| "eval_runtime": 89.6304, | |
| "eval_samples_per_second": 11.157, | |
| "eval_steps_per_second": 0.357, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1628001628001628, | |
| "grad_norm": 1.8162034749984741, | |
| "learning_rate": 1.35666802333469e-07, | |
| "loss": 0.9199, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1628001628001628, | |
| "eval_cos_sim": 0.07460718601942062, | |
| "eval_loss": 0.9264329524253553, | |
| "eval_runtime": 90.4314, | |
| "eval_samples_per_second": 11.058, | |
| "eval_steps_per_second": 0.354, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16687016687016687, | |
| "grad_norm": 1.4985719919204712, | |
| "learning_rate": 1.3905847239180572e-07, | |
| "loss": 0.9224, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16687016687016687, | |
| "eval_cos_sim": 0.0749591663479805, | |
| "eval_loss": 0.9260812273238844, | |
| "eval_runtime": 89.9689, | |
| "eval_samples_per_second": 11.115, | |
| "eval_steps_per_second": 0.356, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 1.8033661842346191, | |
| "learning_rate": 1.4245014245014247e-07, | |
| "loss": 0.9152, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "eval_cos_sim": 0.07533340901136398, | |
| "eval_loss": 0.9257072530006116, | |
| "eval_runtime": 90.1973, | |
| "eval_samples_per_second": 11.087, | |
| "eval_steps_per_second": 0.355, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17501017501017502, | |
| "grad_norm": 1.7833577394485474, | |
| "learning_rate": 1.4584181250847917e-07, | |
| "loss": 0.9187, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17501017501017502, | |
| "eval_cos_sim": 0.07572542876005173, | |
| "eval_loss": 0.9253155136321729, | |
| "eval_runtime": 89.962, | |
| "eval_samples_per_second": 11.116, | |
| "eval_steps_per_second": 0.356, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1790801790801791, | |
| "grad_norm": 1.7877451181411743, | |
| "learning_rate": 1.492334825668159e-07, | |
| "loss": 0.9175, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1790801790801791, | |
| "eval_cos_sim": 0.0761212408542633, | |
| "eval_loss": 0.9249199991439527, | |
| "eval_runtime": 89.9831, | |
| "eval_samples_per_second": 11.113, | |
| "eval_steps_per_second": 0.356, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 1.7935380935668945, | |
| "learning_rate": 1.5262515262515264e-07, | |
| "loss": 0.9167, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "eval_cos_sim": 0.07652699947357178, | |
| "eval_loss": 0.9245145797942823, | |
| "eval_runtime": 90.5936, | |
| "eval_samples_per_second": 11.038, | |
| "eval_steps_per_second": 0.353, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1872201872201872, | |
| "grad_norm": 1.8291174173355103, | |
| "learning_rate": 1.5601682268348936e-07, | |
| "loss": 0.9127, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1872201872201872, | |
| "eval_cos_sim": 0.0769369825720787, | |
| "eval_loss": 0.9241049232696241, | |
| "eval_runtime": 89.8268, | |
| "eval_samples_per_second": 11.133, | |
| "eval_steps_per_second": 0.356, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.19129019129019129, | |
| "grad_norm": 1.5488710403442383, | |
| "learning_rate": 1.594084927418261e-07, | |
| "loss": 0.9227, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19129019129019129, | |
| "eval_cos_sim": 0.07736244797706604, | |
| "eval_loss": 0.9236797976707166, | |
| "eval_runtime": 89.9173, | |
| "eval_samples_per_second": 11.121, | |
| "eval_steps_per_second": 0.356, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 1.5216211080551147, | |
| "learning_rate": 1.628001628001628e-07, | |
| "loss": 0.9196, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "eval_cos_sim": 0.07778871059417725, | |
| "eval_loss": 0.9232538404678052, | |
| "eval_runtime": 90.6603, | |
| "eval_samples_per_second": 11.03, | |
| "eval_steps_per_second": 0.353, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.19943019943019943, | |
| "grad_norm": 1.7952001094818115, | |
| "learning_rate": 1.6619183285849953e-07, | |
| "loss": 0.9113, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.19943019943019943, | |
| "eval_cos_sim": 0.07823511958122253, | |
| "eval_loss": 0.922807737371797, | |
| "eval_runtime": 90.9654, | |
| "eval_samples_per_second": 10.993, | |
| "eval_steps_per_second": 0.352, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2035002035002035, | |
| "grad_norm": 1.5366356372833252, | |
| "learning_rate": 1.6958350291683626e-07, | |
| "loss": 0.9204, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2035002035002035, | |
| "eval_cos_sim": 0.07868806272745132, | |
| "eval_loss": 0.9223551345084852, | |
| "eval_runtime": 90.5097, | |
| "eval_samples_per_second": 11.049, | |
| "eval_steps_per_second": 0.354, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 1.5292208194732666, | |
| "learning_rate": 1.7297517297517298e-07, | |
| "loss": 0.9163, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "eval_cos_sim": 0.0791405662894249, | |
| "eval_loss": 0.9219029598449415, | |
| "eval_runtime": 90.3795, | |
| "eval_samples_per_second": 11.064, | |
| "eval_steps_per_second": 0.354, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.21164021164021163, | |
| "grad_norm": 1.8251779079437256, | |
| "learning_rate": 1.763668430335097e-07, | |
| "loss": 0.9177, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.21164021164021163, | |
| "eval_cos_sim": 0.07960349321365356, | |
| "eval_loss": 0.921440408251161, | |
| "eval_runtime": 90.2913, | |
| "eval_samples_per_second": 11.075, | |
| "eval_steps_per_second": 0.354, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2157102157102157, | |
| "grad_norm": 1.8137582540512085, | |
| "learning_rate": 1.7975851309184642e-07, | |
| "loss": 0.9108, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2157102157102157, | |
| "eval_cos_sim": 0.08007470518350601, | |
| "eval_loss": 0.9209695620750136, | |
| "eval_runtime": 90.4592, | |
| "eval_samples_per_second": 11.055, | |
| "eval_steps_per_second": 0.354, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 1.5241363048553467, | |
| "learning_rate": 1.8315018315018315e-07, | |
| "loss": 0.9184, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "eval_cos_sim": 0.08055756241083145, | |
| "eval_loss": 0.9204870734428113, | |
| "eval_runtime": 91.1901, | |
| "eval_samples_per_second": 10.966, | |
| "eval_steps_per_second": 0.351, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.22385022385022385, | |
| "grad_norm": 1.8105984926223755, | |
| "learning_rate": 1.865418532085199e-07, | |
| "loss": 0.9157, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22385022385022385, | |
| "eval_cos_sim": 0.08104187250137329, | |
| "eval_loss": 0.9200031218742079, | |
| "eval_runtime": 90.5659, | |
| "eval_samples_per_second": 11.042, | |
| "eval_steps_per_second": 0.353, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22792022792022792, | |
| "grad_norm": 1.430216908454895, | |
| "learning_rate": 1.8993352326685662e-07, | |
| "loss": 0.9178, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.22792022792022792, | |
| "eval_cos_sim": 0.08153677731752396, | |
| "eval_loss": 0.9195085845206923, | |
| "eval_runtime": 91.0613, | |
| "eval_samples_per_second": 10.982, | |
| "eval_steps_per_second": 0.351, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 1.5421180725097656, | |
| "learning_rate": 1.9332519332519332e-07, | |
| "loss": 0.9142, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "eval_cos_sim": 0.08204251527786255, | |
| "eval_loss": 0.9190033140395826, | |
| "eval_runtime": 90.396, | |
| "eval_samples_per_second": 11.062, | |
| "eval_steps_per_second": 0.354, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23606023606023607, | |
| "grad_norm": 1.8284296989440918, | |
| "learning_rate": 1.9671686338353007e-07, | |
| "loss": 0.9138, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23606023606023607, | |
| "eval_cos_sim": 0.08255407214164734, | |
| "eval_loss": 0.9184921603416151, | |
| "eval_runtime": 90.7456, | |
| "eval_samples_per_second": 11.02, | |
| "eval_steps_per_second": 0.353, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.24013024013024012, | |
| "grad_norm": 1.7948687076568604, | |
| "learning_rate": 2.001085334418668e-07, | |
| "loss": 0.9125, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24013024013024012, | |
| "eval_cos_sim": 0.08306826651096344, | |
| "eval_loss": 0.9179783883308119, | |
| "eval_runtime": 90.7739, | |
| "eval_samples_per_second": 11.016, | |
| "eval_steps_per_second": 0.353, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 1.7996617555618286, | |
| "learning_rate": 2.0350020350020349e-07, | |
| "loss": 0.9098, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "eval_cos_sim": 0.08360342681407928, | |
| "eval_loss": 0.917443627378816, | |
| "eval_runtime": 91.56, | |
| "eval_samples_per_second": 10.922, | |
| "eval_steps_per_second": 0.349, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24827024827024827, | |
| "grad_norm": 1.5217535495758057, | |
| "learning_rate": 2.0689187355854024e-07, | |
| "loss": 0.9111, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.24827024827024827, | |
| "eval_cos_sim": 0.08415944874286652, | |
| "eval_loss": 0.9168879828666395, | |
| "eval_runtime": 91.0251, | |
| "eval_samples_per_second": 10.986, | |
| "eval_steps_per_second": 0.352, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2523402523402523, | |
| "grad_norm": 1.549865961074829, | |
| "learning_rate": 2.1028354361687696e-07, | |
| "loss": 0.9089, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2523402523402523, | |
| "eval_cos_sim": 0.08473014831542969, | |
| "eval_loss": 0.9163176923011488, | |
| "eval_runtime": 91.5776, | |
| "eval_samples_per_second": 10.92, | |
| "eval_steps_per_second": 0.349, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 1.531300663948059, | |
| "learning_rate": 2.136752136752137e-07, | |
| "loss": 0.9091, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "eval_cos_sim": 0.08530249446630478, | |
| "eval_loss": 0.9157457843040174, | |
| "eval_runtime": 91.1083, | |
| "eval_samples_per_second": 10.976, | |
| "eval_steps_per_second": 0.351, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.26048026048026046, | |
| "grad_norm": 1.8052423000335693, | |
| "learning_rate": 2.170668837335504e-07, | |
| "loss": 0.9067, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.26048026048026046, | |
| "eval_cos_sim": 0.0858771950006485, | |
| "eval_loss": 0.9151715054725355, | |
| "eval_runtime": 92.0234, | |
| "eval_samples_per_second": 10.867, | |
| "eval_steps_per_second": 0.348, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.26455026455026454, | |
| "grad_norm": 1.513959288597107, | |
| "learning_rate": 2.2045855379188713e-07, | |
| "loss": 0.91, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.26455026455026454, | |
| "eval_cos_sim": 0.08646074682474136, | |
| "eval_loss": 0.914588417551393, | |
| "eval_runtime": 91.7102, | |
| "eval_samples_per_second": 10.904, | |
| "eval_steps_per_second": 0.349, | |
| "step": 650 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1474200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 600, | |
| "save_steps": 10, | |
| "total_flos": 0.0, | |
| "train_batch_size": 160, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |