| { |
| "best_metric": 0.2512021064758301, |
| "best_model_checkpoint": "multilingual-e5-small-aligned-quality/checkpoint-40644", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 40644, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03690581635665781, |
| "grad_norm": 5.902834892272949, |
| "learning_rate": 4.938490306072237e-05, |
| "loss": 0.4031, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07381163271331562, |
| "grad_norm": 2.7090084552764893, |
| "learning_rate": 4.876980612144474e-05, |
| "loss": 0.3469, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11071744906997343, |
| "grad_norm": 2.5691611766815186, |
| "learning_rate": 4.815470918216711e-05, |
| "loss": 0.3437, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.14762326542663123, |
| "grad_norm": 2.3187239170074463, |
| "learning_rate": 4.7539612242889484e-05, |
| "loss": 0.3309, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.18452908178328906, |
| "grad_norm": 2.1093502044677734, |
| "learning_rate": 4.692451530361185e-05, |
| "loss": 0.3247, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.22143489813994685, |
| "grad_norm": 1.9845925569534302, |
| "learning_rate": 4.6309418364334224e-05, |
| "loss": 0.3169, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2583407144966047, |
| "grad_norm": 2.292973756790161, |
| "learning_rate": 4.5694321425056594e-05, |
| "loss": 0.3197, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.29524653085326247, |
| "grad_norm": 1.499457597732544, |
| "learning_rate": 4.507922448577896e-05, |
| "loss": 0.3142, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.33215234720992026, |
| "grad_norm": 1.7365167140960693, |
| "learning_rate": 4.4464127546501335e-05, |
| "loss": 0.3138, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.3690581635665781, |
| "grad_norm": 2.04133939743042, |
| "learning_rate": 4.38490306072237e-05, |
| "loss": 0.3132, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.4059639799232359, |
| "grad_norm": 1.8704568147659302, |
| "learning_rate": 4.323393366794607e-05, |
| "loss": 0.3104, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.4428697962798937, |
| "grad_norm": 2.452059268951416, |
| "learning_rate": 4.261883672866844e-05, |
| "loss": 0.3046, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.4797756126365515, |
| "grad_norm": 2.5406882762908936, |
| "learning_rate": 4.200373978939081e-05, |
| "loss": 0.2989, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.5166814289932093, |
| "grad_norm": 1.574673056602478, |
| "learning_rate": 4.138864285011318e-05, |
| "loss": 0.3005, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.5535872453498671, |
| "grad_norm": 2.1640470027923584, |
| "learning_rate": 4.077354591083555e-05, |
| "loss": 0.3013, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.5904930617065249, |
| "grad_norm": 2.8707878589630127, |
| "learning_rate": 4.015844897155792e-05, |
| "loss": 0.2981, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.6273988780631827, |
| "grad_norm": 2.8684544563293457, |
| "learning_rate": 3.954335203228029e-05, |
| "loss": 0.2979, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.6643046944198405, |
| "grad_norm": 2.2373464107513428, |
| "learning_rate": 3.892825509300266e-05, |
| "loss": 0.288, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.7012105107764984, |
| "grad_norm": 2.0412282943725586, |
| "learning_rate": 3.8313158153725024e-05, |
| "loss": 0.292, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.7381163271331562, |
| "grad_norm": 2.2188100814819336, |
| "learning_rate": 3.76980612144474e-05, |
| "loss": 0.2909, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.775022143489814, |
| "grad_norm": 1.4839853048324585, |
| "learning_rate": 3.708296427516977e-05, |
| "loss": 0.2851, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.8119279598464718, |
| "grad_norm": 1.4828788042068481, |
| "learning_rate": 3.6467867335892135e-05, |
| "loss": 0.2894, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.8488337762031296, |
| "grad_norm": 1.8619405031204224, |
| "learning_rate": 3.585277039661451e-05, |
| "loss": 0.2885, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.8857395925597874, |
| "grad_norm": 1.9477214813232422, |
| "learning_rate": 3.5237673457336876e-05, |
| "loss": 0.2813, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.9226454089164452, |
| "grad_norm": 2.7381999492645264, |
| "learning_rate": 3.4622576518059246e-05, |
| "loss": 0.284, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.959551225273103, |
| "grad_norm": 1.8171463012695312, |
| "learning_rate": 3.400747957878162e-05, |
| "loss": 0.2811, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.9964570416297609, |
| "grad_norm": 2.59826922416687, |
| "learning_rate": 3.3392382639503986e-05, |
| "loss": 0.283, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.27975523471832275, |
| "eval_mse": 0.27975526814178425, |
| "eval_runtime": 52.0159, |
| "eval_samples_per_second": 1852.088, |
| "eval_steps_per_second": 231.525, |
| "step": 13548 |
| }, |
| { |
| "epoch": 1.0333628579864187, |
| "grad_norm": 3.655153751373291, |
| "learning_rate": 3.277728570022636e-05, |
| "loss": 0.236, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.0702686743430765, |
| "grad_norm": 2.276049852371216, |
| "learning_rate": 3.216218876094873e-05, |
| "loss": 0.2364, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.1071744906997343, |
| "grad_norm": 1.4967354536056519, |
| "learning_rate": 3.15470918216711e-05, |
| "loss": 0.2317, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.144080307056392, |
| "grad_norm": 1.4636516571044922, |
| "learning_rate": 3.093199488239347e-05, |
| "loss": 0.2342, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.1809861234130499, |
| "grad_norm": 2.246140956878662, |
| "learning_rate": 3.0316897943115834e-05, |
| "loss": 0.2288, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.2178919397697077, |
| "grad_norm": 1.4207803010940552, |
| "learning_rate": 2.9701801003838208e-05, |
| "loss": 0.2302, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.2547977561263655, |
| "grad_norm": 2.0020480155944824, |
| "learning_rate": 2.9086704064560578e-05, |
| "loss": 0.2331, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.2917035724830233, |
| "grad_norm": 1.7502425909042358, |
| "learning_rate": 2.8471607125282945e-05, |
| "loss": 0.2296, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.328609388839681, |
| "grad_norm": 1.819958209991455, |
| "learning_rate": 2.7856510186005312e-05, |
| "loss": 0.2346, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.3655152051963388, |
| "grad_norm": 2.5178093910217285, |
| "learning_rate": 2.7241413246727686e-05, |
| "loss": 0.2291, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.4024210215529966, |
| "grad_norm": 1.7607210874557495, |
| "learning_rate": 2.6626316307450056e-05, |
| "loss": 0.2266, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.4393268379096544, |
| "grad_norm": 2.5194263458251953, |
| "learning_rate": 2.6011219368172423e-05, |
| "loss": 0.2292, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.4762326542663124, |
| "grad_norm": 1.6286081075668335, |
| "learning_rate": 2.5396122428894797e-05, |
| "loss": 0.2298, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.51313847062297, |
| "grad_norm": 2.4123353958129883, |
| "learning_rate": 2.4781025489617167e-05, |
| "loss": 0.2283, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.550044286979628, |
| "grad_norm": 1.9285629987716675, |
| "learning_rate": 2.4165928550339534e-05, |
| "loss": 0.2263, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.5869501033362858, |
| "grad_norm": 2.4015371799468994, |
| "learning_rate": 2.3550831611061904e-05, |
| "loss": 0.2265, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.6238559196929436, |
| "grad_norm": 1.3897498846054077, |
| "learning_rate": 2.2935734671784274e-05, |
| "loss": 0.2279, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.6607617360496014, |
| "grad_norm": 1.909913182258606, |
| "learning_rate": 2.2320637732506645e-05, |
| "loss": 0.2235, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.6976675524062592, |
| "grad_norm": 2.007033586502075, |
| "learning_rate": 2.1705540793229015e-05, |
| "loss": 0.2226, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.734573368762917, |
| "grad_norm": 1.4410597085952759, |
| "learning_rate": 2.1090443853951382e-05, |
| "loss": 0.2237, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.7714791851195748, |
| "grad_norm": 1.568517804145813, |
| "learning_rate": 2.0475346914673755e-05, |
| "loss": 0.2236, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.8083850014762326, |
| "grad_norm": 1.9290361404418945, |
| "learning_rate": 1.9860249975396122e-05, |
| "loss": 0.2245, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.8452908178328906, |
| "grad_norm": 1.7693965435028076, |
| "learning_rate": 1.9245153036118493e-05, |
| "loss": 0.2219, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.8821966341895484, |
| "grad_norm": 1.637657642364502, |
| "learning_rate": 1.8630056096840863e-05, |
| "loss": 0.2222, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.9191024505462062, |
| "grad_norm": 1.8758279085159302, |
| "learning_rate": 1.8014959157563233e-05, |
| "loss": 0.2237, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.956008266902864, |
| "grad_norm": 1.9924167394638062, |
| "learning_rate": 1.7399862218285603e-05, |
| "loss": 0.2221, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.9929140832595218, |
| "grad_norm": 1.6823917627334595, |
| "learning_rate": 1.678476527900797e-05, |
| "loss": 0.2212, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.2521688938140869, |
| "eval_mse": 0.2521688884473344, |
| "eval_runtime": 52.0006, |
| "eval_samples_per_second": 1852.634, |
| "eval_steps_per_second": 231.594, |
| "step": 27096 |
| }, |
| { |
| "epoch": 2.0298198996161796, |
| "grad_norm": 1.8799372911453247, |
| "learning_rate": 1.6169668339730344e-05, |
| "loss": 0.1923, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.0667257159728374, |
| "grad_norm": 1.8323724269866943, |
| "learning_rate": 1.555457140045271e-05, |
| "loss": 0.1843, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.103631532329495, |
| "grad_norm": 2.1051783561706543, |
| "learning_rate": 1.4939474461175081e-05, |
| "loss": 0.1828, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.140537348686153, |
| "grad_norm": 1.5870431661605835, |
| "learning_rate": 1.4324377521897453e-05, |
| "loss": 0.1871, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.1774431650428108, |
| "grad_norm": 1.8576958179473877, |
| "learning_rate": 1.3709280582619822e-05, |
| "loss": 0.1845, |
| "step": 29500 |
| }, |
| { |
| "epoch": 2.2143489813994686, |
| "grad_norm": 1.5509694814682007, |
| "learning_rate": 1.3094183643342192e-05, |
| "loss": 0.1838, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.2512547977561264, |
| "grad_norm": 1.8506149053573608, |
| "learning_rate": 1.2479086704064562e-05, |
| "loss": 0.1849, |
| "step": 30500 |
| }, |
| { |
| "epoch": 2.288160614112784, |
| "grad_norm": 1.8075580596923828, |
| "learning_rate": 1.186398976478693e-05, |
| "loss": 0.1858, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.325066430469442, |
| "grad_norm": 2.2976126670837402, |
| "learning_rate": 1.1248892825509301e-05, |
| "loss": 0.187, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.3619722468260997, |
| "grad_norm": 2.127387046813965, |
| "learning_rate": 1.0633795886231671e-05, |
| "loss": 0.1851, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.3988780631827575, |
| "grad_norm": 1.7915741205215454, |
| "learning_rate": 1.001869894695404e-05, |
| "loss": 0.1813, |
| "step": 32500 |
| }, |
| { |
| "epoch": 2.4357838795394153, |
| "grad_norm": 2.1885006427764893, |
| "learning_rate": 9.40360200767641e-06, |
| "loss": 0.1807, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.472689695896073, |
| "grad_norm": 2.7843916416168213, |
| "learning_rate": 8.78850506839878e-06, |
| "loss": 0.1839, |
| "step": 33500 |
| }, |
| { |
| "epoch": 2.509595512252731, |
| "grad_norm": 1.519360899925232, |
| "learning_rate": 8.17340812912115e-06, |
| "loss": 0.1846, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2.5465013286093887, |
| "grad_norm": 1.867719292640686, |
| "learning_rate": 7.55831118984352e-06, |
| "loss": 0.1843, |
| "step": 34500 |
| }, |
| { |
| "epoch": 2.5834071449660465, |
| "grad_norm": 1.8827580213546753, |
| "learning_rate": 6.94321425056589e-06, |
| "loss": 0.182, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.6203129613227043, |
| "grad_norm": 2.268225908279419, |
| "learning_rate": 6.328117311288259e-06, |
| "loss": 0.1817, |
| "step": 35500 |
| }, |
| { |
| "epoch": 2.657218777679362, |
| "grad_norm": 1.7755805253982544, |
| "learning_rate": 5.713020372010629e-06, |
| "loss": 0.1821, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.69412459403602, |
| "grad_norm": 1.9568016529083252, |
| "learning_rate": 5.097923432732999e-06, |
| "loss": 0.18, |
| "step": 36500 |
| }, |
| { |
| "epoch": 2.7310304103926777, |
| "grad_norm": 2.343839406967163, |
| "learning_rate": 4.482826493455368e-06, |
| "loss": 0.181, |
| "step": 37000 |
| }, |
| { |
| "epoch": 2.7679362267493355, |
| "grad_norm": 2.2050397396087646, |
| "learning_rate": 3.8677295541777385e-06, |
| "loss": 0.1817, |
| "step": 37500 |
| }, |
| { |
| "epoch": 2.8048420431059933, |
| "grad_norm": 1.7823182344436646, |
| "learning_rate": 3.2526326149001084e-06, |
| "loss": 0.1779, |
| "step": 38000 |
| }, |
| { |
| "epoch": 2.841747859462651, |
| "grad_norm": 1.8498305082321167, |
| "learning_rate": 2.6375356756224782e-06, |
| "loss": 0.1819, |
| "step": 38500 |
| }, |
| { |
| "epoch": 2.878653675819309, |
| "grad_norm": 2.2064967155456543, |
| "learning_rate": 2.022438736344848e-06, |
| "loss": 0.182, |
| "step": 39000 |
| }, |
| { |
| "epoch": 2.9155594921759667, |
| "grad_norm": 2.3844711780548096, |
| "learning_rate": 1.4073417970672177e-06, |
| "loss": 0.1786, |
| "step": 39500 |
| }, |
| { |
| "epoch": 2.952465308532625, |
| "grad_norm": 1.8031284809112549, |
| "learning_rate": 7.922448577895876e-07, |
| "loss": 0.1815, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.9893711248892827, |
| "grad_norm": 1.6803677082061768, |
| "learning_rate": 1.771479185119575e-07, |
| "loss": 0.1801, |
| "step": 40500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.2512021064758301, |
| "eval_mse": 0.2512021179375455, |
| "eval_runtime": 59.0402, |
| "eval_samples_per_second": 1631.736, |
| "eval_steps_per_second": 203.98, |
| "step": 40644 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 40644, |
| "total_flos": 4.283504864539085e+16, |
| "train_loss": 0.23920188012548746, |
| "train_runtime": 3164.8098, |
| "train_samples_per_second": 821.89, |
| "train_steps_per_second": 12.842 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 40644, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.283504864539085e+16, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|