| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 48015, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "embedding_loss": 0.2883, | |
| "epoch": 0.00010413412475268145, | |
| "grad_norm": 1.762959361076355, | |
| "learning_rate": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "embedding_loss": 0.2973, | |
| "epoch": 0.005206706237634072, | |
| "grad_norm": 2.416536569595337, | |
| "learning_rate": 2.0408163265306121e-07, | |
| "step": 50 | |
| }, | |
| { | |
| "embedding_loss": 0.2757, | |
| "epoch": 0.010413412475268145, | |
| "grad_norm": 1.1051920652389526, | |
| "learning_rate": 4.1232819658475635e-07, | |
| "step": 100 | |
| }, | |
| { | |
| "embedding_loss": 0.2678, | |
| "epoch": 0.015620118712902219, | |
| "grad_norm": 1.2399098873138428, | |
| "learning_rate": 6.205747605164515e-07, | |
| "step": 150 | |
| }, | |
| { | |
| "embedding_loss": 0.2554, | |
| "epoch": 0.02082682495053629, | |
| "grad_norm": 0.9736790657043457, | |
| "learning_rate": 8.288213244481466e-07, | |
| "step": 200 | |
| }, | |
| { | |
| "embedding_loss": 0.2485, | |
| "epoch": 0.026033531188170363, | |
| "grad_norm": 0.9768863916397095, | |
| "learning_rate": 1.037067888379842e-06, | |
| "step": 250 | |
| }, | |
| { | |
| "embedding_loss": 0.2472, | |
| "epoch": 0.031240237425804437, | |
| "grad_norm": 1.2124693393707275, | |
| "learning_rate": 1.2453144523115369e-06, | |
| "step": 300 | |
| }, | |
| { | |
| "embedding_loss": 0.2309, | |
| "epoch": 0.03644694366343851, | |
| "grad_norm": 1.0609492063522339, | |
| "learning_rate": 1.453561016243232e-06, | |
| "step": 350 | |
| }, | |
| { | |
| "embedding_loss": 0.225, | |
| "epoch": 0.04165364990107258, | |
| "grad_norm": 0.9569733142852783, | |
| "learning_rate": 1.6618075801749272e-06, | |
| "step": 400 | |
| }, | |
| { | |
| "embedding_loss": 0.2123, | |
| "epoch": 0.046860356138706656, | |
| "grad_norm": 1.1237151622772217, | |
| "learning_rate": 1.8700541441066226e-06, | |
| "step": 450 | |
| }, | |
| { | |
| "embedding_loss": 0.2045, | |
| "epoch": 0.05206706237634073, | |
| "grad_norm": 1.0526118278503418, | |
| "learning_rate": 2.0783007080383173e-06, | |
| "step": 500 | |
| }, | |
| { | |
| "embedding_loss": 0.1934, | |
| "epoch": 0.0572737686139748, | |
| "grad_norm": 1.1583402156829834, | |
| "learning_rate": 2.2865472719700125e-06, | |
| "step": 550 | |
| }, | |
| { | |
| "embedding_loss": 0.184, | |
| "epoch": 0.062480474851608875, | |
| "grad_norm": 1.313284158706665, | |
| "learning_rate": 2.494793835901708e-06, | |
| "step": 600 | |
| }, | |
| { | |
| "embedding_loss": 0.183, | |
| "epoch": 0.06768718108924295, | |
| "grad_norm": 1.2073508501052856, | |
| "learning_rate": 2.7030403998334032e-06, | |
| "step": 650 | |
| }, | |
| { | |
| "embedding_loss": 0.171, | |
| "epoch": 0.07289388732687702, | |
| "grad_norm": 0.9187403321266174, | |
| "learning_rate": 2.911286963765098e-06, | |
| "step": 700 | |
| }, | |
| { | |
| "embedding_loss": 0.1752, | |
| "epoch": 0.07810059356451109, | |
| "grad_norm": 1.049507737159729, | |
| "learning_rate": 3.119533527696793e-06, | |
| "step": 750 | |
| }, | |
| { | |
| "embedding_loss": 0.1627, | |
| "epoch": 0.08330729980214516, | |
| "grad_norm": 1.696567416191101, | |
| "learning_rate": 3.3277800916284887e-06, | |
| "step": 800 | |
| }, | |
| { | |
| "embedding_loss": 0.1623, | |
| "epoch": 0.08851400603977924, | |
| "grad_norm": 1.0722424983978271, | |
| "learning_rate": 3.5360266555601835e-06, | |
| "step": 850 | |
| }, | |
| { | |
| "embedding_loss": 0.1557, | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 0.8424978256225586, | |
| "learning_rate": 3.7442732194918786e-06, | |
| "step": 900 | |
| }, | |
| { | |
| "embedding_loss": 0.151, | |
| "epoch": 0.09892741851504738, | |
| "grad_norm": 1.0043538808822632, | |
| "learning_rate": 3.952519783423574e-06, | |
| "step": 950 | |
| }, | |
| { | |
| "embedding_loss": 0.1424, | |
| "epoch": 0.10413412475268145, | |
| "grad_norm": 0.910914957523346, | |
| "learning_rate": 4.160766347355269e-06, | |
| "step": 1000 | |
| }, | |
| { | |
| "embedding_loss": 0.1347, | |
| "epoch": 0.10934083099031552, | |
| "grad_norm": 1.1411229372024536, | |
| "learning_rate": 4.369012911286964e-06, | |
| "step": 1050 | |
| }, | |
| { | |
| "embedding_loss": 0.1342, | |
| "epoch": 0.1145475372279496, | |
| "grad_norm": 1.1177036762237549, | |
| "learning_rate": 4.57725947521866e-06, | |
| "step": 1100 | |
| }, | |
| { | |
| "embedding_loss": 0.1282, | |
| "epoch": 0.11975424346558367, | |
| "grad_norm": 1.1329469680786133, | |
| "learning_rate": 4.785506039150354e-06, | |
| "step": 1150 | |
| }, | |
| { | |
| "embedding_loss": 0.1309, | |
| "epoch": 0.12496094970321775, | |
| "grad_norm": 1.26534104347229, | |
| "learning_rate": 4.993752603082049e-06, | |
| "step": 1200 | |
| }, | |
| { | |
| "embedding_loss": 0.1198, | |
| "epoch": 0.1301676559408518, | |
| "grad_norm": 0.8742411136627197, | |
| "learning_rate": 5.201999167013745e-06, | |
| "step": 1250 | |
| }, | |
| { | |
| "embedding_loss": 0.1182, | |
| "epoch": 0.1353743621784859, | |
| "grad_norm": 1.0818109512329102, | |
| "learning_rate": 5.41024573094544e-06, | |
| "step": 1300 | |
| }, | |
| { | |
| "embedding_loss": 0.1099, | |
| "epoch": 0.14058106841611998, | |
| "grad_norm": 0.9232504963874817, | |
| "learning_rate": 5.618492294877135e-06, | |
| "step": 1350 | |
| }, | |
| { | |
| "embedding_loss": 0.1019, | |
| "epoch": 0.14578777465375403, | |
| "grad_norm": 1.013424277305603, | |
| "learning_rate": 5.826738858808831e-06, | |
| "step": 1400 | |
| }, | |
| { | |
| "embedding_loss": 0.0982, | |
| "epoch": 0.15099448089138812, | |
| "grad_norm": 1.4098010063171387, | |
| "learning_rate": 6.034985422740526e-06, | |
| "step": 1450 | |
| }, | |
| { | |
| "embedding_loss": 0.1038, | |
| "epoch": 0.15620118712902217, | |
| "grad_norm": 1.015489935874939, | |
| "learning_rate": 6.24323198667222e-06, | |
| "step": 1500 | |
| }, | |
| { | |
| "embedding_loss": 0.1064, | |
| "epoch": 0.16140789336665626, | |
| "grad_norm": 1.4240305423736572, | |
| "learning_rate": 6.451478550603915e-06, | |
| "step": 1550 | |
| }, | |
| { | |
| "embedding_loss": 0.1007, | |
| "epoch": 0.16661459960429031, | |
| "grad_norm": 1.1620804071426392, | |
| "learning_rate": 6.659725114535611e-06, | |
| "step": 1600 | |
| }, | |
| { | |
| "embedding_loss": 0.0899, | |
| "epoch": 0.1718213058419244, | |
| "grad_norm": 2.043187379837036, | |
| "learning_rate": 6.867971678467306e-06, | |
| "step": 1650 | |
| }, | |
| { | |
| "embedding_loss": 0.1019, | |
| "epoch": 0.17702801207955848, | |
| "grad_norm": 1.0461031198501587, | |
| "learning_rate": 7.076218242399001e-06, | |
| "step": 1700 | |
| }, | |
| { | |
| "embedding_loss": 0.0954, | |
| "epoch": 0.18223471831719254, | |
| "grad_norm": 1.0107996463775635, | |
| "learning_rate": 7.284464806330697e-06, | |
| "step": 1750 | |
| }, | |
| { | |
| "embedding_loss": 0.0799, | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 1.0582354068756104, | |
| "learning_rate": 7.492711370262391e-06, | |
| "step": 1800 | |
| }, | |
| { | |
| "embedding_loss": 0.0864, | |
| "epoch": 0.19264813079246068, | |
| "grad_norm": 1.3842886686325073, | |
| "learning_rate": 7.700957934194086e-06, | |
| "step": 1850 | |
| }, | |
| { | |
| "embedding_loss": 0.0863, | |
| "epoch": 0.19785483703009477, | |
| "grad_norm": 1.2879295349121094, | |
| "learning_rate": 7.909204498125781e-06, | |
| "step": 1900 | |
| }, | |
| { | |
| "embedding_loss": 0.0823, | |
| "epoch": 0.20306154326772882, | |
| "grad_norm": 1.2666908502578735, | |
| "learning_rate": 8.117451062057477e-06, | |
| "step": 1950 | |
| }, | |
| { | |
| "embedding_loss": 0.083, | |
| "epoch": 0.2082682495053629, | |
| "grad_norm": 1.3312140703201294, | |
| "learning_rate": 8.325697625989172e-06, | |
| "step": 2000 | |
| }, | |
| { | |
| "embedding_loss": 0.0887, | |
| "epoch": 0.213474955742997, | |
| "grad_norm": 0.9761432409286499, | |
| "learning_rate": 8.533944189920867e-06, | |
| "step": 2050 | |
| }, | |
| { | |
| "embedding_loss": 0.0796, | |
| "epoch": 0.21868166198063105, | |
| "grad_norm": 1.1199424266815186, | |
| "learning_rate": 8.742190753852562e-06, | |
| "step": 2100 | |
| }, | |
| { | |
| "embedding_loss": 0.0827, | |
| "epoch": 0.22388836821826513, | |
| "grad_norm": 1.4127182960510254, | |
| "learning_rate": 8.950437317784257e-06, | |
| "step": 2150 | |
| }, | |
| { | |
| "embedding_loss": 0.0745, | |
| "epoch": 0.2290950744558992, | |
| "grad_norm": 2.0330944061279297, | |
| "learning_rate": 9.158683881715952e-06, | |
| "step": 2200 | |
| }, | |
| { | |
| "embedding_loss": 0.0752, | |
| "epoch": 0.23430178069353327, | |
| "grad_norm": 1.4823620319366455, | |
| "learning_rate": 9.366930445647648e-06, | |
| "step": 2250 | |
| }, | |
| { | |
| "embedding_loss": 0.0676, | |
| "epoch": 0.23950848693116733, | |
| "grad_norm": 1.2921267747879028, | |
| "learning_rate": 9.575177009579343e-06, | |
| "step": 2300 | |
| }, | |
| { | |
| "embedding_loss": 0.0616, | |
| "epoch": 0.24471519316880141, | |
| "grad_norm": 1.8864023685455322, | |
| "learning_rate": 9.783423573511038e-06, | |
| "step": 2350 | |
| }, | |
| { | |
| "embedding_loss": 0.0661, | |
| "epoch": 0.2499218994064355, | |
| "grad_norm": 1.3003889322280884, | |
| "learning_rate": 9.991670137442733e-06, | |
| "step": 2400 | |
| }, | |
| { | |
| "embedding_loss": 0.0587, | |
| "epoch": 0.2551286056440696, | |
| "grad_norm": 1.0105185508728027, | |
| "learning_rate": 1.0199916701374428e-05, | |
| "step": 2450 | |
| }, | |
| { | |
| "embedding_loss": 0.0576, | |
| "epoch": 0.2603353118817036, | |
| "grad_norm": 1.3157322406768799, | |
| "learning_rate": 1.0408163265306123e-05, | |
| "step": 2500 | |
| }, | |
| { | |
| "embedding_loss": 0.0548, | |
| "epoch": 0.2655420181193377, | |
| "grad_norm": 1.5181084871292114, | |
| "learning_rate": 1.0616409829237819e-05, | |
| "step": 2550 | |
| }, | |
| { | |
| "embedding_loss": 0.0549, | |
| "epoch": 0.2707487243569718, | |
| "grad_norm": 0.9998575448989868, | |
| "learning_rate": 1.0824656393169512e-05, | |
| "step": 2600 | |
| }, | |
| { | |
| "embedding_loss": 0.0542, | |
| "epoch": 0.27595543059460587, | |
| "grad_norm": 1.8933945894241333, | |
| "learning_rate": 1.1032902957101209e-05, | |
| "step": 2650 | |
| }, | |
| { | |
| "embedding_loss": 0.0551, | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 0.8569897413253784, | |
| "learning_rate": 1.1241149521032904e-05, | |
| "step": 2700 | |
| }, | |
| { | |
| "embedding_loss": 0.0535, | |
| "epoch": 0.286368843069874, | |
| "grad_norm": 2.692810297012329, | |
| "learning_rate": 1.14493960849646e-05, | |
| "step": 2750 | |
| }, | |
| { | |
| "embedding_loss": 0.0512, | |
| "epoch": 0.29157554930750806, | |
| "grad_norm": 1.0588935613632202, | |
| "learning_rate": 1.1657642648896294e-05, | |
| "step": 2800 | |
| }, | |
| { | |
| "embedding_loss": 0.0471, | |
| "epoch": 0.29678225554514215, | |
| "grad_norm": 1.4624029397964478, | |
| "learning_rate": 1.186588921282799e-05, | |
| "step": 2850 | |
| }, | |
| { | |
| "embedding_loss": 0.0418, | |
| "epoch": 0.30198896178277623, | |
| "grad_norm": 1.4698013067245483, | |
| "learning_rate": 1.2074135776759683e-05, | |
| "step": 2900 | |
| }, | |
| { | |
| "embedding_loss": 0.0456, | |
| "epoch": 0.30719566802041026, | |
| "grad_norm": 2.7604193687438965, | |
| "learning_rate": 1.2282382340691378e-05, | |
| "step": 2950 | |
| }, | |
| { | |
| "embedding_loss": 0.0426, | |
| "epoch": 0.31240237425804435, | |
| "grad_norm": 1.8741382360458374, | |
| "learning_rate": 1.2490628904623075e-05, | |
| "step": 3000 | |
| }, | |
| { | |
| "embedding_loss": 0.0381, | |
| "epoch": 0.31760908049567843, | |
| "grad_norm": 1.6704306602478027, | |
| "learning_rate": 1.269887546855477e-05, | |
| "step": 3050 | |
| }, | |
| { | |
| "embedding_loss": 0.0476, | |
| "epoch": 0.3228157867333125, | |
| "grad_norm": 1.6957885026931763, | |
| "learning_rate": 1.2907122032486465e-05, | |
| "step": 3100 | |
| }, | |
| { | |
| "embedding_loss": 0.0384, | |
| "epoch": 0.3280224929709466, | |
| "grad_norm": 1.3916475772857666, | |
| "learning_rate": 1.311536859641816e-05, | |
| "step": 3150 | |
| }, | |
| { | |
| "embedding_loss": 0.0398, | |
| "epoch": 0.33322919920858063, | |
| "grad_norm": 1.3985787630081177, | |
| "learning_rate": 1.3323615160349854e-05, | |
| "step": 3200 | |
| }, | |
| { | |
| "embedding_loss": 0.0383, | |
| "epoch": 0.3384359054462147, | |
| "grad_norm": 1.0203100442886353, | |
| "learning_rate": 1.3531861724281549e-05, | |
| "step": 3250 | |
| }, | |
| { | |
| "embedding_loss": 0.0355, | |
| "epoch": 0.3436426116838488, | |
| "grad_norm": 1.1589559316635132, | |
| "learning_rate": 1.3740108288213246e-05, | |
| "step": 3300 | |
| }, | |
| { | |
| "embedding_loss": 0.0391, | |
| "epoch": 0.3488493179214829, | |
| "grad_norm": 0.5909947752952576, | |
| "learning_rate": 1.3948354852144941e-05, | |
| "step": 3350 | |
| }, | |
| { | |
| "embedding_loss": 0.0376, | |
| "epoch": 0.35405602415911697, | |
| "grad_norm": 1.096784234046936, | |
| "learning_rate": 1.4156601416076636e-05, | |
| "step": 3400 | |
| }, | |
| { | |
| "embedding_loss": 0.0372, | |
| "epoch": 0.359262730396751, | |
| "grad_norm": 2.7773685455322266, | |
| "learning_rate": 1.4364847980008331e-05, | |
| "step": 3450 | |
| }, | |
| { | |
| "embedding_loss": 0.0354, | |
| "epoch": 0.3644694366343851, | |
| "grad_norm": 1.3911575078964233, | |
| "learning_rate": 1.4573094543940025e-05, | |
| "step": 3500 | |
| }, | |
| { | |
| "embedding_loss": 0.0292, | |
| "epoch": 0.36967614287201916, | |
| "grad_norm": 1.4253603219985962, | |
| "learning_rate": 1.478134110787172e-05, | |
| "step": 3550 | |
| }, | |
| { | |
| "embedding_loss": 0.0341, | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 0.5666287541389465, | |
| "learning_rate": 1.4989587671803415e-05, | |
| "step": 3600 | |
| }, | |
| { | |
| "embedding_loss": 0.032, | |
| "epoch": 0.38008955534728733, | |
| "grad_norm": 0.7250155210494995, | |
| "learning_rate": 1.5197834235735112e-05, | |
| "step": 3650 | |
| }, | |
| { | |
| "embedding_loss": 0.0312, | |
| "epoch": 0.38529626158492136, | |
| "grad_norm": 1.2066556215286255, | |
| "learning_rate": 1.5406080799666807e-05, | |
| "step": 3700 | |
| }, | |
| { | |
| "embedding_loss": 0.0286, | |
| "epoch": 0.39050296782255545, | |
| "grad_norm": 1.999817967414856, | |
| "learning_rate": 1.5614327363598502e-05, | |
| "step": 3750 | |
| }, | |
| { | |
| "embedding_loss": 0.0303, | |
| "epoch": 0.39570967406018953, | |
| "grad_norm": 2.4242656230926514, | |
| "learning_rate": 1.5822573927530198e-05, | |
| "step": 3800 | |
| }, | |
| { | |
| "embedding_loss": 0.0321, | |
| "epoch": 0.4009163802978236, | |
| "grad_norm": 1.0087485313415527, | |
| "learning_rate": 1.6030820491461893e-05, | |
| "step": 3850 | |
| }, | |
| { | |
| "embedding_loss": 0.0291, | |
| "epoch": 0.40612308653545764, | |
| "grad_norm": 0.9711636900901794, | |
| "learning_rate": 1.6239067055393588e-05, | |
| "step": 3900 | |
| }, | |
| { | |
| "embedding_loss": 0.0267, | |
| "epoch": 0.41132979277309173, | |
| "grad_norm": 2.3323183059692383, | |
| "learning_rate": 1.6447313619325283e-05, | |
| "step": 3950 | |
| }, | |
| { | |
| "embedding_loss": 0.0315, | |
| "epoch": 0.4165364990107258, | |
| "grad_norm": 0.25967147946357727, | |
| "learning_rate": 1.6655560183256978e-05, | |
| "step": 4000 | |
| }, | |
| { | |
| "embedding_loss": 0.026, | |
| "epoch": 0.4217432052483599, | |
| "grad_norm": 0.5975779294967651, | |
| "learning_rate": 1.6863806747188673e-05, | |
| "step": 4050 | |
| }, | |
| { | |
| "embedding_loss": 0.0222, | |
| "epoch": 0.426949911485994, | |
| "grad_norm": 2.1180572509765625, | |
| "learning_rate": 1.707205331112037e-05, | |
| "step": 4100 | |
| }, | |
| { | |
| "embedding_loss": 0.0226, | |
| "epoch": 0.432156617723628, | |
| "grad_norm": 2.647836923599243, | |
| "learning_rate": 1.7280299875052064e-05, | |
| "step": 4150 | |
| }, | |
| { | |
| "embedding_loss": 0.025, | |
| "epoch": 0.4373633239612621, | |
| "grad_norm": 1.7642154693603516, | |
| "learning_rate": 1.748854643898376e-05, | |
| "step": 4200 | |
| }, | |
| { | |
| "embedding_loss": 0.0258, | |
| "epoch": 0.4425700301988962, | |
| "grad_norm": 0.6594904065132141, | |
| "learning_rate": 1.7696793002915454e-05, | |
| "step": 4250 | |
| }, | |
| { | |
| "embedding_loss": 0.0227, | |
| "epoch": 0.44777673643653026, | |
| "grad_norm": 1.1913025379180908, | |
| "learning_rate": 1.790503956684715e-05, | |
| "step": 4300 | |
| }, | |
| { | |
| "embedding_loss": 0.025, | |
| "epoch": 0.45298344267416435, | |
| "grad_norm": 2.221813201904297, | |
| "learning_rate": 1.8113286130778844e-05, | |
| "step": 4350 | |
| }, | |
| { | |
| "embedding_loss": 0.0192, | |
| "epoch": 0.4581901489117984, | |
| "grad_norm": 0.7045194506645203, | |
| "learning_rate": 1.832153269471054e-05, | |
| "step": 4400 | |
| }, | |
| { | |
| "embedding_loss": 0.0214, | |
| "epoch": 0.46339685514943246, | |
| "grad_norm": 1.9471172094345093, | |
| "learning_rate": 1.8529779258642235e-05, | |
| "step": 4450 | |
| }, | |
| { | |
| "embedding_loss": 0.0232, | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 0.8062028288841248, | |
| "learning_rate": 1.873802582257393e-05, | |
| "step": 4500 | |
| }, | |
| { | |
| "embedding_loss": 0.0201, | |
| "epoch": 0.47381026762470063, | |
| "grad_norm": 0.2602122724056244, | |
| "learning_rate": 1.8946272386505625e-05, | |
| "step": 4550 | |
| }, | |
| { | |
| "embedding_loss": 0.0234, | |
| "epoch": 0.47901697386233466, | |
| "grad_norm": 0.2223815619945526, | |
| "learning_rate": 1.915451895043732e-05, | |
| "step": 4600 | |
| }, | |
| { | |
| "embedding_loss": 0.0206, | |
| "epoch": 0.48422368009996875, | |
| "grad_norm": 1.995078682899475, | |
| "learning_rate": 1.9362765514369015e-05, | |
| "step": 4650 | |
| }, | |
| { | |
| "embedding_loss": 0.0228, | |
| "epoch": 0.48943038633760283, | |
| "grad_norm": 1.3003852367401123, | |
| "learning_rate": 1.957101207830071e-05, | |
| "step": 4700 | |
| }, | |
| { | |
| "embedding_loss": 0.0194, | |
| "epoch": 0.4946370925752369, | |
| "grad_norm": 0.10561434924602509, | |
| "learning_rate": 1.9779258642232402e-05, | |
| "step": 4750 | |
| }, | |
| { | |
| "embedding_loss": 0.0197, | |
| "epoch": 0.499843798812871, | |
| "grad_norm": 0.3040192425251007, | |
| "learning_rate": 1.99875052061641e-05, | |
| "step": 4800 | |
| }, | |
| { | |
| "embedding_loss": 0.0167, | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 1.1733126640319824, | |
| "learning_rate": 1.9978247286696136e-05, | |
| "step": 4850 | |
| }, | |
| { | |
| "embedding_loss": 0.0244, | |
| "epoch": 0.5102572112881392, | |
| "grad_norm": 1.6774014234542847, | |
| "learning_rate": 1.995510610233032e-05, | |
| "step": 4900 | |
| }, | |
| { | |
| "embedding_loss": 0.0152, | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 0.4171350300312042, | |
| "learning_rate": 1.9931964917964502e-05, | |
| "step": 4950 | |
| }, | |
| { | |
| "embedding_loss": 0.0191, | |
| "epoch": 0.5206706237634072, | |
| "grad_norm": 0.1614975482225418, | |
| "learning_rate": 1.990882373359869e-05, | |
| "step": 5000 | |
| }, | |
| { | |
| "embedding_loss": 0.0174, | |
| "epoch": 0.5258773300010413, | |
| "grad_norm": 1.8693324327468872, | |
| "learning_rate": 1.9885682549232872e-05, | |
| "step": 5050 | |
| }, | |
| { | |
| "embedding_loss": 0.0242, | |
| "epoch": 0.5310840362386754, | |
| "grad_norm": 0.42837658524513245, | |
| "learning_rate": 1.9862541364867056e-05, | |
| "step": 5100 | |
| }, | |
| { | |
| "embedding_loss": 0.0202, | |
| "epoch": 0.5362907424763095, | |
| "grad_norm": 1.6628985404968262, | |
| "learning_rate": 1.983940018050124e-05, | |
| "step": 5150 | |
| }, | |
| { | |
| "embedding_loss": 0.0186, | |
| "epoch": 0.5414974487139436, | |
| "grad_norm": 0.25852930545806885, | |
| "learning_rate": 1.9816258996135426e-05, | |
| "step": 5200 | |
| }, | |
| { | |
| "embedding_loss": 0.0172, | |
| "epoch": 0.5467041549515776, | |
| "grad_norm": 1.1750682592391968, | |
| "learning_rate": 1.979311781176961e-05, | |
| "step": 5250 | |
| }, | |
| { | |
| "embedding_loss": 0.0208, | |
| "epoch": 0.5519108611892117, | |
| "grad_norm": 3.196448564529419, | |
| "learning_rate": 1.9769976627403792e-05, | |
| "step": 5300 | |
| }, | |
| { | |
| "embedding_loss": 0.0185, | |
| "epoch": 0.5571175674268458, | |
| "grad_norm": 0.057756174355745316, | |
| "learning_rate": 1.974683544303798e-05, | |
| "step": 5350 | |
| }, | |
| { | |
| "embedding_loss": 0.0177, | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 0.08262369781732559, | |
| "learning_rate": 1.9723694258672162e-05, | |
| "step": 5400 | |
| }, | |
| { | |
| "embedding_loss": 0.0169, | |
| "epoch": 0.5675309799021139, | |
| "grad_norm": 1.7123504877090454, | |
| "learning_rate": 1.9700553074306345e-05, | |
| "step": 5450 | |
| }, | |
| { | |
| "embedding_loss": 0.0186, | |
| "epoch": 0.572737686139748, | |
| "grad_norm": 0.9821128249168396, | |
| "learning_rate": 1.967741188994053e-05, | |
| "step": 5500 | |
| }, | |
| { | |
| "embedding_loss": 0.014, | |
| "epoch": 0.577944392377382, | |
| "grad_norm": 0.0532955676317215, | |
| "learning_rate": 1.9654270705574715e-05, | |
| "step": 5550 | |
| }, | |
| { | |
| "embedding_loss": 0.0166, | |
| "epoch": 0.5831510986150161, | |
| "grad_norm": 0.13396751880645752, | |
| "learning_rate": 1.96311295212089e-05, | |
| "step": 5600 | |
| }, | |
| { | |
| "embedding_loss": 0.0129, | |
| "epoch": 0.5883578048526502, | |
| "grad_norm": 0.14156009256839752, | |
| "learning_rate": 1.9607988336843082e-05, | |
| "step": 5650 | |
| }, | |
| { | |
| "embedding_loss": 0.0114, | |
| "epoch": 0.5935645110902843, | |
| "grad_norm": 0.9108484387397766, | |
| "learning_rate": 1.9584847152477265e-05, | |
| "step": 5700 | |
| }, | |
| { | |
| "embedding_loss": 0.0152, | |
| "epoch": 0.5987712173279184, | |
| "grad_norm": 0.18236614763736725, | |
| "learning_rate": 1.9561705968111448e-05, | |
| "step": 5750 | |
| }, | |
| { | |
| "embedding_loss": 0.0134, | |
| "epoch": 0.6039779235655525, | |
| "grad_norm": 1.664371371269226, | |
| "learning_rate": 1.953856478374563e-05, | |
| "step": 5800 | |
| }, | |
| { | |
| "embedding_loss": 0.0156, | |
| "epoch": 0.6091846298031866, | |
| "grad_norm": 0.9308391213417053, | |
| "learning_rate": 1.9515423599379818e-05, | |
| "step": 5850 | |
| }, | |
| { | |
| "embedding_loss": 0.0144, | |
| "epoch": 0.6143913360408205, | |
| "grad_norm": 0.10233943164348602, | |
| "learning_rate": 1.9492282415014e-05, | |
| "step": 5900 | |
| }, | |
| { | |
| "embedding_loss": 0.0128, | |
| "epoch": 0.6195980422784546, | |
| "grad_norm": 0.1212044283747673, | |
| "learning_rate": 1.9469141230648185e-05, | |
| "step": 5950 | |
| }, | |
| { | |
| "embedding_loss": 0.0141, | |
| "epoch": 0.6248047485160887, | |
| "grad_norm": 0.03808877244591713, | |
| "learning_rate": 1.9446000046282368e-05, | |
| "step": 6000 | |
| }, | |
| { | |
| "embedding_loss": 0.0129, | |
| "epoch": 0.6300114547537228, | |
| "grad_norm": 1.263411521911621, | |
| "learning_rate": 1.9422858861916555e-05, | |
| "step": 6050 | |
| }, | |
| { | |
| "embedding_loss": 0.0128, | |
| "epoch": 0.6352181609913569, | |
| "grad_norm": 1.1934914588928223, | |
| "learning_rate": 1.9399717677550738e-05, | |
| "step": 6100 | |
| }, | |
| { | |
| "embedding_loss": 0.0131, | |
| "epoch": 0.640424867228991, | |
| "grad_norm": 0.5088186264038086, | |
| "learning_rate": 1.937657649318492e-05, | |
| "step": 6150 | |
| }, | |
| { | |
| "embedding_loss": 0.0133, | |
| "epoch": 0.645631573466625, | |
| "grad_norm": 0.2881380319595337, | |
| "learning_rate": 1.9353435308819108e-05, | |
| "step": 6200 | |
| }, | |
| { | |
| "embedding_loss": 0.012, | |
| "epoch": 0.6508382797042591, | |
| "grad_norm": 0.1978471279144287, | |
| "learning_rate": 1.933029412445329e-05, | |
| "step": 6250 | |
| }, | |
| { | |
| "embedding_loss": 0.0131, | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 1.7363338470458984, | |
| "learning_rate": 1.9307152940087474e-05, | |
| "step": 6300 | |
| }, | |
| { | |
| "embedding_loss": 0.0124, | |
| "epoch": 0.6612516921795273, | |
| "grad_norm": 0.1304263323545456, | |
| "learning_rate": 1.9284011755721658e-05, | |
| "step": 6350 | |
| }, | |
| { | |
| "embedding_loss": 0.0138, | |
| "epoch": 0.6664583984171613, | |
| "grad_norm": 0.15851274132728577, | |
| "learning_rate": 1.9260870571355844e-05, | |
| "step": 6400 | |
| }, | |
| { | |
| "embedding_loss": 0.0141, | |
| "epoch": 0.6716651046547953, | |
| "grad_norm": 1.2692539691925049, | |
| "learning_rate": 1.9237729386990027e-05, | |
| "step": 6450 | |
| }, | |
| { | |
| "embedding_loss": 0.0106, | |
| "epoch": 0.6768718108924294, | |
| "grad_norm": 0.30289334058761597, | |
| "learning_rate": 1.921458820262421e-05, | |
| "step": 6500 | |
| }, | |
| { | |
| "embedding_loss": 0.0125, | |
| "epoch": 0.6820785171300635, | |
| "grad_norm": 0.02164456807076931, | |
| "learning_rate": 1.9191447018258394e-05, | |
| "step": 6550 | |
| }, | |
| { | |
| "embedding_loss": 0.0132, | |
| "epoch": 0.6872852233676976, | |
| "grad_norm": 0.34430477023124695, | |
| "learning_rate": 1.916830583389258e-05, | |
| "step": 6600 | |
| }, | |
| { | |
| "embedding_loss": 0.0143, | |
| "epoch": 0.6924919296053317, | |
| "grad_norm": 0.2521458864212036, | |
| "learning_rate": 1.9145164649526764e-05, | |
| "step": 6650 | |
| }, | |
| { | |
| "embedding_loss": 0.0127, | |
| "epoch": 0.6976986358429658, | |
| "grad_norm": 0.6990224719047546, | |
| "learning_rate": 1.9122023465160947e-05, | |
| "step": 6700 | |
| }, | |
| { | |
| "embedding_loss": 0.014, | |
| "epoch": 0.7029053420805998, | |
| "grad_norm": 0.07170717418193817, | |
| "learning_rate": 1.9098882280795134e-05, | |
| "step": 6750 | |
| }, | |
| { | |
| "embedding_loss": 0.0118, | |
| "epoch": 0.7081120483182339, | |
| "grad_norm": 0.18331408500671387, | |
| "learning_rate": 1.9075741096429317e-05, | |
| "step": 6800 | |
| }, | |
| { | |
| "embedding_loss": 0.0116, | |
| "epoch": 0.7133187545558679, | |
| "grad_norm": 1.0223900079727173, | |
| "learning_rate": 1.90525999120635e-05, | |
| "step": 6850 | |
| }, | |
| { | |
| "embedding_loss": 0.0101, | |
| "epoch": 0.718525460793502, | |
| "grad_norm": 0.08013039082288742, | |
| "learning_rate": 1.9029458727697684e-05, | |
| "step": 6900 | |
| }, | |
| { | |
| "embedding_loss": 0.0119, | |
| "epoch": 0.7237321670311361, | |
| "grad_norm": 1.7682616710662842, | |
| "learning_rate": 1.900631754333187e-05, | |
| "step": 6950 | |
| }, | |
| { | |
| "embedding_loss": 0.0095, | |
| "epoch": 0.7289388732687702, | |
| "grad_norm": 0.10093237459659576, | |
| "learning_rate": 1.8983176358966054e-05, | |
| "step": 7000 | |
| }, | |
| { | |
| "embedding_loss": 0.009, | |
| "epoch": 0.7341455795064042, | |
| "grad_norm": 2.3766512870788574, | |
| "learning_rate": 1.8960035174600237e-05, | |
| "step": 7050 | |
| }, | |
| { | |
| "embedding_loss": 0.0086, | |
| "epoch": 0.7393522857440383, | |
| "grad_norm": 0.019304808229207993, | |
| "learning_rate": 1.893689399023442e-05, | |
| "step": 7100 | |
| }, | |
| { | |
| "embedding_loss": 0.0083, | |
| "epoch": 0.7445589919816724, | |
| "grad_norm": 0.08419201523065567, | |
| "learning_rate": 1.8913752805868607e-05, | |
| "step": 7150 | |
| }, | |
| { | |
| "embedding_loss": 0.0144, | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 1.224936842918396, | |
| "learning_rate": 1.889061162150279e-05, | |
| "step": 7200 | |
| }, | |
| { | |
| "embedding_loss": 0.0107, | |
| "epoch": 0.7549724044569406, | |
| "grad_norm": 0.3462272584438324, | |
| "learning_rate": 1.8867470437136973e-05, | |
| "step": 7250 | |
| }, | |
| { | |
| "embedding_loss": 0.0088, | |
| "epoch": 0.7601791106945747, | |
| "grad_norm": 0.03107067011296749, | |
| "learning_rate": 1.884432925277116e-05, | |
| "step": 7300 | |
| }, | |
| { | |
| "embedding_loss": 0.0096, | |
| "epoch": 0.7653858169322086, | |
| "grad_norm": 0.12990835309028625, | |
| "learning_rate": 1.8821188068405343e-05, | |
| "step": 7350 | |
| }, | |
| { | |
| "embedding_loss": 0.0073, | |
| "epoch": 0.7705925231698427, | |
| "grad_norm": 0.10413219034671783, | |
| "learning_rate": 1.8798046884039526e-05, | |
| "step": 7400 | |
| }, | |
| { | |
| "embedding_loss": 0.0063, | |
| "epoch": 0.7757992294074768, | |
| "grad_norm": 0.39868220686912537, | |
| "learning_rate": 1.877490569967371e-05, | |
| "step": 7450 | |
| }, | |
| { | |
| "embedding_loss": 0.0096, | |
| "epoch": 0.7810059356451109, | |
| "grad_norm": 0.4435900151729584, | |
| "learning_rate": 1.8751764515307896e-05, | |
| "step": 7500 | |
| }, | |
| { | |
| "embedding_loss": 0.0091, | |
| "epoch": 0.786212641882745, | |
| "grad_norm": 1.100035309791565, | |
| "learning_rate": 1.872862333094208e-05, | |
| "step": 7550 | |
| }, | |
| { | |
| "embedding_loss": 0.01, | |
| "epoch": 0.7914193481203791, | |
| "grad_norm": 0.12100836634635925, | |
| "learning_rate": 1.8705482146576263e-05, | |
| "step": 7600 | |
| }, | |
| { | |
| "embedding_loss": 0.0093, | |
| "epoch": 0.7966260543580131, | |
| "grad_norm": 0.38435640931129456, | |
| "learning_rate": 1.868234096221045e-05, | |
| "step": 7650 | |
| }, | |
| { | |
| "embedding_loss": 0.0121, | |
| "epoch": 0.8018327605956472, | |
| "grad_norm": 0.8386930823326111, | |
| "learning_rate": 1.8659199777844633e-05, | |
| "step": 7700 | |
| }, | |
| { | |
| "embedding_loss": 0.014, | |
| "epoch": 0.8070394668332813, | |
| "grad_norm": 0.4830886423587799, | |
| "learning_rate": 1.8636058593478816e-05, | |
| "step": 7750 | |
| }, | |
| { | |
| "embedding_loss": 0.0078, | |
| "epoch": 0.8122461730709153, | |
| "grad_norm": 0.026604199782013893, | |
| "learning_rate": 1.8612917409113e-05, | |
| "step": 7800 | |
| }, | |
| { | |
| "embedding_loss": 0.0082, | |
| "epoch": 0.8174528793085494, | |
| "grad_norm": 0.5969211459159851, | |
| "learning_rate": 1.8589776224747186e-05, | |
| "step": 7850 | |
| }, | |
| { | |
| "embedding_loss": 0.0086, | |
| "epoch": 0.8226595855461835, | |
| "grad_norm": 0.06108603999018669, | |
| "learning_rate": 1.856663504038137e-05, | |
| "step": 7900 | |
| }, | |
| { | |
| "embedding_loss": 0.0066, | |
| "epoch": 0.8278662917838175, | |
| "grad_norm": 0.3239186406135559, | |
| "learning_rate": 1.8543493856015553e-05, | |
| "step": 7950 | |
| }, | |
| { | |
| "embedding_loss": 0.0112, | |
| "epoch": 0.8330729980214516, | |
| "grad_norm": 0.2972595989704132, | |
| "learning_rate": 1.8520352671649736e-05, | |
| "step": 8000 | |
| }, | |
| { | |
| "embedding_loss": 0.0073, | |
| "epoch": 0.8382797042590857, | |
| "grad_norm": 0.533140242099762, | |
| "learning_rate": 1.8497211487283922e-05, | |
| "step": 8050 | |
| }, | |
| { | |
| "embedding_loss": 0.0078, | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 1.5684537887573242, | |
| "learning_rate": 1.8474070302918106e-05, | |
| "step": 8100 | |
| }, | |
| { | |
| "embedding_loss": 0.0087, | |
| "epoch": 0.8486931167343539, | |
| "grad_norm": 0.4422908425331116, | |
| "learning_rate": 1.845092911855229e-05, | |
| "step": 8150 | |
| }, | |
| { | |
| "embedding_loss": 0.012, | |
| "epoch": 0.853899822971988, | |
| "grad_norm": 0.5563941597938538, | |
| "learning_rate": 1.8427787934186476e-05, | |
| "step": 8200 | |
| }, | |
| { | |
| "embedding_loss": 0.0088, | |
| "epoch": 0.8591065292096219, | |
| "grad_norm": 0.3089462220668793, | |
| "learning_rate": 1.840464674982066e-05, | |
| "step": 8250 | |
| }, | |
| { | |
| "embedding_loss": 0.0122, | |
| "epoch": 0.864313235447256, | |
| "grad_norm": 4.295806884765625, | |
| "learning_rate": 1.8381505565454842e-05, | |
| "step": 8300 | |
| }, | |
| { | |
| "embedding_loss": 0.0091, | |
| "epoch": 0.8695199416848901, | |
| "grad_norm": 0.8506584763526917, | |
| "learning_rate": 1.8358364381089025e-05, | |
| "step": 8350 | |
| }, | |
| { | |
| "embedding_loss": 0.01, | |
| "epoch": 0.8747266479225242, | |
| "grad_norm": 0.9140012264251709, | |
| "learning_rate": 1.8335223196723212e-05, | |
| "step": 8400 | |
| }, | |
| { | |
| "embedding_loss": 0.0095, | |
| "epoch": 0.8799333541601583, | |
| "grad_norm": 0.9452886581420898, | |
| "learning_rate": 1.8312082012357395e-05, | |
| "step": 8450 | |
| }, | |
| { | |
| "embedding_loss": 0.0051, | |
| "epoch": 0.8851400603977924, | |
| "grad_norm": 0.34865090250968933, | |
| "learning_rate": 1.828894082799158e-05, | |
| "step": 8500 | |
| }, | |
| { | |
| "embedding_loss": 0.0109, | |
| "epoch": 0.8903467666354264, | |
| "grad_norm": 0.027646692469716072, | |
| "learning_rate": 1.8265799643625762e-05, | |
| "step": 8550 | |
| }, | |
| { | |
| "embedding_loss": 0.0074, | |
| "epoch": 0.8955534728730605, | |
| "grad_norm": 0.28435996174812317, | |
| "learning_rate": 1.8242658459259945e-05, | |
| "step": 8600 | |
| }, | |
| { | |
| "embedding_loss": 0.0048, | |
| "epoch": 0.9007601791106946, | |
| "grad_norm": 0.1109330877661705, | |
| "learning_rate": 1.821951727489413e-05, | |
| "step": 8650 | |
| }, | |
| { | |
| "embedding_loss": 0.0089, | |
| "epoch": 0.9059668853483287, | |
| "grad_norm": 0.46810364723205566, | |
| "learning_rate": 1.8196376090528315e-05, | |
| "step": 8700 | |
| }, | |
| { | |
| "embedding_loss": 0.0087, | |
| "epoch": 0.9111735915859627, | |
| "grad_norm": 0.2674962878227234, | |
| "learning_rate": 1.81732349061625e-05, | |
| "step": 8750 | |
| }, | |
| { | |
| "embedding_loss": 0.0096, | |
| "epoch": 0.9163802978235968, | |
| "grad_norm": 3.0557987689971924, | |
| "learning_rate": 1.815009372179668e-05, | |
| "step": 8800 | |
| }, | |
| { | |
| "embedding_loss": 0.0085, | |
| "epoch": 0.9215870040612308, | |
| "grad_norm": 0.6088097095489502, | |
| "learning_rate": 1.8126952537430865e-05, | |
| "step": 8850 | |
| }, | |
| { | |
| "embedding_loss": 0.0083, | |
| "epoch": 0.9267937102988649, | |
| "grad_norm": 0.12588393688201904, | |
| "learning_rate": 1.810381135306505e-05, | |
| "step": 8900 | |
| }, | |
| { | |
| "embedding_loss": 0.009, | |
| "epoch": 0.932000416536499, | |
| "grad_norm": 0.46597975492477417, | |
| "learning_rate": 1.8080670168699235e-05, | |
| "step": 8950 | |
| }, | |
| { | |
| "embedding_loss": 0.0078, | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 0.03179040551185608, | |
| "learning_rate": 1.8057528984333418e-05, | |
| "step": 9000 | |
| }, | |
| { | |
| "embedding_loss": 0.0096, | |
| "epoch": 0.9424138290117672, | |
| "grad_norm": 0.476052463054657, | |
| "learning_rate": 1.8034387799967605e-05, | |
| "step": 9050 | |
| }, | |
| { | |
| "embedding_loss": 0.0084, | |
| "epoch": 0.9476205352494013, | |
| "grad_norm": 0.6995823979377747, | |
| "learning_rate": 1.8011246615601788e-05, | |
| "step": 9100 | |
| }, | |
| { | |
| "embedding_loss": 0.0073, | |
| "epoch": 0.9528272414870353, | |
| "grad_norm": 0.042539093643426895, | |
| "learning_rate": 1.798810543123597e-05, | |
| "step": 9150 | |
| }, | |
| { | |
| "embedding_loss": 0.0055, | |
| "epoch": 0.9580339477246693, | |
| "grad_norm": 0.024517321959137917, | |
| "learning_rate": 1.7964964246870154e-05, | |
| "step": 9200 | |
| }, | |
| { | |
| "embedding_loss": 0.0059, | |
| "epoch": 0.9632406539623034, | |
| "grad_norm": 0.020516090095043182, | |
| "learning_rate": 1.794182306250434e-05, | |
| "step": 9250 | |
| }, | |
| { | |
| "embedding_loss": 0.008, | |
| "epoch": 0.9684473601999375, | |
| "grad_norm": 0.07251976430416107, | |
| "learning_rate": 1.7918681878138524e-05, | |
| "step": 9300 | |
| }, | |
| { | |
| "embedding_loss": 0.0112, | |
| "epoch": 0.9736540664375716, | |
| "grad_norm": 0.12063586711883545, | |
| "learning_rate": 1.7895540693772708e-05, | |
| "step": 9350 | |
| }, | |
| { | |
| "embedding_loss": 0.0045, | |
| "epoch": 0.9788607726752057, | |
| "grad_norm": 0.36446037888526917, | |
| "learning_rate": 1.787239950940689e-05, | |
| "step": 9400 | |
| }, | |
| { | |
| "embedding_loss": 0.0086, | |
| "epoch": 0.9840674789128397, | |
| "grad_norm": 0.08372894674539566, | |
| "learning_rate": 1.7849258325041078e-05, | |
| "step": 9450 | |
| }, | |
| { | |
| "embedding_loss": 0.0049, | |
| "epoch": 0.9892741851504738, | |
| "grad_norm": 0.04579677805304527, | |
| "learning_rate": 1.782611714067526e-05, | |
| "step": 9500 | |
| }, | |
| { | |
| "embedding_loss": 0.0056, | |
| "epoch": 0.9944808913881079, | |
| "grad_norm": 0.1182708889245987, | |
| "learning_rate": 1.7802975956309444e-05, | |
| "step": 9550 | |
| }, | |
| { | |
| "embedding_loss": 0.0067, | |
| "epoch": 0.999687597625742, | |
| "grad_norm": 0.01671171560883522, | |
| "learning_rate": 1.777983477194363e-05, | |
| "step": 9600 | |
| }, | |
| { | |
| "embedding_loss": 0.0058, | |
| "epoch": 1.004894303863376, | |
| "grad_norm": 0.9829951524734497, | |
| "learning_rate": 1.7756693587577814e-05, | |
| "step": 9650 | |
| }, | |
| { | |
| "embedding_loss": 0.0132, | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 0.0442439503967762, | |
| "learning_rate": 1.7733552403211997e-05, | |
| "step": 9700 | |
| }, | |
| { | |
| "embedding_loss": 0.0076, | |
| "epoch": 1.0153077163386441, | |
| "grad_norm": 0.031697243452072144, | |
| "learning_rate": 1.771041121884618e-05, | |
| "step": 9750 | |
| }, | |
| { | |
| "embedding_loss": 0.0069, | |
| "epoch": 1.0205144225762783, | |
| "grad_norm": 0.019380003213882446, | |
| "learning_rate": 1.7687270034480367e-05, | |
| "step": 9800 | |
| }, | |
| { | |
| "embedding_loss": 0.0072, | |
| "epoch": 1.0257211288139123, | |
| "grad_norm": 0.03249906376004219, | |
| "learning_rate": 1.766412885011455e-05, | |
| "step": 9850 | |
| }, | |
| { | |
| "embedding_loss": 0.0092, | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 0.3388296663761139, | |
| "learning_rate": 1.7640987665748734e-05, | |
| "step": 9900 | |
| }, | |
| { | |
| "embedding_loss": 0.0077, | |
| "epoch": 1.0361345412891805, | |
| "grad_norm": 0.1678103804588318, | |
| "learning_rate": 1.7617846481382917e-05, | |
| "step": 9950 | |
| }, | |
| { | |
| "embedding_loss": 0.0073, | |
| "epoch": 1.0413412475268145, | |
| "grad_norm": 0.015974771231412888, | |
| "learning_rate": 1.7594705297017104e-05, | |
| "step": 10000 | |
| }, | |
| { | |
| "embedding_loss": 0.0071, | |
| "epoch": 1.0465479537644486, | |
| "grad_norm": 0.041760511696338654, | |
| "learning_rate": 1.7571564112651287e-05, | |
| "step": 10050 | |
| }, | |
| { | |
| "embedding_loss": 0.0082, | |
| "epoch": 1.0517546600020826, | |
| "grad_norm": 1.2133060693740845, | |
| "learning_rate": 1.754842292828547e-05, | |
| "step": 10100 | |
| }, | |
| { | |
| "embedding_loss": 0.0046, | |
| "epoch": 1.0569613662397168, | |
| "grad_norm": 0.04206147417426109, | |
| "learning_rate": 1.7525281743919657e-05, | |
| "step": 10150 | |
| }, | |
| { | |
| "embedding_loss": 0.0053, | |
| "epoch": 1.0621680724773508, | |
| "grad_norm": 0.18272073566913605, | |
| "learning_rate": 1.750214055955384e-05, | |
| "step": 10200 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.067374778714985, | |
| "grad_norm": 0.03547310084104538, | |
| "learning_rate": 1.7478999375188023e-05, | |
| "step": 10250 | |
| }, | |
| { | |
| "embedding_loss": 0.0043, | |
| "epoch": 1.072581484952619, | |
| "grad_norm": 0.0350540354847908, | |
| "learning_rate": 1.7455858190822207e-05, | |
| "step": 10300 | |
| }, | |
| { | |
| "embedding_loss": 0.0052, | |
| "epoch": 1.077788191190253, | |
| "grad_norm": 1.6414012908935547, | |
| "learning_rate": 1.7432717006456393e-05, | |
| "step": 10350 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.0829948974278871, | |
| "grad_norm": 3.3341734409332275, | |
| "learning_rate": 1.7409575822090577e-05, | |
| "step": 10400 | |
| }, | |
| { | |
| "embedding_loss": 0.0082, | |
| "epoch": 1.088201603665521, | |
| "grad_norm": 0.01878177374601364, | |
| "learning_rate": 1.738643463772476e-05, | |
| "step": 10450 | |
| }, | |
| { | |
| "embedding_loss": 0.006, | |
| "epoch": 1.0934083099031553, | |
| "grad_norm": 0.5989029407501221, | |
| "learning_rate": 1.7363293453358946e-05, | |
| "step": 10500 | |
| }, | |
| { | |
| "embedding_loss": 0.0063, | |
| "epoch": 1.0986150161407893, | |
| "grad_norm": 0.23778136074543, | |
| "learning_rate": 1.734015226899313e-05, | |
| "step": 10550 | |
| }, | |
| { | |
| "embedding_loss": 0.0038, | |
| "epoch": 1.1038217223784235, | |
| "grad_norm": 0.012218566611409187, | |
| "learning_rate": 1.7317011084627313e-05, | |
| "step": 10600 | |
| }, | |
| { | |
| "embedding_loss": 0.0049, | |
| "epoch": 1.1090284286160574, | |
| "grad_norm": 0.05297623947262764, | |
| "learning_rate": 1.7293869900261496e-05, | |
| "step": 10650 | |
| }, | |
| { | |
| "embedding_loss": 0.0062, | |
| "epoch": 1.1142351348536916, | |
| "grad_norm": 0.5116108655929565, | |
| "learning_rate": 1.7270728715895683e-05, | |
| "step": 10700 | |
| }, | |
| { | |
| "embedding_loss": 0.0084, | |
| "epoch": 1.1194418410913256, | |
| "grad_norm": 0.4478176236152649, | |
| "learning_rate": 1.7247587531529866e-05, | |
| "step": 10750 | |
| }, | |
| { | |
| "embedding_loss": 0.0057, | |
| "epoch": 1.1246485473289598, | |
| "grad_norm": 0.4622497856616974, | |
| "learning_rate": 1.722444634716405e-05, | |
| "step": 10800 | |
| }, | |
| { | |
| "embedding_loss": 0.0074, | |
| "epoch": 1.1298552535665938, | |
| "grad_norm": 0.017630133777856827, | |
| "learning_rate": 1.7201305162798233e-05, | |
| "step": 10850 | |
| }, | |
| { | |
| "embedding_loss": 0.0056, | |
| "epoch": 1.1350619598042277, | |
| "grad_norm": 0.6077584624290466, | |
| "learning_rate": 1.717816397843242e-05, | |
| "step": 10900 | |
| }, | |
| { | |
| "embedding_loss": 0.0078, | |
| "epoch": 1.140268666041862, | |
| "grad_norm": 0.18036994338035583, | |
| "learning_rate": 1.7155022794066603e-05, | |
| "step": 10950 | |
| }, | |
| { | |
| "embedding_loss": 0.0059, | |
| "epoch": 1.145475372279496, | |
| "grad_norm": 0.009565812535583973, | |
| "learning_rate": 1.7131881609700786e-05, | |
| "step": 11000 | |
| }, | |
| { | |
| "embedding_loss": 0.0062, | |
| "epoch": 1.1506820785171301, | |
| "grad_norm": 2.7242627143859863, | |
| "learning_rate": 1.7108740425334973e-05, | |
| "step": 11050 | |
| }, | |
| { | |
| "embedding_loss": 0.0054, | |
| "epoch": 1.155888784754764, | |
| "grad_norm": 0.017238834872841835, | |
| "learning_rate": 1.7085599240969156e-05, | |
| "step": 11100 | |
| }, | |
| { | |
| "embedding_loss": 0.006, | |
| "epoch": 1.1610954909923983, | |
| "grad_norm": 1.461991548538208, | |
| "learning_rate": 1.706245805660334e-05, | |
| "step": 11150 | |
| }, | |
| { | |
| "embedding_loss": 0.0077, | |
| "epoch": 1.1663021972300323, | |
| "grad_norm": 0.11797866970300674, | |
| "learning_rate": 1.7039316872237522e-05, | |
| "step": 11200 | |
| }, | |
| { | |
| "embedding_loss": 0.005, | |
| "epoch": 1.1715089034676662, | |
| "grad_norm": 0.040576279163360596, | |
| "learning_rate": 1.701617568787171e-05, | |
| "step": 11250 | |
| }, | |
| { | |
| "embedding_loss": 0.0061, | |
| "epoch": 1.1767156097053004, | |
| "grad_norm": 0.013650750741362572, | |
| "learning_rate": 1.6993034503505892e-05, | |
| "step": 11300 | |
| }, | |
| { | |
| "embedding_loss": 0.0043, | |
| "epoch": 1.1819223159429346, | |
| "grad_norm": 0.013326168991625309, | |
| "learning_rate": 1.6969893319140075e-05, | |
| "step": 11350 | |
| }, | |
| { | |
| "embedding_loss": 0.0061, | |
| "epoch": 1.1871290221805686, | |
| "grad_norm": 0.07993318140506744, | |
| "learning_rate": 1.694675213477426e-05, | |
| "step": 11400 | |
| }, | |
| { | |
| "embedding_loss": 0.0054, | |
| "epoch": 1.1923357284182026, | |
| "grad_norm": 0.38105425238609314, | |
| "learning_rate": 1.6923610950408442e-05, | |
| "step": 11450 | |
| }, | |
| { | |
| "embedding_loss": 0.0046, | |
| "epoch": 1.1975424346558368, | |
| "grad_norm": 0.13614040613174438, | |
| "learning_rate": 1.6900469766042625e-05, | |
| "step": 11500 | |
| }, | |
| { | |
| "embedding_loss": 0.0054, | |
| "epoch": 1.2027491408934707, | |
| "grad_norm": 1.016570806503296, | |
| "learning_rate": 1.6877328581676812e-05, | |
| "step": 11550 | |
| }, | |
| { | |
| "embedding_loss": 0.0039, | |
| "epoch": 1.207955847131105, | |
| "grad_norm": 0.4211491644382477, | |
| "learning_rate": 1.6854187397310995e-05, | |
| "step": 11600 | |
| }, | |
| { | |
| "embedding_loss": 0.0076, | |
| "epoch": 1.213162553368739, | |
| "grad_norm": 0.020438892766833305, | |
| "learning_rate": 1.683104621294518e-05, | |
| "step": 11650 | |
| }, | |
| { | |
| "embedding_loss": 0.0064, | |
| "epoch": 1.218369259606373, | |
| "grad_norm": 0.043074000626802444, | |
| "learning_rate": 1.6807905028579362e-05, | |
| "step": 11700 | |
| }, | |
| { | |
| "embedding_loss": 0.0068, | |
| "epoch": 1.223575965844007, | |
| "grad_norm": 0.1304844170808792, | |
| "learning_rate": 1.678476384421355e-05, | |
| "step": 11750 | |
| }, | |
| { | |
| "embedding_loss": 0.0038, | |
| "epoch": 1.2287826720816413, | |
| "grad_norm": 0.10536648333072662, | |
| "learning_rate": 1.676162265984773e-05, | |
| "step": 11800 | |
| }, | |
| { | |
| "embedding_loss": 0.0053, | |
| "epoch": 1.2339893783192752, | |
| "grad_norm": 0.2895510792732239, | |
| "learning_rate": 1.6738481475481915e-05, | |
| "step": 11850 | |
| }, | |
| { | |
| "embedding_loss": 0.0074, | |
| "epoch": 1.2391960845569092, | |
| "grad_norm": 0.14891253411769867, | |
| "learning_rate": 1.67153402911161e-05, | |
| "step": 11900 | |
| }, | |
| { | |
| "embedding_loss": 0.0083, | |
| "epoch": 1.2444027907945434, | |
| "grad_norm": 0.03617144003510475, | |
| "learning_rate": 1.6692199106750285e-05, | |
| "step": 11950 | |
| }, | |
| { | |
| "embedding_loss": 0.0053, | |
| "epoch": 1.2496094970321774, | |
| "grad_norm": 0.031169302761554718, | |
| "learning_rate": 1.6669057922384468e-05, | |
| "step": 12000 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.2548162032698116, | |
| "grad_norm": 0.3214148283004761, | |
| "learning_rate": 1.664591673801865e-05, | |
| "step": 12050 | |
| }, | |
| { | |
| "embedding_loss": 0.0052, | |
| "epoch": 1.2600229095074456, | |
| "grad_norm": 1.033286213874817, | |
| "learning_rate": 1.6622775553652838e-05, | |
| "step": 12100 | |
| }, | |
| { | |
| "embedding_loss": 0.0079, | |
| "epoch": 1.2652296157450795, | |
| "grad_norm": 0.014789101667702198, | |
| "learning_rate": 1.659963436928702e-05, | |
| "step": 12150 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.2704363219827137, | |
| "grad_norm": 0.5162740349769592, | |
| "learning_rate": 1.6576493184921205e-05, | |
| "step": 12200 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.275643028220348, | |
| "grad_norm": 0.01165369339287281, | |
| "learning_rate": 1.6553352000555388e-05, | |
| "step": 12250 | |
| }, | |
| { | |
| "embedding_loss": 0.0063, | |
| "epoch": 1.280849734457982, | |
| "grad_norm": 0.031679488718509674, | |
| "learning_rate": 1.6530210816189574e-05, | |
| "step": 12300 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.2860564406956159, | |
| "grad_norm": 0.008600637316703796, | |
| "learning_rate": 1.6507069631823758e-05, | |
| "step": 12350 | |
| }, | |
| { | |
| "embedding_loss": 0.0065, | |
| "epoch": 1.29126314693325, | |
| "grad_norm": 0.014314206317067146, | |
| "learning_rate": 1.648392844745794e-05, | |
| "step": 12400 | |
| }, | |
| { | |
| "embedding_loss": 0.0068, | |
| "epoch": 1.296469853170884, | |
| "grad_norm": 0.3240402936935425, | |
| "learning_rate": 1.6460787263092128e-05, | |
| "step": 12450 | |
| }, | |
| { | |
| "embedding_loss": 0.0046, | |
| "epoch": 1.3016765594085182, | |
| "grad_norm": 0.030164631083607674, | |
| "learning_rate": 1.643764607872631e-05, | |
| "step": 12500 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 1.3068832656461522, | |
| "grad_norm": 0.0063670300878584385, | |
| "learning_rate": 1.6414504894360494e-05, | |
| "step": 12550 | |
| }, | |
| { | |
| "embedding_loss": 0.0036, | |
| "epoch": 1.3120899718837864, | |
| "grad_norm": 0.021254096180200577, | |
| "learning_rate": 1.6391363709994677e-05, | |
| "step": 12600 | |
| }, | |
| { | |
| "embedding_loss": 0.0058, | |
| "epoch": 1.3172966781214204, | |
| "grad_norm": 0.25233790278434753, | |
| "learning_rate": 1.6368222525628864e-05, | |
| "step": 12650 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.3225033843590546, | |
| "grad_norm": 1.2394205331802368, | |
| "learning_rate": 1.6345081341263047e-05, | |
| "step": 12700 | |
| }, | |
| { | |
| "embedding_loss": 0.006, | |
| "epoch": 1.3277100905966885, | |
| "grad_norm": 1.9639242887496948, | |
| "learning_rate": 1.632194015689723e-05, | |
| "step": 12750 | |
| }, | |
| { | |
| "embedding_loss": 0.0052, | |
| "epoch": 1.3329167968343225, | |
| "grad_norm": 0.0384540930390358, | |
| "learning_rate": 1.6298798972531414e-05, | |
| "step": 12800 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.3381235030719567, | |
| "grad_norm": 0.16365939378738403, | |
| "learning_rate": 1.62756577881656e-05, | |
| "step": 12850 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 1.3433302093095907, | |
| "grad_norm": 0.03347177803516388, | |
| "learning_rate": 1.6252516603799784e-05, | |
| "step": 12900 | |
| }, | |
| { | |
| "embedding_loss": 0.005, | |
| "epoch": 1.3485369155472249, | |
| "grad_norm": 0.014545961283147335, | |
| "learning_rate": 1.6229375419433967e-05, | |
| "step": 12950 | |
| }, | |
| { | |
| "embedding_loss": 0.0079, | |
| "epoch": 1.3537436217848589, | |
| "grad_norm": 0.7268438935279846, | |
| "learning_rate": 1.6206234235068154e-05, | |
| "step": 13000 | |
| }, | |
| { | |
| "embedding_loss": 0.0058, | |
| "epoch": 1.358950328022493, | |
| "grad_norm": 0.040684785693883896, | |
| "learning_rate": 1.6183093050702337e-05, | |
| "step": 13050 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 1.364157034260127, | |
| "grad_norm": 3.591543197631836, | |
| "learning_rate": 1.615995186633652e-05, | |
| "step": 13100 | |
| }, | |
| { | |
| "embedding_loss": 0.005, | |
| "epoch": 1.3693637404977612, | |
| "grad_norm": 0.037789322435855865, | |
| "learning_rate": 1.6136810681970703e-05, | |
| "step": 13150 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 1.3745704467353952, | |
| "grad_norm": 0.549343466758728, | |
| "learning_rate": 1.611366949760489e-05, | |
| "step": 13200 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 1.3797771529730292, | |
| "grad_norm": 0.015304960310459137, | |
| "learning_rate": 1.6090528313239073e-05, | |
| "step": 13250 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.3849838592106634, | |
| "grad_norm": 0.010115724988281727, | |
| "learning_rate": 1.6067387128873257e-05, | |
| "step": 13300 | |
| }, | |
| { | |
| "embedding_loss": 0.0042, | |
| "epoch": 1.3901905654482973, | |
| "grad_norm": 0.004204587545245886, | |
| "learning_rate": 1.6044245944507443e-05, | |
| "step": 13350 | |
| }, | |
| { | |
| "embedding_loss": 0.0046, | |
| "epoch": 1.3953972716859315, | |
| "grad_norm": 0.04513470083475113, | |
| "learning_rate": 1.6021104760141627e-05, | |
| "step": 13400 | |
| }, | |
| { | |
| "embedding_loss": 0.0047, | |
| "epoch": 1.4006039779235655, | |
| "grad_norm": 0.21044224500656128, | |
| "learning_rate": 1.599796357577581e-05, | |
| "step": 13450 | |
| }, | |
| { | |
| "embedding_loss": 0.005, | |
| "epoch": 1.4058106841611997, | |
| "grad_norm": 0.4665778577327728, | |
| "learning_rate": 1.5974822391409993e-05, | |
| "step": 13500 | |
| }, | |
| { | |
| "embedding_loss": 0.0047, | |
| "epoch": 1.4110173903988337, | |
| "grad_norm": 0.03980934992432594, | |
| "learning_rate": 1.595168120704418e-05, | |
| "step": 13550 | |
| }, | |
| { | |
| "embedding_loss": 0.0052, | |
| "epoch": 1.4162240966364679, | |
| "grad_norm": 0.08631590753793716, | |
| "learning_rate": 1.5928540022678363e-05, | |
| "step": 13600 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.4214308028741018, | |
| "grad_norm": 0.008251226507127285, | |
| "learning_rate": 1.5905398838312546e-05, | |
| "step": 13650 | |
| }, | |
| { | |
| "embedding_loss": 0.0043, | |
| "epoch": 1.4266375091117358, | |
| "grad_norm": 0.10959483683109283, | |
| "learning_rate": 1.588225765394673e-05, | |
| "step": 13700 | |
| }, | |
| { | |
| "embedding_loss": 0.0065, | |
| "epoch": 1.43184421534937, | |
| "grad_norm": 0.03955509141087532, | |
| "learning_rate": 1.5859116469580916e-05, | |
| "step": 13750 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 1.437050921587004, | |
| "grad_norm": 0.15788401663303375, | |
| "learning_rate": 1.58359752852151e-05, | |
| "step": 13800 | |
| }, | |
| { | |
| "embedding_loss": 0.0036, | |
| "epoch": 1.4422576278246382, | |
| "grad_norm": 1.064596176147461, | |
| "learning_rate": 1.5812834100849283e-05, | |
| "step": 13850 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.4474643340622722, | |
| "grad_norm": 2.6524391174316406, | |
| "learning_rate": 1.578969291648347e-05, | |
| "step": 13900 | |
| }, | |
| { | |
| "embedding_loss": 0.006, | |
| "epoch": 1.4526710402999063, | |
| "grad_norm": 0.2990039885044098, | |
| "learning_rate": 1.5766551732117653e-05, | |
| "step": 13950 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 1.4578777465375403, | |
| "grad_norm": 0.12428417056798935, | |
| "learning_rate": 1.5743410547751836e-05, | |
| "step": 14000 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 1.4630844527751745, | |
| "grad_norm": 0.01266538817435503, | |
| "learning_rate": 1.572026936338602e-05, | |
| "step": 14050 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.4682911590128085, | |
| "grad_norm": 0.07004108279943466, | |
| "learning_rate": 1.5697128179020206e-05, | |
| "step": 14100 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 1.4734978652504425, | |
| "grad_norm": 0.0223364420235157, | |
| "learning_rate": 1.567398699465439e-05, | |
| "step": 14150 | |
| }, | |
| { | |
| "embedding_loss": 0.0046, | |
| "epoch": 1.4787045714880767, | |
| "grad_norm": 0.13812583684921265, | |
| "learning_rate": 1.5650845810288572e-05, | |
| "step": 14200 | |
| }, | |
| { | |
| "embedding_loss": 0.0054, | |
| "epoch": 1.4839112777257109, | |
| "grad_norm": 0.11324401199817657, | |
| "learning_rate": 1.5627704625922756e-05, | |
| "step": 14250 | |
| }, | |
| { | |
| "embedding_loss": 0.0054, | |
| "epoch": 1.4891179839633448, | |
| "grad_norm": 0.3810628354549408, | |
| "learning_rate": 1.560456344155694e-05, | |
| "step": 14300 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.4943246902009788, | |
| "grad_norm": 0.014939317479729652, | |
| "learning_rate": 1.5581422257191122e-05, | |
| "step": 14350 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 1.499531396438613, | |
| "grad_norm": 0.054862458258867264, | |
| "learning_rate": 1.555828107282531e-05, | |
| "step": 14400 | |
| }, | |
| { | |
| "embedding_loss": 0.0034, | |
| "epoch": 1.504738102676247, | |
| "grad_norm": 0.11869315803050995, | |
| "learning_rate": 1.5535139888459492e-05, | |
| "step": 14450 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 1.5099448089138812, | |
| "grad_norm": 0.040105488151311874, | |
| "learning_rate": 1.5511998704093675e-05, | |
| "step": 14500 | |
| }, | |
| { | |
| "embedding_loss": 0.0058, | |
| "epoch": 1.5151515151515151, | |
| "grad_norm": 0.6557055711746216, | |
| "learning_rate": 1.548885751972786e-05, | |
| "step": 14550 | |
| }, | |
| { | |
| "embedding_loss": 0.0043, | |
| "epoch": 1.5203582213891491, | |
| "grad_norm": 0.7020523548126221, | |
| "learning_rate": 1.5465716335362045e-05, | |
| "step": 14600 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 1.5255649276267833, | |
| "grad_norm": 0.2461288869380951, | |
| "learning_rate": 1.544257515099623e-05, | |
| "step": 14650 | |
| }, | |
| { | |
| "embedding_loss": 0.0061, | |
| "epoch": 1.5307716338644175, | |
| "grad_norm": 0.033834848552942276, | |
| "learning_rate": 1.5419433966630412e-05, | |
| "step": 14700 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 1.5359783401020515, | |
| "grad_norm": 0.0170294102281332, | |
| "learning_rate": 1.53962927822646e-05, | |
| "step": 14750 | |
| }, | |
| { | |
| "embedding_loss": 0.0059, | |
| "epoch": 1.5411850463396854, | |
| "grad_norm": 0.038527410477399826, | |
| "learning_rate": 1.537315159789878e-05, | |
| "step": 14800 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 1.5463917525773194, | |
| "grad_norm": 0.020393826067447662, | |
| "learning_rate": 1.5350010413532965e-05, | |
| "step": 14850 | |
| }, | |
| { | |
| "embedding_loss": 0.0041, | |
| "epoch": 1.5515984588149536, | |
| "grad_norm": 0.08289851248264313, | |
| "learning_rate": 1.5326869229167148e-05, | |
| "step": 14900 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.5568051650525878, | |
| "grad_norm": 0.010838224552571774, | |
| "learning_rate": 1.5303728044801335e-05, | |
| "step": 14950 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.5620118712902218, | |
| "grad_norm": 0.021554453298449516, | |
| "learning_rate": 1.5280586860435518e-05, | |
| "step": 15000 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 1.5672185775278558, | |
| "grad_norm": 0.21896220743656158, | |
| "learning_rate": 1.5257445676069701e-05, | |
| "step": 15050 | |
| }, | |
| { | |
| "embedding_loss": 0.0059, | |
| "epoch": 1.57242528376549, | |
| "grad_norm": 3.4779744148254395, | |
| "learning_rate": 1.5234304491703886e-05, | |
| "step": 15100 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.5776319900031242, | |
| "grad_norm": 0.010911405086517334, | |
| "learning_rate": 1.521116330733807e-05, | |
| "step": 15150 | |
| }, | |
| { | |
| "embedding_loss": 0.0055, | |
| "epoch": 1.5828386962407581, | |
| "grad_norm": 1.0184364318847656, | |
| "learning_rate": 1.5188022122972255e-05, | |
| "step": 15200 | |
| }, | |
| { | |
| "embedding_loss": 0.0052, | |
| "epoch": 1.588045402478392, | |
| "grad_norm": 0.01177753321826458, | |
| "learning_rate": 1.516488093860644e-05, | |
| "step": 15250 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 1.5932521087160263, | |
| "grad_norm": 0.024036038666963577, | |
| "learning_rate": 1.5141739754240623e-05, | |
| "step": 15300 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.5984588149536603, | |
| "grad_norm": 0.015944767743349075, | |
| "learning_rate": 1.5118598569874808e-05, | |
| "step": 15350 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 1.6036655211912945, | |
| "grad_norm": 0.0119936503469944, | |
| "learning_rate": 1.5095457385508991e-05, | |
| "step": 15400 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 1.6088722274289284, | |
| "grad_norm": 0.1267576962709427, | |
| "learning_rate": 1.5072316201143176e-05, | |
| "step": 15450 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.6140789336665624, | |
| "grad_norm": 0.004355051554739475, | |
| "learning_rate": 1.504917501677736e-05, | |
| "step": 15500 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 1.6192856399041966, | |
| "grad_norm": 0.0077704135328531265, | |
| "learning_rate": 1.5026033832411544e-05, | |
| "step": 15550 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 1.6244923461418308, | |
| "grad_norm": 0.06213510408997536, | |
| "learning_rate": 1.5002892648045727e-05, | |
| "step": 15600 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.6296990523794648, | |
| "grad_norm": 0.10908373445272446, | |
| "learning_rate": 1.4979751463679912e-05, | |
| "step": 15650 | |
| }, | |
| { | |
| "embedding_loss": 0.0074, | |
| "epoch": 1.6349057586170987, | |
| "grad_norm": 0.008925637230277061, | |
| "learning_rate": 1.4956610279314097e-05, | |
| "step": 15700 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.640112464854733, | |
| "grad_norm": 0.023670511320233345, | |
| "learning_rate": 1.493346909494828e-05, | |
| "step": 15750 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 1.645319171092367, | |
| "grad_norm": 0.006442319136112928, | |
| "learning_rate": 1.4910327910582466e-05, | |
| "step": 15800 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 1.6505258773300011, | |
| "grad_norm": 0.013194055296480656, | |
| "learning_rate": 1.4887186726216649e-05, | |
| "step": 15850 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.655732583567635, | |
| "grad_norm": 0.011845475062727928, | |
| "learning_rate": 1.4864045541850834e-05, | |
| "step": 15900 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.660939289805269, | |
| "grad_norm": 0.007666606921702623, | |
| "learning_rate": 1.4840904357485017e-05, | |
| "step": 15950 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.6661459960429033, | |
| "grad_norm": 0.016819607466459274, | |
| "learning_rate": 1.4817763173119202e-05, | |
| "step": 16000 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 1.6713527022805375, | |
| "grad_norm": 0.07455668598413467, | |
| "learning_rate": 1.4794621988753385e-05, | |
| "step": 16050 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 1.6765594085181714, | |
| "grad_norm": 0.04744337126612663, | |
| "learning_rate": 1.477148080438757e-05, | |
| "step": 16100 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.6817661147558054, | |
| "grad_norm": 0.008270618505775928, | |
| "learning_rate": 1.4748339620021754e-05, | |
| "step": 16150 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.6869728209934396, | |
| "grad_norm": 0.007761300075799227, | |
| "learning_rate": 1.4725198435655939e-05, | |
| "step": 16200 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 1.6921795272310738, | |
| "grad_norm": 0.06050006300210953, | |
| "learning_rate": 1.4702057251290123e-05, | |
| "step": 16250 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 1.6973862334687078, | |
| "grad_norm": 0.019928568974137306, | |
| "learning_rate": 1.4678916066924307e-05, | |
| "step": 16300 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 1.7025929397063417, | |
| "grad_norm": 0.027616068720817566, | |
| "learning_rate": 1.4655774882558492e-05, | |
| "step": 16350 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.7077996459439757, | |
| "grad_norm": 0.37783312797546387, | |
| "learning_rate": 1.4632633698192675e-05, | |
| "step": 16400 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 1.71300635218161, | |
| "grad_norm": 0.8646184802055359, | |
| "learning_rate": 1.460949251382686e-05, | |
| "step": 16450 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 1.718213058419244, | |
| "grad_norm": 0.009249920025467873, | |
| "learning_rate": 1.4586351329461043e-05, | |
| "step": 16500 | |
| }, | |
| { | |
| "embedding_loss": 0.0034, | |
| "epoch": 1.723419764656878, | |
| "grad_norm": 0.010544302873313427, | |
| "learning_rate": 1.4563210145095228e-05, | |
| "step": 16550 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 1.728626470894512, | |
| "grad_norm": 0.038693223148584366, | |
| "learning_rate": 1.4540068960729411e-05, | |
| "step": 16600 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 1.7338331771321462, | |
| "grad_norm": 0.018318980932235718, | |
| "learning_rate": 1.4516927776363596e-05, | |
| "step": 16650 | |
| }, | |
| { | |
| "embedding_loss": 0.0045, | |
| "epoch": 1.7390398833697804, | |
| "grad_norm": 0.18338936567306519, | |
| "learning_rate": 1.4493786591997781e-05, | |
| "step": 16700 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 1.7442465896074144, | |
| "grad_norm": 0.029749080538749695, | |
| "learning_rate": 1.4470645407631965e-05, | |
| "step": 16750 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.7494532958450484, | |
| "grad_norm": 0.09010512381792068, | |
| "learning_rate": 1.444750422326615e-05, | |
| "step": 16800 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.7546600020826824, | |
| "grad_norm": 0.017163589596748352, | |
| "learning_rate": 1.4424363038900333e-05, | |
| "step": 16850 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 1.7598667083203166, | |
| "grad_norm": 0.028121547773480415, | |
| "learning_rate": 1.4401221854534518e-05, | |
| "step": 16900 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 1.7650734145579507, | |
| "grad_norm": 0.21652670204639435, | |
| "learning_rate": 1.4378080670168701e-05, | |
| "step": 16950 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 1.7702801207955847, | |
| "grad_norm": 0.03087479993700981, | |
| "learning_rate": 1.4354939485802886e-05, | |
| "step": 17000 | |
| }, | |
| { | |
| "embedding_loss": 0.0035, | |
| "epoch": 1.7754868270332187, | |
| "grad_norm": 0.0054185641929507256, | |
| "learning_rate": 1.4331798301437068e-05, | |
| "step": 17050 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 1.780693533270853, | |
| "grad_norm": 0.0028866103384643793, | |
| "learning_rate": 1.4308657117071253e-05, | |
| "step": 17100 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.785900239508487, | |
| "grad_norm": 0.058498039841651917, | |
| "learning_rate": 1.4285515932705436e-05, | |
| "step": 17150 | |
| }, | |
| { | |
| "embedding_loss": 0.0065, | |
| "epoch": 1.791106945746121, | |
| "grad_norm": 0.28154024481773376, | |
| "learning_rate": 1.426237474833962e-05, | |
| "step": 17200 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.796313651983755, | |
| "grad_norm": 0.01061001792550087, | |
| "learning_rate": 1.4239233563973804e-05, | |
| "step": 17250 | |
| }, | |
| { | |
| "embedding_loss": 0.003, | |
| "epoch": 1.801520358221389, | |
| "grad_norm": 0.004344331566244364, | |
| "learning_rate": 1.4216092379607989e-05, | |
| "step": 17300 | |
| }, | |
| { | |
| "embedding_loss": 0.0062, | |
| "epoch": 1.8067270644590232, | |
| "grad_norm": 1.790716528892517, | |
| "learning_rate": 1.4192951195242172e-05, | |
| "step": 17350 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 1.8119337706966574, | |
| "grad_norm": 0.042736802250146866, | |
| "learning_rate": 1.4169810010876357e-05, | |
| "step": 17400 | |
| }, | |
| { | |
| "embedding_loss": 0.0047, | |
| "epoch": 1.8171404769342914, | |
| "grad_norm": 0.003962809685617685, | |
| "learning_rate": 1.414666882651054e-05, | |
| "step": 17450 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 1.8223471831719253, | |
| "grad_norm": 0.012670880183577538, | |
| "learning_rate": 1.4123527642144725e-05, | |
| "step": 17500 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 1.8275538894095595, | |
| "grad_norm": 0.005040524061769247, | |
| "learning_rate": 1.410038645777891e-05, | |
| "step": 17550 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 1.8327605956471937, | |
| "grad_norm": 0.36730483174324036, | |
| "learning_rate": 1.4077245273413094e-05, | |
| "step": 17600 | |
| }, | |
| { | |
| "embedding_loss": 0.0034, | |
| "epoch": 1.8379673018848277, | |
| "grad_norm": 0.02946503274142742, | |
| "learning_rate": 1.4054104089047279e-05, | |
| "step": 17650 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 1.8431740081224617, | |
| "grad_norm": 0.013080528937280178, | |
| "learning_rate": 1.4030962904681462e-05, | |
| "step": 17700 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 1.8483807143600957, | |
| "grad_norm": 0.02603771910071373, | |
| "learning_rate": 1.4007821720315647e-05, | |
| "step": 17750 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.8535874205977299, | |
| "grad_norm": 0.2753530740737915, | |
| "learning_rate": 1.398468053594983e-05, | |
| "step": 17800 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 1.858794126835364, | |
| "grad_norm": 0.09671527147293091, | |
| "learning_rate": 1.3961539351584015e-05, | |
| "step": 17850 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 1.864000833072998, | |
| "grad_norm": 0.00563651230186224, | |
| "learning_rate": 1.3938398167218198e-05, | |
| "step": 17900 | |
| }, | |
| { | |
| "embedding_loss": 0.0047, | |
| "epoch": 1.869207539310632, | |
| "grad_norm": 0.013191591948270798, | |
| "learning_rate": 1.3915256982852383e-05, | |
| "step": 17950 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 1.8744142455482662, | |
| "grad_norm": 0.0058168028481304646, | |
| "learning_rate": 1.3892115798486567e-05, | |
| "step": 18000 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 1.8796209517859004, | |
| "grad_norm": 0.41721343994140625, | |
| "learning_rate": 1.3868974614120751e-05, | |
| "step": 18050 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 1.8848276580235344, | |
| "grad_norm": 0.19165031611919403, | |
| "learning_rate": 1.3845833429754936e-05, | |
| "step": 18100 | |
| }, | |
| { | |
| "embedding_loss": 0.0038, | |
| "epoch": 1.8900343642611683, | |
| "grad_norm": 0.006406415719538927, | |
| "learning_rate": 1.382269224538912e-05, | |
| "step": 18150 | |
| }, | |
| { | |
| "embedding_loss": 0.0034, | |
| "epoch": 1.8952410704988023, | |
| "grad_norm": 0.01080580148845911, | |
| "learning_rate": 1.3799551061023305e-05, | |
| "step": 18200 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.9004477767364365, | |
| "grad_norm": 0.006921404041349888, | |
| "learning_rate": 1.3776409876657488e-05, | |
| "step": 18250 | |
| }, | |
| { | |
| "embedding_loss": 0.0033, | |
| "epoch": 1.9056544829740707, | |
| "grad_norm": 0.0030105006881058216, | |
| "learning_rate": 1.3753268692291673e-05, | |
| "step": 18300 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 1.9108611892117047, | |
| "grad_norm": 0.025791391730308533, | |
| "learning_rate": 1.3730127507925856e-05, | |
| "step": 18350 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 1.9160678954493386, | |
| "grad_norm": 0.0030609758105129004, | |
| "learning_rate": 1.3706986323560041e-05, | |
| "step": 18400 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 1.9212746016869728, | |
| "grad_norm": 0.5819743871688843, | |
| "learning_rate": 1.3683845139194224e-05, | |
| "step": 18450 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 1.926481307924607, | |
| "grad_norm": 0.015468656085431576, | |
| "learning_rate": 1.366070395482841e-05, | |
| "step": 18500 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 1.931688014162241, | |
| "grad_norm": 0.004252830985933542, | |
| "learning_rate": 1.3637562770462594e-05, | |
| "step": 18550 | |
| }, | |
| { | |
| "embedding_loss": 0.0035, | |
| "epoch": 1.936894720399875, | |
| "grad_norm": 0.008880583569407463, | |
| "learning_rate": 1.3614421586096778e-05, | |
| "step": 18600 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 1.942101426637509, | |
| "grad_norm": 0.007954990491271019, | |
| "learning_rate": 1.3591280401730962e-05, | |
| "step": 18650 | |
| }, | |
| { | |
| "embedding_loss": 0.0029, | |
| "epoch": 1.9473081328751431, | |
| "grad_norm": 0.014845364727079868, | |
| "learning_rate": 1.3568139217365146e-05, | |
| "step": 18700 | |
| }, | |
| { | |
| "embedding_loss": 0.0033, | |
| "epoch": 1.9525148391127773, | |
| "grad_norm": 0.004382742568850517, | |
| "learning_rate": 1.354499803299933e-05, | |
| "step": 18750 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 1.9577215453504113, | |
| "grad_norm": 0.2789106070995331, | |
| "learning_rate": 1.3521856848633514e-05, | |
| "step": 18800 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 1.9629282515880453, | |
| "grad_norm": 0.00724539440125227, | |
| "learning_rate": 1.3498715664267699e-05, | |
| "step": 18850 | |
| }, | |
| { | |
| "embedding_loss": 0.0044, | |
| "epoch": 1.9681349578256795, | |
| "grad_norm": 0.05976763367652893, | |
| "learning_rate": 1.3475574479901882e-05, | |
| "step": 18900 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 1.9733416640633137, | |
| "grad_norm": 0.018617313355207443, | |
| "learning_rate": 1.3452433295536067e-05, | |
| "step": 18950 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 1.9785483703009477, | |
| "grad_norm": 0.07279914617538452, | |
| "learning_rate": 1.342929211117025e-05, | |
| "step": 19000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 1.9837550765385816, | |
| "grad_norm": 0.005604149773716927, | |
| "learning_rate": 1.3406150926804435e-05, | |
| "step": 19050 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 1.9889617827762156, | |
| "grad_norm": 0.4676770865917206, | |
| "learning_rate": 1.338300974243862e-05, | |
| "step": 19100 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 1.9941684890138498, | |
| "grad_norm": 0.006381129380315542, | |
| "learning_rate": 1.3359868558072804e-05, | |
| "step": 19150 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 1.999375195251484, | |
| "grad_norm": 0.44813236594200134, | |
| "learning_rate": 1.3336727373706989e-05, | |
| "step": 19200 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.004581901489118, | |
| "grad_norm": 0.01616285741329193, | |
| "learning_rate": 1.3313586189341172e-05, | |
| "step": 19250 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.009788607726752, | |
| "grad_norm": 0.006148567423224449, | |
| "learning_rate": 1.3290445004975357e-05, | |
| "step": 19300 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 2.014995313964386, | |
| "grad_norm": 0.009615874849259853, | |
| "learning_rate": 1.326730382060954e-05, | |
| "step": 19350 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 0.004251678008586168, | |
| "learning_rate": 1.3244162636243725e-05, | |
| "step": 19400 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.0254087264396543, | |
| "grad_norm": 0.008113077841699123, | |
| "learning_rate": 1.3221021451877908e-05, | |
| "step": 19450 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.0306154326772883, | |
| "grad_norm": 0.02726900391280651, | |
| "learning_rate": 1.3197880267512093e-05, | |
| "step": 19500 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 2.0358221389149223, | |
| "grad_norm": 0.00499620521441102, | |
| "learning_rate": 1.3174739083146278e-05, | |
| "step": 19550 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.0410288451525567, | |
| "grad_norm": 1.2157723903656006, | |
| "learning_rate": 1.3151597898780461e-05, | |
| "step": 19600 | |
| }, | |
| { | |
| "embedding_loss": 0.0031, | |
| "epoch": 2.0462355513901906, | |
| "grad_norm": 0.08977110683917999, | |
| "learning_rate": 1.3128456714414646e-05, | |
| "step": 19650 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.0514422576278246, | |
| "grad_norm": 0.05430648848414421, | |
| "learning_rate": 1.310531553004883e-05, | |
| "step": 19700 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.0566489638654586, | |
| "grad_norm": 0.01022451464086771, | |
| "learning_rate": 1.3082174345683015e-05, | |
| "step": 19750 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.0618556701030926, | |
| "grad_norm": 0.00965672917664051, | |
| "learning_rate": 1.3059033161317198e-05, | |
| "step": 19800 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.067062376340727, | |
| "grad_norm": 0.005539502017199993, | |
| "learning_rate": 1.3035891976951383e-05, | |
| "step": 19850 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.072269082578361, | |
| "grad_norm": 0.006059055682271719, | |
| "learning_rate": 1.3012750792585564e-05, | |
| "step": 19900 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 2.077475788815995, | |
| "grad_norm": 0.14464695751667023, | |
| "learning_rate": 1.298960960821975e-05, | |
| "step": 19950 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.082682495053629, | |
| "grad_norm": 0.011897514574229717, | |
| "learning_rate": 1.2966468423853933e-05, | |
| "step": 20000 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.0878892012912633, | |
| "grad_norm": 0.0040528737008571625, | |
| "learning_rate": 1.2943327239488118e-05, | |
| "step": 20050 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.0930959075288973, | |
| "grad_norm": 0.007819181308150291, | |
| "learning_rate": 1.2920186055122301e-05, | |
| "step": 20100 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.0983026137665313, | |
| "grad_norm": 0.013666506856679916, | |
| "learning_rate": 1.2897044870756486e-05, | |
| "step": 20150 | |
| }, | |
| { | |
| "embedding_loss": 0.0057, | |
| "epoch": 2.1035093200041652, | |
| "grad_norm": 0.010328873060643673, | |
| "learning_rate": 1.2873903686390669e-05, | |
| "step": 20200 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.108716026241799, | |
| "grad_norm": 0.019933296367526054, | |
| "learning_rate": 1.2850762502024854e-05, | |
| "step": 20250 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.1139227324794336, | |
| "grad_norm": 0.007374211680144072, | |
| "learning_rate": 1.2827621317659037e-05, | |
| "step": 20300 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 2.1191294387170676, | |
| "grad_norm": 0.012251504696905613, | |
| "learning_rate": 1.2804480133293222e-05, | |
| "step": 20350 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.1243361449547016, | |
| "grad_norm": 0.005697314627468586, | |
| "learning_rate": 1.2781338948927407e-05, | |
| "step": 20400 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.1295428511923356, | |
| "grad_norm": 0.002244447823613882, | |
| "learning_rate": 1.275819776456159e-05, | |
| "step": 20450 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.13474955742997, | |
| "grad_norm": 0.015698591247200966, | |
| "learning_rate": 1.2735056580195775e-05, | |
| "step": 20500 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.139956263667604, | |
| "grad_norm": 0.01356748677790165, | |
| "learning_rate": 1.2711915395829959e-05, | |
| "step": 20550 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.145162969905238, | |
| "grad_norm": 0.008849513716995716, | |
| "learning_rate": 1.2688774211464144e-05, | |
| "step": 20600 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 2.150369676142872, | |
| "grad_norm": 0.6928774118423462, | |
| "learning_rate": 1.2665633027098327e-05, | |
| "step": 20650 | |
| }, | |
| { | |
| "embedding_loss": 0.0039, | |
| "epoch": 2.155576382380506, | |
| "grad_norm": 0.03237714618444443, | |
| "learning_rate": 1.2642491842732512e-05, | |
| "step": 20700 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 2.1607830886181403, | |
| "grad_norm": 0.0030646566301584244, | |
| "learning_rate": 1.2619350658366695e-05, | |
| "step": 20750 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.1659897948557743, | |
| "grad_norm": 0.011956333182752132, | |
| "learning_rate": 1.259620947400088e-05, | |
| "step": 20800 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.1711965010934082, | |
| "grad_norm": 0.007671385072171688, | |
| "learning_rate": 1.2573068289635063e-05, | |
| "step": 20850 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.176403207331042, | |
| "grad_norm": 0.010090204887092113, | |
| "learning_rate": 1.2549927105269248e-05, | |
| "step": 20900 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.1816099135686766, | |
| "grad_norm": 0.010057215578854084, | |
| "learning_rate": 1.2526785920903433e-05, | |
| "step": 20950 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.1868166198063106, | |
| "grad_norm": 0.009716392494738102, | |
| "learning_rate": 1.2503644736537617e-05, | |
| "step": 21000 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.1920233260439446, | |
| "grad_norm": 0.003773706266656518, | |
| "learning_rate": 1.2480503552171802e-05, | |
| "step": 21050 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.1972300322815785, | |
| "grad_norm": 0.004189903382211924, | |
| "learning_rate": 1.2457362367805985e-05, | |
| "step": 21100 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.2024367385192125, | |
| "grad_norm": 0.005080494098365307, | |
| "learning_rate": 1.243422118344017e-05, | |
| "step": 21150 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.207643444756847, | |
| "grad_norm": 0.0064069656655192375, | |
| "learning_rate": 1.2411079999074353e-05, | |
| "step": 21200 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.212850150994481, | |
| "grad_norm": 0.0846613198518753, | |
| "learning_rate": 1.2387938814708538e-05, | |
| "step": 21250 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.218056857232115, | |
| "grad_norm": 0.004274032544344664, | |
| "learning_rate": 1.2364797630342721e-05, | |
| "step": 21300 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 2.223263563469749, | |
| "grad_norm": 0.006647061090916395, | |
| "learning_rate": 1.2341656445976906e-05, | |
| "step": 21350 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.2284702697073833, | |
| "grad_norm": 0.1389550268650055, | |
| "learning_rate": 1.2318515261611091e-05, | |
| "step": 21400 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.2336769759450172, | |
| "grad_norm": 0.00890056136995554, | |
| "learning_rate": 1.2295374077245274e-05, | |
| "step": 21450 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.238883682182651, | |
| "grad_norm": 0.022632068023085594, | |
| "learning_rate": 1.227223289287946e-05, | |
| "step": 21500 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.244090388420285, | |
| "grad_norm": 0.007279681041836739, | |
| "learning_rate": 1.2249091708513643e-05, | |
| "step": 21550 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.2492970946579196, | |
| "grad_norm": 0.010247277095913887, | |
| "learning_rate": 1.2225950524147828e-05, | |
| "step": 21600 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.2545038008955536, | |
| "grad_norm": 0.004706698004156351, | |
| "learning_rate": 1.2202809339782011e-05, | |
| "step": 21650 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.2597105071331876, | |
| "grad_norm": 0.031804159283638, | |
| "learning_rate": 1.2179668155416196e-05, | |
| "step": 21700 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.2649172133708215, | |
| "grad_norm": 0.021003112196922302, | |
| "learning_rate": 1.2156526971050379e-05, | |
| "step": 21750 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.2701239196084555, | |
| "grad_norm": 0.003928271122276783, | |
| "learning_rate": 1.2133385786684564e-05, | |
| "step": 21800 | |
| }, | |
| { | |
| "embedding_loss": 0.0038, | |
| "epoch": 2.27533062584609, | |
| "grad_norm": 0.9323834180831909, | |
| "learning_rate": 1.2110244602318747e-05, | |
| "step": 21850 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.280537332083724, | |
| "grad_norm": 0.010309775359928608, | |
| "learning_rate": 1.2087103417952932e-05, | |
| "step": 21900 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.285744038321358, | |
| "grad_norm": 0.008217355236411095, | |
| "learning_rate": 1.2063962233587117e-05, | |
| "step": 21950 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.290950744558992, | |
| "grad_norm": 0.013672198168933392, | |
| "learning_rate": 1.20408210492213e-05, | |
| "step": 22000 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.296157450796626, | |
| "grad_norm": 0.1977008581161499, | |
| "learning_rate": 1.2017679864855485e-05, | |
| "step": 22050 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.3013641570342602, | |
| "grad_norm": 0.006241293158382177, | |
| "learning_rate": 1.1994538680489669e-05, | |
| "step": 22100 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.306570863271894, | |
| "grad_norm": 0.0057389759458601475, | |
| "learning_rate": 1.1971397496123854e-05, | |
| "step": 22150 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.311777569509528, | |
| "grad_norm": 0.006034619640558958, | |
| "learning_rate": 1.1948256311758037e-05, | |
| "step": 22200 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.3169842757471626, | |
| "grad_norm": 0.3582187592983246, | |
| "learning_rate": 1.1925115127392222e-05, | |
| "step": 22250 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.3221909819847966, | |
| "grad_norm": 0.003342969575896859, | |
| "learning_rate": 1.1901973943026405e-05, | |
| "step": 22300 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.3273976882224305, | |
| "grad_norm": 0.017463702708482742, | |
| "learning_rate": 1.187883275866059e-05, | |
| "step": 22350 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.3326043944600645, | |
| "grad_norm": 0.005371089559048414, | |
| "learning_rate": 1.1855691574294775e-05, | |
| "step": 22400 | |
| }, | |
| { | |
| "embedding_loss": 0.004, | |
| "epoch": 2.3378111006976985, | |
| "grad_norm": 0.005444334354251623, | |
| "learning_rate": 1.1832550389928958e-05, | |
| "step": 22450 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.3430178069353325, | |
| "grad_norm": 0.00549267278984189, | |
| "learning_rate": 1.1809409205563143e-05, | |
| "step": 22500 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.348224513172967, | |
| "grad_norm": 0.009904368780553341, | |
| "learning_rate": 1.1786268021197327e-05, | |
| "step": 22550 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.353431219410601, | |
| "grad_norm": 0.004460447933524847, | |
| "learning_rate": 1.1763126836831512e-05, | |
| "step": 22600 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.358637925648235, | |
| "grad_norm": 0.012372348457574844, | |
| "learning_rate": 1.1739985652465695e-05, | |
| "step": 22650 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.3638446318858692, | |
| "grad_norm": 0.007495572324842215, | |
| "learning_rate": 1.171684446809988e-05, | |
| "step": 22700 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.369051338123503, | |
| "grad_norm": 0.014190604910254478, | |
| "learning_rate": 1.1693703283734061e-05, | |
| "step": 22750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.374258044361137, | |
| "grad_norm": 0.4924188256263733, | |
| "learning_rate": 1.1670562099368246e-05, | |
| "step": 22800 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.379464750598771, | |
| "grad_norm": 0.013879277743399143, | |
| "learning_rate": 1.164742091500243e-05, | |
| "step": 22850 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 2.384671456836405, | |
| "grad_norm": 0.02071734145283699, | |
| "learning_rate": 1.1624279730636614e-05, | |
| "step": 22900 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.3898781630740396, | |
| "grad_norm": 0.004272214137017727, | |
| "learning_rate": 1.1601138546270798e-05, | |
| "step": 22950 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.3950848693116735, | |
| "grad_norm": 0.002411644207313657, | |
| "learning_rate": 1.1577997361904983e-05, | |
| "step": 23000 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.4002915755493075, | |
| "grad_norm": 0.019449541345238686, | |
| "learning_rate": 1.1554856177539166e-05, | |
| "step": 23050 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.4054982817869415, | |
| "grad_norm": 0.02527959644794464, | |
| "learning_rate": 1.1531714993173351e-05, | |
| "step": 23100 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.410704988024576, | |
| "grad_norm": 0.16354507207870483, | |
| "learning_rate": 1.1508573808807534e-05, | |
| "step": 23150 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.41591169426221, | |
| "grad_norm": 0.017682882025837898, | |
| "learning_rate": 1.148543262444172e-05, | |
| "step": 23200 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.421118400499844, | |
| "grad_norm": 0.0050527737475931644, | |
| "learning_rate": 1.1462291440075904e-05, | |
| "step": 23250 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.426325106737478, | |
| "grad_norm": 0.0023584417067468166, | |
| "learning_rate": 1.1439150255710087e-05, | |
| "step": 23300 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.431531812975112, | |
| "grad_norm": 0.3315781354904175, | |
| "learning_rate": 1.1416009071344272e-05, | |
| "step": 23350 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.436738519212746, | |
| "grad_norm": 0.003767622634768486, | |
| "learning_rate": 1.1392867886978456e-05, | |
| "step": 23400 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.44194522545038, | |
| "grad_norm": 0.06164936348795891, | |
| "learning_rate": 1.136972670261264e-05, | |
| "step": 23450 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.447151931688014, | |
| "grad_norm": 0.014987274073064327, | |
| "learning_rate": 1.1346585518246824e-05, | |
| "step": 23500 | |
| }, | |
| { | |
| "embedding_loss": 0.0025, | |
| "epoch": 2.452358637925648, | |
| "grad_norm": 0.014723850414156914, | |
| "learning_rate": 1.1323444333881009e-05, | |
| "step": 23550 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.4575653441632825, | |
| "grad_norm": 0.007753140293061733, | |
| "learning_rate": 1.1300303149515192e-05, | |
| "step": 23600 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.4627720504009165, | |
| "grad_norm": 0.11420779675245285, | |
| "learning_rate": 1.1277161965149377e-05, | |
| "step": 23650 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.4679787566385505, | |
| "grad_norm": 0.0015545577043667436, | |
| "learning_rate": 1.125402078078356e-05, | |
| "step": 23700 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.4731854628761845, | |
| "grad_norm": 0.002739744959399104, | |
| "learning_rate": 1.1230879596417745e-05, | |
| "step": 23750 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.4783921691138184, | |
| "grad_norm": 0.14792239665985107, | |
| "learning_rate": 1.120773841205193e-05, | |
| "step": 23800 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.483598875351453, | |
| "grad_norm": 0.016194604337215424, | |
| "learning_rate": 1.1184597227686113e-05, | |
| "step": 23850 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.488805581589087, | |
| "grad_norm": 0.0649636909365654, | |
| "learning_rate": 1.1161456043320298e-05, | |
| "step": 23900 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.494012287826721, | |
| "grad_norm": 0.005290072411298752, | |
| "learning_rate": 1.1138314858954482e-05, | |
| "step": 23950 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.4992189940643548, | |
| "grad_norm": 0.010143323801457882, | |
| "learning_rate": 1.1115173674588667e-05, | |
| "step": 24000 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.504425700301989, | |
| "grad_norm": 0.00270524388179183, | |
| "learning_rate": 1.109203249022285e-05, | |
| "step": 24050 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.509632406539623, | |
| "grad_norm": 0.0045821997337043285, | |
| "learning_rate": 1.1068891305857035e-05, | |
| "step": 24100 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.514839112777257, | |
| "grad_norm": 0.003760270308703184, | |
| "learning_rate": 1.1045750121491218e-05, | |
| "step": 24150 | |
| }, | |
| { | |
| "embedding_loss": 0.0037, | |
| "epoch": 2.520045819014891, | |
| "grad_norm": 0.08812420815229416, | |
| "learning_rate": 1.1022608937125403e-05, | |
| "step": 24200 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.525252525252525, | |
| "grad_norm": 0.27958598732948303, | |
| "learning_rate": 1.0999467752759588e-05, | |
| "step": 24250 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.530459231490159, | |
| "grad_norm": 0.004292377736419439, | |
| "learning_rate": 1.0976326568393771e-05, | |
| "step": 24300 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.5356659377277935, | |
| "grad_norm": 0.21659308671951294, | |
| "learning_rate": 1.0953185384027956e-05, | |
| "step": 24350 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.5408726439654274, | |
| "grad_norm": 0.005312615539878607, | |
| "learning_rate": 1.093004419966214e-05, | |
| "step": 24400 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.5460793502030614, | |
| "grad_norm": 0.003285923507064581, | |
| "learning_rate": 1.0906903015296324e-05, | |
| "step": 24450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.551286056440696, | |
| "grad_norm": 0.00929224118590355, | |
| "learning_rate": 1.0883761830930508e-05, | |
| "step": 24500 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.55649276267833, | |
| "grad_norm": 0.003572756890207529, | |
| "learning_rate": 1.0860620646564693e-05, | |
| "step": 24550 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.561699468915964, | |
| "grad_norm": 0.002889364492148161, | |
| "learning_rate": 1.0837479462198876e-05, | |
| "step": 24600 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.5669061751535978, | |
| "grad_norm": 0.02578425034880638, | |
| "learning_rate": 1.0814338277833061e-05, | |
| "step": 24650 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.5721128813912317, | |
| "grad_norm": 0.010893690399825573, | |
| "learning_rate": 1.0791197093467244e-05, | |
| "step": 24700 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.5773195876288657, | |
| "grad_norm": 0.005054382607340813, | |
| "learning_rate": 1.0768055909101429e-05, | |
| "step": 24750 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.5825262938665, | |
| "grad_norm": 0.09210974723100662, | |
| "learning_rate": 1.0744914724735614e-05, | |
| "step": 24800 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.587733000104134, | |
| "grad_norm": 0.007207928225398064, | |
| "learning_rate": 1.0721773540369797e-05, | |
| "step": 24850 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.592939706341768, | |
| "grad_norm": 0.005150509066879749, | |
| "learning_rate": 1.0698632356003982e-05, | |
| "step": 24900 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.5981464125794025, | |
| "grad_norm": 0.007632564753293991, | |
| "learning_rate": 1.0675491171638166e-05, | |
| "step": 24950 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.6033531188170365, | |
| "grad_norm": 0.004092794377356768, | |
| "learning_rate": 1.065234998727235e-05, | |
| "step": 25000 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 2.6085598250546704, | |
| "grad_norm": 0.013759996742010117, | |
| "learning_rate": 1.0629208802906534e-05, | |
| "step": 25050 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.6137665312923044, | |
| "grad_norm": 0.004917910788208246, | |
| "learning_rate": 1.0606067618540719e-05, | |
| "step": 25100 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.6189732375299384, | |
| "grad_norm": 0.01083595585078001, | |
| "learning_rate": 1.0582926434174902e-05, | |
| "step": 25150 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.624179943767573, | |
| "grad_norm": 0.3743145167827606, | |
| "learning_rate": 1.0559785249809087e-05, | |
| "step": 25200 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.6293866500052068, | |
| "grad_norm": 0.003947914578020573, | |
| "learning_rate": 1.0536644065443272e-05, | |
| "step": 25250 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.6345933562428407, | |
| "grad_norm": 0.0035067517310380936, | |
| "learning_rate": 1.0513502881077455e-05, | |
| "step": 25300 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.6398000624804747, | |
| "grad_norm": 0.021643230691552162, | |
| "learning_rate": 1.049036169671164e-05, | |
| "step": 25350 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.645006768718109, | |
| "grad_norm": 0.016041336581110954, | |
| "learning_rate": 1.0467220512345823e-05, | |
| "step": 25400 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.650213474955743, | |
| "grad_norm": 0.0026006808038800955, | |
| "learning_rate": 1.0444079327980008e-05, | |
| "step": 25450 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.655420181193377, | |
| "grad_norm": 0.006043120287358761, | |
| "learning_rate": 1.0420938143614192e-05, | |
| "step": 25500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.660626887431011, | |
| "grad_norm": 0.007799636106938124, | |
| "learning_rate": 1.0397796959248377e-05, | |
| "step": 25550 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.665833593668645, | |
| "grad_norm": 0.0032333596609532833, | |
| "learning_rate": 1.0374655774882558e-05, | |
| "step": 25600 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 2.6710402999062794, | |
| "grad_norm": 0.006502605974674225, | |
| "learning_rate": 1.0351514590516743e-05, | |
| "step": 25650 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.6762470061439134, | |
| "grad_norm": 0.002495839726179838, | |
| "learning_rate": 1.0328373406150926e-05, | |
| "step": 25700 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.6814537123815474, | |
| "grad_norm": 0.011339404620230198, | |
| "learning_rate": 1.0305232221785111e-05, | |
| "step": 25750 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.6866604186191814, | |
| "grad_norm": 0.0049376110546290874, | |
| "learning_rate": 1.0282091037419295e-05, | |
| "step": 25800 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.691867124856816, | |
| "grad_norm": 0.003491663606837392, | |
| "learning_rate": 1.025894985305348e-05, | |
| "step": 25850 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.6970738310944498, | |
| "grad_norm": 0.06203962489962578, | |
| "learning_rate": 1.0235808668687663e-05, | |
| "step": 25900 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.7022805373320837, | |
| "grad_norm": 0.0038036692421883345, | |
| "learning_rate": 1.0212667484321848e-05, | |
| "step": 25950 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 2.7074872435697177, | |
| "grad_norm": 0.013710272498428822, | |
| "learning_rate": 1.0189526299956031e-05, | |
| "step": 26000 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.7126939498073517, | |
| "grad_norm": 0.003352423897013068, | |
| "learning_rate": 1.0166385115590216e-05, | |
| "step": 26050 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.717900656044986, | |
| "grad_norm": 0.004212658852338791, | |
| "learning_rate": 1.0143243931224401e-05, | |
| "step": 26100 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.72310736228262, | |
| "grad_norm": 0.05683301389217377, | |
| "learning_rate": 1.0120102746858584e-05, | |
| "step": 26150 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 2.728314068520254, | |
| "grad_norm": 0.008711031638085842, | |
| "learning_rate": 1.009696156249277e-05, | |
| "step": 26200 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.733520774757888, | |
| "grad_norm": 0.007152698002755642, | |
| "learning_rate": 1.0073820378126952e-05, | |
| "step": 26250 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.7387274809955224, | |
| "grad_norm": 0.003356023458763957, | |
| "learning_rate": 1.0050679193761137e-05, | |
| "step": 26300 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 2.7439341872331564, | |
| "grad_norm": 0.016632454469799995, | |
| "learning_rate": 1.002753800939532e-05, | |
| "step": 26350 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.7491408934707904, | |
| "grad_norm": 0.5349009037017822, | |
| "learning_rate": 1.0004396825029506e-05, | |
| "step": 26400 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.7543475997084244, | |
| "grad_norm": 0.004687939304858446, | |
| "learning_rate": 9.981255640663689e-06, | |
| "step": 26450 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 2.7595543059460583, | |
| "grad_norm": 0.0024527718778699636, | |
| "learning_rate": 9.958114456297874e-06, | |
| "step": 26500 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.7647610121836927, | |
| "grad_norm": 0.08971556276082993, | |
| "learning_rate": 9.934973271932057e-06, | |
| "step": 26550 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.7699677184213267, | |
| "grad_norm": 0.00650964817032218, | |
| "learning_rate": 9.911832087566242e-06, | |
| "step": 26600 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 2.7751744246589607, | |
| "grad_norm": 0.0036748195998370647, | |
| "learning_rate": 9.888690903200427e-06, | |
| "step": 26650 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 2.7803811308965947, | |
| "grad_norm": 0.003135056234896183, | |
| "learning_rate": 9.86554971883461e-06, | |
| "step": 26700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.785587837134229, | |
| "grad_norm": 0.0061689745634794235, | |
| "learning_rate": 9.842408534468795e-06, | |
| "step": 26750 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.790794543371863, | |
| "grad_norm": 0.0035342394839972258, | |
| "learning_rate": 9.819267350102979e-06, | |
| "step": 26800 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 2.796001249609497, | |
| "grad_norm": 0.08970965445041656, | |
| "learning_rate": 9.796126165737164e-06, | |
| "step": 26850 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.801207955847131, | |
| "grad_norm": 0.002801700960844755, | |
| "learning_rate": 9.772984981371347e-06, | |
| "step": 26900 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.806414662084765, | |
| "grad_norm": 0.06024768948554993, | |
| "learning_rate": 9.749843797005532e-06, | |
| "step": 26950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 2.8116213683223994, | |
| "grad_norm": 0.003583586309105158, | |
| "learning_rate": 9.726702612639715e-06, | |
| "step": 27000 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.8168280745600334, | |
| "grad_norm": 0.022347550839185715, | |
| "learning_rate": 9.7035614282739e-06, | |
| "step": 27050 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.8220347807976673, | |
| "grad_norm": 0.0020712248515337706, | |
| "learning_rate": 9.680420243908085e-06, | |
| "step": 27100 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.8272414870353013, | |
| "grad_norm": 0.00296349311247468, | |
| "learning_rate": 9.657279059542268e-06, | |
| "step": 27150 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.8324481932729357, | |
| "grad_norm": 0.004647277761250734, | |
| "learning_rate": 9.634137875176453e-06, | |
| "step": 27200 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 2.8376548995105697, | |
| "grad_norm": 0.09574100375175476, | |
| "learning_rate": 9.610996690810636e-06, | |
| "step": 27250 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.8428616057482037, | |
| "grad_norm": 0.002023301785811782, | |
| "learning_rate": 9.587855506444821e-06, | |
| "step": 27300 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 2.8480683119858377, | |
| "grad_norm": 0.0016001787735149264, | |
| "learning_rate": 9.564714322079005e-06, | |
| "step": 27350 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.8532750182234716, | |
| "grad_norm": 0.02564910613000393, | |
| "learning_rate": 9.54157313771319e-06, | |
| "step": 27400 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.858481724461106, | |
| "grad_norm": 0.004294661805033684, | |
| "learning_rate": 9.518431953347373e-06, | |
| "step": 27450 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.86368843069874, | |
| "grad_norm": 0.02338520810008049, | |
| "learning_rate": 9.495290768981558e-06, | |
| "step": 27500 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 2.868895136936374, | |
| "grad_norm": 0.040758974850177765, | |
| "learning_rate": 9.472149584615741e-06, | |
| "step": 27550 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 2.874101843174008, | |
| "grad_norm": 0.008010084740817547, | |
| "learning_rate": 9.449008400249926e-06, | |
| "step": 27600 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 2.8793085494116424, | |
| "grad_norm": 0.007018416654318571, | |
| "learning_rate": 9.425867215884111e-06, | |
| "step": 27650 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 2.8845152556492764, | |
| "grad_norm": 0.010360274463891983, | |
| "learning_rate": 9.402726031518293e-06, | |
| "step": 27700 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 2.8897219618869103, | |
| "grad_norm": 0.007200753781944513, | |
| "learning_rate": 9.379584847152478e-06, | |
| "step": 27750 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.8949286681245443, | |
| "grad_norm": 0.0022850781679153442, | |
| "learning_rate": 9.356443662786662e-06, | |
| "step": 27800 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 2.9001353743621783, | |
| "grad_norm": 0.006049764808267355, | |
| "learning_rate": 9.333302478420846e-06, | |
| "step": 27850 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 2.9053420805998127, | |
| "grad_norm": 0.005760509520769119, | |
| "learning_rate": 9.31016129405503e-06, | |
| "step": 27900 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.9105487868374467, | |
| "grad_norm": 0.23274853825569153, | |
| "learning_rate": 9.287020109689214e-06, | |
| "step": 27950 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.9157554930750806, | |
| "grad_norm": 0.0031046303920447826, | |
| "learning_rate": 9.263878925323399e-06, | |
| "step": 28000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 2.9209621993127146, | |
| "grad_norm": 0.007741307374089956, | |
| "learning_rate": 9.240737740957582e-06, | |
| "step": 28050 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 2.926168905550349, | |
| "grad_norm": 0.01169963926076889, | |
| "learning_rate": 9.217596556591767e-06, | |
| "step": 28100 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 2.931375611787983, | |
| "grad_norm": 0.01061971951276064, | |
| "learning_rate": 9.19445537222595e-06, | |
| "step": 28150 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 2.936582318025617, | |
| "grad_norm": 0.0040078721940517426, | |
| "learning_rate": 9.171314187860135e-06, | |
| "step": 28200 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.941789024263251, | |
| "grad_norm": 0.00869227759540081, | |
| "learning_rate": 9.14817300349432e-06, | |
| "step": 28250 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 2.946995730500885, | |
| "grad_norm": 0.03226366266608238, | |
| "learning_rate": 9.125031819128504e-06, | |
| "step": 28300 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 2.9522024367385193, | |
| "grad_norm": 0.007837221957743168, | |
| "learning_rate": 9.101890634762689e-06, | |
| "step": 28350 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 2.9574091429761533, | |
| "grad_norm": 0.016281556338071823, | |
| "learning_rate": 9.078749450396872e-06, | |
| "step": 28400 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 2.9626158492137873, | |
| "grad_norm": 0.006102351471781731, | |
| "learning_rate": 9.055608266031057e-06, | |
| "step": 28450 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 2.9678225554514217, | |
| "grad_norm": 0.004030589014291763, | |
| "learning_rate": 9.03246708166524e-06, | |
| "step": 28500 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 2.9730292616890557, | |
| "grad_norm": 0.08026989549398422, | |
| "learning_rate": 9.009325897299425e-06, | |
| "step": 28550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 2.9782359679266897, | |
| "grad_norm": 0.004586766008287668, | |
| "learning_rate": 8.986184712933608e-06, | |
| "step": 28600 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 2.9834426741643236, | |
| "grad_norm": 0.04759465157985687, | |
| "learning_rate": 8.963043528567793e-06, | |
| "step": 28650 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 2.9886493804019576, | |
| "grad_norm": 0.002582300454378128, | |
| "learning_rate": 8.939902344201976e-06, | |
| "step": 28700 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 2.9938560866395916, | |
| "grad_norm": 0.008660154417157173, | |
| "learning_rate": 8.916761159836161e-06, | |
| "step": 28750 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 2.999062792877226, | |
| "grad_norm": 0.003941241651773453, | |
| "learning_rate": 8.893619975470346e-06, | |
| "step": 28800 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.00426949911486, | |
| "grad_norm": 0.00856933556497097, | |
| "learning_rate": 8.87047879110453e-06, | |
| "step": 28850 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.009476205352494, | |
| "grad_norm": 0.009460404515266418, | |
| "learning_rate": 8.847337606738715e-06, | |
| "step": 28900 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.014682911590128, | |
| "grad_norm": 0.006439635530114174, | |
| "learning_rate": 8.824196422372898e-06, | |
| "step": 28950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.0198896178277623, | |
| "grad_norm": 0.020535370334982872, | |
| "learning_rate": 8.801055238007083e-06, | |
| "step": 29000 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.0250963240653963, | |
| "grad_norm": 0.027857592329382896, | |
| "learning_rate": 8.777914053641266e-06, | |
| "step": 29050 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.0303030303030303, | |
| "grad_norm": 0.009412121027708054, | |
| "learning_rate": 8.754772869275451e-06, | |
| "step": 29100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.0355097365406642, | |
| "grad_norm": 0.05827178806066513, | |
| "learning_rate": 8.731631684909634e-06, | |
| "step": 29150 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.0407164427782982, | |
| "grad_norm": 0.002373203868046403, | |
| "learning_rate": 8.708490500543818e-06, | |
| "step": 29200 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.0459231490159326, | |
| "grad_norm": 0.006206809543073177, | |
| "learning_rate": 8.685349316178003e-06, | |
| "step": 29250 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.0511298552535666, | |
| "grad_norm": 0.00912196934223175, | |
| "learning_rate": 8.662208131812186e-06, | |
| "step": 29300 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.0563365614912006, | |
| "grad_norm": 0.00465911440551281, | |
| "learning_rate": 8.63906694744637e-06, | |
| "step": 29350 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.0615432677288346, | |
| "grad_norm": 0.18286481499671936, | |
| "learning_rate": 8.615925763080554e-06, | |
| "step": 29400 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.066749973966469, | |
| "grad_norm": 0.004343831911683083, | |
| "learning_rate": 8.592784578714739e-06, | |
| "step": 29450 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.071956680204103, | |
| "grad_norm": 0.05209621787071228, | |
| "learning_rate": 8.569643394348924e-06, | |
| "step": 29500 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.077163386441737, | |
| "grad_norm": 0.05028437450528145, | |
| "learning_rate": 8.546502209983107e-06, | |
| "step": 29550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.082370092679371, | |
| "grad_norm": 0.0026865217369049788, | |
| "learning_rate": 8.523361025617292e-06, | |
| "step": 29600 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.0875767989170053, | |
| "grad_norm": 0.4508988559246063, | |
| "learning_rate": 8.500219841251475e-06, | |
| "step": 29650 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.0927835051546393, | |
| "grad_norm": 0.02336781658232212, | |
| "learning_rate": 8.47707865688566e-06, | |
| "step": 29700 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.0979902113922733, | |
| "grad_norm": 0.0036797483917325735, | |
| "learning_rate": 8.453937472519844e-06, | |
| "step": 29750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.1031969176299072, | |
| "grad_norm": 0.007331520318984985, | |
| "learning_rate": 8.430796288154029e-06, | |
| "step": 29800 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.108403623867541, | |
| "grad_norm": 0.006399332545697689, | |
| "learning_rate": 8.407655103788212e-06, | |
| "step": 29850 | |
| }, | |
| { | |
| "embedding_loss": 0.0032, | |
| "epoch": 3.1136103301051756, | |
| "grad_norm": 0.09251190721988678, | |
| "learning_rate": 8.384513919422397e-06, | |
| "step": 29900 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.1188170363428096, | |
| "grad_norm": 0.005595037247985601, | |
| "learning_rate": 8.361372735056582e-06, | |
| "step": 29950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.1240237425804436, | |
| "grad_norm": 0.0045051900669932365, | |
| "learning_rate": 8.338231550690765e-06, | |
| "step": 30000 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.1292304488180775, | |
| "grad_norm": 0.00950851384550333, | |
| "learning_rate": 8.31509036632495e-06, | |
| "step": 30050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.1344371550557115, | |
| "grad_norm": 0.0038670655339956284, | |
| "learning_rate": 8.291949181959133e-06, | |
| "step": 30100 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.139643861293346, | |
| "grad_norm": 0.0030759319197386503, | |
| "learning_rate": 8.268807997593318e-06, | |
| "step": 30150 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 3.14485056753098, | |
| "grad_norm": 0.0031105412635952234, | |
| "learning_rate": 8.245666813227501e-06, | |
| "step": 30200 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.150057273768614, | |
| "grad_norm": 0.007572552189230919, | |
| "learning_rate": 8.222525628861686e-06, | |
| "step": 30250 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 3.155263980006248, | |
| "grad_norm": 0.0044418093748390675, | |
| "learning_rate": 8.19938444449587e-06, | |
| "step": 30300 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.1604706862438823, | |
| "grad_norm": 0.005121790803968906, | |
| "learning_rate": 8.176243260130055e-06, | |
| "step": 30350 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.1656773924815163, | |
| "grad_norm": 0.005301581230014563, | |
| "learning_rate": 8.153102075764238e-06, | |
| "step": 30400 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.1708840987191502, | |
| "grad_norm": 0.012782045640051365, | |
| "learning_rate": 8.129960891398423e-06, | |
| "step": 30450 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.176090804956784, | |
| "grad_norm": 0.003524052444845438, | |
| "learning_rate": 8.106819707032608e-06, | |
| "step": 30500 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.1812975111944186, | |
| "grad_norm": 0.04818173870444298, | |
| "learning_rate": 8.08367852266679e-06, | |
| "step": 30550 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.1865042174320526, | |
| "grad_norm": 0.014735080301761627, | |
| "learning_rate": 8.060537338300974e-06, | |
| "step": 30600 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.1917109236696866, | |
| "grad_norm": 0.004299947526305914, | |
| "learning_rate": 8.03739615393516e-06, | |
| "step": 30650 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.1969176299073205, | |
| "grad_norm": 0.00827470701187849, | |
| "learning_rate": 8.014254969569343e-06, | |
| "step": 30700 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.2021243361449545, | |
| "grad_norm": 0.005655727814882994, | |
| "learning_rate": 7.991113785203528e-06, | |
| "step": 30750 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.207331042382589, | |
| "grad_norm": 0.0024495867546647787, | |
| "learning_rate": 7.96797260083771e-06, | |
| "step": 30800 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.212537748620223, | |
| "grad_norm": 0.019548872485756874, | |
| "learning_rate": 7.944831416471896e-06, | |
| "step": 30850 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.217744454857857, | |
| "grad_norm": 0.0044856201857328415, | |
| "learning_rate": 7.921690232106079e-06, | |
| "step": 30900 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.222951161095491, | |
| "grad_norm": 0.004103007726371288, | |
| "learning_rate": 7.898549047740264e-06, | |
| "step": 30950 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.2281578673331253, | |
| "grad_norm": 0.1315273493528366, | |
| "learning_rate": 7.875407863374447e-06, | |
| "step": 31000 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.2333645735707592, | |
| "grad_norm": 0.005692615173757076, | |
| "learning_rate": 7.852266679008632e-06, | |
| "step": 31050 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 3.238571279808393, | |
| "grad_norm": 0.002233312465250492, | |
| "learning_rate": 7.829125494642816e-06, | |
| "step": 31100 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.243777986046027, | |
| "grad_norm": 0.03157159686088562, | |
| "learning_rate": 7.805984310277e-06, | |
| "step": 31150 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.248984692283661, | |
| "grad_norm": 0.008771988563239574, | |
| "learning_rate": 7.782843125911185e-06, | |
| "step": 31200 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.2541913985212956, | |
| "grad_norm": 0.003804140957072377, | |
| "learning_rate": 7.759701941545369e-06, | |
| "step": 31250 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.2593981047589295, | |
| "grad_norm": 0.0689612403512001, | |
| "learning_rate": 7.736560757179554e-06, | |
| "step": 31300 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.2646048109965635, | |
| "grad_norm": 0.07443096488714218, | |
| "learning_rate": 7.713419572813737e-06, | |
| "step": 31350 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.2698115172341975, | |
| "grad_norm": 0.021511824801564217, | |
| "learning_rate": 7.690278388447922e-06, | |
| "step": 31400 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.275018223471832, | |
| "grad_norm": 0.004147614352405071, | |
| "learning_rate": 7.667137204082105e-06, | |
| "step": 31450 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.280224929709466, | |
| "grad_norm": 0.027495531365275383, | |
| "learning_rate": 7.64399601971629e-06, | |
| "step": 31500 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.2854316359471, | |
| "grad_norm": 0.0021503553725779057, | |
| "learning_rate": 7.620854835350474e-06, | |
| "step": 31550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.290638342184734, | |
| "grad_norm": 0.006075483746826649, | |
| "learning_rate": 7.597713650984658e-06, | |
| "step": 31600 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.2958450484223683, | |
| "grad_norm": 0.005264146253466606, | |
| "learning_rate": 7.574572466618842e-06, | |
| "step": 31650 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 3.3010517546600022, | |
| "grad_norm": 0.003356904024258256, | |
| "learning_rate": 7.5514312822530265e-06, | |
| "step": 31700 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.306258460897636, | |
| "grad_norm": 0.09297136962413788, | |
| "learning_rate": 7.528290097887211e-06, | |
| "step": 31750 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.31146516713527, | |
| "grad_norm": 0.016923336312174797, | |
| "learning_rate": 7.505148913521395e-06, | |
| "step": 31800 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.316671873372904, | |
| "grad_norm": 0.09876677393913269, | |
| "learning_rate": 7.482007729155579e-06, | |
| "step": 31850 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 3.3218785796105386, | |
| "grad_norm": 0.01769149675965309, | |
| "learning_rate": 7.458866544789764e-06, | |
| "step": 31900 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.3270852858481725, | |
| "grad_norm": 0.0034628412686288357, | |
| "learning_rate": 7.435725360423948e-06, | |
| "step": 31950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.3322919920858065, | |
| "grad_norm": 0.003771421266719699, | |
| "learning_rate": 7.41258417605813e-06, | |
| "step": 32000 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.3374986983234405, | |
| "grad_norm": 0.003236924996599555, | |
| "learning_rate": 7.389442991692315e-06, | |
| "step": 32050 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.342705404561075, | |
| "grad_norm": 0.0025784943718463182, | |
| "learning_rate": 7.366301807326499e-06, | |
| "step": 32100 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.347912110798709, | |
| "grad_norm": 0.00431936327368021, | |
| "learning_rate": 7.3431606229606835e-06, | |
| "step": 32150 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.353118817036343, | |
| "grad_norm": 0.006928473711013794, | |
| "learning_rate": 7.320019438594868e-06, | |
| "step": 32200 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.358325523273977, | |
| "grad_norm": 0.002246728865429759, | |
| "learning_rate": 7.296878254229052e-06, | |
| "step": 32250 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.363532229511611, | |
| "grad_norm": 0.0024311786983162165, | |
| "learning_rate": 7.273737069863236e-06, | |
| "step": 32300 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.368738935749245, | |
| "grad_norm": 0.33023688197135925, | |
| "learning_rate": 7.25059588549742e-06, | |
| "step": 32350 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.373945641986879, | |
| "grad_norm": 0.0026327494997531176, | |
| "learning_rate": 7.227454701131604e-06, | |
| "step": 32400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.379152348224513, | |
| "grad_norm": 0.0038416869938373566, | |
| "learning_rate": 7.204313516765788e-06, | |
| "step": 32450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.384359054462147, | |
| "grad_norm": 0.004634499549865723, | |
| "learning_rate": 7.181172332399972e-06, | |
| "step": 32500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.3895657606997815, | |
| "grad_norm": 0.04748839512467384, | |
| "learning_rate": 7.158031148034157e-06, | |
| "step": 32550 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.3947724669374155, | |
| "grad_norm": 0.00369762210175395, | |
| "learning_rate": 7.134889963668341e-06, | |
| "step": 32600 | |
| }, | |
| { | |
| "embedding_loss": 0.0033, | |
| "epoch": 3.3999791731750495, | |
| "grad_norm": 0.003863842226564884, | |
| "learning_rate": 7.1117487793025255e-06, | |
| "step": 32650 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.4051858794126835, | |
| "grad_norm": 0.004467652644962072, | |
| "learning_rate": 7.08860759493671e-06, | |
| "step": 32700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.4103925856503174, | |
| "grad_norm": 0.0017069701571017504, | |
| "learning_rate": 7.065466410570894e-06, | |
| "step": 32750 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.415599291887952, | |
| "grad_norm": 0.0025950015988200903, | |
| "learning_rate": 7.042325226205078e-06, | |
| "step": 32800 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.420805998125586, | |
| "grad_norm": 0.0029007880948483944, | |
| "learning_rate": 7.019184041839262e-06, | |
| "step": 32850 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.42601270436322, | |
| "grad_norm": 0.003898217109963298, | |
| "learning_rate": 6.996042857473446e-06, | |
| "step": 32900 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.431219410600854, | |
| "grad_norm": 0.004065630491822958, | |
| "learning_rate": 6.97290167310763e-06, | |
| "step": 32950 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.436426116838488, | |
| "grad_norm": 0.09971676766872406, | |
| "learning_rate": 6.949760488741814e-06, | |
| "step": 33000 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.441632823076122, | |
| "grad_norm": 0.004332449287176132, | |
| "learning_rate": 6.926619304375999e-06, | |
| "step": 33050 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.446839529313756, | |
| "grad_norm": 0.002061097417026758, | |
| "learning_rate": 6.903478120010183e-06, | |
| "step": 33100 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.45204623555139, | |
| "grad_norm": 0.005368870683014393, | |
| "learning_rate": 6.8803369356443674e-06, | |
| "step": 33150 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.457252941789024, | |
| "grad_norm": 0.0153218824416399, | |
| "learning_rate": 6.8571957512785516e-06, | |
| "step": 33200 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.4624596480266585, | |
| "grad_norm": 0.003146272385492921, | |
| "learning_rate": 6.834054566912736e-06, | |
| "step": 33250 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.4676663542642925, | |
| "grad_norm": 0.005474725738167763, | |
| "learning_rate": 6.81091338254692e-06, | |
| "step": 33300 | |
| }, | |
| { | |
| "embedding_loss": 0.0023, | |
| "epoch": 3.4728730605019265, | |
| "grad_norm": 0.002225042786449194, | |
| "learning_rate": 6.787772198181104e-06, | |
| "step": 33350 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.4780797667395604, | |
| "grad_norm": 0.004484266974031925, | |
| "learning_rate": 6.764631013815287e-06, | |
| "step": 33400 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.483286472977195, | |
| "grad_norm": 0.001994067570194602, | |
| "learning_rate": 6.741489829449471e-06, | |
| "step": 33450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.488493179214829, | |
| "grad_norm": 0.002722726669162512, | |
| "learning_rate": 6.718348645083655e-06, | |
| "step": 33500 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.493699885452463, | |
| "grad_norm": 0.003505149856209755, | |
| "learning_rate": 6.6952074607178395e-06, | |
| "step": 33550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.4989065916900968, | |
| "grad_norm": 0.002958771074190736, | |
| "learning_rate": 6.672066276352024e-06, | |
| "step": 33600 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.5041132979277307, | |
| "grad_norm": 0.007241967599838972, | |
| "learning_rate": 6.648925091986208e-06, | |
| "step": 33650 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.5093200041653647, | |
| "grad_norm": 0.0048427823930978775, | |
| "learning_rate": 6.625783907620393e-06, | |
| "step": 33700 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.514526710402999, | |
| "grad_norm": 0.004183737561106682, | |
| "learning_rate": 6.602642723254577e-06, | |
| "step": 33750 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.519733416640633, | |
| "grad_norm": 0.005011474248021841, | |
| "learning_rate": 6.579501538888761e-06, | |
| "step": 33800 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.524940122878267, | |
| "grad_norm": 0.04754041135311127, | |
| "learning_rate": 6.556360354522945e-06, | |
| "step": 33850 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.5301468291159015, | |
| "grad_norm": 0.0030108760111033916, | |
| "learning_rate": 6.533219170157129e-06, | |
| "step": 33900 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.5353535353535355, | |
| "grad_norm": 0.002894002478569746, | |
| "learning_rate": 6.510077985791313e-06, | |
| "step": 33950 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.5405602415911694, | |
| "grad_norm": 0.01952524110674858, | |
| "learning_rate": 6.486936801425497e-06, | |
| "step": 34000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.5457669478288034, | |
| "grad_norm": 0.0029994072392582893, | |
| "learning_rate": 6.4637956170596815e-06, | |
| "step": 34050 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.5509736540664374, | |
| "grad_norm": 0.006770998239517212, | |
| "learning_rate": 6.4406544326938656e-06, | |
| "step": 34100 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.556180360304072, | |
| "grad_norm": 0.0302437637001276, | |
| "learning_rate": 6.41751324832805e-06, | |
| "step": 34150 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.561387066541706, | |
| "grad_norm": 0.002121832687407732, | |
| "learning_rate": 6.394372063962234e-06, | |
| "step": 34200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.5665937727793398, | |
| "grad_norm": 0.00341336359269917, | |
| "learning_rate": 6.371230879596419e-06, | |
| "step": 34250 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.5718004790169737, | |
| "grad_norm": 0.0027454060036689043, | |
| "learning_rate": 6.348089695230603e-06, | |
| "step": 34300 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.577007185254608, | |
| "grad_norm": 0.0077779293060302734, | |
| "learning_rate": 6.324948510864787e-06, | |
| "step": 34350 | |
| }, | |
| { | |
| "embedding_loss": 0.0028, | |
| "epoch": 3.582213891492242, | |
| "grad_norm": 0.003297444898635149, | |
| "learning_rate": 6.301807326498971e-06, | |
| "step": 34400 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.587420597729876, | |
| "grad_norm": 0.004151192959398031, | |
| "learning_rate": 6.278666142133155e-06, | |
| "step": 34450 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.59262730396751, | |
| "grad_norm": 0.011945868842303753, | |
| "learning_rate": 6.255524957767339e-06, | |
| "step": 34500 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.597834010205144, | |
| "grad_norm": 0.002266357187181711, | |
| "learning_rate": 6.232383773401523e-06, | |
| "step": 34550 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.6030407164427785, | |
| "grad_norm": 0.031817760318517685, | |
| "learning_rate": 6.2092425890357075e-06, | |
| "step": 34600 | |
| }, | |
| { | |
| "embedding_loss": 0.0027, | |
| "epoch": 3.6082474226804124, | |
| "grad_norm": 0.114555723965168, | |
| "learning_rate": 6.186101404669892e-06, | |
| "step": 34650 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.6134541289180464, | |
| "grad_norm": 0.0052092778496444225, | |
| "learning_rate": 6.162960220304076e-06, | |
| "step": 34700 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.6186608351556804, | |
| "grad_norm": 0.013743920251727104, | |
| "learning_rate": 6.139819035938261e-06, | |
| "step": 34750 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.623867541393315, | |
| "grad_norm": 0.04450186714529991, | |
| "learning_rate": 6.116677851572443e-06, | |
| "step": 34800 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.6290742476309488, | |
| "grad_norm": 0.011497569270431995, | |
| "learning_rate": 6.093536667206627e-06, | |
| "step": 34850 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.6342809538685827, | |
| "grad_norm": 0.001604217104613781, | |
| "learning_rate": 6.070395482840812e-06, | |
| "step": 34900 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.6394876601062167, | |
| "grad_norm": 0.014813857153058052, | |
| "learning_rate": 6.047254298474996e-06, | |
| "step": 34950 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.6446943663438507, | |
| "grad_norm": 0.002726171864196658, | |
| "learning_rate": 6.02411311410918e-06, | |
| "step": 35000 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.649901072581485, | |
| "grad_norm": 0.0028911526314914227, | |
| "learning_rate": 6.0009719297433645e-06, | |
| "step": 35050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.655107778819119, | |
| "grad_norm": 0.0009616083116270602, | |
| "learning_rate": 5.977830745377549e-06, | |
| "step": 35100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.660314485056753, | |
| "grad_norm": 0.0013377583818510175, | |
| "learning_rate": 5.954689561011733e-06, | |
| "step": 35150 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.665521191294387, | |
| "grad_norm": 0.01589621789753437, | |
| "learning_rate": 5.931548376645917e-06, | |
| "step": 35200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.6707278975320214, | |
| "grad_norm": 0.05081808194518089, | |
| "learning_rate": 5.908407192280101e-06, | |
| "step": 35250 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.6759346037696554, | |
| "grad_norm": 0.04854687675833702, | |
| "learning_rate": 5.885266007914285e-06, | |
| "step": 35300 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.6811413100072894, | |
| "grad_norm": 0.0028674921486526728, | |
| "learning_rate": 5.862124823548469e-06, | |
| "step": 35350 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.6863480162449234, | |
| "grad_norm": 0.006336590740829706, | |
| "learning_rate": 5.838983639182654e-06, | |
| "step": 35400 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.6915547224825573, | |
| "grad_norm": 0.002654125215485692, | |
| "learning_rate": 5.815842454816838e-06, | |
| "step": 35450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.6967614287201918, | |
| "grad_norm": 0.001202322542667389, | |
| "learning_rate": 5.792701270451022e-06, | |
| "step": 35500 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.7019681349578257, | |
| "grad_norm": 0.0029784284997731447, | |
| "learning_rate": 5.7695600860852065e-06, | |
| "step": 35550 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.7071748411954597, | |
| "grad_norm": 0.017423637211322784, | |
| "learning_rate": 5.746418901719391e-06, | |
| "step": 35600 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.7123815474330937, | |
| "grad_norm": 0.001634717918932438, | |
| "learning_rate": 5.723277717353575e-06, | |
| "step": 35650 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.717588253670728, | |
| "grad_norm": 0.006617635954171419, | |
| "learning_rate": 5.700136532987759e-06, | |
| "step": 35700 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.722794959908362, | |
| "grad_norm": 0.0018128909869119525, | |
| "learning_rate": 5.676995348621943e-06, | |
| "step": 35750 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 3.728001666145996, | |
| "grad_norm": 0.0016381378518417478, | |
| "learning_rate": 5.653854164256127e-06, | |
| "step": 35800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.73320837238363, | |
| "grad_norm": 0.005606998223811388, | |
| "learning_rate": 5.630712979890311e-06, | |
| "step": 35850 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.738415078621264, | |
| "grad_norm": 0.0032535437494516373, | |
| "learning_rate": 5.607571795524496e-06, | |
| "step": 35900 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.7436217848588984, | |
| "grad_norm": 0.008722545579075813, | |
| "learning_rate": 5.58443061115868e-06, | |
| "step": 35950 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.7488284910965324, | |
| "grad_norm": 0.023524988442659378, | |
| "learning_rate": 5.561289426792864e-06, | |
| "step": 36000 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.7540351973341664, | |
| "grad_norm": 0.006606587208807468, | |
| "learning_rate": 5.5381482424270484e-06, | |
| "step": 36050 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.7592419035718003, | |
| "grad_norm": 0.0010090703144669533, | |
| "learning_rate": 5.5150070580612326e-06, | |
| "step": 36100 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 3.7644486098094347, | |
| "grad_norm": 0.00230466783978045, | |
| "learning_rate": 5.491865873695417e-06, | |
| "step": 36150 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.7696553160470687, | |
| "grad_norm": 0.004099918529391289, | |
| "learning_rate": 5.468724689329601e-06, | |
| "step": 36200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.7748620222847027, | |
| "grad_norm": 0.007035956718027592, | |
| "learning_rate": 5.445583504963784e-06, | |
| "step": 36250 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.7800687285223367, | |
| "grad_norm": 0.010237271897494793, | |
| "learning_rate": 5.422442320597968e-06, | |
| "step": 36300 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.7852754347599706, | |
| "grad_norm": 0.0014003911055624485, | |
| "learning_rate": 5.399301136232152e-06, | |
| "step": 36350 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 3.790482140997605, | |
| "grad_norm": 0.0034218619111925364, | |
| "learning_rate": 5.376159951866336e-06, | |
| "step": 36400 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.795688847235239, | |
| "grad_norm": 0.0065358299762010574, | |
| "learning_rate": 5.3530187675005205e-06, | |
| "step": 36450 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.800895553472873, | |
| "grad_norm": 0.002235516905784607, | |
| "learning_rate": 5.329877583134705e-06, | |
| "step": 36500 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.806102259710507, | |
| "grad_norm": 0.0020229006186127663, | |
| "learning_rate": 5.3067363987688896e-06, | |
| "step": 36550 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.8113089659481414, | |
| "grad_norm": 0.0053365700878202915, | |
| "learning_rate": 5.283595214403074e-06, | |
| "step": 36600 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.8165156721857754, | |
| "grad_norm": 0.011895844712853432, | |
| "learning_rate": 5.260454030037258e-06, | |
| "step": 36650 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.8217223784234093, | |
| "grad_norm": 0.0022297389805316925, | |
| "learning_rate": 5.237312845671442e-06, | |
| "step": 36700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.8269290846610433, | |
| "grad_norm": 0.0022312577348202467, | |
| "learning_rate": 5.214171661305626e-06, | |
| "step": 36750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.8321357908986773, | |
| "grad_norm": 0.004529103171080351, | |
| "learning_rate": 5.19103047693981e-06, | |
| "step": 36800 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.8373424971363117, | |
| "grad_norm": 0.0026438578497618437, | |
| "learning_rate": 5.167889292573994e-06, | |
| "step": 36850 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 3.8425492033739457, | |
| "grad_norm": 0.005113155115395784, | |
| "learning_rate": 5.144748108208178e-06, | |
| "step": 36900 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 3.8477559096115796, | |
| "grad_norm": 0.017756449058651924, | |
| "learning_rate": 5.1216069238423624e-06, | |
| "step": 36950 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.8529626158492136, | |
| "grad_norm": 0.02352430485188961, | |
| "learning_rate": 5.0984657394765466e-06, | |
| "step": 37000 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.858169322086848, | |
| "grad_norm": 0.003178997430950403, | |
| "learning_rate": 5.075324555110731e-06, | |
| "step": 37050 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.863376028324482, | |
| "grad_norm": 0.014370561577379704, | |
| "learning_rate": 5.052183370744916e-06, | |
| "step": 37100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 3.868582734562116, | |
| "grad_norm": 0.0058501786552369595, | |
| "learning_rate": 5.0290421863791e-06, | |
| "step": 37150 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.87378944079975, | |
| "grad_norm": 0.0018966099014505744, | |
| "learning_rate": 5.005901002013284e-06, | |
| "step": 37200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.878996147037384, | |
| "grad_norm": 0.002752570202574134, | |
| "learning_rate": 4.982759817647468e-06, | |
| "step": 37250 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.8842028532750184, | |
| "grad_norm": 0.003022131510078907, | |
| "learning_rate": 4.959618633281651e-06, | |
| "step": 37300 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 3.8894095595126523, | |
| "grad_norm": 0.006553678773343563, | |
| "learning_rate": 4.936477448915835e-06, | |
| "step": 37350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.8946162657502863, | |
| "grad_norm": 0.0018002489814534783, | |
| "learning_rate": 4.91333626455002e-06, | |
| "step": 37400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.8998229719879207, | |
| "grad_norm": 0.001831809408031404, | |
| "learning_rate": 4.890195080184204e-06, | |
| "step": 37450 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.9050296782255547, | |
| "grad_norm": 0.0025375783443450928, | |
| "learning_rate": 4.8670538958183885e-06, | |
| "step": 37500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.9102363844631887, | |
| "grad_norm": 0.004411675967276096, | |
| "learning_rate": 4.843912711452573e-06, | |
| "step": 37550 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 3.9154430907008226, | |
| "grad_norm": 0.009407658129930496, | |
| "learning_rate": 4.820771527086757e-06, | |
| "step": 37600 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.9206497969384566, | |
| "grad_norm": 0.002038530306890607, | |
| "learning_rate": 4.797630342720941e-06, | |
| "step": 37650 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 3.9258565031760906, | |
| "grad_norm": 0.017908206209540367, | |
| "learning_rate": 4.774489158355125e-06, | |
| "step": 37700 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 3.931063209413725, | |
| "grad_norm": 0.0015109736705198884, | |
| "learning_rate": 4.751347973989309e-06, | |
| "step": 37750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.936269915651359, | |
| "grad_norm": 0.0064455061219632626, | |
| "learning_rate": 4.728206789623493e-06, | |
| "step": 37800 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 3.941476621888993, | |
| "grad_norm": 0.0861746221780777, | |
| "learning_rate": 4.705065605257677e-06, | |
| "step": 37850 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 3.9466833281266274, | |
| "grad_norm": 0.0038613975048065186, | |
| "learning_rate": 4.681924420891862e-06, | |
| "step": 37900 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 3.9518900343642613, | |
| "grad_norm": 0.03049100562930107, | |
| "learning_rate": 4.658783236526046e-06, | |
| "step": 37950 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 3.9570967406018953, | |
| "grad_norm": 0.0011294811265543103, | |
| "learning_rate": 4.63564205216023e-06, | |
| "step": 38000 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 3.9623034468395293, | |
| "grad_norm": 0.003215038450434804, | |
| "learning_rate": 4.612500867794414e-06, | |
| "step": 38050 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.9675101530771633, | |
| "grad_norm": 0.002345999237149954, | |
| "learning_rate": 4.589359683428598e-06, | |
| "step": 38100 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.9727168593147972, | |
| "grad_norm": 0.0017487540608271956, | |
| "learning_rate": 4.566218499062782e-06, | |
| "step": 38150 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.9779235655524317, | |
| "grad_norm": 0.002043676795437932, | |
| "learning_rate": 4.543077314696966e-06, | |
| "step": 38200 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 3.9831302717900656, | |
| "grad_norm": 0.0033409446477890015, | |
| "learning_rate": 4.519936130331151e-06, | |
| "step": 38250 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 3.9883369780276996, | |
| "grad_norm": 0.1487550139427185, | |
| "learning_rate": 4.496794945965335e-06, | |
| "step": 38300 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 3.993543684265334, | |
| "grad_norm": 0.003084618365392089, | |
| "learning_rate": 4.473653761599519e-06, | |
| "step": 38350 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 3.998750390502968, | |
| "grad_norm": 0.004644028376787901, | |
| "learning_rate": 4.450512577233703e-06, | |
| "step": 38400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.003957096740602, | |
| "grad_norm": 0.004961004480719566, | |
| "learning_rate": 4.4273713928678875e-06, | |
| "step": 38450 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.009163802978236, | |
| "grad_norm": 0.0015071636298671365, | |
| "learning_rate": 4.404230208502072e-06, | |
| "step": 38500 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 4.01437050921587, | |
| "grad_norm": 0.0037345190066844225, | |
| "learning_rate": 4.381089024136256e-06, | |
| "step": 38550 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.019577215453504, | |
| "grad_norm": 0.0011363272788003087, | |
| "learning_rate": 4.35794783977044e-06, | |
| "step": 38600 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.024783921691138, | |
| "grad_norm": 0.00206565810367465, | |
| "learning_rate": 4.334806655404624e-06, | |
| "step": 38650 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.029990627928772, | |
| "grad_norm": 0.0060499319806694984, | |
| "learning_rate": 4.311665471038808e-06, | |
| "step": 38700 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.035197334166407, | |
| "grad_norm": 0.0032932923641055822, | |
| "learning_rate": 4.288524286672992e-06, | |
| "step": 38750 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.040404040404041, | |
| "grad_norm": 0.002779960399493575, | |
| "learning_rate": 4.265383102307176e-06, | |
| "step": 38800 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.045610746641675, | |
| "grad_norm": 0.037588316947221756, | |
| "learning_rate": 4.24224191794136e-06, | |
| "step": 38850 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.050817452879309, | |
| "grad_norm": 0.0021385361906141043, | |
| "learning_rate": 4.2191007335755445e-06, | |
| "step": 38900 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.056024159116943, | |
| "grad_norm": 0.002653073286637664, | |
| "learning_rate": 4.195959549209729e-06, | |
| "step": 38950 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.061230865354577, | |
| "grad_norm": 0.003462142078205943, | |
| "learning_rate": 4.172818364843913e-06, | |
| "step": 39000 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.0664375715922105, | |
| "grad_norm": 0.0043731373734772205, | |
| "learning_rate": 4.149677180478097e-06, | |
| "step": 39050 | |
| }, | |
| { | |
| "embedding_loss": 0.002, | |
| "epoch": 4.0716442778298445, | |
| "grad_norm": 0.0031473205890506506, | |
| "learning_rate": 4.126535996112282e-06, | |
| "step": 39100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.0768509840674785, | |
| "grad_norm": 0.0068083652295172215, | |
| "learning_rate": 4.103394811746466e-06, | |
| "step": 39150 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.082057690305113, | |
| "grad_norm": 0.0017057887744158506, | |
| "learning_rate": 4.08025362738065e-06, | |
| "step": 39200 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.087264396542747, | |
| "grad_norm": 0.0034488628152757883, | |
| "learning_rate": 4.057112443014834e-06, | |
| "step": 39250 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.092471102780381, | |
| "grad_norm": 0.004338666331022978, | |
| "learning_rate": 4.033971258649018e-06, | |
| "step": 39300 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.097677809018015, | |
| "grad_norm": 0.001688474789261818, | |
| "learning_rate": 4.010830074283202e-06, | |
| "step": 39350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.102884515255649, | |
| "grad_norm": 0.005720613989979029, | |
| "learning_rate": 3.9876888899173864e-06, | |
| "step": 39400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.108091221493283, | |
| "grad_norm": 0.010131466202437878, | |
| "learning_rate": 3.9645477055515705e-06, | |
| "step": 39450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.113297927730917, | |
| "grad_norm": 0.00117829954251647, | |
| "learning_rate": 3.941406521185755e-06, | |
| "step": 39500 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.118504633968551, | |
| "grad_norm": 0.013248096220195293, | |
| "learning_rate": 3.918265336819939e-06, | |
| "step": 39550 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.123711340206185, | |
| "grad_norm": 0.005844116676598787, | |
| "learning_rate": 3.895124152454123e-06, | |
| "step": 39600 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.12891804644382, | |
| "grad_norm": 0.002914564684033394, | |
| "learning_rate": 3.871982968088307e-06, | |
| "step": 39650 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.134124752681454, | |
| "grad_norm": 0.003980652429163456, | |
| "learning_rate": 3.848841783722491e-06, | |
| "step": 39700 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.139331458919088, | |
| "grad_norm": 0.004351139068603516, | |
| "learning_rate": 3.825700599356675e-06, | |
| "step": 39750 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.144538165156722, | |
| "grad_norm": 0.00460411561653018, | |
| "learning_rate": 3.8025594149908597e-06, | |
| "step": 39800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.149744871394356, | |
| "grad_norm": 0.002258758759126067, | |
| "learning_rate": 3.779418230625044e-06, | |
| "step": 39850 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.15495157763199, | |
| "grad_norm": 0.00147035694681108, | |
| "learning_rate": 3.756277046259228e-06, | |
| "step": 39900 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.160158283869624, | |
| "grad_norm": 0.008124323561787605, | |
| "learning_rate": 3.733135861893412e-06, | |
| "step": 39950 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.165364990107258, | |
| "grad_norm": 0.006330924108624458, | |
| "learning_rate": 3.709994677527596e-06, | |
| "step": 40000 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.170571696344892, | |
| "grad_norm": 0.0018023628508672118, | |
| "learning_rate": 3.6868534931617807e-06, | |
| "step": 40050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.175778402582527, | |
| "grad_norm": 0.06845732778310776, | |
| "learning_rate": 3.663712308795965e-06, | |
| "step": 40100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.180985108820161, | |
| "grad_norm": 0.05048598721623421, | |
| "learning_rate": 3.6405711244301485e-06, | |
| "step": 40150 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.186191815057795, | |
| "grad_norm": 0.008092716336250305, | |
| "learning_rate": 3.6174299400643326e-06, | |
| "step": 40200 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.191398521295429, | |
| "grad_norm": 0.0009377990500070155, | |
| "learning_rate": 3.5942887556985167e-06, | |
| "step": 40250 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.1966052275330625, | |
| "grad_norm": 0.0021307109855115414, | |
| "learning_rate": 3.571147571332701e-06, | |
| "step": 40300 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.2018119337706965, | |
| "grad_norm": 0.007595045492053032, | |
| "learning_rate": 3.548006386966885e-06, | |
| "step": 40350 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.2070186400083305, | |
| "grad_norm": 0.0017604045569896698, | |
| "learning_rate": 3.5248652026010695e-06, | |
| "step": 40400 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.2122253462459645, | |
| "grad_norm": 0.0040459102019667625, | |
| "learning_rate": 3.5017240182352536e-06, | |
| "step": 40450 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.217432052483598, | |
| "grad_norm": 0.0473860502243042, | |
| "learning_rate": 3.4785828338694377e-06, | |
| "step": 40500 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.222638758721233, | |
| "grad_norm": 0.002496903296560049, | |
| "learning_rate": 3.455441649503622e-06, | |
| "step": 40550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.227845464958867, | |
| "grad_norm": 0.007270964793860912, | |
| "learning_rate": 3.432300465137806e-06, | |
| "step": 40600 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.233052171196501, | |
| "grad_norm": 0.013144693337380886, | |
| "learning_rate": 3.4091592807719905e-06, | |
| "step": 40650 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.238258877434135, | |
| "grad_norm": 0.003847824176773429, | |
| "learning_rate": 3.3860180964061746e-06, | |
| "step": 40700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.243465583671769, | |
| "grad_norm": 0.0023708331864327192, | |
| "learning_rate": 3.3628769120403587e-06, | |
| "step": 40750 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.248672289909403, | |
| "grad_norm": 0.018748441711068153, | |
| "learning_rate": 3.339735727674543e-06, | |
| "step": 40800 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.253878996147037, | |
| "grad_norm": 0.0020937493536621332, | |
| "learning_rate": 3.3165945433087265e-06, | |
| "step": 40850 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.259085702384671, | |
| "grad_norm": 0.042171407490968704, | |
| "learning_rate": 3.2934533589429106e-06, | |
| "step": 40900 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.264292408622305, | |
| "grad_norm": 0.00801966805011034, | |
| "learning_rate": 3.2703121745770947e-06, | |
| "step": 40950 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.26949911485994, | |
| "grad_norm": 0.008358903229236603, | |
| "learning_rate": 3.2471709902112793e-06, | |
| "step": 41000 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.274705821097574, | |
| "grad_norm": 0.001534903421998024, | |
| "learning_rate": 3.2240298058454634e-06, | |
| "step": 41050 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.279912527335208, | |
| "grad_norm": 0.0019458031747490168, | |
| "learning_rate": 3.2008886214796475e-06, | |
| "step": 41100 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.285119233572842, | |
| "grad_norm": 0.004779054783284664, | |
| "learning_rate": 3.1777474371138316e-06, | |
| "step": 41150 | |
| }, | |
| { | |
| "embedding_loss": 0.0021, | |
| "epoch": 4.290325939810476, | |
| "grad_norm": 0.0033252162393182516, | |
| "learning_rate": 3.1546062527480157e-06, | |
| "step": 41200 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.29553264604811, | |
| "grad_norm": 0.0036777497734874487, | |
| "learning_rate": 3.1314650683822002e-06, | |
| "step": 41250 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.300739352285744, | |
| "grad_norm": 0.0024451168719679117, | |
| "learning_rate": 3.1083238840163844e-06, | |
| "step": 41300 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.305946058523378, | |
| "grad_norm": 0.0027761063538491726, | |
| "learning_rate": 3.0851826996505685e-06, | |
| "step": 41350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.311152764761012, | |
| "grad_norm": 0.002929074689745903, | |
| "learning_rate": 3.0620415152847526e-06, | |
| "step": 41400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.316359470998647, | |
| "grad_norm": 0.26386216282844543, | |
| "learning_rate": 3.0389003309189367e-06, | |
| "step": 41450 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.321566177236281, | |
| "grad_norm": 0.004391273949295282, | |
| "learning_rate": 3.0157591465531212e-06, | |
| "step": 41500 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.3267728834739145, | |
| "grad_norm": 0.0026139123365283012, | |
| "learning_rate": 2.992617962187305e-06, | |
| "step": 41550 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.3319795897115485, | |
| "grad_norm": 0.03610287979245186, | |
| "learning_rate": 2.969476777821489e-06, | |
| "step": 41600 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.3371862959491825, | |
| "grad_norm": 0.0036759632639586926, | |
| "learning_rate": 2.946335593455673e-06, | |
| "step": 41650 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.3423930021868165, | |
| "grad_norm": 0.004986033774912357, | |
| "learning_rate": 2.9231944090898572e-06, | |
| "step": 41700 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.34759970842445, | |
| "grad_norm": 0.0021286620758473873, | |
| "learning_rate": 2.9000532247240414e-06, | |
| "step": 41750 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.352806414662084, | |
| "grad_norm": 0.0035934702027589083, | |
| "learning_rate": 2.876912040358226e-06, | |
| "step": 41800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.358013120899718, | |
| "grad_norm": 0.0023505541030317545, | |
| "learning_rate": 2.85377085599241e-06, | |
| "step": 41850 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 4.363219827137353, | |
| "grad_norm": 0.002859236905351281, | |
| "learning_rate": 2.830629671626594e-06, | |
| "step": 41900 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.368426533374987, | |
| "grad_norm": 0.019494347274303436, | |
| "learning_rate": 2.8074884872607782e-06, | |
| "step": 41950 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.373633239612621, | |
| "grad_norm": 0.0020093335770070553, | |
| "learning_rate": 2.7843473028949623e-06, | |
| "step": 42000 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.378839945850255, | |
| "grad_norm": 0.003382645780220628, | |
| "learning_rate": 2.761206118529147e-06, | |
| "step": 42050 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.384046652087889, | |
| "grad_norm": 0.0038147750310599804, | |
| "learning_rate": 2.738064934163331e-06, | |
| "step": 42100 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.389253358325523, | |
| "grad_norm": 0.014739004895091057, | |
| "learning_rate": 2.714923749797515e-06, | |
| "step": 42150 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.394460064563157, | |
| "grad_norm": 0.003537252312526107, | |
| "learning_rate": 2.691782565431699e-06, | |
| "step": 42200 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 4.399666770800791, | |
| "grad_norm": 0.005448461975902319, | |
| "learning_rate": 2.6686413810658833e-06, | |
| "step": 42250 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.404873477038425, | |
| "grad_norm": 0.004431063774973154, | |
| "learning_rate": 2.645500196700067e-06, | |
| "step": 42300 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.41008018327606, | |
| "grad_norm": 0.08171793073415756, | |
| "learning_rate": 2.622359012334251e-06, | |
| "step": 42350 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.415286889513694, | |
| "grad_norm": 3.030642509460449, | |
| "learning_rate": 2.5992178279684356e-06, | |
| "step": 42400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.420493595751328, | |
| "grad_norm": 0.00229825172573328, | |
| "learning_rate": 2.5760766436026198e-06, | |
| "step": 42450 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.425700301988962, | |
| "grad_norm": 0.0022334696259349585, | |
| "learning_rate": 2.552935459236804e-06, | |
| "step": 42500 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.430907008226596, | |
| "grad_norm": 0.006273935548961163, | |
| "learning_rate": 2.529794274870988e-06, | |
| "step": 42550 | |
| }, | |
| { | |
| "embedding_loss": 0.0011, | |
| "epoch": 4.43611371446423, | |
| "grad_norm": 0.002443622797727585, | |
| "learning_rate": 2.506653090505172e-06, | |
| "step": 42600 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.441320420701864, | |
| "grad_norm": 0.007955342531204224, | |
| "learning_rate": 2.4835119061393566e-06, | |
| "step": 42650 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.446527126939498, | |
| "grad_norm": 0.005112164653837681, | |
| "learning_rate": 2.4603707217735407e-06, | |
| "step": 42700 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.451733833177133, | |
| "grad_norm": 0.007230122108012438, | |
| "learning_rate": 2.437229537407725e-06, | |
| "step": 42750 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.4569405394147665, | |
| "grad_norm": 0.0016728178597986698, | |
| "learning_rate": 2.414088353041909e-06, | |
| "step": 42800 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.4621472456524005, | |
| "grad_norm": 0.003500057151541114, | |
| "learning_rate": 2.390947168676093e-06, | |
| "step": 42850 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.4673539518900345, | |
| "grad_norm": 0.0011205794289708138, | |
| "learning_rate": 2.367805984310277e-06, | |
| "step": 42900 | |
| }, | |
| { | |
| "embedding_loss": 0.0017, | |
| "epoch": 4.4725606581276685, | |
| "grad_norm": 0.0030274472665041685, | |
| "learning_rate": 2.3446647999444613e-06, | |
| "step": 42950 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.477767364365302, | |
| "grad_norm": 0.0006662325467914343, | |
| "learning_rate": 2.3215236155786454e-06, | |
| "step": 43000 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.482974070602936, | |
| "grad_norm": 0.003703465685248375, | |
| "learning_rate": 2.2983824312128295e-06, | |
| "step": 43050 | |
| }, | |
| { | |
| "embedding_loss": 0.0012, | |
| "epoch": 4.48818077684057, | |
| "grad_norm": 0.0016818788135424256, | |
| "learning_rate": 2.275241246847014e-06, | |
| "step": 43100 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.493387483078204, | |
| "grad_norm": 0.020755505189299583, | |
| "learning_rate": 2.2521000624811977e-06, | |
| "step": 43150 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.498594189315839, | |
| "grad_norm": 0.0026131754275411367, | |
| "learning_rate": 2.228958878115382e-06, | |
| "step": 43200 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.503800895553473, | |
| "grad_norm": 0.010769600979983807, | |
| "learning_rate": 2.2058176937495664e-06, | |
| "step": 43250 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.509007601791107, | |
| "grad_norm": 0.0036240960471332073, | |
| "learning_rate": 2.1826765093837505e-06, | |
| "step": 43300 | |
| }, | |
| { | |
| "embedding_loss": 0.0026, | |
| "epoch": 4.514214308028741, | |
| "grad_norm": 0.0036824876442551613, | |
| "learning_rate": 2.1595353250179346e-06, | |
| "step": 43350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.519421014266375, | |
| "grad_norm": 0.005997397005558014, | |
| "learning_rate": 2.1363941406521187e-06, | |
| "step": 43400 | |
| }, | |
| { | |
| "embedding_loss": 0.0024, | |
| "epoch": 4.524627720504009, | |
| "grad_norm": 0.0047904313541948795, | |
| "learning_rate": 2.113252956286303e-06, | |
| "step": 43450 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.529834426741643, | |
| "grad_norm": 0.01296373549848795, | |
| "learning_rate": 2.0901117719204874e-06, | |
| "step": 43500 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.535041132979277, | |
| "grad_norm": 0.0019739430863410234, | |
| "learning_rate": 2.066970587554671e-06, | |
| "step": 43550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.540247839216911, | |
| "grad_norm": 0.003413254162296653, | |
| "learning_rate": 2.043829403188855e-06, | |
| "step": 43600 | |
| }, | |
| { | |
| "embedding_loss": 0.0015, | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 0.005623187869787216, | |
| "learning_rate": 2.0206882188230397e-06, | |
| "step": 43650 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.55066125169218, | |
| "grad_norm": 0.002288981107994914, | |
| "learning_rate": 1.997547034457224e-06, | |
| "step": 43700 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.555867957929814, | |
| "grad_norm": 0.0009039235883392394, | |
| "learning_rate": 1.974405850091408e-06, | |
| "step": 43750 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 4.561074664167448, | |
| "grad_norm": 0.004320364445447922, | |
| "learning_rate": 1.951264665725592e-06, | |
| "step": 43800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.566281370405082, | |
| "grad_norm": 0.0032452233135700226, | |
| "learning_rate": 1.928123481359776e-06, | |
| "step": 43850 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.571488076642716, | |
| "grad_norm": 0.0020324711222201586, | |
| "learning_rate": 1.9049822969939603e-06, | |
| "step": 43900 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.57669478288035, | |
| "grad_norm": 0.0033108368515968323, | |
| "learning_rate": 1.8818411126281444e-06, | |
| "step": 43950 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.581901489117984, | |
| "grad_norm": 0.002229843521490693, | |
| "learning_rate": 1.8586999282623287e-06, | |
| "step": 44000 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.5871081953556185, | |
| "grad_norm": 0.0032805639784783125, | |
| "learning_rate": 1.8355587438965128e-06, | |
| "step": 44050 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.592314901593252, | |
| "grad_norm": 0.001333653461188078, | |
| "learning_rate": 1.812417559530697e-06, | |
| "step": 44100 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.5975216078308865, | |
| "grad_norm": 0.3373894691467285, | |
| "learning_rate": 1.7892763751648812e-06, | |
| "step": 44150 | |
| }, | |
| { | |
| "embedding_loss": 0.0016, | |
| "epoch": 4.6027283140685205, | |
| "grad_norm": 0.0035874065943062305, | |
| "learning_rate": 1.7661351907990653e-06, | |
| "step": 44200 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.607935020306154, | |
| "grad_norm": 0.001685873605310917, | |
| "learning_rate": 1.7429940064332492e-06, | |
| "step": 44250 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.613141726543788, | |
| "grad_norm": 0.0019293057266622782, | |
| "learning_rate": 1.7198528220674336e-06, | |
| "step": 44300 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.618348432781422, | |
| "grad_norm": 0.017738085240125656, | |
| "learning_rate": 1.6967116377016177e-06, | |
| "step": 44350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.623555139019056, | |
| "grad_norm": 0.00891903880983591, | |
| "learning_rate": 1.6735704533358018e-06, | |
| "step": 44400 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.62876184525669, | |
| "grad_norm": 0.001540567958727479, | |
| "learning_rate": 1.6504292689699861e-06, | |
| "step": 44450 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.633968551494325, | |
| "grad_norm": 0.03680149465799332, | |
| "learning_rate": 1.6272880846041702e-06, | |
| "step": 44500 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.639175257731958, | |
| "grad_norm": 0.0019971744623035192, | |
| "learning_rate": 1.6041469002383545e-06, | |
| "step": 44550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.644381963969593, | |
| "grad_norm": 0.004468118771910667, | |
| "learning_rate": 1.5810057158725384e-06, | |
| "step": 44600 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.649588670207227, | |
| "grad_norm": 0.028531698510050774, | |
| "learning_rate": 1.5578645315067226e-06, | |
| "step": 44650 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.654795376444861, | |
| "grad_norm": 0.003770474810153246, | |
| "learning_rate": 1.5347233471409067e-06, | |
| "step": 44700 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.660002082682495, | |
| "grad_norm": 0.0017020407831296325, | |
| "learning_rate": 1.511582162775091e-06, | |
| "step": 44750 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.665208788920129, | |
| "grad_norm": 0.0020539036486297846, | |
| "learning_rate": 1.488440978409275e-06, | |
| "step": 44800 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.670415495157763, | |
| "grad_norm": 0.0035052604507654905, | |
| "learning_rate": 1.4652997940434594e-06, | |
| "step": 44850 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.675622201395397, | |
| "grad_norm": 0.003664996474981308, | |
| "learning_rate": 1.4421586096776435e-06, | |
| "step": 44900 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.680828907633032, | |
| "grad_norm": 0.002150058513507247, | |
| "learning_rate": 1.4190174253118274e-06, | |
| "step": 44950 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.686035613870665, | |
| "grad_norm": 0.0027224977966398, | |
| "learning_rate": 1.3958762409460115e-06, | |
| "step": 45000 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.6912423201083, | |
| "grad_norm": 0.002212725579738617, | |
| "learning_rate": 1.3727350565801959e-06, | |
| "step": 45050 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.696449026345934, | |
| "grad_norm": 0.0020792309660464525, | |
| "learning_rate": 1.34959387221438e-06, | |
| "step": 45100 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.701655732583568, | |
| "grad_norm": 0.0052679735235869884, | |
| "learning_rate": 1.3264526878485643e-06, | |
| "step": 45150 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.706862438821202, | |
| "grad_norm": 0.0041116694919764996, | |
| "learning_rate": 1.3033115034827484e-06, | |
| "step": 45200 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.712069145058836, | |
| "grad_norm": 0.003463329281657934, | |
| "learning_rate": 1.2801703191169325e-06, | |
| "step": 45250 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.71727585129647, | |
| "grad_norm": 0.0025421089958399534, | |
| "learning_rate": 1.2570291347511166e-06, | |
| "step": 45300 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.722482557534104, | |
| "grad_norm": 0.0016696372767910361, | |
| "learning_rate": 1.233887950385301e-06, | |
| "step": 45350 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.7276892637717385, | |
| "grad_norm": 0.005751196760684252, | |
| "learning_rate": 1.2107467660194849e-06, | |
| "step": 45400 | |
| }, | |
| { | |
| "embedding_loss": 0.0019, | |
| "epoch": 4.7328959700093725, | |
| "grad_norm": 0.013020163401961327, | |
| "learning_rate": 1.1876055816536692e-06, | |
| "step": 45450 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.738102676247006, | |
| "grad_norm": 0.004354926757514477, | |
| "learning_rate": 1.1644643972878533e-06, | |
| "step": 45500 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.74330938248464, | |
| "grad_norm": 0.003605367848649621, | |
| "learning_rate": 1.1413232129220374e-06, | |
| "step": 45550 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.748516088722274, | |
| "grad_norm": 0.0030561047606170177, | |
| "learning_rate": 1.1181820285562215e-06, | |
| "step": 45600 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.753722794959908, | |
| "grad_norm": 0.007909784093499184, | |
| "learning_rate": 1.0950408441904058e-06, | |
| "step": 45650 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.758929501197542, | |
| "grad_norm": 0.002514626132324338, | |
| "learning_rate": 1.07189965982459e-06, | |
| "step": 45700 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.764136207435176, | |
| "grad_norm": 0.0016800053417682648, | |
| "learning_rate": 1.048758475458774e-06, | |
| "step": 45750 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.76934291367281, | |
| "grad_norm": 0.004000342451035976, | |
| "learning_rate": 1.0256172910929582e-06, | |
| "step": 45800 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.774549619910445, | |
| "grad_norm": 0.001277065253816545, | |
| "learning_rate": 1.0024761067271425e-06, | |
| "step": 45850 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.779756326148079, | |
| "grad_norm": 0.004461308475583792, | |
| "learning_rate": 9.793349223613266e-07, | |
| "step": 45900 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.784963032385713, | |
| "grad_norm": 0.0651107132434845, | |
| "learning_rate": 9.561937379955107e-07, | |
| "step": 45950 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.790169738623347, | |
| "grad_norm": 0.0018568108789622784, | |
| "learning_rate": 9.330525536296948e-07, | |
| "step": 46000 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.795376444860981, | |
| "grad_norm": 0.004890389274805784, | |
| "learning_rate": 9.09911369263879e-07, | |
| "step": 46050 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.800583151098615, | |
| "grad_norm": 0.002689856104552746, | |
| "learning_rate": 8.867701848980631e-07, | |
| "step": 46100 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.805789857336249, | |
| "grad_norm": 0.003079883521422744, | |
| "learning_rate": 8.636290005322473e-07, | |
| "step": 46150 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.810996563573883, | |
| "grad_norm": 0.0018577250884845853, | |
| "learning_rate": 8.404878161664315e-07, | |
| "step": 46200 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.816203269811517, | |
| "grad_norm": 0.004618423525243998, | |
| "learning_rate": 8.173466318006157e-07, | |
| "step": 46250 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.821409976049152, | |
| "grad_norm": 0.004892790224403143, | |
| "learning_rate": 7.942054474347997e-07, | |
| "step": 46300 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.826616682286786, | |
| "grad_norm": 0.003912623040378094, | |
| "learning_rate": 7.710642630689839e-07, | |
| "step": 46350 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.83182338852442, | |
| "grad_norm": 0.007159634493291378, | |
| "learning_rate": 7.479230787031681e-07, | |
| "step": 46400 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.837030094762054, | |
| "grad_norm": 0.0023596896789968014, | |
| "learning_rate": 7.247818943373522e-07, | |
| "step": 46450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.842236800999688, | |
| "grad_norm": 0.010279769077897072, | |
| "learning_rate": 7.016407099715364e-07, | |
| "step": 46500 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.847443507237322, | |
| "grad_norm": 0.008691814728081226, | |
| "learning_rate": 6.784995256057206e-07, | |
| "step": 46550 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.852650213474956, | |
| "grad_norm": 0.003329548519104719, | |
| "learning_rate": 6.553583412399048e-07, | |
| "step": 46600 | |
| }, | |
| { | |
| "embedding_loss": 0.0009, | |
| "epoch": 4.85785691971259, | |
| "grad_norm": 0.004583888687193394, | |
| "learning_rate": 6.322171568740888e-07, | |
| "step": 46650 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.863063625950224, | |
| "grad_norm": 0.02980988658964634, | |
| "learning_rate": 6.09075972508273e-07, | |
| "step": 46700 | |
| }, | |
| { | |
| "embedding_loss": 0.0014, | |
| "epoch": 4.868270332187858, | |
| "grad_norm": 0.007974829524755478, | |
| "learning_rate": 5.859347881424571e-07, | |
| "step": 46750 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.873477038425492, | |
| "grad_norm": 0.0035474197939038277, | |
| "learning_rate": 5.627936037766414e-07, | |
| "step": 46800 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.878683744663126, | |
| "grad_norm": 0.010695052333176136, | |
| "learning_rate": 5.396524194108255e-07, | |
| "step": 46850 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.88389045090076, | |
| "grad_norm": 0.002433580346405506, | |
| "learning_rate": 5.165112350450097e-07, | |
| "step": 46900 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.889097157138394, | |
| "grad_norm": 0.003585429862141609, | |
| "learning_rate": 4.933700506791938e-07, | |
| "step": 46950 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.894303863376028, | |
| "grad_norm": 0.31530049443244934, | |
| "learning_rate": 4.70228866313378e-07, | |
| "step": 47000 | |
| }, | |
| { | |
| "embedding_loss": 0.001, | |
| "epoch": 4.899510569613662, | |
| "grad_norm": 0.0049338992685079575, | |
| "learning_rate": 4.470876819475621e-07, | |
| "step": 47050 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.904717275851296, | |
| "grad_norm": 0.00397633807733655, | |
| "learning_rate": 4.239464975817463e-07, | |
| "step": 47100 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.90992398208893, | |
| "grad_norm": 0.0035032695159316063, | |
| "learning_rate": 4.0080531321593045e-07, | |
| "step": 47150 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.915130688326565, | |
| "grad_norm": 0.0034136222675442696, | |
| "learning_rate": 3.7766412885011456e-07, | |
| "step": 47200 | |
| }, | |
| { | |
| "embedding_loss": 0.0002, | |
| "epoch": 4.920337394564199, | |
| "grad_norm": 0.20558778941631317, | |
| "learning_rate": 3.545229444842987e-07, | |
| "step": 47250 | |
| }, | |
| { | |
| "embedding_loss": 0.0018, | |
| "epoch": 4.925544100801833, | |
| "grad_norm": 0.010641155764460564, | |
| "learning_rate": 3.313817601184829e-07, | |
| "step": 47300 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.930750807039467, | |
| "grad_norm": 0.0028619503136724234, | |
| "learning_rate": 3.0824057575266705e-07, | |
| "step": 47350 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.935957513277101, | |
| "grad_norm": 0.001628124387934804, | |
| "learning_rate": 2.850993913868512e-07, | |
| "step": 47400 | |
| }, | |
| { | |
| "embedding_loss": 0.0005, | |
| "epoch": 4.941164219514735, | |
| "grad_norm": 0.0017882351530715823, | |
| "learning_rate": 2.6195820702103533e-07, | |
| "step": 47450 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.946370925752369, | |
| "grad_norm": 0.00220202817581594, | |
| "learning_rate": 2.388170226552195e-07, | |
| "step": 47500 | |
| }, | |
| { | |
| "embedding_loss": 0.0013, | |
| "epoch": 4.951577631990003, | |
| "grad_norm": 0.001214580493979156, | |
| "learning_rate": 2.1567583828940368e-07, | |
| "step": 47550 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.956784338227637, | |
| "grad_norm": 0.0018583645578473806, | |
| "learning_rate": 1.9253465392358785e-07, | |
| "step": 47600 | |
| }, | |
| { | |
| "embedding_loss": 0.0004, | |
| "epoch": 4.961991044465272, | |
| "grad_norm": 0.005768468137830496, | |
| "learning_rate": 1.6939346955777198e-07, | |
| "step": 47650 | |
| }, | |
| { | |
| "embedding_loss": 0.0003, | |
| "epoch": 4.967197750702906, | |
| "grad_norm": 0.0034024049527943134, | |
| "learning_rate": 1.4625228519195615e-07, | |
| "step": 47700 | |
| }, | |
| { | |
| "embedding_loss": 0.0022, | |
| "epoch": 4.97240445694054, | |
| "grad_norm": 0.001480701263062656, | |
| "learning_rate": 1.2311110082614029e-07, | |
| "step": 47750 | |
| }, | |
| { | |
| "embedding_loss": 0.0008, | |
| "epoch": 4.977611163178174, | |
| "grad_norm": 0.0032351568806916475, | |
| "learning_rate": 9.996991646032445e-08, | |
| "step": 47800 | |
| }, | |
| { | |
| "embedding_loss": 0.0006, | |
| "epoch": 4.982817869415808, | |
| "grad_norm": 0.03766478970646858, | |
| "learning_rate": 7.68287320945086e-08, | |
| "step": 47850 | |
| }, | |
| { | |
| "embedding_loss": 0.0, | |
| "epoch": 4.988024575653442, | |
| "grad_norm": 0.0029770000837743282, | |
| "learning_rate": 5.368754772869276e-08, | |
| "step": 47900 | |
| }, | |
| { | |
| "embedding_loss": 0.0007, | |
| "epoch": 4.993231281891076, | |
| "grad_norm": 0.001190900569781661, | |
| "learning_rate": 3.0546363362876916e-08, | |
| "step": 47950 | |
| }, | |
| { | |
| "embedding_loss": 0.0001, | |
| "epoch": 4.9984379881287095, | |
| "grad_norm": 0.019358443096280098, | |
| "learning_rate": 7.40517899706107e-09, | |
| "step": 48000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 48015, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
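
The object above is the full trainer state as written by the Hugging Face `Trainer`: `log_history` holds one record per `logging_steps` (50) interval with `embedding_loss`, `epoch`, `grad_norm`, `learning_rate`, and `step`, and the trailer carries the run-level settings (`max_steps`, `num_train_epochs`, `save_steps`, `train_batch_size`). As a minimal, hedged sketch of how such a file might be consumed, the snippet below loads a saved copy and plots the logged loss against the optimizer step; the filename `trainer_state.json` and the use of matplotlib are assumptions for illustration, not part of the original log.

```python
# Minimal sketch: read a Trainer state file like the one above and plot
# embedding_loss versus step. Assumes the JSON is saved locally as
# "trainer_state.json" (hypothetical path) and matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only log_history entries that carry a loss value
# (evaluation-only records, if any, would lack this field).
records = [rec for rec in state["log_history"] if "embedding_loss" in rec]
steps = [rec["step"] for rec in records]
losses = [rec["embedding_loss"] for rec in records]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("embedding_loss")
plt.title(f"Loss over {state['num_train_epochs']} epochs "
          f"({state['max_steps']} steps)")
plt.show()
```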