{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 1186, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03372681281618887, "grad_norm": 5.941967487335205, "learning_rate": 1.0453299076426935e-05, "loss": 0.2448, "step": 20 }, { "epoch": 0.06745362563237774, "grad_norm": 9.986567497253418, "learning_rate": 2.1456771788455288e-05, "loss": 0.1918, "step": 40 }, { "epoch": 0.10118043844856661, "grad_norm": 3.148054599761963, "learning_rate": 2.1082634529720233e-05, "loss": 0.14, "step": 60 }, { "epoch": 0.13490725126475547, "grad_norm": 8.24326229095459, "learning_rate": 2.070849727098518e-05, "loss": 0.186, "step": 80 }, { "epoch": 0.16863406408094436, "grad_norm": 2.4032340049743652, "learning_rate": 2.033436001225013e-05, "loss": 0.1407, "step": 100 }, { "epoch": 0.20236087689713322, "grad_norm": 3.855356216430664, "learning_rate": 1.9960222753515078e-05, "loss": 0.1672, "step": 120 }, { "epoch": 0.23608768971332209, "grad_norm": 5.036954879760742, "learning_rate": 1.9586085494780023e-05, "loss": 0.1832, "step": 140 }, { "epoch": 0.26981450252951095, "grad_norm": 1.7486367225646973, "learning_rate": 1.921194823604497e-05, "loss": 0.116, "step": 160 }, { "epoch": 0.30354131534569984, "grad_norm": 2.300610303878784, "learning_rate": 1.8837810977309916e-05, "loss": 0.1341, "step": 180 }, { "epoch": 0.3372681281618887, "grad_norm": 7.484567642211914, "learning_rate": 1.846367371857486e-05, "loss": 0.2118, "step": 200 }, { "epoch": 0.37099494097807756, "grad_norm": 3.249177932739258, "learning_rate": 1.808953645983981e-05, "loss": 0.1274, "step": 220 }, { "epoch": 0.40472175379426645, "grad_norm": 4.78777551651001, "learning_rate": 1.7715399201104758e-05, "loss": 0.1993, "step": 240 }, { "epoch": 0.43844856661045534, "grad_norm": 3.9657957553863525, "learning_rate": 1.7341261942369707e-05, "loss": 0.1561, "step": 260 }, { "epoch": 0.47217537942664417, "grad_norm": 5.606136798858643, "learning_rate": 1.6967124683634652e-05, "loss": 0.1517, "step": 280 }, { "epoch": 0.5059021922428331, "grad_norm": 5.850975036621094, "learning_rate": 1.65929874248996e-05, "loss": 0.1635, "step": 300 }, { "epoch": 0.5396290050590219, "grad_norm": 6.081123352050781, "learning_rate": 1.6218850166164545e-05, "loss": 0.1646, "step": 320 }, { "epoch": 0.5733558178752108, "grad_norm": 2.2266976833343506, "learning_rate": 1.5844712907429494e-05, "loss": 0.1337, "step": 340 }, { "epoch": 0.6070826306913997, "grad_norm": 3.113185167312622, "learning_rate": 1.5470575648694442e-05, "loss": 0.1406, "step": 360 }, { "epoch": 0.6408094435075885, "grad_norm": 3.9175527095794678, "learning_rate": 1.5096438389959387e-05, "loss": 0.1114, "step": 380 }, { "epoch": 0.6745362563237775, "grad_norm": 7.268213272094727, "learning_rate": 1.4741007994161085e-05, "loss": 0.1314, "step": 400 }, { "epoch": 0.7082630691399663, "grad_norm": 0.12521882355213165, "learning_rate": 1.4366870735426034e-05, "loss": 0.1481, "step": 420 }, { "epoch": 0.7419898819561551, "grad_norm": 0.6655579805374146, "learning_rate": 1.399273347669098e-05, "loss": 0.1932, "step": 440 }, { "epoch": 0.7757166947723441, "grad_norm": 1.8391481637954712, "learning_rate": 1.3618596217955929e-05, "loss": 0.1568, "step": 460 }, { "epoch": 0.8094435075885329, "grad_norm": 2.549755573272705, "learning_rate": 1.3244458959220876e-05, "loss": 0.1319, "step": 480 }, { "epoch": 0.8431703204047217, "grad_norm": 5.393273830413818, "learning_rate": 1.287032170048582e-05, "loss": 0.1536, "step": 500 }, { "epoch": 0.8768971332209107, "grad_norm": 6.554528713226318, "learning_rate": 1.2496184441750769e-05, "loss": 0.1462, "step": 520 }, { "epoch": 0.9106239460370995, "grad_norm": 1.6136741638183594, "learning_rate": 1.2140754045952469e-05, "loss": 0.1336, "step": 540 }, { "epoch": 0.9443507588532883, "grad_norm": 5.212509632110596, "learning_rate": 1.1766616787217416e-05, "loss": 0.1453, "step": 560 }, { "epoch": 0.9780775716694773, "grad_norm": 3.5280606746673584, "learning_rate": 1.1392479528482364e-05, "loss": 0.2005, "step": 580 }, { "epoch": 1.0118043844856661, "grad_norm": 1.6883734464645386, "learning_rate": 1.101834226974731e-05, "loss": 0.1265, "step": 600 }, { "epoch": 1.045531197301855, "grad_norm": 3.4844117164611816, "learning_rate": 1.0644205011012256e-05, "loss": 0.0702, "step": 620 }, { "epoch": 1.0792580101180438, "grad_norm": 3.2391409873962402, "learning_rate": 1.0270067752277204e-05, "loss": 0.0739, "step": 640 }, { "epoch": 1.1129848229342327, "grad_norm": 0.9840973019599915, "learning_rate": 9.895930493542151e-06, "loss": 0.049, "step": 660 }, { "epoch": 1.1467116357504217, "grad_norm": 1.558282494544983, "learning_rate": 9.521793234807098e-06, "loss": 0.0613, "step": 680 }, { "epoch": 1.1804384485666104, "grad_norm": 4.577453136444092, "learning_rate": 9.147655976072046e-06, "loss": 0.0663, "step": 700 }, { "epoch": 1.2141652613827993, "grad_norm": 3.983466386795044, "learning_rate": 8.773518717336993e-06, "loss": 0.0726, "step": 720 }, { "epoch": 1.2478920741989883, "grad_norm": 1.6742738485336304, "learning_rate": 8.399381458601938e-06, "loss": 0.0822, "step": 740 }, { "epoch": 1.281618887015177, "grad_norm": 3.7029430866241455, "learning_rate": 8.025244199866886e-06, "loss": 0.0651, "step": 760 }, { "epoch": 1.315345699831366, "grad_norm": 2.690622329711914, "learning_rate": 7.651106941131833e-06, "loss": 0.0603, "step": 780 }, { "epoch": 1.3490725126475547, "grad_norm": 2.1681394577026367, "learning_rate": 7.27696968239678e-06, "loss": 0.0468, "step": 800 }, { "epoch": 1.3827993254637436, "grad_norm": 1.7206655740737915, "learning_rate": 6.902832423661727e-06, "loss": 0.061, "step": 820 }, { "epoch": 1.4165261382799326, "grad_norm": 9.609166145324707, "learning_rate": 6.528695164926675e-06, "loss": 0.0891, "step": 840 }, { "epoch": 1.4502529510961213, "grad_norm": 7.554985046386719, "learning_rate": 6.154557906191622e-06, "loss": 0.0607, "step": 860 }, { "epoch": 1.4839797639123102, "grad_norm": 5.7379679679870605, "learning_rate": 5.7804206474565675e-06, "loss": 0.0673, "step": 880 }, { "epoch": 1.5177065767284992, "grad_norm": 0.8277637958526611, "learning_rate": 5.406283388721515e-06, "loss": 0.0728, "step": 900 }, { "epoch": 1.551433389544688, "grad_norm": 8.901878356933594, "learning_rate": 5.032146129986462e-06, "loss": 0.065, "step": 920 }, { "epoch": 1.5851602023608768, "grad_norm": 4.0218729972839355, "learning_rate": 4.658008871251409e-06, "loss": 0.0824, "step": 940 }, { "epoch": 1.6188870151770658, "grad_norm": 3.5292980670928955, "learning_rate": 4.283871612516357e-06, "loss": 0.0695, "step": 960 }, { "epoch": 1.6526138279932545, "grad_norm": 5.086415767669678, "learning_rate": 3.909734353781304e-06, "loss": 0.0626, "step": 980 }, { "epoch": 1.6863406408094435, "grad_norm": 4.6788811683654785, "learning_rate": 3.5355970950462504e-06, "loss": 0.0525, "step": 1000 }, { "epoch": 1.7200674536256324, "grad_norm": 2.9604878425598145, "learning_rate": 3.1614598363111975e-06, "loss": 0.0482, "step": 1020 }, { "epoch": 1.7537942664418211, "grad_norm": 6.82230281829834, "learning_rate": 2.7873225775761446e-06, "loss": 0.0968, "step": 1040 }, { "epoch": 1.78752107925801, "grad_norm": 3.8208751678466797, "learning_rate": 2.4131853188410918e-06, "loss": 0.0717, "step": 1060 }, { "epoch": 1.821247892074199, "grad_norm": 1.3548328876495361, "learning_rate": 2.0390480601060385e-06, "loss": 0.0704, "step": 1080 }, { "epoch": 1.8549747048903877, "grad_norm": 5.133670806884766, "learning_rate": 1.6649108013709859e-06, "loss": 0.0666, "step": 1100 }, { "epoch": 1.8887015177065767, "grad_norm": 2.3805644512176514, "learning_rate": 1.3094804055726854e-06, "loss": 0.0841, "step": 1120 }, { "epoch": 1.9224283305227656, "grad_norm": 6.70649528503418, "learning_rate": 9.353431468376325e-07, "loss": 0.0682, "step": 1140 }, { "epoch": 1.9561551433389543, "grad_norm": 1.9125243425369263, "learning_rate": 5.612058881025794e-07, "loss": 0.0584, "step": 1160 }, { "epoch": 1.9898819561551433, "grad_norm": 3.1292877197265625, "learning_rate": 1.8706862936752648e-07, "loss": 0.0423, "step": 1180 } ], "logging_steps": 20, "max_steps": 1186, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }