{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1186,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03372681281618887,
      "grad_norm": 5.941967487335205,
      "learning_rate": 1.0453299076426935e-05,
      "loss": 0.2448,
      "step": 20
    },
    {
      "epoch": 0.06745362563237774,
      "grad_norm": 9.986567497253418,
      "learning_rate": 2.1456771788455288e-05,
      "loss": 0.1918,
      "step": 40
    },
    {
      "epoch": 0.10118043844856661,
      "grad_norm": 3.148054599761963,
      "learning_rate": 2.1082634529720233e-05,
      "loss": 0.14,
      "step": 60
    },
    {
      "epoch": 0.13490725126475547,
      "grad_norm": 8.24326229095459,
      "learning_rate": 2.070849727098518e-05,
      "loss": 0.186,
      "step": 80
    },
    {
      "epoch": 0.16863406408094436,
      "grad_norm": 2.4032340049743652,
      "learning_rate": 2.033436001225013e-05,
      "loss": 0.1407,
      "step": 100
    },
    {
      "epoch": 0.20236087689713322,
      "grad_norm": 3.855356216430664,
      "learning_rate": 1.9960222753515078e-05,
      "loss": 0.1672,
      "step": 120
    },
    {
      "epoch": 0.23608768971332209,
      "grad_norm": 5.036954879760742,
      "learning_rate": 1.9586085494780023e-05,
      "loss": 0.1832,
      "step": 140
    },
    {
      "epoch": 0.26981450252951095,
      "grad_norm": 1.7486367225646973,
      "learning_rate": 1.921194823604497e-05,
      "loss": 0.116,
      "step": 160
    },
    {
      "epoch": 0.30354131534569984,
      "grad_norm": 2.300610303878784,
      "learning_rate": 1.8837810977309916e-05,
      "loss": 0.1341,
      "step": 180
    },
    {
      "epoch": 0.3372681281618887,
      "grad_norm": 7.484567642211914,
      "learning_rate": 1.846367371857486e-05,
      "loss": 0.2118,
      "step": 200
    },
    {
      "epoch": 0.37099494097807756,
      "grad_norm": 3.249177932739258,
      "learning_rate": 1.808953645983981e-05,
      "loss": 0.1274,
      "step": 220
    },
    {
      "epoch": 0.40472175379426645,
      "grad_norm": 4.78777551651001,
      "learning_rate": 1.7715399201104758e-05,
      "loss": 0.1993,
      "step": 240
    },
    {
      "epoch": 0.43844856661045534,
      "grad_norm": 3.9657957553863525,
      "learning_rate": 1.7341261942369707e-05,
      "loss": 0.1561,
      "step": 260
    },
    {
      "epoch": 0.47217537942664417,
      "grad_norm": 5.606136798858643,
      "learning_rate": 1.6967124683634652e-05,
      "loss": 0.1517,
      "step": 280
    },
    {
      "epoch": 0.5059021922428331,
      "grad_norm": 5.850975036621094,
      "learning_rate": 1.65929874248996e-05,
      "loss": 0.1635,
      "step": 300
    },
    {
      "epoch": 0.5396290050590219,
      "grad_norm": 6.081123352050781,
      "learning_rate": 1.6218850166164545e-05,
      "loss": 0.1646,
      "step": 320
    },
    {
      "epoch": 0.5733558178752108,
      "grad_norm": 2.2266976833343506,
      "learning_rate": 1.5844712907429494e-05,
      "loss": 0.1337,
      "step": 340
    },
    {
      "epoch": 0.6070826306913997,
      "grad_norm": 3.113185167312622,
      "learning_rate": 1.5470575648694442e-05,
      "loss": 0.1406,
      "step": 360
    },
    {
      "epoch": 0.6408094435075885,
      "grad_norm": 3.9175527095794678,
      "learning_rate": 1.5096438389959387e-05,
      "loss": 0.1114,
      "step": 380
    },
    {
      "epoch": 0.6745362563237775,
      "grad_norm": 7.268213272094727,
      "learning_rate": 1.4741007994161085e-05,
      "loss": 0.1314,
      "step": 400
    },
    {
      "epoch": 0.7082630691399663,
      "grad_norm": 0.12521882355213165,
      "learning_rate": 1.4366870735426034e-05,
      "loss": 0.1481,
      "step": 420
    },
    {
      "epoch": 0.7419898819561551,
      "grad_norm": 0.6655579805374146,
      "learning_rate": 1.399273347669098e-05,
      "loss": 0.1932,
      "step": 440
    },
    {
      "epoch": 0.7757166947723441,
      "grad_norm": 1.8391481637954712,
      "learning_rate": 1.3618596217955929e-05,
      "loss": 0.1568,
      "step": 460
    },
    {
      "epoch": 0.8094435075885329,
      "grad_norm": 2.549755573272705,
      "learning_rate": 1.3244458959220876e-05,
      "loss": 0.1319,
      "step": 480
    },
    {
      "epoch": 0.8431703204047217,
      "grad_norm": 5.393273830413818,
      "learning_rate": 1.287032170048582e-05,
      "loss": 0.1536,
      "step": 500
    },
    {
      "epoch": 0.8768971332209107,
      "grad_norm": 6.554528713226318,
      "learning_rate": 1.2496184441750769e-05,
      "loss": 0.1462,
      "step": 520
    },
    {
      "epoch": 0.9106239460370995,
      "grad_norm": 1.6136741638183594,
      "learning_rate": 1.2140754045952469e-05,
      "loss": 0.1336,
      "step": 540
    },
    {
      "epoch": 0.9443507588532883,
      "grad_norm": 5.212509632110596,
      "learning_rate": 1.1766616787217416e-05,
      "loss": 0.1453,
      "step": 560
    },
    {
      "epoch": 0.9780775716694773,
      "grad_norm": 3.5280606746673584,
      "learning_rate": 1.1392479528482364e-05,
      "loss": 0.2005,
      "step": 580
    },
    {
      "epoch": 1.0118043844856661,
      "grad_norm": 1.6883734464645386,
      "learning_rate": 1.101834226974731e-05,
      "loss": 0.1265,
      "step": 600
    },
    {
      "epoch": 1.045531197301855,
      "grad_norm": 3.4844117164611816,
      "learning_rate": 1.0644205011012256e-05,
      "loss": 0.0702,
      "step": 620
    },
    {
      "epoch": 1.0792580101180438,
      "grad_norm": 3.2391409873962402,
      "learning_rate": 1.0270067752277204e-05,
      "loss": 0.0739,
      "step": 640
    },
    {
      "epoch": 1.1129848229342327,
      "grad_norm": 0.9840973019599915,
      "learning_rate": 9.895930493542151e-06,
      "loss": 0.049,
      "step": 660
    },
    {
      "epoch": 1.1467116357504217,
      "grad_norm": 1.558282494544983,
      "learning_rate": 9.521793234807098e-06,
      "loss": 0.0613,
      "step": 680
    },
    {
      "epoch": 1.1804384485666104,
      "grad_norm": 4.577453136444092,
      "learning_rate": 9.147655976072046e-06,
      "loss": 0.0663,
      "step": 700
    },
    {
      "epoch": 1.2141652613827993,
      "grad_norm": 3.983466386795044,
      "learning_rate": 8.773518717336993e-06,
      "loss": 0.0726,
      "step": 720
    },
    {
      "epoch": 1.2478920741989883,
      "grad_norm": 1.6742738485336304,
      "learning_rate": 8.399381458601938e-06,
      "loss": 0.0822,
      "step": 740
    },
    {
      "epoch": 1.281618887015177,
      "grad_norm": 3.7029430866241455,
      "learning_rate": 8.025244199866886e-06,
      "loss": 0.0651,
      "step": 760
    },
    {
      "epoch": 1.315345699831366,
      "grad_norm": 2.690622329711914,
      "learning_rate": 7.651106941131833e-06,
      "loss": 0.0603,
      "step": 780
    },
    {
      "epoch": 1.3490725126475547,
      "grad_norm": 2.1681394577026367,
      "learning_rate": 7.27696968239678e-06,
      "loss": 0.0468,
      "step": 800
    },
    {
      "epoch": 1.3827993254637436,
      "grad_norm": 1.7206655740737915,
      "learning_rate": 6.902832423661727e-06,
      "loss": 0.061,
      "step": 820
    },
    {
      "epoch": 1.4165261382799326,
      "grad_norm": 9.609166145324707,
      "learning_rate": 6.528695164926675e-06,
      "loss": 0.0891,
      "step": 840
    },
    {
      "epoch": 1.4502529510961213,
      "grad_norm": 7.554985046386719,
      "learning_rate": 6.154557906191622e-06,
      "loss": 0.0607,
      "step": 860
    },
    {
      "epoch": 1.4839797639123102,
      "grad_norm": 5.7379679679870605,
      "learning_rate": 5.7804206474565675e-06,
      "loss": 0.0673,
      "step": 880
    },
    {
      "epoch": 1.5177065767284992,
      "grad_norm": 0.8277637958526611,
      "learning_rate": 5.406283388721515e-06,
      "loss": 0.0728,
      "step": 900
    },
    {
      "epoch": 1.551433389544688,
      "grad_norm": 8.901878356933594,
      "learning_rate": 5.032146129986462e-06,
      "loss": 0.065,
      "step": 920
    },
    {
      "epoch": 1.5851602023608768,
      "grad_norm": 4.0218729972839355,
      "learning_rate": 4.658008871251409e-06,
      "loss": 0.0824,
      "step": 940
    },
    {
      "epoch": 1.6188870151770658,
      "grad_norm": 3.5292980670928955,
      "learning_rate": 4.283871612516357e-06,
      "loss": 0.0695,
      "step": 960
    },
    {
      "epoch": 1.6526138279932545,
      "grad_norm": 5.086415767669678,
      "learning_rate": 3.909734353781304e-06,
      "loss": 0.0626,
      "step": 980
    },
    {
      "epoch": 1.6863406408094435,
      "grad_norm": 4.6788811683654785,
      "learning_rate": 3.5355970950462504e-06,
      "loss": 0.0525,
      "step": 1000
    },
    {
      "epoch": 1.7200674536256324,
      "grad_norm": 2.9604878425598145,
      "learning_rate": 3.1614598363111975e-06,
      "loss": 0.0482,
      "step": 1020
    },
    {
      "epoch": 1.7537942664418211,
      "grad_norm": 6.82230281829834,
      "learning_rate": 2.7873225775761446e-06,
      "loss": 0.0968,
      "step": 1040
    },
    {
      "epoch": 1.78752107925801,
      "grad_norm": 3.8208751678466797,
      "learning_rate": 2.4131853188410918e-06,
      "loss": 0.0717,
      "step": 1060
    },
    {
      "epoch": 1.821247892074199,
      "grad_norm": 1.3548328876495361,
      "learning_rate": 2.0390480601060385e-06,
      "loss": 0.0704,
      "step": 1080
    },
    {
      "epoch": 1.8549747048903877,
      "grad_norm": 5.133670806884766,
      "learning_rate": 1.6649108013709859e-06,
      "loss": 0.0666,
      "step": 1100
    },
    {
      "epoch": 1.8887015177065767,
      "grad_norm": 2.3805644512176514,
      "learning_rate": 1.3094804055726854e-06,
      "loss": 0.0841,
      "step": 1120
    },
    {
      "epoch": 1.9224283305227656,
      "grad_norm": 6.70649528503418,
      "learning_rate": 9.353431468376325e-07,
      "loss": 0.0682,
      "step": 1140
    },
    {
      "epoch": 1.9561551433389543,
      "grad_norm": 1.9125243425369263,
      "learning_rate": 5.612058881025794e-07,
      "loss": 0.0584,
      "step": 1160
    },
    {
      "epoch": 1.9898819561551433,
      "grad_norm": 3.1292877197265625,
      "learning_rate": 1.8706862936752648e-07,
      "loss": 0.0423,
      "step": 1180
    }
  ],
  "logging_steps": 20,
  "max_steps": 1186,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}