{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4413,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.022660321776569226,
      "grad_norm": 0.09582193195819855,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.4053,
      "step": 100
    },
    {
      "epoch": 0.04532064355313845,
      "grad_norm": 0.1730571836233139,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.3825,
      "step": 200
    },
    {
      "epoch": 0.06798096532970768,
      "grad_norm": 0.3355884253978729,
      "learning_rate": 2e-05,
      "loss": 2.356,
      "step": 300
    },
    {
      "epoch": 0.0906412871062769,
      "grad_norm": 0.37008875608444214,
      "learning_rate": 1.9970843111690533e-05,
      "loss": 2.2935,
      "step": 400
    },
    {
      "epoch": 0.11330160888284614,
      "grad_norm": 0.5142782330513,
      "learning_rate": 1.9883542471589315e-05,
      "loss": 2.2561,
      "step": 500
    },
    {
      "epoch": 0.13596193065941536,
      "grad_norm": 0.42936116456985474,
      "learning_rate": 1.9738607162698895e-05,
      "loss": 2.2424,
      "step": 600
    },
    {
      "epoch": 0.1586222524359846,
      "grad_norm": 0.537521243095398,
      "learning_rate": 1.9536882357541958e-05,
      "loss": 2.242,
      "step": 700
    },
    {
      "epoch": 0.1812825742125538,
      "grad_norm": 0.602051854133606,
      "learning_rate": 1.927954438964115e-05,
      "loss": 2.214,
      "step": 800
    },
    {
      "epoch": 0.20394289598912305,
      "grad_norm": 0.5285528898239136,
      "learning_rate": 1.8968093893874042e-05,
      "loss": 2.1943,
      "step": 900
    },
    {
      "epoch": 0.22660321776569228,
      "grad_norm": 0.6450159549713135,
      "learning_rate": 1.8604347055704433e-05,
      "loss": 2.1566,
      "step": 1000
    },
    {
      "epoch": 0.2492635395422615,
      "grad_norm": 0.6594407558441162,
      "learning_rate": 1.8190425020319016e-05,
      "loss": 2.1578,
      "step": 1100
    },
    {
      "epoch": 0.27192386131883073,
      "grad_norm": 0.6740846633911133,
      "learning_rate": 1.7728741523428696e-05,
      "loss": 2.1578,
      "step": 1200
    },
    {
      "epoch": 0.29458418309539997,
      "grad_norm": 0.6054636240005493,
      "learning_rate": 1.722198881586411e-05,
      "loss": 2.1301,
      "step": 1300
    },
    {
      "epoch": 0.3172445048719692,
      "grad_norm": 0.5829110145568848,
      "learning_rate": 1.667312196404425e-05,
      "loss": 2.1366,
      "step": 1400
    },
    {
      "epoch": 0.3399048266485384,
      "grad_norm": 0.6636696457862854,
      "learning_rate": 1.6085341617868172e-05,
      "loss": 2.1301,
      "step": 1500
    },
    {
      "epoch": 0.3625651484251076,
      "grad_norm": 0.8352382779121399,
      "learning_rate": 1.546207534651667e-05,
      "loss": 2.1157,
      "step": 1600
    },
    {
      "epoch": 0.38522547020167686,
      "grad_norm": 0.6855395436286926,
      "learning_rate": 1.4806957651001911e-05,
      "loss": 2.1084,
      "step": 1700
    },
    {
      "epoch": 0.4078857919782461,
      "grad_norm": 0.8896074891090393,
      "learning_rate": 1.4123808770019433e-05,
      "loss": 2.1494,
      "step": 1800
    },
    {
      "epoch": 0.43054611375481533,
      "grad_norm": 0.7051901817321777,
      "learning_rate": 1.3416612402693543e-05,
      "loss": 2.1406,
      "step": 1900
    },
    {
      "epoch": 0.45320643553138457,
      "grad_norm": 0.9859122633934021,
      "learning_rate": 1.2689492478123242e-05,
      "loss": 2.1142,
      "step": 2000
    },
    {
      "epoch": 0.47586675730795375,
      "grad_norm": 0.9501364827156067,
      "learning_rate": 1.1946689107194183e-05,
      "loss": 2.091,
      "step": 2100
    },
    {
      "epoch": 0.498527079084523,
      "grad_norm": 1.0703001022338867,
      "learning_rate": 1.119253385689078e-05,
      "loss": 2.0765,
      "step": 2200
    },
    {
      "epoch": 0.5211874008610923,
      "grad_norm": 0.669400691986084,
      "learning_rate": 1.0431424491293254e-05,
      "loss": 2.0824,
      "step": 2300
    },
    {
      "epoch": 0.5438477226376615,
      "grad_norm": 0.7835758924484253,
      "learning_rate": 9.667799326554403e-06,
      "loss": 2.0818,
      "step": 2400
    },
    {
      "epoch": 0.5665080444142306,
      "grad_norm": 0.8207575082778931,
      "learning_rate": 8.906111349401949e-06,
      "loss": 2.1016,
      "step": 2500
    },
    {
      "epoch": 0.5891683661907999,
      "grad_norm": 0.8124341368675232,
      "learning_rate": 8.150802250091193e-06,
      "loss": 2.0647,
      "step": 2600
    },
    {
      "epoch": 0.6118286879673691,
      "grad_norm": 0.8744191527366638,
      "learning_rate": 7.406276521231679e-06,
      "loss": 2.0657,
      "step": 2700
    },
    {
      "epoch": 0.6344890097439384,
      "grad_norm": 1.0869206190109253,
      "learning_rate": 6.676875773527383e-06,
      "loss": 2.0547,
      "step": 2800
    },
    {
      "epoch": 0.6571493315205076,
      "grad_norm": 0.7237268686294556,
      "learning_rate": 5.966853418205035e-06,
      "loss": 2.124,
      "step": 2900
    },
    {
      "epoch": 0.6798096532970768,
      "grad_norm": 0.9836551547050476,
      "learning_rate": 5.2803498637669055e-06,
      "loss": 2.0877,
      "step": 3000
    },
    {
      "epoch": 0.7024699750736461,
      "grad_norm": 0.8831650614738464,
      "learning_rate": 4.621368371705162e-06,
      "loss": 2.0978,
      "step": 3100
    },
    {
      "epoch": 0.7251302968502152,
      "grad_norm": 0.8482229709625244,
      "learning_rate": 3.993751711972204e-06,
      "loss": 2.075,
      "step": 3200
    },
    {
      "epoch": 0.7477906186267845,
      "grad_norm": 0.8325951099395752,
      "learning_rate": 3.401159754337836e-06,
      "loss": 2.1016,
      "step": 3300
    },
    {
      "epoch": 0.7704509404033537,
      "grad_norm": 1.3220783472061157,
      "learning_rate": 2.8470481263064255e-06,
      "loss": 2.1096,
      "step": 3400
    },
    {
      "epoch": 0.793111262179923,
      "grad_norm": 0.8809642195701599,
      "learning_rate": 2.3346480620478685e-06,
      "loss": 2.079,
      "step": 3500
    },
    {
      "epoch": 0.8157715839564922,
      "grad_norm": 0.9344497919082642,
      "learning_rate": 1.866947559850839e-06,
      "loss": 2.1025,
      "step": 3600
    },
    {
      "epoch": 0.8384319057330614,
      "grad_norm": 0.9643566012382507,
      "learning_rate": 1.446673957976298e-06,
      "loss": 2.1116,
      "step": 3700
    },
    {
      "epoch": 0.8610922275096307,
      "grad_norm": 1.0109236240386963,
      "learning_rate": 1.0762780305181064e-06,
      "loss": 2.0662,
      "step": 3800
    },
    {
      "epoch": 0.8837525492861998,
      "grad_norm": 0.9456806182861328,
      "learning_rate": 7.579196960136958e-07,
      "loss": 2.0815,
      "step": 3900
    },
    {
      "epoch": 0.9064128710627691,
      "grad_norm": 1.229778528213501,
      "learning_rate": 4.934554221433741e-07,
      "loss": 2.0636,
      "step": 4000
    },
    {
      "epoch": 0.9290731928393383,
      "grad_norm": 0.8360131978988647,
      "learning_rate": 2.8442739996615956e-07,
      "loss": 2.0465,
      "step": 4100
    },
    {
      "epoch": 0.9517335146159075,
      "grad_norm": 0.6662079691886902,
      "learning_rate": 1.3205455082128228e-07,
      "loss": 2.0419,
      "step": 4200
    },
    {
      "epoch": 0.9743938363924768,
      "grad_norm": 0.8761087656021118,
      "learning_rate": 3.7225418337528685e-08,
      "loss": 2.0664,
      "step": 4300
    },
    {
      "epoch": 0.997054158169046,
      "grad_norm": 1.0942589044570923,
      "learning_rate": 4.929869997571945e-10,
      "loss": 2.0249,
      "step": 4400
    },
    {
      "epoch": 1.0,
      "step": 4413,
      "total_flos": 8.01984399409152e+16,
      "train_loss": 2.1353629073560736,
      "train_runtime": 1379.8897,
      "train_samples_per_second": 6.396,
      "train_steps_per_second": 3.198
    }
  ],
  "logging_steps": 100,
  "max_steps": 4413,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.01984399409152e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}