{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.93968253968254,
  "eval_steps": 500,
  "global_step": 117,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 0.29794585704803467,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 3.8299,
      "step": 5
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 0.4180847704410553,
      "learning_rate": 4.166666666666667e-05,
      "loss": 3.8377,
      "step": 10
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 0.602063775062561,
      "learning_rate": 4.989935734988098e-05,
      "loss": 3.7329,
      "step": 15
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 0.7148785591125488,
      "learning_rate": 4.928725095732169e-05,
      "loss": 3.2317,
      "step": 20
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 0.695216953754425,
      "learning_rate": 4.813260751184992e-05,
      "loss": 2.8175,
      "step": 25
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.690677285194397,
      "learning_rate": 4.6461219840046654e-05,
      "loss": 2.4144,
      "step": 30
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.6858775615692139,
      "learning_rate": 4.431042398061499e-05,
      "loss": 2.0554,
      "step": 35
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6853828430175781,
      "learning_rate": 4.172826515897146e-05,
      "loss": 1.6682,
      "step": 40
    },
    {
      "epoch": 1.126984126984127,
      "grad_norm": 0.7094295024871826,
      "learning_rate": 3.8772424536302564e-05,
      "loss": 1.4453,
      "step": 45
    },
    {
      "epoch": 1.253968253968254,
      "grad_norm": 0.6357172727584839,
      "learning_rate": 3.550893070773914e-05,
      "loss": 1.4124,
      "step": 50
    },
    {
      "epoch": 1.380952380952381,
      "grad_norm": 0.6284698843955994,
      "learning_rate": 3.201068473265007e-05,
      "loss": 1.2353,
      "step": 55
    },
    {
      "epoch": 1.507936507936508,
      "grad_norm": 0.7772500514984131,
      "learning_rate": 2.8355831645441388e-05,
      "loss": 1.302,
      "step": 60
    },
    {
      "epoch": 1.6349206349206349,
      "grad_norm": 0.9123637676239014,
      "learning_rate": 2.4626014824618415e-05,
      "loss": 1.1044,
      "step": 65
    },
    {
      "epoch": 1.7619047619047619,
      "grad_norm": 0.7838051915168762,
      "learning_rate": 2.090455221462156e-05,
      "loss": 1.1902,
      "step": 70
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 0.7451229691505432,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 1.1313,
      "step": 75
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.0073789358139038,
      "learning_rate": 1.3817171292109183e-05,
      "loss": 0.9442,
      "step": 80
    },
    {
      "epoch": 2.126984126984127,
      "grad_norm": 1.04133939743042,
      "learning_rate": 1.0609573357858166e-05,
      "loss": 1.0857,
      "step": 85
    },
    {
      "epoch": 2.253968253968254,
      "grad_norm": 0.6492704153060913,
      "learning_rate": 7.723433775328384e-06,
      "loss": 1.012,
      "step": 90
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.87400221824646,
      "learning_rate": 5.223224133591476e-06,
      "loss": 0.99,
      "step": 95
    },
    {
      "epoch": 2.507936507936508,
      "grad_norm": 0.6877263784408569,
      "learning_rate": 3.164794984571759e-06,
      "loss": 1.1175,
      "step": 100
    },
    {
      "epoch": 2.634920634920635,
      "grad_norm": 0.6045323610305786,
      "learning_rate": 1.59412823400657e-06,
      "loss": 1.0271,
      "step": 105
    },
    {
      "epoch": 2.761904761904762,
      "grad_norm": 0.5473117232322693,
      "learning_rate": 5.463099816548579e-07,
      "loss": 0.9615,
      "step": 110
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 1.0096474885940552,
      "learning_rate": 4.474675580662113e-08,
      "loss": 1.0617,
      "step": 115
    },
    {
      "epoch": 2.93968253968254,
      "step": 117,
      "total_flos": 9999220585005056.0,
      "train_loss": 1.7545859487647684,
      "train_runtime": 1382.7748,
      "train_samples_per_second": 2.734,
      "train_steps_per_second": 0.085
    }
  ],
  "logging_steps": 5,
  "max_steps": 117,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 39,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9999220585005056.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}