| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 10350, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.966183574879227, |
| "grad_norm": 18068.357421875, |
| "learning_rate": 0.0005993999999999999, |
| "loss": 1.1382, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.31820882561564307, |
| "eval_loss": 4.372585296630859, |
| "eval_runtime": 671.6885, |
| "eval_samples_per_second": 50.702, |
| "eval_steps_per_second": 1.586, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.9323671497584543, |
| "grad_norm": 48934.1015625, |
| "learning_rate": 0.0005358930481283422, |
| "loss": 1.0291, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.31461109065441656, |
| "eval_loss": 4.359684944152832, |
| "eval_runtime": 666.115, |
| "eval_samples_per_second": 51.126, |
| "eval_steps_per_second": 1.599, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.898550724637681, |
| "grad_norm": 52002.11328125, |
| "learning_rate": 0.00047172192513368985, |
| "loss": 1.0267, |
| "step": 3000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.31789266625201634, |
| "eval_loss": 4.333441257476807, |
| "eval_runtime": 665.863, |
| "eval_samples_per_second": 51.146, |
| "eval_steps_per_second": 1.599, |
| "step": 3105 |
| }, |
| { |
| "epoch": 3.864734299516908, |
| "grad_norm": 55701.2734375, |
| "learning_rate": 0.0004075508021390374, |
| "loss": 1.0192, |
| "step": 4000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.3215952705041587, |
| "eval_loss": 4.2829365730285645, |
| "eval_runtime": 664.6956, |
| "eval_samples_per_second": 51.235, |
| "eval_steps_per_second": 1.602, |
| "step": 4140 |
| }, |
| { |
| "epoch": 4.830917874396135, |
| "grad_norm": 83004.7734375, |
| "learning_rate": 0.000343379679144385, |
| "loss": 1.0042, |
| "step": 5000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.32670145683844315, |
| "eval_loss": 4.2311296463012695, |
| "eval_runtime": 664.7858, |
| "eval_samples_per_second": 51.229, |
| "eval_steps_per_second": 1.602, |
| "step": 5175 |
| }, |
| { |
| "epoch": 5.797101449275362, |
| "grad_norm": 21215.708984375, |
| "learning_rate": 0.0002792085561497326, |
| "loss": 0.9873, |
| "step": 6000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.33200261385989327, |
| "eval_loss": 4.1559600830078125, |
| "eval_runtime": 666.0654, |
| "eval_samples_per_second": 51.13, |
| "eval_steps_per_second": 1.599, |
| "step": 6210 |
| }, |
| { |
| "epoch": 6.763285024154589, |
| "grad_norm": 18555.8984375, |
| "learning_rate": 0.0002150374331550802, |
| "loss": 0.9662, |
| "step": 7000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.34101654601813886, |
| "eval_loss": 4.062748908996582, |
| "eval_runtime": 665.7453, |
| "eval_samples_per_second": 51.155, |
| "eval_steps_per_second": 1.6, |
| "step": 7245 |
| }, |
| { |
| "epoch": 7.729468599033816, |
| "grad_norm": 10123.9931640625, |
| "learning_rate": 0.0001508663101604278, |
| "loss": 0.9422, |
| "step": 8000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.34926088123762544, |
| "eval_loss": 3.976839542388916, |
| "eval_runtime": 665.8149, |
| "eval_samples_per_second": 51.149, |
| "eval_steps_per_second": 1.6, |
| "step": 8280 |
| }, |
| { |
| "epoch": 8.695652173913043, |
| "grad_norm": 8124.27392578125, |
| "learning_rate": 8.66951871657754e-05, |
| "loss": 0.9207, |
| "step": 9000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.3541727864362461, |
| "eval_loss": 3.9173777103424072, |
| "eval_runtime": 665.3441, |
| "eval_samples_per_second": 51.186, |
| "eval_steps_per_second": 1.601, |
| "step": 9315 |
| }, |
| { |
| "epoch": 9.66183574879227, |
| "grad_norm": 7507.09521484375, |
| "learning_rate": 2.252406417112299e-05, |
| "loss": 0.9058, |
| "step": 10000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.3569124320159682, |
| "eval_loss": 3.889129400253296, |
| "eval_runtime": 665.1197, |
| "eval_samples_per_second": 51.203, |
| "eval_steps_per_second": 1.601, |
| "step": 10350 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 10350, |
| "total_flos": 8.6487662592e+16, |
| "train_loss": 0.990625182810613, |
| "train_runtime": 16693.5239, |
| "train_samples_per_second": 19.828, |
| "train_steps_per_second": 0.62 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 10350, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.6487662592e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|