| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 8790, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.2074662004430449, |
| "eval_loss": 5.136288642883301, |
| "eval_runtime": 6.2541, |
| "eval_samples_per_second": 31.499, |
| "eval_steps_per_second": 1.119, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.1376564277588168, |
| "grad_norm": 12871.9697265625, |
| "learning_rate": 0.0005993999999999999, |
| "loss": 1.2933, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.23289657981264963, |
| "eval_loss": 4.844233989715576, |
| "eval_runtime": 5.189, |
| "eval_samples_per_second": 37.965, |
| "eval_steps_per_second": 1.349, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.2753128555176336, |
| "grad_norm": 17373.572265625, |
| "learning_rate": 0.0005230551989730423, |
| "loss": 1.1247, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.23807205936404185, |
| "eval_loss": 4.751773834228516, |
| "eval_runtime": 5.1988, |
| "eval_samples_per_second": 37.893, |
| "eval_steps_per_second": 1.346, |
| "step": 2637 |
| }, |
| { |
| "epoch": 3.4129692832764507, |
| "grad_norm": 20363.787109375, |
| "learning_rate": 0.0004460333761232349, |
| "loss": 1.0823, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.2448866063357406, |
| "eval_loss": 4.701653957366943, |
| "eval_runtime": 5.2667, |
| "eval_samples_per_second": 37.405, |
| "eval_steps_per_second": 1.329, |
| "step": 3516 |
| }, |
| { |
| "epoch": 4.550625711035267, |
| "grad_norm": 25301.0234375, |
| "learning_rate": 0.00036901155327342743, |
| "loss": 1.0676, |
| "step": 4000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.24713163201446353, |
| "eval_loss": 4.654669284820557, |
| "eval_runtime": 5.195, |
| "eval_samples_per_second": 37.921, |
| "eval_steps_per_second": 1.347, |
| "step": 4395 |
| }, |
| { |
| "epoch": 5.688282138794084, |
| "grad_norm": 24580.740234375, |
| "learning_rate": 0.00029198973042362, |
| "loss": 1.0504, |
| "step": 5000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.25679716292330157, |
| "eval_loss": 4.568830966949463, |
| "eval_runtime": 5.1829, |
| "eval_samples_per_second": 38.009, |
| "eval_steps_per_second": 1.351, |
| "step": 5274 |
| }, |
| { |
| "epoch": 6.825938566552901, |
| "grad_norm": 18673.5703125, |
| "learning_rate": 0.00021496790757381256, |
| "loss": 1.0252, |
| "step": 6000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.2680719600266224, |
| "eval_loss": 4.470284461975098, |
| "eval_runtime": 5.1877, |
| "eval_samples_per_second": 37.974, |
| "eval_steps_per_second": 1.349, |
| "step": 6153 |
| }, |
| { |
| "epoch": 7.963594994311718, |
| "grad_norm": 14578.203125, |
| "learning_rate": 0.0001379460847240051, |
| "loss": 0.9998, |
| "step": 7000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.27541299532120755, |
| "eval_loss": 4.384091377258301, |
| "eval_runtime": 5.2699, |
| "eval_samples_per_second": 37.382, |
| "eval_steps_per_second": 1.328, |
| "step": 7032 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.2796547031301221, |
| "eval_loss": 4.340909957885742, |
| "eval_runtime": 5.1886, |
| "eval_samples_per_second": 37.968, |
| "eval_steps_per_second": 1.349, |
| "step": 7911 |
| }, |
| { |
| "epoch": 9.101251422070535, |
| "grad_norm": 12082.904296875, |
| "learning_rate": 6.092426187419768e-05, |
| "loss": 0.9762, |
| "step": 8000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2842540256489217, |
| "eval_loss": 4.3034586906433105, |
| "eval_runtime": 5.2131, |
| "eval_samples_per_second": 37.789, |
| "eval_steps_per_second": 1.343, |
| "step": 8790 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 8790, |
| "total_flos": 7.34753193984e+16, |
| "train_loss": 1.0670098383950157, |
| "train_runtime": 10225.3656, |
| "train_samples_per_second": 27.5, |
| "train_steps_per_second": 0.86 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 8790, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.34753193984e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|