| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 10200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 19500.830078125, | |
| "learning_rate": 0.0005993999999999999, | |
| "loss": 1.1428, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.3207814862667259, | |
| "eval_loss": 4.39427375793457, | |
| "eval_runtime": 724.8975, | |
| "eval_samples_per_second": 46.365, | |
| "eval_steps_per_second": 1.45, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 34896.4453125, | |
| "learning_rate": 0.0005348478260869565, | |
| "loss": 1.0267, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.32142458300605947, | |
| "eval_loss": 4.34135627746582, | |
| "eval_runtime": 725.3947, | |
| "eval_samples_per_second": 46.333, | |
| "eval_steps_per_second": 1.449, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 36974.3828125, | |
| "learning_rate": 0.00046963043478260864, | |
| "loss": 1.0196, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.32609924709063504, | |
| "eval_loss": 4.303652286529541, | |
| "eval_runtime": 725.0526, | |
| "eval_samples_per_second": 46.355, | |
| "eval_steps_per_second": 1.45, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.9215686274509802, | |
| "grad_norm": 24522.501953125, | |
| "learning_rate": 0.0004044130434782608, | |
| "loss": 1.0028, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.33053349954671724, | |
| "eval_loss": 4.243885517120361, | |
| "eval_runtime": 724.6953, | |
| "eval_samples_per_second": 46.378, | |
| "eval_steps_per_second": 1.45, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 4.901960784313726, | |
| "grad_norm": 42106.47265625, | |
| "learning_rate": 0.000339195652173913, | |
| "loss": 0.9826, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.33797758448323145, | |
| "eval_loss": 4.16740083694458, | |
| "eval_runtime": 724.9419, | |
| "eval_samples_per_second": 46.362, | |
| "eval_steps_per_second": 1.45, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 5.882352941176471, | |
| "grad_norm": 14265.330078125, | |
| "learning_rate": 0.0002739782608695652, | |
| "loss": 0.9647, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.3449917349405026, | |
| "eval_loss": 4.089650630950928, | |
| "eval_runtime": 725.2922, | |
| "eval_samples_per_second": 46.34, | |
| "eval_steps_per_second": 1.449, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 6.862745098039216, | |
| "grad_norm": 9816.4091796875, | |
| "learning_rate": 0.00020876086956521736, | |
| "loss": 0.9376, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.3534573218412422, | |
| "eval_loss": 3.99379825592041, | |
| "eval_runtime": 725.1507, | |
| "eval_samples_per_second": 46.349, | |
| "eval_steps_per_second": 1.449, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 7.8431372549019605, | |
| "grad_norm": 8245.6923828125, | |
| "learning_rate": 0.00014354347826086956, | |
| "loss": 0.9117, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.3596525938429237, | |
| "eval_loss": 3.915853261947632, | |
| "eval_runtime": 725.91, | |
| "eval_samples_per_second": 46.301, | |
| "eval_steps_per_second": 1.448, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 8.823529411764707, | |
| "grad_norm": 7736.12646484375, | |
| "learning_rate": 7.832608695652173e-05, | |
| "loss": 0.8938, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.3645499108864138, | |
| "eval_loss": 3.866149663925171, | |
| "eval_runtime": 724.9064, | |
| "eval_samples_per_second": 46.365, | |
| "eval_steps_per_second": 1.45, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 9.803921568627452, | |
| "grad_norm": 7472.7890625, | |
| "learning_rate": 1.3108695652173912e-05, | |
| "loss": 0.8816, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.3663948328676292, | |
| "eval_loss": 3.842345714569092, | |
| "eval_runtime": 725.2736, | |
| "eval_samples_per_second": 46.341, | |
| "eval_steps_per_second": 1.449, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 10200, | |
| "total_flos": 8.525436420096e+16, | |
| "train_loss": 0.9743627047071269, | |
| "train_runtime": 17661.1762, | |
| "train_samples_per_second": 18.474, | |
| "train_steps_per_second": 0.578 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 10200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.525436420096e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |