| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 9590, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.22007166537097106, | |
| "eval_loss": 4.9203200340271, | |
| "eval_runtime": 6.672, | |
| "eval_samples_per_second": 32.824, | |
| "eval_steps_per_second": 1.049, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.0427528675703859, | |
| "grad_norm": 14641.076171875, | |
| "learning_rate": 0.0005993999999999999, | |
| "loss": 1.251, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.26578738081834347, | |
| "eval_loss": 4.47660493850708, | |
| "eval_runtime": 5.7576, | |
| "eval_samples_per_second": 38.037, | |
| "eval_steps_per_second": 1.216, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.0855057351407718, | |
| "grad_norm": 10230.9931640625, | |
| "learning_rate": 0.0005302211874272409, | |
| "loss": 1.0398, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.27962898426400024, | |
| "eval_loss": 4.3035736083984375, | |
| "eval_runtime": 5.7523, | |
| "eval_samples_per_second": 38.071, | |
| "eval_steps_per_second": 1.217, | |
| "step": 2877 | |
| }, | |
| { | |
| "epoch": 3.1282586027111576, | |
| "grad_norm": 13416.8994140625, | |
| "learning_rate": 0.00046037252619324795, | |
| "loss": 0.978, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.2879125003350937, | |
| "eval_loss": 4.201538562774658, | |
| "eval_runtime": 5.7517, | |
| "eval_samples_per_second": 38.076, | |
| "eval_steps_per_second": 1.217, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 4.1710114702815435, | |
| "grad_norm": 12218.20703125, | |
| "learning_rate": 0.00039052386495925487, | |
| "loss": 0.9464, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.2940960959350901, | |
| "eval_loss": 4.131160259246826, | |
| "eval_runtime": 5.7355, | |
| "eval_samples_per_second": 38.183, | |
| "eval_steps_per_second": 1.22, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 5.213764337851929, | |
| "grad_norm": 11805.5654296875, | |
| "learning_rate": 0.0003206752037252619, | |
| "loss": 0.9242, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.3038450884200556, | |
| "eval_loss": 4.05396842956543, | |
| "eval_runtime": 5.7181, | |
| "eval_samples_per_second": 38.3, | |
| "eval_steps_per_second": 1.224, | |
| "step": 5754 | |
| }, | |
| { | |
| "epoch": 6.256517205422315, | |
| "grad_norm": 13648.1845703125, | |
| "learning_rate": 0.0002508265424912689, | |
| "loss": 0.9027, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.30757133027727884, | |
| "eval_loss": 3.9987871646881104, | |
| "eval_runtime": 5.7301, | |
| "eval_samples_per_second": 38.219, | |
| "eval_steps_per_second": 1.222, | |
| "step": 6713 | |
| }, | |
| { | |
| "epoch": 7.299270072992701, | |
| "grad_norm": 12588.6240234375, | |
| "learning_rate": 0.0001809778812572759, | |
| "loss": 0.8825, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.31707905530386293, | |
| "eval_loss": 3.932004690170288, | |
| "eval_runtime": 5.7156, | |
| "eval_samples_per_second": 38.316, | |
| "eval_steps_per_second": 1.225, | |
| "step": 7672 | |
| }, | |
| { | |
| "epoch": 8.342022940563087, | |
| "grad_norm": 10292.294921875, | |
| "learning_rate": 0.00011112922002328288, | |
| "loss": 0.8632, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.32071593884316724, | |
| "eval_loss": 3.891136884689331, | |
| "eval_runtime": 5.8599, | |
| "eval_samples_per_second": 37.372, | |
| "eval_steps_per_second": 1.195, | |
| "step": 8631 | |
| }, | |
| { | |
| "epoch": 9.384775808133472, | |
| "grad_norm": 10414.537109375, | |
| "learning_rate": 4.128055878928987e-05, | |
| "loss": 0.8454, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.32392390245646013, | |
| "eval_loss": 3.869386911392212, | |
| "eval_runtime": 5.7187, | |
| "eval_samples_per_second": 38.295, | |
| "eval_steps_per_second": 1.224, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 9590, | |
| "total_flos": 8.010952409088e+16, | |
| "train_loss": 0.9517836235611232, | |
| "train_runtime": 10856.3074, | |
| "train_samples_per_second": 28.241, | |
| "train_steps_per_second": 0.883 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 9590, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.010952409088e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |