| { | |
| "best_metric": 0.15153372287750244, | |
| "best_model_checkpoint": "../models/b_ll_asc1k-2/checkpoint-378", | |
| "epoch": 5.1, | |
| "eval_steps": 126, | |
| "global_step": 378, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0015873015873015873, | |
| "grad_norm": 1.7664669752120972, | |
| "learning_rate": 1e-05, | |
| "loss": 2.9008, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.1034066677093506, | |
| "learning_rate": 0.00063, | |
| "loss": 1.2295, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.6373379230499268, | |
| "learning_rate": 0.0009509433962264152, | |
| "loss": 0.2402, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 0.20612144470214844, | |
| "eval_runtime": 25.198, | |
| "eval_samples_per_second": 7.223, | |
| "eval_steps_per_second": 0.913, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.5278385281562805, | |
| "learning_rate": 0.0008320754716981132, | |
| "loss": 0.1486, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.562412440776825, | |
| "learning_rate": 0.0007132075471698113, | |
| "loss": 0.1, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "eval_loss": 0.1705094277858734, | |
| "eval_runtime": 24.6952, | |
| "eval_samples_per_second": 7.37, | |
| "eval_steps_per_second": 0.931, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.3925608992576599, | |
| "learning_rate": 0.0005943396226415095, | |
| "loss": 0.0655, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 0.39186909794807434, | |
| "learning_rate": 0.0004754716981132076, | |
| "loss": 0.0411, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "eval_loss": 0.15153372287750244, | |
| "eval_runtime": 24.8181, | |
| "eval_samples_per_second": 7.333, | |
| "eval_steps_per_second": 0.927, | |
| "step": 378 | |
| } | |
| ], | |
| "logging_steps": 63, | |
| "max_steps": 630, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 126, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.28296230912e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |