{
  "best_metric": 0.07653788477182388,
  "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-100",
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 228,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17543859649122806,
      "grad_norm": 1.3317383527755737,
      "learning_rate": 0.0001912280701754386,
      "loss": 0.6591,
      "step": 10
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 0.42164257168769836,
      "learning_rate": 0.0001824561403508772,
      "loss": 0.2337,
      "step": 20
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.3815741539001465,
      "learning_rate": 0.0001736842105263158,
      "loss": 0.1533,
      "step": 30
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 0.1837645322084427,
      "learning_rate": 0.0001649122807017544,
      "loss": 0.0954,
      "step": 40
    },
    {
      "epoch": 0.8771929824561403,
      "grad_norm": 0.2024652510881424,
      "learning_rate": 0.00015614035087719297,
      "loss": 0.0513,
      "step": 50
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.1376986801624298,
      "learning_rate": 0.00014736842105263158,
      "loss": 0.0899,
      "step": 60
    },
    {
      "epoch": 1.2280701754385965,
      "grad_norm": 0.10381469875574112,
      "learning_rate": 0.00013859649122807018,
      "loss": 0.0236,
      "step": 70
    },
    {
      "epoch": 1.4035087719298245,
      "grad_norm": 0.0913369208574295,
      "learning_rate": 0.0001298245614035088,
      "loss": 0.0201,
      "step": 80
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.17469696700572968,
      "learning_rate": 0.00012105263157894738,
      "loss": 0.0182,
      "step": 90
    },
    {
      "epoch": 1.7543859649122808,
      "grad_norm": 0.07072672992944717,
      "learning_rate": 0.00011228070175438597,
      "loss": 0.0428,
      "step": 100
    },
    {
      "epoch": 1.7543859649122808,
      "eval_accuracy": 0.98,
      "eval_loss": 0.07653788477182388,
      "eval_runtime": 1.9204,
      "eval_samples_per_second": 78.109,
      "eval_steps_per_second": 9.894,
      "step": 100
    },
    {
      "epoch": 1.9298245614035088,
      "grad_norm": 0.07370463013648987,
      "learning_rate": 0.00010350877192982457,
      "loss": 0.0138,
      "step": 110
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.06612452119588852,
      "learning_rate": 9.473684210526316e-05,
      "loss": 0.0302,
      "step": 120
    },
    {
      "epoch": 2.280701754385965,
      "grad_norm": 0.06168673187494278,
      "learning_rate": 8.596491228070177e-05,
      "loss": 0.0118,
      "step": 130
    },
    {
      "epoch": 2.456140350877193,
      "grad_norm": 0.058557216078042984,
      "learning_rate": 7.719298245614036e-05,
      "loss": 0.0111,
      "step": 140
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.05309578776359558,
      "learning_rate": 6.842105263157895e-05,
      "loss": 0.0106,
      "step": 150
    },
    {
      "epoch": 2.807017543859649,
      "grad_norm": 0.052480150014162064,
      "learning_rate": 5.9649122807017544e-05,
      "loss": 0.0104,
      "step": 160
    },
    {
      "epoch": 2.982456140350877,
      "grad_norm": 0.051260001957416534,
      "learning_rate": 5.087719298245615e-05,
      "loss": 0.0099,
      "step": 170
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 0.05052126199007034,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.0091,
      "step": 180
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.04928547888994217,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.009,
      "step": 190
    },
    {
      "epoch": 3.5087719298245617,
      "grad_norm": 0.05073897913098335,
      "learning_rate": 2.456140350877193e-05,
      "loss": 0.0089,
      "step": 200
    },
    {
      "epoch": 3.5087719298245617,
      "eval_accuracy": 0.98,
      "eval_loss": 0.07699422538280487,
      "eval_runtime": 1.3709,
      "eval_samples_per_second": 109.421,
      "eval_steps_per_second": 13.86,
      "step": 200
    },
    {
      "epoch": 3.6842105263157894,
      "grad_norm": 0.046641260385513306,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.0086,
      "step": 210
    },
    {
      "epoch": 3.8596491228070176,
      "grad_norm": 0.046586308628320694,
      "learning_rate": 7.017543859649123e-06,
      "loss": 0.0085,
      "step": 220
    },
    {
      "epoch": 4.0,
      "step": 228,
      "total_flos": 2.789736629428224e+17,
      "train_loss": 0.06738054856919405,
      "train_runtime": 117.8215,
      "train_samples_per_second": 30.555,
      "train_steps_per_second": 1.935
    }
  ],
  "logging_steps": 10,
  "max_steps": 228,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.789736629428224e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}