| { |
| "best_global_step": 660, |
| "best_metric": 0.7489437899118977, |
| "best_model_checkpoint": "./segment_classification_model/checkpoint-660", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 660, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07575757575757576, |
| "grad_norm": 9.61915397644043, |
| "learning_rate": 1.9621212121212123e-05, |
| "loss": 1.6946, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 14.36313247680664, |
| "learning_rate": 1.9242424242424244e-05, |
| "loss": 1.3665, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22727272727272727, |
| "grad_norm": 10.924254417419434, |
| "learning_rate": 1.8863636363636366e-05, |
| "loss": 1.3023, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 17.46079444885254, |
| "learning_rate": 1.8484848484848487e-05, |
| "loss": 1.0992, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3787878787878788, |
| "grad_norm": 9.854267120361328, |
| "learning_rate": 1.810606060606061e-05, |
| "loss": 1.0762, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 19.95755958557129, |
| "learning_rate": 1.772727272727273e-05, |
| "loss": 1.0276, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5303030303030303, |
| "grad_norm": 53.840389251708984, |
| "learning_rate": 1.734848484848485e-05, |
| "loss": 0.9617, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 46.907447814941406, |
| "learning_rate": 1.6969696969696972e-05, |
| "loss": 0.9543, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6818181818181818, |
| "grad_norm": 16.145023345947266, |
| "learning_rate": 1.6590909090909094e-05, |
| "loss": 0.8941, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 20.84531021118164, |
| "learning_rate": 1.6212121212121212e-05, |
| "loss": 0.8396, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 5.9522199630737305, |
| "learning_rate": 1.5833333333333333e-05, |
| "loss": 0.8208, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 11.470800399780273, |
| "learning_rate": 1.5454545454545454e-05, |
| "loss": 0.8438, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9848484848484849, |
| "grad_norm": 7.92372989654541, |
| "learning_rate": 1.5075757575757577e-05, |
| "loss": 0.8505, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7610619469026548, |
| "eval_f1": 0.7489437899118977, |
| "eval_loss": 0.7818353176116943, |
| "eval_runtime": 15.4487, |
| "eval_samples_per_second": 73.146, |
| "eval_steps_per_second": 9.192, |
| "step": 660 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2640, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 693986562240000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|