| { | |
| "best_metric": 0.7899897141616012, | |
| "best_model_checkpoint": "/kaggle/working/reels-clf/checkpoint-6258", | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 6258, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4793863854266539, | |
| "grad_norm": 43369.4140625, | |
| "learning_rate": 4.6005113454777885e-05, | |
| "loss": 0.2922, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9587727708533078, | |
| "grad_norm": 16354.4609375, | |
| "learning_rate": 4.2010226909555774e-05, | |
| "loss": 0.0957, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9896901036983756, | |
| "eval_f1": 0.3894120956863034, | |
| "eval_loss": 0.0804702639579773, | |
| "eval_runtime": 151.5528, | |
| "eval_samples_per_second": 110.08, | |
| "eval_steps_per_second": 1.722, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.4381591562799616, | |
| "grad_norm": 592.91064453125, | |
| "learning_rate": 3.8015340364333656e-05, | |
| "loss": 0.0814, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.9175455417066156, | |
| "grad_norm": 63719.01171875, | |
| "learning_rate": 3.402045381911154e-05, | |
| "loss": 0.074, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9937061679554037, | |
| "eval_f1": 0.5246078522111747, | |
| "eval_loss": 0.06898169219493866, | |
| "eval_runtime": 151.5845, | |
| "eval_samples_per_second": 110.057, | |
| "eval_steps_per_second": 1.722, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.396931927133269, | |
| "grad_norm": 91346.8984375, | |
| "learning_rate": 3.002556727388942e-05, | |
| "loss": 0.057, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.876318312559923, | |
| "grad_norm": 0.34639281034469604, | |
| "learning_rate": 2.6030680728667306e-05, | |
| "loss": 0.0565, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9947251693340526, | |
| "eval_f1": 0.5763717231704362, | |
| "eval_loss": 0.07740658521652222, | |
| "eval_runtime": 151.2075, | |
| "eval_samples_per_second": 110.332, | |
| "eval_steps_per_second": 1.726, | |
| "step": 3129 | |
| }, | |
| { | |
| "epoch": 3.3557046979865772, | |
| "grad_norm": 0.3635995090007782, | |
| "learning_rate": 2.2035794183445192e-05, | |
| "loss": 0.0405, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.8350910834132312, | |
| "grad_norm": 45141.87109375, | |
| "learning_rate": 1.8040907638223074e-05, | |
| "loss": 0.0347, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9955044056824313, | |
| "eval_f1": 0.6777953957165785, | |
| "eval_loss": 0.06517060101032257, | |
| "eval_runtime": 152.3821, | |
| "eval_samples_per_second": 109.481, | |
| "eval_steps_per_second": 1.713, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 4.314477468839885, | |
| "grad_norm": 0.10806822776794434, | |
| "learning_rate": 1.4046021093000961e-05, | |
| "loss": 0.0214, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.793863854266538, | |
| "grad_norm": 0.16395215690135956, | |
| "learning_rate": 1.0051134547778844e-05, | |
| "loss": 0.0269, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9956242881975664, | |
| "eval_f1": 0.7422681606882496, | |
| "eval_loss": 0.06420310586690903, | |
| "eval_runtime": 151.8009, | |
| "eval_samples_per_second": 109.901, | |
| "eval_steps_per_second": 1.719, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 5.273250239693192, | |
| "grad_norm": 90.11448669433594, | |
| "learning_rate": 6.056248002556728e-06, | |
| "loss": 0.0169, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.752636625119846, | |
| "grad_norm": 0.08740052580833435, | |
| "learning_rate": 2.0613614573346116e-06, | |
| "loss": 0.0213, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9957441707127016, | |
| "eval_f1": 0.7899897141616012, | |
| "eval_loss": 0.061532892286777496, | |
| "eval_runtime": 151.598, | |
| "eval_samples_per_second": 110.048, | |
| "eval_steps_per_second": 1.722, | |
| "step": 6258 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 6258, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.307637757681664e+16, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |