| { | |
| "best_metric": 0.7072715848684014, | |
| "best_model_checkpoint": "./models/mr_M_IB/checkpoint-47805", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 47805, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9998954084300805, | |
| "grad_norm": 2.287510395050049, | |
| "learning_rate": 4.1667538263082664e-05, | |
| "loss": 0.267, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.6238872954778499, | |
| "eval_loss": 0.2593071460723877, | |
| "eval_precision": 0.6048524199667669, | |
| "eval_recall": 0.6441591653027823, | |
| "eval_runtime": 135.2349, | |
| "eval_samples_per_second": 502.755, | |
| "eval_steps_per_second": 7.86, | |
| "step": 9561 | |
| }, | |
| { | |
| "epoch": 1.999790816860161, | |
| "grad_norm": 1.7041232585906982, | |
| "learning_rate": 3.3335076526165327e-05, | |
| "loss": 0.1889, | |
| "step": 19120 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.6720800989317163, | |
| "eval_loss": 0.2354426085948944, | |
| "eval_precision": 0.6531648502283017, | |
| "eval_recall": 0.6921235679214403, | |
| "eval_runtime": 281.6345, | |
| "eval_samples_per_second": 241.412, | |
| "eval_steps_per_second": 3.774, | |
| "step": 19122 | |
| }, | |
| { | |
| "epoch": 2.9996862252902416, | |
| "grad_norm": 1.1123149394989014, | |
| "learning_rate": 2.5002614789247985e-05, | |
| "loss": 0.1426, | |
| "step": 28680 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.6782475538938828, | |
| "eval_loss": 0.23962680995464325, | |
| "eval_precision": 0.6598243305989785, | |
| "eval_recall": 0.6977291325695582, | |
| "eval_runtime": 278.6945, | |
| "eval_samples_per_second": 243.959, | |
| "eval_steps_per_second": 3.814, | |
| "step": 28683 | |
| }, | |
| { | |
| "epoch": 3.999581633720322, | |
| "grad_norm": 1.4725306034088135, | |
| "learning_rate": 1.667015305233065e-05, | |
| "loss": 0.1088, | |
| "step": 38240 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.700093388400861, | |
| "eval_loss": 0.22952789068222046, | |
| "eval_precision": 0.6839742776568858, | |
| "eval_recall": 0.716990589198036, | |
| "eval_runtime": 276.263, | |
| "eval_samples_per_second": 246.106, | |
| "eval_steps_per_second": 3.848, | |
| "step": 38244 | |
| }, | |
| { | |
| "epoch": 4.999477042150403, | |
| "grad_norm": 1.0658468008041382, | |
| "learning_rate": 8.337691315413311e-06, | |
| "loss": 0.0802, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.7072715848684014, | |
| "eval_loss": 0.24495984613895416, | |
| "eval_precision": 0.6900188763695098, | |
| "eval_recall": 0.7254091653027823, | |
| "eval_runtime": 276.7682, | |
| "eval_samples_per_second": 245.657, | |
| "eval_steps_per_second": 3.841, | |
| "step": 47805 | |
| } | |
| ], | |
| "logging_steps": 9560, | |
| "max_steps": 57366, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1909647240601254e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |