{ "best_global_step": 448, "best_metric": 0.10987533628940582, "best_model_checkpoint": "./finetuned-model-16-full\\checkpoint-448", "epoch": 9.0, "eval_steps": 500, "global_step": 576, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7889546351084813, "grad_norm": 0.21714453399181366, "learning_rate": 4.603174603174603e-05, "loss": 0.5977, "step": 50 }, { "epoch": 1.0, "eval_loss": 0.1679154932498932, "eval_runtime": 449.444, "eval_samples_per_second": 0.556, "eval_steps_per_second": 0.556, "step": 64 }, { "epoch": 1.5680473372781065, "grad_norm": 0.13297569751739502, "learning_rate": 4.2063492063492065e-05, "loss": 0.1177, "step": 100 }, { "epoch": 2.0, "eval_loss": 0.13711020350456238, "eval_runtime": 448.5722, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "step": 128 }, { "epoch": 2.3471400394477318, "grad_norm": 0.11708807200193405, "learning_rate": 3.809523809523809e-05, "loss": 0.0936, "step": 150 }, { "epoch": 3.0, "eval_loss": 0.12583088874816895, "eval_runtime": 449.2291, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "step": 192 }, { "epoch": 3.126232741617357, "grad_norm": 0.13195854425430298, "learning_rate": 3.412698412698413e-05, "loss": 0.0876, "step": 200 }, { "epoch": 3.9151873767258385, "grad_norm": 0.1334521323442459, "learning_rate": 3.0158730158730158e-05, "loss": 0.0773, "step": 250 }, { "epoch": 4.0, "eval_loss": 0.11675416678190231, "eval_runtime": 447.9794, "eval_samples_per_second": 0.558, "eval_steps_per_second": 0.558, "step": 256 }, { "epoch": 4.6942800788954635, "grad_norm": 0.1745067685842514, "learning_rate": 2.6190476190476192e-05, "loss": 0.0715, "step": 300 }, { "epoch": 5.0, "eval_loss": 0.11324296146631241, "eval_runtime": 449.3754, "eval_samples_per_second": 0.556, "eval_steps_per_second": 0.556, "step": 320 }, { "epoch": 5.4733727810650885, "grad_norm": 0.14304669201374054, "learning_rate": 2.2222222222222223e-05, "loss": 0.0678, "step": 350 }, { "epoch": 6.0, "eval_loss": 0.11094386130571365, "eval_runtime": 448.7599, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "step": 384 }, { "epoch": 6.252465483234714, "grad_norm": 0.20627053081989288, "learning_rate": 1.8253968253968254e-05, "loss": 0.0628, "step": 400 }, { "epoch": 7.0, "eval_loss": 0.10987533628940582, "eval_runtime": 446.2745, "eval_samples_per_second": 0.56, "eval_steps_per_second": 0.56, "step": 448 }, { "epoch": 7.031558185404339, "grad_norm": 0.18030017614364624, "learning_rate": 1.4285714285714285e-05, "loss": 0.0578, "step": 450 }, { "epoch": 7.82051282051282, "grad_norm": 0.20803600549697876, "learning_rate": 1.0317460317460318e-05, "loss": 0.0563, "step": 500 }, { "epoch": 8.0, "eval_loss": 0.11132891476154327, "eval_runtime": 449.0806, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "step": 512 }, { "epoch": 8.599605522682445, "grad_norm": 0.2382732778787613, "learning_rate": 6.349206349206349e-06, "loss": 0.0518, "step": 550 }, { "epoch": 9.0, "eval_loss": 0.11033967137336731, "eval_runtime": 447.1892, "eval_samples_per_second": 0.559, "eval_steps_per_second": 0.559, "step": 576 } ], "logging_steps": 50, "max_steps": 630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2377604877018726e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }