{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8764241893076249,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.035056967572304996,
      "grad_norm": 12.451324462890625,
      "learning_rate": 1.8446601941747574e-06,
      "loss": 3.237,
      "step": 20
    },
    {
      "epoch": 0.07011393514460999,
      "grad_norm": 4.131791114807129,
      "learning_rate": 3.7864077669902915e-06,
      "loss": 2.5266,
      "step": 40
    },
    {
      "epoch": 0.10517090271691498,
      "grad_norm": 2.2451958656311035,
      "learning_rate": 5.728155339805825e-06,
      "loss": 1.8755,
      "step": 60
    },
    {
      "epoch": 0.14022787028921999,
      "grad_norm": 1.4421552419662476,
      "learning_rate": 7.66990291262136e-06,
      "loss": 1.3649,
      "step": 80
    },
    {
      "epoch": 0.175284837861525,
      "grad_norm": 0.9530179500579834,
      "learning_rate": 9.611650485436894e-06,
      "loss": 1.0674,
      "step": 100
    },
    {
      "epoch": 0.21034180543382996,
      "grad_norm": 0.7194477319717407,
      "learning_rate": 9.99942798060303e-06,
      "loss": 0.9241,
      "step": 120
    },
    {
      "epoch": 0.24539877300613497,
      "grad_norm": 0.6556061506271362,
      "learning_rate": 9.997104376116195e-06,
      "loss": 0.8575,
      "step": 140
    },
    {
      "epoch": 0.28045574057843997,
      "grad_norm": 0.5718048810958862,
      "learning_rate": 9.992994265395959e-06,
      "loss": 0.829,
      "step": 160
    },
    {
      "epoch": 0.31551270815074495,
      "grad_norm": 0.4922148287296295,
      "learning_rate": 9.987099117840969e-06,
      "loss": 0.8034,
      "step": 180
    },
    {
      "epoch": 0.35056967572305,
      "grad_norm": 0.47302234172821045,
      "learning_rate": 9.979421041015336e-06,
      "loss": 0.7839,
      "step": 200
    },
    {
      "epoch": 0.38562664329535495,
      "grad_norm": 0.49009189009666443,
      "learning_rate": 9.969962779895172e-06,
      "loss": 0.768,
      "step": 220
    },
    {
      "epoch": 0.42068361086765993,
      "grad_norm": 0.4963654577732086,
      "learning_rate": 9.958727715887218e-06,
      "loss": 0.7628,
      "step": 240
    },
    {
      "epoch": 0.45574057843996496,
      "grad_norm": 0.5206854343414307,
      "learning_rate": 9.94571986561998e-06,
      "loss": 0.7488,
      "step": 260
    },
    {
      "epoch": 0.49079754601226994,
      "grad_norm": 0.48924869298934937,
      "learning_rate": 9.930943879507748e-06,
      "loss": 0.7436,
      "step": 280
    },
    {
      "epoch": 0.5258545135845749,
      "grad_norm": 0.43540337681770325,
      "learning_rate": 9.914405040088026e-06,
      "loss": 0.7375,
      "step": 300
    },
    {
      "epoch": 0.5609114811568799,
      "grad_norm": 0.44258421659469604,
      "learning_rate": 9.896109260132993e-06,
      "loss": 0.7277,
      "step": 320
    },
    {
      "epoch": 0.595968448729185,
      "grad_norm": 0.4955386519432068,
      "learning_rate": 9.876063080535627e-06,
      "loss": 0.7284,
      "step": 340
    },
    {
      "epoch": 0.6310254163014899,
      "grad_norm": 0.5027541518211365,
      "learning_rate": 9.85427366797129e-06,
      "loss": 0.7231,
      "step": 360
    },
    {
      "epoch": 0.6660823838737949,
      "grad_norm": 0.4675957262516022,
      "learning_rate": 9.830748812335576e-06,
      "loss": 0.7212,
      "step": 380
    },
    {
      "epoch": 0.7011393514461,
      "grad_norm": 0.4283595383167267,
      "learning_rate": 9.805496923959363e-06,
      "loss": 0.7164,
      "step": 400
    },
    {
      "epoch": 0.7361963190184049,
      "grad_norm": 0.452084481716156,
      "learning_rate": 9.778527030602049e-06,
      "loss": 0.711,
      "step": 420
    },
    {
      "epoch": 0.7712532865907099,
      "grad_norm": 0.4737929105758667,
      "learning_rate": 9.74984877422405e-06,
      "loss": 0.7084,
      "step": 440
    },
    {
      "epoch": 0.8063102541630149,
      "grad_norm": 0.4964485466480255,
      "learning_rate": 9.719472407539725e-06,
      "loss": 0.7028,
      "step": 460
    },
    {
      "epoch": 0.8413672217353199,
      "grad_norm": 0.44363030791282654,
      "learning_rate": 9.68740879035194e-06,
      "loss": 0.7045,
      "step": 480
    },
    {
      "epoch": 0.8764241893076249,
      "grad_norm": 0.5004998445510864,
      "learning_rate": 9.6536693856696e-06,
      "loss": 0.6937,
      "step": 500
    }
  ],
  "logging_steps": 20,
  "max_steps": 3426,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.385590809427968e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}