{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 5.629181211587324,
      "learning_rate": 6.4285714285714295e-06,
      "loss": 1.1669,
      "step": 10
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 3.7386173626564387,
      "learning_rate": 1.3571428571428574e-05,
      "loss": 1.0248,
      "step": 20
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 2.9491368749384907,
      "learning_rate": 1.9999197656053288e-05,
      "loss": 0.9134,
      "step": 30
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 2.7436474358647462,
      "learning_rate": 1.9903072073887507e-05,
      "loss": 0.8956,
      "step": 40
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 3.0106057087982374,
      "learning_rate": 1.9648243365358145e-05,
      "loss": 0.9241,
      "step": 50
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 2.66797989180512,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 0.8953,
      "step": 60
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 2.8575484762358982,
      "learning_rate": 1.8681289622458485e-05,
      "loss": 0.8643,
      "step": 70
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 2.773718560321069,
      "learning_rate": 1.7984660646372138e-05,
      "loss": 0.8766,
      "step": 80
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 2.586934627181951,
      "learning_rate": 1.716007232614723e-05,
      "loss": 0.8668,
      "step": 90
    },
    {
      "epoch": 1.0874316939890711,
      "grad_norm": 2.4443282978325187,
      "learning_rate": 1.6220739222215738e-05,
      "loss": 0.462,
      "step": 100
    },
    {
      "epoch": 1.1967213114754098,
      "grad_norm": 2.258878821594189,
      "learning_rate": 1.518171475428202e-05,
      "loss": 0.3453,
      "step": 110
    },
    {
      "epoch": 1.3060109289617485,
      "grad_norm": 2.3222371327876257,
      "learning_rate": 1.4059649960549071e-05,
      "loss": 0.3133,
      "step": 120
    },
    {
      "epoch": 1.4153005464480874,
      "grad_norm": 2.230291489323581,
      "learning_rate": 1.287252665407645e-05,
      "loss": 0.3143,
      "step": 130
    },
    {
      "epoch": 1.5245901639344264,
      "grad_norm": 2.1777402941170196,
      "learning_rate": 1.1639369252611552e-05,
      "loss": 0.3093,
      "step": 140
    },
    {
      "epoch": 1.633879781420765,
      "grad_norm": 2.010133026850896,
      "learning_rate": 1.0379939900007394e-05,
      "loss": 0.3067,
      "step": 150
    },
    {
      "epoch": 1.7431693989071038,
      "grad_norm": 2.732315831768471,
      "learning_rate": 9.114421765103e-06,
      "loss": 0.2925,
      "step": 160
    },
    {
      "epoch": 1.8524590163934427,
      "grad_norm": 2.355078992045447,
      "learning_rate": 7.86309559340649e-06,
      "loss": 0.2781,
      "step": 170
    },
    {
      "epoch": 1.9617486338797814,
      "grad_norm": 2.167900178805429,
      "learning_rate": 6.64601469504903e-06,
      "loss": 0.2697,
      "step": 180
    },
    {
      "epoch": 2.0655737704918034,
      "grad_norm": 1.6703544324781385,
      "learning_rate": 5.482683577537529e-06,
      "loss": 0.1434,
      "step": 190
    },
    {
      "epoch": 2.1748633879781423,
      "grad_norm": 1.5979369986158134,
      "learning_rate": 4.391745373423957e-06,
      "loss": 0.0692,
      "step": 200
    },
    {
      "epoch": 2.2841530054644807,
      "grad_norm": 1.5064975563594014,
      "learning_rate": 3.3906830720645943e-06,
      "loss": 0.0649,
      "step": 210
    },
    {
      "epoch": 2.3934426229508197,
      "grad_norm": 1.3556820461823196,
      "learning_rate": 2.4955393434233756e-06,
      "loss": 0.0656,
      "step": 220
    },
    {
      "epoch": 2.5027322404371586,
      "grad_norm": 1.747856176918053,
      "learning_rate": 1.7206594439237867e-06,
      "loss": 0.066,
      "step": 230
    },
    {
      "epoch": 2.612021857923497,
      "grad_norm": 1.3940151860135361,
      "learning_rate": 1.0784613244490816e-06,
      "loss": 0.0593,
      "step": 240
    },
    {
      "epoch": 2.721311475409836,
      "grad_norm": 1.350833871421024,
      "learning_rate": 5.792366246586512e-07,
      "loss": 0.0644,
      "step": 250
    },
    {
      "epoch": 2.830601092896175,
      "grad_norm": 1.4360599333696886,
      "learning_rate": 2.3098574281513185e-07,
      "loss": 0.0598,
      "step": 260
    },
    {
      "epoch": 2.939890710382514,
      "grad_norm": 1.7579516749085016,
      "learning_rate": 3.928962423479066e-08,
      "loss": 0.0671,
      "step": 270
    },
    {
      "epoch": 3.0,
      "step": 276,
      "total_flos": 14707949568000.0,
      "train_loss": 0.4346494403654251,
      "train_runtime": 4050.6424,
      "train_samples_per_second": 2.159,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 10,
  "max_steps": 276,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 14707949568000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}