{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1269,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07092198581560284,
      "grad_norm": 9.762572248475378,
      "learning_rate": 2.3622047244094486e-07,
      "loss": 1.7706,
      "step": 30
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 3.4583552539096543,
      "learning_rate": 4.7244094488188973e-07,
      "loss": 1.4434,
      "step": 60
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 1.3598550426281755,
      "learning_rate": 7.086614173228345e-07,
      "loss": 1.0759,
      "step": 90
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 1.3743463190702838,
      "learning_rate": 9.448818897637795e-07,
      "loss": 1.0064,
      "step": 120
    },
    {
      "epoch": 0.3546099290780142,
      "grad_norm": 1.1981662053288118,
      "learning_rate": 9.989994971559894e-07,
      "loss": 1.0093,
      "step": 150
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 1.2583045292839343,
      "learning_rate": 9.946949460850346e-07,
      "loss": 0.9817,
      "step": 180
    },
    {
      "epoch": 0.49645390070921985,
      "grad_norm": 1.1677550744491674,
      "learning_rate": 9.87022946156295e-07,
      "loss": 0.9747,
      "step": 210
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 1.216779086187824,
      "learning_rate": 9.760357216093787e-07,
      "loss": 0.9827,
      "step": 240
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 1.2452036131959259,
      "learning_rate": 9.61808063822373e-07,
      "loss": 0.9718,
      "step": 270
    },
    {
      "epoch": 0.7092198581560284,
      "grad_norm": 1.1895584707820317,
      "learning_rate": 9.444368221978101e-07,
      "loss": 0.9635,
      "step": 300
    },
    {
      "epoch": 0.7801418439716312,
      "grad_norm": 1.1291128908214976,
      "learning_rate": 9.240402448969654e-07,
      "loss": 0.9578,
      "step": 330
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 1.2303793421613098,
      "learning_rate": 9.007571739101968e-07,
      "loss": 0.9572,
      "step": 360
    },
    {
      "epoch": 0.9219858156028369,
      "grad_norm": 1.247221208470203,
      "learning_rate": 8.747460999425753e-07,
      "loss": 0.9586,
      "step": 390
    },
    {
      "epoch": 0.9929078014184397,
      "grad_norm": 1.1333241549863073,
      "learning_rate": 8.461840835483178e-07,
      "loss": 0.9607,
      "step": 420
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 1.1412157130651417,
      "learning_rate": 8.152655498579901e-07,
      "loss": 0.9323,
      "step": 450
    },
    {
      "epoch": 1.1347517730496455,
      "grad_norm": 1.210106453824033,
      "learning_rate": 7.822009651029209e-07,
      "loss": 0.9235,
      "step": 480
    },
    {
      "epoch": 1.2056737588652482,
      "grad_norm": 1.2218781729669022,
      "learning_rate": 7.47215403945885e-07,
      "loss": 0.9202,
      "step": 510
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 1.2768696264008492,
      "learning_rate": 7.10547017370412e-07,
      "loss": 0.9303,
      "step": 540
    },
    {
      "epoch": 1.3475177304964538,
      "grad_norm": 1.2147394699930192,
      "learning_rate": 6.72445411557983e-07,
      "loss": 0.9297,
      "step": 570
    },
    {
      "epoch": 1.4184397163120568,
      "grad_norm": 1.0413090675263184,
      "learning_rate": 6.331699487882987e-07,
      "loss": 0.9313,
      "step": 600
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 1.2468465591864386,
      "learning_rate": 5.929879819285943e-07,
      "loss": 0.9156,
      "step": 630
    },
    {
      "epoch": 1.5602836879432624,
      "grad_norm": 1.1709179738830184,
      "learning_rate": 5.521730345300521e-07,
      "loss": 0.9214,
      "step": 660
    },
    {
      "epoch": 1.6312056737588652,
      "grad_norm": 1.196304661417416,
      "learning_rate": 5.110029389196154e-07,
      "loss": 0.927,
      "step": 690
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 1.1997915383164859,
      "learning_rate": 4.6975794496143883e-07,
      "loss": 0.9301,
      "step": 720
    },
    {
      "epoch": 1.773049645390071,
      "grad_norm": 1.1657232269036162,
      "learning_rate": 4.2871881236186827e-07,
      "loss": 0.9247,
      "step": 750
    },
    {
      "epoch": 1.8439716312056738,
      "grad_norm": 1.1793234089551605,
      "learning_rate": 3.88164899503864e-07,
      "loss": 0.9186,
      "step": 780
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 1.1862988949349038,
      "learning_rate": 3.483722618204018e-07,
      "loss": 0.9165,
      "step": 810
    },
    {
      "epoch": 1.9858156028368794,
      "grad_norm": 1.1783323907513619,
      "learning_rate": 3.096117726514577e-07,
      "loss": 0.9125,
      "step": 840
    },
    {
      "epoch": 2.0567375886524824,
      "grad_norm": 1.1879358719289141,
      "learning_rate": 2.721472793761329e-07,
      "loss": 0.8983,
      "step": 870
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 1.1919968255744133,
      "learning_rate": 2.362338073713509e-07,
      "loss": 0.9086,
      "step": 900
    },
    {
      "epoch": 2.198581560283688,
      "grad_norm": 1.153629785682646,
      "learning_rate": 2.0211582402300005e-07,
      "loss": 0.9015,
      "step": 930
    },
    {
      "epoch": 2.269503546099291,
      "grad_norm": 1.1627654464467705,
      "learning_rate": 1.7002557460660927e-07,
      "loss": 0.8982,
      "step": 960
    },
    {
      "epoch": 2.3404255319148937,
      "grad_norm": 1.2071255033933053,
      "learning_rate": 1.4018150136542062e-07,
      "loss": 0.9047,
      "step": 990
    },
    {
      "epoch": 2.4113475177304964,
      "grad_norm": 1.1386036620767097,
      "learning_rate": 1.1278675654738578e-07,
      "loss": 0.9004,
      "step": 1020
    },
    {
      "epoch": 2.482269503546099,
      "grad_norm": 1.2209668000534488,
      "learning_rate": 8.802781952302779e-08,
      "loss": 0.8962,
      "step": 1050
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 1.1443962953177318,
      "learning_rate": 6.607322739761217e-08,
      "loss": 0.8977,
      "step": 1080
    },
    {
      "epoch": 2.624113475177305,
      "grad_norm": 1.1513325678220279,
      "learning_rate": 4.70724277585075e-08,
      "loss": 0.8893,
      "step": 1110
    },
    {
      "epoch": 2.6950354609929077,
      "grad_norm": 1.18721940392998,
      "learning_rate": 3.1154761367223614e-08,
      "loss": 0.8896,
      "step": 1140
    },
    {
      "epoch": 2.7659574468085104,
      "grad_norm": 1.1435969045652372,
      "learning_rate": 1.8428581721066483e-08,
      "loss": 0.8918,
      "step": 1170
    },
    {
      "epoch": 2.8368794326241136,
      "grad_norm": 1.19689860096638,
      "learning_rate": 8.980517477661542e-09,
      "loss": 0.9105,
      "step": 1200
    },
    {
      "epoch": 2.9078014184397163,
      "grad_norm": 1.1773544295635308,
      "learning_rate": 2.8748827631111526e-09,
      "loss": 0.8977,
      "step": 1230
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 1.3323463631749488,
      "learning_rate": 1.5323937789102347e-10,
      "loss": 0.9024,
      "step": 1260
    },
    {
      "epoch": 3.0,
      "step": 1269,
      "total_flos": 186297695600640.0,
      "train_loss": 0.9641318550440531,
      "train_runtime": 8318.19,
      "train_samples_per_second": 19.527,
      "train_steps_per_second": 0.153
    }
  ],
  "logging_steps": 30,
  "max_steps": 1269,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 186297695600640.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}