| { | |
| "best_metric": 1.5053555965423584, | |
| "best_model_checkpoint": "./tmp/clm-gpt2/checkpoint-11500", | |
| "epoch": 2.9996095275283094, | |
| "eval_steps": 500, | |
| "global_step": 11523, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13015749056358195, | |
| "grad_norm": 0.3655782639980316, | |
| "learning_rate": 0.0015, | |
| "loss": 2.4536, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13015749056358195, | |
| "eval_accuracy": 0.4954853308958881, | |
| "eval_loss": 2.1316182613372803, | |
| "eval_runtime": 630.0742, | |
| "eval_samples_per_second": 24.442, | |
| "eval_steps_per_second": 6.11, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2603149811271639, | |
| "grad_norm": 0.2795959711074829, | |
| "learning_rate": 0.003, | |
| "loss": 2.1054, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2603149811271639, | |
| "eval_accuracy": 0.5220788107298371, | |
| "eval_loss": 2.0124123096466064, | |
| "eval_runtime": 571.1208, | |
| "eval_samples_per_second": 26.965, | |
| "eval_steps_per_second": 6.741, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3904724716907458, | |
| "grad_norm": 0.2532959282398224, | |
| "learning_rate": 0.0028574550983559823, | |
| "loss": 1.9756, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3904724716907458, | |
| "eval_accuracy": 0.5452574551548158, | |
| "eval_loss": 1.902522087097168, | |
| "eval_runtime": 521.1164, | |
| "eval_samples_per_second": 29.552, | |
| "eval_steps_per_second": 7.388, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5206299622543278, | |
| "grad_norm": 0.1999560445547104, | |
| "learning_rate": 0.002714910196711964, | |
| "loss": 1.8863, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5206299622543278, | |
| "eval_accuracy": 0.5600627134351475, | |
| "eval_loss": 1.836737871170044, | |
| "eval_runtime": 521.7562, | |
| "eval_samples_per_second": 29.516, | |
| "eval_steps_per_second": 7.379, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6507874528179096, | |
| "grad_norm": 0.18795448541641235, | |
| "learning_rate": 0.0025723652950679463, | |
| "loss": 1.8283, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6507874528179096, | |
| "eval_accuracy": 0.5686439806527783, | |
| "eval_loss": 1.7926692962646484, | |
| "eval_runtime": 521.5355, | |
| "eval_samples_per_second": 29.528, | |
| "eval_steps_per_second": 7.382, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7809449433814916, | |
| "grad_norm": 0.21605215966701508, | |
| "learning_rate": 0.0024298203934239285, | |
| "loss": 1.7893, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7809449433814916, | |
| "eval_accuracy": 0.5759645681786444, | |
| "eval_loss": 1.758531093597412, | |
| "eval_runtime": 521.9502, | |
| "eval_samples_per_second": 29.505, | |
| "eval_steps_per_second": 7.376, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9111024339450735, | |
| "grad_norm": 0.17780712246894836, | |
| "learning_rate": 0.0022872754917799107, | |
| "loss": 1.7555, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9111024339450735, | |
| "eval_accuracy": 0.581453644107603, | |
| "eval_loss": 1.732818841934204, | |
| "eval_runtime": 521.3914, | |
| "eval_samples_per_second": 29.536, | |
| "eval_steps_per_second": 7.384, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.0412599245086556, | |
| "grad_norm": 0.17426873743534088, | |
| "learning_rate": 0.002144730590135893, | |
| "loss": 1.7143, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.0412599245086556, | |
| "eval_accuracy": 0.5881837859110587, | |
| "eval_loss": 1.701590895652771, | |
| "eval_runtime": 522.4527, | |
| "eval_samples_per_second": 29.476, | |
| "eval_steps_per_second": 7.369, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.1714174150722374, | |
| "grad_norm": 0.1862766295671463, | |
| "learning_rate": 0.002002185688491875, | |
| "loss": 1.6697, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.1714174150722374, | |
| "eval_accuracy": 0.592953625065062, | |
| "eval_loss": 1.6812903881072998, | |
| "eval_runtime": 520.916, | |
| "eval_samples_per_second": 29.563, | |
| "eval_steps_per_second": 7.391, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.3015749056358192, | |
| "grad_norm": 0.17118503153324127, | |
| "learning_rate": 0.0018596407868478571, | |
| "loss": 1.6584, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.3015749056358192, | |
| "eval_accuracy": 0.5972079191580658, | |
| "eval_loss": 1.6614623069763184, | |
| "eval_runtime": 520.2098, | |
| "eval_samples_per_second": 29.603, | |
| "eval_steps_per_second": 7.401, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.4317323961994013, | |
| "grad_norm": 0.17990975081920624, | |
| "learning_rate": 0.0017170958852038393, | |
| "loss": 1.6438, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.4317323961994013, | |
| "eval_accuracy": 0.6009242614667835, | |
| "eval_loss": 1.6421809196472168, | |
| "eval_runtime": 520.0082, | |
| "eval_samples_per_second": 29.615, | |
| "eval_steps_per_second": 7.404, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.5618898867629833, | |
| "grad_norm": 0.1535763144493103, | |
| "learning_rate": 0.0015745509835598213, | |
| "loss": 1.6184, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.5618898867629833, | |
| "eval_accuracy": 0.6048871412067893, | |
| "eval_loss": 1.6235625743865967, | |
| "eval_runtime": 518.9862, | |
| "eval_samples_per_second": 29.673, | |
| "eval_steps_per_second": 7.418, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.6920473773265652, | |
| "grad_norm": 0.16722074151039124, | |
| "learning_rate": 0.0014320060819158036, | |
| "loss": 1.6086, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.6920473773265652, | |
| "eval_accuracy": 0.6081556664254611, | |
| "eval_loss": 1.6102288961410522, | |
| "eval_runtime": 519.7651, | |
| "eval_samples_per_second": 29.629, | |
| "eval_steps_per_second": 7.407, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.822204867890147, | |
| "grad_norm": 0.1687261015176773, | |
| "learning_rate": 0.0012894611802717856, | |
| "loss": 1.5882, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.822204867890147, | |
| "eval_accuracy": 0.6113670005458862, | |
| "eval_loss": 1.5938153266906738, | |
| "eval_runtime": 518.0575, | |
| "eval_samples_per_second": 29.726, | |
| "eval_steps_per_second": 7.432, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.9523623584537289, | |
| "grad_norm": 0.17714297771453857, | |
| "learning_rate": 0.0011469162786277678, | |
| "loss": 1.5719, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.9523623584537289, | |
| "eval_accuracy": 0.6148032270759544, | |
| "eval_loss": 1.578608751296997, | |
| "eval_runtime": 518.0664, | |
| "eval_samples_per_second": 29.726, | |
| "eval_steps_per_second": 7.431, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.082519849017311, | |
| "grad_norm": 0.16494958102703094, | |
| "learning_rate": 0.00100437137698375, | |
| "loss": 1.5272, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.082519849017311, | |
| "eval_accuracy": 0.617450838506557, | |
| "eval_loss": 1.5717881917953491, | |
| "eval_runtime": 518.5565, | |
| "eval_samples_per_second": 29.698, | |
| "eval_steps_per_second": 7.424, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.212677339580893, | |
| "grad_norm": 0.14182300865650177, | |
| "learning_rate": 0.0008618264753397321, | |
| "loss": 1.4971, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.212677339580893, | |
| "eval_accuracy": 0.6203893564890632, | |
| "eval_loss": 1.5592997074127197, | |
| "eval_runtime": 517.6299, | |
| "eval_samples_per_second": 29.751, | |
| "eval_steps_per_second": 7.438, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.342834830144475, | |
| "grad_norm": 0.15439514815807343, | |
| "learning_rate": 0.0007192815736957142, | |
| "loss": 1.4893, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.342834830144475, | |
| "eval_accuracy": 0.6227424432849653, | |
| "eval_loss": 1.5474613904953003, | |
| "eval_runtime": 517.7895, | |
| "eval_samples_per_second": 29.742, | |
| "eval_steps_per_second": 7.435, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.4729923207080566, | |
| "grad_norm": 0.1444994956254959, | |
| "learning_rate": 0.0005767366720516963, | |
| "loss": 1.4808, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.4729923207080566, | |
| "eval_accuracy": 0.6250960378819617, | |
| "eval_loss": 1.5381561517715454, | |
| "eval_runtime": 518.2423, | |
| "eval_samples_per_second": 29.716, | |
| "eval_steps_per_second": 7.429, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.6031498112716385, | |
| "grad_norm": 0.14975884556770325, | |
| "learning_rate": 0.00043419177040767847, | |
| "loss": 1.4689, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.6031498112716385, | |
| "eval_accuracy": 0.6274774980640083, | |
| "eval_loss": 1.5274103879928589, | |
| "eval_runtime": 518.577, | |
| "eval_samples_per_second": 29.697, | |
| "eval_steps_per_second": 7.424, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.7333073018352207, | |
| "grad_norm": 0.14258165657520294, | |
| "learning_rate": 0.00029164686876366057, | |
| "loss": 1.4572, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.7333073018352207, | |
| "eval_accuracy": 0.6297954196341293, | |
| "eval_loss": 1.5169227123260498, | |
| "eval_runtime": 518.5892, | |
| "eval_samples_per_second": 29.696, | |
| "eval_steps_per_second": 7.424, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.8634647923988026, | |
| "grad_norm": 0.14571569859981537, | |
| "learning_rate": 0.00014910196711964268, | |
| "loss": 1.4488, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.8634647923988026, | |
| "eval_accuracy": 0.6314827791953892, | |
| "eval_loss": 1.5106253623962402, | |
| "eval_runtime": 519.3798, | |
| "eval_samples_per_second": 29.651, | |
| "eval_steps_per_second": 7.413, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.9936222829623844, | |
| "grad_norm": 0.15030799806118011, | |
| "learning_rate": 6.557065475624823e-06, | |
| "loss": 1.4465, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.9936222829623844, | |
| "eval_accuracy": 0.6325420522781227, | |
| "eval_loss": 1.5053555965423584, | |
| "eval_runtime": 518.7173, | |
| "eval_samples_per_second": 29.689, | |
| "eval_steps_per_second": 7.422, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.9996095275283094, | |
| "step": 11523, | |
| "total_flos": 9.6346733543424e+16, | |
| "train_loss": 1.6814149037051365, | |
| "train_runtime": 28276.4686, | |
| "train_samples_per_second": 6.521, | |
| "train_steps_per_second": 0.408 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 11523, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.6346733543424e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |