{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9710982658959537,
  "eval_steps": 500,
  "global_step": 258,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.057803468208092484,
      "grad_norm": 0.7586865425109863,
      "learning_rate": 9.615384615384616e-06,
      "loss": 1.5305,
      "step": 5
    },
    {
      "epoch": 0.11560693641618497,
      "grad_norm": 1.5047568082809448,
      "learning_rate": 1.923076923076923e-05,
      "loss": 1.3586,
      "step": 10
    },
    {
      "epoch": 0.17341040462427745,
      "grad_norm": 1.0730587244033813,
      "learning_rate": 2.8846153846153845e-05,
      "loss": 1.3282,
      "step": 15
    },
    {
      "epoch": 0.23121387283236994,
      "grad_norm": 1.0204637050628662,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.246,
      "step": 20
    },
    {
      "epoch": 0.28901734104046245,
      "grad_norm": 0.5301410555839539,
      "learning_rate": 4.8076923076923084e-05,
      "loss": 1.3034,
      "step": 25
    },
    {
      "epoch": 0.3468208092485549,
      "grad_norm": 0.723696768283844,
      "learning_rate": 4.996333534627809e-05,
      "loss": 1.1816,
      "step": 30
    },
    {
      "epoch": 0.4046242774566474,
      "grad_norm": 1.3614885807037354,
      "learning_rate": 4.981456948708014e-05,
      "loss": 1.2341,
      "step": 35
    },
    {
      "epoch": 0.4624277456647399,
      "grad_norm": 1.0017260313034058,
      "learning_rate": 4.95520920685539e-05,
      "loss": 1.2838,
      "step": 40
    },
    {
      "epoch": 0.5202312138728323,
      "grad_norm": 0.6403581500053406,
      "learning_rate": 4.9177105880720173e-05,
      "loss": 1.2135,
      "step": 45
    },
    {
      "epoch": 0.5780346820809249,
      "grad_norm": 0.5783727765083313,
      "learning_rate": 4.869132927957007e-05,
      "loss": 1.11,
      "step": 50
    },
    {
      "epoch": 0.6358381502890174,
      "grad_norm": 0.7453054189682007,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 1.2103,
      "step": 55
    },
    {
      "epoch": 0.6936416184971098,
      "grad_norm": 1.236171007156372,
      "learning_rate": 4.73968065189672e-05,
      "loss": 1.2226,
      "step": 60
    },
    {
      "epoch": 0.7514450867052023,
      "grad_norm": 0.44787439703941345,
      "learning_rate": 4.6593992447184586e-05,
      "loss": 1.1403,
      "step": 65
    },
    {
      "epoch": 0.8092485549132948,
      "grad_norm": 0.7945877313613892,
      "learning_rate": 4.5692224953922266e-05,
      "loss": 1.1933,
      "step": 70
    },
    {
      "epoch": 0.8670520231213873,
      "grad_norm": 1.6053190231323242,
      "learning_rate": 4.469563634491554e-05,
      "loss": 1.1941,
      "step": 75
    },
    {
      "epoch": 0.9248554913294798,
      "grad_norm": 1.0948492288589478,
      "learning_rate": 4.360879343905676e-05,
      "loss": 1.3349,
      "step": 80
    },
    {
      "epoch": 0.9826589595375722,
      "grad_norm": 0.726474940776825,
      "learning_rate": 4.243667664116956e-05,
      "loss": 1.3004,
      "step": 85
    },
    {
      "epoch": 1.0346820809248556,
      "grad_norm": 1.4559762477874756,
      "learning_rate": 4.118465711954569e-05,
      "loss": 1.0116,
      "step": 90
    },
    {
      "epoch": 1.092485549132948,
      "grad_norm": 1.5781135559082031,
      "learning_rate": 3.985847219282725e-05,
      "loss": 0.8764,
      "step": 95
    },
    {
      "epoch": 1.1502890173410405,
      "grad_norm": 0.6205704212188721,
      "learning_rate": 3.8464199039022605e-05,
      "loss": 0.9051,
      "step": 100
    },
    {
      "epoch": 1.208092485549133,
      "grad_norm": 1.4496972560882568,
      "learning_rate": 3.700822684713349e-05,
      "loss": 0.9408,
      "step": 105
    },
    {
      "epoch": 1.2658959537572254,
      "grad_norm": 0.4967881143093109,
      "learning_rate": 3.5497227539006614e-05,
      "loss": 0.7376,
      "step": 110
    },
    {
      "epoch": 1.323699421965318,
      "grad_norm": 1.4739594459533691,
      "learning_rate": 3.3938125195576e-05,
      "loss": 0.9192,
      "step": 115
    },
    {
      "epoch": 1.3815028901734103,
      "grad_norm": 1.443954348564148,
      "learning_rate": 3.233806432759837e-05,
      "loss": 0.7502,
      "step": 120
    },
    {
      "epoch": 1.439306358381503,
      "grad_norm": 0.8857870697975159,
      "learning_rate": 3.070437713627965e-05,
      "loss": 0.7896,
      "step": 125
    },
    {
      "epoch": 1.4971098265895955,
      "grad_norm": 0.49113208055496216,
      "learning_rate": 2.9044549913819124e-05,
      "loss": 0.7826,
      "step": 130
    },
    {
      "epoch": 1.5549132947976878,
      "grad_norm": 0.5606523752212524,
      "learning_rate": 2.7366188737839026e-05,
      "loss": 0.7622,
      "step": 135
    },
    {
      "epoch": 1.6127167630057804,
      "grad_norm": 0.5834754705429077,
      "learning_rate": 2.5676984616903367e-05,
      "loss": 0.6622,
      "step": 140
    },
    {
      "epoch": 1.6705202312138727,
      "grad_norm": 0.9665216207504272,
      "learning_rate": 2.3984678246844677e-05,
      "loss": 0.809,
      "step": 145
    },
    {
      "epoch": 1.7283236994219653,
      "grad_norm": 0.7581700086593628,
      "learning_rate": 2.2297024539401463e-05,
      "loss": 0.7095,
      "step": 150
    },
    {
      "epoch": 1.7861271676300579,
      "grad_norm": 0.8531942367553711,
      "learning_rate": 2.0621757085711734e-05,
      "loss": 0.8316,
      "step": 155
    },
    {
      "epoch": 1.8439306358381504,
      "grad_norm": 1.121618390083313,
      "learning_rate": 1.8966552717507364e-05,
      "loss": 0.7683,
      "step": 160
    },
    {
      "epoch": 1.9017341040462428,
      "grad_norm": 1.0460470914840698,
      "learning_rate": 1.7338996328405526e-05,
      "loss": 0.7656,
      "step": 165
    },
    {
      "epoch": 1.9595375722543351,
      "grad_norm": 1.1561076641082764,
      "learning_rate": 1.574654611650214e-05,
      "loss": 0.7079,
      "step": 170
    },
    {
      "epoch": 2.0115606936416186,
      "grad_norm": 0.6570937037467957,
      "learning_rate": 1.4196499407541359e-05,
      "loss": 0.7448,
      "step": 175
    },
    {
      "epoch": 2.069364161849711,
      "grad_norm": 0.8971176147460938,
      "learning_rate": 1.2695959215274816e-05,
      "loss": 0.5049,
      "step": 180
    },
    {
      "epoch": 2.1271676300578033,
      "grad_norm": 0.7877609133720398,
      "learning_rate": 1.125180169224613e-05,
      "loss": 0.4581,
      "step": 185
    },
    {
      "epoch": 2.184971098265896,
      "grad_norm": 1.099473476409912,
      "learning_rate": 9.870644620155877e-06,
      "loss": 0.4307,
      "step": 190
    },
    {
      "epoch": 2.2427745664739884,
      "grad_norm": 1.1364312171936035,
      "learning_rate": 8.558817084198387e-06,
      "loss": 0.4858,
      "step": 195
    },
    {
      "epoch": 2.300578034682081,
      "grad_norm": 0.7978260517120361,
      "learning_rate": 7.3223304703363135e-06,
      "loss": 0.4745,
      "step": 200
    },
    {
      "epoch": 2.3583815028901736,
      "grad_norm": 0.6734775900840759,
      "learning_rate": 6.166850918416406e-06,
      "loss": 0.5683,
      "step": 205
    },
    {
      "epoch": 2.416184971098266,
      "grad_norm": 0.9826372265815735,
      "learning_rate": 5.097673357358907e-06,
      "loss": 0.466,
      "step": 210
    },
    {
      "epoch": 2.4739884393063583,
      "grad_norm": 1.0204384326934814,
      "learning_rate": 4.119697241402998e-06,
      "loss": 0.4577,
      "step": 215
    },
    {
      "epoch": 2.531791907514451,
      "grad_norm": 1.038061261177063,
      "learning_rate": 3.2374040985957004e-06,
      "loss": 0.3862,
      "step": 220
    },
    {
      "epoch": 2.5895953757225434,
      "grad_norm": 0.9037131071090698,
      "learning_rate": 2.4548369944073004e-06,
      "loss": 0.4205,
      "step": 225
    },
    {
      "epoch": 2.647398843930636,
      "grad_norm": 0.7115334272384644,
      "learning_rate": 1.7755820045802145e-06,
      "loss": 0.3581,
      "step": 230
    },
    {
      "epoch": 2.705202312138728,
      "grad_norm": 0.8673137426376343,
      "learning_rate": 1.2027517821111112e-06,
      "loss": 0.4342,
      "step": 235
    },
    {
      "epoch": 2.7630057803468207,
      "grad_norm": 0.8103125691413879,
      "learning_rate": 7.389712936697129e-07,
      "loss": 0.4275,
      "step": 240
    },
    {
      "epoch": 2.820809248554913,
      "grad_norm": 0.638612687587738,
      "learning_rate": 3.8636579081657577e-07,
      "loss": 0.4198,
      "step": 245
    },
    {
      "epoch": 2.878612716763006,
      "grad_norm": 0.8524174690246582,
      "learning_rate": 1.4655107114101007e-07,
      "loss": 0.5151,
      "step": 250
    },
    {
      "epoch": 2.9364161849710984,
      "grad_norm": 0.9686955809593201,
      "learning_rate": 2.0626073947138668e-08,
      "loss": 0.4155,
      "step": 255
    },
    {
      "epoch": 2.9710982658959537,
      "step": 258,
      "total_flos": 2.017433878246195e+16,
      "train_loss": 0.8397066662477892,
      "train_runtime": 2578.3607,
      "train_samples_per_second": 0.804,
      "train_steps_per_second": 0.1
    }
  ],
  "logging_steps": 5,
  "max_steps": 258,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.017433878246195e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}