| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 267, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05649717514124294, | |
| "grad_norm": 1.9709218740463257, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 2.0949, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.11299435028248588, | |
| "grad_norm": 0.6447364687919617, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 2.0169, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 0.4998757839202881, | |
| "learning_rate": 1.5555555555555555e-05, | |
| "loss": 1.9899, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.22598870056497175, | |
| "grad_norm": 0.45615148544311523, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 1.9512, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2824858757062147, | |
| "grad_norm": 0.40060025453567505, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.8531, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.3389830508474576, | |
| "grad_norm": 0.3707159161567688, | |
| "learning_rate": 2.9994859874633358e-05, | |
| "loss": 1.8622, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.3954802259887006, | |
| "grad_norm": 0.37233778834342957, | |
| "learning_rate": 2.9937073913619926e-05, | |
| "loss": 1.7941, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.4519774011299435, | |
| "grad_norm": 0.47624891996383667, | |
| "learning_rate": 2.981532510892707e-05, | |
| "loss": 1.6613, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5084745762711864, | |
| "grad_norm": 0.4137255847454071, | |
| "learning_rate": 2.963013480762769e-05, | |
| "loss": 1.6513, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.5649717514124294, | |
| "grad_norm": 0.5203472971916199, | |
| "learning_rate": 2.9382296023022895e-05, | |
| "loss": 1.5814, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6214689265536724, | |
| "grad_norm": 0.558957576751709, | |
| "learning_rate": 2.9072870038837266e-05, | |
| "loss": 1.5635, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.5810022950172424, | |
| "learning_rate": 2.8703181864639013e-05, | |
| "loss": 1.4292, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7344632768361582, | |
| "grad_norm": 0.6703647971153259, | |
| "learning_rate": 2.827481456194563e-05, | |
| "loss": 1.377, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.7909604519774012, | |
| "grad_norm": 0.6997694969177246, | |
| "learning_rate": 2.7789602465311384e-05, | |
| "loss": 1.2762, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 0.7786891460418701, | |
| "learning_rate": 2.7249623327425187e-05, | |
| "loss": 1.2512, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.903954802259887, | |
| "grad_norm": 0.9769183993339539, | |
| "learning_rate": 2.6657189421854564e-05, | |
| "loss": 1.1935, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.96045197740113, | |
| "grad_norm": 1.0085625648498535, | |
| "learning_rate": 2.6014837641535285e-05, | |
| "loss": 1.1678, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.0112994350282485, | |
| "grad_norm": 1.1803938150405884, | |
| "learning_rate": 2.5325318635406308e-05, | |
| "loss": 1.0728, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0677966101694916, | |
| "grad_norm": 1.2611010074615479, | |
| "learning_rate": 2.4591585029708772e-05, | |
| "loss": 0.9305, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.1242937853107344, | |
| "grad_norm": 1.2549060583114624, | |
| "learning_rate": 2.3816778784387097e-05, | |
| "loss": 0.9051, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1807909604519775, | |
| "grad_norm": 1.189209222793579, | |
| "learning_rate": 2.3004217738734173e-05, | |
| "loss": 0.857, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.2372881355932204, | |
| "grad_norm": 1.4436615705490112, | |
| "learning_rate": 2.2157381403894126e-05, | |
| "loss": 0.7732, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.2937853107344632, | |
| "grad_norm": 1.2701079845428467, | |
| "learning_rate": 2.1279896063061422e-05, | |
| "loss": 0.7486, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.3502824858757063, | |
| "grad_norm": 1.6101775169372559, | |
| "learning_rate": 2.03755192431795e-05, | |
| "loss": 0.703, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.4067796610169492, | |
| "grad_norm": 1.269362211227417, | |
| "learning_rate": 1.9448123624633565e-05, | |
| "loss": 0.6978, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.463276836158192, | |
| "grad_norm": 1.514592170715332, | |
| "learning_rate": 1.8501680457838582e-05, | |
| "loss": 0.6686, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.5197740112994351, | |
| "grad_norm": 1.4410349130630493, | |
| "learning_rate": 1.7540242557735366e-05, | |
| "loss": 0.5763, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.576271186440678, | |
| "grad_norm": 1.8117526769638062, | |
| "learning_rate": 1.6567926949014805e-05, | |
| "loss": 0.5527, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6327683615819208, | |
| "grad_norm": 1.3523154258728027, | |
| "learning_rate": 1.558889723638603e-05, | |
| "loss": 0.5502, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.689265536723164, | |
| "grad_norm": 1.4709467887878418, | |
| "learning_rate": 1.4607345775381906e-05, | |
| "loss": 0.5152, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.7457627118644068, | |
| "grad_norm": 1.6748976707458496, | |
| "learning_rate": 1.3627475720048966e-05, | |
| "loss": 0.5193, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.8022598870056497, | |
| "grad_norm": 1.3551208972930908, | |
| "learning_rate": 1.2653483024396535e-05, | |
| "loss": 0.4585, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.8587570621468927, | |
| "grad_norm": 1.7820173501968384, | |
| "learning_rate": 1.1689538474677485e-05, | |
| "loss": 0.4549, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.9152542372881356, | |
| "grad_norm": 1.6256532669067383, | |
| "learning_rate": 1.073976982944116e-05, | |
| "loss": 0.4349, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.9717514124293785, | |
| "grad_norm": 1.5240833759307861, | |
| "learning_rate": 9.808244143837603e-06, | |
| "loss": 0.3836, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.022598870056497, | |
| "grad_norm": 1.3894063234329224, | |
| "learning_rate": 8.898950353863e-06, | |
| "loss": 0.3354, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.07909604519774, | |
| "grad_norm": 1.5614291429519653, | |
| "learning_rate": 8.015782195123329e-06, | |
| "loss": 0.3003, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.135593220338983, | |
| "grad_norm": 1.3802332878112793, | |
| "learning_rate": 7.1625215292607685e-06, | |
| "loss": 0.2765, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.1920903954802258, | |
| "grad_norm": 1.4348934888839722, | |
| "learning_rate": 6.3428221494414976e-06, | |
| "loss": 0.2754, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.248587570621469, | |
| "grad_norm": 1.1548937559127808, | |
| "learning_rate": 5.560194134252441e-06, | |
| "loss": 0.2583, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.305084745762712, | |
| "grad_norm": 1.5925812721252441, | |
| "learning_rate": 4.817988817005873e-06, | |
| "loss": 0.2607, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.361581920903955, | |
| "grad_norm": 1.2499586343765259, | |
| "learning_rate": 4.119384434815689e-06, | |
| "loss": 0.2518, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.4180790960451977, | |
| "grad_norm": 1.2626211643218994, | |
| "learning_rate": 3.4673725188981083e-06, | |
| "loss": 0.2318, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 2.4745762711864407, | |
| "grad_norm": 1.214537262916565, | |
| "learning_rate": 2.86474508437579e-06, | |
| "loss": 0.2329, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.5310734463276834, | |
| "grad_norm": 1.0615917444229126, | |
| "learning_rate": 2.314082674440402e-06, | |
| "loss": 0.2075, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.5875706214689265, | |
| "grad_norm": 1.1500240564346313, | |
| "learning_rate": 1.817743310070521e-06, | |
| "loss": 0.2267, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.6440677966101696, | |
| "grad_norm": 1.0365818738937378, | |
| "learning_rate": 1.3778523926237797e-06, | |
| "loss": 0.1853, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.7005649717514126, | |
| "grad_norm": 1.3539810180664062, | |
| "learning_rate": 9.962936025419755e-07, | |
| "loss": 0.2178, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.7570621468926553, | |
| "grad_norm": 1.1099207401275635, | |
| "learning_rate": 6.747008331422006e-07, | |
| "loss": 0.2211, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.8135593220338984, | |
| "grad_norm": 1.1119697093963623, | |
| "learning_rate": 4.1445119403485165e-07, | |
| "loss": 0.2054, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.870056497175141, | |
| "grad_norm": 1.1232099533081055, | |
| "learning_rate": 2.1665911412883376e-07, | |
| "loss": 0.2283, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.926553672316384, | |
| "grad_norm": 1.1913937330245972, | |
| "learning_rate": 8.217156947590066e-08, | |
| "loss": 0.1973, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.983050847457627, | |
| "grad_norm": 1.1854861974716187, | |
| "learning_rate": 1.1564456389156486e-08, | |
| "loss": 0.2062, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 267, | |
| "total_flos": 4.7428635473844634e+17, | |
| "train_loss": 0.821970669629422, | |
| "train_runtime": 236.2192, | |
| "train_samples_per_second": 35.967, | |
| "train_steps_per_second": 1.13 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 267, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.7428635473844634e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |