{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 295, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.033927056827820185, "grad_norm": 0.5479967594146729, "learning_rate": 0.0001977324263038549, "loss": 1.8868, "step": 10 }, { "epoch": 0.06785411365564037, "grad_norm": 0.224782794713974, "learning_rate": 0.00019546485260770976, "loss": 1.2591, "step": 20 }, { "epoch": 0.10178117048346055, "grad_norm": 0.24415747821331024, "learning_rate": 0.00019319727891156462, "loss": 1.0807, "step": 30 }, { "epoch": 0.13570822731128074, "grad_norm": 0.22518040239810944, "learning_rate": 0.0001909297052154195, "loss": 1.0525, "step": 40 }, { "epoch": 0.16963528413910092, "grad_norm": 0.2071247398853302, "learning_rate": 0.0001886621315192744, "loss": 1.0399, "step": 50 }, { "epoch": 0.2035623409669211, "grad_norm": 0.21717332303524017, "learning_rate": 0.00018639455782312926, "loss": 0.9676, "step": 60 }, { "epoch": 0.23748939779474132, "grad_norm": 0.23291213810443878, "learning_rate": 0.00018412698412698412, "loss": 0.9593, "step": 70 }, { "epoch": 0.2714164546225615, "grad_norm": 0.24413667619228363, "learning_rate": 0.000181859410430839, "loss": 0.9712, "step": 80 }, { "epoch": 0.3053435114503817, "grad_norm": 0.24343140423297882, "learning_rate": 0.0001795918367346939, "loss": 0.9398, "step": 90 }, { "epoch": 0.33927056827820185, "grad_norm": 0.2566365897655487, "learning_rate": 0.00017732426303854876, "loss": 0.9026, "step": 100 }, { "epoch": 0.37319762510602206, "grad_norm": 0.2497139573097229, "learning_rate": 0.00017505668934240365, "loss": 0.8716, "step": 110 }, { "epoch": 0.4071246819338422, "grad_norm": 0.2669306993484497, "learning_rate": 0.0001727891156462585, "loss": 0.8351, "step": 120 }, { "epoch": 0.4410517387616624, "grad_norm": 0.27545222640037537, "learning_rate": 0.0001705215419501134, "loss": 0.8784, "step": 130 }, { "epoch": 0.47497879558948264, "grad_norm": 0.2573504149913788, "learning_rate": 0.00016825396825396826, "loss": 0.8346, "step": 140 }, { "epoch": 0.5089058524173028, "grad_norm": 0.2542634904384613, "learning_rate": 0.00016598639455782315, "loss": 0.8113, "step": 150 }, { "epoch": 0.542832909245123, "grad_norm": 0.314864844083786, "learning_rate": 0.000163718820861678, "loss": 0.8093, "step": 160 }, { "epoch": 0.5767599660729432, "grad_norm": 0.2655967175960541, "learning_rate": 0.00016145124716553287, "loss": 0.8426, "step": 170 }, { "epoch": 0.6106870229007634, "grad_norm": 0.27555230259895325, "learning_rate": 0.00015918367346938776, "loss": 0.8129, "step": 180 }, { "epoch": 0.6446140797285835, "grad_norm": 0.28799182176589966, "learning_rate": 0.00015691609977324265, "loss": 0.7833, "step": 190 }, { "epoch": 0.6785411365564037, "grad_norm": 0.27187833189964294, "learning_rate": 0.00015464852607709753, "loss": 0.7795, "step": 200 }, { "epoch": 0.712468193384224, "grad_norm": 0.28134599328041077, "learning_rate": 0.00015238095238095237, "loss": 0.8008, "step": 210 }, { "epoch": 0.7463952502120441, "grad_norm": 0.29679593443870544, "learning_rate": 0.00015011337868480726, "loss": 0.7823, "step": 220 }, { "epoch": 0.7803223070398643, "grad_norm": 0.30081549286842346, "learning_rate": 0.00014784580498866215, "loss": 0.7772, "step": 230 }, { "epoch": 0.8142493638676844, "grad_norm": 0.28066059947013855, "learning_rate": 0.000145578231292517, "loss": 0.7776, "step": 240 }, { "epoch": 0.8481764206955047, "grad_norm": 0.2732291519641876, "learning_rate": 0.0001433106575963719, "loss": 0.7719, "step": 250 }, { "epoch": 0.8821034775233249, "grad_norm": 0.2929159104824066, "learning_rate": 0.00014104308390022676, "loss": 0.778, "step": 260 }, { "epoch": 0.916030534351145, "grad_norm": 0.29763197898864746, "learning_rate": 0.00013877551020408165, "loss": 0.8044, "step": 270 }, { "epoch": 0.9499575911789653, "grad_norm": 0.28522127866744995, "learning_rate": 0.0001365079365079365, "loss": 0.7877, "step": 280 }, { "epoch": 0.9838846480067854, "grad_norm": 0.3052780032157898, "learning_rate": 0.0001342403628117914, "loss": 0.7463, "step": 290 } ], "logging_steps": 10, "max_steps": 882, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.3630672985299354e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }