{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 366, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0547945205479452, "grad_norm": 1.181526780128479, "learning_rate": 4.736842105263158e-05, "loss": 1.799, "step": 10 }, { "epoch": 0.1095890410958904, "grad_norm": 0.522590160369873, "learning_rate": 0.0001, "loss": 1.2094, "step": 20 }, { "epoch": 0.1643835616438356, "grad_norm": 0.3870140314102173, "learning_rate": 9.979522160511282e-05, "loss": 0.8021, "step": 30 }, { "epoch": 0.2191780821917808, "grad_norm": 0.3272338807582855, "learning_rate": 9.918256378809177e-05, "loss": 0.7125, "step": 40 }, { "epoch": 0.273972602739726, "grad_norm": 0.39106306433677673, "learning_rate": 9.816704491231226e-05, "loss": 0.6694, "step": 50 }, { "epoch": 0.3287671232876712, "grad_norm": 0.5298248529434204, "learning_rate": 9.675698323078865e-05, "loss": 0.6647, "step": 60 }, { "epoch": 0.3835616438356164, "grad_norm": 0.33846038579940796, "learning_rate": 9.496392875023432e-05, "loss": 0.612, "step": 70 }, { "epoch": 0.4383561643835616, "grad_norm": 0.3002408444881439, "learning_rate": 9.280256862338821e-05, "loss": 0.5868, "step": 80 }, { "epoch": 0.4931506849315068, "grad_norm": 0.27812713384628296, "learning_rate": 9.029060684455229e-05, "loss": 0.599, "step": 90 }, { "epoch": 0.547945205479452, "grad_norm": 0.3248140513896942, "learning_rate": 8.744861923377001e-05, "loss": 0.5933, "step": 100 }, { "epoch": 0.6027397260273972, "grad_norm": 0.36872419714927673, "learning_rate": 8.429988489749046e-05, "loss": 0.5952, "step": 110 }, { "epoch": 0.6575342465753424, "grad_norm": 0.3348616659641266, "learning_rate": 8.087019554624595e-05, "loss": 0.5813, "step": 120 }, { "epoch": 0.7123287671232876, "grad_norm": 0.33727309107780457, "learning_rate": 7.718764423124892e-05, "loss": 0.5511, "step": 130 }, { "epoch": 0.7671232876712328, "grad_norm": 0.35852885246276855, "learning_rate": 7.32823952303943e-05, "loss": 0.5837, "step": 140 }, { "epoch": 0.821917808219178, "grad_norm": 0.37122035026550293, "learning_rate": 6.918643696856333e-05, "loss": 0.5604, "step": 150 }, { "epoch": 0.8767123287671232, "grad_norm": 0.3272222578525543, "learning_rate": 6.493331999609131e-05, "loss": 0.5318, "step": 160 }, { "epoch": 0.9315068493150684, "grad_norm": 0.3521849513053894, "learning_rate": 6.055788217165383e-05, "loss": 0.5598, "step": 170 }, { "epoch": 0.9863013698630136, "grad_norm": 0.3393417000770569, "learning_rate": 5.609596330063558e-05, "loss": 0.5626, "step": 180 }, { "epoch": 1.0383561643835617, "grad_norm": 0.29794371128082275, "learning_rate": 5.1584111566417515e-05, "loss": 0.5401, "step": 190 }, { "epoch": 1.093150684931507, "grad_norm": 0.40294867753982544, "learning_rate": 4.705928415924373e-05, "loss": 0.4977, "step": 200 }, { "epoch": 1.1479452054794521, "grad_norm": 0.3701549768447876, "learning_rate": 4.255854455485753e-05, "loss": 0.4893, "step": 210 }, { "epoch": 1.2027397260273973, "grad_norm": 0.41681772470474243, "learning_rate": 3.811875892253853e-05, "loss": 0.509, "step": 220 }, { "epoch": 1.2575342465753425, "grad_norm": 0.47252005338668823, "learning_rate": 3.377629414930396e-05, "loss": 0.4866, "step": 230 }, { "epoch": 1.3123287671232877, "grad_norm": 0.5522735118865967, "learning_rate": 2.9566719953798473e-05, "loss": 0.4788, "step": 240 }, { "epoch": 1.367123287671233, "grad_norm": 0.3663751780986786, "learning_rate": 2.552451752989865e-05, "loss": 0.5067, "step": 250 }, { "epoch": 1.4219178082191781, "grad_norm": 0.3987874388694763, "learning_rate": 2.168279710657149e-05, "loss": 0.4634, "step": 260 }, { "epoch": 1.4767123287671233, "grad_norm": 0.5115519762039185, "learning_rate": 1.8073026737492782e-05, "loss": 0.4912, "step": 270 }, { "epoch": 1.5315068493150685, "grad_norm": 0.5251284241676331, "learning_rate": 1.4724774541946146e-05, "loss": 0.5087, "step": 280 }, { "epoch": 1.5863013698630137, "grad_norm": 0.5367189049720764, "learning_rate": 1.1665466508342877e-05, "loss": 0.462, "step": 290 }, { "epoch": 1.641095890410959, "grad_norm": 0.6373235583305359, "learning_rate": 8.920161844226415e-06, "loss": 0.482, "step": 300 }, { "epoch": 1.6958904109589041, "grad_norm": 0.5205274820327759, "learning_rate": 6.511347712900545e-06, "loss": 0.4816, "step": 310 }, { "epoch": 1.7506849315068493, "grad_norm": 0.42115122079849243, "learning_rate": 4.458755038021029e-06, "loss": 0.4663, "step": 320 }, { "epoch": 1.8054794520547945, "grad_norm": 0.5167638659477234, "learning_rate": 2.7791968849206428e-06, "loss": 0.4938, "step": 330 }, { "epoch": 1.8602739726027397, "grad_norm": 0.544147253036499, "learning_rate": 1.4864307425078327e-06, "loss": 0.4824, "step": 340 }, { "epoch": 1.915068493150685, "grad_norm": 0.4460655748844147, "learning_rate": 5.910458338069191e-07, "loss": 0.4946, "step": 350 }, { "epoch": 1.9698630136986301, "grad_norm": 0.5717807412147522, "learning_rate": 1.0037637819431123e-07, "loss": 0.4736, "step": 360 } ], "logging_steps": 10, "max_steps": 366, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.27377858339115e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }