{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04740670508585058, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002962919067865661, "grad_norm": 0.0781773254275322, "learning_rate": 0.0004985778620526191, "loss": 1.4945, "step": 50 }, { "epoch": 0.005925838135731322, "grad_norm": 0.08408337086439133, "learning_rate": 0.0004970964683574307, "loss": 1.1653, "step": 100 }, { "epoch": 0.008888757203596984, "grad_norm": 0.10583573579788208, "learning_rate": 0.0004956150746622422, "loss": 1.1169, "step": 150 }, { "epoch": 0.011851676271462644, "grad_norm": 0.08750592172145844, "learning_rate": 0.0004941336809670538, "loss": 1.0972, "step": 200 }, { "epoch": 0.014814595339328307, "grad_norm": 0.07067917287349701, "learning_rate": 0.0004926522872718654, "loss": 1.1012, "step": 250 }, { "epoch": 0.017777514407193967, "grad_norm": 0.12936587631702423, "learning_rate": 0.0004911708935766769, "loss": 1.0908, "step": 300 }, { "epoch": 0.02074043347505963, "grad_norm": 0.09118826687335968, "learning_rate": 0.0004896894998814885, "loss": 1.0773, "step": 350 }, { "epoch": 0.02370335254292529, "grad_norm": 0.08850109577178955, "learning_rate": 0.00048820810618630005, "loss": 1.1012, "step": 400 }, { "epoch": 0.02666627161079095, "grad_norm": 0.07888604700565338, "learning_rate": 0.00048672671249111167, "loss": 1.1343, "step": 450 }, { "epoch": 0.029629190678656613, "grad_norm": 0.0906878113746643, "learning_rate": 0.00048524531879592323, "loss": 1.1184, "step": 500 }, { "epoch": 0.032592109746522276, "grad_norm": 0.07720430195331573, "learning_rate": 0.0004837639251007348, "loss": 1.0968, "step": 550 }, { "epoch": 0.035555028814387934, "grad_norm": 0.0831717997789383, "learning_rate": 0.00048228253140554636, "loss": 1.0983, "step": 600 }, { "epoch": 0.03851794788225359, "grad_norm": 0.09815791249275208, "learning_rate": 0.0004808011377103579, "loss": 1.1235, "step": 650 }, { "epoch": 0.04148086695011926, "grad_norm": 0.07672577351331711, "learning_rate": 0.0004793197440151695, "loss": 1.0744, "step": 700 }, { "epoch": 0.04444378601798492, "grad_norm": 0.08625241369009018, "learning_rate": 0.0004778679781938848, "loss": 1.0897, "step": 750 }, { "epoch": 0.04740670508585058, "grad_norm": 0.10188218951225281, "learning_rate": 0.0004763865844986964, "loss": 1.0737, "step": 800 } ], "logging_steps": 50, "max_steps": 16876, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.4844182233088e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }