{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04740670508585058,
  "eval_steps": 500,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002962919067865661,
      "grad_norm": 0.0781773254275322,
      "learning_rate": 0.0004985778620526191,
      "loss": 1.4945,
      "step": 50
    },
    {
      "epoch": 0.005925838135731322,
      "grad_norm": 0.08408337086439133,
      "learning_rate": 0.0004970964683574307,
      "loss": 1.1653,
      "step": 100
    },
    {
      "epoch": 0.008888757203596984,
      "grad_norm": 0.10583573579788208,
      "learning_rate": 0.0004956150746622422,
      "loss": 1.1169,
      "step": 150
    },
    {
      "epoch": 0.011851676271462644,
      "grad_norm": 0.08750592172145844,
      "learning_rate": 0.0004941336809670538,
      "loss": 1.0972,
      "step": 200
    },
    {
      "epoch": 0.014814595339328307,
      "grad_norm": 0.07067917287349701,
      "learning_rate": 0.0004926522872718654,
      "loss": 1.1012,
      "step": 250
    },
    {
      "epoch": 0.017777514407193967,
      "grad_norm": 0.12936587631702423,
      "learning_rate": 0.0004911708935766769,
      "loss": 1.0908,
      "step": 300
    },
    {
      "epoch": 0.02074043347505963,
      "grad_norm": 0.09118826687335968,
      "learning_rate": 0.0004896894998814885,
      "loss": 1.0773,
      "step": 350
    },
    {
      "epoch": 0.02370335254292529,
      "grad_norm": 0.08850109577178955,
      "learning_rate": 0.00048820810618630005,
      "loss": 1.1012,
      "step": 400
    },
    {
      "epoch": 0.02666627161079095,
      "grad_norm": 0.07888604700565338,
      "learning_rate": 0.00048672671249111167,
      "loss": 1.1343,
      "step": 450
    },
    {
      "epoch": 0.029629190678656613,
      "grad_norm": 0.0906878113746643,
      "learning_rate": 0.00048524531879592323,
      "loss": 1.1184,
      "step": 500
    },
    {
      "epoch": 0.032592109746522276,
      "grad_norm": 0.07720430195331573,
      "learning_rate": 0.0004837639251007348,
      "loss": 1.0968,
      "step": 550
    },
    {
      "epoch": 0.035555028814387934,
      "grad_norm": 0.0831717997789383,
      "learning_rate": 0.00048228253140554636,
      "loss": 1.0983,
      "step": 600
    },
    {
      "epoch": 0.03851794788225359,
      "grad_norm": 0.09815791249275208,
      "learning_rate": 0.0004808011377103579,
      "loss": 1.1235,
      "step": 650
    },
    {
      "epoch": 0.04148086695011926,
      "grad_norm": 0.07672577351331711,
      "learning_rate": 0.0004793197440151695,
      "loss": 1.0744,
      "step": 700
    },
    {
      "epoch": 0.04444378601798492,
      "grad_norm": 0.08625241369009018,
      "learning_rate": 0.0004778679781938848,
      "loss": 1.0897,
      "step": 750
    },
    {
      "epoch": 0.04740670508585058,
      "grad_norm": 0.10188218951225281,
      "learning_rate": 0.0004763865844986964,
      "loss": 1.0737,
      "step": 800
    }
  ],
  "logging_steps": 50,
  "max_steps": 16876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.4844182233088e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}