{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9966329966329966, "eval_steps": 500, "global_step": 185, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0053872053872053875, "grad_norm": 0.0002994377864524722, "learning_rate": 1.0526315789473684e-05, "loss": 0.8054, "step": 1 }, { "epoch": 0.05387205387205387, "grad_norm": 0.0027586156502366066, "learning_rate": 0.00010526315789473685, "loss": 1.4791, "step": 10 }, { "epoch": 0.10774410774410774, "grad_norm": 0.0025585689581930637, "learning_rate": 0.00019879518072289158, "loss": 1.3673, "step": 20 }, { "epoch": 0.16161616161616163, "grad_norm": 0.003544808831065893, "learning_rate": 0.00018674698795180723, "loss": 1.2927, "step": 30 }, { "epoch": 0.21548821548821548, "grad_norm": 0.004599341191351414, "learning_rate": 0.0001746987951807229, "loss": 1.0129, "step": 40 }, { "epoch": 0.26936026936026936, "grad_norm": 0.002882454078644514, "learning_rate": 0.00016265060240963855, "loss": 0.9945, "step": 50 }, { "epoch": 0.32323232323232326, "grad_norm": 0.0020832906011492014, "learning_rate": 0.00015060240963855423, "loss": 0.9074, "step": 60 }, { "epoch": 0.3771043771043771, "grad_norm": 0.0012261488009244204, "learning_rate": 0.00013855421686746988, "loss": 0.8895, "step": 70 }, { "epoch": 0.43097643097643096, "grad_norm": 0.0009639089112170041, "learning_rate": 0.00012650602409638556, "loss": 0.9137, "step": 80 }, { "epoch": 0.48484848484848486, "grad_norm": 0.0008281469927169383, "learning_rate": 0.0001144578313253012, "loss": 0.8956, "step": 90 }, { "epoch": 0.5387205387205387, "grad_norm": 0.002506977878510952, "learning_rate": 0.00010240963855421688, "loss": 0.9406, "step": 100 }, { "epoch": 0.5925925925925926, "grad_norm": 0.0017301805783063173, "learning_rate": 9.036144578313253e-05, "loss": 0.8768, "step": 110 }, { "epoch": 0.6464646464646465, "grad_norm": 0.0010177810909226537, "learning_rate": 7.83132530120482e-05, "loss": 0.8657, "step": 120 }, { "epoch": 0.7003367003367004, "grad_norm": 0.0009739714441820979, "learning_rate": 6.626506024096386e-05, "loss": 0.891, "step": 130 }, { "epoch": 0.7542087542087542, "grad_norm": 0.0008057655650191009, "learning_rate": 5.4216867469879516e-05, "loss": 0.8853, "step": 140 }, { "epoch": 0.8080808080808081, "grad_norm": 0.0023450921289622784, "learning_rate": 4.2168674698795186e-05, "loss": 0.9276, "step": 150 }, { "epoch": 0.8619528619528619, "grad_norm": 0.0011686653597280383, "learning_rate": 3.012048192771085e-05, "loss": 0.8676, "step": 160 }, { "epoch": 0.9158249158249159, "grad_norm": 0.00091711146524176, "learning_rate": 1.8072289156626505e-05, "loss": 0.8601, "step": 170 }, { "epoch": 0.9696969696969697, "grad_norm": 0.0008369652787223458, "learning_rate": 6.024096385542169e-06, "loss": 0.885, "step": 180 } ], "logging_steps": 10, "max_steps": 185, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.985763285898625e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }