{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9966329966329966,
  "eval_steps": 500,
  "global_step": 185,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0053872053872053875,
      "grad_norm": 0.0002994377864524722,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.8054,
      "step": 1
    },
    {
      "epoch": 0.05387205387205387,
      "grad_norm": 0.0027586156502366066,
      "learning_rate": 0.00010526315789473685,
      "loss": 1.4791,
      "step": 10
    },
    {
      "epoch": 0.10774410774410774,
      "grad_norm": 0.0025585689581930637,
      "learning_rate": 0.00019879518072289158,
      "loss": 1.3673,
      "step": 20
    },
    {
      "epoch": 0.16161616161616163,
      "grad_norm": 0.003544808831065893,
      "learning_rate": 0.00018674698795180723,
      "loss": 1.2927,
      "step": 30
    },
    {
      "epoch": 0.21548821548821548,
      "grad_norm": 0.004599341191351414,
      "learning_rate": 0.0001746987951807229,
      "loss": 1.0129,
      "step": 40
    },
    {
      "epoch": 0.26936026936026936,
      "grad_norm": 0.002882454078644514,
      "learning_rate": 0.00016265060240963855,
      "loss": 0.9945,
      "step": 50
    },
    {
      "epoch": 0.32323232323232326,
      "grad_norm": 0.0020832906011492014,
      "learning_rate": 0.00015060240963855423,
      "loss": 0.9074,
      "step": 60
    },
    {
      "epoch": 0.3771043771043771,
      "grad_norm": 0.0012261488009244204,
      "learning_rate": 0.00013855421686746988,
      "loss": 0.8895,
      "step": 70
    },
    {
      "epoch": 0.43097643097643096,
      "grad_norm": 0.0009639089112170041,
      "learning_rate": 0.00012650602409638556,
      "loss": 0.9137,
      "step": 80
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 0.0008281469927169383,
      "learning_rate": 0.0001144578313253012,
      "loss": 0.8956,
      "step": 90
    },
    {
      "epoch": 0.5387205387205387,
      "grad_norm": 0.002506977878510952,
      "learning_rate": 0.00010240963855421688,
      "loss": 0.9406,
      "step": 100
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 0.0017301805783063173,
      "learning_rate": 9.036144578313253e-05,
      "loss": 0.8768,
      "step": 110
    },
    {
      "epoch": 0.6464646464646465,
      "grad_norm": 0.0010177810909226537,
      "learning_rate": 7.83132530120482e-05,
      "loss": 0.8657,
      "step": 120
    },
    {
      "epoch": 0.7003367003367004,
      "grad_norm": 0.0009739714441820979,
      "learning_rate": 6.626506024096386e-05,
      "loss": 0.891,
      "step": 130
    },
    {
      "epoch": 0.7542087542087542,
      "grad_norm": 0.0008057655650191009,
      "learning_rate": 5.4216867469879516e-05,
      "loss": 0.8853,
      "step": 140
    },
    {
      "epoch": 0.8080808080808081,
      "grad_norm": 0.0023450921289622784,
      "learning_rate": 4.2168674698795186e-05,
      "loss": 0.9276,
      "step": 150
    },
    {
      "epoch": 0.8619528619528619,
      "grad_norm": 0.0011686653597280383,
      "learning_rate": 3.012048192771085e-05,
      "loss": 0.8676,
      "step": 160
    },
    {
      "epoch": 0.9158249158249159,
      "grad_norm": 0.00091711146524176,
      "learning_rate": 1.8072289156626505e-05,
      "loss": 0.8601,
      "step": 170
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 0.0008369652787223458,
      "learning_rate": 6.024096385542169e-06,
      "loss": 0.885,
      "step": 180
    }
  ],
  "logging_steps": 10,
  "max_steps": 185,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.985763285898625e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}