{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9975550122249389,
  "eval_steps": 500,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004889975550122249,
      "grad_norm": 0.36700921478372245,
      "learning_rate": 9.523809523809523e-06,
      "loss": 1.8349,
      "step": 1
    },
    {
      "epoch": 0.02444987775061125,
      "grad_norm": 0.3892962682403987,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.7937,
      "step": 5
    },
    {
      "epoch": 0.0488997555012225,
      "grad_norm": 0.21809103204571412,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.7603,
      "step": 10
    },
    {
      "epoch": 0.07334963325183375,
      "grad_norm": 0.1763235766556296,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.6895,
      "step": 15
    },
    {
      "epoch": 0.097799511002445,
      "grad_norm": 0.1617556242623244,
      "learning_rate": 0.00019047619047619048,
      "loss": 1.6347,
      "step": 20
    },
    {
      "epoch": 0.12224938875305623,
      "grad_norm": 0.15643068252772752,
      "learning_rate": 0.00019976432316860067,
      "loss": 1.556,
      "step": 25
    },
    {
      "epoch": 0.1466992665036675,
      "grad_norm": 0.15134695775897136,
      "learning_rate": 0.00019880878960910772,
      "loss": 1.5133,
      "step": 30
    },
    {
      "epoch": 0.17114914425427874,
      "grad_norm": 0.11939382176274925,
      "learning_rate": 0.00019712569994658315,
      "loss": 1.4775,
      "step": 35
    },
    {
      "epoch": 0.19559902200489,
      "grad_norm": 0.10829777367965486,
      "learning_rate": 0.0001947274472298717,
      "loss": 1.46,
      "step": 40
    },
    {
      "epoch": 0.2200488997555012,
      "grad_norm": 0.13772866457253763,
      "learning_rate": 0.0001916316904487005,
      "loss": 1.4135,
      "step": 45
    },
    {
      "epoch": 0.24449877750611246,
      "grad_norm": 0.09822827366409893,
      "learning_rate": 0.00018786122450571485,
      "loss": 1.4417,
      "step": 50
    },
    {
      "epoch": 0.26894865525672373,
      "grad_norm": 0.11587926176152005,
      "learning_rate": 0.00018344381237138472,
      "loss": 1.3964,
      "step": 55
    },
    {
      "epoch": 0.293398533007335,
      "grad_norm": 0.11240730186157354,
      "learning_rate": 0.00017841198065767107,
      "loss": 1.3837,
      "step": 60
    },
    {
      "epoch": 0.31784841075794623,
      "grad_norm": 0.1128346094848023,
      "learning_rate": 0.00017280278011569847,
      "loss": 1.3633,
      "step": 65
    },
    {
      "epoch": 0.3422982885085575,
      "grad_norm": 0.09015460553796734,
      "learning_rate": 0.00016665751282095634,
      "loss": 1.3806,
      "step": 70
    },
    {
      "epoch": 0.36674816625916873,
      "grad_norm": 0.10347143235019492,
      "learning_rate": 0.00016002142805483685,
      "loss": 1.3788,
      "step": 75
    },
    {
      "epoch": 0.39119804400978,
      "grad_norm": 0.6669561111631297,
      "learning_rate": 0.0001529433891218185,
      "loss": 1.3851,
      "step": 80
    },
    {
      "epoch": 0.4156479217603912,
      "grad_norm": 0.09186122563595991,
      "learning_rate": 0.0001454755135556106,
      "loss": 1.3688,
      "step": 85
    },
    {
      "epoch": 0.4400977995110024,
      "grad_norm": 0.09496253504589665,
      "learning_rate": 0.00013767278936351854,
      "loss": 1.3288,
      "step": 90
    },
    {
      "epoch": 0.46454767726161367,
      "grad_norm": 0.0934552174596641,
      "learning_rate": 0.00012959267013472892,
      "loss": 1.3816,
      "step": 95
    },
    {
      "epoch": 0.4889975550122249,
      "grad_norm": 0.10547587865492095,
      "learning_rate": 0.00012129465199384157,
      "loss": 1.3508,
      "step": 100
    },
    {
      "epoch": 0.5134474327628362,
      "grad_norm": 0.09760937352012419,
      "learning_rate": 0.00011283983551465511,
      "loss": 1.3434,
      "step": 105
    },
    {
      "epoch": 0.5378973105134475,
      "grad_norm": 0.09910476129769094,
      "learning_rate": 0.00010429047581995546,
      "loss": 1.337,
      "step": 110
    },
    {
      "epoch": 0.5623471882640587,
      "grad_norm": 0.09961470132705542,
      "learning_rate": 9.570952418004455e-05,
      "loss": 1.3663,
      "step": 115
    },
    {
      "epoch": 0.58679706601467,
      "grad_norm": 0.09739741126425534,
      "learning_rate": 8.71601644853449e-05,
      "loss": 1.3389,
      "step": 120
    },
    {
      "epoch": 0.6112469437652812,
      "grad_norm": 0.10283288071724549,
      "learning_rate": 7.870534800615845e-05,
      "loss": 1.3369,
      "step": 125
    },
    {
      "epoch": 0.6356968215158925,
      "grad_norm": 0.10097594818776578,
      "learning_rate": 7.040732986527108e-05,
      "loss": 1.3555,
      "step": 130
    },
    {
      "epoch": 0.6601466992665037,
      "grad_norm": 0.09722568422567038,
      "learning_rate": 6.232721063648148e-05,
      "loss": 1.3537,
      "step": 135
    },
    {
      "epoch": 0.684596577017115,
      "grad_norm": 0.09586006955336507,
      "learning_rate": 5.452448644438946e-05,
      "loss": 1.3479,
      "step": 140
    },
    {
      "epoch": 0.7090464547677262,
      "grad_norm": 0.09299592243784432,
      "learning_rate": 4.7056610878181486e-05,
      "loss": 1.3585,
      "step": 145
    },
    {
      "epoch": 0.7334963325183375,
      "grad_norm": 0.10069734955186067,
      "learning_rate": 3.997857194516319e-05,
      "loss": 1.36,
      "step": 150
    },
    {
      "epoch": 0.7579462102689487,
      "grad_norm": 0.09194306954899185,
      "learning_rate": 3.334248717904368e-05,
      "loss": 1.3571,
      "step": 155
    },
    {
      "epoch": 0.78239608801956,
      "grad_norm": 0.0926753184350128,
      "learning_rate": 2.719721988430153e-05,
      "loss": 1.3312,
      "step": 160
    },
    {
      "epoch": 0.8068459657701712,
      "grad_norm": 0.09878832937752359,
      "learning_rate": 2.1588019342328968e-05,
      "loss": 1.3208,
      "step": 165
    },
    {
      "epoch": 0.8312958435207825,
      "grad_norm": 0.0960429927663343,
      "learning_rate": 1.6556187628615273e-05,
      "loss": 1.3149,
      "step": 170
    },
    {
      "epoch": 0.8557457212713936,
      "grad_norm": 0.0899273808312634,
      "learning_rate": 1.2138775494285182e-05,
      "loss": 1.3296,
      "step": 175
    },
    {
      "epoch": 0.8801955990220048,
      "grad_norm": 0.09384612891193284,
      "learning_rate": 8.368309551299536e-06,
      "loss": 1.3378,
      "step": 180
    },
    {
      "epoch": 0.9046454767726161,
      "grad_norm": 0.11142899089067898,
      "learning_rate": 5.272552770128314e-06,
      "loss": 1.3591,
      "step": 185
    },
    {
      "epoch": 0.9290953545232273,
      "grad_norm": 0.09142510348253172,
      "learning_rate": 2.8743000534168675e-06,
      "loss": 1.3623,
      "step": 190
    },
    {
      "epoch": 0.9535452322738386,
      "grad_norm": 0.09320844556768892,
      "learning_rate": 1.1912103908922945e-06,
      "loss": 1.3553,
      "step": 195
    },
    {
      "epoch": 0.9779951100244498,
      "grad_norm": 0.09161534630758739,
      "learning_rate": 2.3567683139936735e-07,
      "loss": 1.3407,
      "step": 200
    },
    {
      "epoch": 0.9975550122249389,
      "eval_loss": 1.3294435739517212,
      "eval_runtime": 1.8191,
      "eval_samples_per_second": 3.848,
      "eval_steps_per_second": 0.55,
      "step": 204
    },
    {
      "epoch": 0.9975550122249389,
      "step": 204,
      "total_flos": 2562962409652224.0,
      "train_loss": 1.4071934585477792,
      "train_runtime": 4285.5681,
      "train_samples_per_second": 3.053,
      "train_steps_per_second": 0.048
    }
  ],
  "logging_steps": 5,
  "max_steps": 204,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2562962409652224.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}