{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.024242424242424242,
      "grad_norm": 0.08251222968101501,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.4006,
      "step": 100
    },
    {
      "epoch": 0.048484848484848485,
      "grad_norm": 0.23244759440422058,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.3802,
      "step": 200
    },
    {
      "epoch": 0.07272727272727272,
      "grad_norm": 0.2762894332408905,
      "learning_rate": 2e-05,
      "loss": 2.3613,
      "step": 300
    },
    {
      "epoch": 0.09696969696969697,
      "grad_norm": 0.3933228552341461,
      "learning_rate": 1.9966289692316944e-05,
      "loss": 2.2924,
      "step": 400
    },
    {
      "epoch": 0.12121212121212122,
      "grad_norm": 0.45315420627593994,
      "learning_rate": 1.9865386046236597e-05,
      "loss": 2.2223,
      "step": 500
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 0.4649055302143097,
      "learning_rate": 1.9697969360350098e-05,
      "loss": 2.2044,
      "step": 600
    },
    {
      "epoch": 0.1696969696969697,
      "grad_norm": 0.5842418670654297,
      "learning_rate": 1.9465168368255946e-05,
      "loss": 2.1239,
      "step": 700
    },
    {
      "epoch": 0.19393939393939394,
      "grad_norm": 0.676047146320343,
      "learning_rate": 1.9168552628568632e-05,
      "loss": 2.1596,
      "step": 800
    },
    {
      "epoch": 0.21818181818181817,
      "grad_norm": 0.6532862186431885,
      "learning_rate": 1.8810121942857848e-05,
      "loss": 2.134,
      "step": 900
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 0.7170696258544922,
      "learning_rate": 1.839229287286327e-05,
      "loss": 2.1441,
      "step": 1000
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.7590866684913635,
      "learning_rate": 1.7917882447886585e-05,
      "loss": 2.0895,
      "step": 1100
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 0.6926172375679016,
      "learning_rate": 1.7390089172206594e-05,
      "loss": 2.0802,
      "step": 1200
    },
    {
      "epoch": 0.3151515151515151,
      "grad_norm": 0.8891839981079102,
      "learning_rate": 1.681247146056654e-05,
      "loss": 2.0769,
      "step": 1300
    },
    {
      "epoch": 0.3393939393939394,
      "grad_norm": 0.7070772647857666,
      "learning_rate": 1.6188923647122946e-05,
      "loss": 2.0446,
      "step": 1400
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.8614781498908997,
      "learning_rate": 1.552364972960506e-05,
      "loss": 2.0567,
      "step": 1500
    },
    {
      "epoch": 0.3878787878787879,
      "grad_norm": 0.7985982894897461,
      "learning_rate": 1.4821135025703491e-05,
      "loss": 2.0322,
      "step": 1600
    },
    {
      "epoch": 0.4121212121212121,
      "grad_norm": 0.8504014611244202,
      "learning_rate": 1.4086115932782316e-05,
      "loss": 2.0247,
      "step": 1700
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 0.8554436564445496,
      "learning_rate": 1.3323547994796597e-05,
      "loss": 2.0462,
      "step": 1800
    },
    {
      "epoch": 0.46060606060606063,
      "grad_norm": 0.8365380764007568,
      "learning_rate": 1.2538572491710079e-05,
      "loss": 2.0412,
      "step": 1900
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 0.9090964794158936,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 1.9871,
      "step": 2000
    },
    {
      "epoch": 0.509090909090909,
      "grad_norm": 0.9732162356376648,
      "learning_rate": 1.092268359463302e-05,
      "loss": 2.0424,
      "step": 2100
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.8215560913085938,
      "learning_rate": 1.01026646230229e-05,
      "loss": 2.026,
      "step": 2200
    },
    {
      "epoch": 0.5575757575757576,
      "grad_norm": 0.802616536617279,
      "learning_rate": 9.281953480206725e-06,
      "loss": 2.0041,
      "step": 2300
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 0.9250068068504333,
      "learning_rate": 8.466083451213145e-06,
      "loss": 2.0377,
      "step": 2400
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 1.0361137390136719,
      "learning_rate": 7.660555181983517e-06,
      "loss": 1.9966,
      "step": 2500
    },
    {
      "epoch": 0.6303030303030303,
      "grad_norm": 0.9372274279594421,
      "learning_rate": 6.870799593678459e-06,
      "loss": 1.9911,
      "step": 2600
    },
    {
      "epoch": 0.6545454545454545,
      "grad_norm": 0.8156213164329529,
      "learning_rate": 6.102141267073207e-06,
      "loss": 1.9825,
      "step": 2700
    },
    {
      "epoch": 0.6787878787878788,
      "grad_norm": 1.2848368883132935,
      "learning_rate": 5.3597625439063685e-06,
      "loss": 2.0076,
      "step": 2800
    },
    {
      "epoch": 0.703030303030303,
      "grad_norm": 1.0307776927947998,
      "learning_rate": 4.648668587212998e-06,
      "loss": 1.9921,
      "step": 2900
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.89860600233078,
      "learning_rate": 3.973653636207437e-06,
      "loss": 1.9687,
      "step": 3000
    },
    {
      "epoch": 0.7515151515151515,
      "grad_norm": 0.991875410079956,
      "learning_rate": 3.339268683227499e-06,
      "loss": 2.0015,
      "step": 3100
    },
    {
      "epoch": 0.7757575757575758,
      "grad_norm": 1.1908742189407349,
      "learning_rate": 2.749790790664074e-06,
      "loss": 1.9698,
      "step": 3200
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.0616734027862549,
      "learning_rate": 2.209194254743295e-06,
      "loss": 2.0068,
      "step": 3300
    },
    {
      "epoch": 0.8242424242424242,
      "grad_norm": 1.0849283933639526,
      "learning_rate": 1.7211238105768213e-06,
      "loss": 2.0146,
      "step": 3400
    },
    {
      "epoch": 0.8484848484848485,
      "grad_norm": 0.9579031467437744,
      "learning_rate": 1.2888700591334225e-06,
      "loss": 2.0373,
      "step": 3500
    },
    {
      "epoch": 0.8727272727272727,
      "grad_norm": 0.9680696725845337,
      "learning_rate": 9.153472818047627e-07,
      "loss": 2.0193,
      "step": 3600
    },
    {
      "epoch": 0.896969696969697,
      "grad_norm": 0.988714337348938,
      "learning_rate": 6.030737921409169e-07,
      "loss": 1.9609,
      "step": 3700
    },
    {
      "epoch": 0.9212121212121213,
      "grad_norm": 0.9824697971343994,
      "learning_rate": 3.541549572254488e-07,
      "loss": 1.9667,
      "step": 3800
    },
    {
      "epoch": 0.9454545454545454,
      "grad_norm": 0.9895309209823608,
      "learning_rate": 1.7026900316098217e-07,
      "loss": 2.0066,
      "step": 3900
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 0.8761349320411682,
      "learning_rate": 5.265570036553813e-08,
      "loss": 1.9828,
      "step": 4000
    },
    {
      "epoch": 0.9939393939393939,
      "grad_norm": 1.041921854019165,
      "learning_rate": 2.108004964086474e-09,
      "loss": 1.9977,
      "step": 4100
    },
    {
      "epoch": 1.0,
      "step": 4125,
      "total_flos": 7.49645512704e+16,
      "train_loss": 2.070154784231475,
      "train_runtime": 1281.9871,
      "train_samples_per_second": 6.435,
      "train_steps_per_second": 3.218
    }
  ],
  "logging_steps": 100,
  "max_steps": 4125,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.49645512704e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}