{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.024242424242424242, "grad_norm": 0.08251222968101501, "learning_rate": 6.666666666666667e-06, "loss": 2.4006, "step": 100 }, { "epoch": 0.048484848484848485, "grad_norm": 0.23244759440422058, "learning_rate": 1.3333333333333333e-05, "loss": 2.3802, "step": 200 }, { "epoch": 0.07272727272727272, "grad_norm": 0.2762894332408905, "learning_rate": 2e-05, "loss": 2.3613, "step": 300 }, { "epoch": 0.09696969696969697, "grad_norm": 0.3933228552341461, "learning_rate": 1.9966289692316944e-05, "loss": 2.2924, "step": 400 }, { "epoch": 0.12121212121212122, "grad_norm": 0.45315420627593994, "learning_rate": 1.9865386046236597e-05, "loss": 2.2223, "step": 500 }, { "epoch": 0.14545454545454545, "grad_norm": 0.4649055302143097, "learning_rate": 1.9697969360350098e-05, "loss": 2.2044, "step": 600 }, { "epoch": 0.1696969696969697, "grad_norm": 0.5842418670654297, "learning_rate": 1.9465168368255946e-05, "loss": 2.1239, "step": 700 }, { "epoch": 0.19393939393939394, "grad_norm": 0.676047146320343, "learning_rate": 1.9168552628568632e-05, "loss": 2.1596, "step": 800 }, { "epoch": 0.21818181818181817, "grad_norm": 0.6532862186431885, "learning_rate": 1.8810121942857848e-05, "loss": 2.134, "step": 900 }, { "epoch": 0.24242424242424243, "grad_norm": 0.7170696258544922, "learning_rate": 1.839229287286327e-05, "loss": 2.1441, "step": 1000 }, { "epoch": 0.26666666666666666, "grad_norm": 0.7590866684913635, "learning_rate": 1.7917882447886585e-05, "loss": 2.0895, "step": 1100 }, { "epoch": 0.2909090909090909, "grad_norm": 0.6926172375679016, "learning_rate": 1.7390089172206594e-05, "loss": 2.0802, "step": 1200 }, { "epoch": 0.3151515151515151, "grad_norm": 0.8891839981079102, "learning_rate": 1.681247146056654e-05, "loss": 2.0769, "step": 1300 }, { "epoch": 0.3393939393939394, "grad_norm": 0.7070772647857666, "learning_rate": 1.6188923647122946e-05, "loss": 2.0446, "step": 1400 }, { "epoch": 0.36363636363636365, "grad_norm": 0.8614781498908997, "learning_rate": 1.552364972960506e-05, "loss": 2.0567, "step": 1500 }, { "epoch": 0.3878787878787879, "grad_norm": 0.7985982894897461, "learning_rate": 1.4821135025703491e-05, "loss": 2.0322, "step": 1600 }, { "epoch": 0.4121212121212121, "grad_norm": 0.8504014611244202, "learning_rate": 1.4086115932782316e-05, "loss": 2.0247, "step": 1700 }, { "epoch": 0.43636363636363634, "grad_norm": 0.8554436564445496, "learning_rate": 1.3323547994796597e-05, "loss": 2.0462, "step": 1800 }, { "epoch": 0.46060606060606063, "grad_norm": 0.8365380764007568, "learning_rate": 1.2538572491710079e-05, "loss": 2.0412, "step": 1900 }, { "epoch": 0.48484848484848486, "grad_norm": 0.9090964794158936, "learning_rate": 1.1736481776669307e-05, "loss": 1.9871, "step": 2000 }, { "epoch": 0.509090909090909, "grad_norm": 0.9732162356376648, "learning_rate": 1.092268359463302e-05, "loss": 2.0424, "step": 2100 }, { "epoch": 0.5333333333333333, "grad_norm": 0.8215560913085938, "learning_rate": 1.01026646230229e-05, "loss": 2.026, "step": 2200 }, { "epoch": 0.5575757575757576, "grad_norm": 0.802616536617279, "learning_rate": 9.281953480206725e-06, "loss": 2.0041, "step": 2300 }, { "epoch": 0.5818181818181818, "grad_norm": 0.9250068068504333, "learning_rate": 8.466083451213145e-06, "loss": 2.0377, "step": 2400 }, { "epoch": 0.6060606060606061, "grad_norm": 1.0361137390136719, "learning_rate": 7.660555181983517e-06, "loss": 1.9966, "step": 2500 }, { "epoch": 0.6303030303030303, "grad_norm": 0.9372274279594421, "learning_rate": 6.870799593678459e-06, "loss": 1.9911, "step": 2600 }, { "epoch": 0.6545454545454545, "grad_norm": 0.8156213164329529, "learning_rate": 6.102141267073207e-06, "loss": 1.9825, "step": 2700 }, { "epoch": 0.6787878787878788, "grad_norm": 1.2848368883132935, "learning_rate": 5.3597625439063685e-06, "loss": 2.0076, "step": 2800 }, { "epoch": 0.703030303030303, "grad_norm": 1.0307776927947998, "learning_rate": 4.648668587212998e-06, "loss": 1.9921, "step": 2900 }, { "epoch": 0.7272727272727273, "grad_norm": 0.89860600233078, "learning_rate": 3.973653636207437e-06, "loss": 1.9687, "step": 3000 }, { "epoch": 0.7515151515151515, "grad_norm": 0.991875410079956, "learning_rate": 3.339268683227499e-06, "loss": 2.0015, "step": 3100 }, { "epoch": 0.7757575757575758, "grad_norm": 1.1908742189407349, "learning_rate": 2.749790790664074e-06, "loss": 1.9698, "step": 3200 }, { "epoch": 0.8, "grad_norm": 1.0616734027862549, "learning_rate": 2.209194254743295e-06, "loss": 2.0068, "step": 3300 }, { "epoch": 0.8242424242424242, "grad_norm": 1.0849283933639526, "learning_rate": 1.7211238105768213e-06, "loss": 2.0146, "step": 3400 }, { "epoch": 0.8484848484848485, "grad_norm": 0.9579031467437744, "learning_rate": 1.2888700591334225e-06, "loss": 2.0373, "step": 3500 }, { "epoch": 0.8727272727272727, "grad_norm": 0.9680696725845337, "learning_rate": 9.153472818047627e-07, "loss": 2.0193, "step": 3600 }, { "epoch": 0.896969696969697, "grad_norm": 0.988714337348938, "learning_rate": 6.030737921409169e-07, "loss": 1.9609, "step": 3700 }, { "epoch": 0.9212121212121213, "grad_norm": 0.9824697971343994, "learning_rate": 3.541549572254488e-07, "loss": 1.9667, "step": 3800 }, { "epoch": 0.9454545454545454, "grad_norm": 0.9895309209823608, "learning_rate": 1.7026900316098217e-07, "loss": 2.0066, "step": 3900 }, { "epoch": 0.9696969696969697, "grad_norm": 0.8761349320411682, "learning_rate": 5.265570036553813e-08, "loss": 1.9828, "step": 4000 }, { "epoch": 0.9939393939393939, "grad_norm": 1.041921854019165, "learning_rate": 2.108004964086474e-09, "loss": 1.9977, "step": 4100 }, { "epoch": 1.0, "step": 4125, "total_flos": 7.49645512704e+16, "train_loss": 2.070154784231475, "train_runtime": 1281.9871, "train_samples_per_second": 6.435, "train_steps_per_second": 3.218 } ], "logging_steps": 100, "max_steps": 4125, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.49645512704e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }