{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 990,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06060606060606061,
      "grad_norm": 0.42527449131011963,
      "learning_rate": 5.05050505050505e-07,
      "loss": 1.3881,
      "step": 20
    },
    {
      "epoch": 0.12121212121212122,
      "grad_norm": 0.4301815330982208,
      "learning_rate": 1.01010101010101e-06,
      "loss": 1.4123,
      "step": 40
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.44010022282600403,
      "learning_rate": 1.5151515151515152e-06,
      "loss": 1.3928,
      "step": 60
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 0.4670265018939972,
      "learning_rate": 2.02020202020202e-06,
      "loss": 1.3912,
      "step": 80
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 0.4875299632549286,
      "learning_rate": 2.5252525252525258e-06,
      "loss": 1.3781,
      "step": 100
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.37787771224975586,
      "learning_rate": 3.0303030303030305e-06,
      "loss": 1.3347,
      "step": 120
    },
    {
      "epoch": 0.42424242424242425,
      "grad_norm": 0.370220810174942,
      "learning_rate": 3.5353535353535356e-06,
      "loss": 1.303,
      "step": 140
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 0.3872508406639099,
      "learning_rate": 4.04040404040404e-06,
      "loss": 1.2378,
      "step": 160
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.4406280815601349,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 1.1575,
      "step": 180
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 0.5964057445526123,
      "learning_rate": 4.999921328558333e-06,
      "loss": 1.0205,
      "step": 200
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.51554936170578,
      "learning_rate": 4.990486745229364e-06,
      "loss": 0.8522,
      "step": 220
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.5323845148086548,
      "learning_rate": 4.965385884295467e-06,
      "loss": 0.7621,
      "step": 240
    },
    {
      "epoch": 0.7878787878787878,
      "grad_norm": 0.4954449236392975,
      "learning_rate": 4.924776641419513e-06,
      "loss": 0.6637,
      "step": 260
    },
    {
      "epoch": 0.8484848484848485,
      "grad_norm": 0.7475800514221191,
      "learning_rate": 4.868914466936038e-06,
      "loss": 0.6373,
      "step": 280
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.4374482333660126,
      "learning_rate": 4.798150758954164e-06,
      "loss": 0.6296,
      "step": 300
    },
    {
      "epoch": 0.9696969696969697,
      "grad_norm": 0.47152256965637207,
      "learning_rate": 4.7129306529060415e-06,
      "loss": 0.5948,
      "step": 320
    },
    {
      "epoch": 1.0303030303030303,
      "grad_norm": 0.5148698687553406,
      "learning_rate": 4.613790221445511e-06,
      "loss": 0.5775,
      "step": 340
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 0.375209778547287,
      "learning_rate": 4.501353102310901e-06,
      "loss": 0.5469,
      "step": 360
    },
    {
      "epoch": 1.1515151515151516,
      "grad_norm": 0.4789840877056122,
      "learning_rate": 4.376326575364206e-06,
      "loss": 0.5645,
      "step": 380
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 0.5624423027038574,
      "learning_rate": 4.239497113483819e-06,
      "loss": 0.5436,
      "step": 400
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 0.46569859981536865,
      "learning_rate": 4.091725435297721e-06,
      "loss": 0.548,
      "step": 420
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.44934600591659546,
      "learning_rate": 3.933941090877615e-06,
      "loss": 0.5509,
      "step": 440
    },
    {
      "epoch": 1.393939393939394,
      "grad_norm": 0.41648173332214355,
      "learning_rate": 3.767136614452458e-06,
      "loss": 0.528,
      "step": 460
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 0.5370034575462341,
      "learning_rate": 3.5923612809233987e-06,
      "loss": 0.5338,
      "step": 480
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 0.4544106125831604,
      "learning_rate": 3.410714505454486e-06,
      "loss": 0.543,
      "step": 500
    },
    {
      "epoch": 1.5757575757575757,
      "grad_norm": 0.4658797085285187,
      "learning_rate": 3.2233389276586325e-06,
      "loss": 0.522,
      "step": 520
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 0.4451984465122223,
      "learning_rate": 3.0314132238824416e-06,
      "loss": 0.5223,
      "step": 540
    },
    {
      "epoch": 1.696969696969697,
      "grad_norm": 0.43969258666038513,
      "learning_rate": 2.8361446928038298e-06,
      "loss": 0.5184,
      "step": 560
    },
    {
      "epoch": 1.7575757575757576,
      "grad_norm": 0.4263152778148651,
      "learning_rate": 2.6387616609823506e-06,
      "loss": 0.5223,
      "step": 580
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.4743904769420624,
      "learning_rate": 2.440505756134732e-06,
      "loss": 0.5376,
      "step": 600
    },
    {
      "epoch": 1.878787878787879,
      "grad_norm": 0.45021238923072815,
      "learning_rate": 2.242624096740164e-06,
      "loss": 0.5187,
      "step": 620
    },
    {
      "epoch": 1.9393939393939394,
      "grad_norm": 0.45092540979385376,
      "learning_rate": 2.046361447106244e-06,
      "loss": 0.5207,
      "step": 640
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.4477311372756958,
      "learning_rate": 1.852952387243698e-06,
      "loss": 0.5171,
      "step": 660
    },
    {
      "epoch": 2.0606060606060606,
      "grad_norm": 0.41556811332702637,
      "learning_rate": 1.6636135468049122e-06,
      "loss": 0.5086,
      "step": 680
    },
    {
      "epoch": 2.121212121212121,
      "grad_norm": 0.4341486990451813,
      "learning_rate": 1.479535951938243e-06,
      "loss": 0.5067,
      "step": 700
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 0.4101578891277313,
      "learning_rate": 1.301877533199859e-06,
      "loss": 0.5119,
      "step": 720
    },
    {
      "epoch": 2.242424242424242,
      "grad_norm": 0.43040916323661804,
      "learning_rate": 1.1317558416516696e-06,
      "loss": 0.5132,
      "step": 740
    },
    {
      "epoch": 2.303030303030303,
      "grad_norm": 0.4996863901615143,
      "learning_rate": 9.702410189643838e-07,
      "loss": 0.5038,
      "step": 760
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 0.5682224035263062,
      "learning_rate": 8.183490657468687e-07,
      "loss": 0.5146,
      "step": 780
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 0.5291648507118225,
      "learning_rate": 6.770354504470575e-07,
      "loss": 0.4873,
      "step": 800
    },
    {
      "epoch": 2.484848484848485,
      "grad_norm": 0.4898635149002075,
      "learning_rate": 5.471890990272666e-07,
      "loss": 0.5072,
      "step": 820
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 0.4250613749027252,
      "learning_rate": 4.2962680322157335e-07,
      "loss": 0.5074,
      "step": 840
    },
    {
      "epoch": 2.606060606060606,
      "grad_norm": 0.5462311506271362,
      "learning_rate": 3.250880825498026e-07,
      "loss": 0.5058,
      "step": 860
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.4744057059288025,
      "learning_rate": 2.3423053240837518e-07,
      "loss": 0.5264,
      "step": 880
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.4270976781845093,
      "learning_rate": 1.5762568750059604e-07,
      "loss": 0.5156,
      "step": 900
    },
    {
      "epoch": 2.787878787878788,
      "grad_norm": 0.5914854407310486,
      "learning_rate": 9.575542662726756e-08,
      "loss": 0.4896,
      "step": 920
    },
    {
      "epoch": 2.8484848484848486,
      "grad_norm": 0.4599030315876007,
      "learning_rate": 4.9008941453107527e-08,
      "loss": 0.5131,
      "step": 940
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 0.5342442989349365,
      "learning_rate": 1.768028831677926e-08,
      "loss": 0.5163,
      "step": 960
    },
    {
      "epoch": 2.9696969696969697,
      "grad_norm": 0.4838986098766327,
      "learning_rate": 1.9665384847583622e-09,
      "loss": 0.5083,
      "step": 980
    },
    {
      "epoch": 3.0,
      "step": 990,
      "total_flos": 4.559176483209216e+16,
      "train_loss": 0.70023385491034,
      "train_runtime": 1270.7835,
      "train_samples_per_second": 3.116,
      "train_steps_per_second": 0.779
    }
  ],
  "logging_steps": 20,
  "max_steps": 990,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.559176483209216e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}