{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 96,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.9785,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.0381,
      "step": 2
    },
    {
      "epoch": 0.19,
      "learning_rate": 2e-05,
      "loss": 0.5466,
      "step": 3
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.999429490929718e-05,
      "loss": 2.5332,
      "step": 4
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9977186146800707e-05,
      "loss": 0.4666,
      "step": 5
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.994869323391895e-05,
      "loss": 0.2064,
      "step": 6
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.990884868158239e-05,
      "loss": 0.2202,
      "step": 7
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.985769795314804e-05,
      "loss": 0.1354,
      "step": 8
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.9795299412524948e-05,
      "loss": 0.2023,
      "step": 9
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9721724257579907e-05,
      "loss": 0.1093,
      "step": 10
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.963705643889941e-05,
      "loss": 0.126,
      "step": 11
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.954139256400049e-05,
      "loss": 0.1307,
      "step": 12
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.9434841787099804e-05,
      "loss": 0.1108,
      "step": 13
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.9317525684566686e-05,
      "loss": 0.1157,
      "step": 14
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.918957811620231e-05,
      "loss": 0.1541,
      "step": 15
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.9051145072503216e-05,
      "loss": 0.1068,
      "step": 16
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.8902384508083518e-05,
      "loss": 0.1263,
      "step": 17
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.8743466161445823e-05,
      "loss": 0.1382,
      "step": 18
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.857457136130651e-05,
      "loss": 0.1142,
      "step": 19
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.839589281969639e-05,
      "loss": 0.1077,
      "step": 20
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.8207634412072765e-05,
      "loss": 0.116,
      "step": 21
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.8010010944693846e-05,
      "loss": 0.1183,
      "step": 22
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.780324790952092e-05,
      "loss": 0.1205,
      "step": 23
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.758758122692791e-05,
      "loss": 0.1078,
      "step": 24
    },
    {
      "epoch": 1.56,
      "learning_rate": 1.7363256976511972e-05,
      "loss": 0.1143,
      "step": 25
    },
    {
      "epoch": 1.62,
      "learning_rate": 1.7130531116312202e-05,
      "loss": 0.1083,
      "step": 26
    },
    {
      "epoch": 1.69,
      "learning_rate": 1.688966919075687e-05,
      "loss": 0.1096,
      "step": 27
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.6640946027672395e-05,
      "loss": 0.0986,
      "step": 28
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.6384645424699835e-05,
      "loss": 0.11,
      "step": 29
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.612105982547663e-05,
      "loss": 0.0924,
      "step": 30
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.5850489985953076e-05,
      "loss": 0.1262,
      "step": 31
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.5573244631224364e-05,
      "loss": 0.0916,
      "step": 32
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5289640103269626e-05,
      "loss": 0.0937,
      "step": 33
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.1017,
      "step": 34
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.4704654806027558e-05,
      "loss": 0.0928,
      "step": 35
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.4403941515576344e-05,
      "loss": 0.1088,
      "step": 36
    },
    {
      "epoch": 2.31,
      "learning_rate": 1.4098203247965876e-05,
      "loss": 0.1034,
      "step": 37
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.3787788856105762e-05,
      "loss": 0.1024,
      "step": 38
    },
    {
      "epoch": 2.44,
      "learning_rate": 1.3473052528448203e-05,
      "loss": 0.0874,
      "step": 39
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.3154353384852559e-05,
      "loss": 0.097,
      "step": 40
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.283205506682304e-05,
      "loss": 0.0948,
      "step": 41
    },
    {
      "epoch": 2.62,
      "learning_rate": 1.2506525322587207e-05,
      "loss": 0.0788,
      "step": 42
    },
    {
      "epoch": 2.69,
      "learning_rate": 1.2178135587488515e-05,
      "loss": 0.087,
      "step": 43
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.1847260560171895e-05,
      "loss": 0.0866,
      "step": 44
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.1514277775045768e-05,
      "loss": 0.0992,
      "step": 45
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.1179567171508463e-05,
      "loss": 0.0916,
      "step": 46
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.0843510660430447e-05,
      "loss": 0.0786,
      "step": 47
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.0506491688387128e-05,
      "loss": 0.0915,
      "step": 48
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.0168894800139311e-05,
      "loss": 0.0815,
      "step": 49
    },
    {
      "epoch": 3.12,
      "learning_rate": 9.83110519986069e-06,
      "loss": 0.0762,
      "step": 50
    },
    {
      "epoch": 3.19,
      "learning_rate": 9.493508311612874e-06,
      "loss": 0.0908,
      "step": 51
    },
    {
      "epoch": 3.25,
      "learning_rate": 9.156489339569555e-06,
      "loss": 0.0753,
      "step": 52
    },
    {
      "epoch": 3.31,
      "learning_rate": 8.820432828491542e-06,
      "loss": 0.0576,
      "step": 53
    },
    {
      "epoch": 3.38,
      "learning_rate": 8.485722224954237e-06,
      "loss": 0.0742,
      "step": 54
    },
    {
      "epoch": 3.44,
      "learning_rate": 8.15273943982811e-06,
      "loss": 0.0976,
      "step": 55
    },
    {
      "epoch": 3.5,
      "learning_rate": 7.821864412511485e-06,
      "loss": 0.0964,
      "step": 56
    },
    {
      "epoch": 3.56,
      "learning_rate": 7.493474677412795e-06,
      "loss": 0.0894,
      "step": 57
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.16794493317696e-06,
      "loss": 0.0877,
      "step": 58
    },
    {
      "epoch": 3.69,
      "learning_rate": 6.845646615147445e-06,
      "loss": 0.0871,
      "step": 59
    },
    {
      "epoch": 3.75,
      "learning_rate": 6.526947471551799e-06,
      "loss": 0.0714,
      "step": 60
    },
    {
      "epoch": 3.81,
      "learning_rate": 6.21221114389424e-06,
      "loss": 0.0806,
      "step": 61
    },
    {
      "epoch": 3.88,
      "learning_rate": 5.901796752034128e-06,
      "loss": 0.0879,
      "step": 62
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.5960584844236565e-06,
      "loss": 0.0731,
      "step": 63
    },
    {
      "epoch": 4.0,
      "learning_rate": 5.295345193972445e-06,
      "loss": 0.0867,
      "step": 64
    },
    {
      "epoch": 4.06,
      "learning_rate": 5.000000000000003e-06,
      "loss": 0.0633,
      "step": 65
    },
    {
      "epoch": 4.12,
      "learning_rate": 4.710359896730379e-06,
      "loss": 0.0632,
      "step": 66
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.426755368775637e-06,
      "loss": 0.0824,
      "step": 67
    },
    {
      "epoch": 4.25,
      "learning_rate": 4.149510014046922e-06,
      "loss": 0.0883,
      "step": 68
    },
    {
      "epoch": 4.31,
      "learning_rate": 3.878940174523371e-06,
      "loss": 0.0598,
      "step": 69
    },
    {
      "epoch": 4.38,
      "learning_rate": 3.6153545753001663e-06,
      "loss": 0.0612,
      "step": 70
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.3590539723276083e-06,
      "loss": 0.0597,
      "step": 71
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.110330809243134e-06,
      "loss": 0.0798,
      "step": 72
    },
    {
      "epoch": 4.56,
      "learning_rate": 2.869468883687798e-06,
      "loss": 0.0694,
      "step": 73
    },
    {
      "epoch": 4.62,
      "learning_rate": 2.6367430234880286e-06,
      "loss": 0.0791,
      "step": 74
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.4124187730720916e-06,
      "loss": 0.0692,
      "step": 75
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.196752090479083e-06,
      "loss": 0.0687,
      "step": 76
    },
    {
      "epoch": 4.81,
      "learning_rate": 1.9899890553061565e-06,
      "loss": 0.0582,
      "step": 77
    },
    {
      "epoch": 4.88,
      "learning_rate": 1.7923655879272395e-06,
      "loss": 0.056,
      "step": 78
    },
    {
      "epoch": 4.94,
      "learning_rate": 1.60410718030361e-06,
      "loss": 0.0554,
      "step": 79
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.425428638693489e-06,
      "loss": 0.0484,
      "step": 80
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.2565338385541792e-06,
      "loss": 0.0637,
      "step": 81
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.097615491916485e-06,
      "loss": 0.0595,
      "step": 82
    },
    {
      "epoch": 5.19,
      "learning_rate": 9.488549274967873e-07,
      "loss": 0.0533,
      "step": 83
    },
    {
      "epoch": 5.25,
      "learning_rate": 8.10421883797694e-07,
      "loss": 0.0834,
      "step": 84
    },
    {
      "epoch": 5.31,
      "learning_rate": 6.824743154333157e-07,
      "loss": 0.0598,
      "step": 85
    },
    {
      "epoch": 5.38,
      "learning_rate": 5.651582129001987e-07,
      "loss": 0.0504,
      "step": 86
    },
    {
      "epoch": 5.44,
      "learning_rate": 4.5860743599951186e-07,
      "loss": 0.0688,
      "step": 87
    },
    {
      "epoch": 5.5,
      "learning_rate": 3.629435611005916e-07,
      "loss": 0.0621,
      "step": 88
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.7827574242009434e-07,
      "loss": 0.0482,
      "step": 89
    },
    {
      "epoch": 5.62,
      "learning_rate": 2.0470058747505516e-07,
      "loss": 0.0515,
      "step": 90
    },
    {
      "epoch": 5.69,
      "learning_rate": 1.4230204685196202e-07,
      "loss": 0.0695,
      "step": 91
    },
    {
      "epoch": 5.75,
      "learning_rate": 9.11513184176116e-08,
      "loss": 0.051,
      "step": 92
    },
    {
      "epoch": 5.81,
      "learning_rate": 5.1306766081048456e-08,
      "loss": 0.0379,
      "step": 93
    },
    {
      "epoch": 5.88,
      "learning_rate": 2.2813853199292745e-08,
      "loss": 0.0565,
      "step": 94
    },
    {
      "epoch": 5.94,
      "learning_rate": 5.705090702819993e-09,
      "loss": 0.0524,
      "step": 95
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.0,
      "loss": 0.0478,
      "step": 96
    },
    {
      "epoch": 6.0,
      "step": 96,
      "total_flos": 2277230247936.0,
      "train_loss": 0.14340813954671225,
      "train_runtime": 691.2623,
      "train_samples_per_second": 8.515,
      "train_steps_per_second": 0.139
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 96,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 800,
  "total_flos": 2277230247936.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}