| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.687523259161477, | |
| "global_step": 810000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9824459158667857e-05, | |
| "loss": 1.0594, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.964891831733571e-05, | |
| "loss": 1.0381, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.947337747600357e-05, | |
| "loss": 1.014, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9297836634671426e-05, | |
| "loss": 0.9997, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.912229579333928e-05, | |
| "loss": 0.9936, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8946754952007134e-05, | |
| "loss": 0.9765, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.877121411067499e-05, | |
| "loss": 0.975, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.859567326934284e-05, | |
| "loss": 0.9627, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.84201324280107e-05, | |
| "loss": 0.9641, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.8244591586678564e-05, | |
| "loss": 0.9562, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.806905074534642e-05, | |
| "loss": 0.9634, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.789350990401427e-05, | |
| "loss": 0.9532, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.771796906268213e-05, | |
| "loss": 0.9539, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.754242822134998e-05, | |
| "loss": 0.9599, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.7366887380017835e-05, | |
| "loss": 0.9437, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.7191346538685696e-05, | |
| "loss": 0.9395, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.701580569735355e-05, | |
| "loss": 0.9385, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.6840264856021404e-05, | |
| "loss": 0.947, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.666472401468926e-05, | |
| "loss": 0.9402, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.648918317335711e-05, | |
| "loss": 0.9317, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.631364233202497e-05, | |
| "loss": 0.9316, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.613810149069283e-05, | |
| "loss": 0.9399, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.596256064936068e-05, | |
| "loss": 0.9395, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.5787019808028536e-05, | |
| "loss": 0.9303, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.561147896669639e-05, | |
| "loss": 0.9304, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.5435938125364244e-05, | |
| "loss": 0.9126, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.5260397284032105e-05, | |
| "loss": 0.9165, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.5084856442699966e-05, | |
| "loss": 0.9311, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.490931560136782e-05, | |
| "loss": 0.9139, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.4733774760035675e-05, | |
| "loss": 0.9097, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.455823391870353e-05, | |
| "loss": 0.9086, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.438269307737138e-05, | |
| "loss": 0.9172, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.420715223603924e-05, | |
| "loss": 0.9106, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.40316113947071e-05, | |
| "loss": 0.9001, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.385607055337495e-05, | |
| "loss": 0.9108, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.3680529712042806e-05, | |
| "loss": 0.911, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.350498887071066e-05, | |
| "loss": 0.9007, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.3329448029378515e-05, | |
| "loss": 0.8908, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.315390718804637e-05, | |
| "loss": 0.897, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.297836634671423e-05, | |
| "loss": 0.902, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.2802825505382084e-05, | |
| "loss": 0.8918, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 4.262728466404994e-05, | |
| "loss": 0.889, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.245174382271779e-05, | |
| "loss": 0.8929, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.2276202981385646e-05, | |
| "loss": 0.8974, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.210066214005351e-05, | |
| "loss": 0.8932, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.192512129872136e-05, | |
| "loss": 0.8901, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 4.174958045738922e-05, | |
| "loss": 0.8849, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.1574039616057077e-05, | |
| "loss": 0.8801, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.139849877472493e-05, | |
| "loss": 0.8807, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 4.1222957933392785e-05, | |
| "loss": 0.8847, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 4.104741709206064e-05, | |
| "loss": 0.8753, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.08718762507285e-05, | |
| "loss": 0.8764, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 4.0696335409396354e-05, | |
| "loss": 0.8748, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.052079456806421e-05, | |
| "loss": 0.8789, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 4.034525372673206e-05, | |
| "loss": 0.875, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.016971288539992e-05, | |
| "loss": 0.8711, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.999417204406777e-05, | |
| "loss": 0.8688, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.981863120273563e-05, | |
| "loss": 0.8594, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.9643090361403486e-05, | |
| "loss": 0.8595, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.946754952007134e-05, | |
| "loss": 0.8536, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.9292008678739194e-05, | |
| "loss": 0.8618, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.911646783740705e-05, | |
| "loss": 0.8511, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.894092699607491e-05, | |
| "loss": 0.8543, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.8765386154742763e-05, | |
| "loss": 0.856, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.8589845313410624e-05, | |
| "loss": 0.8608, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.841430447207848e-05, | |
| "loss": 0.8449, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.823876363074633e-05, | |
| "loss": 0.8527, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.806322278941419e-05, | |
| "loss": 0.8441, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.788768194808204e-05, | |
| "loss": 0.8436, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.77121411067499e-05, | |
| "loss": 0.8448, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.7536600265417756e-05, | |
| "loss": 0.8348, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.736105942408561e-05, | |
| "loss": 0.8466, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.7185518582753464e-05, | |
| "loss": 0.8354, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.700997774142132e-05, | |
| "loss": 0.8421, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.683443690008917e-05, | |
| "loss": 0.8354, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.6658896058757034e-05, | |
| "loss": 0.8358, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.648335521742489e-05, | |
| "loss": 0.8414, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.630781437609274e-05, | |
| "loss": 0.8428, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.6132273534760596e-05, | |
| "loss": 0.8303, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.595673269342845e-05, | |
| "loss": 0.834, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 3.578119185209631e-05, | |
| "loss": 0.8311, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.5605651010764165e-05, | |
| "loss": 0.8245, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.5430110169432026e-05, | |
| "loss": 0.8211, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.525456932809988e-05, | |
| "loss": 0.8154, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.5079028486767735e-05, | |
| "loss": 0.8196, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 3.490348764543559e-05, | |
| "loss": 0.8188, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 3.472794680410344e-05, | |
| "loss": 0.81, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.45524059627713e-05, | |
| "loss": 0.8054, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 3.437686512143916e-05, | |
| "loss": 0.8156, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.420132428010701e-05, | |
| "loss": 0.8032, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 3.4025783438774866e-05, | |
| "loss": 0.8109, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 3.385024259744272e-05, | |
| "loss": 0.812, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.3674701756110575e-05, | |
| "loss": 0.8102, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 3.3499160914778436e-05, | |
| "loss": 0.8032, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 3.332362007344629e-05, | |
| "loss": 0.8098, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 3.3148079232114144e-05, | |
| "loss": 0.8057, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 3.2972538390782e-05, | |
| "loss": 0.8026, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 3.279699754944985e-05, | |
| "loss": 0.8039, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 3.262145670811771e-05, | |
| "loss": 0.8024, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 3.244591586678557e-05, | |
| "loss": 0.8018, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 3.227037502545343e-05, | |
| "loss": 0.7953, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 3.209483418412128e-05, | |
| "loss": 0.7895, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 3.191929334278914e-05, | |
| "loss": 0.7878, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.174375250145699e-05, | |
| "loss": 0.794, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 3.1568211660124845e-05, | |
| "loss": 0.7998, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.13926708187927e-05, | |
| "loss": 0.7819, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 3.121712997746056e-05, | |
| "loss": 0.7917, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.1041589136128414e-05, | |
| "loss": 0.7827, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 3.086604829479627e-05, | |
| "loss": 0.7874, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 3.069050745346412e-05, | |
| "loss": 0.7788, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 3.051496661213198e-05, | |
| "loss": 0.7821, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.0339425770799834e-05, | |
| "loss": 0.7828, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 3.016388492946769e-05, | |
| "loss": 0.7769, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 2.9988344088135546e-05, | |
| "loss": 0.7779, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 2.98128032468034e-05, | |
| "loss": 0.7633, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 2.963726240547126e-05, | |
| "loss": 0.765, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 2.9461721564139115e-05, | |
| "loss": 0.7614, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 2.9286180722806973e-05, | |
| "loss": 0.7669, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 2.9110639881474827e-05, | |
| "loss": 0.7721, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.893509904014268e-05, | |
| "loss": 0.7633, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 2.875955819881054e-05, | |
| "loss": 0.7618, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 2.8584017357478393e-05, | |
| "loss": 0.7603, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 2.8408476516146247e-05, | |
| "loss": 0.7594, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 2.8232935674814104e-05, | |
| "loss": 0.7612, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 2.805739483348196e-05, | |
| "loss": 0.7616, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 2.7881853992149816e-05, | |
| "loss": 0.7628, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 2.770631315081767e-05, | |
| "loss": 0.7638, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 2.7530772309485525e-05, | |
| "loss": 0.7469, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 2.7355231468153382e-05, | |
| "loss": 0.7477, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 2.7179690626821236e-05, | |
| "loss": 0.7501, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 2.700414978548909e-05, | |
| "loss": 0.7513, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 2.6828608944156948e-05, | |
| "loss": 0.751, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 2.6653068102824802e-05, | |
| "loss": 0.7491, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 2.6477527261492663e-05, | |
| "loss": 0.7516, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.6301986420160517e-05, | |
| "loss": 0.7511, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "eval_loss": 0.6953830122947693, | |
| "eval_runtime": 12.1778, | |
| "eval_samples_per_second": 82.116, | |
| "eval_steps_per_second": 10.265, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.6126445578828375e-05, | |
| "loss": 0.7395, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "eval_loss": 0.6852219104766846, | |
| "eval_runtime": 12.2181, | |
| "eval_samples_per_second": 81.846, | |
| "eval_steps_per_second": 10.231, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 2.595090473749623e-05, | |
| "loss": 0.7445, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_loss": 0.7140026688575745, | |
| "eval_runtime": 12.2246, | |
| "eval_samples_per_second": 81.802, | |
| "eval_steps_per_second": 10.225, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.5775363896164083e-05, | |
| "loss": 0.7448, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "eval_loss": 0.678001880645752, | |
| "eval_runtime": 12.1996, | |
| "eval_samples_per_second": 81.97, | |
| "eval_steps_per_second": 10.246, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.559982305483194e-05, | |
| "loss": 0.7392, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "eval_loss": 0.6525120139122009, | |
| "eval_runtime": 12.2116, | |
| "eval_samples_per_second": 81.889, | |
| "eval_steps_per_second": 10.236, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.5424282213499795e-05, | |
| "loss": 0.7499, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "eval_loss": 0.6543171405792236, | |
| "eval_runtime": 12.2207, | |
| "eval_samples_per_second": 81.828, | |
| "eval_steps_per_second": 10.229, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.524874137216765e-05, | |
| "loss": 0.7393, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_loss": 0.665242075920105, | |
| "eval_runtime": 5.3758, | |
| "eval_samples_per_second": 186.02, | |
| "eval_steps_per_second": 23.253, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.5073200530835506e-05, | |
| "loss": 0.7324, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_loss": 0.6618428826332092, | |
| "eval_runtime": 5.3906, | |
| "eval_samples_per_second": 185.507, | |
| "eval_steps_per_second": 23.188, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 2.489765968950336e-05, | |
| "loss": 0.7261, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "eval_loss": 0.6423526406288147, | |
| "eval_runtime": 5.3892, | |
| "eval_samples_per_second": 185.555, | |
| "eval_steps_per_second": 23.194, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.4722118848171215e-05, | |
| "loss": 0.7327, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "eval_loss": 0.6585870385169983, | |
| "eval_runtime": 5.3853, | |
| "eval_samples_per_second": 185.69, | |
| "eval_steps_per_second": 23.211, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.4546578006839072e-05, | |
| "loss": 0.7265, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "eval_loss": 0.6187921762466431, | |
| "eval_runtime": 5.3825, | |
| "eval_samples_per_second": 185.787, | |
| "eval_steps_per_second": 23.223, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.4371037165506926e-05, | |
| "loss": 0.7247, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "eval_loss": 0.6582339406013489, | |
| "eval_runtime": 5.3823, | |
| "eval_samples_per_second": 185.796, | |
| "eval_steps_per_second": 23.224, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.419549632417478e-05, | |
| "loss": 0.7265, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "eval_loss": 0.7226254940032959, | |
| "eval_runtime": 5.3797, | |
| "eval_samples_per_second": 185.883, | |
| "eval_steps_per_second": 23.235, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 2.401995548284264e-05, | |
| "loss": 0.7166, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_loss": 0.6698991656303406, | |
| "eval_runtime": 5.3768, | |
| "eval_samples_per_second": 185.986, | |
| "eval_steps_per_second": 23.248, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.3844414641510496e-05, | |
| "loss": 0.7214, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "eval_loss": 0.6653444170951843, | |
| "eval_runtime": 5.3777, | |
| "eval_samples_per_second": 185.954, | |
| "eval_steps_per_second": 23.244, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.366887380017835e-05, | |
| "loss": 0.7268, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "eval_loss": 0.6490678787231445, | |
| "eval_runtime": 5.3962, | |
| "eval_samples_per_second": 185.314, | |
| "eval_steps_per_second": 23.164, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.3493332958846207e-05, | |
| "loss": 0.7177, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "eval_loss": 0.6720253825187683, | |
| "eval_runtime": 5.3813, | |
| "eval_samples_per_second": 185.828, | |
| "eval_steps_per_second": 23.229, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 2.331779211751406e-05, | |
| "loss": 0.7173, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "eval_loss": 0.636309027671814, | |
| "eval_runtime": 5.3741, | |
| "eval_samples_per_second": 186.079, | |
| "eval_steps_per_second": 23.26, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 2.3142251276181916e-05, | |
| "loss": 0.7222, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "eval_loss": 0.6736326813697815, | |
| "eval_runtime": 5.3844, | |
| "eval_samples_per_second": 185.723, | |
| "eval_steps_per_second": 23.215, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 2.2966710434849773e-05, | |
| "loss": 0.7189, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "eval_loss": 0.6502253413200378, | |
| "eval_runtime": 5.3808, | |
| "eval_samples_per_second": 185.846, | |
| "eval_steps_per_second": 23.231, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.2791169593517627e-05, | |
| "loss": 0.7142, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "eval_loss": 0.6675522327423096, | |
| "eval_runtime": 5.3769, | |
| "eval_samples_per_second": 185.982, | |
| "eval_steps_per_second": 23.248, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.2615628752185485e-05, | |
| "loss": 0.7123, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "eval_loss": 0.7307547330856323, | |
| "eval_runtime": 5.3752, | |
| "eval_samples_per_second": 186.041, | |
| "eval_steps_per_second": 23.255, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.2440087910853343e-05, | |
| "loss": 0.7149, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "eval_loss": 0.6528046727180481, | |
| "eval_runtime": 5.3874, | |
| "eval_samples_per_second": 185.618, | |
| "eval_steps_per_second": 23.202, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.2264547069521197e-05, | |
| "loss": 0.7111, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "eval_loss": 0.6415424942970276, | |
| "eval_runtime": 5.3848, | |
| "eval_samples_per_second": 185.708, | |
| "eval_steps_per_second": 23.213, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.208900622818905e-05, | |
| "loss": 0.7126, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "eval_loss": 0.664243221282959, | |
| "eval_runtime": 5.3818, | |
| "eval_samples_per_second": 185.812, | |
| "eval_steps_per_second": 23.226, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.191346538685691e-05, | |
| "loss": 0.7075, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "eval_loss": 0.6190058588981628, | |
| "eval_runtime": 5.3768, | |
| "eval_samples_per_second": 185.985, | |
| "eval_steps_per_second": 23.248, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.1737924545524763e-05, | |
| "loss": 0.7047, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_loss": 0.645745038986206, | |
| "eval_runtime": 5.3814, | |
| "eval_samples_per_second": 185.824, | |
| "eval_steps_per_second": 23.228, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.1562383704192617e-05, | |
| "loss": 0.7002, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "eval_loss": 0.6156101226806641, | |
| "eval_runtime": 5.3698, | |
| "eval_samples_per_second": 186.226, | |
| "eval_steps_per_second": 23.278, | |
| "step": 810000 | |
| } | |
| ], | |
| "max_steps": 1424170, | |
| "num_train_epochs": 10, | |
| "total_flos": 3.3150958919481446e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |