| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.817204301075268, | |
| "eval_steps": 500, | |
| "global_step": 1600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.030721966205837174, | |
| "grad_norm": 5.377892017364502, | |
| "learning_rate": 1.6460905349794242e-07, | |
| "loss": 0.6829, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.06144393241167435, | |
| "grad_norm": 4.802157878875732, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 0.6782, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09216589861751152, | |
| "grad_norm": 4.367337226867676, | |
| "learning_rate": 5.761316872427984e-07, | |
| "loss": 0.6663, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1228878648233487, | |
| "grad_norm": 3.747973918914795, | |
| "learning_rate": 7.818930041152265e-07, | |
| "loss": 0.655, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.15360983102918588, | |
| "grad_norm": 2.209118127822876, | |
| "learning_rate": 9.876543209876544e-07, | |
| "loss": 0.6173, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.18433179723502305, | |
| "grad_norm": 1.1678818464279175, | |
| "learning_rate": 1.1934156378600823e-06, | |
| "loss": 0.6048, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.21505376344086022, | |
| "grad_norm": 0.8436072468757629, | |
| "learning_rate": 1.3991769547325104e-06, | |
| "loss": 0.5856, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.2457757296466974, | |
| "grad_norm": 0.9035472273826599, | |
| "learning_rate": 1.6049382716049383e-06, | |
| "loss": 0.5731, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2764976958525346, | |
| "grad_norm": 0.6879045963287354, | |
| "learning_rate": 1.8106995884773665e-06, | |
| "loss": 0.5672, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.30721966205837176, | |
| "grad_norm": 0.5572217106819153, | |
| "learning_rate": 2.0164609053497946e-06, | |
| "loss": 0.5652, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3379416282642089, | |
| "grad_norm": 0.475868821144104, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.5569, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.3686635944700461, | |
| "grad_norm": 0.4037950932979584, | |
| "learning_rate": 2.4279835390946504e-06, | |
| "loss": 0.552, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.39938556067588327, | |
| "grad_norm": 0.3646543323993683, | |
| "learning_rate": 2.6337448559670788e-06, | |
| "loss": 0.5486, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.43010752688172044, | |
| "grad_norm": 0.3199547231197357, | |
| "learning_rate": 2.8395061728395062e-06, | |
| "loss": 0.548, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.4608294930875576, | |
| "grad_norm": 0.29529860615730286, | |
| "learning_rate": 3.0452674897119346e-06, | |
| "loss": 0.5412, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4915514592933948, | |
| "grad_norm": 0.2877354025840759, | |
| "learning_rate": 3.2510288065843625e-06, | |
| "loss": 0.5384, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.522273425499232, | |
| "grad_norm": 0.27960506081581116, | |
| "learning_rate": 3.4567901234567904e-06, | |
| "loss": 0.5408, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5529953917050692, | |
| "grad_norm": 0.3203478455543518, | |
| "learning_rate": 3.6625514403292183e-06, | |
| "loss": 0.5385, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5837173579109063, | |
| "grad_norm": 0.293573260307312, | |
| "learning_rate": 3.868312757201647e-06, | |
| "loss": 0.5367, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6144393241167435, | |
| "grad_norm": 0.2991442084312439, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 0.5318, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 0.28697723150253296, | |
| "learning_rate": 4.2798353909465025e-06, | |
| "loss": 0.5325, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.6758832565284179, | |
| "grad_norm": 0.29979708790779114, | |
| "learning_rate": 4.485596707818931e-06, | |
| "loss": 0.5329, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.706605222734255, | |
| "grad_norm": 0.28516969084739685, | |
| "learning_rate": 4.691358024691358e-06, | |
| "loss": 0.5303, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.7373271889400922, | |
| "grad_norm": 0.2979312837123871, | |
| "learning_rate": 4.897119341563787e-06, | |
| "loss": 0.532, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7680491551459293, | |
| "grad_norm": 0.29658472537994385, | |
| "learning_rate": 5.102880658436214e-06, | |
| "loss": 0.5361, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.7987711213517665, | |
| "grad_norm": 0.3170669972896576, | |
| "learning_rate": 5.3086419753086425e-06, | |
| "loss": 0.5307, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8294930875576036, | |
| "grad_norm": 0.3079938590526581, | |
| "learning_rate": 5.514403292181071e-06, | |
| "loss": 0.5277, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.8602150537634409, | |
| "grad_norm": 0.33612361550331116, | |
| "learning_rate": 5.720164609053498e-06, | |
| "loss": 0.5298, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.890937019969278, | |
| "grad_norm": 0.3119032382965088, | |
| "learning_rate": 5.925925925925926e-06, | |
| "loss": 0.5248, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9216589861751152, | |
| "grad_norm": 0.33814293146133423, | |
| "learning_rate": 6.131687242798354e-06, | |
| "loss": 0.5284, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.322230726480484, | |
| "learning_rate": 6.3374485596707825e-06, | |
| "loss": 0.5281, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.9831029185867896, | |
| "grad_norm": 0.3144535720348358, | |
| "learning_rate": 6.543209876543211e-06, | |
| "loss": 0.5242, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.012288786482335, | |
| "grad_norm": 0.29515042901039124, | |
| "learning_rate": 6.748971193415639e-06, | |
| "loss": 0.5163, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.043010752688172, | |
| "grad_norm": 0.3272690176963806, | |
| "learning_rate": 6.954732510288067e-06, | |
| "loss": 0.5168, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.0737327188940091, | |
| "grad_norm": 0.38112202286720276, | |
| "learning_rate": 7.160493827160494e-06, | |
| "loss": 0.5175, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.1044546850998465, | |
| "grad_norm": 0.3627144992351532, | |
| "learning_rate": 7.3662551440329225e-06, | |
| "loss": 0.5146, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.1351766513056836, | |
| "grad_norm": 0.34759828448295593, | |
| "learning_rate": 7.57201646090535e-06, | |
| "loss": 0.5091, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.1658986175115207, | |
| "grad_norm": 0.32023346424102783, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.5154, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.1966205837173578, | |
| "grad_norm": 0.34595441818237305, | |
| "learning_rate": 7.983539094650207e-06, | |
| "loss": 0.514, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.2273425499231951, | |
| "grad_norm": 0.35786375403404236, | |
| "learning_rate": 8.189300411522634e-06, | |
| "loss": 0.515, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2580645161290323, | |
| "grad_norm": 0.3521522283554077, | |
| "learning_rate": 8.395061728395062e-06, | |
| "loss": 0.5096, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.2887864823348694, | |
| "grad_norm": 0.3382728397846222, | |
| "learning_rate": 8.60082304526749e-06, | |
| "loss": 0.5092, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.3195084485407067, | |
| "grad_norm": 0.3584599494934082, | |
| "learning_rate": 8.806584362139918e-06, | |
| "loss": 0.5088, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.3502304147465438, | |
| "grad_norm": 0.3886154294013977, | |
| "learning_rate": 9.012345679012346e-06, | |
| "loss": 0.5103, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.380952380952381, | |
| "grad_norm": 0.35392460227012634, | |
| "learning_rate": 9.218106995884775e-06, | |
| "loss": 0.5122, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.411674347158218, | |
| "grad_norm": 0.3449483811855316, | |
| "learning_rate": 9.423868312757202e-06, | |
| "loss": 0.5101, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.4423963133640554, | |
| "grad_norm": 0.40504640340805054, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.5079, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.4731182795698925, | |
| "grad_norm": 0.3839814066886902, | |
| "learning_rate": 9.835390946502057e-06, | |
| "loss": 0.5075, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.5038402457757296, | |
| "grad_norm": 0.3998360335826874, | |
| "learning_rate": 9.999994841278135e-06, | |
| "loss": 0.5117, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.5345622119815667, | |
| "grad_norm": 0.3241407573223114, | |
| "learning_rate": 9.99981428713058e-06, | |
| "loss": 0.5116, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.565284178187404, | |
| "grad_norm": 0.3408064544200897, | |
| "learning_rate": 9.999375807534642e-06, | |
| "loss": 0.5086, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.5960061443932412, | |
| "grad_norm": 0.3956799805164337, | |
| "learning_rate": 9.998679425110168e-06, | |
| "loss": 0.5057, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6267281105990783, | |
| "grad_norm": 0.34674304723739624, | |
| "learning_rate": 9.997725175781445e-06, | |
| "loss": 0.5042, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.6574500768049156, | |
| "grad_norm": 0.33803871273994446, | |
| "learning_rate": 9.996513108775338e-06, | |
| "loss": 0.5094, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.6881720430107527, | |
| "grad_norm": 0.3286557197570801, | |
| "learning_rate": 9.995043286618752e-06, | |
| "loss": 0.5082, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.7188940092165899, | |
| "grad_norm": 0.4859721064567566, | |
| "learning_rate": 9.993315785135417e-06, | |
| "loss": 0.5062, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.7496159754224272, | |
| "grad_norm": 0.39187705516815186, | |
| "learning_rate": 9.991330693441956e-06, | |
| "loss": 0.5004, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.780337941628264, | |
| "grad_norm": 0.3706142008304596, | |
| "learning_rate": 9.989088113943309e-06, | |
| "loss": 0.5074, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.8110599078341014, | |
| "grad_norm": 0.36376601457595825, | |
| "learning_rate": 9.986588162327436e-06, | |
| "loss": 0.5043, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.8417818740399385, | |
| "grad_norm": 0.3372829854488373, | |
| "learning_rate": 9.983830967559355e-06, | |
| "loss": 0.505, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.8725038402457757, | |
| "grad_norm": 0.3605220913887024, | |
| "learning_rate": 9.98081667187449e-06, | |
| "loss": 0.506, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.903225806451613, | |
| "grad_norm": 0.37473252415657043, | |
| "learning_rate": 9.977545430771332e-06, | |
| "loss": 0.5065, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.93394777265745, | |
| "grad_norm": 0.3622889816761017, | |
| "learning_rate": 9.974017413003407e-06, | |
| "loss": 0.5049, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.9646697388632872, | |
| "grad_norm": 0.36003556847572327, | |
| "learning_rate": 9.970232800570594e-06, | |
| "loss": 0.5042, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.9953917050691246, | |
| "grad_norm": 0.35878923535346985, | |
| "learning_rate": 9.966191788709716e-06, | |
| "loss": 0.498, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.02457757296467, | |
| "grad_norm": 0.3277081847190857, | |
| "learning_rate": 9.961894585884472e-06, | |
| "loss": 0.4833, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.055299539170507, | |
| "grad_norm": 0.35245636105537415, | |
| "learning_rate": 9.957341413774693e-06, | |
| "loss": 0.4823, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.086021505376344, | |
| "grad_norm": 0.3628138601779938, | |
| "learning_rate": 9.952532507264892e-06, | |
| "loss": 0.4789, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.1167434715821813, | |
| "grad_norm": 0.36662936210632324, | |
| "learning_rate": 9.947468114432156e-06, | |
| "loss": 0.4876, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.1474654377880182, | |
| "grad_norm": 0.3806234896183014, | |
| "learning_rate": 9.942148496533348e-06, | |
| "loss": 0.4797, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.1781874039938556, | |
| "grad_norm": 0.3836243152618408, | |
| "learning_rate": 9.936573927991631e-06, | |
| "loss": 0.4823, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.208909370199693, | |
| "grad_norm": 0.3716926872730255, | |
| "learning_rate": 9.930744696382298e-06, | |
| "loss": 0.4846, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.23963133640553, | |
| "grad_norm": 0.3589572608470917, | |
| "learning_rate": 9.924661102417959e-06, | |
| "loss": 0.4794, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.270353302611367, | |
| "grad_norm": 0.44799497723579407, | |
| "learning_rate": 9.918323459933006e-06, | |
| "loss": 0.4849, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.3010752688172045, | |
| "grad_norm": 0.35237064957618713, | |
| "learning_rate": 9.911732095867443e-06, | |
| "loss": 0.4819, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.3317972350230414, | |
| "grad_norm": 0.3844442665576935, | |
| "learning_rate": 9.904887350250002e-06, | |
| "loss": 0.4828, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.3625192012288787, | |
| "grad_norm": 0.34357205033302307, | |
| "learning_rate": 9.897789576180617e-06, | |
| "loss": 0.4795, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.3932411674347156, | |
| "grad_norm": 0.34739232063293457, | |
| "learning_rate": 9.8904391398122e-06, | |
| "loss": 0.4817, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.423963133640553, | |
| "grad_norm": 0.3262459337711334, | |
| "learning_rate": 9.882836420331753e-06, | |
| "loss": 0.4807, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.4546850998463903, | |
| "grad_norm": 0.32715994119644165, | |
| "learning_rate": 9.87498180994081e-06, | |
| "loss": 0.4825, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.485407066052227, | |
| "grad_norm": 0.3524874150753021, | |
| "learning_rate": 9.8668757138352e-06, | |
| "loss": 0.4832, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.5161290322580645, | |
| "grad_norm": 0.3556855618953705, | |
| "learning_rate": 9.858518550184154e-06, | |
| "loss": 0.4784, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.546850998463902, | |
| "grad_norm": 0.350763201713562, | |
| "learning_rate": 9.849910750108718e-06, | |
| "loss": 0.4796, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.5775729646697387, | |
| "grad_norm": 0.40554359555244446, | |
| "learning_rate": 9.841052757659525e-06, | |
| "loss": 0.4795, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.608294930875576, | |
| "grad_norm": 0.38155123591423035, | |
| "learning_rate": 9.831945029793884e-06, | |
| "loss": 0.4824, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.6390168970814134, | |
| "grad_norm": 0.34588319063186646, | |
| "learning_rate": 9.822588036352201e-06, | |
| "loss": 0.4812, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.6697388632872503, | |
| "grad_norm": 0.3738536536693573, | |
| "learning_rate": 9.812982260033753e-06, | |
| "loss": 0.4776, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.7004608294930876, | |
| "grad_norm": 0.34988853335380554, | |
| "learning_rate": 9.803128196371778e-06, | |
| "loss": 0.4827, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.731182795698925, | |
| "grad_norm": 0.3567947447299957, | |
| "learning_rate": 9.793026353707915e-06, | |
| "loss": 0.4824, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 0.3680736720561981, | |
| "learning_rate": 9.782677253165979e-06, | |
| "loss": 0.4817, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.792626728110599, | |
| "grad_norm": 0.3302510380744934, | |
| "learning_rate": 9.77208142862508e-06, | |
| "loss": 0.4799, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.823348694316436, | |
| "grad_norm": 0.3362921178340912, | |
| "learning_rate": 9.761239426692077e-06, | |
| "loss": 0.4792, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.8540706605222734, | |
| "grad_norm": 0.3084135949611664, | |
| "learning_rate": 9.750151806673389e-06, | |
| "loss": 0.4798, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.8847926267281108, | |
| "grad_norm": 0.3991258442401886, | |
| "learning_rate": 9.738819140546135e-06, | |
| "loss": 0.4776, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.9155145929339477, | |
| "grad_norm": 0.3840397298336029, | |
| "learning_rate": 9.727242012928622e-06, | |
| "loss": 0.4827, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.946236559139785, | |
| "grad_norm": 0.3366018235683441, | |
| "learning_rate": 9.715421021050205e-06, | |
| "loss": 0.478, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.976958525345622, | |
| "grad_norm": 0.3289054036140442, | |
| "learning_rate": 9.703356774720454e-06, | |
| "loss": 0.4806, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.0061443932411676, | |
| "grad_norm": 0.41949138045310974, | |
| "learning_rate": 9.69104989629772e-06, | |
| "loss": 0.474, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.0368663594470044, | |
| "grad_norm": 0.3739219605922699, | |
| "learning_rate": 9.678501020657008e-06, | |
| "loss": 0.4555, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.067588325652842, | |
| "grad_norm": 0.3918289244174957, | |
| "learning_rate": 9.665710795157236e-06, | |
| "loss": 0.4559, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.098310291858679, | |
| "grad_norm": 0.37078753113746643, | |
| "learning_rate": 9.652679879607843e-06, | |
| "loss": 0.4523, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.129032258064516, | |
| "grad_norm": 0.39428192377090454, | |
| "learning_rate": 9.639408946234745e-06, | |
| "loss": 0.455, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.1597542242703534, | |
| "grad_norm": 0.36103686690330505, | |
| "learning_rate": 9.625898679645656e-06, | |
| "loss": 0.4539, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.1904761904761907, | |
| "grad_norm": 0.3871241807937622, | |
| "learning_rate": 9.612149776794776e-06, | |
| "loss": 0.4585, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.2211981566820276, | |
| "grad_norm": 0.3608538508415222, | |
| "learning_rate": 9.59816294694684e-06, | |
| "loss": 0.4545, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.251920122887865, | |
| "grad_norm": 0.33820873498916626, | |
| "learning_rate": 9.583938911640513e-06, | |
| "loss": 0.4581, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.282642089093702, | |
| "grad_norm": 0.3311152160167694, | |
| "learning_rate": 9.569478404651192e-06, | |
| "loss": 0.4572, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.313364055299539, | |
| "grad_norm": 0.3974754512310028, | |
| "learning_rate": 9.55478217195313e-06, | |
| "loss": 0.4579, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.3440860215053765, | |
| "grad_norm": 0.36764049530029297, | |
| "learning_rate": 9.53985097168097e-06, | |
| "loss": 0.4548, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.3748079877112134, | |
| "grad_norm": 0.3310830295085907, | |
| "learning_rate": 9.524685574090627e-06, | |
| "loss": 0.4596, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.4055299539170507, | |
| "grad_norm": 0.35807356238365173, | |
| "learning_rate": 9.50928676151955e-06, | |
| "loss": 0.4561, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.436251920122888, | |
| "grad_norm": 0.3509482741355896, | |
| "learning_rate": 9.493655328346378e-06, | |
| "loss": 0.4601, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.466973886328725, | |
| "grad_norm": 0.32899123430252075, | |
| "learning_rate": 9.477792080949938e-06, | |
| "loss": 0.458, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.4976958525345623, | |
| "grad_norm": 0.3493783473968506, | |
| "learning_rate": 9.461697837667668e-06, | |
| "loss": 0.4578, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.528417818740399, | |
| "grad_norm": 0.42410966753959656, | |
| "learning_rate": 9.445373428753386e-06, | |
| "loss": 0.457, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.5591397849462365, | |
| "grad_norm": 0.39236894249916077, | |
| "learning_rate": 9.42881969633447e-06, | |
| "loss": 0.4621, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.589861751152074, | |
| "grad_norm": 0.3428690433502197, | |
| "learning_rate": 9.412037494368412e-06, | |
| "loss": 0.4613, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.6205837173579107, | |
| "grad_norm": 0.3500923216342926, | |
| "learning_rate": 9.395027688598756e-06, | |
| "loss": 0.4553, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.651305683563748, | |
| "grad_norm": 0.3521360456943512, | |
| "learning_rate": 9.377791156510456e-06, | |
| "loss": 0.4609, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.6820276497695854, | |
| "grad_norm": 0.3520371615886688, | |
| "learning_rate": 9.360328787284587e-06, | |
| "loss": 0.4561, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.7127496159754223, | |
| "grad_norm": 0.37317851185798645, | |
| "learning_rate": 9.342641481752492e-06, | |
| "loss": 0.4543, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.7434715821812596, | |
| "grad_norm": 0.3881990313529968, | |
| "learning_rate": 9.324730152349305e-06, | |
| "loss": 0.4573, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.774193548387097, | |
| "grad_norm": 0.34541720151901245, | |
| "learning_rate": 9.306595723066878e-06, | |
| "loss": 0.4588, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.804915514592934, | |
| "grad_norm": 0.6174806356430054, | |
| "learning_rate": 9.28823912940612e-06, | |
| "loss": 0.4615, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.835637480798771, | |
| "grad_norm": 0.37580618262290955, | |
| "learning_rate": 9.26966131832873e-06, | |
| "loss": 0.4603, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.8663594470046085, | |
| "grad_norm": 0.3373568058013916, | |
| "learning_rate": 9.250863248208357e-06, | |
| "loss": 0.4575, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.8970814132104454, | |
| "grad_norm": 0.3492389917373657, | |
| "learning_rate": 9.231845888781153e-06, | |
| "loss": 0.457, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.9278033794162828, | |
| "grad_norm": 0.353481262922287, | |
| "learning_rate": 9.212610221095748e-06, | |
| "loss": 0.4593, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.9585253456221197, | |
| "grad_norm": 0.339603066444397, | |
| "learning_rate": 9.193157237462642e-06, | |
| "loss": 0.4583, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.989247311827957, | |
| "grad_norm": 0.35986068844795227, | |
| "learning_rate": 9.173487941403011e-06, | |
| "loss": 0.4575, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.018433179723503, | |
| "grad_norm": 0.39629873633384705, | |
| "learning_rate": 9.153603347596946e-06, | |
| "loss": 0.4437, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.04915514592934, | |
| "grad_norm": 0.38085299730300903, | |
| "learning_rate": 9.133504481831103e-06, | |
| "loss": 0.4315, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.0798771121351765, | |
| "grad_norm": 0.375144898891449, | |
| "learning_rate": 9.113192380945783e-06, | |
| "loss": 0.4332, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.110599078341014, | |
| "grad_norm": 0.3690689206123352, | |
| "learning_rate": 9.092668092781454e-06, | |
| "loss": 0.4286, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.141321044546851, | |
| "grad_norm": 0.3713686764240265, | |
| "learning_rate": 9.071932676124686e-06, | |
| "loss": 0.4321, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.172043010752688, | |
| "grad_norm": 0.37255361676216125, | |
| "learning_rate": 9.050987200653538e-06, | |
| "loss": 0.4308, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.202764976958525, | |
| "grad_norm": 0.4153440296649933, | |
| "learning_rate": 9.029832746882372e-06, | |
| "loss": 0.434, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.233486943164363, | |
| "grad_norm": 0.3848015367984772, | |
| "learning_rate": 9.008470406106118e-06, | |
| "loss": 0.4321, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.2642089093702, | |
| "grad_norm": 0.38491949439048767, | |
| "learning_rate": 8.986901280343973e-06, | |
| "loss": 0.437, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.2949308755760365, | |
| "grad_norm": 0.40272125601768494, | |
| "learning_rate": 8.96512648228255e-06, | |
| "loss": 0.4327, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.325652841781874, | |
| "grad_norm": 0.40901532769203186, | |
| "learning_rate": 8.943147135218482e-06, | |
| "loss": 0.4355, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.356374807987711, | |
| "grad_norm": 0.37816864252090454, | |
| "learning_rate": 8.920964373000474e-06, | |
| "loss": 0.4309, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.387096774193548, | |
| "grad_norm": 0.3686360716819763, | |
| "learning_rate": 8.898579339970806e-06, | |
| "loss": 0.4333, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.417818740399386, | |
| "grad_norm": 0.3904341161251068, | |
| "learning_rate": 8.875993190906309e-06, | |
| "loss": 0.436, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.448540706605223, | |
| "grad_norm": 0.369642049074173, | |
| "learning_rate": 8.85320709095878e-06, | |
| "loss": 0.4393, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.47926267281106, | |
| "grad_norm": 0.39105841517448425, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.4335, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.509984639016897, | |
| "grad_norm": 0.35647451877593994, | |
| "learning_rate": 8.80703975053554e-06, | |
| "loss": 0.4365, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.540706605222734, | |
| "grad_norm": 0.37886905670166016, | |
| "learning_rate": 8.783660891694683e-06, | |
| "loss": 0.4358, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.33613675832748413, | |
| "learning_rate": 8.760086845117648e-06, | |
| "loss": 0.4339, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.602150537634409, | |
| "grad_norm": 0.3609409034252167, | |
| "learning_rate": 8.736318826918909e-06, | |
| "loss": 0.4367, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.632872503840246, | |
| "grad_norm": 0.3324005603790283, | |
| "learning_rate": 8.71235806321936e-06, | |
| "loss": 0.4368, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.663594470046083, | |
| "grad_norm": 0.34170496463775635, | |
| "learning_rate": 8.688205790083053e-06, | |
| "loss": 0.4364, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.6943164362519205, | |
| "grad_norm": 0.3765306770801544, | |
| "learning_rate": 8.663863253453444e-06, | |
| "loss": 0.4381, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.725038402457757, | |
| "grad_norm": 0.3638916611671448, | |
| "learning_rate": 8.639331709089107e-06, | |
| "loss": 0.438, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.755760368663594, | |
| "grad_norm": 0.3378274738788605, | |
| "learning_rate": 8.614612422498965e-06, | |
| "loss": 0.4396, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.786482334869431, | |
| "grad_norm": 0.3760294020175934, | |
| "learning_rate": 8.589706668876995e-06, | |
| "loss": 0.4387, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.817204301075269, | |
| "grad_norm": 0.3364088535308838, | |
| "learning_rate": 8.564615733036457e-06, | |
| "loss": 0.4388, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.847926267281106, | |
| "grad_norm": 0.3584051728248596, | |
| "learning_rate": 8.539340909343597e-06, | |
| "loss": 0.4355, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.878648233486944, | |
| "grad_norm": 0.3589382469654083, | |
| "learning_rate": 8.513883501650892e-06, | |
| "loss": 0.4393, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.9093701996927805, | |
| "grad_norm": 0.362913578748703, | |
| "learning_rate": 8.488244823229781e-06, | |
| "loss": 0.4391, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.940092165898617, | |
| "grad_norm": 0.38569971919059753, | |
| "learning_rate": 8.462426196702912e-06, | |
| "loss": 0.44, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.970814132104454, | |
| "grad_norm": 0.7799672484397888, | |
| "learning_rate": 8.436428953975921e-06, | |
| "loss": 0.4402, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.35950392484664917, | |
| "learning_rate": 8.41025443616872e-06, | |
| "loss": 0.4385, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 5.030721966205837, | |
| "grad_norm": 0.434950053691864, | |
| "learning_rate": 8.38390399354631e-06, | |
| "loss": 0.4124, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.061443932411675, | |
| "grad_norm": 0.38890355825424194, | |
| "learning_rate": 8.357378985449124e-06, | |
| "loss": 0.4077, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 5.092165898617512, | |
| "grad_norm": 0.3747502267360687, | |
| "learning_rate": 8.330680780222907e-06, | |
| "loss": 0.4116, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.1228878648233485, | |
| "grad_norm": 0.4041999578475952, | |
| "learning_rate": 8.303810755148127e-06, | |
| "loss": 0.4125, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 5.153609831029186, | |
| "grad_norm": 0.8506478667259216, | |
| "learning_rate": 8.276770296368922e-06, | |
| "loss": 0.4086, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.184331797235023, | |
| "grad_norm": 0.43535116314888, | |
| "learning_rate": 8.249560798821592e-06, | |
| "loss": 0.4118, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 5.21505376344086, | |
| "grad_norm": 0.4166457951068878, | |
| "learning_rate": 8.222183666162647e-06, | |
| "loss": 0.41, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.245775729646698, | |
| "grad_norm": 0.3790026009082794, | |
| "learning_rate": 8.194640310696383e-06, | |
| "loss": 0.4131, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 5.276497695852535, | |
| "grad_norm": 0.4068205654621124, | |
| "learning_rate": 8.16693215330204e-06, | |
| "loss": 0.4149, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.307219662058372, | |
| "grad_norm": 0.40233853459358215, | |
| "learning_rate": 8.139060623360494e-06, | |
| "loss": 0.414, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 5.337941628264209, | |
| "grad_norm": 0.4058436155319214, | |
| "learning_rate": 8.111027158680516e-06, | |
| "loss": 0.4128, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.368663594470046, | |
| "grad_norm": 0.35581091046333313, | |
| "learning_rate": 8.082833205424614e-06, | |
| "loss": 0.412, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 5.399385560675883, | |
| "grad_norm": 0.39174729585647583, | |
| "learning_rate": 8.054480218034415e-06, | |
| "loss": 0.4127, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.43010752688172, | |
| "grad_norm": 0.4122447371482849, | |
| "learning_rate": 8.02596965915564e-06, | |
| "loss": 0.4143, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 5.460829493087558, | |
| "grad_norm": 0.37394076585769653, | |
| "learning_rate": 7.997302999562657e-06, | |
| "loss": 0.4165, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.491551459293395, | |
| "grad_norm": 0.38974493741989136, | |
| "learning_rate": 7.968481718082601e-06, | |
| "loss": 0.4158, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 5.522273425499232, | |
| "grad_norm": 0.3667392134666443, | |
| "learning_rate": 7.93950730151908e-06, | |
| "loss": 0.4186, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.552995391705069, | |
| "grad_norm": 0.3641802668571472, | |
| "learning_rate": 7.910381244575491e-06, | |
| "loss": 0.4146, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 5.583717357910906, | |
| "grad_norm": 0.37418097257614136, | |
| "learning_rate": 7.881105049777902e-06, | |
| "loss": 0.4146, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 5.614439324116743, | |
| "grad_norm": 0.3662942051887512, | |
| "learning_rate": 7.851680227397541e-06, | |
| "loss": 0.4181, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 5.645161290322581, | |
| "grad_norm": 0.3564474284648895, | |
| "learning_rate": 7.82210829537289e-06, | |
| "loss": 0.4122, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 5.675883256528418, | |
| "grad_norm": 0.3735935091972351, | |
| "learning_rate": 7.792390779231374e-06, | |
| "loss": 0.4152, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.706605222734255, | |
| "grad_norm": 0.3896511197090149, | |
| "learning_rate": 7.762529212010675e-06, | |
| "loss": 0.4125, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 5.7373271889400925, | |
| "grad_norm": 0.42632153630256653, | |
| "learning_rate": 7.732525134179625e-06, | |
| "loss": 0.4138, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 5.768049155145929, | |
| "grad_norm": 0.3700067698955536, | |
| "learning_rate": 7.702380093558766e-06, | |
| "loss": 0.4128, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 5.798771121351766, | |
| "grad_norm": 0.3713553547859192, | |
| "learning_rate": 7.672095645240479e-06, | |
| "loss": 0.4153, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 5.829493087557603, | |
| "grad_norm": 0.49530503153800964, | |
| "learning_rate": 7.641673351508774e-06, | |
| "loss": 0.4159, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.860215053763441, | |
| "grad_norm": 0.3351239562034607, | |
| "learning_rate": 7.6111147817586925e-06, | |
| "loss": 0.4181, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 5.890937019969278, | |
| "grad_norm": 0.3583086133003235, | |
| "learning_rate": 7.580421512415349e-06, | |
| "loss": 0.4148, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 5.921658986175116, | |
| "grad_norm": 0.3566780388355255, | |
| "learning_rate": 7.549595126852605e-06, | |
| "loss": 0.4133, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 5.9523809523809526, | |
| "grad_norm": 0.3630661964416504, | |
| "learning_rate": 7.518637215311388e-06, | |
| "loss": 0.4151, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 5.983102918586789, | |
| "grad_norm": 0.35128363966941833, | |
| "learning_rate": 7.487549374817662e-06, | |
| "loss": 0.4159, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 6.012288786482335, | |
| "grad_norm": 0.442436158657074, | |
| "learning_rate": 7.456333209100032e-06, | |
| "loss": 0.4034, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.043010752688172, | |
| "grad_norm": 0.41370487213134766, | |
| "learning_rate": 7.424990328507017e-06, | |
| "loss": 0.3851, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 6.073732718894009, | |
| "grad_norm": 0.4333699941635132, | |
| "learning_rate": 7.393522349923981e-06, | |
| "loss": 0.3869, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.104454685099847, | |
| "grad_norm": 0.4446549415588379, | |
| "learning_rate": 7.361930896689713e-06, | |
| "loss": 0.3836, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 6.135176651305684, | |
| "grad_norm": 0.40873849391937256, | |
| "learning_rate": 7.330217598512696e-06, | |
| "loss": 0.3857, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.1658986175115205, | |
| "grad_norm": 0.4244365990161896, | |
| "learning_rate": 7.2983840913870215e-06, | |
| "loss": 0.3863, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 6.196620583717358, | |
| "grad_norm": 0.3845650851726532, | |
| "learning_rate": 7.266432017508008e-06, | |
| "loss": 0.3901, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.227342549923195, | |
| "grad_norm": 0.39868220686912537, | |
| "learning_rate": 7.234363025187474e-06, | |
| "loss": 0.3855, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 6.258064516129032, | |
| "grad_norm": 0.37892380356788635, | |
| "learning_rate": 7.202178768768711e-06, | |
| "loss": 0.3928, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.28878648233487, | |
| "grad_norm": 0.3923156261444092, | |
| "learning_rate": 7.169880908541136e-06, | |
| "loss": 0.3921, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 6.319508448540707, | |
| "grad_norm": 0.39880916476249695, | |
| "learning_rate": 7.137471110654656e-06, | |
| "loss": 0.3938, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.350230414746544, | |
| "grad_norm": 0.4066980481147766, | |
| "learning_rate": 7.104951047033697e-06, | |
| "loss": 0.3906, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 6.380952380952381, | |
| "grad_norm": 0.3751300573348999, | |
| "learning_rate": 7.0723223952909694e-06, | |
| "loss": 0.3909, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.411674347158218, | |
| "grad_norm": 0.3525156080722809, | |
| "learning_rate": 7.039586838640918e-06, | |
| "loss": 0.3894, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 6.442396313364055, | |
| "grad_norm": 0.36944258213043213, | |
| "learning_rate": 7.006746065812895e-06, | |
| "loss": 0.3909, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.473118279569892, | |
| "grad_norm": 0.3836762011051178, | |
| "learning_rate": 6.973801770964031e-06, | |
| "loss": 0.3896, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 6.50384024577573, | |
| "grad_norm": 0.41395968198776245, | |
| "learning_rate": 6.940755653591859e-06, | |
| "loss": 0.3889, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 6.534562211981567, | |
| "grad_norm": 0.42266151309013367, | |
| "learning_rate": 6.907609418446623e-06, | |
| "loss": 0.3924, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 6.565284178187404, | |
| "grad_norm": 0.38352274894714355, | |
| "learning_rate": 6.8743647754433485e-06, | |
| "loss": 0.3934, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 6.596006144393241, | |
| "grad_norm": 0.3761062026023865, | |
| "learning_rate": 6.841023439573623e-06, | |
| "loss": 0.3915, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 6.626728110599078, | |
| "grad_norm": 0.38670945167541504, | |
| "learning_rate": 6.807587130817134e-06, | |
| "loss": 0.3925, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 6.657450076804915, | |
| "grad_norm": 0.36626312136650085, | |
| "learning_rate": 6.774057574052932e-06, | |
| "loss": 0.3944, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 6.688172043010753, | |
| "grad_norm": 0.4045194685459137, | |
| "learning_rate": 6.740436498970453e-06, | |
| "loss": 0.3955, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 6.71889400921659, | |
| "grad_norm": 0.4138599932193756, | |
| "learning_rate": 6.706725639980294e-06, | |
| "loss": 0.3929, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 6.749615975422427, | |
| "grad_norm": 0.39506402611732483, | |
| "learning_rate": 6.6729267361247295e-06, | |
| "loss": 0.3883, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 6.7803379416282645, | |
| "grad_norm": 0.3903568387031555, | |
| "learning_rate": 6.639041530988009e-06, | |
| "loss": 0.3939, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 6.811059907834101, | |
| "grad_norm": 0.3678980767726898, | |
| "learning_rate": 6.605071772606404e-06, | |
| "loss": 0.394, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 6.841781874039938, | |
| "grad_norm": 0.35300132632255554, | |
| "learning_rate": 6.571019213378034e-06, | |
| "loss": 0.391, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 6.872503840245776, | |
| "grad_norm": 0.3788436949253082, | |
| "learning_rate": 6.536885609972467e-06, | |
| "loss": 0.397, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 6.903225806451613, | |
| "grad_norm": 0.38878560066223145, | |
| "learning_rate": 6.502672723240103e-06, | |
| "loss": 0.3969, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 6.93394777265745, | |
| "grad_norm": 0.4072780907154083, | |
| "learning_rate": 6.4683823181213224e-06, | |
| "loss": 0.3969, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 6.964669738863288, | |
| "grad_norm": 0.40496107935905457, | |
| "learning_rate": 6.434016163555452e-06, | |
| "loss": 0.3957, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 6.9953917050691246, | |
| "grad_norm": 0.3747064173221588, | |
| "learning_rate": 6.399576032389505e-06, | |
| "loss": 0.3984, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.024577572964669, | |
| "grad_norm": 0.5090351104736328, | |
| "learning_rate": 6.365063701286728e-06, | |
| "loss": 0.3714, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 7.055299539170507, | |
| "grad_norm": 0.42551228404045105, | |
| "learning_rate": 6.330480950634942e-06, | |
| "loss": 0.3673, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 7.086021505376344, | |
| "grad_norm": 0.4707318842411041, | |
| "learning_rate": 6.2958295644547026e-06, | |
| "loss": 0.3641, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 7.116743471582181, | |
| "grad_norm": 0.40848663449287415, | |
| "learning_rate": 6.261111330307272e-06, | |
| "loss": 0.3628, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 7.147465437788019, | |
| "grad_norm": 0.4382622539997101, | |
| "learning_rate": 6.22632803920239e-06, | |
| "loss": 0.3691, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 7.178187403993856, | |
| "grad_norm": 0.3866026699542999, | |
| "learning_rate": 6.191481485505898e-06, | |
| "loss": 0.3639, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 7.2089093701996925, | |
| "grad_norm": 0.4263141453266144, | |
| "learning_rate": 6.1565734668471614e-06, | |
| "loss": 0.3634, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 7.23963133640553, | |
| "grad_norm": 0.4050372242927551, | |
| "learning_rate": 6.121605784026339e-06, | |
| "loss": 0.3648, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 7.270353302611367, | |
| "grad_norm": 0.3879098892211914, | |
| "learning_rate": 6.086580240921486e-06, | |
| "loss": 0.3667, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 7.301075268817204, | |
| "grad_norm": 0.4055810868740082, | |
| "learning_rate": 6.051498644395496e-06, | |
| "loss": 0.3656, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 7.331797235023042, | |
| "grad_norm": 0.42201170325279236, | |
| "learning_rate": 6.01636280420289e-06, | |
| "loss": 0.3679, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 7.362519201228879, | |
| "grad_norm": 0.4164835214614868, | |
| "learning_rate": 5.981174532896459e-06, | |
| "loss": 0.367, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 7.393241167434716, | |
| "grad_norm": 0.39605438709259033, | |
| "learning_rate": 5.9459356457337556e-06, | |
| "loss": 0.3647, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 7.423963133640553, | |
| "grad_norm": 0.4393250644207001, | |
| "learning_rate": 5.910647960583458e-06, | |
| "loss": 0.3733, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 7.45468509984639, | |
| "grad_norm": 0.37553438544273376, | |
| "learning_rate": 5.875313297831579e-06, | |
| "loss": 0.37, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 7.485407066052227, | |
| "grad_norm": 0.3898600935935974, | |
| "learning_rate": 5.839933480287572e-06, | |
| "loss": 0.3678, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 7.516129032258064, | |
| "grad_norm": 0.4083476662635803, | |
| "learning_rate": 5.804510333090287e-06, | |
| "loss": 0.3665, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 7.546850998463902, | |
| "grad_norm": 0.40433645248413086, | |
| "learning_rate": 5.769045683613822e-06, | |
| "loss": 0.3715, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 7.577572964669739, | |
| "grad_norm": 0.4303235709667206, | |
| "learning_rate": 5.733541361373253e-06, | |
| "loss": 0.3711, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 7.6082949308755765, | |
| "grad_norm": 0.40306177735328674, | |
| "learning_rate": 5.697999197930259e-06, | |
| "loss": 0.3659, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 7.639016897081413, | |
| "grad_norm": 0.39787065982818604, | |
| "learning_rate": 5.662421026798624e-06, | |
| "loss": 0.3722, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 7.66973886328725, | |
| "grad_norm": 0.401962012052536, | |
| "learning_rate": 5.626808683349672e-06, | |
| "loss": 0.3691, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 7.700460829493087, | |
| "grad_norm": 0.38256722688674927, | |
| "learning_rate": 5.591164004717567e-06, | |
| "loss": 0.3694, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 7.731182795698925, | |
| "grad_norm": 0.4020300507545471, | |
| "learning_rate": 5.55548882970455e-06, | |
| "loss": 0.3728, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.761904761904762, | |
| "grad_norm": 0.41450026631355286, | |
| "learning_rate": 5.519784998686081e-06, | |
| "loss": 0.3673, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 7.792626728110599, | |
| "grad_norm": 0.36544522643089294, | |
| "learning_rate": 5.484054353515896e-06, | |
| "loss": 0.3729, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 7.8233486943164365, | |
| "grad_norm": 0.38962146639823914, | |
| "learning_rate": 5.448298737430992e-06, | |
| "loss": 0.3697, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 7.854070660522273, | |
| "grad_norm": 0.425886869430542, | |
| "learning_rate": 5.412519994956543e-06, | |
| "loss": 0.3733, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 7.88479262672811, | |
| "grad_norm": 0.3979520797729492, | |
| "learning_rate": 5.376719971810741e-06, | |
| "loss": 0.3734, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 7.915514592933948, | |
| "grad_norm": 0.38723668456077576, | |
| "learning_rate": 5.340900514809587e-06, | |
| "loss": 0.3726, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 7.946236559139785, | |
| "grad_norm": 0.37770572304725647, | |
| "learning_rate": 5.305063471771614e-06, | |
| "loss": 0.3699, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 7.976958525345622, | |
| "grad_norm": 0.398049533367157, | |
| "learning_rate": 5.26921069142257e-06, | |
| "loss": 0.3717, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 8.006144393241167, | |
| "grad_norm": 0.5838120579719543, | |
| "learning_rate": 5.233344023300037e-06, | |
| "loss": 0.3649, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 8.036866359447005, | |
| "grad_norm": 0.4888751208782196, | |
| "learning_rate": 5.197465317658036e-06, | |
| "loss": 0.3417, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 8.067588325652842, | |
| "grad_norm": 0.4426686465740204, | |
| "learning_rate": 5.161576425371554e-06, | |
| "loss": 0.3448, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 8.09831029185868, | |
| "grad_norm": 0.4328514635562897, | |
| "learning_rate": 5.125679197841088e-06, | |
| "loss": 0.3427, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 8.129032258064516, | |
| "grad_norm": 0.461224764585495, | |
| "learning_rate": 5.089775486897121e-06, | |
| "loss": 0.3411, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 8.159754224270353, | |
| "grad_norm": 0.41059058904647827, | |
| "learning_rate": 5.053867144704594e-06, | |
| "loss": 0.3432, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 8.19047619047619, | |
| "grad_norm": 0.4233262538909912, | |
| "learning_rate": 5.017956023667363e-06, | |
| "loss": 0.3428, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 8.221198156682028, | |
| "grad_norm": 0.44398781657218933, | |
| "learning_rate": 4.982043976332638e-06, | |
| "loss": 0.3396, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 8.251920122887865, | |
| "grad_norm": 0.43628108501434326, | |
| "learning_rate": 4.946132855295407e-06, | |
| "loss": 0.3432, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 8.282642089093702, | |
| "grad_norm": 0.45262426137924194, | |
| "learning_rate": 4.910224513102881e-06, | |
| "loss": 0.34, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 8.31336405529954, | |
| "grad_norm": 0.46370255947113037, | |
| "learning_rate": 4.8743208021589135e-06, | |
| "loss": 0.3404, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 8.344086021505376, | |
| "grad_norm": 0.40948814153671265, | |
| "learning_rate": 4.838423574628447e-06, | |
| "loss": 0.3431, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 8.374807987711213, | |
| "grad_norm": 0.4436282813549042, | |
| "learning_rate": 4.802534682341966e-06, | |
| "loss": 0.3446, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 8.40552995391705, | |
| "grad_norm": 0.4203520119190216, | |
| "learning_rate": 4.7666559766999635e-06, | |
| "loss": 0.3478, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 8.436251920122888, | |
| "grad_norm": 0.4091641306877136, | |
| "learning_rate": 4.730789308577432e-06, | |
| "loss": 0.3461, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 8.466973886328725, | |
| "grad_norm": 0.4320433735847473, | |
| "learning_rate": 4.694936528228387e-06, | |
| "loss": 0.3504, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 8.497695852534562, | |
| "grad_norm": 0.4243397116661072, | |
| "learning_rate": 4.659099485190414e-06, | |
| "loss": 0.3444, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 8.5284178187404, | |
| "grad_norm": 0.42783576250076294, | |
| "learning_rate": 4.6232800281892604e-06, | |
| "loss": 0.3398, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 8.559139784946236, | |
| "grad_norm": 0.395312637090683, | |
| "learning_rate": 4.587480005043458e-06, | |
| "loss": 0.3472, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 8.589861751152073, | |
| "grad_norm": 0.41875869035720825, | |
| "learning_rate": 4.551701262569009e-06, | |
| "loss": 0.3475, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 8.620583717357912, | |
| "grad_norm": 0.4307910203933716, | |
| "learning_rate": 4.515945646484105e-06, | |
| "loss": 0.3465, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 8.651305683563749, | |
| "grad_norm": 0.40852200984954834, | |
| "learning_rate": 4.480215001313919e-06, | |
| "loss": 0.3497, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 8.682027649769585, | |
| "grad_norm": 0.4345207214355469, | |
| "learning_rate": 4.444511170295451e-06, | |
| "loss": 0.3474, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 8.712749615975422, | |
| "grad_norm": 0.4096705913543701, | |
| "learning_rate": 4.408835995282434e-06, | |
| "loss": 0.3472, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 8.74347158218126, | |
| "grad_norm": 0.4314156770706177, | |
| "learning_rate": 4.373191316650328e-06, | |
| "loss": 0.3518, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 8.774193548387096, | |
| "grad_norm": 0.41832414269447327, | |
| "learning_rate": 4.3375789732013775e-06, | |
| "loss": 0.3498, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 8.804915514592935, | |
| "grad_norm": 0.42618289589881897, | |
| "learning_rate": 4.302000802069744e-06, | |
| "loss": 0.3486, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 8.835637480798772, | |
| "grad_norm": 0.43849977850914, | |
| "learning_rate": 4.2664586386267474e-06, | |
| "loss": 0.346, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 8.866359447004609, | |
| "grad_norm": 0.42157772183418274, | |
| "learning_rate": 4.230954316386179e-06, | |
| "loss": 0.3475, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 8.897081413210445, | |
| "grad_norm": 0.39600861072540283, | |
| "learning_rate": 4.195489666909714e-06, | |
| "loss": 0.3455, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 8.927803379416282, | |
| "grad_norm": 0.3980286419391632, | |
| "learning_rate": 4.160066519712428e-06, | |
| "loss": 0.3488, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 8.95852534562212, | |
| "grad_norm": 0.41449347138404846, | |
| "learning_rate": 4.1246867021684206e-06, | |
| "loss": 0.345, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 8.989247311827956, | |
| "grad_norm": 0.43595919013023376, | |
| "learning_rate": 4.089352039416543e-06, | |
| "loss": 0.3476, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 9.018433179723502, | |
| "grad_norm": 0.5251989364624023, | |
| "learning_rate": 4.054064354266244e-06, | |
| "loss": 0.3327, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 9.049155145929339, | |
| "grad_norm": 0.4793786108493805, | |
| "learning_rate": 4.018825467103542e-06, | |
| "loss": 0.318, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 9.079877112135177, | |
| "grad_norm": 0.4722115993499756, | |
| "learning_rate": 3.983637195797111e-06, | |
| "loss": 0.3217, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 9.110599078341014, | |
| "grad_norm": 0.4697054326534271, | |
| "learning_rate": 3.948501355604507e-06, | |
| "loss": 0.3184, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 9.141321044546851, | |
| "grad_norm": 0.4698534607887268, | |
| "learning_rate": 3.9134197590785164e-06, | |
| "loss": 0.3193, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 9.172043010752688, | |
| "grad_norm": 0.4833962917327881, | |
| "learning_rate": 3.878394215973663e-06, | |
| "loss": 0.3243, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 9.202764976958525, | |
| "grad_norm": 0.47097915410995483, | |
| "learning_rate": 3.843426533152841e-06, | |
| "loss": 0.3218, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.233486943164362, | |
| "grad_norm": 0.4613553285598755, | |
| "learning_rate": 3.808518514494105e-06, | |
| "loss": 0.3191, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 9.2642089093702, | |
| "grad_norm": 0.4618718922138214, | |
| "learning_rate": 3.773671960797613e-06, | |
| "loss": 0.3219, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 9.294930875576037, | |
| "grad_norm": 0.45279550552368164, | |
| "learning_rate": 3.7388886696927317e-06, | |
| "loss": 0.3235, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 9.325652841781874, | |
| "grad_norm": 0.4596066176891327, | |
| "learning_rate": 3.704170435545299e-06, | |
| "loss": 0.3183, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 9.356374807987711, | |
| "grad_norm": 0.4353365898132324, | |
| "learning_rate": 3.6695190493650608e-06, | |
| "loss": 0.3204, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 9.387096774193548, | |
| "grad_norm": 0.444594144821167, | |
| "learning_rate": 3.634936298713274e-06, | |
| "loss": 0.3225, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 9.417818740399385, | |
| "grad_norm": 0.43966248631477356, | |
| "learning_rate": 3.6004239676104957e-06, | |
| "loss": 0.3236, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 9.448540706605222, | |
| "grad_norm": 0.4758555591106415, | |
| "learning_rate": 3.5659838364445505e-06, | |
| "loss": 0.3219, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 9.47926267281106, | |
| "grad_norm": 0.4469148814678192, | |
| "learning_rate": 3.5316176818786797e-06, | |
| "loss": 0.324, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 9.509984639016897, | |
| "grad_norm": 0.4485386908054352, | |
| "learning_rate": 3.497327276759899e-06, | |
| "loss": 0.3238, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 9.540706605222734, | |
| "grad_norm": 0.44222620129585266, | |
| "learning_rate": 3.463114390027533e-06, | |
| "loss": 0.3205, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 9.571428571428571, | |
| "grad_norm": 0.45242762565612793, | |
| "learning_rate": 3.4289807866219683e-06, | |
| "loss": 0.3222, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 9.602150537634408, | |
| "grad_norm": 0.44047296047210693, | |
| "learning_rate": 3.394928227393598e-06, | |
| "loss": 0.3277, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 9.632872503840245, | |
| "grad_norm": 0.44352057576179504, | |
| "learning_rate": 3.3609584690119924e-06, | |
| "loss": 0.3231, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 9.663594470046084, | |
| "grad_norm": 0.44703468680381775, | |
| "learning_rate": 3.3270732638752713e-06, | |
| "loss": 0.3242, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 9.69431643625192, | |
| "grad_norm": 0.46220463514328003, | |
| "learning_rate": 3.293274360019707e-06, | |
| "loss": 0.3256, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 9.725038402457757, | |
| "grad_norm": 0.4843495786190033, | |
| "learning_rate": 3.259563501029548e-06, | |
| "loss": 0.3279, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 9.755760368663594, | |
| "grad_norm": 0.4653911292552948, | |
| "learning_rate": 3.2259424259470705e-06, | |
| "loss": 0.3233, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 9.786482334869431, | |
| "grad_norm": 0.45650723576545715, | |
| "learning_rate": 3.1924128691828678e-06, | |
| "loss": 0.324, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 9.817204301075268, | |
| "grad_norm": 0.5246464014053345, | |
| "learning_rate": 3.158976560426379e-06, | |
| "loss": 0.3286, | |
| "step": 1600 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2430, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.4548687991848042e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |