| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9486465975208702, |
| "eval_steps": 500, |
| "global_step": 7500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0031621553250695674, |
| "grad_norm": 11.55327320098877, |
| "learning_rate": 1.9939286617758668e-05, |
| "loss": 8.596, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.006324310650139135, |
| "grad_norm": 5.928526401519775, |
| "learning_rate": 1.9876043511257274e-05, |
| "loss": 5.501, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.009486465975208701, |
| "grad_norm": 2.7216451168060303, |
| "learning_rate": 1.9812800404755884e-05, |
| "loss": 4.9742, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.01264862130027827, |
| "grad_norm": 2.725499153137207, |
| "learning_rate": 1.974955729825449e-05, |
| "loss": 4.6023, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015810776625347838, |
| "grad_norm": 2.318068027496338, |
| "learning_rate": 1.96863141917531e-05, |
| "loss": 4.4124, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.018972931950417403, |
| "grad_norm": 2.437415838241577, |
| "learning_rate": 1.9623071085251707e-05, |
| "loss": 4.5471, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02213508727548697, |
| "grad_norm": 1.6126755475997925, |
| "learning_rate": 1.955982797875032e-05, |
| "loss": 4.0951, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.02529724260055654, |
| "grad_norm": 1.9190938472747803, |
| "learning_rate": 1.9496584872248927e-05, |
| "loss": 4.0572, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.028459397925626108, |
| "grad_norm": 1.6798152923583984, |
| "learning_rate": 1.9433341765747537e-05, |
| "loss": 3.9318, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.031621553250695676, |
| "grad_norm": 2.02826189994812, |
| "learning_rate": 1.9370098659246143e-05, |
| "loss": 3.9254, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.034783708575765244, |
| "grad_norm": 1.6234019994735718, |
| "learning_rate": 1.9306855552744753e-05, |
| "loss": 3.7144, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.037945863900834806, |
| "grad_norm": 2.1222445964813232, |
| "learning_rate": 1.924361244624336e-05, |
| "loss": 4.0748, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.041108019225904374, |
| "grad_norm": 1.8071123361587524, |
| "learning_rate": 1.918036933974197e-05, |
| "loss": 3.8597, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.04427017455097394, |
| "grad_norm": 1.2955825328826904, |
| "learning_rate": 1.911712623324058e-05, |
| "loss": 3.6891, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04743232987604351, |
| "grad_norm": 1.9034608602523804, |
| "learning_rate": 1.905388312673919e-05, |
| "loss": 3.8955, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.05059448520111308, |
| "grad_norm": 1.649466872215271, |
| "learning_rate": 1.8990640020237796e-05, |
| "loss": 3.8186, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05375664052618265, |
| "grad_norm": 1.8459181785583496, |
| "learning_rate": 1.8927396913736406e-05, |
| "loss": 3.5247, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.056918795851252216, |
| "grad_norm": 1.5839076042175293, |
| "learning_rate": 1.8864153807235012e-05, |
| "loss": 3.756, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.060080951176321784, |
| "grad_norm": 1.5614947080612183, |
| "learning_rate": 1.8800910700733622e-05, |
| "loss": 3.6433, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.06324310650139135, |
| "grad_norm": 1.7831311225891113, |
| "learning_rate": 1.873766759423223e-05, |
| "loss": 3.9025, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06640526182646092, |
| "grad_norm": 1.4928420782089233, |
| "learning_rate": 1.867442448773084e-05, |
| "loss": 3.4216, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.06956741715153049, |
| "grad_norm": 1.8199015855789185, |
| "learning_rate": 1.861118138122945e-05, |
| "loss": 3.7497, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.07272957247660006, |
| "grad_norm": 1.6205722093582153, |
| "learning_rate": 1.854793827472806e-05, |
| "loss": 3.4153, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.07589172780166961, |
| "grad_norm": 1.4476927518844604, |
| "learning_rate": 1.8484695168226665e-05, |
| "loss": 3.6298, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07905388312673918, |
| "grad_norm": 1.5358431339263916, |
| "learning_rate": 1.8421452061725275e-05, |
| "loss": 3.5282, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.08221603845180875, |
| "grad_norm": 1.616495966911316, |
| "learning_rate": 1.835820895522388e-05, |
| "loss": 3.6029, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.08537819377687832, |
| "grad_norm": 1.4835790395736694, |
| "learning_rate": 1.829496584872249e-05, |
| "loss": 3.6817, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.08854034910194788, |
| "grad_norm": 1.8951404094696045, |
| "learning_rate": 1.8231722742221098e-05, |
| "loss": 3.5222, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09170250442701745, |
| "grad_norm": 1.4792296886444092, |
| "learning_rate": 1.8168479635719707e-05, |
| "loss": 3.552, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.09486465975208702, |
| "grad_norm": 1.5331674814224243, |
| "learning_rate": 1.8105236529218317e-05, |
| "loss": 3.6274, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.09802681507715659, |
| "grad_norm": 1.3521767854690552, |
| "learning_rate": 1.8041993422716927e-05, |
| "loss": 3.4604, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.10118897040222616, |
| "grad_norm": 1.9199748039245605, |
| "learning_rate": 1.7978750316215534e-05, |
| "loss": 3.6434, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10435112572729573, |
| "grad_norm": 1.4599398374557495, |
| "learning_rate": 1.7915507209714144e-05, |
| "loss": 3.4381, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.1075132810523653, |
| "grad_norm": 1.8845282793045044, |
| "learning_rate": 1.785226410321275e-05, |
| "loss": 3.4885, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.11067543637743486, |
| "grad_norm": 1.6054080724716187, |
| "learning_rate": 1.778902099671136e-05, |
| "loss": 3.4714, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.11383759170250443, |
| "grad_norm": 1.4243124723434448, |
| "learning_rate": 1.7725777890209967e-05, |
| "loss": 3.423, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.116999747027574, |
| "grad_norm": 1.828045129776001, |
| "learning_rate": 1.7662534783708576e-05, |
| "loss": 3.3451, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.12016190235264357, |
| "grad_norm": 1.7184784412384033, |
| "learning_rate": 1.7599291677207186e-05, |
| "loss": 3.4567, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.12332405767771312, |
| "grad_norm": 2.0031352043151855, |
| "learning_rate": 1.7536048570705796e-05, |
| "loss": 3.5227, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.1264862130027827, |
| "grad_norm": 1.8211214542388916, |
| "learning_rate": 1.7472805464204403e-05, |
| "loss": 3.4222, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12964836832785226, |
| "grad_norm": 1.6366448402404785, |
| "learning_rate": 1.7409562357703013e-05, |
| "loss": 3.3479, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.13281052365292184, |
| "grad_norm": 1.4770481586456299, |
| "learning_rate": 1.734631925120162e-05, |
| "loss": 3.4232, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1359726789779914, |
| "grad_norm": 1.3028419017791748, |
| "learning_rate": 1.728307614470023e-05, |
| "loss": 3.2511, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.13913483430306098, |
| "grad_norm": 1.8425815105438232, |
| "learning_rate": 1.721983303819884e-05, |
| "loss": 3.3851, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.14229698962813053, |
| "grad_norm": 1.5002127885818481, |
| "learning_rate": 1.715658993169745e-05, |
| "loss": 3.5281, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.14545914495320011, |
| "grad_norm": 1.5222417116165161, |
| "learning_rate": 1.7093346825196055e-05, |
| "loss": 3.3473, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.14862130027826967, |
| "grad_norm": 1.628788948059082, |
| "learning_rate": 1.7030103718694665e-05, |
| "loss": 3.3795, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.15178345560333922, |
| "grad_norm": 1.3158140182495117, |
| "learning_rate": 1.6966860612193272e-05, |
| "loss": 3.5536, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1549456109284088, |
| "grad_norm": 1.8109098672866821, |
| "learning_rate": 1.690361750569188e-05, |
| "loss": 3.3006, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.15810776625347836, |
| "grad_norm": 2.0179309844970703, |
| "learning_rate": 1.6840374399190488e-05, |
| "loss": 3.4737, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.16126992157854794, |
| "grad_norm": 1.8066917657852173, |
| "learning_rate": 1.6777131292689098e-05, |
| "loss": 3.3135, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.1644320769036175, |
| "grad_norm": 1.4508979320526123, |
| "learning_rate": 1.6713888186187708e-05, |
| "loss": 3.154, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.16759423222868708, |
| "grad_norm": 1.324156641960144, |
| "learning_rate": 1.6650645079686318e-05, |
| "loss": 3.3076, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.17075638755375663, |
| "grad_norm": 1.7277374267578125, |
| "learning_rate": 1.6587401973184924e-05, |
| "loss": 3.3168, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.17391854287882622, |
| "grad_norm": 1.581597924232483, |
| "learning_rate": 1.6524158866683534e-05, |
| "loss": 3.2586, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.17708069820389577, |
| "grad_norm": 1.2408771514892578, |
| "learning_rate": 1.646091576018214e-05, |
| "loss": 3.3174, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18024285352896535, |
| "grad_norm": 1.5069456100463867, |
| "learning_rate": 1.639767265368075e-05, |
| "loss": 3.2248, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.1834050088540349, |
| "grad_norm": 1.474717378616333, |
| "learning_rate": 1.6334429547179357e-05, |
| "loss": 3.2757, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.1865671641791045, |
| "grad_norm": 1.648443341255188, |
| "learning_rate": 1.6271186440677967e-05, |
| "loss": 3.2754, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.18972931950417404, |
| "grad_norm": 1.5270763635635376, |
| "learning_rate": 1.6207943334176577e-05, |
| "loss": 3.2413, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.19289147482924363, |
| "grad_norm": 1.643730640411377, |
| "learning_rate": 1.6144700227675187e-05, |
| "loss": 3.213, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.19605363015431318, |
| "grad_norm": 1.4000329971313477, |
| "learning_rate": 1.6081457121173793e-05, |
| "loss": 3.236, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.19921578547938273, |
| "grad_norm": 1.6957166194915771, |
| "learning_rate": 1.6018214014672403e-05, |
| "loss": 3.1811, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.20237794080445232, |
| "grad_norm": 1.763476848602295, |
| "learning_rate": 1.595497090817101e-05, |
| "loss": 3.2634, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.20554009612952187, |
| "grad_norm": 1.5622406005859375, |
| "learning_rate": 1.589172780166962e-05, |
| "loss": 3.2498, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.20870225145459145, |
| "grad_norm": 2.501917600631714, |
| "learning_rate": 1.5828484695168226e-05, |
| "loss": 3.3526, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.211864406779661, |
| "grad_norm": 1.5445410013198853, |
| "learning_rate": 1.5765241588666836e-05, |
| "loss": 3.25, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.2150265621047306, |
| "grad_norm": 1.420432209968567, |
| "learning_rate": 1.5701998482165446e-05, |
| "loss": 3.316, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.21818871742980014, |
| "grad_norm": 1.7002512216567993, |
| "learning_rate": 1.5638755375664056e-05, |
| "loss": 3.156, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.22135087275486973, |
| "grad_norm": 1.533703088760376, |
| "learning_rate": 1.5575512269162662e-05, |
| "loss": 3.0938, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.22451302807993928, |
| "grad_norm": 1.4791340827941895, |
| "learning_rate": 1.5512269162661272e-05, |
| "loss": 3.2756, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.22767518340500886, |
| "grad_norm": 1.783389925956726, |
| "learning_rate": 1.544902605615988e-05, |
| "loss": 3.3588, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.23083733873007842, |
| "grad_norm": 1.7652767896652222, |
| "learning_rate": 1.538578294965849e-05, |
| "loss": 3.1746, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.233999494055148, |
| "grad_norm": 1.7373157739639282, |
| "learning_rate": 1.53225398431571e-05, |
| "loss": 3.2757, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.23716164938021755, |
| "grad_norm": 1.655953049659729, |
| "learning_rate": 1.5259296736655705e-05, |
| "loss": 3.2366, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.24032380470528714, |
| "grad_norm": 1.4808825254440308, |
| "learning_rate": 1.5196053630154315e-05, |
| "loss": 3.2056, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2434859600303567, |
| "grad_norm": 1.3337376117706299, |
| "learning_rate": 1.5132810523652923e-05, |
| "loss": 2.9247, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.24664811535542625, |
| "grad_norm": 1.3115172386169434, |
| "learning_rate": 1.5069567417151531e-05, |
| "loss": 3.3964, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.24981027068049583, |
| "grad_norm": 1.3567864894866943, |
| "learning_rate": 1.500632431065014e-05, |
| "loss": 3.2777, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.2529724260055654, |
| "grad_norm": 1.5610828399658203, |
| "learning_rate": 1.4943081204148748e-05, |
| "loss": 3.0115, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.25613458133063494, |
| "grad_norm": 1.6952314376831055, |
| "learning_rate": 1.4879838097647357e-05, |
| "loss": 3.2321, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.2592967366557045, |
| "grad_norm": 1.5469592809677124, |
| "learning_rate": 1.4816594991145967e-05, |
| "loss": 3.1697, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.2624588919807741, |
| "grad_norm": 1.7456170320510864, |
| "learning_rate": 1.4753351884644576e-05, |
| "loss": 3.0305, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.2656210473058437, |
| "grad_norm": 1.8613225221633911, |
| "learning_rate": 1.4690108778143184e-05, |
| "loss": 3.1094, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2687832026309132, |
| "grad_norm": 1.6130377054214478, |
| "learning_rate": 1.4626865671641792e-05, |
| "loss": 3.1656, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.2719453579559828, |
| "grad_norm": 1.4752004146575928, |
| "learning_rate": 1.45636225651404e-05, |
| "loss": 3.1527, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.2751075132810524, |
| "grad_norm": 1.3546524047851562, |
| "learning_rate": 1.4500379458639008e-05, |
| "loss": 3.1096, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.27826966860612196, |
| "grad_norm": 1.5383223295211792, |
| "learning_rate": 1.4437136352137617e-05, |
| "loss": 3.3534, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.2814318239311915, |
| "grad_norm": 1.7905950546264648, |
| "learning_rate": 1.4373893245636228e-05, |
| "loss": 3.2779, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.28459397925626106, |
| "grad_norm": 1.408922553062439, |
| "learning_rate": 1.4310650139134836e-05, |
| "loss": 3.0517, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.28775613458133065, |
| "grad_norm": 1.7881851196289062, |
| "learning_rate": 1.4247407032633445e-05, |
| "loss": 3.144, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.29091828990640023, |
| "grad_norm": 1.5622018575668335, |
| "learning_rate": 1.4184163926132053e-05, |
| "loss": 3.0161, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.29408044523146976, |
| "grad_norm": 1.1805533170700073, |
| "learning_rate": 1.4120920819630661e-05, |
| "loss": 3.3535, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.29724260055653934, |
| "grad_norm": 1.4643152952194214, |
| "learning_rate": 1.4057677713129269e-05, |
| "loss": 3.0817, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3004047558816089, |
| "grad_norm": 1.640499234199524, |
| "learning_rate": 1.3994434606627877e-05, |
| "loss": 3.2889, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.30356691120667845, |
| "grad_norm": 1.4501370191574097, |
| "learning_rate": 1.3931191500126486e-05, |
| "loss": 3.0626, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.30672906653174803, |
| "grad_norm": 1.540034532546997, |
| "learning_rate": 1.3867948393625097e-05, |
| "loss": 3.1131, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.3098912218568176, |
| "grad_norm": 1.3436205387115479, |
| "learning_rate": 1.3804705287123705e-05, |
| "loss": 3.3248, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3130533771818872, |
| "grad_norm": 1.8110958337783813, |
| "learning_rate": 1.3741462180622314e-05, |
| "loss": 3.2063, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.3162155325069567, |
| "grad_norm": 1.1593376398086548, |
| "learning_rate": 1.3678219074120922e-05, |
| "loss": 3.1707, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3193776878320263, |
| "grad_norm": 1.2828359603881836, |
| "learning_rate": 1.361497596761953e-05, |
| "loss": 3.1273, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.3225398431570959, |
| "grad_norm": 1.4801135063171387, |
| "learning_rate": 1.3551732861118138e-05, |
| "loss": 3.1145, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.32570199848216547, |
| "grad_norm": 1.6385855674743652, |
| "learning_rate": 1.3488489754616746e-05, |
| "loss": 3.1274, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.328864153807235, |
| "grad_norm": 1.3759686946868896, |
| "learning_rate": 1.3425246648115358e-05, |
| "loss": 2.9515, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.3320263091323046, |
| "grad_norm": 1.3556058406829834, |
| "learning_rate": 1.3362003541613966e-05, |
| "loss": 3.1251, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.33518846445737416, |
| "grad_norm": 1.8028017282485962, |
| "learning_rate": 1.3298760435112574e-05, |
| "loss": 3.1851, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.33835061978244374, |
| "grad_norm": 1.3875707387924194, |
| "learning_rate": 1.3235517328611182e-05, |
| "loss": 3.1831, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.34151277510751327, |
| "grad_norm": 1.3739882707595825, |
| "learning_rate": 1.317227422210979e-05, |
| "loss": 2.944, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.34467493043258285, |
| "grad_norm": 1.7388805150985718, |
| "learning_rate": 1.3109031115608399e-05, |
| "loss": 2.9621, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.34783708575765243, |
| "grad_norm": 1.4218101501464844, |
| "learning_rate": 1.3045788009107007e-05, |
| "loss": 3.1734, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.35099924108272196, |
| "grad_norm": 1.5523903369903564, |
| "learning_rate": 1.2982544902605615e-05, |
| "loss": 3.1323, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.35416139640779154, |
| "grad_norm": 1.2134604454040527, |
| "learning_rate": 1.2919301796104227e-05, |
| "loss": 2.9738, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3573235517328611, |
| "grad_norm": 1.7392768859863281, |
| "learning_rate": 1.2856058689602835e-05, |
| "loss": 3.2412, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.3604857070579307, |
| "grad_norm": 1.5656300783157349, |
| "learning_rate": 1.2792815583101443e-05, |
| "loss": 3.1549, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.36364786238300023, |
| "grad_norm": 1.592602014541626, |
| "learning_rate": 1.2729572476600051e-05, |
| "loss": 3.2068, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.3668100177080698, |
| "grad_norm": 1.6737302541732788, |
| "learning_rate": 1.266632937009866e-05, |
| "loss": 3.0918, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3699721730331394, |
| "grad_norm": 1.4579912424087524, |
| "learning_rate": 1.2603086263597268e-05, |
| "loss": 3.17, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.373134328358209, |
| "grad_norm": 1.9524872303009033, |
| "learning_rate": 1.2539843157095876e-05, |
| "loss": 3.0157, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.3762964836832785, |
| "grad_norm": 1.5913100242614746, |
| "learning_rate": 1.2476600050594488e-05, |
| "loss": 2.9347, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.3794586390083481, |
| "grad_norm": 1.5421273708343506, |
| "learning_rate": 1.2413356944093096e-05, |
| "loss": 3.0643, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.38262079433341767, |
| "grad_norm": 1.4578076601028442, |
| "learning_rate": 1.2350113837591704e-05, |
| "loss": 3.1577, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.38578294965848725, |
| "grad_norm": 1.4034929275512695, |
| "learning_rate": 1.2286870731090312e-05, |
| "loss": 3.0522, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.3889451049835568, |
| "grad_norm": 1.4334189891815186, |
| "learning_rate": 1.222362762458892e-05, |
| "loss": 3.0195, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.39210726030862636, |
| "grad_norm": 1.5584102869033813, |
| "learning_rate": 1.2160384518087529e-05, |
| "loss": 2.9582, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.39526941563369594, |
| "grad_norm": 1.4117885828018188, |
| "learning_rate": 1.2097141411586137e-05, |
| "loss": 3.1979, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.39843157095876547, |
| "grad_norm": 1.6135624647140503, |
| "learning_rate": 1.2033898305084745e-05, |
| "loss": 3.0185, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.40159372628383505, |
| "grad_norm": 1.9495983123779297, |
| "learning_rate": 1.1970655198583357e-05, |
| "loss": 3.1753, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.40475588160890463, |
| "grad_norm": 1.3732932806015015, |
| "learning_rate": 1.1907412092081965e-05, |
| "loss": 2.9982, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4079180369339742, |
| "grad_norm": 1.6417973041534424, |
| "learning_rate": 1.1844168985580573e-05, |
| "loss": 2.9491, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.41108019225904374, |
| "grad_norm": 1.599778413772583, |
| "learning_rate": 1.1780925879079181e-05, |
| "loss": 2.9619, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.4142423475841133, |
| "grad_norm": 1.5678138732910156, |
| "learning_rate": 1.171768277257779e-05, |
| "loss": 3.1179, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.4174045029091829, |
| "grad_norm": 1.4923375844955444, |
| "learning_rate": 1.1654439666076398e-05, |
| "loss": 3.193, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.4205666582342525, |
| "grad_norm": 1.5655521154403687, |
| "learning_rate": 1.1591196559575006e-05, |
| "loss": 3.0813, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.423728813559322, |
| "grad_norm": 1.7306265830993652, |
| "learning_rate": 1.1527953453073617e-05, |
| "loss": 3.1584, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.4268909688843916, |
| "grad_norm": 1.4964895248413086, |
| "learning_rate": 1.1464710346572226e-05, |
| "loss": 2.9885, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.4300531242094612, |
| "grad_norm": 1.386781930923462, |
| "learning_rate": 1.1401467240070834e-05, |
| "loss": 2.7611, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.43321527953453076, |
| "grad_norm": 1.223809838294983, |
| "learning_rate": 1.1338224133569442e-05, |
| "loss": 2.9793, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.4363774348596003, |
| "grad_norm": 1.7949477434158325, |
| "learning_rate": 1.127498102706805e-05, |
| "loss": 3.1053, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.43953959018466987, |
| "grad_norm": 1.2614104747772217, |
| "learning_rate": 1.1211737920566658e-05, |
| "loss": 3.1497, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.44270174550973945, |
| "grad_norm": 1.4934375286102295, |
| "learning_rate": 1.1148494814065267e-05, |
| "loss": 3.2349, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.445863900834809, |
| "grad_norm": 1.692384958267212, |
| "learning_rate": 1.1085251707563875e-05, |
| "loss": 3.2143, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.44902605615987856, |
| "grad_norm": 1.430724024772644, |
| "learning_rate": 1.1022008601062486e-05, |
| "loss": 2.9926, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.45218821148494814, |
| "grad_norm": 1.6542253494262695, |
| "learning_rate": 1.0958765494561095e-05, |
| "loss": 3.0877, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.4553503668100177, |
| "grad_norm": 1.761777400970459, |
| "learning_rate": 1.0895522388059703e-05, |
| "loss": 3.1343, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.45851252213508725, |
| "grad_norm": 1.3432550430297852, |
| "learning_rate": 1.0832279281558311e-05, |
| "loss": 2.9125, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.46167467746015683, |
| "grad_norm": 1.6349095106124878, |
| "learning_rate": 1.0769036175056919e-05, |
| "loss": 3.0102, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.4648368327852264, |
| "grad_norm": 1.5871011018753052, |
| "learning_rate": 1.0705793068555527e-05, |
| "loss": 2.8361, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.467998988110296, |
| "grad_norm": 1.5581876039505005, |
| "learning_rate": 1.0642549962054136e-05, |
| "loss": 2.9176, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.4711611434353655, |
| "grad_norm": 1.9018642902374268, |
| "learning_rate": 1.0579306855552747e-05, |
| "loss": 3.1236, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.4743232987604351, |
| "grad_norm": 1.9724278450012207, |
| "learning_rate": 1.0516063749051355e-05, |
| "loss": 3.2517, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.4774854540855047, |
| "grad_norm": 1.4184868335723877, |
| "learning_rate": 1.0452820642549963e-05, |
| "loss": 2.7384, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.4806476094105743, |
| "grad_norm": 1.6251132488250732, |
| "learning_rate": 1.0389577536048572e-05, |
| "loss": 2.9232, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.4838097647356438, |
| "grad_norm": 1.526455283164978, |
| "learning_rate": 1.032633442954718e-05, |
| "loss": 3.0109, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.4869719200607134, |
| "grad_norm": 1.5482044219970703, |
| "learning_rate": 1.0263091323045788e-05, |
| "loss": 2.9056, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.49013407538578296, |
| "grad_norm": 1.5261505842208862, |
| "learning_rate": 1.0199848216544396e-05, |
| "loss": 2.8749, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.4932962307108525, |
| "grad_norm": 1.8025535345077515, |
| "learning_rate": 1.0136605110043004e-05, |
| "loss": 3.1128, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.4964583860359221, |
| "grad_norm": 1.428043246269226, |
| "learning_rate": 1.0073362003541616e-05, |
| "loss": 3.0411, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.49962054136099165, |
| "grad_norm": 1.3403562307357788, |
| "learning_rate": 1.0010118897040224e-05, |
| "loss": 2.9253, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.5027826966860612, |
| "grad_norm": 1.6576727628707886, |
| "learning_rate": 9.946875790538832e-06, |
| "loss": 2.9877, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.5059448520111308, |
| "grad_norm": 1.8511061668395996, |
| "learning_rate": 9.88363268403744e-06, |
| "loss": 2.919, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5091070073362004, |
| "grad_norm": 1.690691590309143, |
| "learning_rate": 9.820389577536049e-06, |
| "loss": 2.7579, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.5122691626612699, |
| "grad_norm": 1.4383487701416016, |
| "learning_rate": 9.757146471034657e-06, |
| "loss": 2.8421, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.5154313179863395, |
| "grad_norm": 1.5412685871124268, |
| "learning_rate": 9.693903364533267e-06, |
| "loss": 2.9609, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.518593473311409, |
| "grad_norm": 1.7865098714828491, |
| "learning_rate": 9.630660258031875e-06, |
| "loss": 2.887, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5217556286364786, |
| "grad_norm": 1.4453928470611572, |
| "learning_rate": 9.567417151530483e-06, |
| "loss": 2.9248, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.5249177839615482, |
| "grad_norm": 1.6136345863342285, |
| "learning_rate": 9.504174045029093e-06, |
| "loss": 3.1814, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.5280799392866178, |
| "grad_norm": 1.3460476398468018, |
| "learning_rate": 9.440930938527701e-06, |
| "loss": 2.7766, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.5312420946116874, |
| "grad_norm": 1.6284033060073853, |
| "learning_rate": 9.37768783202631e-06, |
| "loss": 2.9087, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.534404249936757, |
| "grad_norm": 1.3353221416473389, |
| "learning_rate": 9.314444725524918e-06, |
| "loss": 2.9512, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.5375664052618264, |
| "grad_norm": 1.9415779113769531, |
| "learning_rate": 9.251201619023528e-06, |
| "loss": 2.7603, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.540728560586896, |
| "grad_norm": 1.6942236423492432, |
| "learning_rate": 9.187958512522136e-06, |
| "loss": 3.1245, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.5438907159119656, |
| "grad_norm": 2.2894318103790283, |
| "learning_rate": 9.124715406020744e-06, |
| "loss": 2.9695, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5470528712370352, |
| "grad_norm": 1.1514796018600464, |
| "learning_rate": 9.061472299519352e-06, |
| "loss": 3.0047, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.5502150265621047, |
| "grad_norm": 1.5051628351211548, |
| "learning_rate": 8.998229193017962e-06, |
| "loss": 3.1653, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.5533771818871743, |
| "grad_norm": 1.4090489149093628, |
| "learning_rate": 8.93498608651657e-06, |
| "loss": 2.9528, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.5565393372122439, |
| "grad_norm": 1.6863518953323364, |
| "learning_rate": 8.871742980015179e-06, |
| "loss": 2.8309, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.5597014925373134, |
| "grad_norm": 1.453518271446228, |
| "learning_rate": 8.808499873513787e-06, |
| "loss": 2.8803, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.562863647862383, |
| "grad_norm": 1.987695336341858, |
| "learning_rate": 8.745256767012397e-06, |
| "loss": 3.001, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.5660258031874525, |
| "grad_norm": 1.3925899267196655, |
| "learning_rate": 8.682013660511005e-06, |
| "loss": 2.9407, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.5691879585125221, |
| "grad_norm": 1.802440881729126, |
| "learning_rate": 8.618770554009613e-06, |
| "loss": 3.0096, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5723501138375917, |
| "grad_norm": 1.4417994022369385, |
| "learning_rate": 8.555527447508223e-06, |
| "loss": 2.976, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.5755122691626613, |
| "grad_norm": 1.7502068281173706, |
| "learning_rate": 8.492284341006831e-06, |
| "loss": 2.8715, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.5786744244877309, |
| "grad_norm": 1.7311025857925415, |
| "learning_rate": 8.42904123450544e-06, |
| "loss": 2.9176, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.5818365798128005, |
| "grad_norm": 1.6848989725112915, |
| "learning_rate": 8.365798128004048e-06, |
| "loss": 2.9213, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5849987351378699, |
| "grad_norm": 1.4339770078659058, |
| "learning_rate": 8.302555021502657e-06, |
| "loss": 2.944, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.5881608904629395, |
| "grad_norm": 1.4645488262176514, |
| "learning_rate": 8.239311915001266e-06, |
| "loss": 2.8865, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.5913230457880091, |
| "grad_norm": 1.793530821800232, |
| "learning_rate": 8.176068808499874e-06, |
| "loss": 3.0749, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.5944852011130787, |
| "grad_norm": 1.753974437713623, |
| "learning_rate": 8.112825701998482e-06, |
| "loss": 2.9768, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.5976473564381483, |
| "grad_norm": 1.4546597003936768, |
| "learning_rate": 8.049582595497092e-06, |
| "loss": 2.8419, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.6008095117632178, |
| "grad_norm": 2.1190099716186523, |
| "learning_rate": 7.9863394889957e-06, |
| "loss": 2.8468, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.6039716670882874, |
| "grad_norm": 2.026573657989502, |
| "learning_rate": 7.923096382494308e-06, |
| "loss": 3.0328, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.6071338224133569, |
| "grad_norm": 1.5188226699829102, |
| "learning_rate": 7.859853275992917e-06, |
| "loss": 2.7536, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6102959777384265, |
| "grad_norm": 1.9499627351760864, |
| "learning_rate": 7.796610169491526e-06, |
| "loss": 3.0033, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.6134581330634961, |
| "grad_norm": 1.4692327976226807, |
| "learning_rate": 7.733367062990135e-06, |
| "loss": 2.9335, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.6166202883885656, |
| "grad_norm": 1.4884922504425049, |
| "learning_rate": 7.670123956488743e-06, |
| "loss": 2.9247, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.6197824437136352, |
| "grad_norm": 1.5420995950698853, |
| "learning_rate": 7.606880849987353e-06, |
| "loss": 2.9574, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6229445990387048, |
| "grad_norm": 1.8658238649368286, |
| "learning_rate": 7.543637743485961e-06, |
| "loss": 2.9001, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.6261067543637744, |
| "grad_norm": 1.5181258916854858, |
| "learning_rate": 7.480394636984569e-06, |
| "loss": 2.7142, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.629268909688844, |
| "grad_norm": 1.4289120435714722, |
| "learning_rate": 7.417151530483177e-06, |
| "loss": 3.0176, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.6324310650139134, |
| "grad_norm": 1.7044678926467896, |
| "learning_rate": 7.353908423981787e-06, |
| "loss": 2.9688, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.635593220338983, |
| "grad_norm": 1.2686058282852173, |
| "learning_rate": 7.290665317480395e-06, |
| "loss": 3.0132, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.6387553756640526, |
| "grad_norm": 1.4413508176803589, |
| "learning_rate": 7.2274222109790036e-06, |
| "loss": 2.948, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.6419175309891222, |
| "grad_norm": 1.5472356081008911, |
| "learning_rate": 7.164179104477612e-06, |
| "loss": 2.8153, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.6450796863141918, |
| "grad_norm": 1.3920260667800903, |
| "learning_rate": 7.100935997976222e-06, |
| "loss": 2.9269, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.6482418416392614, |
| "grad_norm": 1.8862273693084717, |
| "learning_rate": 7.03769289147483e-06, |
| "loss": 2.9641, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.6514039969643309, |
| "grad_norm": 1.2860732078552246, |
| "learning_rate": 6.974449784973438e-06, |
| "loss": 2.9628, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.6545661522894004, |
| "grad_norm": 1.6975411176681519, |
| "learning_rate": 6.911206678472046e-06, |
| "loss": 2.8478, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.65772830761447, |
| "grad_norm": 1.577736258506775, |
| "learning_rate": 6.847963571970656e-06, |
| "loss": 2.8673, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.6608904629395396, |
| "grad_norm": 1.5640876293182373, |
| "learning_rate": 6.784720465469264e-06, |
| "loss": 2.7907, |
| "step": 5225 |
| }, |
| { |
| "epoch": 0.6640526182646092, |
| "grad_norm": 1.5373408794403076, |
| "learning_rate": 6.7214773589678725e-06, |
| "loss": 2.9308, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.6672147735896787, |
| "grad_norm": 1.499895453453064, |
| "learning_rate": 6.6582342524664824e-06, |
| "loss": 2.8954, |
| "step": 5275 |
| }, |
| { |
| "epoch": 0.6703769289147483, |
| "grad_norm": 1.7884222269058228, |
| "learning_rate": 6.594991145965091e-06, |
| "loss": 3.0374, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.6735390842398179, |
| "grad_norm": 1.4689120054244995, |
| "learning_rate": 6.531748039463699e-06, |
| "loss": 2.7937, |
| "step": 5325 |
| }, |
| { |
| "epoch": 0.6767012395648875, |
| "grad_norm": 1.2251571416854858, |
| "learning_rate": 6.468504932962307e-06, |
| "loss": 2.6993, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.679863394889957, |
| "grad_norm": 1.6900901794433594, |
| "learning_rate": 6.405261826460917e-06, |
| "loss": 2.9683, |
| "step": 5375 |
| }, |
| { |
| "epoch": 0.6830255502150265, |
| "grad_norm": 1.3138439655303955, |
| "learning_rate": 6.342018719959525e-06, |
| "loss": 2.9179, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.6861877055400961, |
| "grad_norm": 1.3106989860534668, |
| "learning_rate": 6.278775613458133e-06, |
| "loss": 2.8901, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.6893498608651657, |
| "grad_norm": 1.7244914770126343, |
| "learning_rate": 6.2155325069567415e-06, |
| "loss": 2.8481, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.6925120161902353, |
| "grad_norm": 1.1945232152938843, |
| "learning_rate": 6.152289400455351e-06, |
| "loss": 2.8619, |
| "step": 5475 |
| }, |
| { |
| "epoch": 0.6956741715153049, |
| "grad_norm": 1.3923369646072388, |
| "learning_rate": 6.08904629395396e-06, |
| "loss": 2.9955, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.6988363268403744, |
| "grad_norm": 1.7948927879333496, |
| "learning_rate": 6.025803187452568e-06, |
| "loss": 3.0742, |
| "step": 5525 |
| }, |
| { |
| "epoch": 0.7019984821654439, |
| "grad_norm": 1.4123344421386719, |
| "learning_rate": 5.962560080951176e-06, |
| "loss": 2.9499, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.7051606374905135, |
| "grad_norm": 1.5082272291183472, |
| "learning_rate": 5.899316974449786e-06, |
| "loss": 3.0861, |
| "step": 5575 |
| }, |
| { |
| "epoch": 0.7083227928155831, |
| "grad_norm": 1.6818978786468506, |
| "learning_rate": 5.836073867948394e-06, |
| "loss": 2.7944, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7114849481406527, |
| "grad_norm": 2.072303295135498, |
| "learning_rate": 5.772830761447002e-06, |
| "loss": 3.021, |
| "step": 5625 |
| }, |
| { |
| "epoch": 0.7146471034657222, |
| "grad_norm": 1.2416434288024902, |
| "learning_rate": 5.7095876549456105e-06, |
| "loss": 2.7849, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.7178092587907918, |
| "grad_norm": 1.6787675619125366, |
| "learning_rate": 5.64634454844422e-06, |
| "loss": 2.9102, |
| "step": 5675 |
| }, |
| { |
| "epoch": 0.7209714141158614, |
| "grad_norm": 1.542455792427063, |
| "learning_rate": 5.5831014419428286e-06, |
| "loss": 2.8268, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.724133569440931, |
| "grad_norm": 2.0236566066741943, |
| "learning_rate": 5.519858335441437e-06, |
| "loss": 2.8222, |
| "step": 5725 |
| }, |
| { |
| "epoch": 0.7272957247660005, |
| "grad_norm": 1.5775691270828247, |
| "learning_rate": 5.456615228940047e-06, |
| "loss": 2.9778, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.73045788009107, |
| "grad_norm": 1.792724847793579, |
| "learning_rate": 5.393372122438655e-06, |
| "loss": 2.9323, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.7336200354161396, |
| "grad_norm": 1.5170587301254272, |
| "learning_rate": 5.330129015937263e-06, |
| "loss": 2.8671, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7367821907412092, |
| "grad_norm": 1.418395757675171, |
| "learning_rate": 5.266885909435871e-06, |
| "loss": 2.8153, |
| "step": 5825 |
| }, |
| { |
| "epoch": 0.7399443460662788, |
| "grad_norm": 1.4458298683166504, |
| "learning_rate": 5.203642802934481e-06, |
| "loss": 2.9178, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.7431065013913484, |
| "grad_norm": 1.6139628887176514, |
| "learning_rate": 5.140399696433089e-06, |
| "loss": 2.8857, |
| "step": 5875 |
| }, |
| { |
| "epoch": 0.746268656716418, |
| "grad_norm": 1.1631035804748535, |
| "learning_rate": 5.0771565899316975e-06, |
| "loss": 2.9213, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.7494308120414874, |
| "grad_norm": 1.5703893899917603, |
| "learning_rate": 5.013913483430306e-06, |
| "loss": 2.891, |
| "step": 5925 |
| }, |
| { |
| "epoch": 0.752592967366557, |
| "grad_norm": 1.4471451044082642, |
| "learning_rate": 4.950670376928915e-06, |
| "loss": 2.8261, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.7557551226916266, |
| "grad_norm": 1.6305294036865234, |
| "learning_rate": 4.887427270427524e-06, |
| "loss": 2.8918, |
| "step": 5975 |
| }, |
| { |
| "epoch": 0.7589172780166962, |
| "grad_norm": 1.3186579942703247, |
| "learning_rate": 4.824184163926132e-06, |
| "loss": 2.9478, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7620794333417658, |
| "grad_norm": 1.4258198738098145, |
| "learning_rate": 4.760941057424741e-06, |
| "loss": 2.9313, |
| "step": 6025 |
| }, |
| { |
| "epoch": 0.7652415886668353, |
| "grad_norm": 1.5036190748214722, |
| "learning_rate": 4.69769795092335e-06, |
| "loss": 2.9562, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.7684037439919049, |
| "grad_norm": 1.7764675617218018, |
| "learning_rate": 4.634454844421958e-06, |
| "loss": 2.9396, |
| "step": 6075 |
| }, |
| { |
| "epoch": 0.7715658993169745, |
| "grad_norm": 1.624964714050293, |
| "learning_rate": 4.571211737920567e-06, |
| "loss": 2.8789, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.774728054642044, |
| "grad_norm": 1.1885554790496826, |
| "learning_rate": 4.5079686314191755e-06, |
| "loss": 2.9756, |
| "step": 6125 |
| }, |
| { |
| "epoch": 0.7778902099671136, |
| "grad_norm": 1.752863883972168, |
| "learning_rate": 4.444725524917785e-06, |
| "loss": 2.7279, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.7810523652921831, |
| "grad_norm": 1.4653725624084473, |
| "learning_rate": 4.381482418416393e-06, |
| "loss": 3.0123, |
| "step": 6175 |
| }, |
| { |
| "epoch": 0.7842145206172527, |
| "grad_norm": 1.5357298851013184, |
| "learning_rate": 4.318239311915002e-06, |
| "loss": 2.7978, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.7873766759423223, |
| "grad_norm": 1.4677993059158325, |
| "learning_rate": 4.25499620541361e-06, |
| "loss": 2.8016, |
| "step": 6225 |
| }, |
| { |
| "epoch": 0.7905388312673919, |
| "grad_norm": 1.3810391426086426, |
| "learning_rate": 4.191753098912219e-06, |
| "loss": 2.7919, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.7937009865924615, |
| "grad_norm": 1.827034831047058, |
| "learning_rate": 4.128509992410827e-06, |
| "loss": 2.9552, |
| "step": 6275 |
| }, |
| { |
| "epoch": 0.7968631419175309, |
| "grad_norm": 1.7732640504837036, |
| "learning_rate": 4.065266885909436e-06, |
| "loss": 3.03, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.8000252972426005, |
| "grad_norm": 2.2995574474334717, |
| "learning_rate": 4.0020237794080445e-06, |
| "loss": 3.027, |
| "step": 6325 |
| }, |
| { |
| "epoch": 0.8031874525676701, |
| "grad_norm": 1.664284110069275, |
| "learning_rate": 3.9387806729066536e-06, |
| "loss": 2.9782, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.8063496078927397, |
| "grad_norm": 1.7057468891143799, |
| "learning_rate": 3.875537566405262e-06, |
| "loss": 2.8451, |
| "step": 6375 |
| }, |
| { |
| "epoch": 0.8095117632178093, |
| "grad_norm": 1.548420786857605, |
| "learning_rate": 3.812294459903871e-06, |
| "loss": 2.7688, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8126739185428788, |
| "grad_norm": 1.467391014099121, |
| "learning_rate": 3.749051353402479e-06, |
| "loss": 2.9415, |
| "step": 6425 |
| }, |
| { |
| "epoch": 0.8158360738679484, |
| "grad_norm": 1.6299670934677124, |
| "learning_rate": 3.685808246901088e-06, |
| "loss": 2.9104, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.818998229193018, |
| "grad_norm": 1.7904433012008667, |
| "learning_rate": 3.622565140399697e-06, |
| "loss": 2.8673, |
| "step": 6475 |
| }, |
| { |
| "epoch": 0.8221603845180875, |
| "grad_norm": 1.5487452745437622, |
| "learning_rate": 3.5593220338983053e-06, |
| "loss": 2.991, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.8253225398431571, |
| "grad_norm": 2.021498918533325, |
| "learning_rate": 3.4960789273969143e-06, |
| "loss": 2.9309, |
| "step": 6525 |
| }, |
| { |
| "epoch": 0.8284846951682266, |
| "grad_norm": 1.6797212362289429, |
| "learning_rate": 3.4328358208955225e-06, |
| "loss": 2.7787, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.8316468504932962, |
| "grad_norm": 1.5939486026763916, |
| "learning_rate": 3.3695927143941316e-06, |
| "loss": 2.8101, |
| "step": 6575 |
| }, |
| { |
| "epoch": 0.8348090058183658, |
| "grad_norm": 1.506430983543396, |
| "learning_rate": 3.3063496078927398e-06, |
| "loss": 2.9788, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.8379711611434354, |
| "grad_norm": 1.3282697200775146, |
| "learning_rate": 3.243106501391349e-06, |
| "loss": 2.8371, |
| "step": 6625 |
| }, |
| { |
| "epoch": 0.841133316468505, |
| "grad_norm": 1.799822449684143, |
| "learning_rate": 3.179863394889957e-06, |
| "loss": 2.9774, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.8442954717935744, |
| "grad_norm": 1.6544405221939087, |
| "learning_rate": 3.116620288388566e-06, |
| "loss": 2.8366, |
| "step": 6675 |
| }, |
| { |
| "epoch": 0.847457627118644, |
| "grad_norm": 1.7522649765014648, |
| "learning_rate": 3.0533771818871742e-06, |
| "loss": 2.9712, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.8506197824437136, |
| "grad_norm": 1.5311869382858276, |
| "learning_rate": 2.9901340753857833e-06, |
| "loss": 2.792, |
| "step": 6725 |
| }, |
| { |
| "epoch": 0.8537819377687832, |
| "grad_norm": 1.1723778247833252, |
| "learning_rate": 2.9268909688843915e-06, |
| "loss": 2.9395, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.8569440930938528, |
| "grad_norm": 1.9257378578186035, |
| "learning_rate": 2.8636478623830005e-06, |
| "loss": 2.9973, |
| "step": 6775 |
| }, |
| { |
| "epoch": 0.8601062484189224, |
| "grad_norm": 1.4320708513259888, |
| "learning_rate": 2.8004047558816087e-06, |
| "loss": 2.739, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.8632684037439919, |
| "grad_norm": 1.961350679397583, |
| "learning_rate": 2.7371616493802178e-06, |
| "loss": 3.0129, |
| "step": 6825 |
| }, |
| { |
| "epoch": 0.8664305590690615, |
| "grad_norm": 1.7785652875900269, |
| "learning_rate": 2.673918542878827e-06, |
| "loss": 3.0007, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.869592714394131, |
| "grad_norm": 1.5985599756240845, |
| "learning_rate": 2.610675436377435e-06, |
| "loss": 2.8996, |
| "step": 6875 |
| }, |
| { |
| "epoch": 0.8727548697192006, |
| "grad_norm": 1.4986529350280762, |
| "learning_rate": 2.547432329876044e-06, |
| "loss": 2.917, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.8759170250442702, |
| "grad_norm": 1.4311537742614746, |
| "learning_rate": 2.4841892233746523e-06, |
| "loss": 2.8844, |
| "step": 6925 |
| }, |
| { |
| "epoch": 0.8790791803693397, |
| "grad_norm": 1.8189743757247925, |
| "learning_rate": 2.420946116873261e-06, |
| "loss": 2.9588, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.8822413356944093, |
| "grad_norm": 1.1215569972991943, |
| "learning_rate": 2.3577030103718695e-06, |
| "loss": 2.8752, |
| "step": 6975 |
| }, |
| { |
| "epoch": 0.8854034910194789, |
| "grad_norm": 1.4951814413070679, |
| "learning_rate": 2.294459903870478e-06, |
| "loss": 2.8954, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8885656463445485, |
| "grad_norm": 1.464539885520935, |
| "learning_rate": 2.231216797369087e-06, |
| "loss": 2.7321, |
| "step": 7025 |
| }, |
| { |
| "epoch": 0.891727801669618, |
| "grad_norm": 1.4621193408966064, |
| "learning_rate": 2.1679736908676958e-06, |
| "loss": 2.8688, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.8948899569946875, |
| "grad_norm": 1.6291460990905762, |
| "learning_rate": 2.1047305843663044e-06, |
| "loss": 2.8068, |
| "step": 7075 |
| }, |
| { |
| "epoch": 0.8980521123197571, |
| "grad_norm": 1.4216442108154297, |
| "learning_rate": 2.041487477864913e-06, |
| "loss": 2.9351, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9012142676448267, |
| "grad_norm": 1.306211233139038, |
| "learning_rate": 1.9782443713635216e-06, |
| "loss": 2.9398, |
| "step": 7125 |
| }, |
| { |
| "epoch": 0.9043764229698963, |
| "grad_norm": 1.6560629606246948, |
| "learning_rate": 1.9150012648621303e-06, |
| "loss": 2.9728, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.9075385782949659, |
| "grad_norm": 1.9747501611709595, |
| "learning_rate": 1.8517581583607389e-06, |
| "loss": 2.903, |
| "step": 7175 |
| }, |
| { |
| "epoch": 0.9107007336200355, |
| "grad_norm": 1.3126964569091797, |
| "learning_rate": 1.7885150518593475e-06, |
| "loss": 2.796, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.913862888945105, |
| "grad_norm": 1.5026005506515503, |
| "learning_rate": 1.7252719453579561e-06, |
| "loss": 2.7565, |
| "step": 7225 |
| }, |
| { |
| "epoch": 0.9170250442701745, |
| "grad_norm": 1.5117318630218506, |
| "learning_rate": 1.6620288388565647e-06, |
| "loss": 2.8329, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.9201871995952441, |
| "grad_norm": 1.812044382095337, |
| "learning_rate": 1.5987857323551734e-06, |
| "loss": 2.9988, |
| "step": 7275 |
| }, |
| { |
| "epoch": 0.9233493549203137, |
| "grad_norm": 1.25447678565979, |
| "learning_rate": 1.535542625853782e-06, |
| "loss": 2.9395, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.9265115102453833, |
| "grad_norm": 1.6009807586669922, |
| "learning_rate": 1.4722995193523906e-06, |
| "loss": 2.8487, |
| "step": 7325 |
| }, |
| { |
| "epoch": 0.9296736655704528, |
| "grad_norm": 2.0983338356018066, |
| "learning_rate": 1.4090564128509992e-06, |
| "loss": 2.841, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.9328358208955224, |
| "grad_norm": 1.5755832195281982, |
| "learning_rate": 1.3458133063496079e-06, |
| "loss": 2.8646, |
| "step": 7375 |
| }, |
| { |
| "epoch": 0.935997976220592, |
| "grad_norm": 1.4814996719360352, |
| "learning_rate": 1.2825701998482165e-06, |
| "loss": 3.0229, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.9391601315456615, |
| "grad_norm": 1.687947392463684, |
| "learning_rate": 1.2193270933468253e-06, |
| "loss": 2.8668, |
| "step": 7425 |
| }, |
| { |
| "epoch": 0.942322286870731, |
| "grad_norm": 1.557085394859314, |
| "learning_rate": 1.156083986845434e-06, |
| "loss": 2.9006, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.9454844421958006, |
| "grad_norm": 1.4846750497817993, |
| "learning_rate": 1.0928408803440425e-06, |
| "loss": 2.9055, |
| "step": 7475 |
| }, |
| { |
| "epoch": 0.9486465975208702, |
| "grad_norm": 2.095038652420044, |
| "learning_rate": 1.0295977738426512e-06, |
| "loss": 3.0127, |
| "step": 7500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 7906, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1995574026240000.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|