{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 7906,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0031621553250695674, "grad_norm": 11.55327320098877, "learning_rate": 1.9939286617758668e-05, "loss": 8.596, "step": 25 },
    { "epoch": 0.006324310650139135, "grad_norm": 5.928526401519775, "learning_rate": 1.9876043511257274e-05, "loss": 5.501, "step": 50 },
    { "epoch": 0.009486465975208701, "grad_norm": 2.7216451168060303, "learning_rate": 1.9812800404755884e-05, "loss": 4.9742, "step": 75 },
    { "epoch": 0.01264862130027827, "grad_norm": 2.725499153137207, "learning_rate": 1.974955729825449e-05, "loss": 4.6023, "step": 100 },
    { "epoch": 0.015810776625347838, "grad_norm": 2.318068027496338, "learning_rate": 1.96863141917531e-05, "loss": 4.4124, "step": 125 },
    { "epoch": 0.018972931950417403, "grad_norm": 2.437415838241577, "learning_rate": 1.9623071085251707e-05, "loss": 4.5471, "step": 150 },
    { "epoch": 0.02213508727548697, "grad_norm": 1.6126755475997925, "learning_rate": 1.955982797875032e-05, "loss": 4.0951, "step": 175 },
    { "epoch": 0.02529724260055654, "grad_norm": 1.9190938472747803, "learning_rate": 1.9496584872248927e-05, "loss": 4.0572, "step": 200 },
    { "epoch": 0.028459397925626108, "grad_norm": 1.6798152923583984, "learning_rate": 1.9433341765747537e-05, "loss": 3.9318, "step": 225 },
    { "epoch": 0.031621553250695676, "grad_norm": 2.02826189994812, "learning_rate": 1.9370098659246143e-05, "loss": 3.9254, "step": 250 },
    { "epoch": 0.034783708575765244, "grad_norm": 1.6234019994735718, "learning_rate": 1.9306855552744753e-05, "loss": 3.7144, "step": 275 },
    { "epoch": 0.037945863900834806, "grad_norm": 2.1222445964813232, "learning_rate": 1.924361244624336e-05, "loss": 4.0748, "step": 300 },
    { "epoch": 0.041108019225904374, "grad_norm": 1.8071123361587524, "learning_rate": 1.918036933974197e-05, "loss": 3.8597, "step": 325 },
    { "epoch": 0.04427017455097394, "grad_norm": 1.2955825328826904, "learning_rate": 1.911712623324058e-05, "loss": 3.6891, "step": 350 },
    { "epoch": 0.04743232987604351, "grad_norm": 1.9034608602523804, "learning_rate": 1.905388312673919e-05, "loss": 3.8955, "step": 375 },
    { "epoch": 0.05059448520111308, "grad_norm": 1.649466872215271, "learning_rate": 1.8990640020237796e-05, "loss": 3.8186, "step": 400 },
    { "epoch": 0.05375664052618265, "grad_norm": 1.8459181785583496, "learning_rate": 1.8927396913736406e-05, "loss": 3.5247, "step": 425 },
    { "epoch": 0.056918795851252216, "grad_norm": 1.5839076042175293, "learning_rate": 1.8864153807235012e-05, "loss": 3.756, "step": 450 },
    { "epoch": 0.060080951176321784, "grad_norm": 1.5614947080612183, "learning_rate": 1.8800910700733622e-05, "loss": 3.6433, "step": 475 },
    { "epoch": 0.06324310650139135, "grad_norm": 1.7831311225891113, "learning_rate": 1.873766759423223e-05, "loss": 3.9025, "step": 500 },
    { "epoch": 0.06640526182646092, "grad_norm": 1.4928420782089233, "learning_rate": 1.867442448773084e-05, "loss": 3.4216, "step": 525 },
    { "epoch": 0.06956741715153049, "grad_norm": 1.8199015855789185, "learning_rate": 1.861118138122945e-05, "loss": 3.7497, "step": 550 },
    { "epoch": 0.07272957247660006, "grad_norm": 1.6205722093582153, "learning_rate": 1.854793827472806e-05, "loss": 3.4153, "step": 575 },
    { "epoch": 0.07589172780166961, "grad_norm": 1.4476927518844604, "learning_rate": 1.8484695168226665e-05, "loss": 3.6298, "step": 600 },
    { "epoch": 0.07905388312673918, "grad_norm": 1.5358431339263916, "learning_rate": 1.8421452061725275e-05, "loss": 3.5282, "step": 625 },
    { "epoch": 0.08221603845180875, "grad_norm": 1.616495966911316, "learning_rate": 1.835820895522388e-05, "loss": 3.6029, "step": 650 },
    { "epoch": 0.08537819377687832, "grad_norm": 1.4835790395736694, "learning_rate": 1.829496584872249e-05, "loss": 3.6817, "step": 675 },
    { "epoch": 0.08854034910194788, "grad_norm": 1.8951404094696045, "learning_rate": 1.8231722742221098e-05, "loss": 3.5222, "step": 700 },
    { "epoch": 0.09170250442701745, "grad_norm": 1.4792296886444092, "learning_rate": 1.8168479635719707e-05, "loss": 3.552, "step": 725 },
    { "epoch": 0.09486465975208702, "grad_norm": 1.5331674814224243, "learning_rate": 1.8105236529218317e-05, "loss": 3.6274, "step": 750 },
    { "epoch": 0.09802681507715659, "grad_norm": 1.3521767854690552, "learning_rate": 1.8041993422716927e-05, "loss": 3.4604, "step": 775 },
    { "epoch": 0.10118897040222616, "grad_norm": 1.9199748039245605, "learning_rate": 1.7978750316215534e-05, "loss": 3.6434, "step": 800 },
    { "epoch": 0.10435112572729573, "grad_norm": 1.4599398374557495, "learning_rate": 1.7915507209714144e-05, "loss": 3.4381, "step": 825 },
    { "epoch": 0.1075132810523653, "grad_norm": 1.8845282793045044, "learning_rate": 1.785226410321275e-05, "loss": 3.4885, "step": 850 },
    { "epoch": 0.11067543637743486, "grad_norm": 1.6054080724716187, "learning_rate": 1.778902099671136e-05, "loss": 3.4714, "step": 875 },
    { "epoch": 0.11383759170250443, "grad_norm": 1.4243124723434448, "learning_rate": 1.7725777890209967e-05, "loss": 3.423, "step": 900 },
    { "epoch": 0.116999747027574, "grad_norm": 1.828045129776001, "learning_rate": 1.7662534783708576e-05, "loss": 3.3451, "step": 925 },
    { "epoch": 0.12016190235264357, "grad_norm": 1.7184784412384033, "learning_rate": 1.7599291677207186e-05, "loss": 3.4567, "step": 950 },
    { "epoch": 0.12332405767771312, "grad_norm": 2.0031352043151855, "learning_rate": 1.7536048570705796e-05, "loss": 3.5227, "step": 975 },
    { "epoch": 0.1264862130027827, "grad_norm": 1.8211214542388916, "learning_rate": 1.7472805464204403e-05, "loss": 3.4222, "step": 1000 },
    { "epoch": 0.12964836832785226, "grad_norm": 1.6366448402404785, "learning_rate": 1.7409562357703013e-05, "loss": 3.3479, "step": 1025 },
    { "epoch": 0.13281052365292184, "grad_norm": 1.4770481586456299, "learning_rate": 1.734631925120162e-05, "loss": 3.4232, "step": 1050 },
    { "epoch": 0.1359726789779914, "grad_norm": 1.3028419017791748, "learning_rate": 1.728307614470023e-05, "loss": 3.2511, "step": 1075 },
    { "epoch": 0.13913483430306098, "grad_norm": 1.8425815105438232, "learning_rate": 1.721983303819884e-05, "loss": 3.3851, "step": 1100 },
    { "epoch": 0.14229698962813053, "grad_norm": 1.5002127885818481, "learning_rate": 1.715658993169745e-05, "loss": 3.5281, "step": 1125 },
    { "epoch": 0.14545914495320011, "grad_norm": 1.5222417116165161, "learning_rate": 1.7093346825196055e-05, "loss": 3.3473, "step": 1150 },
    { "epoch": 0.14862130027826967, "grad_norm": 1.628788948059082, "learning_rate": 1.7030103718694665e-05, "loss": 3.3795, "step": 1175 },
    { "epoch": 0.15178345560333922, "grad_norm": 1.3158140182495117, "learning_rate": 1.6966860612193272e-05, "loss": 3.5536, "step": 1200 },
    { "epoch": 0.1549456109284088, "grad_norm": 1.8109098672866821, "learning_rate": 1.690361750569188e-05, "loss": 3.3006, "step": 1225 },
    { "epoch": 0.15810776625347836, "grad_norm": 2.0179309844970703, "learning_rate": 1.6840374399190488e-05, "loss": 3.4737, "step": 1250 },
    { "epoch": 0.16126992157854794, "grad_norm": 1.8066917657852173, "learning_rate": 1.6777131292689098e-05, "loss": 3.3135, "step": 1275 },
    { "epoch": 0.1644320769036175, "grad_norm": 1.4508979320526123, "learning_rate": 1.6713888186187708e-05, "loss": 3.154, "step": 1300 },
    { "epoch": 0.16759423222868708, "grad_norm": 1.324156641960144, "learning_rate": 1.6650645079686318e-05, "loss": 3.3076, "step": 1325 },
    { "epoch": 0.17075638755375663, "grad_norm": 1.7277374267578125, "learning_rate": 1.6587401973184924e-05, "loss": 3.3168, "step": 1350 },
    { "epoch": 0.17391854287882622, "grad_norm": 1.581597924232483, "learning_rate": 1.6524158866683534e-05, "loss": 3.2586, "step": 1375 },
    { "epoch": 0.17708069820389577, "grad_norm": 1.2408771514892578, "learning_rate": 1.646091576018214e-05, "loss": 3.3174, "step": 1400 },
    { "epoch": 0.18024285352896535, "grad_norm": 1.5069456100463867, "learning_rate": 1.639767265368075e-05, "loss": 3.2248, "step": 1425 },
    { "epoch": 0.1834050088540349, "grad_norm": 1.474717378616333, "learning_rate": 1.6334429547179357e-05, "loss": 3.2757, "step": 1450 },
    { "epoch": 0.1865671641791045, "grad_norm": 1.648443341255188, "learning_rate": 1.6271186440677967e-05, "loss": 3.2754, "step": 1475 },
    { "epoch": 0.18972931950417404, "grad_norm": 1.5270763635635376, "learning_rate": 1.6207943334176577e-05, "loss": 3.2413, "step": 1500 },
    { "epoch": 0.19289147482924363, "grad_norm": 1.643730640411377, "learning_rate": 1.6144700227675187e-05, "loss": 3.213, "step": 1525 },
    { "epoch": 0.19605363015431318, "grad_norm": 1.4000329971313477, "learning_rate": 1.6081457121173793e-05, "loss": 3.236, "step": 1550 },
    { "epoch": 0.19921578547938273, "grad_norm": 1.6957166194915771, "learning_rate": 1.6018214014672403e-05, "loss": 3.1811, "step": 1575 },
    { "epoch": 0.20237794080445232, "grad_norm": 1.763476848602295, "learning_rate": 1.595497090817101e-05, "loss": 3.2634, "step": 1600 },
    { "epoch": 0.20554009612952187, "grad_norm": 1.5622406005859375, "learning_rate": 1.589172780166962e-05, "loss": 3.2498, "step": 1625 },
    { "epoch": 0.20870225145459145, "grad_norm": 2.501917600631714, "learning_rate": 1.5828484695168226e-05, "loss": 3.3526, "step": 1650 },
    { "epoch": 0.211864406779661, "grad_norm": 1.5445410013198853, "learning_rate": 1.5765241588666836e-05, "loss": 3.25, "step": 1675 },
    { "epoch": 0.2150265621047306, "grad_norm": 1.420432209968567, "learning_rate": 1.5701998482165446e-05, "loss": 3.316, "step": 1700 },
    { "epoch": 0.21818871742980014, "grad_norm": 1.7002512216567993, "learning_rate": 1.5638755375664056e-05, "loss": 3.156, "step": 1725 },
    { "epoch": 0.22135087275486973, "grad_norm": 1.533703088760376, "learning_rate": 1.5575512269162662e-05, "loss": 3.0938, "step": 1750 },
    { "epoch": 0.22451302807993928, "grad_norm": 1.4791340827941895, "learning_rate": 1.5512269162661272e-05, "loss": 3.2756, "step": 1775 },
    { "epoch": 0.22767518340500886, "grad_norm": 1.783389925956726, "learning_rate": 1.544902605615988e-05, "loss": 3.3588, "step": 1800 },
    { "epoch": 0.23083733873007842, "grad_norm": 1.7652767896652222, "learning_rate": 1.538578294965849e-05, "loss": 3.1746, "step": 1825 },
    { "epoch": 0.233999494055148, "grad_norm": 1.7373157739639282, "learning_rate": 1.53225398431571e-05, "loss": 3.2757, "step": 1850 },
    { "epoch": 0.23716164938021755, "grad_norm": 1.655953049659729, "learning_rate": 1.5259296736655705e-05, "loss": 3.2366, "step": 1875 },
    { "epoch": 0.24032380470528714, "grad_norm": 1.4808825254440308, "learning_rate": 1.5196053630154315e-05, "loss": 3.2056, "step": 1900 },
    { "epoch": 0.2434859600303567, "grad_norm": 1.3337376117706299, "learning_rate": 1.5132810523652923e-05, "loss": 2.9247, "step": 1925 },
    { "epoch": 0.24664811535542625, "grad_norm": 1.3115172386169434, "learning_rate": 1.5069567417151531e-05, "loss": 3.3964, "step": 1950 },
    { "epoch": 0.24981027068049583, "grad_norm": 1.3567864894866943, "learning_rate": 1.500632431065014e-05, "loss": 3.2777, "step": 1975 },
    { "epoch": 0.2529724260055654, "grad_norm": 1.5610828399658203, "learning_rate": 1.4943081204148748e-05, "loss": 3.0115, "step": 2000 },
    { "epoch": 0.25613458133063494, "grad_norm": 1.6952314376831055, "learning_rate": 1.4879838097647357e-05, "loss": 3.2321, "step": 2025 },
    { "epoch": 0.2592967366557045, "grad_norm": 1.5469592809677124, "learning_rate": 1.4816594991145967e-05, "loss": 3.1697, "step": 2050 },
    { "epoch": 0.2624588919807741, "grad_norm": 1.7456170320510864, "learning_rate": 1.4753351884644576e-05, "loss": 3.0305, "step": 2075 },
    { "epoch": 0.2656210473058437, "grad_norm": 1.8613225221633911, "learning_rate": 1.4690108778143184e-05, "loss": 3.1094, "step": 2100 },
    { "epoch": 0.2687832026309132, "grad_norm": 1.6130377054214478, "learning_rate": 1.4626865671641792e-05, "loss": 3.1656, "step": 2125 },
    { "epoch": 0.2719453579559828, "grad_norm": 1.4752004146575928, "learning_rate": 1.45636225651404e-05, "loss": 3.1527, "step": 2150 },
    { "epoch": 0.2751075132810524, "grad_norm": 1.3546524047851562, "learning_rate": 1.4500379458639008e-05, "loss": 3.1096, "step": 2175 },
    { "epoch": 0.27826966860612196, "grad_norm": 1.5383223295211792, "learning_rate": 1.4437136352137617e-05, "loss": 3.3534, "step": 2200 },
    { "epoch": 0.2814318239311915, "grad_norm": 1.7905950546264648, "learning_rate": 1.4373893245636228e-05, "loss": 3.2779, "step": 2225 },
    { "epoch": 0.28459397925626106, "grad_norm": 1.408922553062439, "learning_rate": 1.4310650139134836e-05, "loss": 3.0517, "step": 2250 },
    { "epoch": 0.28775613458133065, "grad_norm": 1.7881851196289062, "learning_rate": 1.4247407032633445e-05, "loss": 3.144, "step": 2275 },
    { "epoch": 0.29091828990640023, "grad_norm": 1.5622018575668335, "learning_rate": 1.4184163926132053e-05, "loss": 3.0161, "step": 2300 },
    { "epoch": 0.29408044523146976, "grad_norm": 1.1805533170700073, "learning_rate": 1.4120920819630661e-05, "loss": 3.3535, "step": 2325 },
    { "epoch": 0.29724260055653934, "grad_norm": 1.4643152952194214, "learning_rate": 1.4057677713129269e-05, "loss": 3.0817, "step": 2350 },
    { "epoch": 0.3004047558816089, "grad_norm": 1.640499234199524, "learning_rate": 1.3994434606627877e-05, "loss": 3.2889, "step": 2375 },
    { "epoch": 0.30356691120667845, "grad_norm": 1.4501370191574097, "learning_rate": 1.3931191500126486e-05, "loss": 3.0626, "step": 2400 },
    { "epoch": 0.30672906653174803, "grad_norm": 1.540034532546997, "learning_rate": 1.3867948393625097e-05, "loss": 3.1131, "step": 2425 },
    { "epoch": 0.3098912218568176, "grad_norm": 1.3436205387115479, "learning_rate": 1.3804705287123705e-05, "loss": 3.3248, "step": 2450 },
    { "epoch": 0.3130533771818872, "grad_norm": 1.8110958337783813, "learning_rate": 1.3741462180622314e-05, "loss": 3.2063, "step": 2475 },
    { "epoch": 0.3162155325069567, "grad_norm": 1.1593376398086548, "learning_rate": 1.3678219074120922e-05, "loss": 3.1707, "step": 2500 },
    { "epoch": 0.3193776878320263, "grad_norm": 1.2828359603881836, "learning_rate": 1.361497596761953e-05, "loss": 3.1273, "step": 2525 },
    { "epoch": 0.3225398431570959, "grad_norm": 1.4801135063171387, "learning_rate": 1.3551732861118138e-05, "loss": 3.1145, "step": 2550 },
    { "epoch": 0.32570199848216547, "grad_norm": 1.6385855674743652, "learning_rate": 1.3488489754616746e-05, "loss": 3.1274, "step": 2575 },
    { "epoch": 0.328864153807235, "grad_norm": 1.3759686946868896, "learning_rate": 1.3425246648115358e-05, "loss": 2.9515, "step": 2600 },
    { "epoch": 0.3320263091323046, "grad_norm": 1.3556058406829834, "learning_rate": 1.3362003541613966e-05, "loss": 3.1251, "step": 2625 },
    { "epoch": 0.33518846445737416, "grad_norm": 1.8028017282485962, "learning_rate": 1.3298760435112574e-05, "loss": 3.1851, "step": 2650 },
    { "epoch": 0.33835061978244374, "grad_norm": 1.3875707387924194, "learning_rate": 1.3235517328611182e-05, "loss": 3.1831, "step": 2675 },
    { "epoch": 0.34151277510751327, "grad_norm": 1.3739882707595825, "learning_rate": 1.317227422210979e-05, "loss": 2.944, "step": 2700 },
    { "epoch": 0.34467493043258285, "grad_norm": 1.7388805150985718, "learning_rate": 1.3109031115608399e-05, "loss": 2.9621, "step": 2725 },
    { "epoch": 0.34783708575765243, "grad_norm": 1.4218101501464844, "learning_rate": 1.3045788009107007e-05, "loss": 3.1734, "step": 2750 },
    { "epoch": 0.35099924108272196, "grad_norm": 1.5523903369903564, "learning_rate": 1.2982544902605615e-05, "loss": 3.1323, "step": 2775 },
    { "epoch": 0.35416139640779154, "grad_norm": 1.2134604454040527, "learning_rate": 1.2919301796104227e-05, "loss": 2.9738, "step": 2800 },
    { "epoch": 0.3573235517328611, "grad_norm": 1.7392768859863281, "learning_rate": 1.2856058689602835e-05, "loss": 3.2412, "step": 2825 },
    { "epoch": 0.3604857070579307, "grad_norm": 1.5656300783157349, "learning_rate": 1.2792815583101443e-05, "loss": 3.1549, "step": 2850 },
    { "epoch": 0.36364786238300023, "grad_norm": 1.592602014541626, "learning_rate": 1.2729572476600051e-05, "loss": 3.2068, "step": 2875 },
    { "epoch": 0.3668100177080698, "grad_norm": 1.6737302541732788, "learning_rate": 1.266632937009866e-05, "loss": 3.0918, "step": 2900 },
    { "epoch": 0.3699721730331394, "grad_norm": 1.4579912424087524, "learning_rate": 1.2603086263597268e-05, "loss": 3.17, "step": 2925 },
    { "epoch": 0.373134328358209, "grad_norm": 1.9524872303009033, "learning_rate": 1.2539843157095876e-05, "loss": 3.0157, "step": 2950 },
    { "epoch": 0.3762964836832785, "grad_norm": 1.5913100242614746, "learning_rate": 1.2476600050594488e-05, "loss": 2.9347, "step": 2975 },
    { "epoch": 0.3794586390083481, "grad_norm": 1.5421273708343506, "learning_rate": 1.2413356944093096e-05, "loss": 3.0643, "step": 3000 },
    { "epoch": 0.38262079433341767, "grad_norm": 1.4578076601028442, "learning_rate": 1.2350113837591704e-05, "loss": 3.1577, "step": 3025 },
    { "epoch": 0.38578294965848725, "grad_norm": 1.4034929275512695, "learning_rate": 1.2286870731090312e-05, "loss": 3.0522, "step": 3050 },
    { "epoch": 0.3889451049835568, "grad_norm": 1.4334189891815186, "learning_rate": 1.222362762458892e-05, "loss": 3.0195, "step": 3075 },
    { "epoch": 0.39210726030862636, "grad_norm": 1.5584102869033813, "learning_rate": 1.2160384518087529e-05, "loss": 2.9582, "step": 3100 },
    { "epoch": 0.39526941563369594, "grad_norm": 1.4117885828018188, "learning_rate": 1.2097141411586137e-05, "loss": 3.1979, "step": 3125 },
    { "epoch": 0.39843157095876547, "grad_norm": 1.6135624647140503, "learning_rate": 1.2033898305084745e-05, "loss": 3.0185, "step": 3150 },
    { "epoch": 0.40159372628383505, "grad_norm": 1.9495983123779297, "learning_rate": 1.1970655198583357e-05, "loss": 3.1753, "step": 3175 },
    { "epoch": 0.40475588160890463, "grad_norm": 1.3732932806015015, "learning_rate": 1.1907412092081965e-05, "loss": 2.9982, "step": 3200 },
    { "epoch": 0.4079180369339742, "grad_norm": 1.6417973041534424, "learning_rate": 1.1844168985580573e-05, "loss": 2.9491, "step": 3225 },
    { "epoch": 0.41108019225904374, "grad_norm": 1.599778413772583, "learning_rate": 1.1780925879079181e-05, "loss": 2.9619, "step": 3250 },
    { "epoch": 0.4142423475841133, "grad_norm": 1.5678138732910156, "learning_rate": 1.171768277257779e-05, "loss": 3.1179, "step": 3275 },
    { "epoch": 0.4174045029091829, "grad_norm": 1.4923375844955444, "learning_rate": 1.1654439666076398e-05, "loss": 3.193, "step": 3300 },
    { "epoch": 0.4205666582342525, "grad_norm": 1.5655521154403687, "learning_rate": 1.1591196559575006e-05, "loss": 3.0813, "step": 3325 },
    { "epoch": 0.423728813559322, "grad_norm": 1.7306265830993652, "learning_rate": 1.1527953453073617e-05, "loss": 3.1584, "step": 3350 },
    { "epoch": 0.4268909688843916, "grad_norm": 1.4964895248413086, "learning_rate": 1.1464710346572226e-05, "loss": 2.9885, "step": 3375 },
    { "epoch": 0.4300531242094612, "grad_norm": 1.386781930923462, "learning_rate": 1.1401467240070834e-05, "loss": 2.7611, "step": 3400 },
    { "epoch": 0.43321527953453076, "grad_norm": 1.223809838294983, "learning_rate": 1.1338224133569442e-05, "loss": 2.9793, "step": 3425 },
    { "epoch": 0.4363774348596003, "grad_norm": 1.7949477434158325, "learning_rate": 1.127498102706805e-05, "loss": 3.1053, "step": 3450 },
    { "epoch": 0.43953959018466987, "grad_norm": 1.2614104747772217, "learning_rate": 1.1211737920566658e-05, "loss": 3.1497, "step": 3475 },
    { "epoch": 0.44270174550973945, "grad_norm": 1.4934375286102295, "learning_rate": 1.1148494814065267e-05, "loss": 3.2349, "step": 3500 },
    { "epoch": 0.445863900834809, "grad_norm": 1.692384958267212, "learning_rate": 1.1085251707563875e-05, "loss": 3.2143, "step": 3525 },
    { "epoch": 0.44902605615987856, "grad_norm": 1.430724024772644, "learning_rate": 1.1022008601062486e-05, "loss": 2.9926, "step": 3550 },
    { "epoch": 0.45218821148494814, "grad_norm": 1.6542253494262695, "learning_rate": 1.0958765494561095e-05, "loss": 3.0877, "step": 3575 },
    { "epoch": 0.4553503668100177, "grad_norm": 1.761777400970459, "learning_rate": 1.0895522388059703e-05, "loss": 3.1343, "step": 3600 },
    { "epoch": 0.45851252213508725, "grad_norm": 1.3432550430297852, "learning_rate": 1.0832279281558311e-05, "loss": 2.9125, "step": 3625 },
    { "epoch": 0.46167467746015683, "grad_norm": 1.6349095106124878, "learning_rate": 1.0769036175056919e-05, "loss": 3.0102, "step": 3650 },
    { "epoch": 0.4648368327852264, "grad_norm": 1.5871011018753052, "learning_rate": 1.0705793068555527e-05, "loss": 2.8361, "step": 3675 },
    { "epoch": 0.467998988110296, "grad_norm": 1.5581876039505005, "learning_rate": 1.0642549962054136e-05, "loss": 2.9176, "step": 3700 },
    { "epoch": 0.4711611434353655, "grad_norm": 1.9018642902374268, "learning_rate": 1.0579306855552747e-05, "loss": 3.1236, "step": 3725 },
    { "epoch": 0.4743232987604351, "grad_norm": 1.9724278450012207, "learning_rate": 1.0516063749051355e-05, "loss": 3.2517, "step": 3750 },
    { "epoch": 0.4774854540855047, "grad_norm": 1.4184868335723877, "learning_rate": 1.0452820642549963e-05, "loss": 2.7384, "step": 3775 },
    { "epoch": 0.4806476094105743, "grad_norm": 1.6251132488250732, "learning_rate": 1.0389577536048572e-05, "loss": 2.9232, "step": 3800 },
    { "epoch": 0.4838097647356438, "grad_norm": 1.526455283164978, "learning_rate": 1.032633442954718e-05, "loss": 3.0109, "step": 3825 },
    { "epoch": 0.4869719200607134, "grad_norm": 1.5482044219970703, "learning_rate": 1.0263091323045788e-05, "loss": 2.9056, "step": 3850 },
    { "epoch": 0.49013407538578296, "grad_norm": 1.5261505842208862, "learning_rate": 1.0199848216544396e-05, "loss": 2.8749, "step": 3875 },
    { "epoch": 0.4932962307108525, "grad_norm": 1.8025535345077515, "learning_rate": 1.0136605110043004e-05, "loss": 3.1128, "step": 3900 },
    { "epoch": 0.4964583860359221, "grad_norm": 1.428043246269226, "learning_rate": 1.0073362003541616e-05, "loss": 3.0411, "step": 3925 },
    { "epoch": 0.49962054136099165, "grad_norm": 1.3403562307357788, "learning_rate": 1.0010118897040224e-05, "loss": 2.9253, "step": 3950 },
    { "epoch": 0.5027826966860612, "grad_norm": 1.6576727628707886, "learning_rate": 9.946875790538832e-06, "loss": 2.9877, "step": 3975 },
    { "epoch": 0.5059448520111308, "grad_norm": 1.8511061668395996, "learning_rate": 9.88363268403744e-06, "loss": 2.919, "step": 4000 },
    { "epoch": 0.5091070073362004, "grad_norm": 1.690691590309143, "learning_rate": 9.820389577536049e-06, "loss": 2.7579, "step": 4025 },
    { "epoch": 0.5122691626612699, "grad_norm": 1.4383487701416016, "learning_rate": 9.757146471034657e-06, "loss": 2.8421, "step": 4050 },
    { "epoch": 0.5154313179863395, "grad_norm": 1.5412685871124268, "learning_rate": 9.693903364533267e-06, "loss": 2.9609, "step": 4075 },
    { "epoch": 0.518593473311409, "grad_norm": 1.7865098714828491, "learning_rate": 9.630660258031875e-06, "loss": 2.887, "step": 4100 },
    { "epoch": 0.5217556286364786, "grad_norm": 1.4453928470611572, "learning_rate": 9.567417151530483e-06, "loss": 2.9248, "step": 4125 },
    { "epoch": 0.5249177839615482, "grad_norm": 1.6136345863342285, "learning_rate": 9.504174045029093e-06, "loss": 3.1814, "step": 4150 },
    { "epoch": 0.5280799392866178, "grad_norm": 1.3460476398468018, "learning_rate": 9.440930938527701e-06, "loss": 2.7766, "step": 4175 },
    { "epoch": 0.5312420946116874, "grad_norm": 1.6284033060073853, "learning_rate": 9.37768783202631e-06, "loss": 2.9087, "step": 4200 },
    { "epoch": 0.534404249936757, "grad_norm": 1.3353221416473389, "learning_rate": 9.314444725524918e-06, "loss": 2.9512, "step": 4225 },
    { "epoch": 0.5375664052618264, "grad_norm": 1.9415779113769531, "learning_rate": 9.251201619023528e-06, "loss": 2.7603, "step": 4250 },
    { "epoch": 0.540728560586896, "grad_norm": 1.6942236423492432, "learning_rate": 9.187958512522136e-06, "loss": 3.1245, "step": 4275 },
    { "epoch": 0.5438907159119656, "grad_norm": 2.2894318103790283, "learning_rate": 9.124715406020744e-06, "loss": 2.9695, "step": 4300 },
    { "epoch": 0.5470528712370352, "grad_norm": 1.1514796018600464, "learning_rate": 9.061472299519352e-06, "loss": 3.0047, "step": 4325 },
    { "epoch": 0.5502150265621047, "grad_norm": 1.5051628351211548, "learning_rate": 8.998229193017962e-06, "loss": 3.1653, "step": 4350 },
    { "epoch": 0.5533771818871743, "grad_norm": 1.4090489149093628, "learning_rate": 8.93498608651657e-06, "loss": 2.9528, "step": 4375 },
    { "epoch": 0.5565393372122439, "grad_norm": 1.6863518953323364, "learning_rate": 8.871742980015179e-06, "loss": 2.8309, "step": 4400 },
    { "epoch": 0.5597014925373134, "grad_norm": 1.453518271446228, "learning_rate": 8.808499873513787e-06, "loss": 2.8803, "step": 4425 },
    { "epoch": 0.562863647862383, "grad_norm": 1.987695336341858, "learning_rate": 8.745256767012397e-06, "loss": 3.001, "step": 4450 },
    { "epoch": 0.5660258031874525, "grad_norm": 1.3925899267196655, "learning_rate": 8.682013660511005e-06, "loss": 2.9407, "step": 4475 },
    { "epoch": 0.5691879585125221, "grad_norm": 1.802440881729126, "learning_rate": 8.618770554009613e-06, "loss": 3.0096, "step": 4500 },
    { "epoch": 0.5723501138375917, "grad_norm": 1.4417994022369385, "learning_rate": 8.555527447508223e-06, "loss": 2.976, "step": 4525 },
    { "epoch": 0.5755122691626613, "grad_norm": 1.7502068281173706, "learning_rate": 8.492284341006831e-06, "loss": 2.8715, "step": 4550 },
    { "epoch": 0.5786744244877309, "grad_norm": 1.7311025857925415, "learning_rate": 8.42904123450544e-06, "loss": 2.9176, "step": 4575 },
    { "epoch": 0.5818365798128005, "grad_norm": 1.6848989725112915, "learning_rate": 8.365798128004048e-06, "loss": 2.9213, "step": 4600 },
    { "epoch": 0.5849987351378699, "grad_norm": 1.4339770078659058, "learning_rate": 8.302555021502657e-06, "loss": 2.944, "step": 4625 },
    { "epoch": 0.5881608904629395, "grad_norm": 1.4645488262176514, "learning_rate": 8.239311915001266e-06, "loss": 2.8865, "step": 4650 },
    { "epoch": 0.5913230457880091, "grad_norm": 1.793530821800232, "learning_rate": 8.176068808499874e-06, "loss": 3.0749, "step": 4675 },
    { "epoch": 0.5944852011130787, "grad_norm": 1.753974437713623, "learning_rate": 8.112825701998482e-06, "loss": 2.9768, "step": 4700 },
    { "epoch": 0.5976473564381483, "grad_norm": 1.4546597003936768, "learning_rate": 8.049582595497092e-06, "loss": 2.8419, "step": 4725 },
    { "epoch": 0.6008095117632178, "grad_norm": 2.1190099716186523, "learning_rate": 7.9863394889957e-06, "loss": 2.8468, "step": 4750 },
    { "epoch": 0.6039716670882874, "grad_norm": 2.026573657989502, "learning_rate": 7.923096382494308e-06, "loss": 3.0328, "step": 4775 },
    { "epoch": 0.6071338224133569, "grad_norm": 1.5188226699829102, "learning_rate": 7.859853275992917e-06, "loss": 2.7536, "step": 4800 },
    { "epoch": 0.6102959777384265, "grad_norm": 1.9499627351760864, "learning_rate": 7.796610169491526e-06, "loss": 3.0033, "step": 4825 },
    { "epoch": 0.6134581330634961, "grad_norm": 1.4692327976226807, "learning_rate": 7.733367062990135e-06, "loss": 2.9335, "step": 4850 },
    { "epoch": 0.6166202883885656, "grad_norm": 1.4884922504425049, "learning_rate": 7.670123956488743e-06, "loss": 2.9247, "step": 4875 },
    { "epoch": 0.6197824437136352, "grad_norm": 1.5420995950698853, "learning_rate": 7.606880849987353e-06, "loss": 2.9574, "step": 4900 },
    { "epoch": 0.6229445990387048, "grad_norm": 1.8658238649368286, "learning_rate": 7.543637743485961e-06, "loss": 2.9001, "step": 4925 },
    { "epoch": 0.6261067543637744, "grad_norm": 1.5181258916854858, "learning_rate": 7.480394636984569e-06, "loss": 2.7142, "step": 4950 },
    { "epoch": 0.629268909688844, "grad_norm": 1.4289120435714722, "learning_rate": 7.417151530483177e-06, "loss": 3.0176, "step": 4975 },
    { "epoch": 0.6324310650139134, "grad_norm": 1.7044678926467896, "learning_rate": 7.353908423981787e-06, "loss": 2.9688, "step": 5000 },
    { "epoch": 0.635593220338983, "grad_norm": 1.2686058282852173, "learning_rate": 7.290665317480395e-06, "loss": 3.0132, "step": 5025 },
    { "epoch": 0.6387553756640526, "grad_norm": 1.4413508176803589, "learning_rate": 7.2274222109790036e-06, "loss": 2.948, "step": 5050 },
    { "epoch": 0.6419175309891222, "grad_norm": 1.5472356081008911, "learning_rate": 7.164179104477612e-06, "loss": 2.8153, "step": 5075 },
    { "epoch": 0.6450796863141918, "grad_norm": 1.3920260667800903, "learning_rate": 7.100935997976222e-06, "loss": 2.9269, "step": 5100 },
    { "epoch": 0.6482418416392614, "grad_norm": 1.8862273693084717, "learning_rate": 7.03769289147483e-06, "loss": 2.9641, "step": 5125 },
    { "epoch": 0.6514039969643309, "grad_norm": 1.2860732078552246, "learning_rate": 6.974449784973438e-06, "loss": 2.9628, "step": 5150 },
    { "epoch": 0.6545661522894004, "grad_norm": 1.6975411176681519, "learning_rate": 6.911206678472046e-06, "loss": 2.8478, "step": 5175 },
    { "epoch": 0.65772830761447, "grad_norm": 1.577736258506775, "learning_rate": 6.847963571970656e-06, "loss": 2.8673, "step": 5200 },
    { "epoch": 0.6608904629395396, "grad_norm": 1.5640876293182373, "learning_rate": 6.784720465469264e-06, "loss": 2.7907, "step": 5225 },
    { "epoch": 0.6640526182646092, "grad_norm": 1.5373408794403076, "learning_rate": 6.7214773589678725e-06, "loss": 2.9308, "step": 5250 },
    { "epoch": 0.6672147735896787, "grad_norm": 1.499895453453064, "learning_rate": 6.6582342524664824e-06, "loss": 2.8954, "step": 5275 },
    { "epoch": 0.6703769289147483, "grad_norm": 1.7884222269058228, "learning_rate": 6.594991145965091e-06, "loss": 3.0374, "step": 5300 },
    { "epoch": 0.6735390842398179, "grad_norm": 1.4689120054244995, "learning_rate": 6.531748039463699e-06, "loss": 2.7937, "step": 5325 },
    { "epoch": 0.6767012395648875, "grad_norm": 1.2251571416854858, "learning_rate": 6.468504932962307e-06, "loss": 2.6993, "step": 5350 },
    { "epoch": 0.679863394889957, "grad_norm": 1.6900901794433594, "learning_rate": 6.405261826460917e-06, "loss": 2.9683, "step": 5375 },
    { "epoch": 0.6830255502150265, "grad_norm": 1.3138439655303955, "learning_rate": 6.342018719959525e-06, "loss": 2.9179, "step": 5400 },
    { "epoch": 0.6861877055400961, "grad_norm": 1.3106989860534668, "learning_rate": 6.278775613458133e-06, "loss": 2.8901, "step": 5425 },
    { "epoch": 0.6893498608651657, "grad_norm": 1.7244914770126343, "learning_rate": 6.2155325069567415e-06, "loss": 2.8481, "step": 5450 },
    { "epoch": 0.6925120161902353, "grad_norm": 1.1945232152938843, "learning_rate": 6.152289400455351e-06, "loss": 2.8619, "step": 5475 },
    { "epoch": 0.6956741715153049, "grad_norm": 1.3923369646072388, "learning_rate": 6.08904629395396e-06, "loss": 2.9955, "step": 5500 },
    { "epoch": 0.6988363268403744, "grad_norm": 1.7948927879333496, "learning_rate": 6.025803187452568e-06, "loss": 3.0742, "step": 5525 },
    { "epoch": 0.7019984821654439, "grad_norm": 1.4123344421386719, "learning_rate": 5.962560080951176e-06, "loss": 2.9499, "step": 5550 },
    { "epoch": 0.7051606374905135, "grad_norm": 1.5082272291183472, "learning_rate": 5.899316974449786e-06, "loss": 3.0861, "step": 5575 },
    { "epoch": 0.7083227928155831, "grad_norm": 1.6818978786468506, "learning_rate": 5.836073867948394e-06, "loss": 2.7944, "step": 5600 },
    { "epoch": 0.7114849481406527, "grad_norm": 2.072303295135498, "learning_rate": 5.772830761447002e-06, "loss": 3.021, "step": 5625 },
    { "epoch": 0.7146471034657222, "grad_norm": 1.2416434288024902, "learning_rate": 5.7095876549456105e-06, "loss": 2.7849, "step": 5650 },
    { "epoch": 0.7178092587907918, "grad_norm": 1.6787675619125366, "learning_rate": 5.64634454844422e-06, "loss": 2.9102, "step": 5675 },
    { "epoch": 0.7209714141158614, "grad_norm": 1.542455792427063, "learning_rate": 5.5831014419428286e-06, "loss": 2.8268, "step": 5700 },
    { "epoch": 0.724133569440931, "grad_norm": 2.0236566066741943, "learning_rate": 5.519858335441437e-06, "loss": 2.8222, "step": 5725 },
    { "epoch": 0.7272957247660005, "grad_norm": 1.5775691270828247, "learning_rate": 5.456615228940047e-06, "loss": 2.9778, "step": 5750 },
    { "epoch": 0.73045788009107, "grad_norm": 1.792724847793579, "learning_rate": 5.393372122438655e-06, "loss": 2.9323, "step": 5775 },
    { "epoch": 0.7336200354161396, "grad_norm": 1.5170587301254272, "learning_rate": 5.330129015937263e-06, "loss": 2.8671, "step": 5800 },
    { "epoch": 0.7367821907412092, "grad_norm": 1.418395757675171, "learning_rate": 5.266885909435871e-06, "loss": 2.8153, "step": 5825 },
    { "epoch": 0.7399443460662788, "grad_norm": 1.4458298683166504, "learning_rate": 5.203642802934481e-06, "loss": 2.9178, "step": 5850 },
    { "epoch": 0.7431065013913484, "grad_norm": 1.6139628887176514, "learning_rate": 5.140399696433089e-06, "loss": 2.8857, "step": 5875 },
    { "epoch": 0.746268656716418, "grad_norm": 1.1631035804748535, "learning_rate": 5.0771565899316975e-06, "loss": 2.9213, "step": 5900 },
    { "epoch": 0.7494308120414874, "grad_norm": 1.5703893899917603, "learning_rate": 5.013913483430306e-06, "loss": 2.891, "step": 5925 },
    { "epoch": 0.752592967366557, "grad_norm": 1.4471451044082642, "learning_rate": 4.950670376928915e-06, "loss": 2.8261, "step": 5950 },
    { "epoch": 0.7557551226916266, "grad_norm": 1.6305294036865234, "learning_rate": 4.887427270427524e-06, "loss": 2.8918, "step": 5975 },
    { "epoch": 0.7589172780166962, "grad_norm": 1.3186579942703247, "learning_rate": 4.824184163926132e-06, "loss": 2.9478, "step": 6000 },
    { "epoch": 0.7620794333417658, "grad_norm": 1.4258198738098145, "learning_rate": 4.760941057424741e-06, "loss": 2.9313, "step": 6025 },
    { "epoch": 0.7652415886668353, "grad_norm": 1.5036190748214722, "learning_rate": 4.69769795092335e-06, "loss": 2.9562, "step": 6050 },
    { "epoch": 0.7684037439919049, "grad_norm": 1.7764675617218018, "learning_rate": 4.634454844421958e-06, "loss": 2.9396, "step": 6075 },
    { "epoch": 0.7715658993169745, "grad_norm": 1.624964714050293, "learning_rate": 4.571211737920567e-06, "loss": 2.8789, "step": 6100 },
    { "epoch": 0.774728054642044, "grad_norm": 1.1885554790496826, "learning_rate": 4.5079686314191755e-06, "loss": 2.9756, "step": 6125 },
    { "epoch": 0.7778902099671136, "grad_norm": 1.752863883972168, "learning_rate": 4.444725524917785e-06, "loss": 2.7279, "step": 6150 },
    { "epoch": 0.7810523652921831, "grad_norm": 1.4653725624084473, "learning_rate": 4.381482418416393e-06, "loss": 3.0123, "step": 6175 },
    { "epoch": 0.7842145206172527, "grad_norm": 1.5357298851013184, "learning_rate": 4.318239311915002e-06, "loss": 2.7978, "step": 6200 },
    { "epoch": 0.7873766759423223, "grad_norm": 1.4677993059158325, "learning_rate": 4.25499620541361e-06, "loss": 2.8016, "step": 6225 },
    { "epoch": 0.7905388312673919, "grad_norm": 1.3810391426086426, "learning_rate": 4.191753098912219e-06, "loss": 2.7919, "step": 6250 },
    { "epoch": 0.7937009865924615, "grad_norm": 1.827034831047058, "learning_rate": 4.128509992410827e-06, "loss": 2.9552, "step": 6275 },
    { "epoch": 0.7968631419175309, "grad_norm": 1.7732640504837036, "learning_rate": 4.065266885909436e-06, "loss": 3.03, "step": 6300 },
    { "epoch": 0.8000252972426005, "grad_norm": 2.2995574474334717, "learning_rate": 4.0020237794080445e-06, "loss": 3.027, "step": 6325 },
    { "epoch": 0.8031874525676701, "grad_norm": 1.664284110069275, "learning_rate": 3.9387806729066536e-06, "loss": 2.9782, "step": 6350 },
    { "epoch": 0.8063496078927397, "grad_norm": 1.7057468891143799, "learning_rate": 3.875537566405262e-06, "loss": 2.8451, "step": 6375 },
    { "epoch": 0.8095117632178093, "grad_norm": 1.548420786857605, "learning_rate": 3.812294459903871e-06, "loss": 2.7688, "step": 6400 },
    { "epoch": 0.8126739185428788, "grad_norm": 1.467391014099121, "learning_rate": 3.749051353402479e-06, "loss": 2.9415, "step": 6425 },
    { "epoch": 0.8158360738679484, "grad_norm": 1.6299670934677124, "learning_rate": 3.685808246901088e-06, "loss": 2.9104, "step": 6450 },
    { "epoch": 0.818998229193018, "grad_norm": 1.7904433012008667, "learning_rate": 3.622565140399697e-06, "loss": 2.8673, "step": 6475 },
    { "epoch": 0.8221603845180875, "grad_norm": 1.5487452745437622, "learning_rate": 3.5593220338983053e-06, "loss": 2.991, "step": 6500 },
    { "epoch": 0.8253225398431571, "grad_norm": 2.021498918533325, "learning_rate": 3.4960789273969143e-06, "loss": 2.9309, "step": 6525 },
    { "epoch": 0.8284846951682266, "grad_norm": 1.6797212362289429, "learning_rate": 3.4328358208955225e-06, "loss": 2.7787, "step": 6550 },
    { "epoch": 0.8316468504932962, "grad_norm": 1.5939486026763916, "learning_rate": 3.3695927143941316e-06, "loss": 2.8101, "step": 6575 },
    { "epoch": 0.8348090058183658, "grad_norm": 1.506430983543396, "learning_rate": 3.3063496078927398e-06, "loss": 2.9788, "step": 6600 },
    { "epoch": 0.8379711611434354, "grad_norm": 1.3282697200775146, "learning_rate": 3.243106501391349e-06, "loss": 2.8371, "step": 6625 },
    { "epoch": 0.841133316468505, "grad_norm": 1.799822449684143, "learning_rate": 3.179863394889957e-06, "loss": 2.9774, "step": 6650 },
    { "epoch": 0.8442954717935744, "grad_norm": 1.6544405221939087, "learning_rate": 3.116620288388566e-06, "loss": 2.8366, "step": 6675 },
    { "epoch": 0.847457627118644, "grad_norm": 1.7522649765014648, "learning_rate": 3.0533771818871742e-06, "loss": 2.9712, "step": 6700 },
    { "epoch": 0.8506197824437136, "grad_norm": 1.5311869382858276, "learning_rate": 2.9901340753857833e-06, "loss": 2.792, "step": 6725 },
    { "epoch": 0.8537819377687832, "grad_norm": 1.1723778247833252, "learning_rate": 2.9268909688843915e-06, "loss": 2.9395, "step": 6750 },
    { "epoch": 0.8569440930938528, "grad_norm": 1.9257378578186035, "learning_rate": 2.8636478623830005e-06, "loss": 2.9973, "step": 6775 },
    { "epoch": 0.8601062484189224, "grad_norm": 1.4320708513259888, "learning_rate": 2.8004047558816087e-06, "loss": 2.739, "step": 6800 },
    { "epoch": 0.8632684037439919, "grad_norm": 1.961350679397583, "learning_rate": 2.7371616493802178e-06, "loss": 3.0129, "step": 6825 },
    { "epoch": 0.8664305590690615, "grad_norm": 1.7785652875900269, "learning_rate": 2.673918542878827e-06, "loss": 3.0007, "step": 6850 },
    { "epoch": 0.869592714394131, "grad_norm": 1.5985599756240845, "learning_rate": 2.610675436377435e-06, "loss": 2.8996, "step": 6875 },
    { "epoch": 0.8727548697192006, "grad_norm": 1.4986529350280762, "learning_rate": 2.547432329876044e-06, "loss": 2.917, "step": 6900 },
    { "epoch": 0.8759170250442702, "grad_norm": 1.4311537742614746, "learning_rate": 2.4841892233746523e-06, "loss": 2.8844, "step": 6925 },
    { "epoch": 0.8790791803693397, "grad_norm": 1.8189743757247925, "learning_rate": 2.420946116873261e-06, "loss": 2.9588, "step": 6950 },
    { "epoch": 0.8822413356944093, "grad_norm": 1.1215569972991943, "learning_rate": 2.3577030103718695e-06, "loss": 2.8752, "step": 6975 },
    { "epoch": 0.8854034910194789, "grad_norm": 1.4951814413070679, "learning_rate": 2.294459903870478e-06, "loss": 2.8954, "step": 7000 },
    { "epoch": 0.8885656463445485, "grad_norm": 1.464539885520935, "learning_rate": 2.231216797369087e-06, "loss": 2.7321, "step": 7025 },
    { "epoch": 0.891727801669618, "grad_norm": 1.4621193408966064, "learning_rate": 2.1679736908676958e-06, "loss": 2.8688, "step": 7050 },
    { "epoch": 0.8948899569946875, "grad_norm": 1.6291460990905762, "learning_rate": 2.1047305843663044e-06, "loss": 2.8068, "step": 7075 },
    { "epoch": 0.8980521123197571, "grad_norm": 1.4216442108154297, "learning_rate": 2.041487477864913e-06, "loss": 2.9351, "step": 7100 },
    { "epoch": 0.9012142676448267, "grad_norm": 1.306211233139038, "learning_rate": 1.9782443713635216e-06, "loss": 2.9398, "step": 7125 },
    { "epoch": 0.9043764229698963, "grad_norm": 1.6560629606246948, "learning_rate": 1.9150012648621303e-06, "loss": 2.9728, "step": 7150 },
    { "epoch": 0.9075385782949659, "grad_norm": 1.9747501611709595, "learning_rate": 1.8517581583607389e-06, "loss": 2.903, "step": 7175 },
    { "epoch": 0.9107007336200355, "grad_norm": 1.3126964569091797, "learning_rate": 1.7885150518593475e-06, "loss": 2.796, "step": 7200 },
    { "epoch": 0.913862888945105, "grad_norm": 1.5026005506515503, "learning_rate": 1.7252719453579561e-06, "loss": 2.7565, "step": 7225 },
    { "epoch": 0.9170250442701745, "grad_norm": 1.5117318630218506, "learning_rate": 1.6620288388565647e-06, "loss": 2.8329, "step": 7250 },
    { "epoch": 0.9201871995952441, "grad_norm": 1.812044382095337, "learning_rate": 1.5987857323551734e-06, "loss": 2.9988, "step": 7275 },
    { "epoch": 0.9233493549203137, "grad_norm": 1.25447678565979, "learning_rate": 1.535542625853782e-06, "loss": 2.9395, "step": 7300 },
    { "epoch": 0.9265115102453833, "grad_norm": 1.6009807586669922, "learning_rate": 1.4722995193523906e-06, "loss": 2.8487, "step": 7325 },
    { "epoch": 0.9296736655704528, "grad_norm": 2.0983338356018066, "learning_rate": 1.4090564128509992e-06, "loss": 2.841, "step": 7350 },
    { "epoch": 0.9328358208955224, "grad_norm": 1.5755832195281982, "learning_rate": 1.3458133063496079e-06, "loss": 2.8646, "step": 7375 },
    { "epoch": 0.935997976220592, "grad_norm": 1.4814996719360352, "learning_rate": 1.2825701998482165e-06, "loss": 3.0229, "step": 7400 },
    { "epoch": 0.9391601315456615, "grad_norm": 1.687947392463684, "learning_rate": 1.2193270933468253e-06, "loss": 2.8668, "step": 7425 },
    { "epoch": 0.942322286870731, "grad_norm": 1.557085394859314, "learning_rate": 1.156083986845434e-06, "loss": 2.9006, "step": 7450 },
    { "epoch": 0.9454844421958006, "grad_norm": 1.4846750497817993, "learning_rate": 1.0928408803440425e-06, "loss": 2.9055, "step": 7475 },
    { "epoch": 0.9486465975208702, "grad_norm": 2.095038652420044, "learning_rate": 1.0295977738426512e-06, "loss": 3.0127, "step": 7500 },
    { "epoch": 0.9518087528459398, "grad_norm": 1.618682622909546, "learning_rate": 9.6635466734126e-07, "loss": 2.8352, "step": 7525 },
    { "epoch": 0.9549709081710094, "grad_norm": 1.6419885158538818, "learning_rate": 9.031115608398685e-07, "loss": 2.9209, "step": 7550 },
    { "epoch": 0.958133063496079, "grad_norm": 1.6612589359283447, "learning_rate": 8.398684543384771e-07, "loss": 2.8939, "step": 7575 },
    { "epoch": 0.9612952188211485, "grad_norm": 1.4516048431396484, "learning_rate": 7.766253478370859e-07, "loss": 2.9458, "step": 7600 },
    { "epoch": 0.964457374146218, "grad_norm": 1.864473581314087, "learning_rate": 7.133822413356945e-07, "loss": 2.958, "step": 7625 },
    { "epoch": 0.9676195294712876, "grad_norm": 1.5845332145690918, "learning_rate": 6.501391348343031e-07, "loss": 2.8868, "step": 7650 },
    { "epoch": 0.9707816847963572, "grad_norm": 1.4741908311843872, "learning_rate": 5.868960283329117e-07, "loss": 2.8258, "step": 7675 },
    { "epoch": 0.9739438401214268, "grad_norm": 1.5956405401229858, "learning_rate": 5.236529218315205e-07, "loss": 2.8274, "step": 7700 },
    { "epoch": 0.9771059954464963, "grad_norm": 1.5453468561172485, "learning_rate": 4.60409815330129e-07, "loss": 2.9071, "step": 7725 },
    { "epoch": 0.9802681507715659, "grad_norm": 1.745120882987976, "learning_rate": 3.9716670882873775e-07, "loss": 3.0668, "step": 7750 },
    { "epoch": 0.9834303060966355, "grad_norm": 1.3923741579055786, "learning_rate": 3.3392360232734637e-07, "loss": 3.0027, "step": 7775 },
    { "epoch": 0.986592461421705, "grad_norm": 1.4872705936431885, "learning_rate": 2.70680495825955e-07, "loss": 2.9386, "step": 7800 },
    { "epoch": 0.9897546167467746, "grad_norm": 2.8140368461608887, "learning_rate": 2.0743738932456364e-07, "loss": 2.7396, "step": 7825 },
    { "epoch": 0.9929167720718441, "grad_norm": 1.2857846021652222, "learning_rate": 1.4419428282317229e-07, "loss": 2.8295, "step": 7850 },
    { "epoch": 0.9960789273969137, "grad_norm": 1.593768835067749, "learning_rate": 8.095117632178092e-08, "loss": 3.0062, "step": 7875 },
    { "epoch": 0.9992410827219833, "grad_norm": 1.3941960334777832, "learning_rate": 1.770806982038958e-08, "loss": 2.7836, "step": 7900 }
  ],
| "logging_steps": 25, | |
| "max_steps": 7906, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2103601100193792.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |