{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9891774989096567,
  "eval_steps": 500,
  "global_step": 110000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008992522717360514,
      "grad_norm": 0.5267877578735352,
      "learning_rate": 0.0004999002617394902,
      "loss": 4.4189,
      "step": 1000
    },
    {
      "epoch": 0.017985045434721028,
      "grad_norm": 0.4186614155769348,
      "learning_rate": 0.0004996010866580058,
      "loss": 3.2692,
      "step": 2000
    },
    {
      "epoch": 0.026977568152081546,
      "grad_norm": 0.37206095457077026,
      "learning_rate": 0.0004991027135159133,
      "loss": 3.055,
      "step": 3000
    },
    {
      "epoch": 0.035970090869442056,
      "grad_norm": 0.3365996479988098,
      "learning_rate": 0.0004984055400477128,
      "loss": 2.9514,
      "step": 4000
    },
    {
      "epoch": 0.04496261358680257,
      "grad_norm": 0.3055461645126343,
      "learning_rate": 0.0004975101226436211,
      "loss": 2.8843,
      "step": 5000
    },
    {
      "epoch": 0.05395513630416309,
      "grad_norm": 0.2653150260448456,
      "learning_rate": 0.0004964171759055367,
      "loss": 2.8359,
      "step": 6000
    },
    {
      "epoch": 0.0629476590215236,
      "grad_norm": 0.26524779200553894,
      "learning_rate": 0.0004951275720767395,
      "loss": 2.7964,
      "step": 7000
    },
    {
      "epoch": 0.07194018173888411,
      "grad_norm": 0.23818659782409668,
      "learning_rate": 0.0004936423403457847,
      "loss": 2.7717,
      "step": 8000
    },
    {
      "epoch": 0.08093270445624463,
      "grad_norm": 0.24217627942562103,
      "learning_rate": 0.0004919626660251412,
      "loss": 2.7383,
      "step": 9000
    },
    {
      "epoch": 0.08992522717360514,
      "grad_norm": 0.2376166582107544,
      "learning_rate": 0.0004900898896052357,
      "loss": 2.7244,
      "step": 10000
    },
    {
      "epoch": 0.09891774989096566,
      "grad_norm": 0.2203313708305359,
      "learning_rate": 0.00048802550568465263,
      "loss": 2.7031,
      "step": 11000
    },
    {
      "epoch": 0.10791027260832618,
      "grad_norm": 0.20930123329162598,
      "learning_rate": 0.00048577116177734653,
      "loss": 2.6876,
      "step": 12000
    },
    {
      "epoch": 0.11690279532568669,
      "grad_norm": 0.2004300355911255,
      "learning_rate": 0.0004833286569978177,
      "loss": 2.6722,
      "step": 13000
    },
    {
      "epoch": 0.1258953180430472,
      "grad_norm": 0.19555214047431946,
      "learning_rate": 0.0004806999406253004,
      "loss": 2.6577,
      "step": 14000
    },
    {
      "epoch": 0.13488784076040772,
      "grad_norm": 0.1877773553133011,
      "learning_rate": 0.0004778871105481104,
      "loss": 2.647,
      "step": 15000
    },
    {
      "epoch": 0.14388036347776822,
      "grad_norm": 0.18455323576927185,
      "learning_rate": 0.0004748924115893922,
      "loss": 2.6388,
      "step": 16000
    },
    {
      "epoch": 0.15287288619512876,
      "grad_norm": 0.18309278786182404,
      "learning_rate": 0.0004717214967118909,
      "loss": 2.6254,
      "step": 17000
    },
    {
      "epoch": 0.16186540891248927,
      "grad_norm": 0.17841506004333496,
      "learning_rate": 0.0004683705487586517,
      "loss": 2.6187,
      "step": 18000
    },
    {
      "epoch": 0.17085793162984977,
      "grad_norm": 0.17425057291984558,
      "learning_rate": 0.00046484532676522683,
      "loss": 2.6115,
      "step": 19000
    },
    {
      "epoch": 0.17985045434721028,
      "grad_norm": 0.17270472645759583,
      "learning_rate": 0.00046114864409029877,
      "loss": 2.6017,
      "step": 20000
    },
    {
      "epoch": 0.18884297706457082,
      "grad_norm": 0.1661667376756668,
      "learning_rate": 0.00045728739927797956,
      "loss": 2.5935,
      "step": 21000
    },
    {
      "epoch": 0.19783549978193132,
      "grad_norm": 0.16307614743709564,
      "learning_rate": 0.0004532569441500434,
      "loss": 2.593,
      "step": 22000
    },
    {
      "epoch": 0.20682802249929183,
      "grad_norm": 0.15843474864959717,
      "learning_rate": 0.0004490642766310399,
      "loss": 2.5831,
      "step": 23000
    },
    {
      "epoch": 0.21582054521665237,
      "grad_norm": 0.15826140344142914,
      "learning_rate": 0.0004447171724872102,
      "loss": 2.575,
      "step": 24000
    },
    {
      "epoch": 0.22481306793401287,
      "grad_norm": 0.15876850485801697,
      "learning_rate": 0.0004402103986613901,
      "loss": 2.5718,
      "step": 25000
    },
    {
      "epoch": 0.23380559065137338,
      "grad_norm": 0.15799590945243835,
      "learning_rate": 0.0004355518244446819,
      "loss": 2.5643,
      "step": 26000
    },
    {
      "epoch": 0.2427981133687339,
      "grad_norm": 0.15866918861865997,
      "learning_rate": 0.000430750047042202,
      "loss": 2.5611,
      "step": 27000
    },
    {
      "epoch": 0.2517906360860944,
      "grad_norm": 0.1551973819732666,
      "learning_rate": 0.0004257992860597374,
      "loss": 2.5532,
      "step": 28000
    },
    {
      "epoch": 0.26078315880345493,
      "grad_norm": 0.1489175260066986,
      "learning_rate": 0.00042070822570074265,
      "loss": 2.5508,
      "step": 29000
    },
    {
      "epoch": 0.26977568152081544,
      "grad_norm": 0.14987464249134064,
      "learning_rate": 0.0004154809289657581,
      "loss": 2.5451,
      "step": 30000
    },
    {
      "epoch": 0.27876820423817594,
      "grad_norm": 0.14638318121433258,
      "learning_rate": 0.00041013241527853406,
      "loss": 2.5382,
      "step": 31000
    },
    {
      "epoch": 0.28776072695553645,
      "grad_norm": 0.14251314103603363,
      "learning_rate": 0.0004046455176043083,
      "loss": 2.5351,
      "step": 32000
    },
    {
      "epoch": 0.296753249672897,
      "grad_norm": 0.14875943958759308,
      "learning_rate": 0.00039904087312981354,
      "loss": 2.5315,
      "step": 33000
    },
    {
      "epoch": 0.3057457723902575,
      "grad_norm": 0.14232666790485382,
      "learning_rate": 0.00039331173496701843,
      "loss": 2.5287,
      "step": 34000
    },
    {
      "epoch": 0.314738295107618,
      "grad_norm": 0.1461074948310852,
      "learning_rate": 0.00038746822462720277,
      "loss": 2.5252,
      "step": 35000
    },
    {
      "epoch": 0.32373081782497853,
      "grad_norm": 0.14645366370677948,
      "learning_rate": 0.00038152101205969716,
      "loss": 2.5184,
      "step": 36000
    },
    {
      "epoch": 0.33272334054233904,
      "grad_norm": 0.14445528388023376,
      "learning_rate": 0.00037546293799195995,
      "loss": 2.5201,
      "step": 37000
    },
    {
      "epoch": 0.34171586325969955,
      "grad_norm": 0.13728487491607666,
      "learning_rate": 0.00036930473625947265,
      "loss": 2.5149,
      "step": 38000
    },
    {
      "epoch": 0.35070838597706006,
      "grad_norm": 0.13824057579040527,
      "learning_rate": 0.00036305132151167983,
      "loss": 2.5122,
      "step": 39000
    },
    {
      "epoch": 0.35970090869442056,
      "grad_norm": 0.1395253986120224,
      "learning_rate": 0.00035672045834706856,
      "loss": 2.5069,
      "step": 40000
    },
    {
      "epoch": 0.3686934314117811,
      "grad_norm": 0.14017699658870697,
      "learning_rate": 0.0003502918267002188,
      "loss": 2.5047,
      "step": 41000
    },
    {
      "epoch": 0.37768595412914163,
      "grad_norm": 0.1350419521331787,
      "learning_rate": 0.00034378315558863357,
      "loss": 2.5011,
      "step": 42000
    },
    {
      "epoch": 0.38667847684650214,
      "grad_norm": 0.14121927320957184,
      "learning_rate": 0.00033719963935934026,
      "loss": 2.496,
      "step": 43000
    },
    {
      "epoch": 0.39567099956386265,
      "grad_norm": 0.1398804783821106,
      "learning_rate": 0.0003305532181958758,
      "loss": 2.4966,
      "step": 44000
    },
    {
      "epoch": 0.40466352228122315,
      "grad_norm": 0.14015056192874908,
      "learning_rate": 0.00032383589111626814,
      "loss": 2.493,
      "step": 45000
    },
    {
      "epoch": 0.41365604499858366,
      "grad_norm": 0.1367267668247223,
      "learning_rate": 0.00031705963814611035,
      "loss": 2.484,
      "step": 46000
    },
    {
      "epoch": 0.42264856771594417,
      "grad_norm": 0.13933929800987244,
      "learning_rate": 0.0003102367218727284,
      "loss": 2.4843,
      "step": 47000
    },
    {
      "epoch": 0.43164109043330473,
      "grad_norm": 0.13233740627765656,
      "learning_rate": 0.0003033589288488015,
      "loss": 2.4813,
      "step": 48000
    },
    {
      "epoch": 0.44063361315066524,
      "grad_norm": 0.13488726317882538,
      "learning_rate": 0.0002964385518951125,
      "loss": 2.4786,
      "step": 49000
    },
    {
      "epoch": 0.44962613586802574,
      "grad_norm": 0.1470656394958496,
      "learning_rate": 0.0002894811139269912,
      "loss": 2.4743,
      "step": 50000
    },
    {
      "epoch": 0.45861865858538625,
      "grad_norm": 0.1314244419336319,
      "learning_rate": 0.0002825061730718414,
      "loss": 2.4751,
      "step": 51000
    },
    {
      "epoch": 0.46761118130274676,
      "grad_norm": 0.13198289275169373,
      "learning_rate": 0.0002754913419788723,
      "loss": 2.4729,
      "step": 52000
    },
    {
      "epoch": 0.47660370402010727,
      "grad_norm": 0.12992499768733978,
      "learning_rate": 0.00026845616712076794,
      "loss": 2.4694,
      "step": 53000
    },
    {
      "epoch": 0.4855962267374678,
      "grad_norm": 0.13007935881614685,
      "learning_rate": 0.00026141331841860756,
      "loss": 2.463,
      "step": 54000
    },
    {
      "epoch": 0.49458874945482834,
      "grad_norm": 0.1366134136915207,
      "learning_rate": 0.000254354317669745,
      "loss": 2.4592,
      "step": 55000
    },
    {
      "epoch": 0.5035812721721888,
      "grad_norm": 0.12824617326259613,
      "learning_rate": 0.0002472918418893963,
      "loss": 2.4585,
      "step": 56000
    },
    {
      "epoch": 0.5125737948895494,
      "grad_norm": 0.1312493234872818,
      "learning_rate": 0.00024023858475626683,
      "loss": 2.4571,
      "step": 57000
    },
    {
      "epoch": 0.5215663176069099,
      "grad_norm": 0.12798364460468292,
      "learning_rate": 0.0002331860555380091,
      "loss": 2.4561,
      "step": 58000
    },
    {
      "epoch": 0.5305588403242704,
      "grad_norm": 0.1297149360179901,
      "learning_rate": 0.00022614694495174873,
      "loss": 2.4524,
      "step": 59000
    },
    {
      "epoch": 0.5395513630416309,
      "grad_norm": 0.12818291783332825,
      "learning_rate": 0.00021912687067005265,
      "loss": 2.4496,
      "step": 60000
    },
    {
      "epoch": 0.5485438857589914,
      "grad_norm": 0.13012410700321198,
      "learning_rate": 0.00021213841644168087,
      "loss": 2.4465,
      "step": 61000
    },
    {
      "epoch": 0.5575364084763519,
      "grad_norm": 0.13271279633045197,
      "learning_rate": 0.00020518706618325146,
      "loss": 2.4471,
      "step": 62000
    },
    {
      "epoch": 0.5665289311937124,
      "grad_norm": 0.13144823908805847,
      "learning_rate": 0.00019825751696178184,
      "loss": 2.4405,
      "step": 63000
    },
    {
      "epoch": 0.5755214539110729,
      "grad_norm": 0.1301935911178589,
      "learning_rate": 0.00019136926164015156,
      "loss": 2.4368,
      "step": 64000
    },
    {
      "epoch": 0.5845139766284334,
      "grad_norm": 0.1324065774679184,
      "learning_rate": 0.00018452779749851848,
      "loss": 2.4383,
      "step": 65000
    },
    {
      "epoch": 0.593506499345794,
      "grad_norm": 0.13288547098636627,
      "learning_rate": 0.00017774534578113516,
      "loss": 2.4307,
      "step": 66000
    },
    {
      "epoch": 0.6024990220631545,
      "grad_norm": 0.13380388915538788,
      "learning_rate": 0.00017101374175203582,
      "loss": 2.4311,
      "step": 67000
    },
    {
      "epoch": 0.611491544780515,
      "grad_norm": 0.12911923229694366,
      "learning_rate": 0.00016434517394472685,
      "loss": 2.4301,
      "step": 68000
    },
    {
      "epoch": 0.6204840674978755,
      "grad_norm": 0.129085510969162,
      "learning_rate": 0.0001577515286210997,
      "loss": 2.4237,
      "step": 69000
    },
    {
      "epoch": 0.629476590215236,
      "grad_norm": 0.12780623137950897,
      "learning_rate": 0.00015122486833089863,
      "loss": 2.4248,
      "step": 70000
    },
    {
      "epoch": 0.6384691129325966,
      "grad_norm": 0.13019651174545288,
      "learning_rate": 0.00014478344382118653,
      "loss": 2.4217,
      "step": 71000
    },
    {
      "epoch": 0.6474616356499571,
      "grad_norm": 0.13168035447597504,
      "learning_rate": 0.00013841950088558575,
      "loss": 2.4179,
      "step": 72000
    },
    {
      "epoch": 0.6564541583673176,
      "grad_norm": 0.12985067069530487,
      "learning_rate": 0.000132150835258465,
      "loss": 2.4188,
      "step": 73000
    },
    {
      "epoch": 0.6654466810846781,
      "grad_norm": 0.1350133717060089,
      "learning_rate": 0.00012596990070111393,
      "loss": 2.4159,
      "step": 74000
    },
    {
      "epoch": 0.6744392038020386,
      "grad_norm": 0.13242337107658386,
      "learning_rate": 0.00011988795030912905,
      "loss": 2.4116,
      "step": 75000
    },
    {
      "epoch": 0.6834317265193991,
      "grad_norm": 0.1263236254453659,
      "learning_rate": 0.00011391576252872856,
      "loss": 2.4121,
      "step": 76000
    },
    {
      "epoch": 0.6924242492367596,
      "grad_norm": 0.1311461478471756,
      "learning_rate": 0.00010804614802213383,
      "loss": 2.4086,
      "step": 77000
    },
    {
      "epoch": 0.7014167719541201,
      "grad_norm": 0.1291423887014389,
      "learning_rate": 0.00010229552021442814,
      "loss": 2.4061,
      "step": 78000
    },
    {
      "epoch": 0.7104092946714806,
      "grad_norm": 0.13248379528522491,
      "learning_rate": 9.66569564824003e-05,
      "loss": 2.403,
      "step": 79000
    },
    {
      "epoch": 0.7194018173888411,
      "grad_norm": 0.13015016913414001,
      "learning_rate": 9.115167786655006e-05,
      "loss": 2.4049,
      "step": 80000
    },
    {
      "epoch": 0.7283943401062017,
      "grad_norm": 0.13090303540229797,
      "learning_rate": 8.576201416635018e-05,
      "loss": 2.4002,
      "step": 81000
    },
    {
      "epoch": 0.7373868628235623,
      "grad_norm": 0.13276338577270508,
      "learning_rate": 8.050342316576978e-05,
      "loss": 2.3996,
      "step": 82000
    },
    {
      "epoch": 0.7463793855409228,
      "grad_norm": 0.13524140417575836,
      "learning_rate": 7.538010156580435e-05,
      "loss": 2.3976,
      "step": 83000
    },
    {
      "epoch": 0.7553719082582833,
      "grad_norm": 0.13214819133281708,
      "learning_rate": 7.039613811358328e-05,
      "loss": 2.3951,
      "step": 84000
    },
    {
      "epoch": 0.7643644309756438,
      "grad_norm": 0.13076630234718323,
      "learning_rate": 6.555551033928139e-05,
      "loss": 2.3936,
      "step": 85000
    },
    {
      "epoch": 0.7733569536930043,
      "grad_norm": 0.1300399899482727,
      "learning_rate": 6.08620813817882e-05,
      "loss": 2.3891,
      "step": 86000
    },
    {
      "epoch": 0.7823494764103648,
      "grad_norm": 0.12917381525039673,
      "learning_rate": 5.631959690566982e-05,
      "loss": 2.3865,
      "step": 87000
    },
    {
      "epoch": 0.7913419991277253,
      "grad_norm": 0.13091173768043518,
      "learning_rate": 5.1940301332541934e-05,
      "loss": 2.3814,
      "step": 88000
    },
    {
      "epoch": 0.8003345218450858,
      "grad_norm": 0.13044433295726776,
      "learning_rate": 4.771013851078279e-05,
      "loss": 2.3799,
      "step": 89000
    },
    {
      "epoch": 0.8093270445624463,
      "grad_norm": 0.12928803265094757,
      "learning_rate": 4.364141628461085e-05,
      "loss": 2.3801,
      "step": 90000
    },
    {
      "epoch": 0.8183195672798068,
      "grad_norm": 0.1315431147813797,
      "learning_rate": 3.973738176159078e-05,
      "loss": 2.3749,
      "step": 91000
    },
    {
      "epoch": 0.8273120899971673,
      "grad_norm": 0.13129934668540955,
      "learning_rate": 3.6004802027335776e-05,
      "loss": 2.3751,
      "step": 92000
    },
    {
      "epoch": 0.8363046127145278,
      "grad_norm": 0.13383065164089203,
      "learning_rate": 3.243918379103131e-05,
      "loss": 2.3747,
      "step": 93000
    },
    {
      "epoch": 0.8452971354318883,
      "grad_norm": 0.13397032022476196,
      "learning_rate": 2.905049772583343e-05,
      "loss": 2.3716,
      "step": 94000
    },
    {
      "epoch": 0.854289658149249,
      "grad_norm": 0.13112303614616394,
      "learning_rate": 2.5834664521170502e-05,
      "loss": 2.3705,
      "step": 95000
    },
    {
      "epoch": 0.8632821808666095,
      "grad_norm": 0.1314103603363037,
      "learning_rate": 2.2797729977089537e-05,
      "loss": 2.3649,
      "step": 96000
    },
    {
      "epoch": 0.87227470358397,
      "grad_norm": 0.1317732334136963,
      "learning_rate": 1.994488203786088e-05,
      "loss": 2.3652,
      "step": 97000
    },
    {
      "epoch": 0.8812672263013305,
      "grad_norm": 0.13032500445842743,
      "learning_rate": 1.7272686436475505e-05,
      "loss": 2.3646,
      "step": 98000
    },
    {
      "epoch": 0.890259749018691,
      "grad_norm": 0.1355644017457962,
      "learning_rate": 1.4788615534578526e-05,
      "loss": 2.363,
      "step": 99000
    },
    {
      "epoch": 0.8992522717360515,
      "grad_norm": 0.1347675770521164,
      "learning_rate": 1.2489679000077859e-05,
      "loss": 2.354,
      "step": 100000
    },
    {
      "epoch": 0.908244794453412,
      "grad_norm": 0.13323768973350525,
      "learning_rate": 1.0380291301085198e-05,
      "loss": 2.3521,
      "step": 101000
    },
    {
      "epoch": 0.9172373171707725,
      "grad_norm": 0.13284514844417572,
      "learning_rate": 8.463957980162084e-06,
      "loss": 2.3492,
      "step": 102000
    },
    {
      "epoch": 0.926229839888133,
      "grad_norm": 0.1306186467409134,
      "learning_rate": 6.738372149439731e-06,
      "loss": 2.3454,
      "step": 103000
    },
    {
      "epoch": 0.9352223626054935,
      "grad_norm": 0.13234929740428925,
      "learning_rate": 5.206925074830115e-06,
      "loss": 2.3452,
      "step": 104000
    },
    {
      "epoch": 0.944214885322854,
      "grad_norm": 0.1304798424243927,
      "learning_rate": 3.8708389516820544e-06,
      "loss": 2.3448,
      "step": 105000
    },
    {
      "epoch": 0.9532074080402145,
      "grad_norm": 0.1340903788805008,
      "learning_rate": 2.733262717159124e-06,
      "loss": 2.338,
      "step": 106000
    },
    {
      "epoch": 0.962199930757575,
      "grad_norm": 0.13070346415042877,
      "learning_rate": 1.7905451118825079e-06,
      "loss": 2.3358,
      "step": 107000
    },
    {
      "epoch": 0.9711924534749355,
      "grad_norm": 0.13099098205566406,
      "learning_rate": 1.0459149536610202e-06,
      "loss": 2.3329,
      "step": 108000
    },
    {
      "epoch": 0.980184976192296,
      "grad_norm": 0.13041888177394867,
      "learning_rate": 5.004130549572938e-07,
      "loss": 2.3332,
      "step": 109000
    },
    {
      "epoch": 0.9891774989096567,
      "grad_norm": 0.1311406046152115,
      "learning_rate": 1.5338273892975818e-07,
      "loss": 2.3253,
      "step": 110000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 111203,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2000,
  "total_flos": 9.3094825426944e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}