| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999279383151978, | |
| "eval_steps": 100, | |
| "global_step": 3469, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0014412336960438136, | |
| "grad_norm": 22.562244415283203, | |
| "learning_rate": 2.8735632183908047e-07, | |
| "loss": 2.0425, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0028824673920876272, | |
| "grad_norm": 21.29774284362793, | |
| "learning_rate": 5.747126436781609e-07, | |
| "loss": 1.8066, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004323701088131441, | |
| "grad_norm": 21.9217586517334, | |
| "learning_rate": 8.620689655172415e-07, | |
| "loss": 1.8381, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0057649347841752544, | |
| "grad_norm": 21.61351776123047, | |
| "learning_rate": 1.1494252873563219e-06, | |
| "loss": 2.0628, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.007206168480219067, | |
| "grad_norm": 19.952226638793945, | |
| "learning_rate": 1.4367816091954023e-06, | |
| "loss": 1.9157, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.008647402176262881, | |
| "grad_norm": 21.637529373168945, | |
| "learning_rate": 1.724137931034483e-06, | |
| "loss": 1.7546, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.010088635872306694, | |
| "grad_norm": 17.36451530456543, | |
| "learning_rate": 2.0114942528735633e-06, | |
| "loss": 1.5482, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.011529869568350509, | |
| "grad_norm": 19.64199447631836, | |
| "learning_rate": 2.2988505747126437e-06, | |
| "loss": 1.4528, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.012971103264394322, | |
| "grad_norm": 19.360851287841797, | |
| "learning_rate": 2.5862068965517246e-06, | |
| "loss": 1.5399, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.014412336960438135, | |
| "grad_norm": 20.030967712402344, | |
| "learning_rate": 2.8735632183908046e-06, | |
| "loss": 1.607, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01585357065648195, | |
| "grad_norm": 20.46259117126465, | |
| "learning_rate": 3.1609195402298854e-06, | |
| "loss": 1.2416, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.017294804352525762, | |
| "grad_norm": 9.801645278930664, | |
| "learning_rate": 3.448275862068966e-06, | |
| "loss": 1.0737, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.018736038048569575, | |
| "grad_norm": 12.106565475463867, | |
| "learning_rate": 3.7356321839080462e-06, | |
| "loss": 0.949, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.02017727174461339, | |
| "grad_norm": 6.313957214355469, | |
| "learning_rate": 4.022988505747127e-06, | |
| "loss": 0.7346, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0216185054406572, | |
| "grad_norm": 10.853752136230469, | |
| "learning_rate": 4.310344827586207e-06, | |
| "loss": 0.721, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.023059739136701018, | |
| "grad_norm": 3.9490792751312256, | |
| "learning_rate": 4.5977011494252875e-06, | |
| "loss": 0.6074, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02450097283274483, | |
| "grad_norm": 1.7576261758804321, | |
| "learning_rate": 4.885057471264369e-06, | |
| "loss": 0.5638, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.025942206528788644, | |
| "grad_norm": 2.3737521171569824, | |
| "learning_rate": 5.172413793103449e-06, | |
| "loss": 0.7098, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.027383440224832457, | |
| "grad_norm": 2.490168333053589, | |
| "learning_rate": 5.45977011494253e-06, | |
| "loss": 0.4592, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.02882467392087627, | |
| "grad_norm": 3.4372336864471436, | |
| "learning_rate": 5.747126436781609e-06, | |
| "loss": 0.5276, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02882467392087627, | |
| "eval_loss": 0.5012353658676147, | |
| "eval_mse": 0.5012353515625, | |
| "eval_runtime": 3.7775, | |
| "eval_samples_per_second": 264.724, | |
| "eval_steps_per_second": 16.678, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.030265907616920083, | |
| "grad_norm": 6.628878116607666, | |
| "learning_rate": 6.03448275862069e-06, | |
| "loss": 0.6036, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.0317071413129639, | |
| "grad_norm": 5.881587505340576, | |
| "learning_rate": 6.321839080459771e-06, | |
| "loss": 0.4636, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03314837500900771, | |
| "grad_norm": 3.570418119430542, | |
| "learning_rate": 6.609195402298851e-06, | |
| "loss": 0.4917, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.034589608705051525, | |
| "grad_norm": 4.549156665802002, | |
| "learning_rate": 6.896551724137932e-06, | |
| "loss": 0.4228, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03603084240109534, | |
| "grad_norm": 6.016390800476074, | |
| "learning_rate": 7.183908045977011e-06, | |
| "loss": 0.3889, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03747207609713915, | |
| "grad_norm": 3.7417054176330566, | |
| "learning_rate": 7.4712643678160925e-06, | |
| "loss": 0.3697, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.038913309793182964, | |
| "grad_norm": 2.5890052318573, | |
| "learning_rate": 7.758620689655173e-06, | |
| "loss": 0.3944, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.04035454348922678, | |
| "grad_norm": 7.596370220184326, | |
| "learning_rate": 8.045977011494253e-06, | |
| "loss": 0.3378, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04179577718527059, | |
| "grad_norm": 2.7181951999664307, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.3575, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.0432370108813144, | |
| "grad_norm": 3.7169361114501953, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.339, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.044678244577358216, | |
| "grad_norm": 7.888335227966309, | |
| "learning_rate": 8.908045977011495e-06, | |
| "loss": 0.35, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.046119478273402036, | |
| "grad_norm": 12.077622413635254, | |
| "learning_rate": 9.195402298850575e-06, | |
| "loss": 0.3382, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04756071196944585, | |
| "grad_norm": 16.399019241333008, | |
| "learning_rate": 9.482758620689655e-06, | |
| "loss": 0.34, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.04900194566548966, | |
| "grad_norm": 7.357732772827148, | |
| "learning_rate": 9.770114942528738e-06, | |
| "loss": 0.331, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.050443179361533474, | |
| "grad_norm": 10.48930549621582, | |
| "learning_rate": 9.996965098634295e-06, | |
| "loss": 0.3058, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.05188441305757729, | |
| "grad_norm": 8.28653621673584, | |
| "learning_rate": 9.981790591805767e-06, | |
| "loss": 0.3416, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0533256467536211, | |
| "grad_norm": 3.4774363040924072, | |
| "learning_rate": 9.966616084977238e-06, | |
| "loss": 0.2923, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.05476688044966491, | |
| "grad_norm": 17.32866096496582, | |
| "learning_rate": 9.951441578148711e-06, | |
| "loss": 0.3346, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.056208114145708726, | |
| "grad_norm": 6.1874003410339355, | |
| "learning_rate": 9.936267071320182e-06, | |
| "loss": 0.3512, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.05764934784175254, | |
| "grad_norm": 7.11575984954834, | |
| "learning_rate": 9.921092564491654e-06, | |
| "loss": 0.3307, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05764934784175254, | |
| "eval_loss": 0.3466827869415283, | |
| "eval_mse": 0.3466828079223633, | |
| "eval_runtime": 3.6079, | |
| "eval_samples_per_second": 277.17, | |
| "eval_steps_per_second": 17.462, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05909058153779635, | |
| "grad_norm": 9.548958778381348, | |
| "learning_rate": 9.905918057663127e-06, | |
| "loss": 0.2987, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.060531815233840165, | |
| "grad_norm": 5.386926651000977, | |
| "learning_rate": 9.890743550834598e-06, | |
| "loss": 0.3275, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06197304892988398, | |
| "grad_norm": 3.712883710861206, | |
| "learning_rate": 9.87556904400607e-06, | |
| "loss": 0.2812, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.0634142826259278, | |
| "grad_norm": 3.6099259853363037, | |
| "learning_rate": 9.860394537177543e-06, | |
| "loss": 0.2938, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0648555163219716, | |
| "grad_norm": 4.330367088317871, | |
| "learning_rate": 9.845220030349014e-06, | |
| "loss": 0.3534, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.06629675001801542, | |
| "grad_norm": 3.1101043224334717, | |
| "learning_rate": 9.830045523520486e-06, | |
| "loss": 0.2815, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06773798371405923, | |
| "grad_norm": 2.8998003005981445, | |
| "learning_rate": 9.814871016691959e-06, | |
| "loss": 0.3151, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.06917921741010305, | |
| "grad_norm": 3.0117270946502686, | |
| "learning_rate": 9.79969650986343e-06, | |
| "loss": 0.2948, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07062045110614686, | |
| "grad_norm": 3.8122599124908447, | |
| "learning_rate": 9.784522003034902e-06, | |
| "loss": 0.303, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.07206168480219068, | |
| "grad_norm": 9.389573097229004, | |
| "learning_rate": 9.769347496206375e-06, | |
| "loss": 0.268, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07350291849823448, | |
| "grad_norm": 4.838011264801025, | |
| "learning_rate": 9.754172989377846e-06, | |
| "loss": 0.2573, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.0749441521942783, | |
| "grad_norm": 3.3773627281188965, | |
| "learning_rate": 9.738998482549317e-06, | |
| "loss": 0.3056, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07638538589032212, | |
| "grad_norm": 8.778388977050781, | |
| "learning_rate": 9.72382397572079e-06, | |
| "loss": 0.3394, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.07782661958636593, | |
| "grad_norm": 9.080647468566895, | |
| "learning_rate": 9.708649468892262e-06, | |
| "loss": 0.3213, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07926785328240975, | |
| "grad_norm": 5.629511833190918, | |
| "learning_rate": 9.693474962063733e-06, | |
| "loss": 0.3378, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.08070908697845355, | |
| "grad_norm": 5.277593612670898, | |
| "learning_rate": 9.678300455235205e-06, | |
| "loss": 0.2955, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.08215032067449737, | |
| "grad_norm": 12.505846977233887, | |
| "learning_rate": 9.663125948406678e-06, | |
| "loss": 0.3378, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.08359155437054118, | |
| "grad_norm": 20.066940307617188, | |
| "learning_rate": 9.64795144157815e-06, | |
| "loss": 0.3333, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.085032788066585, | |
| "grad_norm": 9.962249755859375, | |
| "learning_rate": 9.63277693474962e-06, | |
| "loss": 0.3223, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.0864740217626288, | |
| "grad_norm": 8.705531120300293, | |
| "learning_rate": 9.617602427921094e-06, | |
| "loss": 0.2994, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0864740217626288, | |
| "eval_loss": 0.29482540488243103, | |
| "eval_mse": 0.29482541751861574, | |
| "eval_runtime": 3.5313, | |
| "eval_samples_per_second": 283.184, | |
| "eval_steps_per_second": 17.841, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08791525545867263, | |
| "grad_norm": 3.253650426864624, | |
| "learning_rate": 9.602427921092565e-06, | |
| "loss": 0.2902, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.08935648915471643, | |
| "grad_norm": 2.954401969909668, | |
| "learning_rate": 9.587253414264037e-06, | |
| "loss": 0.2631, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.09079772285076025, | |
| "grad_norm": 5.84781551361084, | |
| "learning_rate": 9.57207890743551e-06, | |
| "loss": 0.3064, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.09223895654680407, | |
| "grad_norm": 6.994048595428467, | |
| "learning_rate": 9.556904400606981e-06, | |
| "loss": 0.2599, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09368019024284788, | |
| "grad_norm": 2.8386964797973633, | |
| "learning_rate": 9.541729893778452e-06, | |
| "loss": 0.2409, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0951214239388917, | |
| "grad_norm": 8.831405639648438, | |
| "learning_rate": 9.526555386949926e-06, | |
| "loss": 0.2823, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.0965626576349355, | |
| "grad_norm": 5.2568840980529785, | |
| "learning_rate": 9.511380880121397e-06, | |
| "loss": 0.2995, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.09800389133097932, | |
| "grad_norm": 8.514307022094727, | |
| "learning_rate": 9.496206373292868e-06, | |
| "loss": 0.2998, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09944512502702313, | |
| "grad_norm": 7.257457256317139, | |
| "learning_rate": 9.481031866464341e-06, | |
| "loss": 0.2657, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.10088635872306695, | |
| "grad_norm": 16.146055221557617, | |
| "learning_rate": 9.465857359635813e-06, | |
| "loss": 0.3007, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10232759241911075, | |
| "grad_norm": 9.077672958374023, | |
| "learning_rate": 9.450682852807284e-06, | |
| "loss": 0.2855, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.10376882611515457, | |
| "grad_norm": 2.436467170715332, | |
| "learning_rate": 9.435508345978757e-06, | |
| "loss": 0.2809, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.10521005981119838, | |
| "grad_norm": 3.1497037410736084, | |
| "learning_rate": 9.420333839150229e-06, | |
| "loss": 0.2773, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1066512935072422, | |
| "grad_norm": 11.72987174987793, | |
| "learning_rate": 9.4051593323217e-06, | |
| "loss": 0.2475, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.108092527203286, | |
| "grad_norm": 7.571060657501221, | |
| "learning_rate": 9.389984825493173e-06, | |
| "loss": 0.3025, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.10953376089932983, | |
| "grad_norm": 4.485091686248779, | |
| "learning_rate": 9.374810318664645e-06, | |
| "loss": 0.2742, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11097499459537365, | |
| "grad_norm": 9.997501373291016, | |
| "learning_rate": 9.359635811836116e-06, | |
| "loss": 0.2845, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.11241622829141745, | |
| "grad_norm": 8.28499984741211, | |
| "learning_rate": 9.344461305007587e-06, | |
| "loss": 0.2786, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.11385746198746127, | |
| "grad_norm": 7.368741512298584, | |
| "learning_rate": 9.32928679817906e-06, | |
| "loss": 0.2712, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.11529869568350508, | |
| "grad_norm": 2.7350659370422363, | |
| "learning_rate": 9.314112291350532e-06, | |
| "loss": 0.2813, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11529869568350508, | |
| "eval_loss": 0.27986839413642883, | |
| "eval_mse": 0.27986840057373047, | |
| "eval_runtime": 3.5237, | |
| "eval_samples_per_second": 283.794, | |
| "eval_steps_per_second": 17.879, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1167399293795489, | |
| "grad_norm": 6.319039344787598, | |
| "learning_rate": 9.298937784522003e-06, | |
| "loss": 0.2822, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1181811630755927, | |
| "grad_norm": 3.2821357250213623, | |
| "learning_rate": 9.283763277693477e-06, | |
| "loss": 0.2529, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.11962239677163652, | |
| "grad_norm": 10.913554191589355, | |
| "learning_rate": 9.268588770864948e-06, | |
| "loss": 0.2906, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.12106363046768033, | |
| "grad_norm": 2.793478488922119, | |
| "learning_rate": 9.25341426403642e-06, | |
| "loss": 0.2947, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12250486416372415, | |
| "grad_norm": 7.466792583465576, | |
| "learning_rate": 9.238239757207892e-06, | |
| "loss": 0.3253, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.12394609785976796, | |
| "grad_norm": 3.626875877380371, | |
| "learning_rate": 9.223065250379364e-06, | |
| "loss": 0.2886, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.12538733155581178, | |
| "grad_norm": 3.203030586242676, | |
| "learning_rate": 9.207890743550835e-06, | |
| "loss": 0.2855, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.1268285652518556, | |
| "grad_norm": 5.602776050567627, | |
| "learning_rate": 9.192716236722308e-06, | |
| "loss": 0.2634, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1282697989478994, | |
| "grad_norm": 4.7141804695129395, | |
| "learning_rate": 9.17754172989378e-06, | |
| "loss": 0.2729, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.1297110326439432, | |
| "grad_norm": 3.62369704246521, | |
| "learning_rate": 9.162367223065251e-06, | |
| "loss": 0.2595, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13115226633998703, | |
| "grad_norm": 7.390626907348633, | |
| "learning_rate": 9.147192716236724e-06, | |
| "loss": 0.2729, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.13259350003603085, | |
| "grad_norm": 5.839654445648193, | |
| "learning_rate": 9.132018209408196e-06, | |
| "loss": 0.3046, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.13403473373207467, | |
| "grad_norm": 5.694056987762451, | |
| "learning_rate": 9.116843702579667e-06, | |
| "loss": 0.315, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.13547596742811846, | |
| "grad_norm": 5.082228660583496, | |
| "learning_rate": 9.10166919575114e-06, | |
| "loss": 0.2727, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.13691720112416228, | |
| "grad_norm": 16.798309326171875, | |
| "learning_rate": 9.08649468892261e-06, | |
| "loss": 0.2771, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.1383584348202061, | |
| "grad_norm": 6.507099151611328, | |
| "learning_rate": 9.071320182094083e-06, | |
| "loss": 0.2788, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.13979966851624992, | |
| "grad_norm": 6.645677089691162, | |
| "learning_rate": 9.056145675265554e-06, | |
| "loss": 0.2655, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1412409022122937, | |
| "grad_norm": 8.834277153015137, | |
| "learning_rate": 9.040971168437026e-06, | |
| "loss": 0.306, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14268213590833753, | |
| "grad_norm": 7.980821132659912, | |
| "learning_rate": 9.025796661608497e-06, | |
| "loss": 0.2498, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.14412336960438135, | |
| "grad_norm": 6.141432285308838, | |
| "learning_rate": 9.01062215477997e-06, | |
| "loss": 0.2707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14412336960438135, | |
| "eval_loss": 0.3016921281814575, | |
| "eval_mse": 0.30169212198257445, | |
| "eval_runtime": 3.5058, | |
| "eval_samples_per_second": 285.241, | |
| "eval_steps_per_second": 17.97, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14556460330042517, | |
| "grad_norm": 3.130420446395874, | |
| "learning_rate": 8.995447647951442e-06, | |
| "loss": 0.2848, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.14700583699646896, | |
| "grad_norm": 3.3286259174346924, | |
| "learning_rate": 8.980273141122913e-06, | |
| "loss": 0.2534, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.14844707069251278, | |
| "grad_norm": 4.090871334075928, | |
| "learning_rate": 8.965098634294386e-06, | |
| "loss": 0.2423, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.1498883043885566, | |
| "grad_norm": 3.3191046714782715, | |
| "learning_rate": 8.949924127465858e-06, | |
| "loss": 0.2641, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.15132953808460042, | |
| "grad_norm": 4.689390182495117, | |
| "learning_rate": 8.934749620637329e-06, | |
| "loss": 0.2966, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.15277077178064424, | |
| "grad_norm": 12.92320728302002, | |
| "learning_rate": 8.919575113808802e-06, | |
| "loss": 0.2754, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.15421200547668804, | |
| "grad_norm": 6.917271137237549, | |
| "learning_rate": 8.904400606980273e-06, | |
| "loss": 0.2606, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.15565323917273186, | |
| "grad_norm": 11.144198417663574, | |
| "learning_rate": 8.889226100151745e-06, | |
| "loss": 0.2724, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.15709447286877568, | |
| "grad_norm": 8.833760261535645, | |
| "learning_rate": 8.874051593323218e-06, | |
| "loss": 0.2824, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.1585357065648195, | |
| "grad_norm": 10.618779182434082, | |
| "learning_rate": 8.85887708649469e-06, | |
| "loss": 0.2735, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1599769402608633, | |
| "grad_norm": 6.135279655456543, | |
| "learning_rate": 8.84370257966616e-06, | |
| "loss": 0.2704, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.1614181739569071, | |
| "grad_norm": 9.724879264831543, | |
| "learning_rate": 8.828528072837634e-06, | |
| "loss": 0.3164, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.16285940765295093, | |
| "grad_norm": 19.34429168701172, | |
| "learning_rate": 8.813353566009105e-06, | |
| "loss": 0.3358, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.16430064134899475, | |
| "grad_norm": 3.7202112674713135, | |
| "learning_rate": 8.798179059180577e-06, | |
| "loss": 0.2559, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.16574187504503854, | |
| "grad_norm": 14.137849807739258, | |
| "learning_rate": 8.78300455235205e-06, | |
| "loss": 0.3265, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.16718310874108236, | |
| "grad_norm": 13.450931549072266, | |
| "learning_rate": 8.767830045523521e-06, | |
| "loss": 0.2847, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.16862434243712618, | |
| "grad_norm": 11.542911529541016, | |
| "learning_rate": 8.752655538694993e-06, | |
| "loss": 0.2993, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.17006557613317, | |
| "grad_norm": 2.6248021125793457, | |
| "learning_rate": 8.737481031866466e-06, | |
| "loss": 0.2237, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.17150680982921382, | |
| "grad_norm": 2.968738079071045, | |
| "learning_rate": 8.722306525037937e-06, | |
| "loss": 0.2788, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.1729480435252576, | |
| "grad_norm": 3.263702869415283, | |
| "learning_rate": 8.707132018209408e-06, | |
| "loss": 0.2506, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1729480435252576, | |
| "eval_loss": 0.26987460255622864, | |
| "eval_mse": 0.26987459245696666, | |
| "eval_runtime": 3.7034, | |
| "eval_samples_per_second": 270.023, | |
| "eval_steps_per_second": 17.011, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.17438927722130143, | |
| "grad_norm": 4.830836772918701, | |
| "learning_rate": 8.69195751138088e-06, | |
| "loss": 0.2604, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.17583051091734525, | |
| "grad_norm": 9.858766555786133, | |
| "learning_rate": 8.676783004552353e-06, | |
| "loss": 0.2707, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.17727174461338907, | |
| "grad_norm": 14.384836196899414, | |
| "learning_rate": 8.661608497723824e-06, | |
| "loss": 0.2804, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.17871297830943286, | |
| "grad_norm": 3.5715346336364746, | |
| "learning_rate": 8.646433990895296e-06, | |
| "loss": 0.2705, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.18015421200547668, | |
| "grad_norm": 3.551644802093506, | |
| "learning_rate": 8.631259484066769e-06, | |
| "loss": 0.2455, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.1815954457015205, | |
| "grad_norm": 5.014893054962158, | |
| "learning_rate": 8.61608497723824e-06, | |
| "loss": 0.2518, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.18303667939756432, | |
| "grad_norm": 8.507904052734375, | |
| "learning_rate": 8.600910470409712e-06, | |
| "loss": 0.2794, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.18447791309360814, | |
| "grad_norm": 3.0522546768188477, | |
| "learning_rate": 8.585735963581185e-06, | |
| "loss": 0.2676, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.18591914678965193, | |
| "grad_norm": 5.574901580810547, | |
| "learning_rate": 8.570561456752656e-06, | |
| "loss": 0.2909, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.18736038048569575, | |
| "grad_norm": 4.334587097167969, | |
| "learning_rate": 8.555386949924128e-06, | |
| "loss": 0.2427, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.18880161418173957, | |
| "grad_norm": 3.7379872798919678, | |
| "learning_rate": 8.5402124430956e-06, | |
| "loss": 0.2761, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.1902428478777834, | |
| "grad_norm": 12.186504364013672, | |
| "learning_rate": 8.525037936267072e-06, | |
| "loss": 0.2533, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1916840815738272, | |
| "grad_norm": 3.4297823905944824, | |
| "learning_rate": 8.509863429438544e-06, | |
| "loss": 0.2627, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.193125315269871, | |
| "grad_norm": 6.119399547576904, | |
| "learning_rate": 8.494688922610017e-06, | |
| "loss": 0.284, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.19456654896591483, | |
| "grad_norm": 3.3624227046966553, | |
| "learning_rate": 8.479514415781488e-06, | |
| "loss": 0.232, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.19600778266195865, | |
| "grad_norm": 3.575873374938965, | |
| "learning_rate": 8.46433990895296e-06, | |
| "loss": 0.2472, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.19744901635800244, | |
| "grad_norm": 4.15811014175415, | |
| "learning_rate": 8.449165402124433e-06, | |
| "loss": 0.2802, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.19889025005404626, | |
| "grad_norm": 10.572412490844727, | |
| "learning_rate": 8.433990895295904e-06, | |
| "loss": 0.239, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.20033148375009008, | |
| "grad_norm": 4.094681262969971, | |
| "learning_rate": 8.418816388467375e-06, | |
| "loss": 0.2569, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.2017727174461339, | |
| "grad_norm": 7.369691371917725, | |
| "learning_rate": 8.403641881638848e-06, | |
| "loss": 0.2584, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2017727174461339, | |
| "eval_loss": 0.2632690966129303, | |
| "eval_mse": 0.26326909382641317, | |
| "eval_runtime": 3.532, | |
| "eval_samples_per_second": 283.128, | |
| "eval_steps_per_second": 17.837, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.20321395114217772, | |
| "grad_norm": 5.504802703857422, | |
| "learning_rate": 8.38846737481032e-06, | |
| "loss": 0.2701, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.2046551848382215, | |
| "grad_norm": 2.7906060218811035, | |
| "learning_rate": 8.373292867981791e-06, | |
| "loss": 0.2547, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.20609641853426533, | |
| "grad_norm": 4.351840496063232, | |
| "learning_rate": 8.358118361153263e-06, | |
| "loss": 0.2734, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.20753765223030915, | |
| "grad_norm": 4.175307750701904, | |
| "learning_rate": 8.342943854324736e-06, | |
| "loss": 0.282, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.20897888592635297, | |
| "grad_norm": 6.214290142059326, | |
| "learning_rate": 8.327769347496207e-06, | |
| "loss": 0.2786, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.21042011962239676, | |
| "grad_norm": 4.1190505027771, | |
| "learning_rate": 8.312594840667679e-06, | |
| "loss": 0.2671, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.21186135331844058, | |
| "grad_norm": 9.297944068908691, | |
| "learning_rate": 8.297420333839152e-06, | |
| "loss": 0.2467, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.2133025870144844, | |
| "grad_norm": 10.469265937805176, | |
| "learning_rate": 8.282245827010623e-06, | |
| "loss": 0.2941, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.21474382071052822, | |
| "grad_norm": 8.109935760498047, | |
| "learning_rate": 8.267071320182094e-06, | |
| "loss": 0.2759, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.216185054406572, | |
| "grad_norm": 7.648597717285156, | |
| "learning_rate": 8.251896813353568e-06, | |
| "loss": 0.2568, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.21762628810261583, | |
| "grad_norm": 5.525659084320068, | |
| "learning_rate": 8.236722306525039e-06, | |
| "loss": 0.275, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.21906752179865965, | |
| "grad_norm": 4.415822505950928, | |
| "learning_rate": 8.22154779969651e-06, | |
| "loss": 0.2663, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.22050875549470347, | |
| "grad_norm": 5.961898326873779, | |
| "learning_rate": 8.206373292867983e-06, | |
| "loss": 0.2702, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.2219499891907473, | |
| "grad_norm": 7.850853443145752, | |
| "learning_rate": 8.191198786039455e-06, | |
| "loss": 0.2699, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.22339122288679109, | |
| "grad_norm": 7.393783092498779, | |
| "learning_rate": 8.176024279210926e-06, | |
| "loss": 0.2571, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.2248324565828349, | |
| "grad_norm": 4.719627380371094, | |
| "learning_rate": 8.1608497723824e-06, | |
| "loss": 0.2531, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.22627369027887873, | |
| "grad_norm": 2.988987684249878, | |
| "learning_rate": 8.14567526555387e-06, | |
| "loss": 0.2608, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.22771492397492255, | |
| "grad_norm": 3.3279573917388916, | |
| "learning_rate": 8.130500758725342e-06, | |
| "loss": 0.255, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.22915615767096634, | |
| "grad_norm": 5.147717475891113, | |
| "learning_rate": 8.115326251896815e-06, | |
| "loss": 0.2765, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.23059739136701016, | |
| "grad_norm": 3.649806022644043, | |
| "learning_rate": 8.100151745068287e-06, | |
| "loss": 0.2603, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.23059739136701016, | |
| "eval_loss": 0.2433857023715973, | |
| "eval_mse": 0.2433856954583898, | |
| "eval_runtime": 3.5848, | |
| "eval_samples_per_second": 278.954, | |
| "eval_steps_per_second": 17.574, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.23203862506305398, | |
| "grad_norm": 6.429337501525879, | |
| "learning_rate": 8.084977238239758e-06, | |
| "loss": 0.2499, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2334798587590978, | |
| "grad_norm": 12.670327186584473, | |
| "learning_rate": 8.06980273141123e-06, | |
| "loss": 0.2675, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2349210924551416, | |
| "grad_norm": 8.6768159866333, | |
| "learning_rate": 8.054628224582701e-06, | |
| "loss": 0.2406, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.2363623261511854, | |
| "grad_norm": 7.374031066894531, | |
| "learning_rate": 8.039453717754174e-06, | |
| "loss": 0.2762, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.23780355984722923, | |
| "grad_norm": 5.246163845062256, | |
| "learning_rate": 8.024279210925645e-06, | |
| "loss": 0.2587, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.23924479354327305, | |
| "grad_norm": 4.083150386810303, | |
| "learning_rate": 8.009104704097117e-06, | |
| "loss": 0.2795, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.24068602723931687, | |
| "grad_norm": 4.8549065589904785, | |
| "learning_rate": 7.993930197268588e-06, | |
| "loss": 0.2625, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.24212726093536066, | |
| "grad_norm": 2.8547518253326416, | |
| "learning_rate": 7.978755690440061e-06, | |
| "loss": 0.273, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.24356849463140448, | |
| "grad_norm": 3.599306106567383, | |
| "learning_rate": 7.963581183611533e-06, | |
| "loss": 0.2595, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.2450097283274483, | |
| "grad_norm": 4.009678840637207, | |
| "learning_rate": 7.948406676783004e-06, | |
| "loss": 0.2725, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24645096202349212, | |
| "grad_norm": 2.623904228210449, | |
| "learning_rate": 7.933232169954477e-06, | |
| "loss": 0.2577, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.2478921957195359, | |
| "grad_norm": 3.4143848419189453, | |
| "learning_rate": 7.918057663125949e-06, | |
| "loss": 0.2345, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.24933342941557973, | |
| "grad_norm": 3.2756216526031494, | |
| "learning_rate": 7.90288315629742e-06, | |
| "loss": 0.2609, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.25077466311162355, | |
| "grad_norm": 6.703480243682861, | |
| "learning_rate": 7.887708649468893e-06, | |
| "loss": 0.273, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2522158968076674, | |
| "grad_norm": 3.215533494949341, | |
| "learning_rate": 7.872534142640365e-06, | |
| "loss": 0.2448, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.2536571305037112, | |
| "grad_norm": 3.3647091388702393, | |
| "learning_rate": 7.857359635811836e-06, | |
| "loss": 0.239, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.255098364199755, | |
| "grad_norm": 5.089346408843994, | |
| "learning_rate": 7.842185128983309e-06, | |
| "loss": 0.26, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.2565395978957988, | |
| "grad_norm": 3.7883851528167725, | |
| "learning_rate": 7.82701062215478e-06, | |
| "loss": 0.2573, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2579808315918426, | |
| "grad_norm": 3.3582770824432373, | |
| "learning_rate": 7.811836115326252e-06, | |
| "loss": 0.2462, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.2594220652878864, | |
| "grad_norm": 3.166255235671997, | |
| "learning_rate": 7.796661608497725e-06, | |
| "loss": 0.2973, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2594220652878864, | |
| "eval_loss": 0.2394440621137619, | |
| "eval_mse": 0.23944407220184802, | |
| "eval_runtime": 3.4937, | |
| "eval_samples_per_second": 286.233, | |
| "eval_steps_per_second": 18.033, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.26086329898393024, | |
| "grad_norm": 9.669307708740234, | |
| "learning_rate": 7.781487101669196e-06, | |
| "loss": 0.2588, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.26230453267997406, | |
| "grad_norm": 3.7549054622650146, | |
| "learning_rate": 7.766312594840668e-06, | |
| "loss": 0.2646, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2637457663760179, | |
| "grad_norm": 8.603379249572754, | |
| "learning_rate": 7.75113808801214e-06, | |
| "loss": 0.2425, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.2651870000720617, | |
| "grad_norm": 3.408053398132324, | |
| "learning_rate": 7.735963581183612e-06, | |
| "loss": 0.2323, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2666282337681055, | |
| "grad_norm": 14.35401725769043, | |
| "learning_rate": 7.720789074355084e-06, | |
| "loss": 0.251, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.26806946746414934, | |
| "grad_norm": 6.7734599113464355, | |
| "learning_rate": 7.705614567526557e-06, | |
| "loss": 0.2523, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2695107011601931, | |
| "grad_norm": 7.644830226898193, | |
| "learning_rate": 7.690440060698028e-06, | |
| "loss": 0.2574, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.2709519348562369, | |
| "grad_norm": 4.449583053588867, | |
| "learning_rate": 7.6752655538695e-06, | |
| "loss": 0.2885, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.27239316855228074, | |
| "grad_norm": 3.3101038932800293, | |
| "learning_rate": 7.660091047040971e-06, | |
| "loss": 0.2893, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.27383440224832456, | |
| "grad_norm": 11.52640151977539, | |
| "learning_rate": 7.644916540212444e-06, | |
| "loss": 0.305, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2752756359443684, | |
| "grad_norm": 11.3882474899292, | |
| "learning_rate": 7.629742033383915e-06, | |
| "loss": 0.2692, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.2767168696404122, | |
| "grad_norm": 4.39008903503418, | |
| "learning_rate": 7.614567526555388e-06, | |
| "loss": 0.2551, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.278158103336456, | |
| "grad_norm": 7.699972629547119, | |
| "learning_rate": 7.599393019726859e-06, | |
| "loss": 0.2684, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.27959933703249984, | |
| "grad_norm": 10.91154956817627, | |
| "learning_rate": 7.584218512898331e-06, | |
| "loss": 0.3027, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.28104057072854366, | |
| "grad_norm": 3.525420904159546, | |
| "learning_rate": 7.569044006069804e-06, | |
| "loss": 0.2406, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.2824818044245874, | |
| "grad_norm": 12.457540512084961, | |
| "learning_rate": 7.553869499241275e-06, | |
| "loss": 0.2964, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.28392303812063124, | |
| "grad_norm": 10.155010223388672, | |
| "learning_rate": 7.538694992412747e-06, | |
| "loss": 0.2561, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.28536427181667506, | |
| "grad_norm": 4.487485408782959, | |
| "learning_rate": 7.5235204855842195e-06, | |
| "loss": 0.2575, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2868055055127189, | |
| "grad_norm": 3.443803071975708, | |
| "learning_rate": 7.508345978755691e-06, | |
| "loss": 0.2915, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.2882467392087627, | |
| "grad_norm": 3.3720641136169434, | |
| "learning_rate": 7.493171471927163e-06, | |
| "loss": 0.2541, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2882467392087627, | |
| "eval_loss": 0.23559316992759705, | |
| "eval_mse": 0.23559318256378173, | |
| "eval_runtime": 3.5793, | |
| "eval_samples_per_second": 279.383, | |
| "eval_steps_per_second": 17.601, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2896879729048065, | |
| "grad_norm": 2.868006467819214, | |
| "learning_rate": 7.477996965098635e-06, | |
| "loss": 0.2541, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.29112920660085034, | |
| "grad_norm": 8.352180480957031, | |
| "learning_rate": 7.462822458270107e-06, | |
| "loss": 0.347, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.29257044029689416, | |
| "grad_norm": 3.547825813293457, | |
| "learning_rate": 7.447647951441579e-06, | |
| "loss": 0.2627, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.2940116739929379, | |
| "grad_norm": 7.52193021774292, | |
| "learning_rate": 7.4324734446130505e-06, | |
| "loss": 0.2261, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.29545290768898175, | |
| "grad_norm": 3.890550374984741, | |
| "learning_rate": 7.417298937784523e-06, | |
| "loss": 0.2755, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.29689414138502557, | |
| "grad_norm": 3.059263229370117, | |
| "learning_rate": 7.402124430955995e-06, | |
| "loss": 0.2643, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2983353750810694, | |
| "grad_norm": 3.5128462314605713, | |
| "learning_rate": 7.386949924127466e-06, | |
| "loss": 0.2388, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.2997766087771132, | |
| "grad_norm": 4.26191520690918, | |
| "learning_rate": 7.371775417298939e-06, | |
| "loss": 0.2605, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.301217842473157, | |
| "grad_norm": 3.458613157272339, | |
| "learning_rate": 7.356600910470411e-06, | |
| "loss": 0.2461, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.30265907616920085, | |
| "grad_norm": 11.277898788452148, | |
| "learning_rate": 7.341426403641882e-06, | |
| "loss": 0.24, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.30410030986524467, | |
| "grad_norm": 3.508758544921875, | |
| "learning_rate": 7.3262518968133545e-06, | |
| "loss": 0.2536, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.3055415435612885, | |
| "grad_norm": 6.12369441986084, | |
| "learning_rate": 7.311077389984827e-06, | |
| "loss": 0.2553, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.30698277725733225, | |
| "grad_norm": 8.630524635314941, | |
| "learning_rate": 7.295902883156298e-06, | |
| "loss": 0.239, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.30842401095337607, | |
| "grad_norm": 6.543661117553711, | |
| "learning_rate": 7.2807283763277704e-06, | |
| "loss": 0.2436, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.3098652446494199, | |
| "grad_norm": 2.481372594833374, | |
| "learning_rate": 7.265553869499242e-06, | |
| "loss": 0.2497, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.3113064783454637, | |
| "grad_norm": 2.966479539871216, | |
| "learning_rate": 7.250379362670714e-06, | |
| "loss": 0.2792, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.31274771204150753, | |
| "grad_norm": 2.8353137969970703, | |
| "learning_rate": 7.235204855842186e-06, | |
| "loss": 0.2745, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.31418894573755135, | |
| "grad_norm": 3.3317484855651855, | |
| "learning_rate": 7.220030349013658e-06, | |
| "loss": 0.2748, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.31563017943359517, | |
| "grad_norm": 9.185257911682129, | |
| "learning_rate": 7.20485584218513e-06, | |
| "loss": 0.2659, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.317071413129639, | |
| "grad_norm": 3.2732787132263184, | |
| "learning_rate": 7.189681335356602e-06, | |
| "loss": 0.2837, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.317071413129639, | |
| "eval_loss": 0.2436872273683548, | |
| "eval_mse": 0.24368724367022515, | |
| "eval_runtime": 3.6657, | |
| "eval_samples_per_second": 272.802, | |
| "eval_steps_per_second": 17.187, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3185126468256828, | |
| "grad_norm": 7.124091625213623, | |
| "learning_rate": 7.174506828528074e-06, | |
| "loss": 0.2601, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.3199538805217266, | |
| "grad_norm": 2.949673652648926, | |
| "learning_rate": 7.159332321699546e-06, | |
| "loss": 0.2378, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.3213951142177704, | |
| "grad_norm": 6.16537618637085, | |
| "learning_rate": 7.144157814871018e-06, | |
| "loss": 0.2681, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.3228363479138142, | |
| "grad_norm": 10.283601760864258, | |
| "learning_rate": 7.1289833080424896e-06, | |
| "loss": 0.2537, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.32427758160985803, | |
| "grad_norm": 8.933276176452637, | |
| "learning_rate": 7.113808801213962e-06, | |
| "loss": 0.2522, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.32571881530590185, | |
| "grad_norm": 6.161346435546875, | |
| "learning_rate": 7.098634294385432e-06, | |
| "loss": 0.2717, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3271600490019457, | |
| "grad_norm": 3.492527961730957, | |
| "learning_rate": 7.083459787556905e-06, | |
| "loss": 0.2691, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.3286012826979895, | |
| "grad_norm": 11.494355201721191, | |
| "learning_rate": 7.068285280728376e-06, | |
| "loss": 0.2508, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.3300425163940333, | |
| "grad_norm": 5.227607250213623, | |
| "learning_rate": 7.053110773899848e-06, | |
| "loss": 0.2791, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.3314837500900771, | |
| "grad_norm": 9.131232261657715, | |
| "learning_rate": 7.0379362670713205e-06, | |
| "loss": 0.2617, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3329249837861209, | |
| "grad_norm": 8.7017183303833, | |
| "learning_rate": 7.022761760242792e-06, | |
| "loss": 0.2456, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.3343662174821647, | |
| "grad_norm": 10.420509338378906, | |
| "learning_rate": 7.007587253414264e-06, | |
| "loss": 0.2494, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.33580745117820854, | |
| "grad_norm": 10.890824317932129, | |
| "learning_rate": 6.9924127465857364e-06, | |
| "loss": 0.2571, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.33724868487425236, | |
| "grad_norm": 7.0846428871154785, | |
| "learning_rate": 6.977238239757208e-06, | |
| "loss": 0.2786, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3386899185702962, | |
| "grad_norm": 11.748177528381348, | |
| "learning_rate": 6.96206373292868e-06, | |
| "loss": 0.243, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.34013115226634, | |
| "grad_norm": 3.483414888381958, | |
| "learning_rate": 6.946889226100152e-06, | |
| "loss": 0.2426, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3415723859623838, | |
| "grad_norm": 8.315218925476074, | |
| "learning_rate": 6.931714719271624e-06, | |
| "loss": 0.2457, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.34301361965842764, | |
| "grad_norm": 3.617290496826172, | |
| "learning_rate": 6.916540212443096e-06, | |
| "loss": 0.2278, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3444548533544714, | |
| "grad_norm": 3.7248239517211914, | |
| "learning_rate": 6.901365705614567e-06, | |
| "loss": 0.2359, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.3458960870505152, | |
| "grad_norm": 4.484503269195557, | |
| "learning_rate": 6.88619119878604e-06, | |
| "loss": 0.242, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3458960870505152, | |
| "eval_loss": 0.23793897032737732, | |
| "eval_mse": 0.237938963919878, | |
| "eval_runtime": 3.5407, | |
| "eval_samples_per_second": 282.427, | |
| "eval_steps_per_second": 17.793, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.34733732074655904, | |
| "grad_norm": 3.734285593032837, | |
| "learning_rate": 6.871016691957512e-06, | |
| "loss": 0.2488, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.34877855444260286, | |
| "grad_norm": 2.971663475036621, | |
| "learning_rate": 6.855842185128983e-06, | |
| "loss": 0.2463, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3502197881386467, | |
| "grad_norm": 20.05165672302246, | |
| "learning_rate": 6.8406676783004556e-06, | |
| "loss": 0.2428, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.3516610218346905, | |
| "grad_norm": 2.946723461151123, | |
| "learning_rate": 6.825493171471928e-06, | |
| "loss": 0.252, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.3531022555307343, | |
| "grad_norm": 3.993689775466919, | |
| "learning_rate": 6.810318664643399e-06, | |
| "loss": 0.2588, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.35454348922677814, | |
| "grad_norm": 3.7553279399871826, | |
| "learning_rate": 6.7951441578148715e-06, | |
| "loss": 0.2432, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.35598472292282196, | |
| "grad_norm": 10.069433212280273, | |
| "learning_rate": 6.779969650986343e-06, | |
| "loss": 0.24, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.3574259566188657, | |
| "grad_norm": 3.12581205368042, | |
| "learning_rate": 6.764795144157815e-06, | |
| "loss": 0.2396, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.35886719031490955, | |
| "grad_norm": 3.955724000930786, | |
| "learning_rate": 6.749620637329287e-06, | |
| "loss": 0.2582, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.36030842401095337, | |
| "grad_norm": 5.299466133117676, | |
| "learning_rate": 6.734446130500759e-06, | |
| "loss": 0.2537, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3617496577069972, | |
| "grad_norm": 3.8833680152893066, | |
| "learning_rate": 6.719271623672231e-06, | |
| "loss": 0.2178, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.363190891403041, | |
| "grad_norm": 3.391704559326172, | |
| "learning_rate": 6.704097116843703e-06, | |
| "loss": 0.2616, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.3646321250990848, | |
| "grad_norm": 5.185522079467773, | |
| "learning_rate": 6.688922610015175e-06, | |
| "loss": 0.2529, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.36607335879512864, | |
| "grad_norm": 7.748248100280762, | |
| "learning_rate": 6.673748103186647e-06, | |
| "loss": 0.2918, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.36751459249117246, | |
| "grad_norm": 6.879116535186768, | |
| "learning_rate": 6.658573596358119e-06, | |
| "loss": 0.2522, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.3689558261872163, | |
| "grad_norm": 5.699965476989746, | |
| "learning_rate": 6.643399089529591e-06, | |
| "loss": 0.2467, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.37039705988326005, | |
| "grad_norm": 6.8762617111206055, | |
| "learning_rate": 6.628224582701063e-06, | |
| "loss": 0.247, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.37183829357930387, | |
| "grad_norm": 3.2006216049194336, | |
| "learning_rate": 6.613050075872534e-06, | |
| "loss": 0.235, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3732795272753477, | |
| "grad_norm": 5.277589321136475, | |
| "learning_rate": 6.5978755690440065e-06, | |
| "loss": 0.2583, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.3747207609713915, | |
| "grad_norm": 3.9958581924438477, | |
| "learning_rate": 6.582701062215479e-06, | |
| "loss": 0.2379, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3747207609713915, | |
| "eval_loss": 0.2270413339138031, | |
| "eval_mse": 0.2270413387455046, | |
| "eval_runtime": 3.4467, | |
| "eval_samples_per_second": 290.129, | |
| "eval_steps_per_second": 18.278, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.37616199466743533, | |
| "grad_norm": 3.3491854667663574, | |
| "learning_rate": 6.56752655538695e-06, | |
| "loss": 0.2653, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.37760322836347915, | |
| "grad_norm": 5.074587345123291, | |
| "learning_rate": 6.552352048558422e-06, | |
| "loss": 0.2377, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.37904446205952297, | |
| "grad_norm": 7.423491477966309, | |
| "learning_rate": 6.537177541729895e-06, | |
| "loss": 0.2854, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.3804856957555668, | |
| "grad_norm": 14.916816711425781, | |
| "learning_rate": 6.522003034901366e-06, | |
| "loss": 0.2513, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.38192692945161055, | |
| "grad_norm": 5.700295448303223, | |
| "learning_rate": 6.506828528072838e-06, | |
| "loss": 0.2609, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.3833681631476544, | |
| "grad_norm": 3.671921968460083, | |
| "learning_rate": 6.4916540212443106e-06, | |
| "loss": 0.2536, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3848093968436982, | |
| "grad_norm": 7.694835186004639, | |
| "learning_rate": 6.476479514415782e-06, | |
| "loss": 0.2482, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.386250630539742, | |
| "grad_norm": 6.534417629241943, | |
| "learning_rate": 6.461305007587254e-06, | |
| "loss": 0.2571, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.38769186423578583, | |
| "grad_norm": 4.938977241516113, | |
| "learning_rate": 6.446130500758726e-06, | |
| "loss": 0.2513, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.38913309793182965, | |
| "grad_norm": 4.277524471282959, | |
| "learning_rate": 6.430955993930198e-06, | |
| "loss": 0.2433, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.3905743316278735, | |
| "grad_norm": 10.038058280944824, | |
| "learning_rate": 6.41578148710167e-06, | |
| "loss": 0.2622, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.3920155653239173, | |
| "grad_norm": 7.558711051940918, | |
| "learning_rate": 6.4006069802731415e-06, | |
| "loss": 0.2434, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.3934567990199611, | |
| "grad_norm": 9.363914489746094, | |
| "learning_rate": 6.385432473444614e-06, | |
| "loss": 0.2491, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.3948980327160049, | |
| "grad_norm": 7.465854644775391, | |
| "learning_rate": 6.370257966616086e-06, | |
| "loss": 0.2545, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.3963392664120487, | |
| "grad_norm": 6.826533794403076, | |
| "learning_rate": 6.3550834597875574e-06, | |
| "loss": 0.2912, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.3977805001080925, | |
| "grad_norm": 6.965171813964844, | |
| "learning_rate": 6.33990895295903e-06, | |
| "loss": 0.2773, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.39922173380413634, | |
| "grad_norm": 3.642481565475464, | |
| "learning_rate": 6.324734446130502e-06, | |
| "loss": 0.245, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.40066296750018016, | |
| "grad_norm": 3.2742958068847656, | |
| "learning_rate": 6.309559939301973e-06, | |
| "loss": 0.2422, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.402104201196224, | |
| "grad_norm": 3.7424025535583496, | |
| "learning_rate": 6.294385432473446e-06, | |
| "loss": 0.247, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.4035454348922678, | |
| "grad_norm": 8.01791763305664, | |
| "learning_rate": 6.279210925644917e-06, | |
| "loss": 0.23, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4035454348922678, | |
| "eval_loss": 0.23571762442588806, | |
| "eval_mse": 0.23571763192489742, | |
| "eval_runtime": 3.6319, | |
| "eval_samples_per_second": 275.335, | |
| "eval_steps_per_second": 17.346, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4049866685883116, | |
| "grad_norm": 11.114055633544922, | |
| "learning_rate": 6.264036418816389e-06, | |
| "loss": 0.2732, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.40642790228435544, | |
| "grad_norm": 4.4829020500183105, | |
| "learning_rate": 6.2488619119878615e-06, | |
| "loss": 0.2467, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.4078691359803992, | |
| "grad_norm": 9.322883605957031, | |
| "learning_rate": 6.233687405159333e-06, | |
| "loss": 0.2525, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.409310369676443, | |
| "grad_norm": 4.860049247741699, | |
| "learning_rate": 6.218512898330805e-06, | |
| "loss": 0.2567, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.41075160337248684, | |
| "grad_norm": 3.1241393089294434, | |
| "learning_rate": 6.203338391502277e-06, | |
| "loss": 0.2244, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.41219283706853066, | |
| "grad_norm": 5.4952263832092285, | |
| "learning_rate": 6.188163884673749e-06, | |
| "loss": 0.2841, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.4136340707645745, | |
| "grad_norm": 11.66360092163086, | |
| "learning_rate": 6.172989377845221e-06, | |
| "loss": 0.2669, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.4150753044606183, | |
| "grad_norm": 5.991528511047363, | |
| "learning_rate": 6.157814871016693e-06, | |
| "loss": 0.2819, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.4165165381566621, | |
| "grad_norm": 4.639535903930664, | |
| "learning_rate": 6.142640364188165e-06, | |
| "loss": 0.2536, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.41795777185270594, | |
| "grad_norm": 15.960262298583984, | |
| "learning_rate": 6.127465857359637e-06, | |
| "loss": 0.2553, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4193990055487497, | |
| "grad_norm": 2.875967025756836, | |
| "learning_rate": 6.112291350531108e-06, | |
| "loss": 0.2521, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.4208402392447935, | |
| "grad_norm": 8.322701454162598, | |
| "learning_rate": 6.09711684370258e-06, | |
| "loss": 0.2637, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.42228147294083734, | |
| "grad_norm": 10.47356128692627, | |
| "learning_rate": 6.081942336874051e-06, | |
| "loss": 0.247, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.42372270663688116, | |
| "grad_norm": 4.5936598777771, | |
| "learning_rate": 6.0667678300455234e-06, | |
| "loss": 0.2284, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.425163940332925, | |
| "grad_norm": 6.534598350524902, | |
| "learning_rate": 6.051593323216996e-06, | |
| "loss": 0.2431, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.4266051740289688, | |
| "grad_norm": 5.094123363494873, | |
| "learning_rate": 6.036418816388467e-06, | |
| "loss": 0.2582, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.4280464077250126, | |
| "grad_norm": 6.307914733886719, | |
| "learning_rate": 6.021244309559939e-06, | |
| "loss": 0.2723, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.42948764142105644, | |
| "grad_norm": 3.139702081680298, | |
| "learning_rate": 6.006069802731412e-06, | |
| "loss": 0.2624, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.43092887511710026, | |
| "grad_norm": 8.088966369628906, | |
| "learning_rate": 5.990895295902883e-06, | |
| "loss": 0.2114, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.432370108813144, | |
| "grad_norm": 5.096198558807373, | |
| "learning_rate": 5.975720789074355e-06, | |
| "loss": 0.2345, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.432370108813144, | |
| "eval_loss": 0.24168826639652252, | |
| "eval_mse": 0.24168826324585826, | |
| "eval_runtime": 3.4764, | |
| "eval_samples_per_second": 287.655, | |
| "eval_steps_per_second": 18.122, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.43381134250918785, | |
| "grad_norm": 4.601346015930176, | |
| "learning_rate": 5.9605462822458275e-06, | |
| "loss": 0.2741, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.43525257620523167, | |
| "grad_norm": 3.155237913131714, | |
| "learning_rate": 5.945371775417299e-06, | |
| "loss": 0.2726, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4366938099012755, | |
| "grad_norm": 10.061796188354492, | |
| "learning_rate": 5.930197268588771e-06, | |
| "loss": 0.2424, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.4381350435973193, | |
| "grad_norm": 3.544672966003418, | |
| "learning_rate": 5.9150227617602426e-06, | |
| "loss": 0.2538, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4395762772933631, | |
| "grad_norm": 16.151227951049805, | |
| "learning_rate": 5.899848254931715e-06, | |
| "loss": 0.279, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.44101751098940695, | |
| "grad_norm": 6.2358598709106445, | |
| "learning_rate": 5.884673748103187e-06, | |
| "loss": 0.2487, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.44245874468545077, | |
| "grad_norm": 4.63312292098999, | |
| "learning_rate": 5.8694992412746585e-06, | |
| "loss": 0.232, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.4438999783814946, | |
| "grad_norm": 6.458159923553467, | |
| "learning_rate": 5.854324734446131e-06, | |
| "loss": 0.2283, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.44534121207753835, | |
| "grad_norm": 5.087281227111816, | |
| "learning_rate": 5.839150227617603e-06, | |
| "loss": 0.2657, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.44678244577358217, | |
| "grad_norm": 5.383090972900391, | |
| "learning_rate": 5.823975720789074e-06, | |
| "loss": 0.2505, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.448223679469626, | |
| "grad_norm": 3.1167826652526855, | |
| "learning_rate": 5.808801213960547e-06, | |
| "loss": 0.2581, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.4496649131656698, | |
| "grad_norm": 3.727855682373047, | |
| "learning_rate": 5.793626707132019e-06, | |
| "loss": 0.232, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.45110614686171363, | |
| "grad_norm": 4.394417762756348, | |
| "learning_rate": 5.77845220030349e-06, | |
| "loss": 0.2481, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.45254738055775745, | |
| "grad_norm": 8.402990341186523, | |
| "learning_rate": 5.7632776934749625e-06, | |
| "loss": 0.2528, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.45398861425380127, | |
| "grad_norm": 6.247002601623535, | |
| "learning_rate": 5.748103186646434e-06, | |
| "loss": 0.255, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.4554298479498451, | |
| "grad_norm": 4.592987060546875, | |
| "learning_rate": 5.732928679817906e-06, | |
| "loss": 0.2551, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.45687108164588885, | |
| "grad_norm": 5.450745105743408, | |
| "learning_rate": 5.7177541729893784e-06, | |
| "loss": 0.2292, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.4583123153419327, | |
| "grad_norm": 2.5550763607025146, | |
| "learning_rate": 5.70257966616085e-06, | |
| "loss": 0.2342, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4597535490379765, | |
| "grad_norm": 4.07436990737915, | |
| "learning_rate": 5.687405159332322e-06, | |
| "loss": 0.2504, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.4611947827340203, | |
| "grad_norm": 14.1714506149292, | |
| "learning_rate": 5.672230652503794e-06, | |
| "loss": 0.2574, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4611947827340203, | |
| "eval_loss": 0.2556320130825043, | |
| "eval_mse": 0.25563200883287934, | |
| "eval_runtime": 3.4708, | |
| "eval_samples_per_second": 288.122, | |
| "eval_steps_per_second": 18.152, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.46263601643006413, | |
| "grad_norm": 2.311433792114258, | |
| "learning_rate": 5.657056145675266e-06, | |
| "loss": 0.2406, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.46407725012610795, | |
| "grad_norm": 5.986364841461182, | |
| "learning_rate": 5.641881638846738e-06, | |
| "loss": 0.2466, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4655184838221518, | |
| "grad_norm": 11.425749778747559, | |
| "learning_rate": 5.62670713201821e-06, | |
| "loss": 0.2676, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.4669597175181956, | |
| "grad_norm": 4.642142295837402, | |
| "learning_rate": 5.611532625189682e-06, | |
| "loss": 0.2226, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4684009512142394, | |
| "grad_norm": 3.2466318607330322, | |
| "learning_rate": 5.596358118361154e-06, | |
| "loss": 0.2508, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.4698421849102832, | |
| "grad_norm": 5.736583232879639, | |
| "learning_rate": 5.581183611532625e-06, | |
| "loss": 0.2495, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.471283418606327, | |
| "grad_norm": 3.0507047176361084, | |
| "learning_rate": 5.5660091047040976e-06, | |
| "loss": 0.2345, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.4727246523023708, | |
| "grad_norm": 7.51447868347168, | |
| "learning_rate": 5.55083459787557e-06, | |
| "loss": 0.259, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.47416588599841464, | |
| "grad_norm": 4.414575576782227, | |
| "learning_rate": 5.535660091047041e-06, | |
| "loss": 0.2347, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.47560711969445846, | |
| "grad_norm": 5.172975540161133, | |
| "learning_rate": 5.5204855842185135e-06, | |
| "loss": 0.2588, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4770483533905023, | |
| "grad_norm": 3.5544350147247314, | |
| "learning_rate": 5.505311077389986e-06, | |
| "loss": 0.243, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.4784895870865461, | |
| "grad_norm": 6.046934127807617, | |
| "learning_rate": 5.490136570561457e-06, | |
| "loss": 0.2428, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.4799308207825899, | |
| "grad_norm": 12.10718059539795, | |
| "learning_rate": 5.474962063732929e-06, | |
| "loss": 0.2343, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.48137205447863374, | |
| "grad_norm": 5.280265808105469, | |
| "learning_rate": 5.459787556904402e-06, | |
| "loss": 0.2546, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.4828132881746775, | |
| "grad_norm": 2.352365255355835, | |
| "learning_rate": 5.444613050075873e-06, | |
| "loss": 0.2445, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.4842545218707213, | |
| "grad_norm": 3.669762372970581, | |
| "learning_rate": 5.429438543247345e-06, | |
| "loss": 0.2468, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.48569575556676514, | |
| "grad_norm": 2.9449355602264404, | |
| "learning_rate": 5.414264036418817e-06, | |
| "loss": 0.239, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.48713698926280896, | |
| "grad_norm": 6.094593048095703, | |
| "learning_rate": 5.399089529590289e-06, | |
| "loss": 0.2371, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4885782229588528, | |
| "grad_norm": 4.748923301696777, | |
| "learning_rate": 5.383915022761761e-06, | |
| "loss": 0.2494, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.4900194566548966, | |
| "grad_norm": 3.353111743927002, | |
| "learning_rate": 5.368740515933233e-06, | |
| "loss": 0.264, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4900194566548966, | |
| "eval_loss": 0.24517062306404114, | |
| "eval_mse": 0.2451706298738718, | |
| "eval_runtime": 3.6449, | |
| "eval_samples_per_second": 274.356, | |
| "eval_steps_per_second": 17.284, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4914606903509404, | |
| "grad_norm": 3.940274238586426, | |
| "learning_rate": 5.353566009104705e-06, | |
| "loss": 0.2538, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.49290192404698424, | |
| "grad_norm": 5.480809688568115, | |
| "learning_rate": 5.338391502276177e-06, | |
| "loss": 0.2316, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.494343157743028, | |
| "grad_norm": 3.5788941383361816, | |
| "learning_rate": 5.3232169954476485e-06, | |
| "loss": 0.2452, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.4957843914390718, | |
| "grad_norm": 2.8091890811920166, | |
| "learning_rate": 5.308042488619121e-06, | |
| "loss": 0.2649, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.49722562513511565, | |
| "grad_norm": 5.758116722106934, | |
| "learning_rate": 5.292867981790593e-06, | |
| "loss": 0.23, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.49866685883115947, | |
| "grad_norm": 5.408109188079834, | |
| "learning_rate": 5.277693474962064e-06, | |
| "loss": 0.2686, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5001080925272033, | |
| "grad_norm": 3.6406140327453613, | |
| "learning_rate": 5.262518968133537e-06, | |
| "loss": 0.2427, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.5015493262232471, | |
| "grad_norm": 3.9658877849578857, | |
| "learning_rate": 5.247344461305008e-06, | |
| "loss": 0.231, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5029905599192909, | |
| "grad_norm": 2.78668212890625, | |
| "learning_rate": 5.23216995447648e-06, | |
| "loss": 0.2545, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.5044317936153347, | |
| "grad_norm": 3.2075002193450928, | |
| "learning_rate": 5.2169954476479526e-06, | |
| "loss": 0.2527, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5058730273113785, | |
| "grad_norm": 4.351621150970459, | |
| "learning_rate": 5.201820940819424e-06, | |
| "loss": 0.2538, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.5073142610074224, | |
| "grad_norm": 8.305010795593262, | |
| "learning_rate": 5.186646433990896e-06, | |
| "loss": 0.2959, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5087554947034661, | |
| "grad_norm": 6.016130447387695, | |
| "learning_rate": 5.1714719271623685e-06, | |
| "loss": 0.2508, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.51019672839951, | |
| "grad_norm": 3.914813995361328, | |
| "learning_rate": 5.15629742033384e-06, | |
| "loss": 0.256, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5116379620955538, | |
| "grad_norm": 7.858732223510742, | |
| "learning_rate": 5.141122913505312e-06, | |
| "loss": 0.2762, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.5130791957915976, | |
| "grad_norm": 5.898010730743408, | |
| "learning_rate": 5.125948406676784e-06, | |
| "loss": 0.2777, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5145204294876414, | |
| "grad_norm": 2.817342758178711, | |
| "learning_rate": 5.110773899848256e-06, | |
| "loss": 0.2631, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.5159616631836852, | |
| "grad_norm": 3.927349328994751, | |
| "learning_rate": 5.095599393019726e-06, | |
| "loss": 0.225, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5174028968797291, | |
| "grad_norm": 9.196603775024414, | |
| "learning_rate": 5.080424886191199e-06, | |
| "loss": 0.2591, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.5188441305757728, | |
| "grad_norm": 2.575307607650757, | |
| "learning_rate": 5.065250379362671e-06, | |
| "loss": 0.2596, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5188441305757728, | |
| "eval_loss": 0.22148238122463226, | |
| "eval_mse": 0.2214823840931058, | |
| "eval_runtime": 3.4856, | |
| "eval_samples_per_second": 286.898, | |
| "eval_steps_per_second": 18.075, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5202853642718167, | |
| "grad_norm": 4.5332350730896, | |
| "learning_rate": 5.050075872534142e-06, | |
| "loss": 0.2515, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.5217265979678605, | |
| "grad_norm": 4.379772663116455, | |
| "learning_rate": 5.0349013657056145e-06, | |
| "loss": 0.2285, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5231678316639043, | |
| "grad_norm": 4.068484783172607, | |
| "learning_rate": 5.019726858877087e-06, | |
| "loss": 0.2309, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.5246090653599481, | |
| "grad_norm": 6.4193525314331055, | |
| "learning_rate": 5.004552352048558e-06, | |
| "loss": 0.2637, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5260502990559919, | |
| "grad_norm": 5.752609729766846, | |
| "learning_rate": 4.989377845220031e-06, | |
| "loss": 0.2531, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.5274915327520358, | |
| "grad_norm": 7.622284889221191, | |
| "learning_rate": 4.974203338391503e-06, | |
| "loss": 0.2417, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5289327664480795, | |
| "grad_norm": 3.0805881023406982, | |
| "learning_rate": 4.959028831562975e-06, | |
| "loss": 0.2608, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.5303740001441234, | |
| "grad_norm": 6.141805648803711, | |
| "learning_rate": 4.943854324734446e-06, | |
| "loss": 0.2381, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5318152338401672, | |
| "grad_norm": 3.3085532188415527, | |
| "learning_rate": 4.9286798179059185e-06, | |
| "loss": 0.2315, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.533256467536211, | |
| "grad_norm": 4.580583095550537, | |
| "learning_rate": 4.913505311077391e-06, | |
| "loss": 0.2621, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5346977012322548, | |
| "grad_norm": 4.834953308105469, | |
| "learning_rate": 4.898330804248862e-06, | |
| "loss": 0.2486, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.5361389349282987, | |
| "grad_norm": 5.170509338378906, | |
| "learning_rate": 4.8831562974203345e-06, | |
| "loss": 0.2621, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5375801686243424, | |
| "grad_norm": 2.793675184249878, | |
| "learning_rate": 4.867981790591807e-06, | |
| "loss": 0.2469, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.5390214023203862, | |
| "grad_norm": 11.32933521270752, | |
| "learning_rate": 4.852807283763278e-06, | |
| "loss": 0.2499, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5404626360164301, | |
| "grad_norm": 7.267145156860352, | |
| "learning_rate": 4.8376327769347495e-06, | |
| "loss": 0.2429, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.5419038697124738, | |
| "grad_norm": 3.166592597961426, | |
| "learning_rate": 4.822458270106222e-06, | |
| "loss": 0.2465, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5433451034085177, | |
| "grad_norm": 2.622795820236206, | |
| "learning_rate": 4.807283763277694e-06, | |
| "loss": 0.2472, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.5447863371045615, | |
| "grad_norm": 8.5530424118042, | |
| "learning_rate": 4.792109256449165e-06, | |
| "loss": 0.2758, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5462275708006054, | |
| "grad_norm": 3.895925521850586, | |
| "learning_rate": 4.776934749620638e-06, | |
| "loss": 0.2346, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.5476688044966491, | |
| "grad_norm": 3.4110894203186035, | |
| "learning_rate": 4.761760242792109e-06, | |
| "loss": 0.244, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5476688044966491, | |
| "eval_loss": 0.22688308358192444, | |
| "eval_mse": 0.22688308400684037, | |
| "eval_runtime": 3.4433, | |
| "eval_samples_per_second": 290.417, | |
| "eval_steps_per_second": 18.296, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.549110038192693, | |
| "grad_norm": 4.175052165985107, | |
| "learning_rate": 4.746585735963581e-06, | |
| "loss": 0.2459, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.5505512718887368, | |
| "grad_norm": 4.640260219573975, | |
| "learning_rate": 4.731411229135054e-06, | |
| "loss": 0.2269, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5519925055847805, | |
| "grad_norm": 3.7191929817199707, | |
| "learning_rate": 4.716236722306525e-06, | |
| "loss": 0.2627, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.5534337392808244, | |
| "grad_norm": 5.770391941070557, | |
| "learning_rate": 4.701062215477997e-06, | |
| "loss": 0.2484, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5548749729768682, | |
| "grad_norm": 2.703814744949341, | |
| "learning_rate": 4.6858877086494695e-06, | |
| "loss": 0.2298, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.556316206672912, | |
| "grad_norm": 3.706900119781494, | |
| "learning_rate": 4.670713201820941e-06, | |
| "loss": 0.2493, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5577574403689558, | |
| "grad_norm": 2.6456151008605957, | |
| "learning_rate": 4.655538694992413e-06, | |
| "loss": 0.2533, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.5591986740649997, | |
| "grad_norm": 5.848125457763672, | |
| "learning_rate": 4.640364188163885e-06, | |
| "loss": 0.2217, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5606399077610434, | |
| "grad_norm": 2.838711738586426, | |
| "learning_rate": 4.625189681335357e-06, | |
| "loss": 0.2255, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.5620811414570873, | |
| "grad_norm": 5.126152992248535, | |
| "learning_rate": 4.610015174506829e-06, | |
| "loss": 0.2732, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5635223751531311, | |
| "grad_norm": 3.699435234069824, | |
| "learning_rate": 4.5948406676783005e-06, | |
| "loss": 0.2703, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.5649636088491748, | |
| "grad_norm": 3.9639079570770264, | |
| "learning_rate": 4.579666160849773e-06, | |
| "loss": 0.2552, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5664048425452187, | |
| "grad_norm": 12.423652648925781, | |
| "learning_rate": 4.564491654021245e-06, | |
| "loss": 0.2473, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.5678460762412625, | |
| "grad_norm": 9.687994003295898, | |
| "learning_rate": 4.549317147192716e-06, | |
| "loss": 0.2383, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5692873099373064, | |
| "grad_norm": 4.642548561096191, | |
| "learning_rate": 4.534142640364189e-06, | |
| "loss": 0.2444, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.5707285436333501, | |
| "grad_norm": 3.047471523284912, | |
| "learning_rate": 4.518968133535661e-06, | |
| "loss": 0.2159, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.572169777329394, | |
| "grad_norm": 5.581223011016846, | |
| "learning_rate": 4.503793626707132e-06, | |
| "loss": 0.2415, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.5736110110254378, | |
| "grad_norm": 5.823172092437744, | |
| "learning_rate": 4.4886191198786045e-06, | |
| "loss": 0.2445, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5750522447214816, | |
| "grad_norm": 5.474643707275391, | |
| "learning_rate": 4.473444613050077e-06, | |
| "loss": 0.2253, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.5764934784175254, | |
| "grad_norm": 5.476977825164795, | |
| "learning_rate": 4.458270106221548e-06, | |
| "loss": 0.2225, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5764934784175254, | |
| "eval_loss": 0.23420479893684387, | |
| "eval_mse": 0.23420481357816605, | |
| "eval_runtime": 3.5455, | |
| "eval_samples_per_second": 282.05, | |
| "eval_steps_per_second": 17.769, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5779347121135692, | |
| "grad_norm": 4.759982109069824, | |
| "learning_rate": 4.44309559939302e-06, | |
| "loss": 0.2449, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.579375945809613, | |
| "grad_norm": 5.200133323669434, | |
| "learning_rate": 4.427921092564492e-06, | |
| "loss": 0.2382, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5808171795056568, | |
| "grad_norm": 5.744959831237793, | |
| "learning_rate": 4.412746585735964e-06, | |
| "loss": 0.2606, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.5822584132017007, | |
| "grad_norm": 2.84411358833313, | |
| "learning_rate": 4.397572078907436e-06, | |
| "loss": 0.2506, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5836996468977445, | |
| "grad_norm": 3.2607600688934326, | |
| "learning_rate": 4.382397572078908e-06, | |
| "loss": 0.2516, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.5851408805937883, | |
| "grad_norm": 2.637953519821167, | |
| "learning_rate": 4.36722306525038e-06, | |
| "loss": 0.2389, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5865821142898321, | |
| "grad_norm": 4.82386589050293, | |
| "learning_rate": 4.352048558421852e-06, | |
| "loss": 0.272, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.5880233479858759, | |
| "grad_norm": 10.822226524353027, | |
| "learning_rate": 4.336874051593324e-06, | |
| "loss": 0.2727, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5894645816819197, | |
| "grad_norm": 4.939727783203125, | |
| "learning_rate": 4.321699544764795e-06, | |
| "loss": 0.2575, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.5909058153779635, | |
| "grad_norm": 4.966119289398193, | |
| "learning_rate": 4.306525037936267e-06, | |
| "loss": 0.2289, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5923470490740074, | |
| "grad_norm": 7.144054889678955, | |
| "learning_rate": 4.2913505311077395e-06, | |
| "loss": 0.2519, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.5937882827700511, | |
| "grad_norm": 2.4000091552734375, | |
| "learning_rate": 4.276176024279211e-06, | |
| "loss": 0.22, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.595229516466095, | |
| "grad_norm": 3.6598424911499023, | |
| "learning_rate": 4.261001517450683e-06, | |
| "loss": 0.2355, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.5966707501621388, | |
| "grad_norm": 5.501832485198975, | |
| "learning_rate": 4.245827010622155e-06, | |
| "loss": 0.2197, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5981119838581826, | |
| "grad_norm": 5.9661688804626465, | |
| "learning_rate": 4.230652503793627e-06, | |
| "loss": 0.2239, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.5995532175542264, | |
| "grad_norm": 7.844972610473633, | |
| "learning_rate": 4.215477996965099e-06, | |
| "loss": 0.235, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6009944512502702, | |
| "grad_norm": 3.511651039123535, | |
| "learning_rate": 4.2003034901365705e-06, | |
| "loss": 0.2096, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.602435684946314, | |
| "grad_norm": 13.119202613830566, | |
| "learning_rate": 4.185128983308043e-06, | |
| "loss": 0.2381, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6038769186423578, | |
| "grad_norm": 3.2882187366485596, | |
| "learning_rate": 4.169954476479515e-06, | |
| "loss": 0.2296, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.6053181523384017, | |
| "grad_norm": 8.070931434631348, | |
| "learning_rate": 4.154779969650986e-06, | |
| "loss": 0.2475, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6053181523384017, | |
| "eval_loss": 0.24032321572303772, | |
| "eval_mse": 0.2403232262916863, | |
| "eval_runtime": 3.6198, | |
| "eval_samples_per_second": 276.258, | |
| "eval_steps_per_second": 17.404, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6067593860344455, | |
| "grad_norm": 4.749974727630615, | |
| "learning_rate": 4.139605462822459e-06, | |
| "loss": 0.232, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.6082006197304893, | |
| "grad_norm": 10.697235107421875, | |
| "learning_rate": 4.124430955993931e-06, | |
| "loss": 0.2725, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6096418534265331, | |
| "grad_norm": 3.0119149684906006, | |
| "learning_rate": 4.109256449165402e-06, | |
| "loss": 0.2379, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.611083087122577, | |
| "grad_norm": 12.935966491699219, | |
| "learning_rate": 4.0940819423368746e-06, | |
| "loss": 0.2514, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6125243208186207, | |
| "grad_norm": 3.34983229637146, | |
| "learning_rate": 4.078907435508346e-06, | |
| "loss": 0.2616, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.6139655545146645, | |
| "grad_norm": 3.52375864982605, | |
| "learning_rate": 4.063732928679818e-06, | |
| "loss": 0.2353, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6154067882107084, | |
| "grad_norm": 2.455650806427002, | |
| "learning_rate": 4.0485584218512905e-06, | |
| "loss": 0.238, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.6168480219067521, | |
| "grad_norm": 6.911266803741455, | |
| "learning_rate": 4.033383915022762e-06, | |
| "loss": 0.2177, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.618289255602796, | |
| "grad_norm": 3.4753849506378174, | |
| "learning_rate": 4.018209408194234e-06, | |
| "loss": 0.244, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.6197304892988398, | |
| "grad_norm": 4.0766520500183105, | |
| "learning_rate": 4.003034901365706e-06, | |
| "loss": 0.2446, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6211717229948837, | |
| "grad_norm": 3.6783783435821533, | |
| "learning_rate": 3.987860394537178e-06, | |
| "loss": 0.2342, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.6226129566909274, | |
| "grad_norm": 4.708898544311523, | |
| "learning_rate": 3.97268588770865e-06, | |
| "loss": 0.2444, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6240541903869713, | |
| "grad_norm": 7.179166316986084, | |
| "learning_rate": 3.957511380880122e-06, | |
| "loss": 0.2562, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.6254954240830151, | |
| "grad_norm": 7.478214740753174, | |
| "learning_rate": 3.942336874051594e-06, | |
| "loss": 0.2508, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6269366577790588, | |
| "grad_norm": 3.4198427200317383, | |
| "learning_rate": 3.927162367223066e-06, | |
| "loss": 0.2601, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.6283778914751027, | |
| "grad_norm": 10.68290901184082, | |
| "learning_rate": 3.911987860394537e-06, | |
| "loss": 0.2632, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6298191251711465, | |
| "grad_norm": 3.058023452758789, | |
| "learning_rate": 3.89681335356601e-06, | |
| "loss": 0.226, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.6312603588671903, | |
| "grad_norm": 6.483073711395264, | |
| "learning_rate": 3.881638846737482e-06, | |
| "loss": 0.2929, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6327015925632341, | |
| "grad_norm": 9.623044967651367, | |
| "learning_rate": 3.866464339908953e-06, | |
| "loss": 0.2588, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.634142826259278, | |
| "grad_norm": 3.588764190673828, | |
| "learning_rate": 3.8512898330804255e-06, | |
| "loss": 0.253, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.634142826259278, | |
| "eval_loss": 0.23261044919490814, | |
| "eval_mse": 0.23261045941268094, | |
| "eval_runtime": 3.5178, | |
| "eval_samples_per_second": 284.272, | |
| "eval_steps_per_second": 17.909, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6355840599553217, | |
| "grad_norm": 5.355119705200195, | |
| "learning_rate": 3.836115326251897e-06, | |
| "loss": 0.2447, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.6370252936513656, | |
| "grad_norm": 4.401165008544922, | |
| "learning_rate": 3.820940819423369e-06, | |
| "loss": 0.2484, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6384665273474094, | |
| "grad_norm": 3.379408597946167, | |
| "learning_rate": 3.8057663125948406e-06, | |
| "loss": 0.2141, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.6399077610434531, | |
| "grad_norm": 4.606984615325928, | |
| "learning_rate": 3.790591805766313e-06, | |
| "loss": 0.2521, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.641348994739497, | |
| "grad_norm": 6.997488975524902, | |
| "learning_rate": 3.7754172989377846e-06, | |
| "loss": 0.2429, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.6427902284355408, | |
| "grad_norm": 5.074779987335205, | |
| "learning_rate": 3.7602427921092565e-06, | |
| "loss": 0.2337, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6442314621315847, | |
| "grad_norm": 4.323925971984863, | |
| "learning_rate": 3.7450682852807287e-06, | |
| "loss": 0.2238, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.6456726958276284, | |
| "grad_norm": 2.953834056854248, | |
| "learning_rate": 3.7298937784522006e-06, | |
| "loss": 0.2502, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6471139295236723, | |
| "grad_norm": 7.569371223449707, | |
| "learning_rate": 3.7147192716236724e-06, | |
| "loss": 0.2641, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.6485551632197161, | |
| "grad_norm": 3.7635698318481445, | |
| "learning_rate": 3.699544764795144e-06, | |
| "loss": 0.2396, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6499963969157599, | |
| "grad_norm": 5.548856735229492, | |
| "learning_rate": 3.6843702579666165e-06, | |
| "loss": 0.2391, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.6514376306118037, | |
| "grad_norm": 5.984725475311279, | |
| "learning_rate": 3.6691957511380883e-06, | |
| "loss": 0.1981, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6528788643078475, | |
| "grad_norm": 3.8673911094665527, | |
| "learning_rate": 3.65402124430956e-06, | |
| "loss": 0.2753, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.6543200980038913, | |
| "grad_norm": 3.2690844535827637, | |
| "learning_rate": 3.638846737481032e-06, | |
| "loss": 0.2352, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6557613316999351, | |
| "grad_norm": 6.600918292999268, | |
| "learning_rate": 3.623672230652504e-06, | |
| "loss": 0.2107, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.657202565395979, | |
| "grad_norm": 4.141593933105469, | |
| "learning_rate": 3.608497723823976e-06, | |
| "loss": 0.2366, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6586437990920228, | |
| "grad_norm": 3.730548143386841, | |
| "learning_rate": 3.593323216995448e-06, | |
| "loss": 0.2665, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.6600850327880666, | |
| "grad_norm": 4.160298824310303, | |
| "learning_rate": 3.57814871016692e-06, | |
| "loss": 0.2794, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6615262664841104, | |
| "grad_norm": 3.7238965034484863, | |
| "learning_rate": 3.562974203338392e-06, | |
| "loss": 0.2177, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.6629675001801542, | |
| "grad_norm": 3.0806283950805664, | |
| "learning_rate": 3.5477996965098638e-06, | |
| "loss": 0.2435, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6629675001801542, | |
| "eval_loss": 0.21606960892677307, | |
| "eval_mse": 0.2160696000645403, | |
| "eval_runtime": 3.6091, | |
| "eval_samples_per_second": 277.081, | |
| "eval_steps_per_second": 17.456, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.664408733876198, | |
| "grad_norm": 2.149531126022339, | |
| "learning_rate": 3.5326251896813356e-06, | |
| "loss": 0.2111, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.6658499675722418, | |
| "grad_norm": 3.548011541366577, | |
| "learning_rate": 3.517450682852808e-06, | |
| "loss": 0.2549, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6672912012682857, | |
| "grad_norm": 3.9522154331207275, | |
| "learning_rate": 3.5022761760242797e-06, | |
| "loss": 0.2398, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.6687324349643294, | |
| "grad_norm": 5.864704132080078, | |
| "learning_rate": 3.4871016691957515e-06, | |
| "loss": 0.2703, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6701736686603733, | |
| "grad_norm": 2.8301804065704346, | |
| "learning_rate": 3.4719271623672233e-06, | |
| "loss": 0.2537, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.6716149023564171, | |
| "grad_norm": 6.02512264251709, | |
| "learning_rate": 3.4567526555386956e-06, | |
| "loss": 0.2673, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.673056136052461, | |
| "grad_norm": 5.798670768737793, | |
| "learning_rate": 3.4415781487101674e-06, | |
| "loss": 0.2269, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.6744973697485047, | |
| "grad_norm": 3.2476484775543213, | |
| "learning_rate": 3.4264036418816392e-06, | |
| "loss": 0.2453, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6759386034445485, | |
| "grad_norm": 2.640549659729004, | |
| "learning_rate": 3.4112291350531115e-06, | |
| "loss": 0.2378, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.6773798371405924, | |
| "grad_norm": 2.7928059101104736, | |
| "learning_rate": 3.3960546282245833e-06, | |
| "loss": 0.2269, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6788210708366361, | |
| "grad_norm": 3.8571255207061768, | |
| "learning_rate": 3.380880121396055e-06, | |
| "loss": 0.2241, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.68026230453268, | |
| "grad_norm": 6.047236442565918, | |
| "learning_rate": 3.365705614567527e-06, | |
| "loss": 0.2427, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6817035382287238, | |
| "grad_norm": 2.478774070739746, | |
| "learning_rate": 3.350531107738999e-06, | |
| "loss": 0.2397, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.6831447719247676, | |
| "grad_norm": 5.034613609313965, | |
| "learning_rate": 3.3353566009104706e-06, | |
| "loss": 0.2545, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6845860056208114, | |
| "grad_norm": 8.068209648132324, | |
| "learning_rate": 3.3201820940819424e-06, | |
| "loss": 0.2692, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.6860272393168553, | |
| "grad_norm": 4.195474624633789, | |
| "learning_rate": 3.3050075872534143e-06, | |
| "loss": 0.2334, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.687468473012899, | |
| "grad_norm": 2.976191759109497, | |
| "learning_rate": 3.289833080424886e-06, | |
| "loss": 0.2519, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.6889097067089428, | |
| "grad_norm": 3.802194118499756, | |
| "learning_rate": 3.2746585735963583e-06, | |
| "loss": 0.2178, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6903509404049867, | |
| "grad_norm": 3.7784626483917236, | |
| "learning_rate": 3.25948406676783e-06, | |
| "loss": 0.2381, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.6917921741010304, | |
| "grad_norm": 4.433982849121094, | |
| "learning_rate": 3.244309559939302e-06, | |
| "loss": 0.2865, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6917921741010304, | |
| "eval_loss": 0.22648707032203674, | |
| "eval_mse": 0.2264870922613336, | |
| "eval_runtime": 3.5396, | |
| "eval_samples_per_second": 282.517, | |
| "eval_steps_per_second": 17.799, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6932334077970743, | |
| "grad_norm": 6.46554708480835, | |
| "learning_rate": 3.229135053110774e-06, | |
| "loss": 0.2477, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.6946746414931181, | |
| "grad_norm": 4.361721992492676, | |
| "learning_rate": 3.213960546282246e-06, | |
| "loss": 0.2319, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.696115875189162, | |
| "grad_norm": 6.774047374725342, | |
| "learning_rate": 3.198786039453718e-06, | |
| "loss": 0.2454, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.6975571088852057, | |
| "grad_norm": 8.801002502441406, | |
| "learning_rate": 3.1836115326251897e-06, | |
| "loss": 0.2417, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6989983425812496, | |
| "grad_norm": 3.7401864528656006, | |
| "learning_rate": 3.168437025796662e-06, | |
| "loss": 0.2472, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.7004395762772934, | |
| "grad_norm": 4.724398136138916, | |
| "learning_rate": 3.153262518968134e-06, | |
| "loss": 0.2413, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7018808099733371, | |
| "grad_norm": 4.55704927444458, | |
| "learning_rate": 3.1380880121396056e-06, | |
| "loss": 0.2486, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.703322043669381, | |
| "grad_norm": 4.389082431793213, | |
| "learning_rate": 3.1229135053110775e-06, | |
| "loss": 0.2163, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7047632773654248, | |
| "grad_norm": 2.8571767807006836, | |
| "learning_rate": 3.1077389984825497e-06, | |
| "loss": 0.2638, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.7062045110614686, | |
| "grad_norm": 4.311636447906494, | |
| "learning_rate": 3.0925644916540215e-06, | |
| "loss": 0.234, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7076457447575124, | |
| "grad_norm": 2.979557752609253, | |
| "learning_rate": 3.0773899848254934e-06, | |
| "loss": 0.2644, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.7090869784535563, | |
| "grad_norm": 3.3358638286590576, | |
| "learning_rate": 3.062215477996965e-06, | |
| "loss": 0.2606, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7105282121496, | |
| "grad_norm": 2.8190908432006836, | |
| "learning_rate": 3.0470409711684375e-06, | |
| "loss": 0.2195, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.7119694458456439, | |
| "grad_norm": 4.225391387939453, | |
| "learning_rate": 3.0318664643399093e-06, | |
| "loss": 0.2236, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7134106795416877, | |
| "grad_norm": 8.34825611114502, | |
| "learning_rate": 3.016691957511381e-06, | |
| "loss": 0.2587, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.7148519132377315, | |
| "grad_norm": 3.4229488372802734, | |
| "learning_rate": 3.0015174506828534e-06, | |
| "loss": 0.2353, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7162931469337753, | |
| "grad_norm": 13.701717376708984, | |
| "learning_rate": 2.986342943854325e-06, | |
| "loss": 0.2678, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.7177343806298191, | |
| "grad_norm": 3.0285487174987793, | |
| "learning_rate": 2.971168437025797e-06, | |
| "loss": 0.2346, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.719175614325863, | |
| "grad_norm": 6.881831645965576, | |
| "learning_rate": 2.955993930197269e-06, | |
| "loss": 0.2431, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.7206168480219067, | |
| "grad_norm": 4.84171199798584, | |
| "learning_rate": 2.940819423368741e-06, | |
| "loss": 0.2351, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7206168480219067, | |
| "eval_loss": 0.23427650332450867, | |
| "eval_mse": 0.2342765065105632, | |
| "eval_runtime": 3.5318, | |
| "eval_samples_per_second": 283.14, | |
| "eval_steps_per_second": 17.838, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7220580817179506, | |
| "grad_norm": 4.6257548332214355, | |
| "learning_rate": 2.925644916540213e-06, | |
| "loss": 0.2287, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.7234993154139944, | |
| "grad_norm": 3.9493324756622314, | |
| "learning_rate": 2.9104704097116847e-06, | |
| "loss": 0.2516, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7249405491100382, | |
| "grad_norm": 2.4812047481536865, | |
| "learning_rate": 2.8952959028831566e-06, | |
| "loss": 0.2903, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.726381782806082, | |
| "grad_norm": 6.331687927246094, | |
| "learning_rate": 2.880121396054629e-06, | |
| "loss": 0.2332, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7278230165021258, | |
| "grad_norm": 4.676543712615967, | |
| "learning_rate": 2.8649468892261007e-06, | |
| "loss": 0.2508, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.7292642501981697, | |
| "grad_norm": 4.514668941497803, | |
| "learning_rate": 2.849772382397572e-06, | |
| "loss": 0.2277, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7307054838942134, | |
| "grad_norm": 4.100338459014893, | |
| "learning_rate": 2.834597875569044e-06, | |
| "loss": 0.2104, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.7321467175902573, | |
| "grad_norm": 4.366134166717529, | |
| "learning_rate": 2.8194233687405157e-06, | |
| "loss": 0.2409, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.733587951286301, | |
| "grad_norm": 2.7329518795013428, | |
| "learning_rate": 2.804248861911988e-06, | |
| "loss": 0.2085, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.7350291849823449, | |
| "grad_norm": 5.565128803253174, | |
| "learning_rate": 2.78907435508346e-06, | |
| "loss": 0.2284, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7364704186783887, | |
| "grad_norm": 2.5566251277923584, | |
| "learning_rate": 2.7738998482549316e-06, | |
| "loss": 0.2087, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.7379116523744326, | |
| "grad_norm": 3.898559331893921, | |
| "learning_rate": 2.758725341426404e-06, | |
| "loss": 0.2541, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7393528860704763, | |
| "grad_norm": 5.346958160400391, | |
| "learning_rate": 2.7435508345978757e-06, | |
| "loss": 0.2322, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.7407941197665201, | |
| "grad_norm": 9.296390533447266, | |
| "learning_rate": 2.7283763277693475e-06, | |
| "loss": 0.2522, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.742235353462564, | |
| "grad_norm": 3.838529348373413, | |
| "learning_rate": 2.7132018209408194e-06, | |
| "loss": 0.2562, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.7436765871586077, | |
| "grad_norm": 2.6012656688690186, | |
| "learning_rate": 2.6980273141122916e-06, | |
| "loss": 0.2637, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7451178208546516, | |
| "grad_norm": 5.522032737731934, | |
| "learning_rate": 2.6828528072837634e-06, | |
| "loss": 0.2537, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.7465590545506954, | |
| "grad_norm": 5.43183708190918, | |
| "learning_rate": 2.6676783004552353e-06, | |
| "loss": 0.2322, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7480002882467393, | |
| "grad_norm": 3.3480212688446045, | |
| "learning_rate": 2.652503793626707e-06, | |
| "loss": 0.2349, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.749441521942783, | |
| "grad_norm": 3.7614428997039795, | |
| "learning_rate": 2.6373292867981793e-06, | |
| "loss": 0.2582, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.749441521942783, | |
| "eval_loss": 0.2341535985469818, | |
| "eval_mse": 0.23415358681604267, | |
| "eval_runtime": 3.5332, | |
| "eval_samples_per_second": 283.03, | |
| "eval_steps_per_second": 17.831, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7508827556388268, | |
| "grad_norm": 9.034743309020996, | |
| "learning_rate": 2.622154779969651e-06, | |
| "loss": 0.2608, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.7523239893348707, | |
| "grad_norm": 7.9333062171936035, | |
| "learning_rate": 2.606980273141123e-06, | |
| "loss": 0.2403, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7537652230309144, | |
| "grad_norm": 2.5331106185913086, | |
| "learning_rate": 2.5918057663125952e-06, | |
| "loss": 0.2326, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.7552064567269583, | |
| "grad_norm": 4.369472026824951, | |
| "learning_rate": 2.576631259484067e-06, | |
| "loss": 0.2631, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7566476904230021, | |
| "grad_norm": 4.154514789581299, | |
| "learning_rate": 2.561456752655539e-06, | |
| "loss": 0.222, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.7580889241190459, | |
| "grad_norm": 3.586236000061035, | |
| "learning_rate": 2.5462822458270107e-06, | |
| "loss": 0.2319, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7595301578150897, | |
| "grad_norm": 4.088715076446533, | |
| "learning_rate": 2.531107738998483e-06, | |
| "loss": 0.2207, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.7609713915111336, | |
| "grad_norm": 3.201357364654541, | |
| "learning_rate": 2.515933232169955e-06, | |
| "loss": 0.2472, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7624126252071773, | |
| "grad_norm": 11.038939476013184, | |
| "learning_rate": 2.5007587253414266e-06, | |
| "loss": 0.2467, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.7638538589032211, | |
| "grad_norm": 2.891178846359253, | |
| "learning_rate": 2.4855842185128985e-06, | |
| "loss": 0.2492, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.765295092599265, | |
| "grad_norm": 5.062381267547607, | |
| "learning_rate": 2.4704097116843703e-06, | |
| "loss": 0.2564, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.7667363262953087, | |
| "grad_norm": 7.891109943389893, | |
| "learning_rate": 2.455235204855842e-06, | |
| "loss": 0.2163, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7681775599913526, | |
| "grad_norm": 2.734602451324463, | |
| "learning_rate": 2.4400606980273144e-06, | |
| "loss": 0.2392, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.7696187936873964, | |
| "grad_norm": 3.1891098022460938, | |
| "learning_rate": 2.424886191198786e-06, | |
| "loss": 0.236, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7710600273834403, | |
| "grad_norm": 2.7162604331970215, | |
| "learning_rate": 2.409711684370258e-06, | |
| "loss": 0.2412, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.772501261079484, | |
| "grad_norm": 9.879570960998535, | |
| "learning_rate": 2.39453717754173e-06, | |
| "loss": 0.2362, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7739424947755279, | |
| "grad_norm": 5.890117645263672, | |
| "learning_rate": 2.379362670713202e-06, | |
| "loss": 0.2489, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.7753837284715717, | |
| "grad_norm": 4.443430423736572, | |
| "learning_rate": 2.364188163884674e-06, | |
| "loss": 0.2402, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7768249621676154, | |
| "grad_norm": 8.724933624267578, | |
| "learning_rate": 2.3490136570561458e-06, | |
| "loss": 0.2343, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.7782661958636593, | |
| "grad_norm": 5.362820148468018, | |
| "learning_rate": 2.333839150227618e-06, | |
| "loss": 0.2167, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7782661958636593, | |
| "eval_loss": 0.23367303609848022, | |
| "eval_mse": 0.23367304604459788, | |
| "eval_runtime": 3.4711, | |
| "eval_samples_per_second": 288.093, | |
| "eval_steps_per_second": 18.15, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7797074295597031, | |
| "grad_norm": 4.84460973739624, | |
| "learning_rate": 2.31866464339909e-06, | |
| "loss": 0.2462, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.781148663255747, | |
| "grad_norm": 3.910109043121338, | |
| "learning_rate": 2.3034901365705617e-06, | |
| "loss": 0.2238, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7825898969517907, | |
| "grad_norm": 5.745606422424316, | |
| "learning_rate": 2.2883156297420335e-06, | |
| "loss": 0.2653, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.7840311306478346, | |
| "grad_norm": 3.182253837585449, | |
| "learning_rate": 2.2731411229135057e-06, | |
| "loss": 0.2527, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7854723643438783, | |
| "grad_norm": 5.271828651428223, | |
| "learning_rate": 2.2579666160849776e-06, | |
| "loss": 0.2354, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.7869135980399222, | |
| "grad_norm": 10.707908630371094, | |
| "learning_rate": 2.2427921092564494e-06, | |
| "loss": 0.2249, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.788354831735966, | |
| "grad_norm": 8.2650785446167, | |
| "learning_rate": 2.2276176024279212e-06, | |
| "loss": 0.2293, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.7897960654320098, | |
| "grad_norm": 5.198644161224365, | |
| "learning_rate": 2.212443095599393e-06, | |
| "loss": 0.2669, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7912372991280536, | |
| "grad_norm": 3.3063764572143555, | |
| "learning_rate": 2.197268588770865e-06, | |
| "loss": 0.2177, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.7926785328240974, | |
| "grad_norm": 7.53934383392334, | |
| "learning_rate": 2.182094081942337e-06, | |
| "loss": 0.2339, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7941197665201413, | |
| "grad_norm": 2.6870410442352295, | |
| "learning_rate": 2.166919575113809e-06, | |
| "loss": 0.2357, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.795561000216185, | |
| "grad_norm": 6.6113810539245605, | |
| "learning_rate": 2.1517450682852808e-06, | |
| "loss": 0.2345, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7970022339122289, | |
| "grad_norm": 6.589913368225098, | |
| "learning_rate": 2.1365705614567526e-06, | |
| "loss": 0.2586, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.7984434676082727, | |
| "grad_norm": 3.827324867248535, | |
| "learning_rate": 2.121396054628225e-06, | |
| "loss": 0.2378, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7998847013043165, | |
| "grad_norm": 4.446374893188477, | |
| "learning_rate": 2.1062215477996967e-06, | |
| "loss": 0.2181, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.8013259350003603, | |
| "grad_norm": 3.70227313041687, | |
| "learning_rate": 2.0910470409711685e-06, | |
| "loss": 0.2358, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8027671686964041, | |
| "grad_norm": 3.6692309379577637, | |
| "learning_rate": 2.0758725341426408e-06, | |
| "loss": 0.2394, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.804208402392448, | |
| "grad_norm": 3.3776164054870605, | |
| "learning_rate": 2.0606980273141126e-06, | |
| "loss": 0.2564, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8056496360884917, | |
| "grad_norm": 3.1166326999664307, | |
| "learning_rate": 2.0455235204855844e-06, | |
| "loss": 0.2523, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.8070908697845356, | |
| "grad_norm": 5.939455986022949, | |
| "learning_rate": 2.0303490136570563e-06, | |
| "loss": 0.2495, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8070908697845356, | |
| "eval_loss": 0.22727453708648682, | |
| "eval_mse": 0.22727454181946813, | |
| "eval_runtime": 3.4934, | |
| "eval_samples_per_second": 286.258, | |
| "eval_steps_per_second": 18.034, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8085321034805794, | |
| "grad_norm": 5.415030002593994, | |
| "learning_rate": 2.0151745068285285e-06, | |
| "loss": 0.2211, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.8099733371766232, | |
| "grad_norm": 7.75388240814209, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.2482, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.811414570872667, | |
| "grad_norm": 4.087270736694336, | |
| "learning_rate": 1.984825493171472e-06, | |
| "loss": 0.2305, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.8128558045687109, | |
| "grad_norm": 5.57621955871582, | |
| "learning_rate": 1.969650986342944e-06, | |
| "loss": 0.2371, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8142970382647546, | |
| "grad_norm": 3.64129638671875, | |
| "learning_rate": 1.954476479514416e-06, | |
| "loss": 0.2188, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.8157382719607984, | |
| "grad_norm": 5.807697772979736, | |
| "learning_rate": 1.9393019726858876e-06, | |
| "loss": 0.2417, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8171795056568423, | |
| "grad_norm": 3.478083610534668, | |
| "learning_rate": 1.92412746585736e-06, | |
| "loss": 0.2443, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.818620739352886, | |
| "grad_norm": 4.476086139678955, | |
| "learning_rate": 1.9089529590288317e-06, | |
| "loss": 0.2196, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8200619730489299, | |
| "grad_norm": 3.3738081455230713, | |
| "learning_rate": 1.8937784522003035e-06, | |
| "loss": 0.222, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.8215032067449737, | |
| "grad_norm": 6.0412163734436035, | |
| "learning_rate": 1.8786039453717756e-06, | |
| "loss": 0.2297, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8229444404410176, | |
| "grad_norm": 4.279974937438965, | |
| "learning_rate": 1.8634294385432474e-06, | |
| "loss": 0.2597, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.8243856741370613, | |
| "grad_norm": 7.096454620361328, | |
| "learning_rate": 1.8482549317147195e-06, | |
| "loss": 0.2466, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8258269078331051, | |
| "grad_norm": 2.768843412399292, | |
| "learning_rate": 1.8330804248861913e-06, | |
| "loss": 0.2363, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.827268141529149, | |
| "grad_norm": 4.103148460388184, | |
| "learning_rate": 1.8179059180576633e-06, | |
| "loss": 0.2451, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8287093752251927, | |
| "grad_norm": 6.537170886993408, | |
| "learning_rate": 1.8027314112291352e-06, | |
| "loss": 0.2394, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.8301506089212366, | |
| "grad_norm": 3.861488103866577, | |
| "learning_rate": 1.7875569044006072e-06, | |
| "loss": 0.2407, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8315918426172804, | |
| "grad_norm": 4.997881889343262, | |
| "learning_rate": 1.7723823975720792e-06, | |
| "loss": 0.2369, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.8330330763133242, | |
| "grad_norm": 3.4017415046691895, | |
| "learning_rate": 1.757207890743551e-06, | |
| "loss": 0.2272, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.834474310009368, | |
| "grad_norm": 3.679633140563965, | |
| "learning_rate": 1.742033383915023e-06, | |
| "loss": 0.223, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.8359155437054119, | |
| "grad_norm": 3.0705554485321045, | |
| "learning_rate": 1.726858877086495e-06, | |
| "loss": 0.2364, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8359155437054119, | |
| "eval_loss": 0.22982758283615112, | |
| "eval_mse": 0.229827576600248, | |
| "eval_runtime": 3.5078, | |
| "eval_samples_per_second": 285.082, | |
| "eval_steps_per_second": 17.96, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8373567774014556, | |
| "grad_norm": 3.1811139583587646, | |
| "learning_rate": 1.7116843702579665e-06, | |
| "loss": 0.2422, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.8387980110974994, | |
| "grad_norm": 2.974726915359497, | |
| "learning_rate": 1.6965098634294386e-06, | |
| "loss": 0.2405, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8402392447935433, | |
| "grad_norm": 9.266916275024414, | |
| "learning_rate": 1.6813353566009106e-06, | |
| "loss": 0.2312, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.841680478489587, | |
| "grad_norm": 6.627812385559082, | |
| "learning_rate": 1.6661608497723824e-06, | |
| "loss": 0.2605, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8431217121856309, | |
| "grad_norm": 3.63108491897583, | |
| "learning_rate": 1.6509863429438545e-06, | |
| "loss": 0.197, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.8445629458816747, | |
| "grad_norm": 3.545898914337158, | |
| "learning_rate": 1.6358118361153263e-06, | |
| "loss": 0.231, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.8460041795777186, | |
| "grad_norm": 4.493469715118408, | |
| "learning_rate": 1.6206373292867984e-06, | |
| "loss": 0.2516, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.8474454132737623, | |
| "grad_norm": 3.4782025814056396, | |
| "learning_rate": 1.6054628224582702e-06, | |
| "loss": 0.2234, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8488866469698062, | |
| "grad_norm": 3.453437566757202, | |
| "learning_rate": 1.5902883156297422e-06, | |
| "loss": 0.2201, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.85032788066585, | |
| "grad_norm": 3.0764260292053223, | |
| "learning_rate": 1.575113808801214e-06, | |
| "loss": 0.2397, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8517691143618937, | |
| "grad_norm": 4.054821014404297, | |
| "learning_rate": 1.559939301972686e-06, | |
| "loss": 0.2319, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.8532103480579376, | |
| "grad_norm": 4.305934429168701, | |
| "learning_rate": 1.544764795144158e-06, | |
| "loss": 0.257, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8546515817539814, | |
| "grad_norm": 4.810431480407715, | |
| "learning_rate": 1.52959028831563e-06, | |
| "loss": 0.229, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.8560928154500252, | |
| "grad_norm": 3.385409116744995, | |
| "learning_rate": 1.514415781487102e-06, | |
| "loss": 0.2382, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.857534049146069, | |
| "grad_norm": 3.6940057277679443, | |
| "learning_rate": 1.4992412746585738e-06, | |
| "loss": 0.2206, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.8589752828421129, | |
| "grad_norm": 4.147489547729492, | |
| "learning_rate": 1.4840667678300459e-06, | |
| "loss": 0.2429, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8604165165381567, | |
| "grad_norm": 2.8008570671081543, | |
| "learning_rate": 1.4688922610015175e-06, | |
| "loss": 0.229, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.8618577502342005, | |
| "grad_norm": 3.3151803016662598, | |
| "learning_rate": 1.4537177541729893e-06, | |
| "loss": 0.2556, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8632989839302443, | |
| "grad_norm": 3.3559606075286865, | |
| "learning_rate": 1.4385432473444613e-06, | |
| "loss": 0.2048, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.864740217626288, | |
| "grad_norm": 7.827072620391846, | |
| "learning_rate": 1.4233687405159332e-06, | |
| "loss": 0.2236, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.864740217626288, | |
| "eval_loss": 0.21699398756027222, | |
| "eval_mse": 0.21699400277157838, | |
| "eval_runtime": 3.6477, | |
| "eval_samples_per_second": 274.144, | |
| "eval_steps_per_second": 17.271, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8661814513223319, | |
| "grad_norm": 4.992173194885254, | |
| "learning_rate": 1.4081942336874052e-06, | |
| "loss": 0.2542, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.8676226850183757, | |
| "grad_norm": 3.356741428375244, | |
| "learning_rate": 1.3930197268588772e-06, | |
| "loss": 0.2368, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8690639187144196, | |
| "grad_norm": 5.88596773147583, | |
| "learning_rate": 1.377845220030349e-06, | |
| "loss": 0.2497, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.8705051524104633, | |
| "grad_norm": 4.098241806030273, | |
| "learning_rate": 1.3626707132018211e-06, | |
| "loss": 0.2639, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8719463861065072, | |
| "grad_norm": 5.0098347663879395, | |
| "learning_rate": 1.347496206373293e-06, | |
| "loss": 0.2341, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.873387619802551, | |
| "grad_norm": 4.149919033050537, | |
| "learning_rate": 1.332321699544765e-06, | |
| "loss": 0.2148, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8748288534985948, | |
| "grad_norm": 3.0215213298797607, | |
| "learning_rate": 1.3171471927162368e-06, | |
| "loss": 0.2426, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.8762700871946386, | |
| "grad_norm": 3.6104202270507812, | |
| "learning_rate": 1.3019726858877088e-06, | |
| "loss": 0.2096, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8777113208906824, | |
| "grad_norm": 6.203906059265137, | |
| "learning_rate": 1.2867981790591807e-06, | |
| "loss": 0.2306, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.8791525545867263, | |
| "grad_norm": 9.710282325744629, | |
| "learning_rate": 1.2716236722306527e-06, | |
| "loss": 0.2262, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.88059378828277, | |
| "grad_norm": 6.790435791015625, | |
| "learning_rate": 1.2564491654021245e-06, | |
| "loss": 0.2329, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.8820350219788139, | |
| "grad_norm": 5.565480709075928, | |
| "learning_rate": 1.2412746585735964e-06, | |
| "loss": 0.2614, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8834762556748577, | |
| "grad_norm": 3.470287322998047, | |
| "learning_rate": 1.2261001517450684e-06, | |
| "loss": 0.2437, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.8849174893709015, | |
| "grad_norm": 3.8940699100494385, | |
| "learning_rate": 1.2109256449165402e-06, | |
| "loss": 0.2458, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8863587230669453, | |
| "grad_norm": 3.414523124694824, | |
| "learning_rate": 1.1957511380880123e-06, | |
| "loss": 0.2401, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.8877999567629892, | |
| "grad_norm": 3.306699752807617, | |
| "learning_rate": 1.1805766312594843e-06, | |
| "loss": 0.266, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8892411904590329, | |
| "grad_norm": 5.706084251403809, | |
| "learning_rate": 1.1654021244309561e-06, | |
| "loss": 0.2496, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.8906824241550767, | |
| "grad_norm": 4.052499771118164, | |
| "learning_rate": 1.150227617602428e-06, | |
| "loss": 0.2488, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8921236578511206, | |
| "grad_norm": 4.791928291320801, | |
| "learning_rate": 1.1350531107738998e-06, | |
| "loss": 0.2431, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.8935648915471643, | |
| "grad_norm": 2.6201539039611816, | |
| "learning_rate": 1.1198786039453718e-06, | |
| "loss": 0.231, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8935648915471643, | |
| "eval_loss": 0.22340208292007446, | |
| "eval_mse": 0.22340208877553233, | |
| "eval_runtime": 3.6046, | |
| "eval_samples_per_second": 277.421, | |
| "eval_steps_per_second": 17.477, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8950061252432082, | |
| "grad_norm": 3.4521875381469727, | |
| "learning_rate": 1.1047040971168439e-06, | |
| "loss": 0.2156, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.896447358939252, | |
| "grad_norm": 3.851257085800171, | |
| "learning_rate": 1.0895295902883157e-06, | |
| "loss": 0.222, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8978885926352959, | |
| "grad_norm": 3.1783621311187744, | |
| "learning_rate": 1.0743550834597877e-06, | |
| "loss": 0.2571, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.8993298263313396, | |
| "grad_norm": 6.458141326904297, | |
| "learning_rate": 1.0591805766312596e-06, | |
| "loss": 0.2194, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.9007710600273834, | |
| "grad_norm": 6.427337169647217, | |
| "learning_rate": 1.0440060698027316e-06, | |
| "loss": 0.2468, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.9022122937234273, | |
| "grad_norm": 3.6548845767974854, | |
| "learning_rate": 1.0288315629742034e-06, | |
| "loss": 0.2646, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.903653527419471, | |
| "grad_norm": 10.87209415435791, | |
| "learning_rate": 1.0136570561456753e-06, | |
| "loss": 0.2473, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.9050947611155149, | |
| "grad_norm": 3.511836290359497, | |
| "learning_rate": 9.984825493171473e-07, | |
| "loss": 0.211, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9065359948115587, | |
| "grad_norm": 6.754063129425049, | |
| "learning_rate": 9.833080424886191e-07, | |
| "loss": 0.2296, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.9079772285076025, | |
| "grad_norm": 3.369685173034668, | |
| "learning_rate": 9.681335356600912e-07, | |
| "loss": 0.2473, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9094184622036463, | |
| "grad_norm": 3.534219741821289, | |
| "learning_rate": 9.529590288315631e-07, | |
| "loss": 0.2478, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.9108596958996902, | |
| "grad_norm": 2.9221699237823486, | |
| "learning_rate": 9.37784522003035e-07, | |
| "loss": 0.2464, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.912300929595734, | |
| "grad_norm": 3.83062481880188, | |
| "learning_rate": 9.22610015174507e-07, | |
| "loss": 0.2334, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.9137421632917777, | |
| "grad_norm": 2.8956944942474365, | |
| "learning_rate": 9.074355083459788e-07, | |
| "loss": 0.2328, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9151833969878216, | |
| "grad_norm": 5.482194900512695, | |
| "learning_rate": 8.922610015174507e-07, | |
| "loss": 0.2301, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.9166246306838653, | |
| "grad_norm": 4.709765434265137, | |
| "learning_rate": 8.770864946889227e-07, | |
| "loss": 0.2486, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9180658643799092, | |
| "grad_norm": 4.38163948059082, | |
| "learning_rate": 8.619119878603946e-07, | |
| "loss": 0.2463, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.919507098075953, | |
| "grad_norm": 8.512566566467285, | |
| "learning_rate": 8.467374810318665e-07, | |
| "loss": 0.2662, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.9209483317719969, | |
| "grad_norm": 4.098446369171143, | |
| "learning_rate": 8.315629742033385e-07, | |
| "loss": 0.2466, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.9223895654680406, | |
| "grad_norm": 3.8778812885284424, | |
| "learning_rate": 8.163884673748104e-07, | |
| "loss": 0.2474, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9223895654680406, | |
| "eval_loss": 0.22270123660564423, | |
| "eval_mse": 0.2227012378773652, | |
| "eval_runtime": 3.5199, | |
| "eval_samples_per_second": 284.101, | |
| "eval_steps_per_second": 17.898, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9238307991640845, | |
| "grad_norm": 4.652983665466309, | |
| "learning_rate": 8.012139605462823e-07, | |
| "loss": 0.2426, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.9252720328601283, | |
| "grad_norm": 2.9939351081848145, | |
| "learning_rate": 7.860394537177542e-07, | |
| "loss": 0.2258, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.926713266556172, | |
| "grad_norm": 3.162224531173706, | |
| "learning_rate": 7.708649468892261e-07, | |
| "loss": 0.2489, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.9281545002522159, | |
| "grad_norm": 6.1820807456970215, | |
| "learning_rate": 7.55690440060698e-07, | |
| "loss": 0.244, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9295957339482597, | |
| "grad_norm": 3.0030033588409424, | |
| "learning_rate": 7.4051593323217e-07, | |
| "loss": 0.2482, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.9310369676443035, | |
| "grad_norm": 2.886375904083252, | |
| "learning_rate": 7.253414264036419e-07, | |
| "loss": 0.2329, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9324782013403473, | |
| "grad_norm": 4.034816741943359, | |
| "learning_rate": 7.101669195751138e-07, | |
| "loss": 0.2241, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.9339194350363912, | |
| "grad_norm": 3.043692111968994, | |
| "learning_rate": 6.949924127465859e-07, | |
| "loss": 0.2271, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.935360668732435, | |
| "grad_norm": 3.8499319553375244, | |
| "learning_rate": 6.798179059180578e-07, | |
| "loss": 0.237, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.9368019024284788, | |
| "grad_norm": 5.247629165649414, | |
| "learning_rate": 6.646433990895297e-07, | |
| "loss": 0.2495, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9382431361245226, | |
| "grad_norm": 2.750441312789917, | |
| "learning_rate": 6.494688922610016e-07, | |
| "loss": 0.2398, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.9396843698205664, | |
| "grad_norm": 3.8449652194976807, | |
| "learning_rate": 6.342943854324735e-07, | |
| "loss": 0.2387, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9411256035166102, | |
| "grad_norm": 3.128649950027466, | |
| "learning_rate": 6.191198786039454e-07, | |
| "loss": 0.2164, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.942566837212654, | |
| "grad_norm": 7.363992691040039, | |
| "learning_rate": 6.039453717754174e-07, | |
| "loss": 0.2351, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9440080709086979, | |
| "grad_norm": 4.601086616516113, | |
| "learning_rate": 5.887708649468893e-07, | |
| "loss": 0.2237, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.9454493046047416, | |
| "grad_norm": 5.33753776550293, | |
| "learning_rate": 5.735963581183612e-07, | |
| "loss": 0.2281, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9468905383007855, | |
| "grad_norm": 3.417290449142456, | |
| "learning_rate": 5.584218512898331e-07, | |
| "loss": 0.2377, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.9483317719968293, | |
| "grad_norm": 3.4570603370666504, | |
| "learning_rate": 5.43247344461305e-07, | |
| "loss": 0.2203, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9497730056928732, | |
| "grad_norm": 2.8994665145874023, | |
| "learning_rate": 5.28072837632777e-07, | |
| "loss": 0.2236, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.9512142393889169, | |
| "grad_norm": 3.222146511077881, | |
| "learning_rate": 5.12898330804249e-07, | |
| "loss": 0.2333, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9512142393889169, | |
| "eval_loss": 0.2240542769432068, | |
| "eval_mse": 0.22405428479570036, | |
| "eval_runtime": 3.5952, | |
| "eval_samples_per_second": 278.147, | |
| "eval_steps_per_second": 17.523, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9526554730849607, | |
| "grad_norm": 3.720475673675537, | |
| "learning_rate": 4.977238239757208e-07, | |
| "loss": 0.2587, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.9540967067810046, | |
| "grad_norm": 2.5176913738250732, | |
| "learning_rate": 4.825493171471927e-07, | |
| "loss": 0.2155, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9555379404770483, | |
| "grad_norm": 4.59359884262085, | |
| "learning_rate": 4.673748103186647e-07, | |
| "loss": 0.2151, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.9569791741730922, | |
| "grad_norm": 5.00642728805542, | |
| "learning_rate": 4.5220030349013665e-07, | |
| "loss": 0.215, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.958420407869136, | |
| "grad_norm": 3.947004556655884, | |
| "learning_rate": 4.3702579666160853e-07, | |
| "loss": 0.2259, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.9598616415651798, | |
| "grad_norm": 7.238663196563721, | |
| "learning_rate": 4.2185128983308046e-07, | |
| "loss": 0.2444, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.9613028752612236, | |
| "grad_norm": 5.583510398864746, | |
| "learning_rate": 4.066767830045524e-07, | |
| "loss": 0.2388, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.9627441089572675, | |
| "grad_norm": 4.246332168579102, | |
| "learning_rate": 3.9150227617602433e-07, | |
| "loss": 0.2343, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.9641853426533112, | |
| "grad_norm": 2.9725794792175293, | |
| "learning_rate": 3.763277693474962e-07, | |
| "loss": 0.2356, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.965626576349355, | |
| "grad_norm": 3.7293708324432373, | |
| "learning_rate": 3.6115326251896814e-07, | |
| "loss": 0.2317, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9670678100453989, | |
| "grad_norm": 3.179847002029419, | |
| "learning_rate": 3.459787556904401e-07, | |
| "loss": 0.1998, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.9685090437414426, | |
| "grad_norm": 7.184839725494385, | |
| "learning_rate": 3.3080424886191206e-07, | |
| "loss": 0.2042, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9699502774374865, | |
| "grad_norm": 3.559626817703247, | |
| "learning_rate": 3.156297420333839e-07, | |
| "loss": 0.2339, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.9713915111335303, | |
| "grad_norm": 8.27930736541748, | |
| "learning_rate": 3.004552352048559e-07, | |
| "loss": 0.2266, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9728327448295742, | |
| "grad_norm": 9.843157768249512, | |
| "learning_rate": 2.852807283763278e-07, | |
| "loss": 0.2377, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.9742739785256179, | |
| "grad_norm": 4.575967311859131, | |
| "learning_rate": 2.701062215477997e-07, | |
| "loss": 0.229, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9757152122216617, | |
| "grad_norm": 6.101067543029785, | |
| "learning_rate": 2.549317147192716e-07, | |
| "loss": 0.237, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.9771564459177056, | |
| "grad_norm": 5.833640098571777, | |
| "learning_rate": 2.3975720789074356e-07, | |
| "loss": 0.2441, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.9785976796137493, | |
| "grad_norm": 6.027054309844971, | |
| "learning_rate": 2.245827010622155e-07, | |
| "loss": 0.2194, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.9800389133097932, | |
| "grad_norm": 4.420630931854248, | |
| "learning_rate": 2.0940819423368745e-07, | |
| "loss": 0.2265, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9800389133097932, | |
| "eval_loss": 0.21972429752349854, | |
| "eval_mse": 0.2197243231460452, | |
| "eval_runtime": 3.6053, | |
| "eval_samples_per_second": 277.37, | |
| "eval_steps_per_second": 17.474, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.981480147005837, | |
| "grad_norm": 5.711348056793213, | |
| "learning_rate": 1.9423368740515936e-07, | |
| "loss": 0.2362, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.9829213807018808, | |
| "grad_norm": 4.06050968170166, | |
| "learning_rate": 1.790591805766313e-07, | |
| "loss": 0.2623, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9843626143979246, | |
| "grad_norm": 3.5431113243103027, | |
| "learning_rate": 1.638846737481032e-07, | |
| "loss": 0.241, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.9858038480939685, | |
| "grad_norm": 7.887821674346924, | |
| "learning_rate": 1.4871016691957513e-07, | |
| "loss": 0.2662, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9872450817900122, | |
| "grad_norm": 3.6304845809936523, | |
| "learning_rate": 1.3353566009104704e-07, | |
| "loss": 0.2154, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.988686315486056, | |
| "grad_norm": 2.7374932765960693, | |
| "learning_rate": 1.1836115326251897e-07, | |
| "loss": 0.2107, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.9901275491820999, | |
| "grad_norm": 3.0526680946350098, | |
| "learning_rate": 1.031866464339909e-07, | |
| "loss": 0.2426, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.9915687828781437, | |
| "grad_norm": 4.16213321685791, | |
| "learning_rate": 8.801213960546283e-08, | |
| "loss": 0.2483, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9930100165741875, | |
| "grad_norm": 2.5456955432891846, | |
| "learning_rate": 7.283763277693476e-08, | |
| "loss": 0.199, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.9944512502702313, | |
| "grad_norm": 3.8972580432891846, | |
| "learning_rate": 5.7663125948406686e-08, | |
| "loss": 0.2504, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9958924839662752, | |
| "grad_norm": 4.49379301071167, | |
| "learning_rate": 4.248861911987861e-08, | |
| "loss": 0.2226, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.9973337176623189, | |
| "grad_norm": 3.561121702194214, | |
| "learning_rate": 2.7314112291350533e-08, | |
| "loss": 0.2555, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9987749513583628, | |
| "grad_norm": 4.0361647605896, | |
| "learning_rate": 1.213960546282246e-08, | |
| "loss": 0.2121, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.9999279383151978, | |
| "step": 3469, | |
| "total_flos": 5.881871499303322e+16, | |
| "train_loss": 0.2843814373497456, | |
| "train_runtime": 1907.7343, | |
| "train_samples_per_second": 232.764, | |
| "train_steps_per_second": 1.818 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3469, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.881871499303322e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |