| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 35.5, |
| "eval_steps": 100, |
| "global_step": 142000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.7927406430244446, |
| "learning_rate": 5.82e-05, |
| "loss": 203.8328, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.901040256023407, |
| "learning_rate": 0.0001182, |
| "loss": 181.6551, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.14473982155323029, |
| "learning_rate": 0.00017819999999999997, |
| "loss": 174.6394, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.13423211872577667, |
| "learning_rate": 0.0002382, |
| "loss": 171.818, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.13924159109592438, |
| "learning_rate": 0.0002982, |
| "loss": 168.1486, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.11850500851869583, |
| "learning_rate": 0.000299996362272642, |
| "loss": 162.8829, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.15106040239334106, |
| "learning_rate": 0.0002999926120382524, |
| "loss": 158.5516, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.11745048314332962, |
| "learning_rate": 0.0002999888618038627, |
| "loss": 154.1395, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.22588345408439636, |
| "learning_rate": 0.00029998511156947307, |
| "loss": 150.8583, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.1475830227136612, |
| "learning_rate": 0.0002999813613350834, |
| "loss": 148.7021, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.14757394790649414, |
| "learning_rate": 0.00029997761110069375, |
| "loss": 145.111, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.13360479474067688, |
| "learning_rate": 0.0002999738608663041, |
| "loss": 142.679, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.11122659593820572, |
| "learning_rate": 0.0002999701106319145, |
| "loss": 140.4614, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.10133378952741623, |
| "learning_rate": 0.0002999663603975248, |
| "loss": 137.84, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.12196547538042068, |
| "learning_rate": 0.00029996261016313516, |
| "loss": 136.1062, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.09694620966911316, |
| "learning_rate": 0.0002999588599287455, |
| "loss": 134.5708, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.14449502527713776, |
| "learning_rate": 0.0002999551096943559, |
| "loss": 131.672, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.10163229703903198, |
| "learning_rate": 0.0002999513594599662, |
| "loss": 128.9171, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.09789746254682541, |
| "learning_rate": 0.00029994760922557657, |
| "loss": 127.3757, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.0996888279914856, |
| "learning_rate": 0.00029994385899118693, |
| "loss": 124.4876, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.08484259247779846, |
| "learning_rate": 0.0002999401087567973, |
| "loss": 122.1805, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.11729967594146729, |
| "learning_rate": 0.0002999363585224076, |
| "loss": 117.8535, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.1445324867963791, |
| "learning_rate": 0.000299932608288018, |
| "loss": 116.244, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.11317744106054306, |
| "learning_rate": 0.0002999288580536283, |
| "loss": 113.5543, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.09375651925802231, |
| "learning_rate": 0.0002999251078192387, |
| "loss": 110.8541, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.0896710455417633, |
| "learning_rate": 0.000299921357584849, |
| "loss": 110.1387, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.09820675849914551, |
| "learning_rate": 0.0002999176073504594, |
| "loss": 107.0062, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.09842734783887863, |
| "learning_rate": 0.0002999138571160697, |
| "loss": 105.1786, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.09370853751897812, |
| "learning_rate": 0.00029991010688168007, |
| "loss": 103.8245, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.12121213972568512, |
| "learning_rate": 0.00029990635664729043, |
| "loss": 101.6897, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.09974240511655807, |
| "learning_rate": 0.0002999026064129008, |
| "loss": 100.3376, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.09277965873479843, |
| "learning_rate": 0.0002998988561785111, |
| "loss": 99.2098, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.12521271407604218, |
| "learning_rate": 0.0002998951059441215, |
| "loss": 98.4138, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.1051282286643982, |
| "learning_rate": 0.00029989135570973184, |
| "loss": 99.5873, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.13997547328472137, |
| "learning_rate": 0.0002998876054753422, |
| "loss": 97.4617, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.1003558561205864, |
| "learning_rate": 0.0002998838552409525, |
| "loss": 96.093, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.09967362880706787, |
| "learning_rate": 0.0002998801050065629, |
| "loss": 93.6796, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.13389019668102264, |
| "learning_rate": 0.00029987635477217325, |
| "loss": 92.9668, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 0.10552455484867096, |
| "learning_rate": 0.0002998726045377836, |
| "loss": 91.9125, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.10877016931772232, |
| "learning_rate": 0.00029986885430339393, |
| "loss": 91.2492, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.025, |
| "grad_norm": 0.09188541024923325, |
| "learning_rate": 0.0002998651040690043, |
| "loss": 88.3832, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.10517989099025726, |
| "learning_rate": 0.0002998613538346146, |
| "loss": 87.4386, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.075, |
| "grad_norm": 0.08605173230171204, |
| "learning_rate": 0.000299857603600225, |
| "loss": 86.7098, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.13910797238349915, |
| "learning_rate": 0.00029985385336583534, |
| "loss": 85.1566, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.08505425602197647, |
| "learning_rate": 0.00029985010313144565, |
| "loss": 86.1376, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.10330720990896225, |
| "learning_rate": 0.000299846352897056, |
| "loss": 84.9761, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.175, |
| "grad_norm": 0.1150883138179779, |
| "learning_rate": 0.0002998426026626664, |
| "loss": 83.4733, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.08464270830154419, |
| "learning_rate": 0.00029983885242827675, |
| "loss": 84.0231, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.225, |
| "grad_norm": 0.11479545384645462, |
| "learning_rate": 0.00029983510219388707, |
| "loss": 82.2074, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.10978193581104279, |
| "learning_rate": 0.00029983135195949743, |
| "loss": 81.2586, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.275, |
| "grad_norm": 0.10087323933839798, |
| "learning_rate": 0.0002998276017251078, |
| "loss": 80.0028, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.0992458313703537, |
| "learning_rate": 0.00029982385149071816, |
| "loss": 81.4542, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.325, |
| "grad_norm": 0.08898110687732697, |
| "learning_rate": 0.0002998201012563285, |
| "loss": 80.3485, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.11424868553876877, |
| "learning_rate": 0.00029981635102193884, |
| "loss": 79.4734, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 0.09483993798494339, |
| "learning_rate": 0.0002998126007875492, |
| "loss": 78.8044, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.08650317788124084, |
| "learning_rate": 0.0002998088505531596, |
| "loss": 78.476, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.425, |
| "grad_norm": 0.08040408045053482, |
| "learning_rate": 0.0002998051003187699, |
| "loss": 77.8633, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.08953177183866501, |
| "learning_rate": 0.00029980135008438025, |
| "loss": 76.5257, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.475, |
| "grad_norm": 0.10908912867307663, |
| "learning_rate": 0.00029979759984999056, |
| "loss": 76.2689, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.12598766386508942, |
| "learning_rate": 0.00029979384961560093, |
| "loss": 76.7776, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.525, |
| "grad_norm": 0.0955086201429367, |
| "learning_rate": 0.0002997900993812113, |
| "loss": 76.5905, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.08597240597009659, |
| "learning_rate": 0.00029978634914682166, |
| "loss": 74.2009, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.575, |
| "grad_norm": 0.08754386007785797, |
| "learning_rate": 0.000299782598912432, |
| "loss": 74.1175, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.12214329093694687, |
| "learning_rate": 0.00029977884867804234, |
| "loss": 73.2265, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.08221092820167542, |
| "learning_rate": 0.0002997750984436527, |
| "loss": 72.1494, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.1369631290435791, |
| "learning_rate": 0.0002997713482092631, |
| "loss": 73.5853, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.675, |
| "grad_norm": 0.0787581205368042, |
| "learning_rate": 0.0002997675979748734, |
| "loss": 72.0935, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.07737889885902405, |
| "learning_rate": 0.00029976384774048375, |
| "loss": 71.3515, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.725, |
| "grad_norm": 0.11298476159572601, |
| "learning_rate": 0.0002997600975060941, |
| "loss": 71.5356, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.07955294102430344, |
| "learning_rate": 0.0002997563472717045, |
| "loss": 71.9312, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.775, |
| "grad_norm": 0.11449731886386871, |
| "learning_rate": 0.0002997525970373148, |
| "loss": 70.1805, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.07159914076328278, |
| "learning_rate": 0.00029974884680292516, |
| "loss": 70.1074, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.825, |
| "grad_norm": 0.07785623520612717, |
| "learning_rate": 0.00029974509656853553, |
| "loss": 70.5433, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.0750761404633522, |
| "learning_rate": 0.0002997413463341459, |
| "loss": 68.6654, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.0909292995929718, |
| "learning_rate": 0.0002997375960997562, |
| "loss": 69.5312, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.1320108026266098, |
| "learning_rate": 0.00029973384586536657, |
| "loss": 67.3222, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.925, |
| "grad_norm": 0.12221457809209824, |
| "learning_rate": 0.0002997300956309769, |
| "loss": 66.3137, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.11239924281835556, |
| "learning_rate": 0.00029972634539658725, |
| "loss": 67.8054, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.975, |
| "grad_norm": 0.0858956053853035, |
| "learning_rate": 0.0002997225951621976, |
| "loss": 67.9956, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.10778280347585678, |
| "learning_rate": 0.000299718844927808, |
| "loss": 66.5141, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.025, |
| "grad_norm": 0.10166219621896744, |
| "learning_rate": 0.0002997150946934183, |
| "loss": 65.9891, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.09062575548887253, |
| "learning_rate": 0.00029971134445902866, |
| "loss": 67.5705, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.075, |
| "grad_norm": 0.0936209186911583, |
| "learning_rate": 0.000299707594224639, |
| "loss": 65.6743, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.08781470358371735, |
| "learning_rate": 0.00029970384399024934, |
| "loss": 66.3408, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 0.18813404440879822, |
| "learning_rate": 0.0002997000937558597, |
| "loss": 65.7238, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.09089367091655731, |
| "learning_rate": 0.00029969634352147007, |
| "loss": 64.8326, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.175, |
| "grad_norm": 0.09775424748659134, |
| "learning_rate": 0.00029969259328708044, |
| "loss": 64.9571, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.07110758870840073, |
| "learning_rate": 0.00029968888055503464, |
| "loss": 64.1227, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.225, |
| "grad_norm": 0.08944450318813324, |
| "learning_rate": 0.000299685130320645, |
| "loss": 63.0563, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.0880662053823471, |
| "learning_rate": 0.0002996813800862554, |
| "loss": 63.5158, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.275, |
| "grad_norm": 0.08363056182861328, |
| "learning_rate": 0.00029967762985186574, |
| "loss": 63.1458, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.0970577672123909, |
| "learning_rate": 0.00029967387961747605, |
| "loss": 63.6672, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.325, |
| "grad_norm": 0.07709024846553802, |
| "learning_rate": 0.0002996701293830864, |
| "loss": 62.5691, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.09662684798240662, |
| "learning_rate": 0.00029966637914869673, |
| "loss": 63.201, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.09886329621076584, |
| "learning_rate": 0.0002996626289143071, |
| "loss": 61.905, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.09152296930551529, |
| "learning_rate": 0.00029965887867991746, |
| "loss": 62.0162, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.425, |
| "grad_norm": 0.08669120818376541, |
| "learning_rate": 0.00029965512844552783, |
| "loss": 61.177, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.08084509521722794, |
| "learning_rate": 0.00029965137821113814, |
| "loss": 60.4171, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.475, |
| "grad_norm": 0.07486914098262787, |
| "learning_rate": 0.0002996476279767485, |
| "loss": 60.7016, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.09742671251296997, |
| "learning_rate": 0.0002996438777423589, |
| "loss": 60.1792, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.525, |
| "grad_norm": 0.0987100750207901, |
| "learning_rate": 0.00029964012750796924, |
| "loss": 61.4537, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 0.06886423379182816, |
| "learning_rate": 0.00029963637727357955, |
| "loss": 61.8643, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.575, |
| "grad_norm": 0.082525834441185, |
| "learning_rate": 0.0002996326270391899, |
| "loss": 60.4919, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.08272566646337509, |
| "learning_rate": 0.0002996288768048003, |
| "loss": 60.0661, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 0.09038376808166504, |
| "learning_rate": 0.00029962512657041065, |
| "loss": 60.936, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.07726665586233139, |
| "learning_rate": 0.00029962137633602096, |
| "loss": 59.5663, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.675, |
| "grad_norm": 0.07424433529376984, |
| "learning_rate": 0.00029961762610163133, |
| "loss": 59.158, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.07766600698232651, |
| "learning_rate": 0.0002996138758672417, |
| "loss": 60.6268, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.725, |
| "grad_norm": 0.06614714115858078, |
| "learning_rate": 0.00029961012563285206, |
| "loss": 59.6028, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.10867344588041306, |
| "learning_rate": 0.0002996063753984624, |
| "loss": 58.8979, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.775, |
| "grad_norm": 0.08278031647205353, |
| "learning_rate": 0.00029960262516407274, |
| "loss": 58.4585, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.0777415856719017, |
| "learning_rate": 0.00029959887492968305, |
| "loss": 58.2955, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.825, |
| "grad_norm": 0.08938944339752197, |
| "learning_rate": 0.0002995951246952934, |
| "loss": 58.4243, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 0.07335088402032852, |
| "learning_rate": 0.0002995913744609038, |
| "loss": 58.3433, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.08737402409315109, |
| "learning_rate": 0.00029958762422651415, |
| "loss": 58.083, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.08511873334646225, |
| "learning_rate": 0.00029958387399212446, |
| "loss": 57.179, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.925, |
| "grad_norm": 0.10887938737869263, |
| "learning_rate": 0.00029958012375773483, |
| "loss": 56.4871, |
| "step": 11700 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 0.06436943262815475, |
| "learning_rate": 0.0002995763735233452, |
| "loss": 56.647, |
| "step": 11800 |
| }, |
| { |
| "epoch": 2.975, |
| "grad_norm": 0.0767776370048523, |
| "learning_rate": 0.00029957262328895556, |
| "loss": 56.8327, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.07136838138103485, |
| "learning_rate": 0.0002995688730545659, |
| "loss": 56.1021, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.025, |
| "grad_norm": 0.07126389443874359, |
| "learning_rate": 0.00029956512282017624, |
| "loss": 54.9375, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.05, |
| "grad_norm": 0.08064913004636765, |
| "learning_rate": 0.0002995613725857866, |
| "loss": 55.8513, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.075, |
| "grad_norm": 0.09110742062330246, |
| "learning_rate": 0.0002995576223513969, |
| "loss": 55.3327, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 0.0769059956073761, |
| "learning_rate": 0.0002995538721170073, |
| "loss": 54.0639, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 0.06642630696296692, |
| "learning_rate": 0.0002995501218826176, |
| "loss": 53.5245, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.15, |
| "grad_norm": 0.07648100703954697, |
| "learning_rate": 0.000299546371648228, |
| "loss": 53.7525, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.175, |
| "grad_norm": 0.07088977843523026, |
| "learning_rate": 0.00029954262141383833, |
| "loss": 52.302, |
| "step": 12700 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.07282839715480804, |
| "learning_rate": 0.0002995388711794487, |
| "loss": 52.6612, |
| "step": 12800 |
| }, |
| { |
| "epoch": 3.225, |
| "grad_norm": 0.07733161747455597, |
| "learning_rate": 0.000299535120945059, |
| "loss": 51.6131, |
| "step": 12900 |
| }, |
| { |
| "epoch": 3.25, |
| "grad_norm": 0.06774196773767471, |
| "learning_rate": 0.00029953137071066937, |
| "loss": 51.9959, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.275, |
| "grad_norm": 0.08115985989570618, |
| "learning_rate": 0.00029952762047627974, |
| "loss": 49.8227, |
| "step": 13100 |
| }, |
| { |
| "epoch": 3.3, |
| "grad_norm": 0.0886857658624649, |
| "learning_rate": 0.0002995238702418901, |
| "loss": 50.5718, |
| "step": 13200 |
| }, |
| { |
| "epoch": 3.325, |
| "grad_norm": 0.07071532309055328, |
| "learning_rate": 0.0002995201200075004, |
| "loss": 51.6469, |
| "step": 13300 |
| }, |
| { |
| "epoch": 3.35, |
| "grad_norm": 0.09553579241037369, |
| "learning_rate": 0.0002995163697731108, |
| "loss": 50.2462, |
| "step": 13400 |
| }, |
| { |
| "epoch": 3.375, |
| "grad_norm": 0.07065360993146896, |
| "learning_rate": 0.00029951261953872115, |
| "loss": 49.4932, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.07770080119371414, |
| "learning_rate": 0.0002995088693043315, |
| "loss": 49.8068, |
| "step": 13600 |
| }, |
| { |
| "epoch": 3.425, |
| "grad_norm": 0.08060113340616226, |
| "learning_rate": 0.0002995051190699418, |
| "loss": 48.4129, |
| "step": 13700 |
| }, |
| { |
| "epoch": 3.45, |
| "grad_norm": 0.07022694498300552, |
| "learning_rate": 0.0002995013688355522, |
| "loss": 48.5766, |
| "step": 13800 |
| }, |
| { |
| "epoch": 3.475, |
| "grad_norm": 0.08857674151659012, |
| "learning_rate": 0.00029949761860116256, |
| "loss": 47.6903, |
| "step": 13900 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.069500632584095, |
| "learning_rate": 0.0002994938683667729, |
| "loss": 48.2677, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.525, |
| "grad_norm": 0.08871123939752579, |
| "learning_rate": 0.00029949011813238324, |
| "loss": 46.9917, |
| "step": 14100 |
| }, |
| { |
| "epoch": 3.55, |
| "grad_norm": 0.08282507210969925, |
| "learning_rate": 0.0002994863678979936, |
| "loss": 47.6174, |
| "step": 14200 |
| }, |
| { |
| "epoch": 3.575, |
| "grad_norm": 0.07892107963562012, |
| "learning_rate": 0.0002994826176636039, |
| "loss": 47.7429, |
| "step": 14300 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.08358065783977509, |
| "learning_rate": 0.00029947886742921434, |
| "loss": 46.8444, |
| "step": 14400 |
| }, |
| { |
| "epoch": 3.625, |
| "grad_norm": 0.08042451739311218, |
| "learning_rate": 0.00029947511719482465, |
| "loss": 47.1196, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.65, |
| "grad_norm": 0.07715913653373718, |
| "learning_rate": 0.000299471366960435, |
| "loss": 46.1787, |
| "step": 14600 |
| }, |
| { |
| "epoch": 3.675, |
| "grad_norm": 0.07201175391674042, |
| "learning_rate": 0.0002994676167260453, |
| "loss": 44.82, |
| "step": 14700 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 0.07503117620944977, |
| "learning_rate": 0.0002994638664916557, |
| "loss": 45.3985, |
| "step": 14800 |
| }, |
| { |
| "epoch": 3.725, |
| "grad_norm": 0.08126576244831085, |
| "learning_rate": 0.00029946011625726606, |
| "loss": 44.4742, |
| "step": 14900 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 0.07859744131565094, |
| "learning_rate": 0.0002994563660228764, |
| "loss": 44.9098, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.775, |
| "grad_norm": 0.09183020889759064, |
| "learning_rate": 0.00029945261578848674, |
| "loss": 44.9649, |
| "step": 15100 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.07173748314380646, |
| "learning_rate": 0.0002994488655540971, |
| "loss": 44.2067, |
| "step": 15200 |
| }, |
| { |
| "epoch": 3.825, |
| "grad_norm": 0.07911107689142227, |
| "learning_rate": 0.00029944511531970747, |
| "loss": 43.3721, |
| "step": 15300 |
| }, |
| { |
| "epoch": 3.85, |
| "grad_norm": 0.0707039088010788, |
| "learning_rate": 0.00029944136508531783, |
| "loss": 43.5256, |
| "step": 15400 |
| }, |
| { |
| "epoch": 3.875, |
| "grad_norm": 0.08927769958972931, |
| "learning_rate": 0.00029943761485092815, |
| "loss": 42.8865, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.9, |
| "grad_norm": 0.0942542776465416, |
| "learning_rate": 0.0002994338646165385, |
| "loss": 43.4099, |
| "step": 15600 |
| }, |
| { |
| "epoch": 3.925, |
| "grad_norm": 0.07037200033664703, |
| "learning_rate": 0.0002994301143821489, |
| "loss": 43.2838, |
| "step": 15700 |
| }, |
| { |
| "epoch": 3.95, |
| "grad_norm": 0.07836440950632095, |
| "learning_rate": 0.00029942636414775924, |
| "loss": 42.6156, |
| "step": 15800 |
| }, |
| { |
| "epoch": 3.975, |
| "grad_norm": 0.1048571839928627, |
| "learning_rate": 0.00029942261391336956, |
| "loss": 41.1921, |
| "step": 15900 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.07439113408327103, |
| "learning_rate": 0.0002994188636789799, |
| "loss": 40.3632, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.025, |
| "grad_norm": 0.07776340842247009, |
| "learning_rate": 0.00029941511344459023, |
| "loss": 41.4027, |
| "step": 16100 |
| }, |
| { |
| "epoch": 4.05, |
| "grad_norm": 0.08847617357969284, |
| "learning_rate": 0.0002994113632102006, |
| "loss": 39.8482, |
| "step": 16200 |
| }, |
| { |
| "epoch": 4.075, |
| "grad_norm": 0.07630669325590134, |
| "learning_rate": 0.00029940761297581097, |
| "loss": 39.8514, |
| "step": 16300 |
| }, |
| { |
| "epoch": 4.1, |
| "grad_norm": 0.09090664237737656, |
| "learning_rate": 0.0002994038627414213, |
| "loss": 39.827, |
| "step": 16400 |
| }, |
| { |
| "epoch": 4.125, |
| "grad_norm": 0.07954572886228561, |
| "learning_rate": 0.00029940011250703164, |
| "loss": 39.1342, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.15, |
| "grad_norm": 0.09102310240268707, |
| "learning_rate": 0.000299396362272642, |
| "loss": 39.2371, |
| "step": 16600 |
| }, |
| { |
| "epoch": 4.175, |
| "grad_norm": 0.08122776448726654, |
| "learning_rate": 0.0002993926120382524, |
| "loss": 38.2627, |
| "step": 16700 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.0793018564581871, |
| "learning_rate": 0.0002993888618038627, |
| "loss": 37.7778, |
| "step": 16800 |
| }, |
| { |
| "epoch": 4.225, |
| "grad_norm": 0.08967263251543045, |
| "learning_rate": 0.00029938511156947306, |
| "loss": 37.3333, |
| "step": 16900 |
| }, |
| { |
| "epoch": 4.25, |
| "grad_norm": 0.08178253471851349, |
| "learning_rate": 0.0002993813613350834, |
| "loss": 37.0271, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.275, |
| "grad_norm": 0.07139851152896881, |
| "learning_rate": 0.0002993776111006938, |
| "loss": 36.2547, |
| "step": 17100 |
| }, |
| { |
| "epoch": 4.3, |
| "grad_norm": 0.0816299095749855, |
| "learning_rate": 0.0002993738608663041, |
| "loss": 35.7427, |
| "step": 17200 |
| }, |
| { |
| "epoch": 4.325, |
| "grad_norm": 0.08794036507606506, |
| "learning_rate": 0.00029937011063191447, |
| "loss": 36.1878, |
| "step": 17300 |
| }, |
| { |
| "epoch": 4.35, |
| "grad_norm": 0.07489024847745895, |
| "learning_rate": 0.00029936636039752483, |
| "loss": 35.8839, |
| "step": 17400 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 0.07704652100801468, |
| "learning_rate": 0.0002993626101631352, |
| "loss": 34.6569, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.08644381910562515, |
| "learning_rate": 0.0002993588974310894, |
| "loss": 36.0711, |
| "step": 17600 |
| }, |
| { |
| "epoch": 4.425, |
| "grad_norm": 0.0718245580792427, |
| "learning_rate": 0.00029935514719669977, |
| "loss": 34.2787, |
| "step": 17700 |
| }, |
| { |
| "epoch": 4.45, |
| "grad_norm": 0.06881660968065262, |
| "learning_rate": 0.0002993513969623101, |
| "loss": 34.3262, |
| "step": 17800 |
| }, |
| { |
| "epoch": 4.475, |
| "grad_norm": 0.09241487085819244, |
| "learning_rate": 0.00029934764672792045, |
| "loss": 32.8671, |
| "step": 17900 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.10901615768671036, |
| "learning_rate": 0.0002993438964935308, |
| "loss": 32.8513, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.525, |
| "grad_norm": 0.10043422877788544, |
| "learning_rate": 0.0002993401462591412, |
| "loss": 33.2156, |
| "step": 18100 |
| }, |
| { |
| "epoch": 4.55, |
| "grad_norm": 0.0931539386510849, |
| "learning_rate": 0.0002993363960247515, |
| "loss": 32.9817, |
| "step": 18200 |
| }, |
| { |
| "epoch": 4.575, |
| "grad_norm": 0.07910791784524918, |
| "learning_rate": 0.00029933264579036186, |
| "loss": 32.266, |
| "step": 18300 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.07403460144996643, |
| "learning_rate": 0.0002993288955559722, |
| "loss": 32.3611, |
| "step": 18400 |
| }, |
| { |
| "epoch": 4.625, |
| "grad_norm": 0.0901438444852829, |
| "learning_rate": 0.0002993251453215826, |
| "loss": 31.6647, |
| "step": 18500 |
| }, |
| { |
| "epoch": 4.65, |
| "grad_norm": 0.08572247624397278, |
| "learning_rate": 0.0002993213950871929, |
| "loss": 31.4374, |
| "step": 18600 |
| }, |
| { |
| "epoch": 4.675, |
| "grad_norm": 0.10135528445243835, |
| "learning_rate": 0.00029931764485280327, |
| "loss": 30.899, |
| "step": 18700 |
| }, |
| { |
| "epoch": 4.7, |
| "grad_norm": 0.07215873152017593, |
| "learning_rate": 0.00029931389461841364, |
| "loss": 30.9789, |
| "step": 18800 |
| }, |
| { |
| "epoch": 4.725, |
| "grad_norm": 0.08922874182462692, |
| "learning_rate": 0.000299310144384024, |
| "loss": 30.7143, |
| "step": 18900 |
| }, |
| { |
| "epoch": 4.75, |
| "grad_norm": 0.08180548250675201, |
| "learning_rate": 0.0002993063941496343, |
| "loss": 30.1035, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.775, |
| "grad_norm": 0.07757364213466644, |
| "learning_rate": 0.0002993026439152447, |
| "loss": 29.8003, |
| "step": 19100 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.09399455040693283, |
| "learning_rate": 0.00029929889368085505, |
| "loss": 29.8595, |
| "step": 19200 |
| }, |
| { |
| "epoch": 4.825, |
| "grad_norm": 0.08426772803068161, |
| "learning_rate": 0.0002992951434464654, |
| "loss": 29.8153, |
| "step": 19300 |
| }, |
| { |
| "epoch": 4.85, |
| "grad_norm": 0.08488670736551285, |
| "learning_rate": 0.0002992913932120757, |
| "loss": 29.5577, |
| "step": 19400 |
| }, |
| { |
| "epoch": 4.875, |
| "grad_norm": 0.06904991716146469, |
| "learning_rate": 0.0002992876429776861, |
| "loss": 28.5755, |
| "step": 19500 |
| }, |
| { |
| "epoch": 4.9, |
| "grad_norm": 0.11179706454277039, |
| "learning_rate": 0.0002992838927432964, |
| "loss": 28.8428, |
| "step": 19600 |
| }, |
| { |
| "epoch": 4.925, |
| "grad_norm": 0.0724404975771904, |
| "learning_rate": 0.00029928014250890677, |
| "loss": 28.2313, |
| "step": 19700 |
| }, |
| { |
| "epoch": 4.95, |
| "grad_norm": 0.08049552142620087, |
| "learning_rate": 0.00029927639227451714, |
| "loss": 27.1596, |
| "step": 19800 |
| }, |
| { |
| "epoch": 4.975, |
| "grad_norm": 0.07410436868667603, |
| "learning_rate": 0.0002992726420401275, |
| "loss": 26.9374, |
| "step": 19900 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.0729108527302742, |
| "learning_rate": 0.0002992688918057378, |
| "loss": 27.3767, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.025, |
| "grad_norm": 0.0834740698337555, |
| "learning_rate": 0.0002992651790736921, |
| "loss": 26.5892, |
| "step": 20100 |
| }, |
| { |
| "epoch": 5.05, |
| "grad_norm": 0.07734266668558121, |
| "learning_rate": 0.00029926142883930244, |
| "loss": 26.4578, |
| "step": 20200 |
| }, |
| { |
| "epoch": 5.075, |
| "grad_norm": 0.07236121594905853, |
| "learning_rate": 0.00029925767860491275, |
| "loss": 27.4309, |
| "step": 20300 |
| }, |
| { |
| "epoch": 5.1, |
| "grad_norm": 0.07896186411380768, |
| "learning_rate": 0.0002992539283705231, |
| "loss": 26.7645, |
| "step": 20400 |
| }, |
| { |
| "epoch": 5.125, |
| "grad_norm": 0.09544118493795395, |
| "learning_rate": 0.0002992501781361335, |
| "loss": 26.149, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.15, |
| "grad_norm": 0.07782524079084396, |
| "learning_rate": 0.00029924642790174385, |
| "loss": 25.7688, |
| "step": 20600 |
| }, |
| { |
| "epoch": 5.175, |
| "grad_norm": 0.07927709072828293, |
| "learning_rate": 0.00029924267766735416, |
| "loss": 25.8487, |
| "step": 20700 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.07417237758636475, |
| "learning_rate": 0.00029923892743296453, |
| "loss": 25.6094, |
| "step": 20800 |
| }, |
| { |
| "epoch": 5.225, |
| "grad_norm": 0.09987534582614899, |
| "learning_rate": 0.0002992352147009188, |
| "loss": 25.3336, |
| "step": 20900 |
| }, |
| { |
| "epoch": 5.25, |
| "grad_norm": 0.08160518109798431, |
| "learning_rate": 0.00029923146446652916, |
| "loss": 25.2813, |
| "step": 21000 |
| }, |
| { |
| "epoch": 5.275, |
| "grad_norm": 0.07650009542703629, |
| "learning_rate": 0.00029922771423213947, |
| "loss": 25.0793, |
| "step": 21100 |
| }, |
| { |
| "epoch": 5.3, |
| "grad_norm": 0.07089775055646896, |
| "learning_rate": 0.00029922396399774983, |
| "loss": 24.9184, |
| "step": 21200 |
| }, |
| { |
| "epoch": 5.325, |
| "grad_norm": 0.10953019559383392, |
| "learning_rate": 0.00029922021376336015, |
| "loss": 24.5976, |
| "step": 21300 |
| }, |
| { |
| "epoch": 5.35, |
| "grad_norm": 0.07163265347480774, |
| "learning_rate": 0.00029921646352897057, |
| "loss": 24.3399, |
| "step": 21400 |
| }, |
| { |
| "epoch": 5.375, |
| "grad_norm": 0.08414668589830399, |
| "learning_rate": 0.0002992127132945809, |
| "loss": 23.6757, |
| "step": 21500 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 0.07715445011854172, |
| "learning_rate": 0.00029920896306019125, |
| "loss": 24.2548, |
| "step": 21600 |
| }, |
| { |
| "epoch": 5.425, |
| "grad_norm": 0.1033063754439354, |
| "learning_rate": 0.00029920521282580156, |
| "loss": 23.3908, |
| "step": 21700 |
| }, |
| { |
| "epoch": 5.45, |
| "grad_norm": 0.0769144669175148, |
| "learning_rate": 0.0002992014625914119, |
| "loss": 23.693, |
| "step": 21800 |
| }, |
| { |
| "epoch": 5.475, |
| "grad_norm": 0.07799799740314484, |
| "learning_rate": 0.0002991977123570223, |
| "loss": 23.9314, |
| "step": 21900 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.07105720043182373, |
| "learning_rate": 0.00029919396212263266, |
| "loss": 23.2387, |
| "step": 22000 |
| }, |
| { |
| "epoch": 5.525, |
| "grad_norm": 0.0878797098994255, |
| "learning_rate": 0.00029919021188824297, |
| "loss": 22.7268, |
| "step": 22100 |
| }, |
| { |
| "epoch": 5.55, |
| "grad_norm": 0.0924353376030922, |
| "learning_rate": 0.00029918646165385333, |
| "loss": 23.1994, |
| "step": 22200 |
| }, |
| { |
| "epoch": 5.575, |
| "grad_norm": 0.09924343973398209, |
| "learning_rate": 0.0002991827114194637, |
| "loss": 22.7976, |
| "step": 22300 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.0845380574464798, |
| "learning_rate": 0.00029917896118507407, |
| "loss": 22.6053, |
| "step": 22400 |
| }, |
| { |
| "epoch": 5.625, |
| "grad_norm": 0.09131123870611191, |
| "learning_rate": 0.0002991752109506844, |
| "loss": 22.813, |
| "step": 22500 |
| }, |
| { |
| "epoch": 5.65, |
| "grad_norm": 0.08501371741294861, |
| "learning_rate": 0.00029917146071629474, |
| "loss": 22.3981, |
| "step": 22600 |
| }, |
| { |
| "epoch": 5.675, |
| "grad_norm": 0.10916517674922943, |
| "learning_rate": 0.0002991677104819051, |
| "loss": 21.6828, |
| "step": 22700 |
| }, |
| { |
| "epoch": 5.7, |
| "grad_norm": 0.08462018519639969, |
| "learning_rate": 0.0002991639602475155, |
| "loss": 22.0131, |
| "step": 22800 |
| }, |
| { |
| "epoch": 5.725, |
| "grad_norm": 0.09394313395023346, |
| "learning_rate": 0.0002991602100131258, |
| "loss": 21.7932, |
| "step": 22900 |
| }, |
| { |
| "epoch": 5.75, |
| "grad_norm": 0.08408233523368835, |
| "learning_rate": 0.00029915645977873615, |
| "loss": 21.8634, |
| "step": 23000 |
| }, |
| { |
| "epoch": 5.775, |
| "grad_norm": 0.0706961527466774, |
| "learning_rate": 0.00029915270954434647, |
| "loss": 21.6353, |
| "step": 23100 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 0.08162959665060043, |
| "learning_rate": 0.00029914895930995683, |
| "loss": 21.356, |
| "step": 23200 |
| }, |
| { |
| "epoch": 5.825, |
| "grad_norm": 0.08196116983890533, |
| "learning_rate": 0.0002991452090755672, |
| "loss": 21.3074, |
| "step": 23300 |
| }, |
| { |
| "epoch": 5.85, |
| "grad_norm": 0.07449360191822052, |
| "learning_rate": 0.00029914145884117756, |
| "loss": 21.2129, |
| "step": 23400 |
| }, |
| { |
| "epoch": 5.875, |
| "grad_norm": 0.08260208368301392, |
| "learning_rate": 0.0002991377086067879, |
| "loss": 20.7806, |
| "step": 23500 |
| }, |
| { |
| "epoch": 5.9, |
| "grad_norm": 0.07383255660533905, |
| "learning_rate": 0.00029913395837239824, |
| "loss": 20.9318, |
| "step": 23600 |
| }, |
| { |
| "epoch": 5.925, |
| "grad_norm": 0.08240984380245209, |
| "learning_rate": 0.0002991302081380086, |
| "loss": 20.5751, |
| "step": 23700 |
| }, |
| { |
| "epoch": 5.95, |
| "grad_norm": 0.06921262294054031, |
| "learning_rate": 0.000299126457903619, |
| "loss": 20.9214, |
| "step": 23800 |
| }, |
| { |
| "epoch": 5.975, |
| "grad_norm": 0.07990318536758423, |
| "learning_rate": 0.0002991227076692293, |
| "loss": 20.6422, |
| "step": 23900 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.083002008497715, |
| "learning_rate": 0.00029911895743483965, |
| "loss": 19.8315, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.025, |
| "grad_norm": 0.08495783805847168, |
| "learning_rate": 0.00029911520720045, |
| "loss": 20.1271, |
| "step": 24100 |
| }, |
| { |
| "epoch": 6.05, |
| "grad_norm": 0.1061740592122078, |
| "learning_rate": 0.00029911145696606033, |
| "loss": 20.1241, |
| "step": 24200 |
| }, |
| { |
| "epoch": 6.075, |
| "grad_norm": 0.08326783776283264, |
| "learning_rate": 0.0002991077067316707, |
| "loss": 19.5344, |
| "step": 24300 |
| }, |
| { |
| "epoch": 6.1, |
| "grad_norm": 0.08668112009763718, |
| "learning_rate": 0.00029910395649728106, |
| "loss": 19.8691, |
| "step": 24400 |
| }, |
| { |
| "epoch": 6.125, |
| "grad_norm": 0.07595008611679077, |
| "learning_rate": 0.00029910020626289143, |
| "loss": 19.5726, |
| "step": 24500 |
| }, |
| { |
| "epoch": 6.15, |
| "grad_norm": 0.09996142983436584, |
| "learning_rate": 0.00029909645602850174, |
| "loss": 19.3215, |
| "step": 24600 |
| }, |
| { |
| "epoch": 6.175, |
| "grad_norm": 0.07515228539705276, |
| "learning_rate": 0.0002990927057941121, |
| "loss": 19.5642, |
| "step": 24700 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 0.06983605772256851, |
| "learning_rate": 0.0002990889555597224, |
| "loss": 19.1783, |
| "step": 24800 |
| }, |
| { |
| "epoch": 6.225, |
| "grad_norm": 0.07114838808774948, |
| "learning_rate": 0.0002990852053253328, |
| "loss": 19.0791, |
| "step": 24900 |
| }, |
| { |
| "epoch": 6.25, |
| "grad_norm": 0.08623602986335754, |
| "learning_rate": 0.00029908145509094315, |
| "loss": 19.5374, |
| "step": 25000 |
| }, |
| { |
| "epoch": 6.275, |
| "grad_norm": 0.09096742421388626, |
| "learning_rate": 0.0002990777048565535, |
| "loss": 18.8189, |
| "step": 25100 |
| }, |
| { |
| "epoch": 6.3, |
| "grad_norm": 0.08167672157287598, |
| "learning_rate": 0.00029907395462216383, |
| "loss": 18.4164, |
| "step": 25200 |
| }, |
| { |
| "epoch": 6.325, |
| "grad_norm": 0.08562010526657104, |
| "learning_rate": 0.0002990702043877742, |
| "loss": 18.3827, |
| "step": 25300 |
| }, |
| { |
| "epoch": 6.35, |
| "grad_norm": 0.08020398765802383, |
| "learning_rate": 0.00029906645415338456, |
| "loss": 18.8151, |
| "step": 25400 |
| }, |
| { |
| "epoch": 6.375, |
| "grad_norm": 0.08050194382667542, |
| "learning_rate": 0.00029906270391899493, |
| "loss": 17.9696, |
| "step": 25500 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.09030721336603165, |
| "learning_rate": 0.00029905895368460524, |
| "loss": 17.9795, |
| "step": 25600 |
| }, |
| { |
| "epoch": 6.425, |
| "grad_norm": 0.09238829463720322, |
| "learning_rate": 0.0002990552034502156, |
| "loss": 17.8095, |
| "step": 25700 |
| }, |
| { |
| "epoch": 6.45, |
| "grad_norm": 0.08500493317842484, |
| "learning_rate": 0.000299051453215826, |
| "loss": 18.3223, |
| "step": 25800 |
| }, |
| { |
| "epoch": 6.475, |
| "grad_norm": 0.08180621266365051, |
| "learning_rate": 0.00029904770298143634, |
| "loss": 17.7836, |
| "step": 25900 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.09796881675720215, |
| "learning_rate": 0.00029904395274704665, |
| "loss": 17.7483, |
| "step": 26000 |
| }, |
| { |
| "epoch": 6.525, |
| "grad_norm": 0.08432163298130035, |
| "learning_rate": 0.000299040202512657, |
| "loss": 18.2479, |
| "step": 26100 |
| }, |
| { |
| "epoch": 6.55, |
| "grad_norm": 0.08197837322950363, |
| "learning_rate": 0.0002990364897806113, |
| "loss": 17.7703, |
| "step": 26200 |
| }, |
| { |
| "epoch": 6.575, |
| "grad_norm": 0.07721620053052902, |
| "learning_rate": 0.00029903273954622164, |
| "loss": 17.1537, |
| "step": 26300 |
| }, |
| { |
| "epoch": 6.6, |
| "grad_norm": 0.0785108208656311, |
| "learning_rate": 0.00029902898931183196, |
| "loss": 17.5139, |
| "step": 26400 |
| }, |
| { |
| "epoch": 6.625, |
| "grad_norm": 0.08640828728675842, |
| "learning_rate": 0.0002990252390774423, |
| "loss": 16.7445, |
| "step": 26500 |
| }, |
| { |
| "epoch": 6.65, |
| "grad_norm": 0.09119407832622528, |
| "learning_rate": 0.00029902148884305263, |
| "loss": 17.1573, |
| "step": 26600 |
| }, |
| { |
| "epoch": 6.675, |
| "grad_norm": 0.07212173193693161, |
| "learning_rate": 0.000299017738608663, |
| "loss": 17.0759, |
| "step": 26700 |
| }, |
| { |
| "epoch": 6.7, |
| "grad_norm": 0.08220189809799194, |
| "learning_rate": 0.00029901398837427337, |
| "loss": 16.9119, |
| "step": 26800 |
| }, |
| { |
| "epoch": 6.725, |
| "grad_norm": 0.10024359822273254, |
| "learning_rate": 0.00029901023813988373, |
| "loss": 16.7596, |
| "step": 26900 |
| }, |
| { |
| "epoch": 6.75, |
| "grad_norm": 0.0850207731127739, |
| "learning_rate": 0.00029900648790549405, |
| "loss": 16.9184, |
| "step": 27000 |
| }, |
| { |
| "epoch": 6.775, |
| "grad_norm": 0.07585939019918442, |
| "learning_rate": 0.0002990027376711044, |
| "loss": 16.4899, |
| "step": 27100 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.08519823104143143, |
| "learning_rate": 0.0002989989874367148, |
| "loss": 16.8922, |
| "step": 27200 |
| }, |
| { |
| "epoch": 6.825, |
| "grad_norm": 0.08368838578462601, |
| "learning_rate": 0.00029899523720232514, |
| "loss": 16.8136, |
| "step": 27300 |
| }, |
| { |
| "epoch": 6.85, |
| "grad_norm": 0.08928319811820984, |
| "learning_rate": 0.00029899148696793546, |
| "loss": 16.2412, |
| "step": 27400 |
| }, |
| { |
| "epoch": 6.875, |
| "grad_norm": 0.08436159044504166, |
| "learning_rate": 0.0002989877367335458, |
| "loss": 16.6282, |
| "step": 27500 |
| }, |
| { |
| "epoch": 6.9, |
| "grad_norm": 0.0907684713602066, |
| "learning_rate": 0.0002989839864991562, |
| "loss": 16.3234, |
| "step": 27600 |
| }, |
| { |
| "epoch": 6.925, |
| "grad_norm": 0.08816706389188766, |
| "learning_rate": 0.00029898023626476655, |
| "loss": 16.164, |
| "step": 27700 |
| }, |
| { |
| "epoch": 6.95, |
| "grad_norm": 0.08335541933774948, |
| "learning_rate": 0.00029897648603037687, |
| "loss": 16.1988, |
| "step": 27800 |
| }, |
| { |
| "epoch": 6.975, |
| "grad_norm": 0.07165244221687317, |
| "learning_rate": 0.00029897273579598723, |
| "loss": 16.1657, |
| "step": 27900 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.0803430899977684, |
| "learning_rate": 0.0002989689855615976, |
| "loss": 15.7038, |
| "step": 28000 |
| }, |
| { |
| "epoch": 7.025, |
| "grad_norm": 0.0674068033695221, |
| "learning_rate": 0.0002989652353272079, |
| "loss": 15.5932, |
| "step": 28100 |
| }, |
| { |
| "epoch": 7.05, |
| "grad_norm": 0.07914315909147263, |
| "learning_rate": 0.0002989614850928183, |
| "loss": 16.1827, |
| "step": 28200 |
| }, |
| { |
| "epoch": 7.075, |
| "grad_norm": 0.0919245108962059, |
| "learning_rate": 0.0002989577348584286, |
| "loss": 15.7686, |
| "step": 28300 |
| }, |
| { |
| "epoch": 7.1, |
| "grad_norm": 0.09044385701417923, |
| "learning_rate": 0.00029895398462403895, |
| "loss": 15.6737, |
| "step": 28400 |
| }, |
| { |
| "epoch": 7.125, |
| "grad_norm": 0.08890822529792786, |
| "learning_rate": 0.0002989502343896493, |
| "loss": 15.8661, |
| "step": 28500 |
| }, |
| { |
| "epoch": 7.15, |
| "grad_norm": 0.08436182141304016, |
| "learning_rate": 0.0002989464841552597, |
| "loss": 15.5255, |
| "step": 28600 |
| }, |
| { |
| "epoch": 7.175, |
| "grad_norm": 0.08775323629379272, |
| "learning_rate": 0.00029894273392087, |
| "loss": 15.4992, |
| "step": 28700 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 0.09018935263156891, |
| "learning_rate": 0.00029893898368648036, |
| "loss": 15.3418, |
| "step": 28800 |
| }, |
| { |
| "epoch": 7.225, |
| "grad_norm": 0.08356596529483795, |
| "learning_rate": 0.00029893523345209073, |
| "loss": 15.0965, |
| "step": 28900 |
| }, |
| { |
| "epoch": 7.25, |
| "grad_norm": 0.09058874845504761, |
| "learning_rate": 0.0002989314832177011, |
| "loss": 15.0762, |
| "step": 29000 |
| }, |
| { |
| "epoch": 7.275, |
| "grad_norm": 0.07803665101528168, |
| "learning_rate": 0.0002989277329833114, |
| "loss": 14.6331, |
| "step": 29100 |
| }, |
| { |
| "epoch": 7.3, |
| "grad_norm": 0.08148869127035141, |
| "learning_rate": 0.0002989239827489218, |
| "loss": 14.8405, |
| "step": 29200 |
| }, |
| { |
| "epoch": 7.325, |
| "grad_norm": 0.08294442296028137, |
| "learning_rate": 0.00029892023251453214, |
| "loss": 15.2037, |
| "step": 29300 |
| }, |
| { |
| "epoch": 7.35, |
| "grad_norm": 0.0803549587726593, |
| "learning_rate": 0.0002989164822801425, |
| "loss": 14.8633, |
| "step": 29400 |
| }, |
| { |
| "epoch": 7.375, |
| "grad_norm": 0.08180885016918182, |
| "learning_rate": 0.0002989127320457528, |
| "loss": 14.8036, |
| "step": 29500 |
| }, |
| { |
| "epoch": 7.4, |
| "grad_norm": 0.08756575733423233, |
| "learning_rate": 0.0002989089818113632, |
| "loss": 14.2077, |
| "step": 29600 |
| }, |
| { |
| "epoch": 7.425, |
| "grad_norm": 0.0851132944226265, |
| "learning_rate": 0.00029890523157697355, |
| "loss": 14.569, |
| "step": 29700 |
| }, |
| { |
| "epoch": 7.45, |
| "grad_norm": 0.08879829198122025, |
| "learning_rate": 0.0002989014813425839, |
| "loss": 14.5104, |
| "step": 29800 |
| }, |
| { |
| "epoch": 7.475, |
| "grad_norm": 0.0918511152267456, |
| "learning_rate": 0.00029889773110819423, |
| "loss": 14.3482, |
| "step": 29900 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.07251127064228058, |
| "learning_rate": 0.0002988939808738046, |
| "loss": 14.2309, |
| "step": 30000 |
| }, |
| { |
| "epoch": 7.525, |
| "grad_norm": 0.07517971098423004, |
| "learning_rate": 0.0002988902306394149, |
| "loss": 14.0291, |
| "step": 30100 |
| }, |
| { |
| "epoch": 7.55, |
| "grad_norm": 0.08854610472917557, |
| "learning_rate": 0.00029888651790736917, |
| "loss": 14.1938, |
| "step": 30200 |
| }, |
| { |
| "epoch": 7.575, |
| "grad_norm": 0.0849192887544632, |
| "learning_rate": 0.00029888276767297954, |
| "loss": 14.4531, |
| "step": 30300 |
| }, |
| { |
| "epoch": 7.6, |
| "grad_norm": 0.08010224252939224, |
| "learning_rate": 0.0002988790174385899, |
| "loss": 14.2434, |
| "step": 30400 |
| }, |
| { |
| "epoch": 7.625, |
| "grad_norm": 0.09017332643270493, |
| "learning_rate": 0.0002988752672042002, |
| "loss": 14.2892, |
| "step": 30500 |
| }, |
| { |
| "epoch": 7.65, |
| "grad_norm": 0.08440462499856949, |
| "learning_rate": 0.0002988715544721545, |
| "loss": 13.8386, |
| "step": 30600 |
| }, |
| { |
| "epoch": 7.675, |
| "grad_norm": 0.08667606860399246, |
| "learning_rate": 0.00029886780423776484, |
| "loss": 13.9581, |
| "step": 30700 |
| }, |
| { |
| "epoch": 7.7, |
| "grad_norm": 0.08237945288419724, |
| "learning_rate": 0.0002988640540033752, |
| "loss": 13.8813, |
| "step": 30800 |
| }, |
| { |
| "epoch": 7.725, |
| "grad_norm": 0.09895262122154236, |
| "learning_rate": 0.0002988603037689855, |
| "loss": 13.7951, |
| "step": 30900 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 0.07596876472234726, |
| "learning_rate": 0.0002988565535345959, |
| "loss": 13.7703, |
| "step": 31000 |
| }, |
| { |
| "epoch": 7.775, |
| "grad_norm": 0.07925312221050262, |
| "learning_rate": 0.00029885280330020625, |
| "loss": 13.4507, |
| "step": 31100 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 0.06997061520814896, |
| "learning_rate": 0.0002988490530658166, |
| "loss": 13.2481, |
| "step": 31200 |
| }, |
| { |
| "epoch": 7.825, |
| "grad_norm": 0.07986485958099365, |
| "learning_rate": 0.00029884530283142693, |
| "loss": 13.3403, |
| "step": 31300 |
| }, |
| { |
| "epoch": 7.85, |
| "grad_norm": 0.0819752886891365, |
| "learning_rate": 0.0002988415525970373, |
| "loss": 13.5279, |
| "step": 31400 |
| }, |
| { |
| "epoch": 7.875, |
| "grad_norm": 0.08534371107816696, |
| "learning_rate": 0.00029883780236264766, |
| "loss": 13.528, |
| "step": 31500 |
| }, |
| { |
| "epoch": 7.9, |
| "grad_norm": 0.06895570456981659, |
| "learning_rate": 0.00029883405212825803, |
| "loss": 13.0555, |
| "step": 31600 |
| }, |
| { |
| "epoch": 7.925, |
| "grad_norm": 0.07396534085273743, |
| "learning_rate": 0.00029883030189386834, |
| "loss": 13.1404, |
| "step": 31700 |
| }, |
| { |
| "epoch": 7.95, |
| "grad_norm": 0.0788232609629631, |
| "learning_rate": 0.00029882655165947865, |
| "loss": 13.1032, |
| "step": 31800 |
| }, |
| { |
| "epoch": 7.975, |
| "grad_norm": 0.0716477558016777, |
| "learning_rate": 0.000298822801425089, |
| "loss": 13.4664, |
| "step": 31900 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.07852466404438019, |
| "learning_rate": 0.0002988190511906994, |
| "loss": 13.006, |
| "step": 32000 |
| }, |
| { |
| "epoch": 8.025, |
| "grad_norm": 0.1100274920463562, |
| "learning_rate": 0.00029881530095630975, |
| "loss": 13.0427, |
| "step": 32100 |
| }, |
| { |
| "epoch": 8.05, |
| "grad_norm": 0.07130661606788635, |
| "learning_rate": 0.00029881155072192006, |
| "loss": 12.7575, |
| "step": 32200 |
| }, |
| { |
| "epoch": 8.075, |
| "grad_norm": 0.0846419557929039, |
| "learning_rate": 0.00029880780048753043, |
| "loss": 12.788, |
| "step": 32300 |
| }, |
| { |
| "epoch": 8.1, |
| "grad_norm": 0.07769067585468292, |
| "learning_rate": 0.0002988040502531408, |
| "loss": 12.8833, |
| "step": 32400 |
| }, |
| { |
| "epoch": 8.125, |
| "grad_norm": 0.06623586267232895, |
| "learning_rate": 0.00029880030001875116, |
| "loss": 12.5255, |
| "step": 32500 |
| }, |
| { |
| "epoch": 8.15, |
| "grad_norm": 0.0744013637304306, |
| "learning_rate": 0.00029879654978436147, |
| "loss": 12.7006, |
| "step": 32600 |
| }, |
| { |
| "epoch": 8.175, |
| "grad_norm": 0.07793931663036346, |
| "learning_rate": 0.00029879279954997184, |
| "loss": 12.2209, |
| "step": 32700 |
| }, |
| { |
| "epoch": 8.2, |
| "grad_norm": 0.07592390477657318, |
| "learning_rate": 0.0002987890493155822, |
| "loss": 12.2655, |
| "step": 32800 |
| }, |
| { |
| "epoch": 8.225, |
| "grad_norm": 0.07824064791202545, |
| "learning_rate": 0.00029878529908119257, |
| "loss": 12.3666, |
| "step": 32900 |
| }, |
| { |
| "epoch": 8.25, |
| "grad_norm": 0.06895022094249725, |
| "learning_rate": 0.0002987815488468029, |
| "loss": 12.3957, |
| "step": 33000 |
| }, |
| { |
| "epoch": 8.275, |
| "grad_norm": 0.08005383610725403, |
| "learning_rate": 0.00029877779861241325, |
| "loss": 12.3892, |
| "step": 33100 |
| }, |
| { |
| "epoch": 8.3, |
| "grad_norm": 0.0835549384355545, |
| "learning_rate": 0.0002987740483780236, |
| "loss": 12.1796, |
| "step": 33200 |
| }, |
| { |
| "epoch": 8.325, |
| "grad_norm": 0.08501383662223816, |
| "learning_rate": 0.000298770298143634, |
| "loss": 11.9921, |
| "step": 33300 |
| }, |
| { |
| "epoch": 8.35, |
| "grad_norm": 0.08822602778673172, |
| "learning_rate": 0.0002987665479092443, |
| "loss": 12.4392, |
| "step": 33400 |
| }, |
| { |
| "epoch": 8.375, |
| "grad_norm": 0.07659414410591125, |
| "learning_rate": 0.00029876279767485466, |
| "loss": 12.0612, |
| "step": 33500 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.08337811380624771, |
| "learning_rate": 0.00029875904744046497, |
| "loss": 12.0035, |
| "step": 33600 |
| }, |
| { |
| "epoch": 8.425, |
| "grad_norm": 0.07944267988204956, |
| "learning_rate": 0.00029875529720607534, |
| "loss": 11.8415, |
| "step": 33700 |
| }, |
| { |
| "epoch": 8.45, |
| "grad_norm": 0.0773790031671524, |
| "learning_rate": 0.0002987515469716857, |
| "loss": 12.1775, |
| "step": 33800 |
| }, |
| { |
| "epoch": 8.475, |
| "grad_norm": 0.08871705085039139, |
| "learning_rate": 0.00029874779673729607, |
| "loss": 12.208, |
| "step": 33900 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 0.07573138922452927, |
| "learning_rate": 0.0002987440465029064, |
| "loss": 11.6756, |
| "step": 34000 |
| }, |
| { |
| "epoch": 8.525, |
| "grad_norm": 0.07265728712081909, |
| "learning_rate": 0.00029874029626851675, |
| "loss": 11.4454, |
| "step": 34100 |
| }, |
| { |
| "epoch": 8.55, |
| "grad_norm": 0.0791819617152214, |
| "learning_rate": 0.0002987365460341271, |
| "loss": 11.9128, |
| "step": 34200 |
| }, |
| { |
| "epoch": 8.575, |
| "grad_norm": 0.07876613736152649, |
| "learning_rate": 0.0002987327957997375, |
| "loss": 11.7746, |
| "step": 34300 |
| }, |
| { |
| "epoch": 8.6, |
| "grad_norm": 0.08273490518331528, |
| "learning_rate": 0.0002987290455653478, |
| "loss": 11.6367, |
| "step": 34400 |
| }, |
| { |
| "epoch": 8.625, |
| "grad_norm": 0.07402598857879639, |
| "learning_rate": 0.00029872529533095816, |
| "loss": 11.6052, |
| "step": 34500 |
| }, |
| { |
| "epoch": 8.65, |
| "grad_norm": 0.06618580222129822, |
| "learning_rate": 0.0002987215825989124, |
| "loss": 11.7364, |
| "step": 34600 |
| }, |
| { |
| "epoch": 8.675, |
| "grad_norm": 0.07777924090623856, |
| "learning_rate": 0.0002987178323645228, |
| "loss": 11.3839, |
| "step": 34700 |
| }, |
| { |
| "epoch": 8.7, |
| "grad_norm": 0.09256916493177414, |
| "learning_rate": 0.0002987140821301331, |
| "loss": 11.4444, |
| "step": 34800 |
| }, |
| { |
| "epoch": 8.725, |
| "grad_norm": 0.08080556988716125, |
| "learning_rate": 0.00029871033189574346, |
| "loss": 11.5891, |
| "step": 34900 |
| }, |
| { |
| "epoch": 8.75, |
| "grad_norm": 0.08270179480314255, |
| "learning_rate": 0.00029870658166135383, |
| "loss": 11.3784, |
| "step": 35000 |
| }, |
| { |
| "epoch": 8.775, |
| "grad_norm": 0.08168449997901917, |
| "learning_rate": 0.0002987028314269642, |
| "loss": 11.1576, |
| "step": 35100 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.07069560140371323, |
| "learning_rate": 0.0002986990811925745, |
| "loss": 11.2748, |
| "step": 35200 |
| }, |
| { |
| "epoch": 8.825, |
| "grad_norm": 0.07771777361631393, |
| "learning_rate": 0.0002986953309581849, |
| "loss": 11.2124, |
| "step": 35300 |
| }, |
| { |
| "epoch": 8.85, |
| "grad_norm": 0.0844758003950119, |
| "learning_rate": 0.0002986915807237952, |
| "loss": 10.9886, |
| "step": 35400 |
| }, |
| { |
| "epoch": 8.875, |
| "grad_norm": 0.07531385868787766, |
| "learning_rate": 0.00029868783048940555, |
| "loss": 11.4722, |
| "step": 35500 |
| }, |
| { |
| "epoch": 8.9, |
| "grad_norm": 0.08248105645179749, |
| "learning_rate": 0.0002986840802550159, |
| "loss": 11.1052, |
| "step": 35600 |
| }, |
| { |
| "epoch": 8.925, |
| "grad_norm": 0.08126658946275711, |
| "learning_rate": 0.0002986803300206263, |
| "loss": 11.0637, |
| "step": 35700 |
| }, |
| { |
| "epoch": 8.95, |
| "grad_norm": 0.07933900505304337, |
| "learning_rate": 0.0002986765797862366, |
| "loss": 10.6369, |
| "step": 35800 |
| }, |
| { |
| "epoch": 8.975, |
| "grad_norm": 0.07628486305475235, |
| "learning_rate": 0.00029867282955184696, |
| "loss": 10.8511, |
| "step": 35900 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.07509356737136841, |
| "learning_rate": 0.00029866907931745733, |
| "loss": 10.9576, |
| "step": 36000 |
| }, |
| { |
| "epoch": 9.025, |
| "grad_norm": 0.085249163210392, |
| "learning_rate": 0.00029866532908306764, |
| "loss": 10.9181, |
| "step": 36100 |
| }, |
| { |
| "epoch": 9.05, |
| "grad_norm": 0.08377708494663239, |
| "learning_rate": 0.000298661578848678, |
| "loss": 10.7095, |
| "step": 36200 |
| }, |
| { |
| "epoch": 9.075, |
| "grad_norm": 0.06539880484342575, |
| "learning_rate": 0.00029865786611663227, |
| "loss": 10.4937, |
| "step": 36300 |
| }, |
| { |
| "epoch": 9.1, |
| "grad_norm": 0.08634931594133377, |
| "learning_rate": 0.00029865411588224263, |
| "loss": 11.043, |
| "step": 36400 |
| }, |
| { |
| "epoch": 9.125, |
| "grad_norm": 0.06905148923397064, |
| "learning_rate": 0.00029865036564785295, |
| "loss": 11.0456, |
| "step": 36500 |
| }, |
| { |
| "epoch": 9.15, |
| "grad_norm": 0.07896845042705536, |
| "learning_rate": 0.0002986466154134633, |
| "loss": 10.5105, |
| "step": 36600 |
| }, |
| { |
| "epoch": 9.175, |
| "grad_norm": 0.07206033915281296, |
| "learning_rate": 0.0002986428651790737, |
| "loss": 10.7025, |
| "step": 36700 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 0.06719633936882019, |
| "learning_rate": 0.00029863911494468405, |
| "loss": 10.3498, |
| "step": 36800 |
| }, |
| { |
| "epoch": 9.225, |
| "grad_norm": 0.07648395001888275, |
| "learning_rate": 0.00029863536471029436, |
| "loss": 10.4292, |
| "step": 36900 |
| }, |
| { |
| "epoch": 9.25, |
| "grad_norm": 0.08475750684738159, |
| "learning_rate": 0.0002986316144759047, |
| "loss": 10.5922, |
| "step": 37000 |
| }, |
| { |
| "epoch": 9.275, |
| "grad_norm": 0.09004350751638412, |
| "learning_rate": 0.00029862786424151504, |
| "loss": 10.3239, |
| "step": 37100 |
| }, |
| { |
| "epoch": 9.3, |
| "grad_norm": 0.06373389810323715, |
| "learning_rate": 0.0002986241140071254, |
| "loss": 10.2006, |
| "step": 37200 |
| }, |
| { |
| "epoch": 9.325, |
| "grad_norm": 0.07837036997079849, |
| "learning_rate": 0.00029862036377273577, |
| "loss": 10.193, |
| "step": 37300 |
| }, |
| { |
| "epoch": 9.35, |
| "grad_norm": 0.07210332155227661, |
| "learning_rate": 0.00029861661353834613, |
| "loss": 10.2084, |
| "step": 37400 |
| }, |
| { |
| "epoch": 9.375, |
| "grad_norm": 0.07254429906606674, |
| "learning_rate": 0.00029861286330395645, |
| "loss": 10.2551, |
| "step": 37500 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 0.06640215963125229, |
| "learning_rate": 0.0002986091130695668, |
| "loss": 10.2847, |
| "step": 37600 |
| }, |
| { |
| "epoch": 9.425, |
| "grad_norm": 0.07777173817157745, |
| "learning_rate": 0.0002986053628351772, |
| "loss": 10.2434, |
| "step": 37700 |
| }, |
| { |
| "epoch": 9.45, |
| "grad_norm": 0.07829392701387405, |
| "learning_rate": 0.00029860161260078754, |
| "loss": 10.0319, |
| "step": 37800 |
| }, |
| { |
| "epoch": 9.475, |
| "grad_norm": 0.07961380481719971, |
| "learning_rate": 0.00029859786236639786, |
| "loss": 10.1739, |
| "step": 37900 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 0.07749368995428085, |
| "learning_rate": 0.0002985941121320082, |
| "loss": 9.6391, |
| "step": 38000 |
| }, |
| { |
| "epoch": 9.525, |
| "grad_norm": 0.0826738029718399, |
| "learning_rate": 0.0002985903618976186, |
| "loss": 10.4704, |
| "step": 38100 |
| }, |
| { |
| "epoch": 9.55, |
| "grad_norm": 0.06573819369077682, |
| "learning_rate": 0.00029858661166322895, |
| "loss": 9.7767, |
| "step": 38200 |
| }, |
| { |
| "epoch": 9.575, |
| "grad_norm": 0.08020669966936111, |
| "learning_rate": 0.00029858286142883927, |
| "loss": 9.7305, |
| "step": 38300 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.06815823167562485, |
| "learning_rate": 0.00029857911119444963, |
| "loss": 9.597, |
| "step": 38400 |
| }, |
| { |
| "epoch": 9.625, |
| "grad_norm": 0.07290255278348923, |
| "learning_rate": 0.0002985753984624039, |
| "loss": 9.8638, |
| "step": 38500 |
| }, |
| { |
| "epoch": 9.65, |
| "grad_norm": 0.06887535005807877, |
| "learning_rate": 0.00029857164822801426, |
| "loss": 9.6939, |
| "step": 38600 |
| }, |
| { |
| "epoch": 9.675, |
| "grad_norm": 0.08159805834293365, |
| "learning_rate": 0.00029856789799362457, |
| "loss": 9.8011, |
| "step": 38700 |
| }, |
| { |
| "epoch": 9.7, |
| "grad_norm": 0.08071273565292358, |
| "learning_rate": 0.00029856414775923494, |
| "loss": 9.5514, |
| "step": 38800 |
| }, |
| { |
| "epoch": 9.725, |
| "grad_norm": 0.07089462131261826, |
| "learning_rate": 0.00029856039752484525, |
| "loss": 9.8858, |
| "step": 38900 |
| }, |
| { |
| "epoch": 9.75, |
| "grad_norm": 0.08935658633708954, |
| "learning_rate": 0.0002985566472904556, |
| "loss": 9.6155, |
| "step": 39000 |
| }, |
| { |
| "epoch": 9.775, |
| "grad_norm": 0.08028286695480347, |
| "learning_rate": 0.000298552897056066, |
| "loss": 9.6638, |
| "step": 39100 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 0.07186749577522278, |
| "learning_rate": 0.00029854914682167635, |
| "loss": 9.3091, |
| "step": 39200 |
| }, |
| { |
| "epoch": 9.825, |
| "grad_norm": 0.06545951217412949, |
| "learning_rate": 0.00029854539658728666, |
| "loss": 9.5374, |
| "step": 39300 |
| }, |
| { |
| "epoch": 9.85, |
| "grad_norm": 0.0787624716758728, |
| "learning_rate": 0.000298541646352897, |
| "loss": 9.4178, |
| "step": 39400 |
| }, |
| { |
| "epoch": 9.875, |
| "grad_norm": 0.07585486769676208, |
| "learning_rate": 0.0002985378961185074, |
| "loss": 9.2153, |
| "step": 39500 |
| }, |
| { |
| "epoch": 9.9, |
| "grad_norm": 0.07809693366289139, |
| "learning_rate": 0.0002985341458841177, |
| "loss": 9.2729, |
| "step": 39600 |
| }, |
| { |
| "epoch": 9.925, |
| "grad_norm": 0.12963560223579407, |
| "learning_rate": 0.00029853039564972807, |
| "loss": 9.1279, |
| "step": 39700 |
| }, |
| { |
| "epoch": 9.95, |
| "grad_norm": 0.06803625822067261, |
| "learning_rate": 0.00029852664541533844, |
| "loss": 9.3529, |
| "step": 39800 |
| }, |
| { |
| "epoch": 9.975, |
| "grad_norm": 0.07478567957878113, |
| "learning_rate": 0.0002985228951809488, |
| "loss": 9.1627, |
| "step": 39900 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.07844047993421555, |
| "learning_rate": 0.0002985191449465591, |
| "loss": 9.0775, |
| "step": 40000 |
| }, |
| { |
| "epoch": 10.025, |
| "grad_norm": 0.07982715219259262, |
| "learning_rate": 0.0002985153947121695, |
| "loss": 9.4258, |
| "step": 40100 |
| }, |
| { |
| "epoch": 10.05, |
| "grad_norm": 0.0806502029299736, |
| "learning_rate": 0.00029851164447777985, |
| "loss": 9.3455, |
| "step": 40200 |
| }, |
| { |
| "epoch": 10.075, |
| "grad_norm": 0.06514900177717209, |
| "learning_rate": 0.0002985078942433902, |
| "loss": 8.9195, |
| "step": 40300 |
| }, |
| { |
| "epoch": 10.1, |
| "grad_norm": 0.08182831853628159, |
| "learning_rate": 0.0002985041440090005, |
| "loss": 8.9772, |
| "step": 40400 |
| }, |
| { |
| "epoch": 10.125, |
| "grad_norm": 0.07242997735738754, |
| "learning_rate": 0.0002985003937746109, |
| "loss": 9.3286, |
| "step": 40500 |
| }, |
| { |
| "epoch": 10.15, |
| "grad_norm": 0.07168876379728317, |
| "learning_rate": 0.0002984966435402212, |
| "loss": 8.8118, |
| "step": 40600 |
| }, |
| { |
| "epoch": 10.175, |
| "grad_norm": 0.07878579944372177, |
| "learning_rate": 0.00029849289330583157, |
| "loss": 9.0127, |
| "step": 40700 |
| }, |
| { |
| "epoch": 10.2, |
| "grad_norm": 0.06614303588867188, |
| "learning_rate": 0.00029848914307144194, |
| "loss": 8.8964, |
| "step": 40800 |
| }, |
| { |
| "epoch": 10.225, |
| "grad_norm": 0.07991635799407959, |
| "learning_rate": 0.0002984853928370523, |
| "loss": 8.7963, |
| "step": 40900 |
| }, |
| { |
| "epoch": 10.25, |
| "grad_norm": 0.07721689343452454, |
| "learning_rate": 0.0002984816426026626, |
| "loss": 8.797, |
| "step": 41000 |
| }, |
| { |
| "epoch": 10.275, |
| "grad_norm": 0.07666311413049698, |
| "learning_rate": 0.000298477892368273, |
| "loss": 8.6722, |
| "step": 41100 |
| }, |
| { |
| "epoch": 10.3, |
| "grad_norm": 0.0791340246796608, |
| "learning_rate": 0.00029847414213388335, |
| "loss": 8.6547, |
| "step": 41200 |
| }, |
| { |
| "epoch": 10.325, |
| "grad_norm": 0.0760653093457222, |
| "learning_rate": 0.0002984703918994937, |
| "loss": 8.696, |
| "step": 41300 |
| }, |
| { |
| "epoch": 10.35, |
| "grad_norm": 0.06864143908023834, |
| "learning_rate": 0.000298466641665104, |
| "loss": 8.8221, |
| "step": 41400 |
| }, |
| { |
| "epoch": 10.375, |
| "grad_norm": 0.07417836040258408, |
| "learning_rate": 0.0002984628914307144, |
| "loss": 8.5974, |
| "step": 41500 |
| }, |
| { |
| "epoch": 10.4, |
| "grad_norm": 0.073348268866539, |
| "learning_rate": 0.00029845914119632476, |
| "loss": 8.309, |
| "step": 41600 |
| }, |
| { |
| "epoch": 10.425, |
| "grad_norm": 0.0775461494922638, |
| "learning_rate": 0.0002984553909619351, |
| "loss": 8.6313, |
| "step": 41700 |
| }, |
| { |
| "epoch": 10.45, |
| "grad_norm": 0.07109999656677246, |
| "learning_rate": 0.00029845164072754543, |
| "loss": 8.3238, |
| "step": 41800 |
| }, |
| { |
| "epoch": 10.475, |
| "grad_norm": 0.06957342475652695, |
| "learning_rate": 0.0002984478904931558, |
| "loss": 8.3179, |
| "step": 41900 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 0.07247728109359741, |
| "learning_rate": 0.00029844414025876617, |
| "loss": 8.3806, |
| "step": 42000 |
| }, |
| { |
| "epoch": 10.525, |
| "grad_norm": 0.08276287466287613, |
| "learning_rate": 0.00029844039002437653, |
| "loss": 8.495, |
| "step": 42100 |
| }, |
| { |
| "epoch": 10.55, |
| "grad_norm": 0.07794822007417679, |
| "learning_rate": 0.00029843663978998685, |
| "loss": 8.0454, |
| "step": 42200 |
| }, |
| { |
| "epoch": 10.575, |
| "grad_norm": 0.07254128903150558, |
| "learning_rate": 0.0002984328895555972, |
| "loss": 8.5174, |
| "step": 42300 |
| }, |
| { |
| "epoch": 10.6, |
| "grad_norm": 0.08386515080928802, |
| "learning_rate": 0.0002984291393212075, |
| "loss": 8.5586, |
| "step": 42400 |
| }, |
| { |
| "epoch": 10.625, |
| "grad_norm": 0.0731733962893486, |
| "learning_rate": 0.0002984254265891618, |
| "loss": 8.1163, |
| "step": 42500 |
| }, |
| { |
| "epoch": 10.65, |
| "grad_norm": 0.07960132509469986, |
| "learning_rate": 0.00029842167635477215, |
| "loss": 8.0072, |
| "step": 42600 |
| }, |
| { |
| "epoch": 10.675, |
| "grad_norm": 0.07048605382442474, |
| "learning_rate": 0.0002984179261203825, |
| "loss": 8.3243, |
| "step": 42700 |
| }, |
| { |
| "epoch": 10.7, |
| "grad_norm": 0.07215945422649384, |
| "learning_rate": 0.00029841417588599283, |
| "loss": 8.2795, |
| "step": 42800 |
| }, |
| { |
| "epoch": 10.725, |
| "grad_norm": 0.07723450660705566, |
| "learning_rate": 0.0002984104256516032, |
| "loss": 8.261, |
| "step": 42900 |
| }, |
| { |
| "epoch": 10.75, |
| "grad_norm": 0.06688930839300156, |
| "learning_rate": 0.00029840667541721356, |
| "loss": 8.1896, |
| "step": 43000 |
| }, |
| { |
| "epoch": 10.775, |
| "grad_norm": 0.07152280956506729, |
| "learning_rate": 0.00029840292518282393, |
| "loss": 7.8468, |
| "step": 43100 |
| }, |
| { |
| "epoch": 10.8, |
| "grad_norm": 0.0700908899307251, |
| "learning_rate": 0.00029839917494843424, |
| "loss": 8.2157, |
| "step": 43200 |
| }, |
| { |
| "epoch": 10.825, |
| "grad_norm": 0.08827432245016098, |
| "learning_rate": 0.0002983954247140446, |
| "loss": 8.2091, |
| "step": 43300 |
| }, |
| { |
| "epoch": 10.85, |
| "grad_norm": 0.07007287442684174, |
| "learning_rate": 0.00029839167447965497, |
| "loss": 8.2475, |
| "step": 43400 |
| }, |
| { |
| "epoch": 10.875, |
| "grad_norm": 0.07239579409360886, |
| "learning_rate": 0.0002983879242452653, |
| "loss": 7.9446, |
| "step": 43500 |
| }, |
| { |
| "epoch": 10.9, |
| "grad_norm": 0.06851651519536972, |
| "learning_rate": 0.00029838417401087565, |
| "loss": 7.9521, |
| "step": 43600 |
| }, |
| { |
| "epoch": 10.925, |
| "grad_norm": 0.07283764332532883, |
| "learning_rate": 0.00029838042377648596, |
| "loss": 7.9522, |
| "step": 43700 |
| }, |
| { |
| "epoch": 10.95, |
| "grad_norm": 0.06353294104337692, |
| "learning_rate": 0.0002983766735420964, |
| "loss": 7.9084, |
| "step": 43800 |
| }, |
| { |
| "epoch": 10.975, |
| "grad_norm": 0.07374967634677887, |
| "learning_rate": 0.0002983729608100506, |
| "loss": 7.6851, |
| "step": 43900 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.08643588423728943, |
| "learning_rate": 0.00029836921057566096, |
| "loss": 7.7639, |
| "step": 44000 |
| }, |
| { |
| "epoch": 11.025, |
| "grad_norm": 0.06952405720949173, |
| "learning_rate": 0.00029836546034127127, |
| "loss": 7.8923, |
| "step": 44100 |
| }, |
| { |
| "epoch": 11.05, |
| "grad_norm": 0.0842747688293457, |
| "learning_rate": 0.00029836171010688163, |
| "loss": 7.7411, |
| "step": 44200 |
| }, |
| { |
| "epoch": 11.075, |
| "grad_norm": 0.07051684707403183, |
| "learning_rate": 0.000298357959872492, |
| "loss": 7.7914, |
| "step": 44300 |
| }, |
| { |
| "epoch": 11.1, |
| "grad_norm": 0.07264287769794464, |
| "learning_rate": 0.00029835420963810237, |
| "loss": 7.7216, |
| "step": 44400 |
| }, |
| { |
| "epoch": 11.125, |
| "grad_norm": 0.07382502406835556, |
| "learning_rate": 0.0002983504594037127, |
| "loss": 7.8505, |
| "step": 44500 |
| }, |
| { |
| "epoch": 11.15, |
| "grad_norm": 0.07358778268098831, |
| "learning_rate": 0.00029834670916932304, |
| "loss": 7.7822, |
| "step": 44600 |
| }, |
| { |
| "epoch": 11.175, |
| "grad_norm": 0.07758370041847229, |
| "learning_rate": 0.0002983429589349334, |
| "loss": 8.0006, |
| "step": 44700 |
| }, |
| { |
| "epoch": 11.2, |
| "grad_norm": 0.07674399763345718, |
| "learning_rate": 0.0002983392087005438, |
| "loss": 7.2497, |
| "step": 44800 |
| }, |
| { |
| "epoch": 11.225, |
| "grad_norm": 0.06659264862537384, |
| "learning_rate": 0.0002983354584661541, |
| "loss": 7.5115, |
| "step": 44900 |
| }, |
| { |
| "epoch": 11.25, |
| "grad_norm": 0.0640081837773323, |
| "learning_rate": 0.00029833170823176445, |
| "loss": 7.4374, |
| "step": 45000 |
| }, |
| { |
| "epoch": 11.275, |
| "grad_norm": 0.07784521579742432, |
| "learning_rate": 0.0002983279579973748, |
| "loss": 7.6097, |
| "step": 45100 |
| }, |
| { |
| "epoch": 11.3, |
| "grad_norm": 0.08755332231521606, |
| "learning_rate": 0.0002983242077629852, |
| "loss": 7.5832, |
| "step": 45200 |
| }, |
| { |
| "epoch": 11.325, |
| "grad_norm": 0.06300461292266846, |
| "learning_rate": 0.0002983204575285955, |
| "loss": 7.281, |
| "step": 45300 |
| }, |
| { |
| "epoch": 11.35, |
| "grad_norm": 0.06807196140289307, |
| "learning_rate": 0.00029831670729420586, |
| "loss": 7.2347, |
| "step": 45400 |
| }, |
| { |
| "epoch": 11.375, |
| "grad_norm": 0.07403436303138733, |
| "learning_rate": 0.00029831295705981623, |
| "loss": 7.0346, |
| "step": 45500 |
| }, |
| { |
| "epoch": 11.4, |
| "grad_norm": 0.07038521021604538, |
| "learning_rate": 0.0002983092068254266, |
| "loss": 7.6505, |
| "step": 45600 |
| }, |
| { |
| "epoch": 11.425, |
| "grad_norm": 0.08596746623516083, |
| "learning_rate": 0.0002983054565910369, |
| "loss": 7.2829, |
| "step": 45700 |
| }, |
| { |
| "epoch": 11.45, |
| "grad_norm": 0.06901860982179642, |
| "learning_rate": 0.0002983017063566473, |
| "loss": 7.4822, |
| "step": 45800 |
| }, |
| { |
| "epoch": 11.475, |
| "grad_norm": 0.07062174379825592, |
| "learning_rate": 0.0002982979561222576, |
| "loss": 7.2426, |
| "step": 45900 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 0.06718676537275314, |
| "learning_rate": 0.00029829420588786795, |
| "loss": 7.2257, |
| "step": 46000 |
| }, |
| { |
| "epoch": 11.525, |
| "grad_norm": 0.10105819255113602, |
| "learning_rate": 0.0002982904556534783, |
| "loss": 7.1366, |
| "step": 46100 |
| }, |
| { |
| "epoch": 11.55, |
| "grad_norm": 0.06286392360925674, |
| "learning_rate": 0.0002982867054190887, |
| "loss": 7.4181, |
| "step": 46200 |
| }, |
| { |
| "epoch": 11.575, |
| "grad_norm": 0.09307048469781876, |
| "learning_rate": 0.000298282955184699, |
| "loss": 7.4101, |
| "step": 46300 |
| }, |
| { |
| "epoch": 11.6, |
| "grad_norm": 0.06440640985965729, |
| "learning_rate": 0.00029827920495030936, |
| "loss": 7.3866, |
| "step": 46400 |
| }, |
| { |
| "epoch": 11.625, |
| "grad_norm": 0.06852256506681442, |
| "learning_rate": 0.00029827545471591973, |
| "loss": 7.084, |
| "step": 46500 |
| }, |
| { |
| "epoch": 11.65, |
| "grad_norm": 0.06919901072978973, |
| "learning_rate": 0.0002982717044815301, |
| "loss": 6.9507, |
| "step": 46600 |
| }, |
| { |
| "epoch": 11.675, |
| "grad_norm": 0.0683809369802475, |
| "learning_rate": 0.0002982679542471404, |
| "loss": 7.1805, |
| "step": 46700 |
| }, |
| { |
| "epoch": 11.7, |
| "grad_norm": 0.06878841668367386, |
| "learning_rate": 0.0002982642040127508, |
| "loss": 7.2514, |
| "step": 46800 |
| }, |
| { |
| "epoch": 11.725, |
| "grad_norm": 0.06913451850414276, |
| "learning_rate": 0.00029826045377836114, |
| "loss": 6.9969, |
| "step": 46900 |
| }, |
| { |
| "epoch": 11.75, |
| "grad_norm": 0.06999741494655609, |
| "learning_rate": 0.0002982567035439715, |
| "loss": 6.8401, |
| "step": 47000 |
| }, |
| { |
| "epoch": 11.775, |
| "grad_norm": 0.07473236322402954, |
| "learning_rate": 0.0002982529533095818, |
| "loss": 6.8587, |
| "step": 47100 |
| }, |
| { |
| "epoch": 11.8, |
| "grad_norm": 0.07786587625741959, |
| "learning_rate": 0.0002982492030751922, |
| "loss": 7.0751, |
| "step": 47200 |
| }, |
| { |
| "epoch": 11.825, |
| "grad_norm": 0.0667233094573021, |
| "learning_rate": 0.00029824545284080255, |
| "loss": 6.9344, |
| "step": 47300 |
| }, |
| { |
| "epoch": 11.85, |
| "grad_norm": 0.07131955772638321, |
| "learning_rate": 0.0002982417026064129, |
| "loss": 7.0165, |
| "step": 47400 |
| }, |
| { |
| "epoch": 11.875, |
| "grad_norm": 0.08371793478727341, |
| "learning_rate": 0.00029823795237202323, |
| "loss": 6.7392, |
| "step": 47500 |
| }, |
| { |
| "epoch": 11.9, |
| "grad_norm": 0.07992976158857346, |
| "learning_rate": 0.00029823420213763354, |
| "loss": 6.7678, |
| "step": 47600 |
| }, |
| { |
| "epoch": 11.925, |
| "grad_norm": 0.07361280173063278, |
| "learning_rate": 0.0002982304519032439, |
| "loss": 6.5933, |
| "step": 47700 |
| }, |
| { |
| "epoch": 11.95, |
| "grad_norm": 0.0853012353181839, |
| "learning_rate": 0.00029822670166885427, |
| "loss": 6.6292, |
| "step": 47800 |
| }, |
| { |
| "epoch": 11.975, |
| "grad_norm": 0.07077699154615402, |
| "learning_rate": 0.00029822298893680853, |
| "loss": 7.045, |
| "step": 47900 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.06884802132844925, |
| "learning_rate": 0.00029821923870241885, |
| "loss": 6.7302, |
| "step": 48000 |
| }, |
| { |
| "epoch": 12.025, |
| "grad_norm": 0.07187984138727188, |
| "learning_rate": 0.0002982154884680292, |
| "loss": 6.7884, |
| "step": 48100 |
| }, |
| { |
| "epoch": 12.05, |
| "grad_norm": 0.06950085610151291, |
| "learning_rate": 0.0002982117382336396, |
| "loss": 6.6858, |
| "step": 48200 |
| }, |
| { |
| "epoch": 12.075, |
| "grad_norm": 0.06879769265651703, |
| "learning_rate": 0.00029820798799924994, |
| "loss": 6.4815, |
| "step": 48300 |
| }, |
| { |
| "epoch": 12.1, |
| "grad_norm": 0.07400238513946533, |
| "learning_rate": 0.00029820423776486026, |
| "loss": 6.7837, |
| "step": 48400 |
| }, |
| { |
| "epoch": 12.125, |
| "grad_norm": 0.0689275860786438, |
| "learning_rate": 0.0002982004875304706, |
| "loss": 6.3745, |
| "step": 48500 |
| }, |
| { |
| "epoch": 12.15, |
| "grad_norm": 0.07304348796606064, |
| "learning_rate": 0.000298196737296081, |
| "loss": 6.7639, |
| "step": 48600 |
| }, |
| { |
| "epoch": 12.175, |
| "grad_norm": 0.07872481644153595, |
| "learning_rate": 0.00029819298706169135, |
| "loss": 6.5761, |
| "step": 48700 |
| }, |
| { |
| "epoch": 12.2, |
| "grad_norm": 0.06597219407558441, |
| "learning_rate": 0.00029818923682730167, |
| "loss": 6.6663, |
| "step": 48800 |
| }, |
| { |
| "epoch": 12.225, |
| "grad_norm": 0.060123708099126816, |
| "learning_rate": 0.00029818548659291203, |
| "loss": 6.5317, |
| "step": 48900 |
| }, |
| { |
| "epoch": 12.25, |
| "grad_norm": 0.07376055419445038, |
| "learning_rate": 0.00029818173635852234, |
| "loss": 6.4394, |
| "step": 49000 |
| }, |
| { |
| "epoch": 12.275, |
| "grad_norm": 0.06217016279697418, |
| "learning_rate": 0.00029817798612413277, |
| "loss": 6.4522, |
| "step": 49100 |
| }, |
| { |
| "epoch": 12.3, |
| "grad_norm": 0.06492452323436737, |
| "learning_rate": 0.0002981742358897431, |
| "loss": 6.5623, |
| "step": 49200 |
| }, |
| { |
| "epoch": 12.325, |
| "grad_norm": 0.08026625216007233, |
| "learning_rate": 0.00029817048565535344, |
| "loss": 6.3981, |
| "step": 49300 |
| }, |
| { |
| "epoch": 12.35, |
| "grad_norm": 0.07046521455049515, |
| "learning_rate": 0.00029816673542096376, |
| "loss": 6.4173, |
| "step": 49400 |
| }, |
| { |
| "epoch": 12.375, |
| "grad_norm": 0.07843586057424545, |
| "learning_rate": 0.0002981629851865741, |
| "loss": 6.499, |
| "step": 49500 |
| }, |
| { |
| "epoch": 12.4, |
| "grad_norm": 0.06976750493049622, |
| "learning_rate": 0.0002981592349521845, |
| "loss": 6.4019, |
| "step": 49600 |
| }, |
| { |
| "epoch": 12.425, |
| "grad_norm": 0.06601151078939438, |
| "learning_rate": 0.00029815548471779485, |
| "loss": 6.3474, |
| "step": 49700 |
| }, |
| { |
| "epoch": 12.45, |
| "grad_norm": 0.07471803575754166, |
| "learning_rate": 0.00029815173448340517, |
| "loss": 6.1884, |
| "step": 49800 |
| }, |
| { |
| "epoch": 12.475, |
| "grad_norm": 0.06310160458087921, |
| "learning_rate": 0.0002981480217513594, |
| "loss": 6.2996, |
| "step": 49900 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.060027483850717545, |
| "learning_rate": 0.0002981442715169698, |
| "loss": 6.2398, |
| "step": 50000 |
| }, |
| { |
| "epoch": 12.525, |
| "grad_norm": 0.07511355727910995, |
| "learning_rate": 0.00029814052128258016, |
| "loss": 6.0126, |
| "step": 50100 |
| }, |
| { |
| "epoch": 12.55, |
| "grad_norm": 0.09251129627227783, |
| "learning_rate": 0.00029813677104819047, |
| "loss": 6.1201, |
| "step": 50200 |
| }, |
| { |
| "epoch": 12.575, |
| "grad_norm": 0.06512793153524399, |
| "learning_rate": 0.00029813302081380084, |
| "loss": 6.2464, |
| "step": 50300 |
| }, |
| { |
| "epoch": 12.6, |
| "grad_norm": 0.06275767832994461, |
| "learning_rate": 0.0002981292705794112, |
| "loss": 6.215, |
| "step": 50400 |
| }, |
| { |
| "epoch": 12.625, |
| "grad_norm": 0.07693471014499664, |
| "learning_rate": 0.00029812552034502157, |
| "loss": 6.1931, |
| "step": 50500 |
| }, |
| { |
| "epoch": 12.65, |
| "grad_norm": 0.06782624125480652, |
| "learning_rate": 0.0002981217701106319, |
| "loss": 6.3334, |
| "step": 50600 |
| }, |
| { |
| "epoch": 12.675, |
| "grad_norm": 0.06484679132699966, |
| "learning_rate": 0.00029811801987624225, |
| "loss": 5.9756, |
| "step": 50700 |
| }, |
| { |
| "epoch": 12.7, |
| "grad_norm": 0.07431244850158691, |
| "learning_rate": 0.0002981142696418526, |
| "loss": 6.2173, |
| "step": 50800 |
| }, |
| { |
| "epoch": 12.725, |
| "grad_norm": 0.07316889613866806, |
| "learning_rate": 0.000298110519407463, |
| "loss": 6.0987, |
| "step": 50900 |
| }, |
| { |
| "epoch": 12.75, |
| "grad_norm": 0.06565624475479126, |
| "learning_rate": 0.0002981067691730733, |
| "loss": 6.0928, |
| "step": 51000 |
| }, |
| { |
| "epoch": 12.775, |
| "grad_norm": 0.07335751503705978, |
| "learning_rate": 0.00029810301893868366, |
| "loss": 6.1505, |
| "step": 51100 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 0.0684492215514183, |
| "learning_rate": 0.00029809926870429397, |
| "loss": 5.9197, |
| "step": 51200 |
| }, |
| { |
| "epoch": 12.825, |
| "grad_norm": 0.06604496389627457, |
| "learning_rate": 0.00029809551846990434, |
| "loss": 6.2255, |
| "step": 51300 |
| }, |
| { |
| "epoch": 12.85, |
| "grad_norm": 0.06465475261211395, |
| "learning_rate": 0.0002980917682355147, |
| "loss": 5.8412, |
| "step": 51400 |
| }, |
| { |
| "epoch": 12.875, |
| "grad_norm": 0.06663598865270615, |
| "learning_rate": 0.000298088018001125, |
| "loss": 5.7792, |
| "step": 51500 |
| }, |
| { |
| "epoch": 12.9, |
| "grad_norm": 0.06258101016283035, |
| "learning_rate": 0.0002980842677667354, |
| "loss": 5.7024, |
| "step": 51600 |
| }, |
| { |
| "epoch": 12.925, |
| "grad_norm": 0.06694167107343674, |
| "learning_rate": 0.00029808051753234575, |
| "loss": 5.9832, |
| "step": 51700 |
| }, |
| { |
| "epoch": 12.95, |
| "grad_norm": 0.06682337820529938, |
| "learning_rate": 0.0002980767672979561, |
| "loss": 5.8905, |
| "step": 51800 |
| }, |
| { |
| "epoch": 12.975, |
| "grad_norm": 0.07507793605327606, |
| "learning_rate": 0.0002980730545659103, |
| "loss": 5.8869, |
| "step": 51900 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.0638195350766182, |
| "learning_rate": 0.0002980693043315207, |
| "loss": 5.9508, |
| "step": 52000 |
| }, |
| { |
| "epoch": 13.025, |
| "grad_norm": 0.089790940284729, |
| "learning_rate": 0.00029806555409713105, |
| "loss": 5.807, |
| "step": 52100 |
| }, |
| { |
| "epoch": 13.05, |
| "grad_norm": 0.06941410899162292, |
| "learning_rate": 0.0002980618038627414, |
| "loss": 5.8974, |
| "step": 52200 |
| }, |
| { |
| "epoch": 13.075, |
| "grad_norm": 0.06374108046293259, |
| "learning_rate": 0.00029805805362835173, |
| "loss": 6.02, |
| "step": 52300 |
| }, |
| { |
| "epoch": 13.1, |
| "grad_norm": 0.06581106036901474, |
| "learning_rate": 0.0002980543033939621, |
| "loss": 5.8285, |
| "step": 52400 |
| }, |
| { |
| "epoch": 13.125, |
| "grad_norm": 0.062402479350566864, |
| "learning_rate": 0.00029805055315957246, |
| "loss": 5.9327, |
| "step": 52500 |
| }, |
| { |
| "epoch": 13.15, |
| "grad_norm": 0.0768311470746994, |
| "learning_rate": 0.00029804680292518283, |
| "loss": 5.7586, |
| "step": 52600 |
| }, |
| { |
| "epoch": 13.175, |
| "grad_norm": 0.09206507354974747, |
| "learning_rate": 0.00029804305269079314, |
| "loss": 5.7239, |
| "step": 52700 |
| }, |
| { |
| "epoch": 13.2, |
| "grad_norm": 0.09109029918909073, |
| "learning_rate": 0.0002980393024564035, |
| "loss": 5.8506, |
| "step": 52800 |
| }, |
| { |
| "epoch": 13.225, |
| "grad_norm": 0.06463731825351715, |
| "learning_rate": 0.0002980355522220138, |
| "loss": 5.8716, |
| "step": 52900 |
| }, |
| { |
| "epoch": 13.25, |
| "grad_norm": 0.07239048928022385, |
| "learning_rate": 0.0002980318019876242, |
| "loss": 5.515, |
| "step": 53000 |
| }, |
| { |
| "epoch": 13.275, |
| "grad_norm": 0.06180089712142944, |
| "learning_rate": 0.00029802805175323455, |
| "loss": 5.4248, |
| "step": 53100 |
| }, |
| { |
| "epoch": 13.3, |
| "grad_norm": 0.05961550027132034, |
| "learning_rate": 0.0002980243015188449, |
| "loss": 5.8408, |
| "step": 53200 |
| }, |
| { |
| "epoch": 13.325, |
| "grad_norm": 0.06609106063842773, |
| "learning_rate": 0.00029802055128445523, |
| "loss": 5.5214, |
| "step": 53300 |
| }, |
| { |
| "epoch": 13.35, |
| "grad_norm": 0.07037625461816788, |
| "learning_rate": 0.0002980168010500656, |
| "loss": 5.6422, |
| "step": 53400 |
| }, |
| { |
| "epoch": 13.375, |
| "grad_norm": 0.05968979373574257, |
| "learning_rate": 0.00029801305081567596, |
| "loss": 5.4027, |
| "step": 53500 |
| }, |
| { |
| "epoch": 13.4, |
| "grad_norm": 0.06201528012752533, |
| "learning_rate": 0.00029800930058128633, |
| "loss": 5.5331, |
| "step": 53600 |
| }, |
| { |
| "epoch": 13.425, |
| "grad_norm": 0.07820463925600052, |
| "learning_rate": 0.00029800555034689664, |
| "loss": 5.6112, |
| "step": 53700 |
| }, |
| { |
| "epoch": 13.45, |
| "grad_norm": 0.07531889528036118, |
| "learning_rate": 0.000298001800112507, |
| "loss": 5.5128, |
| "step": 53800 |
| }, |
| { |
| "epoch": 13.475, |
| "grad_norm": 0.06690291315317154, |
| "learning_rate": 0.00029799808738046127, |
| "loss": 5.443, |
| "step": 53900 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 0.08288581669330597, |
| "learning_rate": 0.00029799433714607163, |
| "loss": 5.5471, |
| "step": 54000 |
| }, |
| { |
| "epoch": 13.525, |
| "grad_norm": 0.06512220948934555, |
| "learning_rate": 0.00029799058691168195, |
| "loss": 5.475, |
| "step": 54100 |
| }, |
| { |
| "epoch": 13.55, |
| "grad_norm": 0.07862843573093414, |
| "learning_rate": 0.0002979868366772923, |
| "loss": 5.6017, |
| "step": 54200 |
| }, |
| { |
| "epoch": 13.575, |
| "grad_norm": 0.06599980592727661, |
| "learning_rate": 0.0002979830864429027, |
| "loss": 5.4367, |
| "step": 54300 |
| }, |
| { |
| "epoch": 13.6, |
| "grad_norm": 0.07014311850070953, |
| "learning_rate": 0.00029797933620851304, |
| "loss": 5.3765, |
| "step": 54400 |
| }, |
| { |
| "epoch": 13.625, |
| "grad_norm": 0.09498297423124313, |
| "learning_rate": 0.00029797558597412336, |
| "loss": 5.3329, |
| "step": 54500 |
| }, |
| { |
| "epoch": 13.65, |
| "grad_norm": 0.06557220965623856, |
| "learning_rate": 0.0002979718357397337, |
| "loss": 5.4082, |
| "step": 54600 |
| }, |
| { |
| "epoch": 13.675, |
| "grad_norm": 0.06320352107286453, |
| "learning_rate": 0.00029796808550534403, |
| "loss": 5.3671, |
| "step": 54700 |
| }, |
| { |
| "epoch": 13.7, |
| "grad_norm": 0.07630398869514465, |
| "learning_rate": 0.0002979643352709544, |
| "loss": 5.4613, |
| "step": 54800 |
| }, |
| { |
| "epoch": 13.725, |
| "grad_norm": 0.07285916805267334, |
| "learning_rate": 0.00029796058503656477, |
| "loss": 5.0222, |
| "step": 54900 |
| }, |
| { |
| "epoch": 13.75, |
| "grad_norm": 0.07314100861549377, |
| "learning_rate": 0.00029795683480217513, |
| "loss": 5.1593, |
| "step": 55000 |
| }, |
| { |
| "epoch": 13.775, |
| "grad_norm": 0.0632672905921936, |
| "learning_rate": 0.00029795308456778544, |
| "loss": 5.2524, |
| "step": 55100 |
| }, |
| { |
| "epoch": 13.8, |
| "grad_norm": 0.06146818399429321, |
| "learning_rate": 0.0002979493343333958, |
| "loss": 5.2068, |
| "step": 55200 |
| }, |
| { |
| "epoch": 13.825, |
| "grad_norm": 0.08438315987586975, |
| "learning_rate": 0.0002979455840990062, |
| "loss": 5.1854, |
| "step": 55300 |
| }, |
| { |
| "epoch": 13.85, |
| "grad_norm": 0.06263713538646698, |
| "learning_rate": 0.0002979418338646165, |
| "loss": 5.1888, |
| "step": 55400 |
| }, |
| { |
| "epoch": 13.875, |
| "grad_norm": 0.06485722959041595, |
| "learning_rate": 0.00029793808363022685, |
| "loss": 5.3774, |
| "step": 55500 |
| }, |
| { |
| "epoch": 13.9, |
| "grad_norm": 0.09563236683607101, |
| "learning_rate": 0.0002979343333958372, |
| "loss": 5.201, |
| "step": 55600 |
| }, |
| { |
| "epoch": 13.925, |
| "grad_norm": 0.06357564777135849, |
| "learning_rate": 0.0002979305831614476, |
| "loss": 5.1221, |
| "step": 55700 |
| }, |
| { |
| "epoch": 13.95, |
| "grad_norm": 0.06070085987448692, |
| "learning_rate": 0.0002979268329270579, |
| "loss": 5.1584, |
| "step": 55800 |
| }, |
| { |
| "epoch": 13.975, |
| "grad_norm": 0.0757615715265274, |
| "learning_rate": 0.00029792312019501216, |
| "loss": 5.0797, |
| "step": 55900 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.07182688266038895, |
| "learning_rate": 0.0002979193699606225, |
| "loss": 5.2988, |
| "step": 56000 |
| }, |
| { |
| "epoch": 14.025, |
| "grad_norm": 0.06348109245300293, |
| "learning_rate": 0.0002979156197262329, |
| "loss": 4.992, |
| "step": 56100 |
| }, |
| { |
| "epoch": 14.05, |
| "grad_norm": 0.07352128624916077, |
| "learning_rate": 0.0002979118694918432, |
| "loss": 4.9483, |
| "step": 56200 |
| }, |
| { |
| "epoch": 14.075, |
| "grad_norm": 0.0681919977068901, |
| "learning_rate": 0.00029790811925745357, |
| "loss": 5.1792, |
| "step": 56300 |
| }, |
| { |
| "epoch": 14.1, |
| "grad_norm": 0.06682088226079941, |
| "learning_rate": 0.0002979043690230639, |
| "loss": 4.8559, |
| "step": 56400 |
| }, |
| { |
| "epoch": 14.125, |
| "grad_norm": 0.06291857361793518, |
| "learning_rate": 0.00029790061878867425, |
| "loss": 4.9382, |
| "step": 56500 |
| }, |
| { |
| "epoch": 14.15, |
| "grad_norm": 0.07243198156356812, |
| "learning_rate": 0.0002978968685542846, |
| "loss": 5.0399, |
| "step": 56600 |
| }, |
| { |
| "epoch": 14.175, |
| "grad_norm": 0.06961022317409515, |
| "learning_rate": 0.000297893118319895, |
| "loss": 5.0745, |
| "step": 56700 |
| }, |
| { |
| "epoch": 14.2, |
| "grad_norm": 0.06203046441078186, |
| "learning_rate": 0.0002978893680855053, |
| "loss": 5.1403, |
| "step": 56800 |
| }, |
| { |
| "epoch": 14.225, |
| "grad_norm": 0.06188129261136055, |
| "learning_rate": 0.00029788561785111566, |
| "loss": 4.9122, |
| "step": 56900 |
| }, |
| { |
| "epoch": 14.25, |
| "grad_norm": 0.05759645998477936, |
| "learning_rate": 0.000297881867616726, |
| "loss": 5.0696, |
| "step": 57000 |
| }, |
| { |
| "epoch": 14.275, |
| "grad_norm": 0.0592036135494709, |
| "learning_rate": 0.0002978781173823364, |
| "loss": 5.1164, |
| "step": 57100 |
| }, |
| { |
| "epoch": 14.3, |
| "grad_norm": 0.06267797201871872, |
| "learning_rate": 0.0002978743671479467, |
| "loss": 5.0722, |
| "step": 57200 |
| }, |
| { |
| "epoch": 14.325, |
| "grad_norm": 0.07611776143312454, |
| "learning_rate": 0.00029787061691355707, |
| "loss": 4.9118, |
| "step": 57300 |
| }, |
| { |
| "epoch": 14.35, |
| "grad_norm": 0.061794403940439224, |
| "learning_rate": 0.00029786686667916744, |
| "loss": 5.013, |
| "step": 57400 |
| }, |
| { |
| "epoch": 14.375, |
| "grad_norm": 0.2047680765390396, |
| "learning_rate": 0.0002978631164447778, |
| "loss": 4.7667, |
| "step": 57500 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 0.0633254125714302, |
| "learning_rate": 0.0002978593662103881, |
| "loss": 4.8633, |
| "step": 57600 |
| }, |
| { |
| "epoch": 14.425, |
| "grad_norm": 0.06651504337787628, |
| "learning_rate": 0.0002978556159759985, |
| "loss": 4.9452, |
| "step": 57700 |
| }, |
| { |
| "epoch": 14.45, |
| "grad_norm": 0.07252359390258789, |
| "learning_rate": 0.00029785186574160885, |
| "loss": 4.8268, |
| "step": 57800 |
| }, |
| { |
| "epoch": 14.475, |
| "grad_norm": 0.07088153064250946, |
| "learning_rate": 0.0002978481530095631, |
| "loss": 4.7381, |
| "step": 57900 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 0.06644707918167114, |
| "learning_rate": 0.0002978444027751734, |
| "loss": 4.6716, |
| "step": 58000 |
| }, |
| { |
| "epoch": 14.525, |
| "grad_norm": 0.06577486544847488, |
| "learning_rate": 0.0002978406525407838, |
| "loss": 4.8125, |
| "step": 58100 |
| }, |
| { |
| "epoch": 14.55, |
| "grad_norm": 0.06577962636947632, |
| "learning_rate": 0.0002978369023063941, |
| "loss": 4.6842, |
| "step": 58200 |
| }, |
| { |
| "epoch": 14.575, |
| "grad_norm": 0.060136351734399796, |
| "learning_rate": 0.00029783315207200446, |
| "loss": 4.6219, |
| "step": 58300 |
| }, |
| { |
| "epoch": 14.6, |
| "grad_norm": 0.06826278567314148, |
| "learning_rate": 0.00029782940183761483, |
| "loss": 4.7876, |
| "step": 58400 |
| }, |
| { |
| "epoch": 14.625, |
| "grad_norm": 0.06896788626909256, |
| "learning_rate": 0.0002978256516032252, |
| "loss": 4.8651, |
| "step": 58500 |
| }, |
| { |
| "epoch": 14.65, |
| "grad_norm": 0.06548253446817398, |
| "learning_rate": 0.0002978219013688355, |
| "loss": 4.9228, |
| "step": 58600 |
| }, |
| { |
| "epoch": 14.675, |
| "grad_norm": 0.08236391097307205, |
| "learning_rate": 0.0002978181511344459, |
| "loss": 4.7074, |
| "step": 58700 |
| }, |
| { |
| "epoch": 14.7, |
| "grad_norm": 0.06781431287527084, |
| "learning_rate": 0.00029781440090005624, |
| "loss": 5.0659, |
| "step": 58800 |
| }, |
| { |
| "epoch": 14.725, |
| "grad_norm": 0.06290601193904877, |
| "learning_rate": 0.0002978106506656666, |
| "loss": 4.8844, |
| "step": 58900 |
| }, |
| { |
| "epoch": 14.75, |
| "grad_norm": 0.0578296072781086, |
| "learning_rate": 0.0002978069004312769, |
| "loss": 4.7095, |
| "step": 59000 |
| }, |
| { |
| "epoch": 14.775, |
| "grad_norm": 0.05320196598768234, |
| "learning_rate": 0.0002978031501968873, |
| "loss": 4.6838, |
| "step": 59100 |
| }, |
| { |
| "epoch": 14.8, |
| "grad_norm": 0.07847319543361664, |
| "learning_rate": 0.00029779939996249765, |
| "loss": 4.7263, |
| "step": 59200 |
| }, |
| { |
| "epoch": 14.825, |
| "grad_norm": 0.07580792158842087, |
| "learning_rate": 0.00029779564972810796, |
| "loss": 4.5927, |
| "step": 59300 |
| }, |
| { |
| "epoch": 14.85, |
| "grad_norm": 0.06336116045713425, |
| "learning_rate": 0.00029779189949371833, |
| "loss": 4.6524, |
| "step": 59400 |
| }, |
| { |
| "epoch": 14.875, |
| "grad_norm": 0.0706322193145752, |
| "learning_rate": 0.0002977881492593287, |
| "loss": 4.5591, |
| "step": 59500 |
| }, |
| { |
| "epoch": 14.9, |
| "grad_norm": 0.09078390896320343, |
| "learning_rate": 0.00029778439902493906, |
| "loss": 4.6377, |
| "step": 59600 |
| }, |
| { |
| "epoch": 14.925, |
| "grad_norm": 0.07508181035518646, |
| "learning_rate": 0.00029778064879054937, |
| "loss": 4.4043, |
| "step": 59700 |
| }, |
| { |
| "epoch": 14.95, |
| "grad_norm": 0.06288613379001617, |
| "learning_rate": 0.00029777689855615974, |
| "loss": 4.5363, |
| "step": 59800 |
| }, |
| { |
| "epoch": 14.975, |
| "grad_norm": 0.0686824843287468, |
| "learning_rate": 0.00029777318582411395, |
| "loss": 4.6031, |
| "step": 59900 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.0657496452331543, |
| "learning_rate": 0.0002977694355897243, |
| "loss": 4.4645, |
| "step": 60000 |
| }, |
| { |
| "epoch": 15.025, |
| "grad_norm": 0.0680643618106842, |
| "learning_rate": 0.0002977656853553347, |
| "loss": 4.6015, |
| "step": 60100 |
| }, |
| { |
| "epoch": 15.05, |
| "grad_norm": 0.06540867686271667, |
| "learning_rate": 0.00029776193512094504, |
| "loss": 4.4411, |
| "step": 60200 |
| }, |
| { |
| "epoch": 15.075, |
| "grad_norm": 0.060959845781326294, |
| "learning_rate": 0.00029775818488655536, |
| "loss": 4.2446, |
| "step": 60300 |
| }, |
| { |
| "epoch": 15.1, |
| "grad_norm": 0.07395045459270477, |
| "learning_rate": 0.0002977544346521657, |
| "loss": 4.4593, |
| "step": 60400 |
| }, |
| { |
| "epoch": 15.125, |
| "grad_norm": 0.0660228282213211, |
| "learning_rate": 0.0002977506844177761, |
| "loss": 4.2359, |
| "step": 60500 |
| }, |
| { |
| "epoch": 15.15, |
| "grad_norm": 0.06423047930002213, |
| "learning_rate": 0.00029774693418338645, |
| "loss": 4.4333, |
| "step": 60600 |
| }, |
| { |
| "epoch": 15.175, |
| "grad_norm": 0.07680130749940872, |
| "learning_rate": 0.00029774318394899677, |
| "loss": 4.4737, |
| "step": 60700 |
| }, |
| { |
| "epoch": 15.2, |
| "grad_norm": 0.0686013400554657, |
| "learning_rate": 0.00029773943371460713, |
| "loss": 4.301, |
| "step": 60800 |
| }, |
| { |
| "epoch": 15.225, |
| "grad_norm": 0.0519595630466938, |
| "learning_rate": 0.0002977356834802175, |
| "loss": 4.4112, |
| "step": 60900 |
| }, |
| { |
| "epoch": 15.25, |
| "grad_norm": 0.06710193306207657, |
| "learning_rate": 0.00029773193324582787, |
| "loss": 4.3652, |
| "step": 61000 |
| }, |
| { |
| "epoch": 15.275, |
| "grad_norm": 0.07808689773082733, |
| "learning_rate": 0.0002977281830114382, |
| "loss": 4.3473, |
| "step": 61100 |
| }, |
| { |
| "epoch": 15.3, |
| "grad_norm": 0.0767969936132431, |
| "learning_rate": 0.00029772443277704854, |
| "loss": 4.3302, |
| "step": 61200 |
| }, |
| { |
| "epoch": 15.325, |
| "grad_norm": 0.06145559623837471, |
| "learning_rate": 0.0002977206825426589, |
| "loss": 4.2091, |
| "step": 61300 |
| }, |
| { |
| "epoch": 15.35, |
| "grad_norm": 0.09096598625183105, |
| "learning_rate": 0.0002977169323082693, |
| "loss": 4.4397, |
| "step": 61400 |
| }, |
| { |
| "epoch": 15.375, |
| "grad_norm": 0.06596633046865463, |
| "learning_rate": 0.0002977131820738796, |
| "loss": 4.1544, |
| "step": 61500 |
| }, |
| { |
| "epoch": 15.4, |
| "grad_norm": 0.0632476657629013, |
| "learning_rate": 0.00029770943183948995, |
| "loss": 4.1507, |
| "step": 61600 |
| }, |
| { |
| "epoch": 15.425, |
| "grad_norm": 0.05707848072052002, |
| "learning_rate": 0.00029770568160510027, |
| "loss": 4.5147, |
| "step": 61700 |
| }, |
| { |
| "epoch": 15.45, |
| "grad_norm": 0.06603705137968063, |
| "learning_rate": 0.00029770193137071063, |
| "loss": 4.3091, |
| "step": 61800 |
| }, |
| { |
| "epoch": 15.475, |
| "grad_norm": 0.08647535741329193, |
| "learning_rate": 0.000297698181136321, |
| "loss": 4.4759, |
| "step": 61900 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 0.0747227743268013, |
| "learning_rate": 0.00029769443090193136, |
| "loss": 4.3265, |
| "step": 62000 |
| }, |
| { |
| "epoch": 15.525, |
| "grad_norm": 0.06563801318407059, |
| "learning_rate": 0.0002976906806675417, |
| "loss": 4.5796, |
| "step": 62100 |
| }, |
| { |
| "epoch": 15.55, |
| "grad_norm": 0.06297031790018082, |
| "learning_rate": 0.00029768693043315204, |
| "loss": 4.2309, |
| "step": 62200 |
| }, |
| { |
| "epoch": 15.575, |
| "grad_norm": 0.05998208001255989, |
| "learning_rate": 0.0002976831801987624, |
| "loss": 4.244, |
| "step": 62300 |
| }, |
| { |
| "epoch": 15.6, |
| "grad_norm": 0.057426031678915024, |
| "learning_rate": 0.0002976794299643728, |
| "loss": 4.4331, |
| "step": 62400 |
| }, |
| { |
| "epoch": 15.625, |
| "grad_norm": 0.06295296549797058, |
| "learning_rate": 0.0002976756797299831, |
| "loss": 3.9931, |
| "step": 62500 |
| }, |
| { |
| "epoch": 15.65, |
| "grad_norm": 0.07305531948804855, |
| "learning_rate": 0.00029767192949559345, |
| "loss": 4.1127, |
| "step": 62600 |
| }, |
| { |
| "epoch": 15.675, |
| "grad_norm": 0.057404179126024246, |
| "learning_rate": 0.0002976681792612038, |
| "loss": 4.1888, |
| "step": 62700 |
| }, |
| { |
| "epoch": 15.7, |
| "grad_norm": 0.05540831759572029, |
| "learning_rate": 0.0002976644290268142, |
| "loss": 4.3955, |
| "step": 62800 |
| }, |
| { |
| "epoch": 15.725, |
| "grad_norm": 0.05315635725855827, |
| "learning_rate": 0.0002976606787924245, |
| "loss": 4.4189, |
| "step": 62900 |
| }, |
| { |
| "epoch": 15.75, |
| "grad_norm": 0.06974928081035614, |
| "learning_rate": 0.0002976569285580348, |
| "loss": 4.2076, |
| "step": 63000 |
| }, |
| { |
| "epoch": 15.775, |
| "grad_norm": 0.06797333806753159, |
| "learning_rate": 0.00029765317832364523, |
| "loss": 4.0685, |
| "step": 63100 |
| }, |
| { |
| "epoch": 15.8, |
| "grad_norm": 0.07094912976026535, |
| "learning_rate": 0.00029764942808925554, |
| "loss": 4.0277, |
| "step": 63200 |
| }, |
| { |
| "epoch": 15.825, |
| "grad_norm": 0.0728229507803917, |
| "learning_rate": 0.0002976456778548659, |
| "loss": 4.2609, |
| "step": 63300 |
| }, |
| { |
| "epoch": 15.85, |
| "grad_norm": 0.05918316915631294, |
| "learning_rate": 0.0002976419276204762, |
| "loss": 4.2609, |
| "step": 63400 |
| }, |
| { |
| "epoch": 15.875, |
| "grad_norm": 0.06454843282699585, |
| "learning_rate": 0.0002976381773860866, |
| "loss": 4.0982, |
| "step": 63500 |
| }, |
| { |
| "epoch": 15.9, |
| "grad_norm": 0.07737816870212555, |
| "learning_rate": 0.00029763442715169695, |
| "loss": 4.0363, |
| "step": 63600 |
| }, |
| { |
| "epoch": 15.925, |
| "grad_norm": 0.06324774026870728, |
| "learning_rate": 0.0002976306769173073, |
| "loss": 3.8008, |
| "step": 63700 |
| }, |
| { |
| "epoch": 15.95, |
| "grad_norm": 0.05786865949630737, |
| "learning_rate": 0.00029762692668291763, |
| "loss": 3.8747, |
| "step": 63800 |
| }, |
| { |
| "epoch": 15.975, |
| "grad_norm": 0.06020934507250786, |
| "learning_rate": 0.0002976232139508719, |
| "loss": 3.9662, |
| "step": 63900 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.06533800065517426, |
| "learning_rate": 0.00029761946371648226, |
| "loss": 4.0263, |
| "step": 64000 |
| }, |
| { |
| "epoch": 16.025, |
| "grad_norm": 0.05861624330282211, |
| "learning_rate": 0.0002976157134820926, |
| "loss": 4.0456, |
| "step": 64100 |
| }, |
| { |
| "epoch": 16.05, |
| "grad_norm": 0.06453926116228104, |
| "learning_rate": 0.00029761196324770294, |
| "loss": 3.9041, |
| "step": 64200 |
| }, |
| { |
| "epoch": 16.075, |
| "grad_norm": 0.06458089500665665, |
| "learning_rate": 0.0002976082130133133, |
| "loss": 3.7986, |
| "step": 64300 |
| }, |
| { |
| "epoch": 16.1, |
| "grad_norm": 0.05067475885152817, |
| "learning_rate": 0.00029760446277892367, |
| "loss": 3.9836, |
| "step": 64400 |
| }, |
| { |
| "epoch": 16.125, |
| "grad_norm": 0.0557921938598156, |
| "learning_rate": 0.00029760071254453403, |
| "loss": 3.958, |
| "step": 64500 |
| }, |
| { |
| "epoch": 16.15, |
| "grad_norm": 0.05821559205651283, |
| "learning_rate": 0.00029759696231014435, |
| "loss": 4.0563, |
| "step": 64600 |
| }, |
| { |
| "epoch": 16.175, |
| "grad_norm": 0.06078817695379257, |
| "learning_rate": 0.0002975932120757547, |
| "loss": 4.0017, |
| "step": 64700 |
| }, |
| { |
| "epoch": 16.2, |
| "grad_norm": 0.07187299430370331, |
| "learning_rate": 0.0002975894618413651, |
| "loss": 3.7798, |
| "step": 64800 |
| }, |
| { |
| "epoch": 16.225, |
| "grad_norm": 0.05477326363325119, |
| "learning_rate": 0.00029758571160697544, |
| "loss": 3.7864, |
| "step": 64900 |
| }, |
| { |
| "epoch": 16.25, |
| "grad_norm": 0.06654859334230423, |
| "learning_rate": 0.00029758196137258576, |
| "loss": 3.9514, |
| "step": 65000 |
| }, |
| { |
| "epoch": 16.275, |
| "grad_norm": 0.0737365186214447, |
| "learning_rate": 0.0002975782111381961, |
| "loss": 3.9058, |
| "step": 65100 |
| }, |
| { |
| "epoch": 16.3, |
| "grad_norm": 0.06597916781902313, |
| "learning_rate": 0.00029757446090380643, |
| "loss": 3.9946, |
| "step": 65200 |
| }, |
| { |
| "epoch": 16.325, |
| "grad_norm": 0.05861925333738327, |
| "learning_rate": 0.0002975707106694168, |
| "loss": 3.9009, |
| "step": 65300 |
| }, |
| { |
| "epoch": 16.35, |
| "grad_norm": 0.06207166984677315, |
| "learning_rate": 0.00029756696043502717, |
| "loss": 3.9892, |
| "step": 65400 |
| }, |
| { |
| "epoch": 16.375, |
| "grad_norm": 0.07432432472705841, |
| "learning_rate": 0.00029756321020063753, |
| "loss": 3.7083, |
| "step": 65500 |
| }, |
| { |
| "epoch": 16.4, |
| "grad_norm": 0.05656394734978676, |
| "learning_rate": 0.00029755945996624784, |
| "loss": 3.8139, |
| "step": 65600 |
| }, |
| { |
| "epoch": 16.425, |
| "grad_norm": 0.07284687459468842, |
| "learning_rate": 0.0002975557097318582, |
| "loss": 3.8091, |
| "step": 65700 |
| }, |
| { |
| "epoch": 16.45, |
| "grad_norm": 0.06415148079395294, |
| "learning_rate": 0.0002975519594974686, |
| "loss": 3.8954, |
| "step": 65800 |
| }, |
| { |
| "epoch": 16.475, |
| "grad_norm": 0.06300424784421921, |
| "learning_rate": 0.00029754824676542284, |
| "loss": 3.5919, |
| "step": 65900 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 0.06578180938959122, |
| "learning_rate": 0.00029754449653103315, |
| "loss": 3.7936, |
| "step": 66000 |
| }, |
| { |
| "epoch": 16.525, |
| "grad_norm": 0.07465810328722, |
| "learning_rate": 0.0002975407462966435, |
| "loss": 3.6781, |
| "step": 66100 |
| }, |
| { |
| "epoch": 16.55, |
| "grad_norm": 0.05531006306409836, |
| "learning_rate": 0.0002975369960622539, |
| "loss": 3.8176, |
| "step": 66200 |
| }, |
| { |
| "epoch": 16.575, |
| "grad_norm": 0.057088643312454224, |
| "learning_rate": 0.00029753324582786425, |
| "loss": 3.8375, |
| "step": 66300 |
| }, |
| { |
| "epoch": 16.6, |
| "grad_norm": 0.06409061700105667, |
| "learning_rate": 0.00029752949559347456, |
| "loss": 3.6946, |
| "step": 66400 |
| }, |
| { |
| "epoch": 16.625, |
| "grad_norm": 0.06034286320209503, |
| "learning_rate": 0.0002975257453590849, |
| "loss": 3.7127, |
| "step": 66500 |
| }, |
| { |
| "epoch": 16.65, |
| "grad_norm": 0.06990322470664978, |
| "learning_rate": 0.0002975219951246953, |
| "loss": 3.7908, |
| "step": 66600 |
| }, |
| { |
| "epoch": 16.675, |
| "grad_norm": 0.07301350682973862, |
| "learning_rate": 0.0002975182448903056, |
| "loss": 3.6067, |
| "step": 66700 |
| }, |
| { |
| "epoch": 16.7, |
| "grad_norm": 0.06309019029140472, |
| "learning_rate": 0.00029751449465591597, |
| "loss": 3.6552, |
| "step": 66800 |
| }, |
| { |
| "epoch": 16.725, |
| "grad_norm": 0.07269258797168732, |
| "learning_rate": 0.0002975107444215263, |
| "loss": 3.6489, |
| "step": 66900 |
| }, |
| { |
| "epoch": 16.75, |
| "grad_norm": 0.07549503445625305, |
| "learning_rate": 0.00029750699418713665, |
| "loss": 3.6146, |
| "step": 67000 |
| }, |
| { |
| "epoch": 16.775, |
| "grad_norm": 0.06944973766803741, |
| "learning_rate": 0.000297503243952747, |
| "loss": 3.592, |
| "step": 67100 |
| }, |
| { |
| "epoch": 16.8, |
| "grad_norm": 0.05656867474317551, |
| "learning_rate": 0.0002974994937183574, |
| "loss": 3.7087, |
| "step": 67200 |
| }, |
| { |
| "epoch": 16.825, |
| "grad_norm": 0.06444111466407776, |
| "learning_rate": 0.0002974957434839677, |
| "loss": 3.5458, |
| "step": 67300 |
| }, |
| { |
| "epoch": 16.85, |
| "grad_norm": 0.05399918928742409, |
| "learning_rate": 0.00029749199324957806, |
| "loss": 3.6962, |
| "step": 67400 |
| }, |
| { |
| "epoch": 16.875, |
| "grad_norm": 0.06424950808286667, |
| "learning_rate": 0.0002974882430151884, |
| "loss": 3.5515, |
| "step": 67500 |
| }, |
| { |
| "epoch": 16.9, |
| "grad_norm": 0.05898202210664749, |
| "learning_rate": 0.0002974844927807988, |
| "loss": 3.6593, |
| "step": 67600 |
| }, |
| { |
| "epoch": 16.925, |
| "grad_norm": 0.06607525050640106, |
| "learning_rate": 0.0002974807425464091, |
| "loss": 3.5478, |
| "step": 67700 |
| }, |
| { |
| "epoch": 16.95, |
| "grad_norm": 0.06299087405204773, |
| "learning_rate": 0.00029747699231201947, |
| "loss": 3.7256, |
| "step": 67800 |
| }, |
| { |
| "epoch": 16.975, |
| "grad_norm": 0.063835009932518, |
| "learning_rate": 0.00029747327957997373, |
| "loss": 3.538, |
| "step": 67900 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.05786048248410225, |
| "learning_rate": 0.0002974695293455841, |
| "loss": 3.7246, |
| "step": 68000 |
| }, |
| { |
| "epoch": 17.025, |
| "grad_norm": 0.05804240703582764, |
| "learning_rate": 0.0002974657791111944, |
| "loss": 3.5207, |
| "step": 68100 |
| }, |
| { |
| "epoch": 17.05, |
| "grad_norm": 0.06179894134402275, |
| "learning_rate": 0.0002974620288768048, |
| "loss": 3.5634, |
| "step": 68200 |
| }, |
| { |
| "epoch": 17.075, |
| "grad_norm": 0.05166739225387573, |
| "learning_rate": 0.00029745827864241514, |
| "loss": 3.594, |
| "step": 68300 |
| }, |
| { |
| "epoch": 17.1, |
| "grad_norm": 0.05808790773153305, |
| "learning_rate": 0.0002974545284080255, |
| "loss": 3.4721, |
| "step": 68400 |
| }, |
| { |
| "epoch": 17.125, |
| "grad_norm": 0.058479100465774536, |
| "learning_rate": 0.0002974507781736358, |
| "loss": 3.4991, |
| "step": 68500 |
| }, |
| { |
| "epoch": 17.15, |
| "grad_norm": 0.06585648655891418, |
| "learning_rate": 0.0002974470279392462, |
| "loss": 3.4487, |
| "step": 68600 |
| }, |
| { |
| "epoch": 17.175, |
| "grad_norm": 0.07367991656064987, |
| "learning_rate": 0.0002974432777048565, |
| "loss": 3.715, |
| "step": 68700 |
| }, |
| { |
| "epoch": 17.2, |
| "grad_norm": 0.06693430244922638, |
| "learning_rate": 0.00029743952747046686, |
| "loss": 3.4574, |
| "step": 68800 |
| }, |
| { |
| "epoch": 17.225, |
| "grad_norm": 0.06379226595163345, |
| "learning_rate": 0.00029743577723607723, |
| "loss": 3.6117, |
| "step": 68900 |
| }, |
| { |
| "epoch": 17.25, |
| "grad_norm": 0.0511956624686718, |
| "learning_rate": 0.0002974320270016876, |
| "loss": 3.7448, |
| "step": 69000 |
| }, |
| { |
| "epoch": 17.275, |
| "grad_norm": 0.07336433976888657, |
| "learning_rate": 0.0002974282767672979, |
| "loss": 3.3539, |
| "step": 69100 |
| }, |
| { |
| "epoch": 17.3, |
| "grad_norm": 0.0531037300825119, |
| "learning_rate": 0.0002974245265329083, |
| "loss": 3.4722, |
| "step": 69200 |
| }, |
| { |
| "epoch": 17.325, |
| "grad_norm": 0.0836392492055893, |
| "learning_rate": 0.00029742077629851864, |
| "loss": 3.4829, |
| "step": 69300 |
| }, |
| { |
| "epoch": 17.35, |
| "grad_norm": 0.0543275885283947, |
| "learning_rate": 0.000297417026064129, |
| "loss": 3.3048, |
| "step": 69400 |
| }, |
| { |
| "epoch": 17.375, |
| "grad_norm": 0.05712301284074783, |
| "learning_rate": 0.0002974132758297393, |
| "loss": 3.3524, |
| "step": 69500 |
| }, |
| { |
| "epoch": 17.4, |
| "grad_norm": 0.07685862481594086, |
| "learning_rate": 0.0002974095255953497, |
| "loss": 3.4212, |
| "step": 69600 |
| }, |
| { |
| "epoch": 17.425, |
| "grad_norm": 0.06631585955619812, |
| "learning_rate": 0.00029740577536096005, |
| "loss": 3.3931, |
| "step": 69700 |
| }, |
| { |
| "epoch": 17.45, |
| "grad_norm": 0.05916072428226471, |
| "learning_rate": 0.0002974020251265704, |
| "loss": 3.4396, |
| "step": 69800 |
| }, |
| { |
| "epoch": 17.475, |
| "grad_norm": 0.06266429275274277, |
| "learning_rate": 0.0002973983123945246, |
| "loss": 3.618, |
| "step": 69900 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.07458827644586563, |
| "learning_rate": 0.000297394562160135, |
| "loss": 3.3892, |
| "step": 70000 |
| }, |
| { |
| "epoch": 17.525, |
| "grad_norm": 0.05758730694651604, |
| "learning_rate": 0.00029739081192574536, |
| "loss": 3.3696, |
| "step": 70100 |
| }, |
| { |
| "epoch": 17.55, |
| "grad_norm": 0.061953071504831314, |
| "learning_rate": 0.0002973870616913557, |
| "loss": 3.2163, |
| "step": 70200 |
| }, |
| { |
| "epoch": 17.575, |
| "grad_norm": 0.06715140491724014, |
| "learning_rate": 0.00029738331145696603, |
| "loss": 3.5115, |
| "step": 70300 |
| }, |
| { |
| "epoch": 17.6, |
| "grad_norm": 0.06628040969371796, |
| "learning_rate": 0.0002973795612225764, |
| "loss": 3.4019, |
| "step": 70400 |
| }, |
| { |
| "epoch": 17.625, |
| "grad_norm": 0.06109810248017311, |
| "learning_rate": 0.0002973758109881867, |
| "loss": 3.327, |
| "step": 70500 |
| }, |
| { |
| "epoch": 17.65, |
| "grad_norm": 0.05486061051487923, |
| "learning_rate": 0.0002973720607537971, |
| "loss": 3.4603, |
| "step": 70600 |
| }, |
| { |
| "epoch": 17.675, |
| "grad_norm": 0.058648984879255295, |
| "learning_rate": 0.00029736831051940744, |
| "loss": 3.5709, |
| "step": 70700 |
| }, |
| { |
| "epoch": 17.7, |
| "grad_norm": 0.06253077834844589, |
| "learning_rate": 0.00029736456028501776, |
| "loss": 3.1556, |
| "step": 70800 |
| }, |
| { |
| "epoch": 17.725, |
| "grad_norm": 0.05633246898651123, |
| "learning_rate": 0.0002973608100506281, |
| "loss": 3.2396, |
| "step": 70900 |
| }, |
| { |
| "epoch": 17.75, |
| "grad_norm": 0.07230902463197708, |
| "learning_rate": 0.0002973570598162385, |
| "loss": 3.2617, |
| "step": 71000 |
| }, |
| { |
| "epoch": 17.775, |
| "grad_norm": 0.06703296303749084, |
| "learning_rate": 0.00029735330958184886, |
| "loss": 3.3253, |
| "step": 71100 |
| }, |
| { |
| "epoch": 17.8, |
| "grad_norm": 0.05392139405012131, |
| "learning_rate": 0.00029734955934745917, |
| "loss": 3.1445, |
| "step": 71200 |
| }, |
| { |
| "epoch": 17.825, |
| "grad_norm": 0.059445902705192566, |
| "learning_rate": 0.00029734580911306953, |
| "loss": 3.2005, |
| "step": 71300 |
| }, |
| { |
| "epoch": 17.85, |
| "grad_norm": 0.05022546648979187, |
| "learning_rate": 0.0002973420588786799, |
| "loss": 3.2086, |
| "step": 71400 |
| }, |
| { |
| "epoch": 17.875, |
| "grad_norm": 0.05383516103029251, |
| "learning_rate": 0.00029733830864429027, |
| "loss": 3.0877, |
| "step": 71500 |
| }, |
| { |
| "epoch": 17.9, |
| "grad_norm": 0.055024441331624985, |
| "learning_rate": 0.0002973345584099006, |
| "loss": 3.27, |
| "step": 71600 |
| }, |
| { |
| "epoch": 17.925, |
| "grad_norm": 0.0565604642033577, |
| "learning_rate": 0.00029733080817551094, |
| "loss": 3.4085, |
| "step": 71700 |
| }, |
| { |
| "epoch": 17.95, |
| "grad_norm": 0.056899093091487885, |
| "learning_rate": 0.0002973270579411213, |
| "loss": 3.3568, |
| "step": 71800 |
| }, |
| { |
| "epoch": 17.975, |
| "grad_norm": 0.06129912659525871, |
| "learning_rate": 0.00029732334520907557, |
| "loss": 3.1591, |
| "step": 71900 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.06037045270204544, |
| "learning_rate": 0.0002973195949746859, |
| "loss": 3.3884, |
| "step": 72000 |
| }, |
| { |
| "epoch": 18.025, |
| "grad_norm": 0.059694815427064896, |
| "learning_rate": 0.00029731584474029625, |
| "loss": 3.0768, |
| "step": 72100 |
| }, |
| { |
| "epoch": 18.05, |
| "grad_norm": 0.06282085925340652, |
| "learning_rate": 0.00029731209450590656, |
| "loss": 3.3816, |
| "step": 72200 |
| }, |
| { |
| "epoch": 18.075, |
| "grad_norm": 0.05453978106379509, |
| "learning_rate": 0.00029730834427151693, |
| "loss": 3.1041, |
| "step": 72300 |
| }, |
| { |
| "epoch": 18.1, |
| "grad_norm": 0.0587979331612587, |
| "learning_rate": 0.0002973045940371273, |
| "loss": 3.1357, |
| "step": 72400 |
| }, |
| { |
| "epoch": 18.125, |
| "grad_norm": 0.05731925368309021, |
| "learning_rate": 0.00029730084380273766, |
| "loss": 3.0224, |
| "step": 72500 |
| }, |
| { |
| "epoch": 18.15, |
| "grad_norm": 0.05748147889971733, |
| "learning_rate": 0.00029729709356834797, |
| "loss": 3.1868, |
| "step": 72600 |
| }, |
| { |
| "epoch": 18.175, |
| "grad_norm": 0.2291877716779709, |
| "learning_rate": 0.00029729334333395834, |
| "loss": 3.5219, |
| "step": 72700 |
| }, |
| { |
| "epoch": 18.2, |
| "grad_norm": 0.05291415750980377, |
| "learning_rate": 0.0002972895930995687, |
| "loss": 3.0433, |
| "step": 72800 |
| }, |
| { |
| "epoch": 18.225, |
| "grad_norm": 0.05900726094841957, |
| "learning_rate": 0.00029728584286517907, |
| "loss": 3.2394, |
| "step": 72900 |
| }, |
| { |
| "epoch": 18.25, |
| "grad_norm": 0.05879193916916847, |
| "learning_rate": 0.0002972820926307894, |
| "loss": 3.2482, |
| "step": 73000 |
| }, |
| { |
| "epoch": 18.275, |
| "grad_norm": 0.061925821006298065, |
| "learning_rate": 0.00029727834239639975, |
| "loss": 3.1974, |
| "step": 73100 |
| }, |
| { |
| "epoch": 18.3, |
| "grad_norm": 0.07049068808555603, |
| "learning_rate": 0.0002972745921620101, |
| "loss": 3.2512, |
| "step": 73200 |
| }, |
| { |
| "epoch": 18.325, |
| "grad_norm": 0.06102385371923447, |
| "learning_rate": 0.0002972708419276205, |
| "loss": 3.1982, |
| "step": 73300 |
| }, |
| { |
| "epoch": 18.35, |
| "grad_norm": 0.05520262196660042, |
| "learning_rate": 0.0002972670916932308, |
| "loss": 3.054, |
| "step": 73400 |
| }, |
| { |
| "epoch": 18.375, |
| "grad_norm": 0.05517415702342987, |
| "learning_rate": 0.00029726334145884116, |
| "loss": 3.0914, |
| "step": 73500 |
| }, |
| { |
| "epoch": 18.4, |
| "grad_norm": 0.06400242447853088, |
| "learning_rate": 0.0002972595912244515, |
| "loss": 3.1063, |
| "step": 73600 |
| }, |
| { |
| "epoch": 18.425, |
| "grad_norm": 0.061084117740392685, |
| "learning_rate": 0.0002972558409900619, |
| "loss": 3.1149, |
| "step": 73700 |
| }, |
| { |
| "epoch": 18.45, |
| "grad_norm": 0.09352370351552963, |
| "learning_rate": 0.0002972520907556722, |
| "loss": 3.0725, |
| "step": 73800 |
| }, |
| { |
| "epoch": 18.475, |
| "grad_norm": 0.059218719601631165, |
| "learning_rate": 0.00029724837802362646, |
| "loss": 3.0702, |
| "step": 73900 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 0.06091728433966637, |
| "learning_rate": 0.0002972446277892368, |
| "loss": 2.8734, |
| "step": 74000 |
| }, |
| { |
| "epoch": 18.525, |
| "grad_norm": 0.056753043085336685, |
| "learning_rate": 0.00029724087755484714, |
| "loss": 3.0829, |
| "step": 74100 |
| }, |
| { |
| "epoch": 18.55, |
| "grad_norm": 0.053419552743434906, |
| "learning_rate": 0.0002972371273204575, |
| "loss": 3.1694, |
| "step": 74200 |
| }, |
| { |
| "epoch": 18.575, |
| "grad_norm": 0.054798588156700134, |
| "learning_rate": 0.0002972333770860679, |
| "loss": 3.1102, |
| "step": 74300 |
| }, |
| { |
| "epoch": 18.6, |
| "grad_norm": 0.058476317673921585, |
| "learning_rate": 0.0002972296268516782, |
| "loss": 3.1152, |
| "step": 74400 |
| }, |
| { |
| "epoch": 18.625, |
| "grad_norm": 0.059114113450050354, |
| "learning_rate": 0.00029722587661728855, |
| "loss": 3.1364, |
| "step": 74500 |
| }, |
| { |
| "epoch": 18.65, |
| "grad_norm": 0.06834947317838669, |
| "learning_rate": 0.0002972221263828989, |
| "loss": 3.126, |
| "step": 74600 |
| }, |
| { |
| "epoch": 18.675, |
| "grad_norm": 0.05191313102841377, |
| "learning_rate": 0.00029721837614850923, |
| "loss": 3.0284, |
| "step": 74700 |
| }, |
| { |
| "epoch": 18.7, |
| "grad_norm": 0.07164154201745987, |
| "learning_rate": 0.0002972146259141196, |
| "loss": 2.9633, |
| "step": 74800 |
| }, |
| { |
| "epoch": 18.725, |
| "grad_norm": 0.05095268040895462, |
| "learning_rate": 0.00029721087567972996, |
| "loss": 3.0032, |
| "step": 74900 |
| }, |
| { |
| "epoch": 18.75, |
| "grad_norm": 0.05199890211224556, |
| "learning_rate": 0.00029720712544534033, |
| "loss": 3.0957, |
| "step": 75000 |
| }, |
| { |
| "epoch": 18.775, |
| "grad_norm": 0.08117477595806122, |
| "learning_rate": 0.00029720337521095064, |
| "loss": 3.0001, |
| "step": 75100 |
| }, |
| { |
| "epoch": 18.8, |
| "grad_norm": 0.05241430178284645, |
| "learning_rate": 0.000297199624976561, |
| "loss": 2.9402, |
| "step": 75200 |
| }, |
| { |
| "epoch": 18.825, |
| "grad_norm": 0.05886770412325859, |
| "learning_rate": 0.0002971958747421714, |
| "loss": 3.095, |
| "step": 75300 |
| }, |
| { |
| "epoch": 18.85, |
| "grad_norm": 0.05727067589759827, |
| "learning_rate": 0.00029719212450778174, |
| "loss": 2.9662, |
| "step": 75400 |
| }, |
| { |
| "epoch": 18.875, |
| "grad_norm": 0.0689665749669075, |
| "learning_rate": 0.00029718837427339205, |
| "loss": 2.7757, |
| "step": 75500 |
| }, |
| { |
| "epoch": 18.9, |
| "grad_norm": 0.05945652350783348, |
| "learning_rate": 0.0002971846240390024, |
| "loss": 2.822, |
| "step": 75600 |
| }, |
| { |
| "epoch": 18.925, |
| "grad_norm": 0.05478528141975403, |
| "learning_rate": 0.00029718087380461273, |
| "loss": 3.0564, |
| "step": 75700 |
| }, |
| { |
| "epoch": 18.95, |
| "grad_norm": 0.05541827157139778, |
| "learning_rate": 0.0002971771235702231, |
| "loss": 2.9453, |
| "step": 75800 |
| }, |
| { |
| "epoch": 18.975, |
| "grad_norm": 0.05722896754741669, |
| "learning_rate": 0.00029717341083817736, |
| "loss": 2.9093, |
| "step": 75900 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.055735573172569275, |
| "learning_rate": 0.0002971696606037877, |
| "loss": 2.9792, |
| "step": 76000 |
| }, |
| { |
| "epoch": 19.025, |
| "grad_norm": 0.05422914773225784, |
| "learning_rate": 0.00029716591036939804, |
| "loss": 2.847, |
| "step": 76100 |
| }, |
| { |
| "epoch": 19.05, |
| "grad_norm": 0.059790875762701035, |
| "learning_rate": 0.0002971621601350084, |
| "loss": 2.9788, |
| "step": 76200 |
| }, |
| { |
| "epoch": 19.075, |
| "grad_norm": 0.07695723325014114, |
| "learning_rate": 0.00029715840990061877, |
| "loss": 2.9956, |
| "step": 76300 |
| }, |
| { |
| "epoch": 19.1, |
| "grad_norm": 0.0579293929040432, |
| "learning_rate": 0.00029715465966622913, |
| "loss": 3.2976, |
| "step": 76400 |
| }, |
| { |
| "epoch": 19.125, |
| "grad_norm": 0.05396733060479164, |
| "learning_rate": 0.00029715090943183945, |
| "loss": 2.9022, |
| "step": 76500 |
| }, |
| { |
| "epoch": 19.15, |
| "grad_norm": 0.056989822536706924, |
| "learning_rate": 0.0002971471591974498, |
| "loss": 2.7142, |
| "step": 76600 |
| }, |
| { |
| "epoch": 19.175, |
| "grad_norm": 0.05296149477362633, |
| "learning_rate": 0.0002971434089630602, |
| "loss": 2.8858, |
| "step": 76700 |
| }, |
| { |
| "epoch": 19.2, |
| "grad_norm": 0.061122532933950424, |
| "learning_rate": 0.00029713965872867054, |
| "loss": 2.7446, |
| "step": 76800 |
| }, |
| { |
| "epoch": 19.225, |
| "grad_norm": 0.05955662950873375, |
| "learning_rate": 0.00029713590849428086, |
| "loss": 2.6995, |
| "step": 76900 |
| }, |
| { |
| "epoch": 19.25, |
| "grad_norm": 0.0610017292201519, |
| "learning_rate": 0.0002971321582598912, |
| "loss": 2.7667, |
| "step": 77000 |
| }, |
| { |
| "epoch": 19.275, |
| "grad_norm": 0.05846131220459938, |
| "learning_rate": 0.0002971284080255016, |
| "loss": 2.7781, |
| "step": 77100 |
| }, |
| { |
| "epoch": 19.3, |
| "grad_norm": 0.05651117116212845, |
| "learning_rate": 0.00029712465779111195, |
| "loss": 2.814, |
| "step": 77200 |
| }, |
| { |
| "epoch": 19.325, |
| "grad_norm": 0.05765095725655556, |
| "learning_rate": 0.00029712090755672227, |
| "loss": 2.7334, |
| "step": 77300 |
| }, |
| { |
| "epoch": 19.35, |
| "grad_norm": 0.0659993514418602, |
| "learning_rate": 0.00029711715732233263, |
| "loss": 2.8981, |
| "step": 77400 |
| }, |
| { |
| "epoch": 19.375, |
| "grad_norm": 0.0573100671172142, |
| "learning_rate": 0.00029711340708794294, |
| "loss": 2.8433, |
| "step": 77500 |
| }, |
| { |
| "epoch": 19.4, |
| "grad_norm": 0.06855395436286926, |
| "learning_rate": 0.0002971096568535533, |
| "loss": 2.9283, |
| "step": 77600 |
| }, |
| { |
| "epoch": 19.425, |
| "grad_norm": 0.05601441487669945, |
| "learning_rate": 0.0002971059066191637, |
| "loss": 2.8565, |
| "step": 77700 |
| }, |
| { |
| "epoch": 19.45, |
| "grad_norm": 0.07347328960895538, |
| "learning_rate": 0.00029710215638477404, |
| "loss": 2.7694, |
| "step": 77800 |
| }, |
| { |
| "epoch": 19.475, |
| "grad_norm": 0.05399454012513161, |
| "learning_rate": 0.00029709844365272825, |
| "loss": 2.8265, |
| "step": 77900 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 0.05960391089320183, |
| "learning_rate": 0.0002970946934183386, |
| "loss": 2.86, |
| "step": 78000 |
| }, |
| { |
| "epoch": 19.525, |
| "grad_norm": 0.050205573439598083, |
| "learning_rate": 0.000297090943183949, |
| "loss": 2.7896, |
| "step": 78100 |
| }, |
| { |
| "epoch": 19.55, |
| "grad_norm": 0.061351437121629715, |
| "learning_rate": 0.0002970871929495593, |
| "loss": 2.7925, |
| "step": 78200 |
| }, |
| { |
| "epoch": 19.575, |
| "grad_norm": 0.05008727312088013, |
| "learning_rate": 0.00029708344271516966, |
| "loss": 2.745, |
| "step": 78300 |
| }, |
| { |
| "epoch": 19.6, |
| "grad_norm": 0.05771077796816826, |
| "learning_rate": 0.0002970797299831239, |
| "loss": 2.8652, |
| "step": 78400 |
| }, |
| { |
| "epoch": 19.625, |
| "grad_norm": 0.053159620612859726, |
| "learning_rate": 0.0002970759797487343, |
| "loss": 2.8612, |
| "step": 78500 |
| }, |
| { |
| "epoch": 19.65, |
| "grad_norm": 0.05607482045888901, |
| "learning_rate": 0.0002970722295143446, |
| "loss": 2.8825, |
| "step": 78600 |
| }, |
| { |
| "epoch": 19.675, |
| "grad_norm": 0.05175361409783363, |
| "learning_rate": 0.00029706847927995497, |
| "loss": 2.9037, |
| "step": 78700 |
| }, |
| { |
| "epoch": 19.7, |
| "grad_norm": 0.059691160917282104, |
| "learning_rate": 0.00029706472904556533, |
| "loss": 2.745, |
| "step": 78800 |
| }, |
| { |
| "epoch": 19.725, |
| "grad_norm": 0.062432222068309784, |
| "learning_rate": 0.0002970609788111757, |
| "loss": 2.6383, |
| "step": 78900 |
| }, |
| { |
| "epoch": 19.75, |
| "grad_norm": 0.06708359718322754, |
| "learning_rate": 0.000297057228576786, |
| "loss": 2.5807, |
| "step": 79000 |
| }, |
| { |
| "epoch": 19.775, |
| "grad_norm": 0.060443244874477386, |
| "learning_rate": 0.0002970534783423964, |
| "loss": 2.7167, |
| "step": 79100 |
| }, |
| { |
| "epoch": 19.8, |
| "grad_norm": 0.060145530849695206, |
| "learning_rate": 0.0002970497281080067, |
| "loss": 2.7283, |
| "step": 79200 |
| }, |
| { |
| "epoch": 19.825, |
| "grad_norm": 0.06600401550531387, |
| "learning_rate": 0.00029704597787361705, |
| "loss": 2.8868, |
| "step": 79300 |
| }, |
| { |
| "epoch": 19.85, |
| "grad_norm": 0.0514482781291008, |
| "learning_rate": 0.0002970422276392274, |
| "loss": 2.59, |
| "step": 79400 |
| }, |
| { |
| "epoch": 19.875, |
| "grad_norm": 0.07618112862110138, |
| "learning_rate": 0.0002970384774048378, |
| "loss": 2.5086, |
| "step": 79500 |
| }, |
| { |
| "epoch": 19.9, |
| "grad_norm": 0.05636357143521309, |
| "learning_rate": 0.0002970347271704481, |
| "loss": 2.7034, |
| "step": 79600 |
| }, |
| { |
| "epoch": 19.925, |
| "grad_norm": 0.056812651455402374, |
| "learning_rate": 0.00029703097693605847, |
| "loss": 2.6996, |
| "step": 79700 |
| }, |
| { |
| "epoch": 19.95, |
| "grad_norm": 0.07078476995229721, |
| "learning_rate": 0.00029702722670166883, |
| "loss": 2.8029, |
| "step": 79800 |
| }, |
| { |
| "epoch": 19.975, |
| "grad_norm": 0.055067744106054306, |
| "learning_rate": 0.0002970234764672792, |
| "loss": 2.8455, |
| "step": 79900 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.054148148745298386, |
| "learning_rate": 0.0002970197262328895, |
| "loss": 2.7438, |
| "step": 80000 |
| }, |
| { |
| "epoch": 20.025, |
| "grad_norm": 0.0576615035533905, |
| "learning_rate": 0.0002970159759984999, |
| "loss": 2.7244, |
| "step": 80100 |
| }, |
| { |
| "epoch": 20.05, |
| "grad_norm": 0.05849044770002365, |
| "learning_rate": 0.00029701222576411024, |
| "loss": 2.6015, |
| "step": 80200 |
| }, |
| { |
| "epoch": 20.075, |
| "grad_norm": 0.05542527511715889, |
| "learning_rate": 0.0002970084755297206, |
| "loss": 2.6276, |
| "step": 80300 |
| }, |
| { |
| "epoch": 20.1, |
| "grad_norm": 0.06275394558906555, |
| "learning_rate": 0.0002970047252953309, |
| "loss": 2.601, |
| "step": 80400 |
| }, |
| { |
| "epoch": 20.125, |
| "grad_norm": 0.05756799504160881, |
| "learning_rate": 0.0002970009750609413, |
| "loss": 2.6095, |
| "step": 80500 |
| }, |
| { |
| "epoch": 20.15, |
| "grad_norm": 0.05315446853637695, |
| "learning_rate": 0.00029699722482655165, |
| "loss": 2.8117, |
| "step": 80600 |
| }, |
| { |
| "epoch": 20.175, |
| "grad_norm": 0.06292139738798141, |
| "learning_rate": 0.000296993474592162, |
| "loss": 2.5364, |
| "step": 80700 |
| }, |
| { |
| "epoch": 20.2, |
| "grad_norm": 0.05451088026165962, |
| "learning_rate": 0.00029698972435777233, |
| "loss": 2.6838, |
| "step": 80800 |
| }, |
| { |
| "epoch": 20.225, |
| "grad_norm": 0.05063945800065994, |
| "learning_rate": 0.0002969859741233827, |
| "loss": 2.573, |
| "step": 80900 |
| }, |
| { |
| "epoch": 20.25, |
| "grad_norm": 0.058889806270599365, |
| "learning_rate": 0.000296982223888993, |
| "loss": 2.4947, |
| "step": 81000 |
| }, |
| { |
| "epoch": 20.275, |
| "grad_norm": 0.07975181192159653, |
| "learning_rate": 0.0002969784736546034, |
| "loss": 2.5364, |
| "step": 81100 |
| }, |
| { |
| "epoch": 20.3, |
| "grad_norm": 0.05763572081923485, |
| "learning_rate": 0.00029697472342021374, |
| "loss": 2.4907, |
| "step": 81200 |
| }, |
| { |
| "epoch": 20.325, |
| "grad_norm": 0.05867898836731911, |
| "learning_rate": 0.0002969709731858241, |
| "loss": 2.5361, |
| "step": 81300 |
| }, |
| { |
| "epoch": 20.35, |
| "grad_norm": 0.0528886653482914, |
| "learning_rate": 0.0002969672229514344, |
| "loss": 2.6669, |
| "step": 81400 |
| }, |
| { |
| "epoch": 20.375, |
| "grad_norm": 0.060931917279958725, |
| "learning_rate": 0.0002969634727170448, |
| "loss": 2.4697, |
| "step": 81500 |
| }, |
| { |
| "epoch": 20.4, |
| "grad_norm": 0.05871622636914253, |
| "learning_rate": 0.00029695972248265515, |
| "loss": 2.4717, |
| "step": 81600 |
| }, |
| { |
| "epoch": 20.425, |
| "grad_norm": 0.060853052884340286, |
| "learning_rate": 0.0002969559722482655, |
| "loss": 2.5891, |
| "step": 81700 |
| }, |
| { |
| "epoch": 20.45, |
| "grad_norm": 0.052957359701395035, |
| "learning_rate": 0.00029695222201387583, |
| "loss": 2.5919, |
| "step": 81800 |
| }, |
| { |
| "epoch": 20.475, |
| "grad_norm": 0.054768215864896774, |
| "learning_rate": 0.0002969484717794862, |
| "loss": 2.5348, |
| "step": 81900 |
| }, |
| { |
| "epoch": 20.5, |
| "grad_norm": 0.049939971417188644, |
| "learning_rate": 0.00029694472154509656, |
| "loss": 2.6501, |
| "step": 82000 |
| }, |
| { |
| "epoch": 20.525, |
| "grad_norm": 0.056562915444374084, |
| "learning_rate": 0.0002969409713107069, |
| "loss": 2.6031, |
| "step": 82100 |
| }, |
| { |
| "epoch": 20.55, |
| "grad_norm": 0.05061310529708862, |
| "learning_rate": 0.00029693722107631724, |
| "loss": 2.5924, |
| "step": 82200 |
| }, |
| { |
| "epoch": 20.575, |
| "grad_norm": 0.05474073067307472, |
| "learning_rate": 0.0002969334708419276, |
| "loss": 2.7109, |
| "step": 82300 |
| }, |
| { |
| "epoch": 20.6, |
| "grad_norm": 0.062750443816185, |
| "learning_rate": 0.00029692975810988187, |
| "loss": 2.5636, |
| "step": 82400 |
| }, |
| { |
| "epoch": 20.625, |
| "grad_norm": 0.05921516939997673, |
| "learning_rate": 0.0002969260078754922, |
| "loss": 2.4478, |
| "step": 82500 |
| }, |
| { |
| "epoch": 20.65, |
| "grad_norm": 0.06074066460132599, |
| "learning_rate": 0.00029692225764110254, |
| "loss": 2.5207, |
| "step": 82600 |
| }, |
| { |
| "epoch": 20.675, |
| "grad_norm": 0.06394727528095245, |
| "learning_rate": 0.00029691850740671286, |
| "loss": 2.7291, |
| "step": 82700 |
| }, |
| { |
| "epoch": 20.7, |
| "grad_norm": 0.06293661147356033, |
| "learning_rate": 0.0002969147571723232, |
| "loss": 2.5454, |
| "step": 82800 |
| }, |
| { |
| "epoch": 20.725, |
| "grad_norm": 0.049685824662446976, |
| "learning_rate": 0.0002969110069379336, |
| "loss": 2.7017, |
| "step": 82900 |
| }, |
| { |
| "epoch": 20.75, |
| "grad_norm": 0.0517297200858593, |
| "learning_rate": 0.00029690725670354396, |
| "loss": 2.5524, |
| "step": 83000 |
| }, |
| { |
| "epoch": 20.775, |
| "grad_norm": 0.061634670943021774, |
| "learning_rate": 0.00029690350646915427, |
| "loss": 2.4389, |
| "step": 83100 |
| }, |
| { |
| "epoch": 20.8, |
| "grad_norm": 0.06085900962352753, |
| "learning_rate": 0.00029689975623476463, |
| "loss": 2.4254, |
| "step": 83200 |
| }, |
| { |
| "epoch": 20.825, |
| "grad_norm": 0.05363364890217781, |
| "learning_rate": 0.000296896006000375, |
| "loss": 2.3591, |
| "step": 83300 |
| }, |
| { |
| "epoch": 20.85, |
| "grad_norm": 0.051609691232442856, |
| "learning_rate": 0.00029689225576598537, |
| "loss": 2.5282, |
| "step": 83400 |
| }, |
| { |
| "epoch": 20.875, |
| "grad_norm": 0.04989041015505791, |
| "learning_rate": 0.0002968885055315957, |
| "loss": 2.537, |
| "step": 83500 |
| }, |
| { |
| "epoch": 20.9, |
| "grad_norm": 0.053229689598083496, |
| "learning_rate": 0.00029688475529720604, |
| "loss": 2.5949, |
| "step": 83600 |
| }, |
| { |
| "epoch": 20.925, |
| "grad_norm": 0.05230165645480156, |
| "learning_rate": 0.0002968810050628164, |
| "loss": 2.4183, |
| "step": 83700 |
| }, |
| { |
| "epoch": 20.95, |
| "grad_norm": 0.05094073340296745, |
| "learning_rate": 0.0002968772548284268, |
| "loss": 2.6191, |
| "step": 83800 |
| }, |
| { |
| "epoch": 20.975, |
| "grad_norm": 0.05941576883196831, |
| "learning_rate": 0.0002968735045940371, |
| "loss": 2.3788, |
| "step": 83900 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.05283214896917343, |
| "learning_rate": 0.00029686975435964745, |
| "loss": 2.5303, |
| "step": 84000 |
| }, |
| { |
| "epoch": 21.025, |
| "grad_norm": 0.06153716892004013, |
| "learning_rate": 0.0002968660041252578, |
| "loss": 2.4201, |
| "step": 84100 |
| }, |
| { |
| "epoch": 21.05, |
| "grad_norm": 0.05074555054306984, |
| "learning_rate": 0.0002968622538908682, |
| "loss": 2.4179, |
| "step": 84200 |
| }, |
| { |
| "epoch": 21.075, |
| "grad_norm": 0.05797216296195984, |
| "learning_rate": 0.0002968585036564785, |
| "loss": 2.3018, |
| "step": 84300 |
| }, |
| { |
| "epoch": 21.1, |
| "grad_norm": 0.053176261484622955, |
| "learning_rate": 0.00029685475342208886, |
| "loss": 2.4499, |
| "step": 84400 |
| }, |
| { |
| "epoch": 21.125, |
| "grad_norm": 0.0612250491976738, |
| "learning_rate": 0.00029685104069004307, |
| "loss": 2.5186, |
| "step": 84500 |
| }, |
| { |
| "epoch": 21.15, |
| "grad_norm": 0.055981434881687164, |
| "learning_rate": 0.00029684729045565344, |
| "loss": 2.3994, |
| "step": 84600 |
| }, |
| { |
| "epoch": 21.175, |
| "grad_norm": 0.07191935181617737, |
| "learning_rate": 0.0002968435402212638, |
| "loss": 2.4054, |
| "step": 84700 |
| }, |
| { |
| "epoch": 21.2, |
| "grad_norm": 0.05557156354188919, |
| "learning_rate": 0.00029683978998687417, |
| "loss": 2.3924, |
| "step": 84800 |
| }, |
| { |
| "epoch": 21.225, |
| "grad_norm": 0.06246166303753853, |
| "learning_rate": 0.0002968360397524845, |
| "loss": 2.4453, |
| "step": 84900 |
| }, |
| { |
| "epoch": 21.25, |
| "grad_norm": 0.061136774718761444, |
| "learning_rate": 0.00029683228951809485, |
| "loss": 2.3465, |
| "step": 85000 |
| }, |
| { |
| "epoch": 21.275, |
| "grad_norm": 0.06496226042509079, |
| "learning_rate": 0.0002968285392837052, |
| "loss": 2.356, |
| "step": 85100 |
| }, |
| { |
| "epoch": 21.3, |
| "grad_norm": 0.10879474133253098, |
| "learning_rate": 0.0002968247890493156, |
| "loss": 2.3113, |
| "step": 85200 |
| }, |
| { |
| "epoch": 21.325, |
| "grad_norm": 0.07896184921264648, |
| "learning_rate": 0.0002968210388149259, |
| "loss": 2.3167, |
| "step": 85300 |
| }, |
| { |
| "epoch": 21.35, |
| "grad_norm": 0.05807124823331833, |
| "learning_rate": 0.00029681728858053626, |
| "loss": 2.464, |
| "step": 85400 |
| }, |
| { |
| "epoch": 21.375, |
| "grad_norm": 0.05621746554970741, |
| "learning_rate": 0.0002968135383461466, |
| "loss": 2.4666, |
| "step": 85500 |
| }, |
| { |
| "epoch": 21.4, |
| "grad_norm": 0.06423439085483551, |
| "learning_rate": 0.000296809788111757, |
| "loss": 2.4151, |
| "step": 85600 |
| }, |
| { |
| "epoch": 21.425, |
| "grad_norm": 0.053314123302698135, |
| "learning_rate": 0.0002968060378773673, |
| "loss": 2.5222, |
| "step": 85700 |
| }, |
| { |
| "epoch": 21.45, |
| "grad_norm": 0.060538969933986664, |
| "learning_rate": 0.00029680228764297767, |
| "loss": 2.2422, |
| "step": 85800 |
| }, |
| { |
| "epoch": 21.475, |
| "grad_norm": 0.05905874818563461, |
| "learning_rate": 0.00029679853740858804, |
| "loss": 2.2856, |
| "step": 85900 |
| }, |
| { |
| "epoch": 21.5, |
| "grad_norm": 0.05516530200839043, |
| "learning_rate": 0.00029679478717419835, |
| "loss": 2.3191, |
| "step": 86000 |
| }, |
| { |
| "epoch": 21.525, |
| "grad_norm": 0.06160394474864006, |
| "learning_rate": 0.0002967910369398087, |
| "loss": 2.3382, |
| "step": 86100 |
| }, |
| { |
| "epoch": 21.55, |
| "grad_norm": 0.05599430948495865, |
| "learning_rate": 0.000296787286705419, |
| "loss": 2.4985, |
| "step": 86200 |
| }, |
| { |
| "epoch": 21.575, |
| "grad_norm": 0.06205850839614868, |
| "learning_rate": 0.0002967835364710294, |
| "loss": 2.4363, |
| "step": 86300 |
| }, |
| { |
| "epoch": 21.6, |
| "grad_norm": 0.05747246369719505, |
| "learning_rate": 0.00029677978623663976, |
| "loss": 2.3009, |
| "step": 86400 |
| }, |
| { |
| "epoch": 21.625, |
| "grad_norm": 0.05334313213825226, |
| "learning_rate": 0.000296776073504594, |
| "loss": 2.213, |
| "step": 86500 |
| }, |
| { |
| "epoch": 21.65, |
| "grad_norm": 0.05755939334630966, |
| "learning_rate": 0.00029677232327020433, |
| "loss": 2.3473, |
| "step": 86600 |
| }, |
| { |
| "epoch": 21.675, |
| "grad_norm": 0.06077682599425316, |
| "learning_rate": 0.0002967685730358147, |
| "loss": 2.3133, |
| "step": 86700 |
| }, |
| { |
| "epoch": 21.7, |
| "grad_norm": 0.04741760715842247, |
| "learning_rate": 0.00029676482280142506, |
| "loss": 2.2298, |
| "step": 86800 |
| }, |
| { |
| "epoch": 21.725, |
| "grad_norm": 0.05226515606045723, |
| "learning_rate": 0.00029676107256703543, |
| "loss": 2.3709, |
| "step": 86900 |
| }, |
| { |
| "epoch": 21.75, |
| "grad_norm": 0.05925588309764862, |
| "learning_rate": 0.00029675732233264574, |
| "loss": 2.3128, |
| "step": 87000 |
| }, |
| { |
| "epoch": 21.775, |
| "grad_norm": 0.05521254613995552, |
| "learning_rate": 0.0002967535720982561, |
| "loss": 2.1846, |
| "step": 87100 |
| }, |
| { |
| "epoch": 21.8, |
| "grad_norm": 0.058398790657520294, |
| "learning_rate": 0.0002967498218638665, |
| "loss": 2.2529, |
| "step": 87200 |
| }, |
| { |
| "epoch": 21.825, |
| "grad_norm": 0.051581237465143204, |
| "learning_rate": 0.00029674607162947684, |
| "loss": 2.3331, |
| "step": 87300 |
| }, |
| { |
| "epoch": 21.85, |
| "grad_norm": 0.046482495963573456, |
| "learning_rate": 0.00029674232139508715, |
| "loss": 2.3946, |
| "step": 87400 |
| }, |
| { |
| "epoch": 21.875, |
| "grad_norm": 0.053977347910404205, |
| "learning_rate": 0.0002967385711606975, |
| "loss": 2.3074, |
| "step": 87500 |
| }, |
| { |
| "epoch": 21.9, |
| "grad_norm": 0.0516643263399601, |
| "learning_rate": 0.0002967348209263079, |
| "loss": 2.3192, |
| "step": 87600 |
| }, |
| { |
| "epoch": 21.925, |
| "grad_norm": 0.04839833453297615, |
| "learning_rate": 0.00029673107069191825, |
| "loss": 2.2164, |
| "step": 87700 |
| }, |
| { |
| "epoch": 21.95, |
| "grad_norm": 0.05504479259252548, |
| "learning_rate": 0.00029672732045752856, |
| "loss": 2.3114, |
| "step": 87800 |
| }, |
| { |
| "epoch": 21.975, |
| "grad_norm": 0.05117473378777504, |
| "learning_rate": 0.00029672357022313893, |
| "loss": 2.2976, |
| "step": 87900 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.052601177245378494, |
| "learning_rate": 0.00029671981998874924, |
| "loss": 2.4827, |
| "step": 88000 |
| }, |
| { |
| "epoch": 22.025, |
| "grad_norm": 0.04800357297062874, |
| "learning_rate": 0.0002967160697543596, |
| "loss": 2.2798, |
| "step": 88100 |
| }, |
| { |
| "epoch": 22.05, |
| "grad_norm": 0.06387566775083542, |
| "learning_rate": 0.00029671231951996997, |
| "loss": 2.2325, |
| "step": 88200 |
| }, |
| { |
| "epoch": 22.075, |
| "grad_norm": 0.05719434469938278, |
| "learning_rate": 0.00029670856928558034, |
| "loss": 2.2685, |
| "step": 88300 |
| }, |
| { |
| "epoch": 22.1, |
| "grad_norm": 0.05765566602349281, |
| "learning_rate": 0.00029670481905119065, |
| "loss": 2.1859, |
| "step": 88400 |
| }, |
| { |
| "epoch": 22.125, |
| "grad_norm": 0.06396758556365967, |
| "learning_rate": 0.000296701068816801, |
| "loss": 2.4629, |
| "step": 88500 |
| }, |
| { |
| "epoch": 22.15, |
| "grad_norm": 0.04949299618601799, |
| "learning_rate": 0.0002966973185824114, |
| "loss": 2.2405, |
| "step": 88600 |
| }, |
| { |
| "epoch": 22.175, |
| "grad_norm": 0.04977158457040787, |
| "learning_rate": 0.00029669356834802175, |
| "loss": 2.137, |
| "step": 88700 |
| }, |
| { |
| "epoch": 22.2, |
| "grad_norm": 0.06776726990938187, |
| "learning_rate": 0.00029668981811363206, |
| "loss": 2.1948, |
| "step": 88800 |
| }, |
| { |
| "epoch": 22.225, |
| "grad_norm": 0.05846365541219711, |
| "learning_rate": 0.0002966860678792424, |
| "loss": 2.0921, |
| "step": 88900 |
| }, |
| { |
| "epoch": 22.25, |
| "grad_norm": 0.05889894440770149, |
| "learning_rate": 0.0002966823176448528, |
| "loss": 2.3352, |
| "step": 89000 |
| }, |
| { |
| "epoch": 22.275, |
| "grad_norm": 0.04690111055970192, |
| "learning_rate": 0.00029667856741046316, |
| "loss": 2.3157, |
| "step": 89100 |
| }, |
| { |
| "epoch": 22.3, |
| "grad_norm": 0.05615220591425896, |
| "learning_rate": 0.00029667481717607347, |
| "loss": 2.1161, |
| "step": 89200 |
| }, |
| { |
| "epoch": 22.325, |
| "grad_norm": 0.0551600381731987, |
| "learning_rate": 0.00029667106694168384, |
| "loss": 2.125, |
| "step": 89300 |
| }, |
| { |
| "epoch": 22.35, |
| "grad_norm": 0.050111789256334305, |
| "learning_rate": 0.0002966673167072942, |
| "loss": 2.1135, |
| "step": 89400 |
| }, |
| { |
| "epoch": 22.375, |
| "grad_norm": 0.05537761375308037, |
| "learning_rate": 0.00029666356647290457, |
| "loss": 2.1623, |
| "step": 89500 |
| }, |
| { |
| "epoch": 22.4, |
| "grad_norm": 0.0577760748565197, |
| "learning_rate": 0.0002966598162385149, |
| "loss": 2.1871, |
| "step": 89600 |
| }, |
| { |
| "epoch": 22.425, |
| "grad_norm": 0.05141003802418709, |
| "learning_rate": 0.00029665606600412525, |
| "loss": 2.1437, |
| "step": 89700 |
| }, |
| { |
| "epoch": 22.45, |
| "grad_norm": 0.05164093151688576, |
| "learning_rate": 0.00029665231576973556, |
| "loss": 2.2704, |
| "step": 89800 |
| }, |
| { |
| "epoch": 22.475, |
| "grad_norm": 0.051070958375930786, |
| "learning_rate": 0.0002966485655353459, |
| "loss": 2.2791, |
| "step": 89900 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.054080720990896225, |
| "learning_rate": 0.0002966448153009563, |
| "loss": 2.1997, |
| "step": 90000 |
| }, |
| { |
| "epoch": 22.525, |
| "grad_norm": 0.057264506816864014, |
| "learning_rate": 0.0002966410650665666, |
| "loss": 2.1997, |
| "step": 90100 |
| }, |
| { |
| "epoch": 22.55, |
| "grad_norm": 0.0729178935289383, |
| "learning_rate": 0.00029663731483217697, |
| "loss": 2.1692, |
| "step": 90200 |
| }, |
| { |
| "epoch": 22.575, |
| "grad_norm": 0.05248183757066727, |
| "learning_rate": 0.00029663356459778734, |
| "loss": 2.1341, |
| "step": 90300 |
| }, |
| { |
| "epoch": 22.6, |
| "grad_norm": 0.05090828239917755, |
| "learning_rate": 0.0002966298143633977, |
| "loss": 2.2374, |
| "step": 90400 |
| }, |
| { |
| "epoch": 22.625, |
| "grad_norm": 0.12061487883329391, |
| "learning_rate": 0.0002966261016313519, |
| "loss": 2.1671, |
| "step": 90500 |
| }, |
| { |
| "epoch": 22.65, |
| "grad_norm": 0.06009404733777046, |
| "learning_rate": 0.0002966223513969623, |
| "loss": 2.2945, |
| "step": 90600 |
| }, |
| { |
| "epoch": 22.675, |
| "grad_norm": 0.06756783276796341, |
| "learning_rate": 0.00029661860116257264, |
| "loss": 2.2064, |
| "step": 90700 |
| }, |
| { |
| "epoch": 22.7, |
| "grad_norm": 0.04783422127366066, |
| "learning_rate": 0.000296614850928183, |
| "loss": 2.1548, |
| "step": 90800 |
| }, |
| { |
| "epoch": 22.725, |
| "grad_norm": 0.06468702852725983, |
| "learning_rate": 0.0002966111006937933, |
| "loss": 2.0389, |
| "step": 90900 |
| }, |
| { |
| "epoch": 22.75, |
| "grad_norm": 0.05485010892152786, |
| "learning_rate": 0.0002966073504594037, |
| "loss": 2.1214, |
| "step": 91000 |
| }, |
| { |
| "epoch": 22.775, |
| "grad_norm": 0.05827448144555092, |
| "learning_rate": 0.00029660360022501405, |
| "loss": 2.2367, |
| "step": 91100 |
| }, |
| { |
| "epoch": 22.8, |
| "grad_norm": 0.054152172058820724, |
| "learning_rate": 0.0002965998499906244, |
| "loss": 2.1022, |
| "step": 91200 |
| }, |
| { |
| "epoch": 22.825, |
| "grad_norm": 0.04739788547158241, |
| "learning_rate": 0.00029659609975623473, |
| "loss": 2.1672, |
| "step": 91300 |
| }, |
| { |
| "epoch": 22.85, |
| "grad_norm": 0.05551367625594139, |
| "learning_rate": 0.0002965923495218451, |
| "loss": 2.05, |
| "step": 91400 |
| }, |
| { |
| "epoch": 22.875, |
| "grad_norm": 0.05317440256476402, |
| "learning_rate": 0.0002965885992874554, |
| "loss": 2.012, |
| "step": 91500 |
| }, |
| { |
| "epoch": 22.9, |
| "grad_norm": 0.053941987454891205, |
| "learning_rate": 0.00029658488655540967, |
| "loss": 2.1268, |
| "step": 91600 |
| }, |
| { |
| "epoch": 22.925, |
| "grad_norm": 0.05108709633350372, |
| "learning_rate": 0.00029658113632102004, |
| "loss": 2.1342, |
| "step": 91700 |
| }, |
| { |
| "epoch": 22.95, |
| "grad_norm": 0.052761614322662354, |
| "learning_rate": 0.0002965773860866304, |
| "loss": 2.08, |
| "step": 91800 |
| }, |
| { |
| "epoch": 22.975, |
| "grad_norm": 0.05674518644809723, |
| "learning_rate": 0.0002965736358522407, |
| "loss": 2.1533, |
| "step": 91900 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.06261865794658661, |
| "learning_rate": 0.0002965698856178511, |
| "loss": 2.0382, |
| "step": 92000 |
| }, |
| { |
| "epoch": 23.025, |
| "grad_norm": 0.04918836057186127, |
| "learning_rate": 0.00029656613538346145, |
| "loss": 2.0315, |
| "step": 92100 |
| }, |
| { |
| "epoch": 23.05, |
| "grad_norm": 0.04982222989201546, |
| "learning_rate": 0.0002965623851490718, |
| "loss": 2.1285, |
| "step": 92200 |
| }, |
| { |
| "epoch": 23.075, |
| "grad_norm": 0.051534924656152725, |
| "learning_rate": 0.0002965586349146821, |
| "loss": 2.1746, |
| "step": 92300 |
| }, |
| { |
| "epoch": 23.1, |
| "grad_norm": 0.059025805443525314, |
| "learning_rate": 0.0002965548846802925, |
| "loss": 2.1339, |
| "step": 92400 |
| }, |
| { |
| "epoch": 23.125, |
| "grad_norm": 0.05158498138189316, |
| "learning_rate": 0.00029655113444590286, |
| "loss": 2.049, |
| "step": 92500 |
| }, |
| { |
| "epoch": 23.15, |
| "grad_norm": 0.049751464277505875, |
| "learning_rate": 0.0002965473842115132, |
| "loss": 2.0587, |
| "step": 92600 |
| }, |
| { |
| "epoch": 23.175, |
| "grad_norm": 0.05357548967003822, |
| "learning_rate": 0.00029654363397712353, |
| "loss": 2.1765, |
| "step": 92700 |
| }, |
| { |
| "epoch": 23.2, |
| "grad_norm": 0.05639924481511116, |
| "learning_rate": 0.0002965398837427339, |
| "loss": 2.0229, |
| "step": 92800 |
| }, |
| { |
| "epoch": 23.225, |
| "grad_norm": 0.057067710906267166, |
| "learning_rate": 0.00029653613350834427, |
| "loss": 2.1208, |
| "step": 92900 |
| }, |
| { |
| "epoch": 23.25, |
| "grad_norm": 0.056406810879707336, |
| "learning_rate": 0.00029653238327395463, |
| "loss": 2.1044, |
| "step": 93000 |
| }, |
| { |
| "epoch": 23.275, |
| "grad_norm": 0.05794864147901535, |
| "learning_rate": 0.00029652863303956495, |
| "loss": 1.9575, |
| "step": 93100 |
| }, |
| { |
| "epoch": 23.3, |
| "grad_norm": 0.059239715337753296, |
| "learning_rate": 0.0002965248828051753, |
| "loss": 2.1206, |
| "step": 93200 |
| }, |
| { |
| "epoch": 23.325, |
| "grad_norm": 0.05163438990712166, |
| "learning_rate": 0.0002965211325707856, |
| "loss": 1.9799, |
| "step": 93300 |
| }, |
| { |
| "epoch": 23.35, |
| "grad_norm": 0.05853526294231415, |
| "learning_rate": 0.000296517382336396, |
| "loss": 2.0314, |
| "step": 93400 |
| }, |
| { |
| "epoch": 23.375, |
| "grad_norm": 0.04642421007156372, |
| "learning_rate": 0.00029651363210200636, |
| "loss": 2.0252, |
| "step": 93500 |
| }, |
| { |
| "epoch": 23.4, |
| "grad_norm": 0.05934316664934158, |
| "learning_rate": 0.0002965098818676167, |
| "loss": 2.0889, |
| "step": 93600 |
| }, |
| { |
| "epoch": 23.425, |
| "grad_norm": 0.05159417912364006, |
| "learning_rate": 0.00029650613163322703, |
| "loss": 2.0017, |
| "step": 93700 |
| }, |
| { |
| "epoch": 23.45, |
| "grad_norm": 0.04541020095348358, |
| "learning_rate": 0.0002965023813988374, |
| "loss": 2.0592, |
| "step": 93800 |
| }, |
| { |
| "epoch": 23.475, |
| "grad_norm": 0.05421976000070572, |
| "learning_rate": 0.00029649863116444777, |
| "loss": 1.9184, |
| "step": 93900 |
| }, |
| { |
| "epoch": 23.5, |
| "grad_norm": 0.05134705454111099, |
| "learning_rate": 0.0002964948809300581, |
| "loss": 2.2841, |
| "step": 94000 |
| }, |
| { |
| "epoch": 23.525, |
| "grad_norm": 0.050796929746866226, |
| "learning_rate": 0.00029649113069566844, |
| "loss": 1.9773, |
| "step": 94100 |
| }, |
| { |
| "epoch": 23.55, |
| "grad_norm": 0.062260136008262634, |
| "learning_rate": 0.0002964873804612788, |
| "loss": 2.1259, |
| "step": 94200 |
| }, |
| { |
| "epoch": 23.575, |
| "grad_norm": 0.051263660192489624, |
| "learning_rate": 0.0002964836302268892, |
| "loss": 1.996, |
| "step": 94300 |
| }, |
| { |
| "epoch": 23.6, |
| "grad_norm": 0.052974916994571686, |
| "learning_rate": 0.0002964798799924995, |
| "loss": 2.0231, |
| "step": 94400 |
| }, |
| { |
| "epoch": 23.625, |
| "grad_norm": 0.06232937052845955, |
| "learning_rate": 0.00029647612975810985, |
| "loss": 1.9196, |
| "step": 94500 |
| }, |
| { |
| "epoch": 23.65, |
| "grad_norm": 0.05306218937039375, |
| "learning_rate": 0.0002964723795237202, |
| "loss": 1.9388, |
| "step": 94600 |
| }, |
| { |
| "epoch": 23.675, |
| "grad_norm": 0.05512924864888191, |
| "learning_rate": 0.0002964686292893306, |
| "loss": 2.1401, |
| "step": 94700 |
| }, |
| { |
| "epoch": 23.7, |
| "grad_norm": 0.056388285011053085, |
| "learning_rate": 0.0002964648790549409, |
| "loss": 2.0013, |
| "step": 94800 |
| }, |
| { |
| "epoch": 23.725, |
| "grad_norm": 0.05032140389084816, |
| "learning_rate": 0.00029646112882055126, |
| "loss": 1.9568, |
| "step": 94900 |
| }, |
| { |
| "epoch": 23.75, |
| "grad_norm": 0.04757603630423546, |
| "learning_rate": 0.0002964573785861616, |
| "loss": 1.8944, |
| "step": 95000 |
| }, |
| { |
| "epoch": 23.775, |
| "grad_norm": 0.05020546913146973, |
| "learning_rate": 0.00029645362835177194, |
| "loss": 2.0146, |
| "step": 95100 |
| }, |
| { |
| "epoch": 23.8, |
| "grad_norm": 0.056530579924583435, |
| "learning_rate": 0.0002964498781173823, |
| "loss": 1.9345, |
| "step": 95200 |
| }, |
| { |
| "epoch": 23.825, |
| "grad_norm": 0.07894182950258255, |
| "learning_rate": 0.0002964461278829927, |
| "loss": 2.1116, |
| "step": 95300 |
| }, |
| { |
| "epoch": 23.85, |
| "grad_norm": 0.05175475776195526, |
| "learning_rate": 0.000296442377648603, |
| "loss": 2.1331, |
| "step": 95400 |
| }, |
| { |
| "epoch": 23.875, |
| "grad_norm": 0.05405741557478905, |
| "learning_rate": 0.00029643862741421335, |
| "loss": 1.8724, |
| "step": 95500 |
| }, |
| { |
| "epoch": 23.9, |
| "grad_norm": 0.06405475735664368, |
| "learning_rate": 0.0002964349146821676, |
| "loss": 1.8652, |
| "step": 95600 |
| }, |
| { |
| "epoch": 23.925, |
| "grad_norm": 0.0548410564661026, |
| "learning_rate": 0.000296431164447778, |
| "loss": 1.9177, |
| "step": 95700 |
| }, |
| { |
| "epoch": 23.95, |
| "grad_norm": 0.04941118508577347, |
| "learning_rate": 0.0002964274142133883, |
| "loss": 1.981, |
| "step": 95800 |
| }, |
| { |
| "epoch": 23.975, |
| "grad_norm": 0.06233079358935356, |
| "learning_rate": 0.00029642366397899866, |
| "loss": 1.886, |
| "step": 95900 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.06110682711005211, |
| "learning_rate": 0.000296419913744609, |
| "loss": 1.906, |
| "step": 96000 |
| }, |
| { |
| "epoch": 24.025, |
| "grad_norm": 0.056876040995121, |
| "learning_rate": 0.0002964161635102194, |
| "loss": 1.9632, |
| "step": 96100 |
| }, |
| { |
| "epoch": 24.05, |
| "grad_norm": 0.056007348001003265, |
| "learning_rate": 0.0002964124132758297, |
| "loss": 1.8518, |
| "step": 96200 |
| }, |
| { |
| "epoch": 24.075, |
| "grad_norm": 0.052707262337207794, |
| "learning_rate": 0.00029640866304144007, |
| "loss": 2.1039, |
| "step": 96300 |
| }, |
| { |
| "epoch": 24.1, |
| "grad_norm": 0.05575592815876007, |
| "learning_rate": 0.00029640491280705044, |
| "loss": 1.8103, |
| "step": 96400 |
| }, |
| { |
| "epoch": 24.125, |
| "grad_norm": 0.05587482079863548, |
| "learning_rate": 0.0002964011625726608, |
| "loss": 1.9645, |
| "step": 96500 |
| }, |
| { |
| "epoch": 24.15, |
| "grad_norm": 0.08619283139705658, |
| "learning_rate": 0.0002963974123382711, |
| "loss": 1.9429, |
| "step": 96600 |
| }, |
| { |
| "epoch": 24.175, |
| "grad_norm": 0.09571905434131622, |
| "learning_rate": 0.0002963936621038815, |
| "loss": 1.902, |
| "step": 96700 |
| }, |
| { |
| "epoch": 24.2, |
| "grad_norm": 0.050410255789756775, |
| "learning_rate": 0.0002963899118694918, |
| "loss": 2.0446, |
| "step": 96800 |
| }, |
| { |
| "epoch": 24.225, |
| "grad_norm": 0.060695916414260864, |
| "learning_rate": 0.00029638616163510216, |
| "loss": 1.9231, |
| "step": 96900 |
| }, |
| { |
| "epoch": 24.25, |
| "grad_norm": 0.05033661425113678, |
| "learning_rate": 0.0002963824114007125, |
| "loss": 1.9065, |
| "step": 97000 |
| }, |
| { |
| "epoch": 24.275, |
| "grad_norm": 0.05458163470029831, |
| "learning_rate": 0.0002963786611663229, |
| "loss": 1.858, |
| "step": 97100 |
| }, |
| { |
| "epoch": 24.3, |
| "grad_norm": 0.05258990451693535, |
| "learning_rate": 0.0002963749109319332, |
| "loss": 2.0328, |
| "step": 97200 |
| }, |
| { |
| "epoch": 24.325, |
| "grad_norm": 0.04619702324271202, |
| "learning_rate": 0.00029637116069754357, |
| "loss": 1.8548, |
| "step": 97300 |
| }, |
| { |
| "epoch": 24.35, |
| "grad_norm": 0.06743716448545456, |
| "learning_rate": 0.00029636741046315393, |
| "loss": 1.9381, |
| "step": 97400 |
| }, |
| { |
| "epoch": 24.375, |
| "grad_norm": 0.049068696796894073, |
| "learning_rate": 0.0002963636602287643, |
| "loss": 1.9359, |
| "step": 97500 |
| }, |
| { |
| "epoch": 24.4, |
| "grad_norm": 0.061207227408885956, |
| "learning_rate": 0.0002963599474967185, |
| "loss": 1.8927, |
| "step": 97600 |
| }, |
| { |
| "epoch": 24.425, |
| "grad_norm": 0.05484483018517494, |
| "learning_rate": 0.0002963561972623289, |
| "loss": 1.88, |
| "step": 97700 |
| }, |
| { |
| "epoch": 24.45, |
| "grad_norm": 0.057467181235551834, |
| "learning_rate": 0.00029635244702793924, |
| "loss": 1.856, |
| "step": 97800 |
| }, |
| { |
| "epoch": 24.475, |
| "grad_norm": 0.049861736595630646, |
| "learning_rate": 0.00029634869679354955, |
| "loss": 2.0343, |
| "step": 97900 |
| }, |
| { |
| "epoch": 24.5, |
| "grad_norm": 0.049673888832330704, |
| "learning_rate": 0.0002963449465591599, |
| "loss": 1.8138, |
| "step": 98000 |
| }, |
| { |
| "epoch": 24.525, |
| "grad_norm": 0.06320221722126007, |
| "learning_rate": 0.0002963411963247703, |
| "loss": 1.9389, |
| "step": 98100 |
| }, |
| { |
| "epoch": 24.55, |
| "grad_norm": 0.0863277018070221, |
| "learning_rate": 0.00029633744609038065, |
| "loss": 1.9127, |
| "step": 98200 |
| }, |
| { |
| "epoch": 24.575, |
| "grad_norm": 0.04973394796252251, |
| "learning_rate": 0.00029633369585599096, |
| "loss": 1.8468, |
| "step": 98300 |
| }, |
| { |
| "epoch": 24.6, |
| "grad_norm": 0.061264049261808395, |
| "learning_rate": 0.00029632994562160133, |
| "loss": 1.9194, |
| "step": 98400 |
| }, |
| { |
| "epoch": 24.625, |
| "grad_norm": 0.05264371261000633, |
| "learning_rate": 0.00029632619538721164, |
| "loss": 1.8896, |
| "step": 98500 |
| }, |
| { |
| "epoch": 24.65, |
| "grad_norm": 0.054599445313215256, |
| "learning_rate": 0.000296322445152822, |
| "loss": 1.9001, |
| "step": 98600 |
| }, |
| { |
| "epoch": 24.675, |
| "grad_norm": 0.05259576812386513, |
| "learning_rate": 0.00029631869491843237, |
| "loss": 1.8258, |
| "step": 98700 |
| }, |
| { |
| "epoch": 24.7, |
| "grad_norm": 0.05342064052820206, |
| "learning_rate": 0.00029631494468404274, |
| "loss": 1.926, |
| "step": 98800 |
| }, |
| { |
| "epoch": 24.725, |
| "grad_norm": 0.04714656248688698, |
| "learning_rate": 0.00029631119444965305, |
| "loss": 1.8823, |
| "step": 98900 |
| }, |
| { |
| "epoch": 24.75, |
| "grad_norm": 0.050276800990104675, |
| "learning_rate": 0.0002963074442152634, |
| "loss": 1.823, |
| "step": 99000 |
| }, |
| { |
| "epoch": 24.775, |
| "grad_norm": 0.051686566323041916, |
| "learning_rate": 0.0002963036939808738, |
| "loss": 1.8796, |
| "step": 99100 |
| }, |
| { |
| "epoch": 24.8, |
| "grad_norm": 0.051118552684783936, |
| "learning_rate": 0.00029629994374648415, |
| "loss": 1.9002, |
| "step": 99200 |
| }, |
| { |
| "epoch": 24.825, |
| "grad_norm": 0.05065715312957764, |
| "learning_rate": 0.00029629619351209446, |
| "loss": 1.868, |
| "step": 99300 |
| }, |
| { |
| "epoch": 24.85, |
| "grad_norm": 0.043341364711523056, |
| "learning_rate": 0.00029629244327770483, |
| "loss": 1.9614, |
| "step": 99400 |
| }, |
| { |
| "epoch": 24.875, |
| "grad_norm": 0.052784670144319534, |
| "learning_rate": 0.0002962886930433152, |
| "loss": 1.9323, |
| "step": 99500 |
| }, |
| { |
| "epoch": 24.9, |
| "grad_norm": 0.055045951157808304, |
| "learning_rate": 0.00029628494280892556, |
| "loss": 1.8218, |
| "step": 99600 |
| }, |
| { |
| "epoch": 24.925, |
| "grad_norm": 0.058140724897384644, |
| "learning_rate": 0.00029628123007687977, |
| "loss": 1.8894, |
| "step": 99700 |
| }, |
| { |
| "epoch": 24.95, |
| "grad_norm": 0.058738358318805695, |
| "learning_rate": 0.00029627747984249013, |
| "loss": 1.7708, |
| "step": 99800 |
| }, |
| { |
| "epoch": 24.975, |
| "grad_norm": 0.05485925078392029, |
| "learning_rate": 0.0002962737296081005, |
| "loss": 1.9136, |
| "step": 99900 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.05562080442905426, |
| "learning_rate": 0.00029626997937371087, |
| "loss": 1.9072, |
| "step": 100000 |
| }, |
| { |
| "epoch": 25.025, |
| "grad_norm": 0.04997032880783081, |
| "learning_rate": 0.0002962662291393212, |
| "loss": 1.7119, |
| "step": 100100 |
| }, |
| { |
| "epoch": 25.05, |
| "grad_norm": 0.05290250480175018, |
| "learning_rate": 0.00029626247890493154, |
| "loss": 1.706, |
| "step": 100200 |
| }, |
| { |
| "epoch": 25.075, |
| "grad_norm": 0.04861506074666977, |
| "learning_rate": 0.00029625872867054186, |
| "loss": 1.7061, |
| "step": 100300 |
| }, |
| { |
| "epoch": 25.1, |
| "grad_norm": 0.05706246569752693, |
| "learning_rate": 0.0002962549784361522, |
| "loss": 1.9067, |
| "step": 100400 |
| }, |
| { |
| "epoch": 25.125, |
| "grad_norm": 0.055538617074489594, |
| "learning_rate": 0.0002962512282017626, |
| "loss": 1.8622, |
| "step": 100500 |
| }, |
| { |
| "epoch": 25.15, |
| "grad_norm": 0.06384219229221344, |
| "learning_rate": 0.00029624747796737295, |
| "loss": 1.7935, |
| "step": 100600 |
| }, |
| { |
| "epoch": 25.175, |
| "grad_norm": 0.057620443403720856, |
| "learning_rate": 0.00029624372773298327, |
| "loss": 1.8746, |
| "step": 100700 |
| }, |
| { |
| "epoch": 25.2, |
| "grad_norm": 0.05917825549840927, |
| "learning_rate": 0.00029623997749859363, |
| "loss": 1.7152, |
| "step": 100800 |
| }, |
| { |
| "epoch": 25.225, |
| "grad_norm": 0.061573103070259094, |
| "learning_rate": 0.000296236227264204, |
| "loss": 1.8928, |
| "step": 100900 |
| }, |
| { |
| "epoch": 25.25, |
| "grad_norm": 0.04456368088722229, |
| "learning_rate": 0.00029623247702981436, |
| "loss": 1.798, |
| "step": 101000 |
| }, |
| { |
| "epoch": 25.275, |
| "grad_norm": 0.06028895452618599, |
| "learning_rate": 0.0002962287267954247, |
| "loss": 1.8044, |
| "step": 101100 |
| }, |
| { |
| "epoch": 25.3, |
| "grad_norm": 0.0548817440867424, |
| "learning_rate": 0.00029622497656103504, |
| "loss": 1.9204, |
| "step": 101200 |
| }, |
| { |
| "epoch": 25.325, |
| "grad_norm": 0.045852452516555786, |
| "learning_rate": 0.0002962212263266454, |
| "loss": 1.924, |
| "step": 101300 |
| }, |
| { |
| "epoch": 25.35, |
| "grad_norm": 0.04782922565937042, |
| "learning_rate": 0.0002962174760922558, |
| "loss": 1.7096, |
| "step": 101400 |
| }, |
| { |
| "epoch": 25.375, |
| "grad_norm": 0.049990586936473846, |
| "learning_rate": 0.0002962137258578661, |
| "loss": 1.9654, |
| "step": 101500 |
| }, |
| { |
| "epoch": 25.4, |
| "grad_norm": 0.04626760631799698, |
| "learning_rate": 0.0002962099756234764, |
| "loss": 1.7223, |
| "step": 101600 |
| }, |
| { |
| "epoch": 25.425, |
| "grad_norm": 0.054343245923519135, |
| "learning_rate": 0.0002962062253890868, |
| "loss": 1.85, |
| "step": 101700 |
| }, |
| { |
| "epoch": 25.45, |
| "grad_norm": 0.04563869535923004, |
| "learning_rate": 0.000296202512657041, |
| "loss": 1.8011, |
| "step": 101800 |
| }, |
| { |
| "epoch": 25.475, |
| "grad_norm": 0.05334710702300072, |
| "learning_rate": 0.0002961987624226514, |
| "loss": 1.7863, |
| "step": 101900 |
| }, |
| { |
| "epoch": 25.5, |
| "grad_norm": 0.05533549562096596, |
| "learning_rate": 0.0002961950121882617, |
| "loss": 1.7575, |
| "step": 102000 |
| }, |
| { |
| "epoch": 25.525, |
| "grad_norm": 0.05645955726504326, |
| "learning_rate": 0.00029619126195387207, |
| "loss": 1.6948, |
| "step": 102100 |
| }, |
| { |
| "epoch": 25.55, |
| "grad_norm": 0.05024164915084839, |
| "learning_rate": 0.00029618751171948244, |
| "loss": 1.6452, |
| "step": 102200 |
| }, |
| { |
| "epoch": 25.575, |
| "grad_norm": 0.051269952207803726, |
| "learning_rate": 0.0002961837614850928, |
| "loss": 1.7991, |
| "step": 102300 |
| }, |
| { |
| "epoch": 25.6, |
| "grad_norm": 0.05763736367225647, |
| "learning_rate": 0.0002961800112507031, |
| "loss": 1.7634, |
| "step": 102400 |
| }, |
| { |
| "epoch": 25.625, |
| "grad_norm": 0.05718966946005821, |
| "learning_rate": 0.0002961762610163135, |
| "loss": 1.7013, |
| "step": 102500 |
| }, |
| { |
| "epoch": 25.65, |
| "grad_norm": 0.05326114594936371, |
| "learning_rate": 0.00029617251078192385, |
| "loss": 1.6578, |
| "step": 102600 |
| }, |
| { |
| "epoch": 25.675, |
| "grad_norm": 0.05004553496837616, |
| "learning_rate": 0.0002961687605475342, |
| "loss": 1.6707, |
| "step": 102700 |
| }, |
| { |
| "epoch": 25.7, |
| "grad_norm": 0.047597501426935196, |
| "learning_rate": 0.0002961650103131445, |
| "loss": 1.8098, |
| "step": 102800 |
| }, |
| { |
| "epoch": 25.725, |
| "grad_norm": 0.05360327288508415, |
| "learning_rate": 0.0002961612600787549, |
| "loss": 1.8259, |
| "step": 102900 |
| }, |
| { |
| "epoch": 25.75, |
| "grad_norm": 0.04639869183301926, |
| "learning_rate": 0.00029615750984436526, |
| "loss": 1.8487, |
| "step": 103000 |
| }, |
| { |
| "epoch": 25.775, |
| "grad_norm": 0.048653990030288696, |
| "learning_rate": 0.0002961537596099756, |
| "loss": 1.6956, |
| "step": 103100 |
| }, |
| { |
| "epoch": 25.8, |
| "grad_norm": 0.043963368982076645, |
| "learning_rate": 0.00029615000937558594, |
| "loss": 1.6178, |
| "step": 103200 |
| }, |
| { |
| "epoch": 25.825, |
| "grad_norm": 0.05706685408949852, |
| "learning_rate": 0.0002961462591411963, |
| "loss": 1.6809, |
| "step": 103300 |
| }, |
| { |
| "epoch": 25.85, |
| "grad_norm": 0.05852410942316055, |
| "learning_rate": 0.00029614250890680667, |
| "loss": 1.6511, |
| "step": 103400 |
| }, |
| { |
| "epoch": 25.875, |
| "grad_norm": 0.054208237677812576, |
| "learning_rate": 0.00029613875867241703, |
| "loss": 1.8168, |
| "step": 103500 |
| }, |
| { |
| "epoch": 25.9, |
| "grad_norm": 0.05457128956913948, |
| "learning_rate": 0.00029613500843802735, |
| "loss": 1.7456, |
| "step": 103600 |
| }, |
| { |
| "epoch": 25.925, |
| "grad_norm": 0.047613076865673065, |
| "learning_rate": 0.0002961312582036377, |
| "loss": 1.629, |
| "step": 103700 |
| }, |
| { |
| "epoch": 25.95, |
| "grad_norm": 0.05182652920484543, |
| "learning_rate": 0.0002961275454715919, |
| "loss": 1.6386, |
| "step": 103800 |
| }, |
| { |
| "epoch": 25.975, |
| "grad_norm": 0.046905118972063065, |
| "learning_rate": 0.0002961237952372023, |
| "loss": 1.8368, |
| "step": 103900 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.04973314702510834, |
| "learning_rate": 0.00029612004500281265, |
| "loss": 1.8125, |
| "step": 104000 |
| }, |
| { |
| "epoch": 26.025, |
| "grad_norm": 0.048138804733753204, |
| "learning_rate": 0.000296116294768423, |
| "loss": 1.6797, |
| "step": 104100 |
| }, |
| { |
| "epoch": 26.05, |
| "grad_norm": 0.0547357015311718, |
| "learning_rate": 0.00029611254453403333, |
| "loss": 1.67, |
| "step": 104200 |
| }, |
| { |
| "epoch": 26.075, |
| "grad_norm": 0.05443267896771431, |
| "learning_rate": 0.0002961087942996437, |
| "loss": 1.6682, |
| "step": 104300 |
| }, |
| { |
| "epoch": 26.1, |
| "grad_norm": 0.06275078654289246, |
| "learning_rate": 0.00029610504406525406, |
| "loss": 1.7022, |
| "step": 104400 |
| }, |
| { |
| "epoch": 26.125, |
| "grad_norm": 0.05464591458439827, |
| "learning_rate": 0.00029610129383086443, |
| "loss": 1.8136, |
| "step": 104500 |
| }, |
| { |
| "epoch": 26.15, |
| "grad_norm": 0.05352524295449257, |
| "learning_rate": 0.00029609754359647474, |
| "loss": 1.7319, |
| "step": 104600 |
| }, |
| { |
| "epoch": 26.175, |
| "grad_norm": 0.05525488778948784, |
| "learning_rate": 0.0002960937933620851, |
| "loss": 1.766, |
| "step": 104700 |
| }, |
| { |
| "epoch": 26.2, |
| "grad_norm": 0.05569114536046982, |
| "learning_rate": 0.00029609004312769547, |
| "loss": 1.7767, |
| "step": 104800 |
| }, |
| { |
| "epoch": 26.225, |
| "grad_norm": 0.0440787635743618, |
| "learning_rate": 0.00029608629289330584, |
| "loss": 1.6786, |
| "step": 104900 |
| }, |
| { |
| "epoch": 26.25, |
| "grad_norm": 0.05321473628282547, |
| "learning_rate": 0.00029608254265891615, |
| "loss": 1.6904, |
| "step": 105000 |
| }, |
| { |
| "epoch": 26.275, |
| "grad_norm": 0.047589514404535294, |
| "learning_rate": 0.0002960787924245265, |
| "loss": 1.5513, |
| "step": 105100 |
| }, |
| { |
| "epoch": 26.3, |
| "grad_norm": 0.0542590469121933, |
| "learning_rate": 0.0002960750421901369, |
| "loss": 1.8018, |
| "step": 105200 |
| }, |
| { |
| "epoch": 26.325, |
| "grad_norm": 0.052015386521816254, |
| "learning_rate": 0.0002960712919557472, |
| "loss": 1.6334, |
| "step": 105300 |
| }, |
| { |
| "epoch": 26.35, |
| "grad_norm": 0.16159088909626007, |
| "learning_rate": 0.00029606754172135756, |
| "loss": 1.5818, |
| "step": 105400 |
| }, |
| { |
| "epoch": 26.375, |
| "grad_norm": 0.04810553416609764, |
| "learning_rate": 0.00029606379148696787, |
| "loss": 1.6274, |
| "step": 105500 |
| }, |
| { |
| "epoch": 26.4, |
| "grad_norm": 0.053879667073488235, |
| "learning_rate": 0.00029606004125257824, |
| "loss": 1.8122, |
| "step": 105600 |
| }, |
| { |
| "epoch": 26.425, |
| "grad_norm": 0.04980600252747536, |
| "learning_rate": 0.0002960562910181886, |
| "loss": 1.7187, |
| "step": 105700 |
| }, |
| { |
| "epoch": 26.45, |
| "grad_norm": 0.059906307607889175, |
| "learning_rate": 0.00029605257828614287, |
| "loss": 1.7223, |
| "step": 105800 |
| }, |
| { |
| "epoch": 26.475, |
| "grad_norm": 0.04634363576769829, |
| "learning_rate": 0.0002960488280517532, |
| "loss": 1.6282, |
| "step": 105900 |
| }, |
| { |
| "epoch": 26.5, |
| "grad_norm": 0.052842844277620316, |
| "learning_rate": 0.00029604507781736354, |
| "loss": 1.6203, |
| "step": 106000 |
| }, |
| { |
| "epoch": 26.525, |
| "grad_norm": 0.05409262329339981, |
| "learning_rate": 0.0002960413275829739, |
| "loss": 1.7725, |
| "step": 106100 |
| }, |
| { |
| "epoch": 26.55, |
| "grad_norm": 0.04745221883058548, |
| "learning_rate": 0.0002960375773485843, |
| "loss": 1.6498, |
| "step": 106200 |
| }, |
| { |
| "epoch": 26.575, |
| "grad_norm": 0.050988294184207916, |
| "learning_rate": 0.0002960338271141946, |
| "loss": 1.6534, |
| "step": 106300 |
| }, |
| { |
| "epoch": 26.6, |
| "grad_norm": 0.046150580048561096, |
| "learning_rate": 0.00029603007687980495, |
| "loss": 1.7042, |
| "step": 106400 |
| }, |
| { |
| "epoch": 26.625, |
| "grad_norm": 0.05468379706144333, |
| "learning_rate": 0.0002960263266454153, |
| "loss": 1.6467, |
| "step": 106500 |
| }, |
| { |
| "epoch": 26.65, |
| "grad_norm": 0.05112981051206589, |
| "learning_rate": 0.0002960225764110257, |
| "loss": 1.5898, |
| "step": 106600 |
| }, |
| { |
| "epoch": 26.675, |
| "grad_norm": 0.050162170082330704, |
| "learning_rate": 0.000296018826176636, |
| "loss": 1.7128, |
| "step": 106700 |
| }, |
| { |
| "epoch": 26.7, |
| "grad_norm": 0.05202512443065643, |
| "learning_rate": 0.00029601507594224637, |
| "loss": 1.6162, |
| "step": 106800 |
| }, |
| { |
| "epoch": 26.725, |
| "grad_norm": 0.05049065127968788, |
| "learning_rate": 0.00029601132570785673, |
| "loss": 1.7741, |
| "step": 106900 |
| }, |
| { |
| "epoch": 26.75, |
| "grad_norm": 0.05425161495804787, |
| "learning_rate": 0.000296007612975811, |
| "loss": 1.5715, |
| "step": 107000 |
| }, |
| { |
| "epoch": 26.775, |
| "grad_norm": 0.04676578938961029, |
| "learning_rate": 0.0002960038627414213, |
| "loss": 1.4396, |
| "step": 107100 |
| }, |
| { |
| "epoch": 26.8, |
| "grad_norm": 0.04315830394625664, |
| "learning_rate": 0.00029600011250703167, |
| "loss": 1.648, |
| "step": 107200 |
| }, |
| { |
| "epoch": 26.825, |
| "grad_norm": 0.052309952676296234, |
| "learning_rate": 0.000295996362272642, |
| "loss": 1.5737, |
| "step": 107300 |
| }, |
| { |
| "epoch": 26.85, |
| "grad_norm": 0.05186279118061066, |
| "learning_rate": 0.00029599261203825235, |
| "loss": 1.5913, |
| "step": 107400 |
| }, |
| { |
| "epoch": 26.875, |
| "grad_norm": 0.05266883224248886, |
| "learning_rate": 0.0002959888618038627, |
| "loss": 1.567, |
| "step": 107500 |
| }, |
| { |
| "epoch": 26.9, |
| "grad_norm": 0.04454510286450386, |
| "learning_rate": 0.0002959851115694731, |
| "loss": 1.5123, |
| "step": 107600 |
| }, |
| { |
| "epoch": 26.925, |
| "grad_norm": 0.05315356329083443, |
| "learning_rate": 0.0002959813613350834, |
| "loss": 1.6372, |
| "step": 107700 |
| }, |
| { |
| "epoch": 26.95, |
| "grad_norm": 0.04607756808400154, |
| "learning_rate": 0.00029597761110069376, |
| "loss": 1.6074, |
| "step": 107800 |
| }, |
| { |
| "epoch": 26.975, |
| "grad_norm": 0.04452488571405411, |
| "learning_rate": 0.0002959738608663041, |
| "loss": 1.5927, |
| "step": 107900 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 0.05356653034687042, |
| "learning_rate": 0.0002959701106319145, |
| "loss": 1.6214, |
| "step": 108000 |
| }, |
| { |
| "epoch": 27.025, |
| "grad_norm": 0.04785982891917229, |
| "learning_rate": 0.0002959663603975248, |
| "loss": 1.6273, |
| "step": 108100 |
| }, |
| { |
| "epoch": 27.05, |
| "grad_norm": 0.04626493901014328, |
| "learning_rate": 0.00029596261016313517, |
| "loss": 1.6494, |
| "step": 108200 |
| }, |
| { |
| "epoch": 27.075, |
| "grad_norm": 0.04791727289557457, |
| "learning_rate": 0.00029595885992874554, |
| "loss": 1.5452, |
| "step": 108300 |
| }, |
| { |
| "epoch": 27.1, |
| "grad_norm": 0.06166384369134903, |
| "learning_rate": 0.0002959551096943559, |
| "loss": 1.5749, |
| "step": 108400 |
| }, |
| { |
| "epoch": 27.125, |
| "grad_norm": 0.05195313319563866, |
| "learning_rate": 0.0002959513594599662, |
| "loss": 1.536, |
| "step": 108500 |
| }, |
| { |
| "epoch": 27.15, |
| "grad_norm": 0.0505547821521759, |
| "learning_rate": 0.0002959476092255766, |
| "loss": 1.6606, |
| "step": 108600 |
| }, |
| { |
| "epoch": 27.175, |
| "grad_norm": 0.04837740212678909, |
| "learning_rate": 0.00029594385899118695, |
| "loss": 1.5617, |
| "step": 108700 |
| }, |
| { |
| "epoch": 27.2, |
| "grad_norm": 0.04828809201717377, |
| "learning_rate": 0.0002959401087567973, |
| "loss": 1.7326, |
| "step": 108800 |
| }, |
| { |
| "epoch": 27.225, |
| "grad_norm": 0.06565222144126892, |
| "learning_rate": 0.0002959363585224076, |
| "loss": 1.5621, |
| "step": 108900 |
| }, |
| { |
| "epoch": 27.25, |
| "grad_norm": 0.05221616104245186, |
| "learning_rate": 0.000295932608288018, |
| "loss": 1.7385, |
| "step": 109000 |
| }, |
| { |
| "epoch": 27.275, |
| "grad_norm": 0.05376584827899933, |
| "learning_rate": 0.0002959288580536283, |
| "loss": 1.5078, |
| "step": 109100 |
| }, |
| { |
| "epoch": 27.3, |
| "grad_norm": 0.04505067691206932, |
| "learning_rate": 0.00029592510781923867, |
| "loss": 1.6082, |
| "step": 109200 |
| }, |
| { |
| "epoch": 27.325, |
| "grad_norm": 0.047202132642269135, |
| "learning_rate": 0.00029592135758484903, |
| "loss": 1.5304, |
| "step": 109300 |
| }, |
| { |
| "epoch": 27.35, |
| "grad_norm": 0.06032031401991844, |
| "learning_rate": 0.00029591760735045935, |
| "loss": 1.6035, |
| "step": 109400 |
| }, |
| { |
| "epoch": 27.375, |
| "grad_norm": 0.044648509472608566, |
| "learning_rate": 0.0002959138571160697, |
| "loss": 1.5581, |
| "step": 109500 |
| }, |
| { |
| "epoch": 27.4, |
| "grad_norm": 0.05649425461888313, |
| "learning_rate": 0.0002959101068816801, |
| "loss": 1.5482, |
| "step": 109600 |
| }, |
| { |
| "epoch": 27.425, |
| "grad_norm": 0.05527213215827942, |
| "learning_rate": 0.00029590635664729044, |
| "loss": 1.6155, |
| "step": 109700 |
| }, |
| { |
| "epoch": 27.45, |
| "grad_norm": 0.050836507230997086, |
| "learning_rate": 0.00029590260641290076, |
| "loss": 1.4239, |
| "step": 109800 |
| }, |
| { |
| "epoch": 27.475, |
| "grad_norm": 0.06156973913311958, |
| "learning_rate": 0.0002958988561785111, |
| "loss": 1.4574, |
| "step": 109900 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.04659149423241615, |
| "learning_rate": 0.0002958951059441215, |
| "loss": 1.6488, |
| "step": 110000 |
| }, |
| { |
| "epoch": 27.525, |
| "grad_norm": 0.05683763325214386, |
| "learning_rate": 0.00029589135570973186, |
| "loss": 1.6128, |
| "step": 110100 |
| }, |
| { |
| "epoch": 27.55, |
| "grad_norm": 0.0504351444542408, |
| "learning_rate": 0.00029588760547534217, |
| "loss": 1.6495, |
| "step": 110200 |
| }, |
| { |
| "epoch": 27.575, |
| "grad_norm": 0.04385405406355858, |
| "learning_rate": 0.00029588385524095253, |
| "loss": 1.5644, |
| "step": 110300 |
| }, |
| { |
| "epoch": 27.6, |
| "grad_norm": 0.056605253368616104, |
| "learning_rate": 0.0002958801050065629, |
| "loss": 1.4853, |
| "step": 110400 |
| }, |
| { |
| "epoch": 27.625, |
| "grad_norm": 0.061634745448827744, |
| "learning_rate": 0.00029587635477217327, |
| "loss": 1.7518, |
| "step": 110500 |
| }, |
| { |
| "epoch": 27.65, |
| "grad_norm": 0.05308396369218826, |
| "learning_rate": 0.0002958726045377836, |
| "loss": 1.4906, |
| "step": 110600 |
| }, |
| { |
| "epoch": 27.675, |
| "grad_norm": 0.05271327123045921, |
| "learning_rate": 0.00029586885430339394, |
| "loss": 1.591, |
| "step": 110700 |
| }, |
| { |
| "epoch": 27.7, |
| "grad_norm": 0.04924798756837845, |
| "learning_rate": 0.00029586510406900426, |
| "loss": 1.5645, |
| "step": 110800 |
| }, |
| { |
| "epoch": 27.725, |
| "grad_norm": 0.05398215353488922, |
| "learning_rate": 0.0002958613538346146, |
| "loss": 1.5635, |
| "step": 110900 |
| }, |
| { |
| "epoch": 27.75, |
| "grad_norm": 0.04747261479496956, |
| "learning_rate": 0.000295857603600225, |
| "loss": 1.501, |
| "step": 111000 |
| }, |
| { |
| "epoch": 27.775, |
| "grad_norm": 0.048297274857759476, |
| "learning_rate": 0.00029585389086817925, |
| "loss": 1.4673, |
| "step": 111100 |
| }, |
| { |
| "epoch": 27.8, |
| "grad_norm": 0.047769028693437576, |
| "learning_rate": 0.00029585014063378956, |
| "loss": 1.5335, |
| "step": 111200 |
| }, |
| { |
| "epoch": 27.825, |
| "grad_norm": 0.05535224825143814, |
| "learning_rate": 0.00029584639039939993, |
| "loss": 1.5235, |
| "step": 111300 |
| }, |
| { |
| "epoch": 27.85, |
| "grad_norm": 0.04392020031809807, |
| "learning_rate": 0.0002958426401650103, |
| "loss": 1.5657, |
| "step": 111400 |
| }, |
| { |
| "epoch": 27.875, |
| "grad_norm": 0.052205685526132584, |
| "learning_rate": 0.00029583888993062066, |
| "loss": 1.5018, |
| "step": 111500 |
| }, |
| { |
| "epoch": 27.9, |
| "grad_norm": 0.0470951683819294, |
| "learning_rate": 0.00029583513969623097, |
| "loss": 1.3486, |
| "step": 111600 |
| }, |
| { |
| "epoch": 27.925, |
| "grad_norm": 0.045637097209692, |
| "learning_rate": 0.00029583138946184134, |
| "loss": 1.5814, |
| "step": 111700 |
| }, |
| { |
| "epoch": 27.95, |
| "grad_norm": 0.050197433680295944, |
| "learning_rate": 0.0002958276392274517, |
| "loss": 1.6106, |
| "step": 111800 |
| }, |
| { |
| "epoch": 27.975, |
| "grad_norm": 0.047528669238090515, |
| "learning_rate": 0.00029582388899306207, |
| "loss": 1.5872, |
| "step": 111900 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.052580513060092926, |
| "learning_rate": 0.0002958201387586724, |
| "loss": 1.4037, |
| "step": 112000 |
| }, |
| { |
| "epoch": 28.025, |
| "grad_norm": 0.05215739831328392, |
| "learning_rate": 0.00029581638852428275, |
| "loss": 1.5155, |
| "step": 112100 |
| }, |
| { |
| "epoch": 28.05, |
| "grad_norm": 0.0481177382171154, |
| "learning_rate": 0.0002958126382898931, |
| "loss": 1.5689, |
| "step": 112200 |
| }, |
| { |
| "epoch": 28.075, |
| "grad_norm": 0.06459362804889679, |
| "learning_rate": 0.0002958088880555035, |
| "loss": 1.4518, |
| "step": 112300 |
| }, |
| { |
| "epoch": 28.1, |
| "grad_norm": 0.0489063635468483, |
| "learning_rate": 0.0002958051378211138, |
| "loss": 1.5451, |
| "step": 112400 |
| }, |
| { |
| "epoch": 28.125, |
| "grad_norm": 0.05155845358967781, |
| "learning_rate": 0.00029580138758672416, |
| "loss": 1.4813, |
| "step": 112500 |
| }, |
| { |
| "epoch": 28.15, |
| "grad_norm": 0.05029693618416786, |
| "learning_rate": 0.00029579763735233447, |
| "loss": 1.4739, |
| "step": 112600 |
| }, |
| { |
| "epoch": 28.175, |
| "grad_norm": 0.06580676138401031, |
| "learning_rate": 0.00029579388711794484, |
| "loss": 1.5699, |
| "step": 112700 |
| }, |
| { |
| "epoch": 28.2, |
| "grad_norm": 0.04858999699354172, |
| "learning_rate": 0.0002957901368835552, |
| "loss": 1.4865, |
| "step": 112800 |
| }, |
| { |
| "epoch": 28.225, |
| "grad_norm": 0.048569995909929276, |
| "learning_rate": 0.00029578638664916557, |
| "loss": 1.466, |
| "step": 112900 |
| }, |
| { |
| "epoch": 28.25, |
| "grad_norm": 0.05034118890762329, |
| "learning_rate": 0.0002957826364147759, |
| "loss": 1.5571, |
| "step": 113000 |
| }, |
| { |
| "epoch": 28.275, |
| "grad_norm": 0.05421663448214531, |
| "learning_rate": 0.00029577888618038625, |
| "loss": 1.5187, |
| "step": 113100 |
| }, |
| { |
| "epoch": 28.3, |
| "grad_norm": 0.04554268717765808, |
| "learning_rate": 0.0002957751359459966, |
| "loss": 1.4526, |
| "step": 113200 |
| }, |
| { |
| "epoch": 28.325, |
| "grad_norm": 0.04670153930783272, |
| "learning_rate": 0.0002957713857116069, |
| "loss": 1.4785, |
| "step": 113300 |
| }, |
| { |
| "epoch": 28.35, |
| "grad_norm": 0.05041331797838211, |
| "learning_rate": 0.0002957676354772173, |
| "loss": 1.4533, |
| "step": 113400 |
| }, |
| { |
| "epoch": 28.375, |
| "grad_norm": 0.042034462094306946, |
| "learning_rate": 0.00029576388524282766, |
| "loss": 1.4947, |
| "step": 113500 |
| }, |
| { |
| "epoch": 28.4, |
| "grad_norm": 0.050760041922330856, |
| "learning_rate": 0.000295760135008438, |
| "loss": 1.5469, |
| "step": 113600 |
| }, |
| { |
| "epoch": 28.425, |
| "grad_norm": 0.04767528921365738, |
| "learning_rate": 0.00029575638477404834, |
| "loss": 1.4801, |
| "step": 113700 |
| }, |
| { |
| "epoch": 28.45, |
| "grad_norm": 0.05914180353283882, |
| "learning_rate": 0.0002957526720420026, |
| "loss": 1.5372, |
| "step": 113800 |
| }, |
| { |
| "epoch": 28.475, |
| "grad_norm": 0.05601555109024048, |
| "learning_rate": 0.00029574892180761296, |
| "loss": 1.4325, |
| "step": 113900 |
| }, |
| { |
| "epoch": 28.5, |
| "grad_norm": 0.056612931191921234, |
| "learning_rate": 0.00029574517157322333, |
| "loss": 1.4873, |
| "step": 114000 |
| }, |
| { |
| "epoch": 28.525, |
| "grad_norm": 0.04357181489467621, |
| "learning_rate": 0.00029574142133883364, |
| "loss": 1.4405, |
| "step": 114100 |
| }, |
| { |
| "epoch": 28.55, |
| "grad_norm": 0.05303529277443886, |
| "learning_rate": 0.000295737671104444, |
| "loss": 1.4365, |
| "step": 114200 |
| }, |
| { |
| "epoch": 28.575, |
| "grad_norm": 0.048596885055303574, |
| "learning_rate": 0.0002957339208700543, |
| "loss": 1.4425, |
| "step": 114300 |
| }, |
| { |
| "epoch": 28.6, |
| "grad_norm": 0.05361025035381317, |
| "learning_rate": 0.0002957301706356647, |
| "loss": 1.4063, |
| "step": 114400 |
| }, |
| { |
| "epoch": 28.625, |
| "grad_norm": 0.05975283682346344, |
| "learning_rate": 0.00029572642040127505, |
| "loss": 1.4549, |
| "step": 114500 |
| }, |
| { |
| "epoch": 28.65, |
| "grad_norm": 0.04482881724834442, |
| "learning_rate": 0.0002957226701668854, |
| "loss": 1.3836, |
| "step": 114600 |
| }, |
| { |
| "epoch": 28.675, |
| "grad_norm": 0.05114329233765602, |
| "learning_rate": 0.00029571891993249573, |
| "loss": 1.5901, |
| "step": 114700 |
| }, |
| { |
| "epoch": 28.7, |
| "grad_norm": 0.04038051888346672, |
| "learning_rate": 0.0002957151696981061, |
| "loss": 1.5117, |
| "step": 114800 |
| }, |
| { |
| "epoch": 28.725, |
| "grad_norm": 0.052758511155843735, |
| "learning_rate": 0.00029571141946371646, |
| "loss": 1.4111, |
| "step": 114900 |
| }, |
| { |
| "epoch": 28.75, |
| "grad_norm": 0.049384575337171555, |
| "learning_rate": 0.00029570766922932683, |
| "loss": 1.4381, |
| "step": 115000 |
| }, |
| { |
| "epoch": 28.775, |
| "grad_norm": 0.047072507441043854, |
| "learning_rate": 0.00029570391899493714, |
| "loss": 1.4444, |
| "step": 115100 |
| }, |
| { |
| "epoch": 28.8, |
| "grad_norm": 0.05382237955927849, |
| "learning_rate": 0.0002957001687605475, |
| "loss": 1.4174, |
| "step": 115200 |
| }, |
| { |
| "epoch": 28.825, |
| "grad_norm": 0.04967265948653221, |
| "learning_rate": 0.00029569641852615787, |
| "loss": 1.4709, |
| "step": 115300 |
| }, |
| { |
| "epoch": 28.85, |
| "grad_norm": 0.045560047030448914, |
| "learning_rate": 0.00029569266829176824, |
| "loss": 1.5302, |
| "step": 115400 |
| }, |
| { |
| "epoch": 28.875, |
| "grad_norm": 0.058798883110284805, |
| "learning_rate": 0.00029568891805737855, |
| "loss": 1.4022, |
| "step": 115500 |
| }, |
| { |
| "epoch": 28.9, |
| "grad_norm": 0.04776821285486221, |
| "learning_rate": 0.0002956851678229889, |
| "loss": 1.3512, |
| "step": 115600 |
| }, |
| { |
| "epoch": 28.925, |
| "grad_norm": 0.05173936486244202, |
| "learning_rate": 0.0002956814175885993, |
| "loss": 1.5405, |
| "step": 115700 |
| }, |
| { |
| "epoch": 28.95, |
| "grad_norm": 0.04927581176161766, |
| "learning_rate": 0.00029567766735420965, |
| "loss": 1.435, |
| "step": 115800 |
| }, |
| { |
| "epoch": 28.975, |
| "grad_norm": 0.04748755320906639, |
| "learning_rate": 0.00029567391711981996, |
| "loss": 1.4073, |
| "step": 115900 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 0.04827181622385979, |
| "learning_rate": 0.0002956701668854303, |
| "loss": 1.4046, |
| "step": 116000 |
| }, |
| { |
| "epoch": 29.025, |
| "grad_norm": 0.05039271339774132, |
| "learning_rate": 0.00029566645415338453, |
| "loss": 1.3616, |
| "step": 116100 |
| }, |
| { |
| "epoch": 29.05, |
| "grad_norm": 0.046831537038087845, |
| "learning_rate": 0.0002956627039189949, |
| "loss": 1.3991, |
| "step": 116200 |
| }, |
| { |
| "epoch": 29.075, |
| "grad_norm": 0.056436687707901, |
| "learning_rate": 0.00029565895368460527, |
| "loss": 1.448, |
| "step": 116300 |
| }, |
| { |
| "epoch": 29.1, |
| "grad_norm": 0.04817488044500351, |
| "learning_rate": 0.00029565520345021563, |
| "loss": 1.363, |
| "step": 116400 |
| }, |
| { |
| "epoch": 29.125, |
| "grad_norm": 0.05330492928624153, |
| "learning_rate": 0.00029565145321582594, |
| "loss": 1.4313, |
| "step": 116500 |
| }, |
| { |
| "epoch": 29.15, |
| "grad_norm": 0.05745427682995796, |
| "learning_rate": 0.0002956477029814363, |
| "loss": 1.5579, |
| "step": 116600 |
| }, |
| { |
| "epoch": 29.175, |
| "grad_norm": 0.05263765901327133, |
| "learning_rate": 0.0002956439527470467, |
| "loss": 1.5836, |
| "step": 116700 |
| }, |
| { |
| "epoch": 29.2, |
| "grad_norm": 0.044311635196208954, |
| "learning_rate": 0.00029564020251265704, |
| "loss": 1.4367, |
| "step": 116800 |
| }, |
| { |
| "epoch": 29.225, |
| "grad_norm": 0.053102701902389526, |
| "learning_rate": 0.00029563645227826735, |
| "loss": 1.4936, |
| "step": 116900 |
| }, |
| { |
| "epoch": 29.25, |
| "grad_norm": 0.04289867728948593, |
| "learning_rate": 0.0002956327020438777, |
| "loss": 1.438, |
| "step": 117000 |
| }, |
| { |
| "epoch": 29.275, |
| "grad_norm": 0.05283905565738678, |
| "learning_rate": 0.0002956289518094881, |
| "loss": 1.5341, |
| "step": 117100 |
| }, |
| { |
| "epoch": 29.3, |
| "grad_norm": 0.0411902479827404, |
| "learning_rate": 0.0002956252015750984, |
| "loss": 1.3774, |
| "step": 117200 |
| }, |
| { |
| "epoch": 29.325, |
| "grad_norm": 0.0581793412566185, |
| "learning_rate": 0.00029562145134070877, |
| "loss": 1.4712, |
| "step": 117300 |
| }, |
| { |
| "epoch": 29.35, |
| "grad_norm": 0.04655259847640991, |
| "learning_rate": 0.00029561770110631913, |
| "loss": 1.2906, |
| "step": 117400 |
| }, |
| { |
| "epoch": 29.375, |
| "grad_norm": 0.05028205364942551, |
| "learning_rate": 0.0002956139508719295, |
| "loss": 1.3921, |
| "step": 117500 |
| }, |
| { |
| "epoch": 29.4, |
| "grad_norm": 0.049044106155633926, |
| "learning_rate": 0.0002956102006375398, |
| "loss": 1.4684, |
| "step": 117600 |
| }, |
| { |
| "epoch": 29.425, |
| "grad_norm": 0.05344530567526817, |
| "learning_rate": 0.0002956064504031502, |
| "loss": 1.399, |
| "step": 117700 |
| }, |
| { |
| "epoch": 29.45, |
| "grad_norm": 0.05248359963297844, |
| "learning_rate": 0.0002956027001687605, |
| "loss": 1.3738, |
| "step": 117800 |
| }, |
| { |
| "epoch": 29.475, |
| "grad_norm": 0.053722232580184937, |
| "learning_rate": 0.00029559894993437085, |
| "loss": 1.27, |
| "step": 117900 |
| }, |
| { |
| "epoch": 29.5, |
| "grad_norm": 0.05581889674067497, |
| "learning_rate": 0.0002955951996999812, |
| "loss": 1.4523, |
| "step": 118000 |
| }, |
| { |
| "epoch": 29.525, |
| "grad_norm": 0.04724375531077385, |
| "learning_rate": 0.0002955914494655916, |
| "loss": 1.2637, |
| "step": 118100 |
| }, |
| { |
| "epoch": 29.55, |
| "grad_norm": 0.04487941041588783, |
| "learning_rate": 0.0002955877367335458, |
| "loss": 1.3064, |
| "step": 118200 |
| }, |
| { |
| "epoch": 29.575, |
| "grad_norm": 0.04799391329288483, |
| "learning_rate": 0.00029558398649915616, |
| "loss": 1.4433, |
| "step": 118300 |
| }, |
| { |
| "epoch": 29.6, |
| "grad_norm": 0.04437430948019028, |
| "learning_rate": 0.0002955802362647665, |
| "loss": 1.3427, |
| "step": 118400 |
| }, |
| { |
| "epoch": 29.625, |
| "grad_norm": 0.04969744756817818, |
| "learning_rate": 0.0002955764860303769, |
| "loss": 1.3415, |
| "step": 118500 |
| }, |
| { |
| "epoch": 29.65, |
| "grad_norm": 0.05268990993499756, |
| "learning_rate": 0.0002955727357959872, |
| "loss": 1.37, |
| "step": 118600 |
| }, |
| { |
| "epoch": 29.675, |
| "grad_norm": 0.05563261732459068, |
| "learning_rate": 0.00029556898556159757, |
| "loss": 1.3404, |
| "step": 118700 |
| }, |
| { |
| "epoch": 29.7, |
| "grad_norm": 0.045039862394332886, |
| "learning_rate": 0.00029556523532720794, |
| "loss": 1.2967, |
| "step": 118800 |
| }, |
| { |
| "epoch": 29.725, |
| "grad_norm": 0.06740451604127884, |
| "learning_rate": 0.0002955614850928183, |
| "loss": 1.4316, |
| "step": 118900 |
| }, |
| { |
| "epoch": 29.75, |
| "grad_norm": 0.046530742198228836, |
| "learning_rate": 0.0002955577348584286, |
| "loss": 1.3871, |
| "step": 119000 |
| }, |
| { |
| "epoch": 29.775, |
| "grad_norm": 0.04662451893091202, |
| "learning_rate": 0.000295553984624039, |
| "loss": 1.3832, |
| "step": 119100 |
| }, |
| { |
| "epoch": 29.8, |
| "grad_norm": 0.05180426687002182, |
| "learning_rate": 0.00029555023438964935, |
| "loss": 1.3783, |
| "step": 119200 |
| }, |
| { |
| "epoch": 29.825, |
| "grad_norm": 0.04919251427054405, |
| "learning_rate": 0.0002955464841552597, |
| "loss": 1.3789, |
| "step": 119300 |
| }, |
| { |
| "epoch": 29.85, |
| "grad_norm": 0.04741760343313217, |
| "learning_rate": 0.00029554273392087, |
| "loss": 1.392, |
| "step": 119400 |
| }, |
| { |
| "epoch": 29.875, |
| "grad_norm": 0.05151817202568054, |
| "learning_rate": 0.0002955389836864804, |
| "loss": 1.3472, |
| "step": 119500 |
| }, |
| { |
| "epoch": 29.9, |
| "grad_norm": 0.05211416259407997, |
| "learning_rate": 0.0002955352334520907, |
| "loss": 1.4448, |
| "step": 119600 |
| }, |
| { |
| "epoch": 29.925, |
| "grad_norm": 0.04866619408130646, |
| "learning_rate": 0.00029553148321770107, |
| "loss": 1.3788, |
| "step": 119700 |
| }, |
| { |
| "epoch": 29.95, |
| "grad_norm": 0.056409094482660294, |
| "learning_rate": 0.00029552773298331143, |
| "loss": 1.4182, |
| "step": 119800 |
| }, |
| { |
| "epoch": 29.975, |
| "grad_norm": 0.045399557799100876, |
| "learning_rate": 0.0002955239827489218, |
| "loss": 1.3579, |
| "step": 119900 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.05333389341831207, |
| "learning_rate": 0.0002955202325145321, |
| "loss": 1.4833, |
| "step": 120000 |
| }, |
| { |
| "epoch": 30.025, |
| "grad_norm": 0.047169484198093414, |
| "learning_rate": 0.0002955164822801425, |
| "loss": 1.3531, |
| "step": 120100 |
| }, |
| { |
| "epoch": 30.05, |
| "grad_norm": 0.04647146537899971, |
| "learning_rate": 0.00029551273204575285, |
| "loss": 1.3722, |
| "step": 120200 |
| }, |
| { |
| "epoch": 30.075, |
| "grad_norm": 0.05528531223535538, |
| "learning_rate": 0.0002955089818113632, |
| "loss": 1.268, |
| "step": 120300 |
| }, |
| { |
| "epoch": 30.1, |
| "grad_norm": 0.050155188888311386, |
| "learning_rate": 0.0002955052315769735, |
| "loss": 1.3659, |
| "step": 120400 |
| }, |
| { |
| "epoch": 30.125, |
| "grad_norm": 0.047319624572992325, |
| "learning_rate": 0.0002955014813425839, |
| "loss": 1.4225, |
| "step": 120500 |
| }, |
| { |
| "epoch": 30.15, |
| "grad_norm": 0.04249805584549904, |
| "learning_rate": 0.00029549773110819426, |
| "loss": 1.4412, |
| "step": 120600 |
| }, |
| { |
| "epoch": 30.175, |
| "grad_norm": 0.05880492925643921, |
| "learning_rate": 0.0002954939808738046, |
| "loss": 1.5054, |
| "step": 120700 |
| }, |
| { |
| "epoch": 30.2, |
| "grad_norm": 0.047143761068582535, |
| "learning_rate": 0.00029549023063941493, |
| "loss": 1.3931, |
| "step": 120800 |
| }, |
| { |
| "epoch": 30.225, |
| "grad_norm": 0.04481210932135582, |
| "learning_rate": 0.00029548648040502525, |
| "loss": 1.2962, |
| "step": 120900 |
| }, |
| { |
| "epoch": 30.25, |
| "grad_norm": 0.044143520295619965, |
| "learning_rate": 0.00029548273017063567, |
| "loss": 1.2338, |
| "step": 121000 |
| }, |
| { |
| "epoch": 30.275, |
| "grad_norm": 0.06169132515788078, |
| "learning_rate": 0.000295478979936246, |
| "loss": 1.3578, |
| "step": 121100 |
| }, |
| { |
| "epoch": 30.3, |
| "grad_norm": 0.061004914343357086, |
| "learning_rate": 0.00029547522970185634, |
| "loss": 1.334, |
| "step": 121200 |
| }, |
| { |
| "epoch": 30.325, |
| "grad_norm": 0.04402782768011093, |
| "learning_rate": 0.00029547147946746666, |
| "loss": 1.404, |
| "step": 121300 |
| }, |
| { |
| "epoch": 30.35, |
| "grad_norm": 0.05749357491731644, |
| "learning_rate": 0.000295467729233077, |
| "loss": 1.2942, |
| "step": 121400 |
| }, |
| { |
| "epoch": 30.375, |
| "grad_norm": 0.052716564387083054, |
| "learning_rate": 0.0002954639789986874, |
| "loss": 1.2753, |
| "step": 121500 |
| }, |
| { |
| "epoch": 30.4, |
| "grad_norm": 0.04735216125845909, |
| "learning_rate": 0.00029546022876429775, |
| "loss": 1.3316, |
| "step": 121600 |
| }, |
| { |
| "epoch": 30.425, |
| "grad_norm": 0.05518503487110138, |
| "learning_rate": 0.00029545651603225196, |
| "loss": 1.3901, |
| "step": 121700 |
| }, |
| { |
| "epoch": 30.45, |
| "grad_norm": 0.04617263004183769, |
| "learning_rate": 0.00029545276579786233, |
| "loss": 1.3542, |
| "step": 121800 |
| }, |
| { |
| "epoch": 30.475, |
| "grad_norm": 0.04624765366315842, |
| "learning_rate": 0.0002954490155634727, |
| "loss": 1.3594, |
| "step": 121900 |
| }, |
| { |
| "epoch": 30.5, |
| "grad_norm": 0.05599815025925636, |
| "learning_rate": 0.00029544526532908306, |
| "loss": 1.3957, |
| "step": 122000 |
| }, |
| { |
| "epoch": 30.525, |
| "grad_norm": 0.047623343765735626, |
| "learning_rate": 0.00029544151509469337, |
| "loss": 1.3099, |
| "step": 122100 |
| }, |
| { |
| "epoch": 30.55, |
| "grad_norm": 0.04954765364527702, |
| "learning_rate": 0.00029543776486030374, |
| "loss": 1.4809, |
| "step": 122200 |
| }, |
| { |
| "epoch": 30.575, |
| "grad_norm": 0.057207658886909485, |
| "learning_rate": 0.0002954340146259141, |
| "loss": 1.3149, |
| "step": 122300 |
| }, |
| { |
| "epoch": 30.6, |
| "grad_norm": 0.04670143872499466, |
| "learning_rate": 0.00029543026439152447, |
| "loss": 1.3461, |
| "step": 122400 |
| }, |
| { |
| "epoch": 30.625, |
| "grad_norm": 0.04433277249336243, |
| "learning_rate": 0.0002954265141571348, |
| "loss": 1.1924, |
| "step": 122500 |
| }, |
| { |
| "epoch": 30.65, |
| "grad_norm": 0.045901257544755936, |
| "learning_rate": 0.00029542276392274515, |
| "loss": 1.3508, |
| "step": 122600 |
| }, |
| { |
| "epoch": 30.675, |
| "grad_norm": 0.048084866255521774, |
| "learning_rate": 0.0002954190136883555, |
| "loss": 1.3341, |
| "step": 122700 |
| }, |
| { |
| "epoch": 30.7, |
| "grad_norm": 0.04639054462313652, |
| "learning_rate": 0.0002954152634539659, |
| "loss": 1.2832, |
| "step": 122800 |
| }, |
| { |
| "epoch": 30.725, |
| "grad_norm": 0.05224520340561867, |
| "learning_rate": 0.0002954115132195762, |
| "loss": 1.2682, |
| "step": 122900 |
| }, |
| { |
| "epoch": 30.75, |
| "grad_norm": 0.05258006602525711, |
| "learning_rate": 0.00029540776298518656, |
| "loss": 1.3085, |
| "step": 123000 |
| }, |
| { |
| "epoch": 30.775, |
| "grad_norm": 0.0506523959338665, |
| "learning_rate": 0.00029540401275079687, |
| "loss": 1.3224, |
| "step": 123100 |
| }, |
| { |
| "epoch": 30.8, |
| "grad_norm": 0.046581752598285675, |
| "learning_rate": 0.00029540026251640724, |
| "loss": 1.2794, |
| "step": 123200 |
| }, |
| { |
| "epoch": 30.825, |
| "grad_norm": 0.04979027807712555, |
| "learning_rate": 0.0002953965122820176, |
| "loss": 1.1661, |
| "step": 123300 |
| }, |
| { |
| "epoch": 30.85, |
| "grad_norm": 0.07573187351226807, |
| "learning_rate": 0.00029539276204762797, |
| "loss": 1.3565, |
| "step": 123400 |
| }, |
| { |
| "epoch": 30.875, |
| "grad_norm": 0.05088147893548012, |
| "learning_rate": 0.0002953890118132383, |
| "loss": 1.3488, |
| "step": 123500 |
| }, |
| { |
| "epoch": 30.9, |
| "grad_norm": 0.05240534245967865, |
| "learning_rate": 0.00029538526157884865, |
| "loss": 1.336, |
| "step": 123600 |
| }, |
| { |
| "epoch": 30.925, |
| "grad_norm": 0.04134645685553551, |
| "learning_rate": 0.000295381511344459, |
| "loss": 1.2747, |
| "step": 123700 |
| }, |
| { |
| "epoch": 30.95, |
| "grad_norm": 0.05094057694077492, |
| "learning_rate": 0.0002953777611100694, |
| "loss": 1.3445, |
| "step": 123800 |
| }, |
| { |
| "epoch": 30.975, |
| "grad_norm": 0.045938342809677124, |
| "learning_rate": 0.0002953740108756797, |
| "loss": 1.2555, |
| "step": 123900 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 0.04664922505617142, |
| "learning_rate": 0.00029537026064129006, |
| "loss": 1.3741, |
| "step": 124000 |
| }, |
| { |
| "epoch": 31.025, |
| "grad_norm": 0.04887442663311958, |
| "learning_rate": 0.0002953665104069004, |
| "loss": 1.2055, |
| "step": 124100 |
| }, |
| { |
| "epoch": 31.05, |
| "grad_norm": 0.04919900372624397, |
| "learning_rate": 0.0002953627601725108, |
| "loss": 1.1721, |
| "step": 124200 |
| }, |
| { |
| "epoch": 31.075, |
| "grad_norm": 0.048029493540525436, |
| "learning_rate": 0.0002953590099381211, |
| "loss": 1.3029, |
| "step": 124300 |
| }, |
| { |
| "epoch": 31.1, |
| "grad_norm": 0.053546350449323654, |
| "learning_rate": 0.00029535525970373147, |
| "loss": 1.3137, |
| "step": 124400 |
| }, |
| { |
| "epoch": 31.125, |
| "grad_norm": 0.04450497403740883, |
| "learning_rate": 0.0002953515094693418, |
| "loss": 1.3236, |
| "step": 124500 |
| }, |
| { |
| "epoch": 31.15, |
| "grad_norm": 0.04896382614970207, |
| "learning_rate": 0.0002953477592349522, |
| "loss": 1.2933, |
| "step": 124600 |
| }, |
| { |
| "epoch": 31.175, |
| "grad_norm": 0.04476182907819748, |
| "learning_rate": 0.0002953440465029064, |
| "loss": 1.3332, |
| "step": 124700 |
| }, |
| { |
| "epoch": 31.2, |
| "grad_norm": 0.054897475987672806, |
| "learning_rate": 0.0002953402962685167, |
| "loss": 1.3213, |
| "step": 124800 |
| }, |
| { |
| "epoch": 31.225, |
| "grad_norm": 0.04679589346051216, |
| "learning_rate": 0.0002953365460341271, |
| "loss": 1.3065, |
| "step": 124900 |
| }, |
| { |
| "epoch": 31.25, |
| "grad_norm": 0.04921596497297287, |
| "learning_rate": 0.00029533279579973745, |
| "loss": 1.1591, |
| "step": 125000 |
| }, |
| { |
| "epoch": 31.275, |
| "grad_norm": 0.0433526448905468, |
| "learning_rate": 0.0002953290455653478, |
| "loss": 1.3262, |
| "step": 125100 |
| }, |
| { |
| "epoch": 31.3, |
| "grad_norm": 0.043862484395504, |
| "learning_rate": 0.00029532529533095813, |
| "loss": 1.2693, |
| "step": 125200 |
| }, |
| { |
| "epoch": 31.325, |
| "grad_norm": 0.06467683613300323, |
| "learning_rate": 0.0002953215450965685, |
| "loss": 1.3879, |
| "step": 125300 |
| }, |
| { |
| "epoch": 31.35, |
| "grad_norm": 0.05398791283369064, |
| "learning_rate": 0.00029531779486217886, |
| "loss": 1.2593, |
| "step": 125400 |
| }, |
| { |
| "epoch": 31.375, |
| "grad_norm": 0.06727266311645508, |
| "learning_rate": 0.00029531404462778923, |
| "loss": 1.3277, |
| "step": 125500 |
| }, |
| { |
| "epoch": 31.4, |
| "grad_norm": 0.0463390052318573, |
| "learning_rate": 0.00029531029439339954, |
| "loss": 1.3013, |
| "step": 125600 |
| }, |
| { |
| "epoch": 31.425, |
| "grad_norm": 0.04781678318977356, |
| "learning_rate": 0.0002953065441590099, |
| "loss": 1.2572, |
| "step": 125700 |
| }, |
| { |
| "epoch": 31.45, |
| "grad_norm": 0.0504741370677948, |
| "learning_rate": 0.00029530279392462027, |
| "loss": 1.276, |
| "step": 125800 |
| }, |
| { |
| "epoch": 31.475, |
| "grad_norm": 0.08227650821208954, |
| "learning_rate": 0.00029529904369023064, |
| "loss": 1.3546, |
| "step": 125900 |
| }, |
| { |
| "epoch": 31.5, |
| "grad_norm": 0.04831939563155174, |
| "learning_rate": 0.00029529529345584095, |
| "loss": 1.2622, |
| "step": 126000 |
| }, |
| { |
| "epoch": 31.525, |
| "grad_norm": 0.04759907349944115, |
| "learning_rate": 0.0002952915432214513, |
| "loss": 1.3973, |
| "step": 126100 |
| }, |
| { |
| "epoch": 31.55, |
| "grad_norm": 0.0501595176756382, |
| "learning_rate": 0.00029528779298706163, |
| "loss": 1.309, |
| "step": 126200 |
| }, |
| { |
| "epoch": 31.575, |
| "grad_norm": 0.04236988723278046, |
| "learning_rate": 0.00029528404275267205, |
| "loss": 1.2076, |
| "step": 126300 |
| }, |
| { |
| "epoch": 31.6, |
| "grad_norm": 0.045248087495565414, |
| "learning_rate": 0.00029528029251828236, |
| "loss": 1.1881, |
| "step": 126400 |
| }, |
| { |
| "epoch": 31.625, |
| "grad_norm": 0.05358180031180382, |
| "learning_rate": 0.00029527654228389273, |
| "loss": 1.242, |
| "step": 126500 |
| }, |
| { |
| "epoch": 31.65, |
| "grad_norm": 0.06812089681625366, |
| "learning_rate": 0.00029527279204950304, |
| "loss": 1.3071, |
| "step": 126600 |
| }, |
| { |
| "epoch": 31.675, |
| "grad_norm": 0.0523652583360672, |
| "learning_rate": 0.0002952690418151134, |
| "loss": 1.2635, |
| "step": 126700 |
| }, |
| { |
| "epoch": 31.7, |
| "grad_norm": 0.054195646196603775, |
| "learning_rate": 0.00029526529158072377, |
| "loss": 1.3601, |
| "step": 126800 |
| }, |
| { |
| "epoch": 31.725, |
| "grad_norm": 0.05106286332011223, |
| "learning_rate": 0.00029526154134633414, |
| "loss": 1.2716, |
| "step": 126900 |
| }, |
| { |
| "epoch": 31.75, |
| "grad_norm": 0.04490172490477562, |
| "learning_rate": 0.00029525779111194445, |
| "loss": 1.1354, |
| "step": 127000 |
| }, |
| { |
| "epoch": 31.775, |
| "grad_norm": 0.04846130311489105, |
| "learning_rate": 0.0002952540408775548, |
| "loss": 1.3259, |
| "step": 127100 |
| }, |
| { |
| "epoch": 31.8, |
| "grad_norm": 0.050297126173973083, |
| "learning_rate": 0.0002952502906431652, |
| "loss": 1.1898, |
| "step": 127200 |
| }, |
| { |
| "epoch": 31.825, |
| "grad_norm": 0.0532267764210701, |
| "learning_rate": 0.00029524654040877555, |
| "loss": 1.1544, |
| "step": 127300 |
| }, |
| { |
| "epoch": 31.85, |
| "grad_norm": 0.03898947685956955, |
| "learning_rate": 0.00029524282767672976, |
| "loss": 1.3027, |
| "step": 127400 |
| }, |
| { |
| "epoch": 31.875, |
| "grad_norm": 0.055518005043268204, |
| "learning_rate": 0.0002952390774423401, |
| "loss": 1.1795, |
| "step": 127500 |
| }, |
| { |
| "epoch": 31.9, |
| "grad_norm": 0.045770760625600815, |
| "learning_rate": 0.0002952353272079505, |
| "loss": 1.2203, |
| "step": 127600 |
| }, |
| { |
| "epoch": 31.925, |
| "grad_norm": 0.04108942300081253, |
| "learning_rate": 0.00029523157697356085, |
| "loss": 1.2737, |
| "step": 127700 |
| }, |
| { |
| "epoch": 31.95, |
| "grad_norm": 0.04591604694724083, |
| "learning_rate": 0.00029522782673917117, |
| "loss": 1.2465, |
| "step": 127800 |
| }, |
| { |
| "epoch": 31.975, |
| "grad_norm": 0.04735784977674484, |
| "learning_rate": 0.00029522407650478153, |
| "loss": 1.3007, |
| "step": 127900 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.04895665496587753, |
| "learning_rate": 0.0002952203262703919, |
| "loss": 1.3006, |
| "step": 128000 |
| }, |
| { |
| "epoch": 32.025, |
| "grad_norm": 0.05351528897881508, |
| "learning_rate": 0.00029521657603600226, |
| "loss": 1.2599, |
| "step": 128100 |
| }, |
| { |
| "epoch": 32.05, |
| "grad_norm": 0.04478209838271141, |
| "learning_rate": 0.0002952128258016126, |
| "loss": 1.2839, |
| "step": 128200 |
| }, |
| { |
| "epoch": 32.075, |
| "grad_norm": 0.05886415019631386, |
| "learning_rate": 0.00029520907556722294, |
| "loss": 1.2412, |
| "step": 128300 |
| }, |
| { |
| "epoch": 32.1, |
| "grad_norm": 0.04743971303105354, |
| "learning_rate": 0.00029520532533283325, |
| "loss": 1.2031, |
| "step": 128400 |
| }, |
| { |
| "epoch": 32.125, |
| "grad_norm": 0.046698570251464844, |
| "learning_rate": 0.0002952015750984436, |
| "loss": 1.2691, |
| "step": 128500 |
| }, |
| { |
| "epoch": 32.15, |
| "grad_norm": 0.04950440675020218, |
| "learning_rate": 0.000295197824864054, |
| "loss": 1.2178, |
| "step": 128600 |
| }, |
| { |
| "epoch": 32.175, |
| "grad_norm": 0.047533079981803894, |
| "learning_rate": 0.0002951940746296643, |
| "loss": 1.1742, |
| "step": 128700 |
| }, |
| { |
| "epoch": 32.2, |
| "grad_norm": 0.1709842085838318, |
| "learning_rate": 0.00029519032439527466, |
| "loss": 1.2904, |
| "step": 128800 |
| }, |
| { |
| "epoch": 32.225, |
| "grad_norm": 0.053603630512952805, |
| "learning_rate": 0.00029518657416088503, |
| "loss": 1.2806, |
| "step": 128900 |
| }, |
| { |
| "epoch": 32.25, |
| "grad_norm": 0.05528594180941582, |
| "learning_rate": 0.0002951828239264954, |
| "loss": 1.2891, |
| "step": 129000 |
| }, |
| { |
| "epoch": 32.275, |
| "grad_norm": 0.051689211279153824, |
| "learning_rate": 0.0002951790736921057, |
| "loss": 1.3107, |
| "step": 129100 |
| }, |
| { |
| "epoch": 32.3, |
| "grad_norm": 0.0504557229578495, |
| "learning_rate": 0.0002951753234577161, |
| "loss": 1.2528, |
| "step": 129200 |
| }, |
| { |
| "epoch": 32.325, |
| "grad_norm": 0.048762448132038116, |
| "learning_rate": 0.00029517157322332644, |
| "loss": 1.1503, |
| "step": 129300 |
| }, |
| { |
| "epoch": 32.35, |
| "grad_norm": 0.05114434286952019, |
| "learning_rate": 0.0002951678229889368, |
| "loss": 1.1685, |
| "step": 129400 |
| }, |
| { |
| "epoch": 32.375, |
| "grad_norm": 0.04877127707004547, |
| "learning_rate": 0.0002951640727545471, |
| "loss": 1.1642, |
| "step": 129500 |
| }, |
| { |
| "epoch": 32.4, |
| "grad_norm": 0.04645070433616638, |
| "learning_rate": 0.0002951603225201575, |
| "loss": 1.2363, |
| "step": 129600 |
| }, |
| { |
| "epoch": 32.425, |
| "grad_norm": 0.049255430698394775, |
| "learning_rate": 0.0002951565722857678, |
| "loss": 1.286, |
| "step": 129700 |
| }, |
| { |
| "epoch": 32.45, |
| "grad_norm": 0.05051419138908386, |
| "learning_rate": 0.00029515282205137816, |
| "loss": 1.2311, |
| "step": 129800 |
| }, |
| { |
| "epoch": 32.475, |
| "grad_norm": 0.05819782614707947, |
| "learning_rate": 0.00029514907181698853, |
| "loss": 1.2218, |
| "step": 129900 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 0.04523173347115517, |
| "learning_rate": 0.0002951453215825989, |
| "loss": 1.17, |
| "step": 130000 |
| }, |
| { |
| "epoch": 32.525, |
| "grad_norm": 0.047802697867155075, |
| "learning_rate": 0.0002951415713482092, |
| "loss": 1.2679, |
| "step": 130100 |
| }, |
| { |
| "epoch": 32.55, |
| "grad_norm": 0.04578109085559845, |
| "learning_rate": 0.0002951378211138196, |
| "loss": 1.134, |
| "step": 130200 |
| }, |
| { |
| "epoch": 32.575, |
| "grad_norm": 0.040033962577581406, |
| "learning_rate": 0.00029513407087942994, |
| "loss": 1.222, |
| "step": 130300 |
| }, |
| { |
| "epoch": 32.6, |
| "grad_norm": 0.04128117114305496, |
| "learning_rate": 0.0002951303206450403, |
| "loss": 1.2106, |
| "step": 130400 |
| }, |
| { |
| "epoch": 32.625, |
| "grad_norm": 0.04531345143914223, |
| "learning_rate": 0.0002951265704106506, |
| "loss": 1.186, |
| "step": 130500 |
| }, |
| { |
| "epoch": 32.65, |
| "grad_norm": 0.043665412813425064, |
| "learning_rate": 0.000295122820176261, |
| "loss": 1.2078, |
| "step": 130600 |
| }, |
| { |
| "epoch": 32.675, |
| "grad_norm": 0.04887350648641586, |
| "learning_rate": 0.00029511906994187135, |
| "loss": 1.2482, |
| "step": 130700 |
| }, |
| { |
| "epoch": 32.7, |
| "grad_norm": 0.05151134356856346, |
| "learning_rate": 0.0002951153197074817, |
| "loss": 1.2568, |
| "step": 130800 |
| }, |
| { |
| "epoch": 32.725, |
| "grad_norm": 0.042473357170820236, |
| "learning_rate": 0.00029511156947309203, |
| "loss": 1.1829, |
| "step": 130900 |
| }, |
| { |
| "epoch": 32.75, |
| "grad_norm": 0.05092649906873703, |
| "learning_rate": 0.0002951078192387024, |
| "loss": 1.1481, |
| "step": 131000 |
| }, |
| { |
| "epoch": 32.775, |
| "grad_norm": 0.044292863458395004, |
| "learning_rate": 0.00029510406900431276, |
| "loss": 1.1682, |
| "step": 131100 |
| }, |
| { |
| "epoch": 32.8, |
| "grad_norm": 0.054200585931539536, |
| "learning_rate": 0.0002951003187699231, |
| "loss": 1.2387, |
| "step": 131200 |
| }, |
| { |
| "epoch": 32.825, |
| "grad_norm": 0.04644659161567688, |
| "learning_rate": 0.00029509656853553344, |
| "loss": 1.2118, |
| "step": 131300 |
| }, |
| { |
| "epoch": 32.85, |
| "grad_norm": 0.06080161780118942, |
| "learning_rate": 0.0002950928558034877, |
| "loss": 1.1483, |
| "step": 131400 |
| }, |
| { |
| "epoch": 32.875, |
| "grad_norm": 0.07698054611682892, |
| "learning_rate": 0.000295089105569098, |
| "loss": 1.1887, |
| "step": 131500 |
| }, |
| { |
| "epoch": 32.9, |
| "grad_norm": 0.038868315517902374, |
| "learning_rate": 0.00029508535533470843, |
| "loss": 1.1528, |
| "step": 131600 |
| }, |
| { |
| "epoch": 32.925, |
| "grad_norm": 0.05261719226837158, |
| "learning_rate": 0.00029508160510031874, |
| "loss": 1.085, |
| "step": 131700 |
| }, |
| { |
| "epoch": 32.95, |
| "grad_norm": 0.043816640973091125, |
| "learning_rate": 0.0002950778548659291, |
| "loss": 1.2063, |
| "step": 131800 |
| }, |
| { |
| "epoch": 32.975, |
| "grad_norm": 0.042075928300619125, |
| "learning_rate": 0.0002950741046315394, |
| "loss": 1.1792, |
| "step": 131900 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 0.04904596507549286, |
| "learning_rate": 0.0002950703543971498, |
| "loss": 1.2376, |
| "step": 132000 |
| }, |
| { |
| "epoch": 33.025, |
| "grad_norm": 0.051781512796878815, |
| "learning_rate": 0.00029506660416276015, |
| "loss": 1.181, |
| "step": 132100 |
| }, |
| { |
| "epoch": 33.05, |
| "grad_norm": 0.055431291460990906, |
| "learning_rate": 0.0002950628539283705, |
| "loss": 1.1771, |
| "step": 132200 |
| }, |
| { |
| "epoch": 33.075, |
| "grad_norm": 0.04665238782763481, |
| "learning_rate": 0.00029505910369398083, |
| "loss": 1.1322, |
| "step": 132300 |
| }, |
| { |
| "epoch": 33.1, |
| "grad_norm": 0.04755477234721184, |
| "learning_rate": 0.0002950553534595912, |
| "loss": 1.2262, |
| "step": 132400 |
| }, |
| { |
| "epoch": 33.125, |
| "grad_norm": 0.0748729407787323, |
| "learning_rate": 0.00029505164072754546, |
| "loss": 1.0936, |
| "step": 132500 |
| }, |
| { |
| "epoch": 33.15, |
| "grad_norm": 0.05131325498223305, |
| "learning_rate": 0.00029504789049315577, |
| "loss": 1.2296, |
| "step": 132600 |
| }, |
| { |
| "epoch": 33.175, |
| "grad_norm": 0.051855139434337616, |
| "learning_rate": 0.00029504414025876614, |
| "loss": 1.2527, |
| "step": 132700 |
| }, |
| { |
| "epoch": 33.2, |
| "grad_norm": 0.04259216785430908, |
| "learning_rate": 0.0002950403900243765, |
| "loss": 1.1978, |
| "step": 132800 |
| }, |
| { |
| "epoch": 33.225, |
| "grad_norm": 0.0451393760740757, |
| "learning_rate": 0.00029503663978998687, |
| "loss": 1.1695, |
| "step": 132900 |
| }, |
| { |
| "epoch": 33.25, |
| "grad_norm": 0.0477844700217247, |
| "learning_rate": 0.0002950328895555972, |
| "loss": 1.1885, |
| "step": 133000 |
| }, |
| { |
| "epoch": 33.275, |
| "grad_norm": 0.04242611676454544, |
| "learning_rate": 0.00029502913932120755, |
| "loss": 1.1393, |
| "step": 133100 |
| }, |
| { |
| "epoch": 33.3, |
| "grad_norm": 0.046090077608823776, |
| "learning_rate": 0.00029502538908681786, |
| "loss": 1.1158, |
| "step": 133200 |
| }, |
| { |
| "epoch": 33.325, |
| "grad_norm": 0.04372167959809303, |
| "learning_rate": 0.0002950216388524283, |
| "loss": 1.1583, |
| "step": 133300 |
| }, |
| { |
| "epoch": 33.35, |
| "grad_norm": 0.044858288019895554, |
| "learning_rate": 0.0002950178886180386, |
| "loss": 1.1877, |
| "step": 133400 |
| }, |
| { |
| "epoch": 33.375, |
| "grad_norm": 0.042134176939725876, |
| "learning_rate": 0.00029501413838364896, |
| "loss": 1.1365, |
| "step": 133500 |
| }, |
| { |
| "epoch": 33.4, |
| "grad_norm": 0.05012949928641319, |
| "learning_rate": 0.00029501038814925927, |
| "loss": 1.2341, |
| "step": 133600 |
| }, |
| { |
| "epoch": 33.425, |
| "grad_norm": 0.04589414969086647, |
| "learning_rate": 0.00029500663791486964, |
| "loss": 1.1346, |
| "step": 133700 |
| }, |
| { |
| "epoch": 33.45, |
| "grad_norm": 0.059703532606363297, |
| "learning_rate": 0.00029500288768048, |
| "loss": 1.2177, |
| "step": 133800 |
| }, |
| { |
| "epoch": 33.475, |
| "grad_norm": 0.04715392366051674, |
| "learning_rate": 0.00029499913744609037, |
| "loss": 1.2044, |
| "step": 133900 |
| }, |
| { |
| "epoch": 33.5, |
| "grad_norm": 0.04391086474061012, |
| "learning_rate": 0.0002949953872117007, |
| "loss": 1.1846, |
| "step": 134000 |
| }, |
| { |
| "epoch": 33.525, |
| "grad_norm": 0.04045191779732704, |
| "learning_rate": 0.00029499163697731105, |
| "loss": 1.2048, |
| "step": 134100 |
| }, |
| { |
| "epoch": 33.55, |
| "grad_norm": 0.04283670708537102, |
| "learning_rate": 0.0002949878867429214, |
| "loss": 1.2246, |
| "step": 134200 |
| }, |
| { |
| "epoch": 33.575, |
| "grad_norm": 0.04338289797306061, |
| "learning_rate": 0.0002949841365085318, |
| "loss": 1.2334, |
| "step": 134300 |
| }, |
| { |
| "epoch": 33.6, |
| "grad_norm": 0.05026433989405632, |
| "learning_rate": 0.0002949803862741421, |
| "loss": 1.1017, |
| "step": 134400 |
| }, |
| { |
| "epoch": 33.625, |
| "grad_norm": 0.04827344790101051, |
| "learning_rate": 0.00029497663603975246, |
| "loss": 1.1765, |
| "step": 134500 |
| }, |
| { |
| "epoch": 33.65, |
| "grad_norm": 0.055267006158828735, |
| "learning_rate": 0.0002949728858053628, |
| "loss": 1.0555, |
| "step": 134600 |
| }, |
| { |
| "epoch": 33.675, |
| "grad_norm": 0.05551549047231674, |
| "learning_rate": 0.0002949691730733171, |
| "loss": 1.1171, |
| "step": 134700 |
| }, |
| { |
| "epoch": 33.7, |
| "grad_norm": 0.04356600344181061, |
| "learning_rate": 0.0002949654228389274, |
| "loss": 1.2224, |
| "step": 134800 |
| }, |
| { |
| "epoch": 33.725, |
| "grad_norm": 0.049372829496860504, |
| "learning_rate": 0.00029496167260453776, |
| "loss": 1.0843, |
| "step": 134900 |
| }, |
| { |
| "epoch": 33.75, |
| "grad_norm": 0.04735811799764633, |
| "learning_rate": 0.00029495792237014813, |
| "loss": 1.2027, |
| "step": 135000 |
| }, |
| { |
| "epoch": 33.775, |
| "grad_norm": 0.048068366944789886, |
| "learning_rate": 0.0002949541721357585, |
| "loss": 1.182, |
| "step": 135100 |
| }, |
| { |
| "epoch": 33.8, |
| "grad_norm": 0.05330264940857887, |
| "learning_rate": 0.0002949504219013688, |
| "loss": 1.1519, |
| "step": 135200 |
| }, |
| { |
| "epoch": 33.825, |
| "grad_norm": 0.04151195287704468, |
| "learning_rate": 0.0002949466716669792, |
| "loss": 1.0107, |
| "step": 135300 |
| }, |
| { |
| "epoch": 33.85, |
| "grad_norm": 0.04683278128504753, |
| "learning_rate": 0.0002949429214325895, |
| "loss": 1.2629, |
| "step": 135400 |
| }, |
| { |
| "epoch": 33.875, |
| "grad_norm": 0.04796934127807617, |
| "learning_rate": 0.00029493917119819985, |
| "loss": 1.0715, |
| "step": 135500 |
| }, |
| { |
| "epoch": 33.9, |
| "grad_norm": 0.048207636922597885, |
| "learning_rate": 0.0002949354209638102, |
| "loss": 1.1114, |
| "step": 135600 |
| }, |
| { |
| "epoch": 33.925, |
| "grad_norm": 0.0472245067358017, |
| "learning_rate": 0.0002949316707294206, |
| "loss": 1.1557, |
| "step": 135700 |
| }, |
| { |
| "epoch": 33.95, |
| "grad_norm": 0.051259011030197144, |
| "learning_rate": 0.0002949279204950309, |
| "loss": 1.1246, |
| "step": 135800 |
| }, |
| { |
| "epoch": 33.975, |
| "grad_norm": 0.054303720593452454, |
| "learning_rate": 0.00029492417026064126, |
| "loss": 1.0731, |
| "step": 135900 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 0.06228245794773102, |
| "learning_rate": 0.00029492042002625163, |
| "loss": 1.1498, |
| "step": 136000 |
| }, |
| { |
| "epoch": 34.025, |
| "grad_norm": 0.04442556947469711, |
| "learning_rate": 0.000294916669791862, |
| "loss": 1.1424, |
| "step": 136100 |
| }, |
| { |
| "epoch": 34.05, |
| "grad_norm": 0.05475945398211479, |
| "learning_rate": 0.0002949129195574723, |
| "loss": 1.1854, |
| "step": 136200 |
| }, |
| { |
| "epoch": 34.075, |
| "grad_norm": 0.058647606521844864, |
| "learning_rate": 0.0002949091693230827, |
| "loss": 1.2086, |
| "step": 136300 |
| }, |
| { |
| "epoch": 34.1, |
| "grad_norm": 0.04777631536126137, |
| "learning_rate": 0.00029490541908869304, |
| "loss": 1.175, |
| "step": 136400 |
| }, |
| { |
| "epoch": 34.125, |
| "grad_norm": 0.04744923487305641, |
| "learning_rate": 0.00029490166885430335, |
| "loss": 1.0887, |
| "step": 136500 |
| }, |
| { |
| "epoch": 34.15, |
| "grad_norm": 0.04286637902259827, |
| "learning_rate": 0.0002948979186199137, |
| "loss": 1.1652, |
| "step": 136600 |
| }, |
| { |
| "epoch": 34.175, |
| "grad_norm": 0.0456664115190506, |
| "learning_rate": 0.00029489416838552403, |
| "loss": 1.0565, |
| "step": 136700 |
| }, |
| { |
| "epoch": 34.2, |
| "grad_norm": 0.06168069317936897, |
| "learning_rate": 0.0002948904181511344, |
| "loss": 1.2153, |
| "step": 136800 |
| }, |
| { |
| "epoch": 34.225, |
| "grad_norm": 0.04141145944595337, |
| "learning_rate": 0.00029488666791674476, |
| "loss": 1.1138, |
| "step": 136900 |
| }, |
| { |
| "epoch": 34.25, |
| "grad_norm": 0.04432584345340729, |
| "learning_rate": 0.00029488291768235513, |
| "loss": 1.1477, |
| "step": 137000 |
| }, |
| { |
| "epoch": 34.275, |
| "grad_norm": 0.04956555366516113, |
| "learning_rate": 0.00029487916744796544, |
| "loss": 1.0743, |
| "step": 137100 |
| }, |
| { |
| "epoch": 34.3, |
| "grad_norm": 0.04936617240309715, |
| "learning_rate": 0.0002948754172135758, |
| "loss": 0.988, |
| "step": 137200 |
| }, |
| { |
| "epoch": 34.325, |
| "grad_norm": 0.04362035542726517, |
| "learning_rate": 0.00029487166697918617, |
| "loss": 1.0981, |
| "step": 137300 |
| }, |
| { |
| "epoch": 34.35, |
| "grad_norm": 0.051287226378917694, |
| "learning_rate": 0.00029486791674479654, |
| "loss": 1.088, |
| "step": 137400 |
| }, |
| { |
| "epoch": 34.375, |
| "grad_norm": 0.03998219966888428, |
| "learning_rate": 0.00029486416651040685, |
| "loss": 1.1762, |
| "step": 137500 |
| }, |
| { |
| "epoch": 34.4, |
| "grad_norm": 0.048108555376529694, |
| "learning_rate": 0.0002948604162760172, |
| "loss": 1.084, |
| "step": 137600 |
| }, |
| { |
| "epoch": 34.425, |
| "grad_norm": 0.04450273886322975, |
| "learning_rate": 0.0002948566660416276, |
| "loss": 1.0954, |
| "step": 137700 |
| }, |
| { |
| "epoch": 34.45, |
| "grad_norm": 0.04805700480937958, |
| "learning_rate": 0.00029485291580723795, |
| "loss": 0.9584, |
| "step": 137800 |
| }, |
| { |
| "epoch": 34.475, |
| "grad_norm": 0.05516688898205757, |
| "learning_rate": 0.00029484916557284826, |
| "loss": 1.2255, |
| "step": 137900 |
| }, |
| { |
| "epoch": 34.5, |
| "grad_norm": 0.04300745949149132, |
| "learning_rate": 0.0002948454153384586, |
| "loss": 1.113, |
| "step": 138000 |
| }, |
| { |
| "epoch": 34.525, |
| "grad_norm": 0.04395318031311035, |
| "learning_rate": 0.000294841665104069, |
| "loss": 1.0804, |
| "step": 138100 |
| }, |
| { |
| "epoch": 34.55, |
| "grad_norm": 0.0548313707113266, |
| "learning_rate": 0.00029483791486967936, |
| "loss": 1.1407, |
| "step": 138200 |
| }, |
| { |
| "epoch": 34.575, |
| "grad_norm": 0.04328515753149986, |
| "learning_rate": 0.00029483416463528967, |
| "loss": 1.1493, |
| "step": 138300 |
| }, |
| { |
| "epoch": 34.6, |
| "grad_norm": 0.0498124323785305, |
| "learning_rate": 0.00029483041440090004, |
| "loss": 1.1091, |
| "step": 138400 |
| }, |
| { |
| "epoch": 34.625, |
| "grad_norm": 0.0529802069067955, |
| "learning_rate": 0.00029482666416651035, |
| "loss": 1.1526, |
| "step": 138500 |
| }, |
| { |
| "epoch": 34.65, |
| "grad_norm": 0.0480722077190876, |
| "learning_rate": 0.0002948229139321207, |
| "loss": 1.1961, |
| "step": 138600 |
| }, |
| { |
| "epoch": 34.675, |
| "grad_norm": 0.03908173367381096, |
| "learning_rate": 0.000294819201200075, |
| "loss": 1.0955, |
| "step": 138700 |
| }, |
| { |
| "epoch": 34.7, |
| "grad_norm": 0.04808943718671799, |
| "learning_rate": 0.00029481545096568534, |
| "loss": 1.1239, |
| "step": 138800 |
| }, |
| { |
| "epoch": 34.725, |
| "grad_norm": 0.046047843992710114, |
| "learning_rate": 0.00029481170073129565, |
| "loss": 1.0062, |
| "step": 138900 |
| }, |
| { |
| "epoch": 34.75, |
| "grad_norm": 0.041441336274147034, |
| "learning_rate": 0.000294807950496906, |
| "loss": 1.1386, |
| "step": 139000 |
| }, |
| { |
| "epoch": 34.775, |
| "grad_norm": 0.044936537742614746, |
| "learning_rate": 0.0002948042002625164, |
| "loss": 1.0692, |
| "step": 139100 |
| }, |
| { |
| "epoch": 34.8, |
| "grad_norm": 0.04202251508831978, |
| "learning_rate": 0.00029480045002812675, |
| "loss": 1.1048, |
| "step": 139200 |
| }, |
| { |
| "epoch": 34.825, |
| "grad_norm": 0.06056401878595352, |
| "learning_rate": 0.00029479669979373706, |
| "loss": 1.0427, |
| "step": 139300 |
| }, |
| { |
| "epoch": 34.85, |
| "grad_norm": 0.047068677842617035, |
| "learning_rate": 0.00029479294955934743, |
| "loss": 1.0166, |
| "step": 139400 |
| }, |
| { |
| "epoch": 34.875, |
| "grad_norm": 0.0437459833920002, |
| "learning_rate": 0.0002947891993249578, |
| "loss": 1.1336, |
| "step": 139500 |
| }, |
| { |
| "epoch": 34.9, |
| "grad_norm": 0.04363924637436867, |
| "learning_rate": 0.00029478544909056816, |
| "loss": 1.0419, |
| "step": 139600 |
| }, |
| { |
| "epoch": 34.925, |
| "grad_norm": 0.04847422614693642, |
| "learning_rate": 0.0002947816988561785, |
| "loss": 1.1885, |
| "step": 139700 |
| }, |
| { |
| "epoch": 34.95, |
| "grad_norm": 0.04593125358223915, |
| "learning_rate": 0.00029477794862178884, |
| "loss": 1.1173, |
| "step": 139800 |
| }, |
| { |
| "epoch": 34.975, |
| "grad_norm": 0.04662812873721123, |
| "learning_rate": 0.0002947741983873992, |
| "loss": 1.0086, |
| "step": 139900 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.04696165770292282, |
| "learning_rate": 0.0002947704481530096, |
| "loss": 0.9674, |
| "step": 140000 |
| }, |
| { |
| "epoch": 35.025, |
| "grad_norm": 0.04659904167056084, |
| "learning_rate": 0.0002947666979186199, |
| "loss": 1.0319, |
| "step": 140100 |
| }, |
| { |
| "epoch": 35.05, |
| "grad_norm": 0.0433788076043129, |
| "learning_rate": 0.00029476294768423025, |
| "loss": 1.0989, |
| "step": 140200 |
| }, |
| { |
| "epoch": 35.075, |
| "grad_norm": 0.04491908475756645, |
| "learning_rate": 0.00029475919744984056, |
| "loss": 1.0623, |
| "step": 140300 |
| }, |
| { |
| "epoch": 35.1, |
| "grad_norm": 0.045701559633016586, |
| "learning_rate": 0.00029475544721545093, |
| "loss": 1.1146, |
| "step": 140400 |
| }, |
| { |
| "epoch": 35.125, |
| "grad_norm": 0.04654062166810036, |
| "learning_rate": 0.0002947517344834052, |
| "loss": 1.0735, |
| "step": 140500 |
| }, |
| { |
| "epoch": 35.15, |
| "grad_norm": 0.05366494506597519, |
| "learning_rate": 0.0002947479842490155, |
| "loss": 1.1706, |
| "step": 140600 |
| }, |
| { |
| "epoch": 35.175, |
| "grad_norm": 0.047658320516347885, |
| "learning_rate": 0.00029474423401462587, |
| "loss": 1.1263, |
| "step": 140700 |
| }, |
| { |
| "epoch": 35.2, |
| "grad_norm": 0.04554996266961098, |
| "learning_rate": 0.00029474048378023624, |
| "loss": 1.1135, |
| "step": 140800 |
| }, |
| { |
| "epoch": 35.225, |
| "grad_norm": 0.04832541570067406, |
| "learning_rate": 0.0002947367335458466, |
| "loss": 1.0375, |
| "step": 140900 |
| }, |
| { |
| "epoch": 35.25, |
| "grad_norm": 0.0434059239923954, |
| "learning_rate": 0.0002947329833114569, |
| "loss": 1.0696, |
| "step": 141000 |
| }, |
| { |
| "epoch": 35.275, |
| "grad_norm": 0.04571983963251114, |
| "learning_rate": 0.0002947292330770673, |
| "loss": 1.1276, |
| "step": 141100 |
| }, |
| { |
| "epoch": 35.3, |
| "grad_norm": 0.04176199808716774, |
| "learning_rate": 0.00029472548284267765, |
| "loss": 0.957, |
| "step": 141200 |
| }, |
| { |
| "epoch": 35.325, |
| "grad_norm": 0.06178323179483414, |
| "learning_rate": 0.000294721732608288, |
| "loss": 1.0451, |
| "step": 141300 |
| }, |
| { |
| "epoch": 35.35, |
| "grad_norm": 0.05882290005683899, |
| "learning_rate": 0.0002947179823738983, |
| "loss": 1.1542, |
| "step": 141400 |
| }, |
| { |
| "epoch": 35.375, |
| "grad_norm": 0.04132578894495964, |
| "learning_rate": 0.0002947142321395087, |
| "loss": 0.9828, |
| "step": 141500 |
| }, |
| { |
| "epoch": 35.4, |
| "grad_norm": 0.04464949667453766, |
| "learning_rate": 0.00029471048190511906, |
| "loss": 1.0171, |
| "step": 141600 |
| }, |
| { |
| "epoch": 35.425, |
| "grad_norm": 0.04540353640913963, |
| "learning_rate": 0.0002947067316707294, |
| "loss": 1.1018, |
| "step": 141700 |
| }, |
| { |
| "epoch": 35.45, |
| "grad_norm": 0.04491226375102997, |
| "learning_rate": 0.00029470298143633973, |
| "loss": 1.1166, |
| "step": 141800 |
| }, |
| { |
| "epoch": 35.475, |
| "grad_norm": 0.0440848246216774, |
| "learning_rate": 0.0002946992312019501, |
| "loss": 1.039, |
| "step": 141900 |
| }, |
| { |
| "epoch": 35.5, |
| "grad_norm": 0.04919476807117462, |
| "learning_rate": 0.0002946954809675604, |
| "loss": 0.9442, |
| "step": 142000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 8000000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2000, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.68516799709184e+17, |
| "train_batch_size": 125, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|