| { |
| "best_global_step": 1000, |
| "best_metric": 2.9839203357696533, |
| "best_model_checkpoint": "outputs/checkpoint-1000", |
| "epoch": 0.6430868167202572, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006430868167202572, |
| "grad_norm": 1.678249716758728, |
| "learning_rate": 0.0, |
| "loss": 10.412, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0012861736334405145, |
| "grad_norm": 1.7936455011367798, |
| "learning_rate": 4e-05, |
| "loss": 10.4163, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0019292604501607716, |
| "grad_norm": 1.7285282611846924, |
| "learning_rate": 8e-05, |
| "loss": 10.3998, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002572347266881029, |
| "grad_norm": 1.6896724700927734, |
| "learning_rate": 0.00012, |
| "loss": 10.3641, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.003215434083601286, |
| "grad_norm": 1.5910422801971436, |
| "learning_rate": 0.00016, |
| "loss": 10.3105, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0038585209003215433, |
| "grad_norm": 1.5040550231933594, |
| "learning_rate": 0.0002, |
| "loss": 10.2539, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0045016077170418, |
| "grad_norm": 1.4353411197662354, |
| "learning_rate": 0.00019998713412672887, |
| "loss": 10.1944, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.005144694533762058, |
| "grad_norm": 1.4172208309173584, |
| "learning_rate": 0.0001999742682534577, |
| "loss": 10.1482, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.005787781350482315, |
| "grad_norm": 1.4051563739776611, |
| "learning_rate": 0.00019996140238018656, |
| "loss": 10.0827, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.006430868167202572, |
| "grad_norm": 1.3599131107330322, |
| "learning_rate": 0.0001999485365069154, |
| "loss": 10.0401, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00707395498392283, |
| "grad_norm": 1.3481664657592773, |
| "learning_rate": 0.00019993567063364426, |
| "loss": 9.9904, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0077170418006430866, |
| "grad_norm": 1.328471302986145, |
| "learning_rate": 0.00019992280476037312, |
| "loss": 9.9554, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008360128617363344, |
| "grad_norm": 1.3357861042022705, |
| "learning_rate": 0.00019990993888710198, |
| "loss": 9.9119, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0090032154340836, |
| "grad_norm": 1.3247236013412476, |
| "learning_rate": 0.00019989707301383084, |
| "loss": 9.8818, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00964630225080386, |
| "grad_norm": 1.3218022584915161, |
| "learning_rate": 0.00019988420714055967, |
| "loss": 9.842, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.010289389067524116, |
| "grad_norm": 1.3450143337249756, |
| "learning_rate": 0.00019987134126728853, |
| "loss": 9.7933, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.010932475884244373, |
| "grad_norm": 1.352043628692627, |
| "learning_rate": 0.0001998584753940174, |
| "loss": 9.7582, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01157556270096463, |
| "grad_norm": 1.3545559644699097, |
| "learning_rate": 0.00019984560952074622, |
| "loss": 9.713, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.012218649517684888, |
| "grad_norm": 1.3423742055892944, |
| "learning_rate": 0.00019983274364747508, |
| "loss": 9.6714, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.012861736334405145, |
| "grad_norm": 1.295168399810791, |
| "learning_rate": 0.00019981987777420391, |
| "loss": 9.6478, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.013504823151125401, |
| "grad_norm": 1.3305379152297974, |
| "learning_rate": 0.00019980701190093277, |
| "loss": 9.603, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01414790996784566, |
| "grad_norm": 1.33871591091156, |
| "learning_rate": 0.00019979414602766163, |
| "loss": 9.5493, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.014790996784565916, |
| "grad_norm": 1.335689902305603, |
| "learning_rate": 0.0001997812801543905, |
| "loss": 9.517, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.015434083601286173, |
| "grad_norm": 1.3390552997589111, |
| "learning_rate": 0.00019976841428111935, |
| "loss": 9.4742, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.01607717041800643, |
| "grad_norm": 1.337459683418274, |
| "learning_rate": 0.00019975554840784821, |
| "loss": 9.4299, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.016720257234726688, |
| "grad_norm": 1.3750091791152954, |
| "learning_rate": 0.00019974268253457705, |
| "loss": 9.3735, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.017363344051446947, |
| "grad_norm": 1.34926438331604, |
| "learning_rate": 0.0001997298166613059, |
| "loss": 9.3448, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0180064308681672, |
| "grad_norm": 1.3303914070129395, |
| "learning_rate": 0.00019971695078803474, |
| "loss": 9.3069, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01864951768488746, |
| "grad_norm": 1.3551392555236816, |
| "learning_rate": 0.0001997040849147636, |
| "loss": 9.2533, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01929260450160772, |
| "grad_norm": 1.3162682056427002, |
| "learning_rate": 0.00019969121904149246, |
| "loss": 9.2248, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.019935691318327974, |
| "grad_norm": 1.3329894542694092, |
| "learning_rate": 0.0001996783531682213, |
| "loss": 9.1831, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.020578778135048232, |
| "grad_norm": 1.373375654220581, |
| "learning_rate": 0.00019966548729495015, |
| "loss": 9.1197, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02122186495176849, |
| "grad_norm": 1.3253285884857178, |
| "learning_rate": 0.00019965262142167899, |
| "loss": 9.0978, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.021864951768488745, |
| "grad_norm": 1.3255876302719116, |
| "learning_rate": 0.00019963975554840785, |
| "loss": 9.0456, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.022508038585209004, |
| "grad_norm": 1.3354089260101318, |
| "learning_rate": 0.0001996268896751367, |
| "loss": 8.9979, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02315112540192926, |
| "grad_norm": 1.3058476448059082, |
| "learning_rate": 0.00019961402380186557, |
| "loss": 8.9718, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.023794212218649517, |
| "grad_norm": 1.3565034866333008, |
| "learning_rate": 0.00019960115792859443, |
| "loss": 8.8975, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.024437299035369776, |
| "grad_norm": 1.3475068807601929, |
| "learning_rate": 0.00019958829205532326, |
| "loss": 8.8667, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02508038585209003, |
| "grad_norm": 1.3298248052597046, |
| "learning_rate": 0.00019957542618205212, |
| "loss": 8.845, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02572347266881029, |
| "grad_norm": 1.3165632486343384, |
| "learning_rate": 0.00019956256030878098, |
| "loss": 8.8057, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.026366559485530548, |
| "grad_norm": 1.3182189464569092, |
| "learning_rate": 0.0001995496944355098, |
| "loss": 8.7485, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.027009646302250803, |
| "grad_norm": 1.3431370258331299, |
| "learning_rate": 0.00019953682856223867, |
| "loss": 8.6918, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.02765273311897106, |
| "grad_norm": 1.317123532295227, |
| "learning_rate": 0.0001995239626889675, |
| "loss": 8.6704, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02829581993569132, |
| "grad_norm": 1.3226360082626343, |
| "learning_rate": 0.00019951109681569636, |
| "loss": 8.6149, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.028938906752411574, |
| "grad_norm": 1.330268144607544, |
| "learning_rate": 0.00019949823094242522, |
| "loss": 8.5877, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.029581993569131833, |
| "grad_norm": 1.3029155731201172, |
| "learning_rate": 0.00019948536506915408, |
| "loss": 8.552, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03022508038585209, |
| "grad_norm": 1.3699071407318115, |
| "learning_rate": 0.00019947249919588294, |
| "loss": 8.4862, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.030868167202572346, |
| "grad_norm": 1.3225411176681519, |
| "learning_rate": 0.00019945963332261178, |
| "loss": 8.4505, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.031511254019292605, |
| "grad_norm": 1.3253546953201294, |
| "learning_rate": 0.00019944676744934064, |
| "loss": 8.4268, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03215434083601286, |
| "grad_norm": 1.3012568950653076, |
| "learning_rate": 0.0001994339015760695, |
| "loss": 8.373, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03279742765273312, |
| "grad_norm": 1.329947829246521, |
| "learning_rate": 0.00019942103570279833, |
| "loss": 8.3321, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.033440514469453377, |
| "grad_norm": 1.3405938148498535, |
| "learning_rate": 0.0001994081698295272, |
| "loss": 8.3065, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03408360128617363, |
| "grad_norm": 1.3572288751602173, |
| "learning_rate": 0.00019939530395625602, |
| "loss": 8.2457, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03472668810289389, |
| "grad_norm": 1.332276701927185, |
| "learning_rate": 0.00019938243808298488, |
| "loss": 8.2034, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03536977491961415, |
| "grad_norm": 1.297584891319275, |
| "learning_rate": 0.00019936957220971374, |
| "loss": 8.1932, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0360128617363344, |
| "grad_norm": 1.3370877504348755, |
| "learning_rate": 0.00019935670633644258, |
| "loss": 8.1234, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.036655948553054665, |
| "grad_norm": 1.3226819038391113, |
| "learning_rate": 0.00019934384046317146, |
| "loss": 8.0947, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03729903536977492, |
| "grad_norm": 1.3129390478134155, |
| "learning_rate": 0.00019933097458990032, |
| "loss": 8.0517, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.037942122186495175, |
| "grad_norm": 1.305672287940979, |
| "learning_rate": 0.00019931810871662916, |
| "loss": 8.0226, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03858520900321544, |
| "grad_norm": 1.3064535856246948, |
| "learning_rate": 0.00019930524284335802, |
| "loss": 7.992, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03922829581993569, |
| "grad_norm": 1.3159136772155762, |
| "learning_rate": 0.00019929237697008685, |
| "loss": 7.9149, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03987138263665595, |
| "grad_norm": 1.3587119579315186, |
| "learning_rate": 0.0001992795110968157, |
| "loss": 7.897, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04051446945337621, |
| "grad_norm": 1.2870018482208252, |
| "learning_rate": 0.00019926664522354457, |
| "loss": 7.8889, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.041157556270096464, |
| "grad_norm": 1.3369699716567993, |
| "learning_rate": 0.0001992537793502734, |
| "loss": 7.8203, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04180064308681672, |
| "grad_norm": 1.330676794052124, |
| "learning_rate": 0.00019924091347700226, |
| "loss": 7.7895, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04244372990353698, |
| "grad_norm": 1.2833577394485474, |
| "learning_rate": 0.0001992280476037311, |
| "loss": 7.7855, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.043086816720257236, |
| "grad_norm": 1.3233436346054077, |
| "learning_rate": 0.00019921518173045995, |
| "loss": 7.7267, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04372990353697749, |
| "grad_norm": 1.3114689588546753, |
| "learning_rate": 0.00019920231585718881, |
| "loss": 7.7057, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.044372990353697746, |
| "grad_norm": 1.3106704950332642, |
| "learning_rate": 0.00019918944998391767, |
| "loss": 7.6647, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.04501607717041801, |
| "grad_norm": 1.281671404838562, |
| "learning_rate": 0.00019917658411064653, |
| "loss": 7.6598, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04565916398713826, |
| "grad_norm": 1.2698378562927246, |
| "learning_rate": 0.00019916371823737537, |
| "loss": 7.5991, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04630225080385852, |
| "grad_norm": 1.2966727018356323, |
| "learning_rate": 0.00019915085236410423, |
| "loss": 7.5675, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04694533762057878, |
| "grad_norm": 1.3163456916809082, |
| "learning_rate": 0.0001991379864908331, |
| "loss": 7.5388, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.047588424437299034, |
| "grad_norm": 1.3120678663253784, |
| "learning_rate": 0.00019912512061756192, |
| "loss": 7.5088, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.04823151125401929, |
| "grad_norm": 1.3121012449264526, |
| "learning_rate": 0.00019911225474429078, |
| "loss": 7.426, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04887459807073955, |
| "grad_norm": 1.2783628702163696, |
| "learning_rate": 0.0001990993888710196, |
| "loss": 7.4365, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.049517684887459806, |
| "grad_norm": 1.275612235069275, |
| "learning_rate": 0.00019908652299774847, |
| "loss": 7.3942, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05016077170418006, |
| "grad_norm": 1.352935552597046, |
| "learning_rate": 0.00019907365712447733, |
| "loss": 7.3319, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05080385852090032, |
| "grad_norm": 1.299548625946045, |
| "learning_rate": 0.0001990607912512062, |
| "loss": 7.3322, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05144694533762058, |
| "grad_norm": 1.29573392868042, |
| "learning_rate": 0.00019904792537793505, |
| "loss": 7.315, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05209003215434083, |
| "grad_norm": 1.2807579040527344, |
| "learning_rate": 0.00019903505950466389, |
| "loss": 7.2971, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.052733118971061095, |
| "grad_norm": 1.2788913249969482, |
| "learning_rate": 0.00019902219363139275, |
| "loss": 7.217, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05337620578778135, |
| "grad_norm": 1.2539947032928467, |
| "learning_rate": 0.0001990093277581216, |
| "loss": 7.2079, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.054019292604501605, |
| "grad_norm": 1.2816716432571411, |
| "learning_rate": 0.00019899646188485044, |
| "loss": 7.1968, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05466237942122187, |
| "grad_norm": 1.2161004543304443, |
| "learning_rate": 0.0001989835960115793, |
| "loss": 7.2596, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05530546623794212, |
| "grad_norm": 1.250944972038269, |
| "learning_rate": 0.00019897073013830813, |
| "loss": 7.1328, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05594855305466238, |
| "grad_norm": 1.2679494619369507, |
| "learning_rate": 0.000198957864265037, |
| "loss": 7.1424, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.05659163987138264, |
| "grad_norm": 1.2727211713790894, |
| "learning_rate": 0.00019894499839176585, |
| "loss": 7.1108, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.057234726688102894, |
| "grad_norm": 1.250409722328186, |
| "learning_rate": 0.00019893213251849468, |
| "loss": 7.043, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.05787781350482315, |
| "grad_norm": 1.2838069200515747, |
| "learning_rate": 0.00019891926664522354, |
| "loss": 7.0079, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05852090032154341, |
| "grad_norm": 1.269007682800293, |
| "learning_rate": 0.0001989064007719524, |
| "loss": 7.0067, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.059163987138263666, |
| "grad_norm": 1.2593508958816528, |
| "learning_rate": 0.00019889353489868126, |
| "loss": 6.9798, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05980707395498392, |
| "grad_norm": 1.237831950187683, |
| "learning_rate": 0.00019888066902541012, |
| "loss": 6.9631, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06045016077170418, |
| "grad_norm": 1.259639024734497, |
| "learning_rate": 0.00019886780315213896, |
| "loss": 6.9282, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06109324758842444, |
| "grad_norm": 1.245284080505371, |
| "learning_rate": 0.00019885493727886782, |
| "loss": 6.9056, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.06173633440514469, |
| "grad_norm": 1.20187246799469, |
| "learning_rate": 0.00019884207140559668, |
| "loss": 6.8874, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.062379421221864954, |
| "grad_norm": 1.2145123481750488, |
| "learning_rate": 0.0001988292055323255, |
| "loss": 6.8898, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06302250803858521, |
| "grad_norm": 1.2658990621566772, |
| "learning_rate": 0.00019881633965905437, |
| "loss": 6.831, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06366559485530547, |
| "grad_norm": 1.2510586977005005, |
| "learning_rate": 0.0001988034737857832, |
| "loss": 6.8096, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "grad_norm": 1.2315446138381958, |
| "learning_rate": 0.00019879060791251206, |
| "loss": 6.8044, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06495176848874598, |
| "grad_norm": 1.2362980842590332, |
| "learning_rate": 0.00019877774203924092, |
| "loss": 6.7773, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06559485530546624, |
| "grad_norm": 1.2314499616622925, |
| "learning_rate": 0.00019876487616596978, |
| "loss": 6.7239, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06623794212218649, |
| "grad_norm": 1.2099567651748657, |
| "learning_rate": 0.00019875201029269864, |
| "loss": 6.7143, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06688102893890675, |
| "grad_norm": 1.2021255493164062, |
| "learning_rate": 0.00019873914441942748, |
| "loss": 6.7148, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06752411575562701, |
| "grad_norm": 1.2055983543395996, |
| "learning_rate": 0.00019872627854615634, |
| "loss": 6.6475, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06816720257234726, |
| "grad_norm": 1.1841413974761963, |
| "learning_rate": 0.0001987134126728852, |
| "loss": 6.6964, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.06881028938906752, |
| "grad_norm": 1.2152525186538696, |
| "learning_rate": 0.00019870054679961403, |
| "loss": 6.6206, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.06945337620578779, |
| "grad_norm": 1.2249363660812378, |
| "learning_rate": 0.0001986876809263429, |
| "loss": 6.5891, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07009646302250803, |
| "grad_norm": 1.2074142694473267, |
| "learning_rate": 0.00019867481505307172, |
| "loss": 6.6226, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0707395498392283, |
| "grad_norm": 1.206978678703308, |
| "learning_rate": 0.00019866194917980058, |
| "loss": 6.5834, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07138263665594856, |
| "grad_norm": 1.2047126293182373, |
| "learning_rate": 0.00019864908330652944, |
| "loss": 6.5454, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0720257234726688, |
| "grad_norm": 1.1759907007217407, |
| "learning_rate": 0.00019863621743325827, |
| "loss": 6.4939, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07266881028938907, |
| "grad_norm": 1.20724356174469, |
| "learning_rate": 0.00019862335155998713, |
| "loss": 6.5207, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.07331189710610933, |
| "grad_norm": 1.1968728303909302, |
| "learning_rate": 0.000198610485686716, |
| "loss": 6.4886, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07395498392282958, |
| "grad_norm": 1.17826247215271, |
| "learning_rate": 0.00019859761981344485, |
| "loss": 6.4474, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07459807073954984, |
| "grad_norm": 1.1574997901916504, |
| "learning_rate": 0.00019858475394017371, |
| "loss": 6.4592, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0752411575562701, |
| "grad_norm": 1.1507346630096436, |
| "learning_rate": 0.00019857188806690255, |
| "loss": 6.4508, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07588424437299035, |
| "grad_norm": 1.1639803647994995, |
| "learning_rate": 0.0001985590221936314, |
| "loss": 6.453, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07652733118971061, |
| "grad_norm": 1.1529334783554077, |
| "learning_rate": 0.00019854615632036024, |
| "loss": 6.4111, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.07717041800643087, |
| "grad_norm": 1.1437855958938599, |
| "learning_rate": 0.0001985332904470891, |
| "loss": 6.3518, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07781350482315112, |
| "grad_norm": 1.1764041185379028, |
| "learning_rate": 0.00019852042457381796, |
| "loss": 6.3225, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07845659163987138, |
| "grad_norm": 1.1735254526138306, |
| "learning_rate": 0.0001985075587005468, |
| "loss": 6.3173, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.07909967845659165, |
| "grad_norm": 1.1502288579940796, |
| "learning_rate": 0.00019849469282727565, |
| "loss": 6.3184, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0797427652733119, |
| "grad_norm": 1.1381540298461914, |
| "learning_rate": 0.0001984818269540045, |
| "loss": 6.2804, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08038585209003216, |
| "grad_norm": 1.1680670976638794, |
| "learning_rate": 0.00019846896108073337, |
| "loss": 6.2858, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08102893890675242, |
| "grad_norm": 1.1194247007369995, |
| "learning_rate": 0.00019845609520746223, |
| "loss": 6.2869, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.08167202572347267, |
| "grad_norm": 1.1161319017410278, |
| "learning_rate": 0.00019844322933419107, |
| "loss": 6.2556, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08231511254019293, |
| "grad_norm": 1.1190017461776733, |
| "learning_rate": 0.00019843036346091993, |
| "loss": 6.1889, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08295819935691319, |
| "grad_norm": 1.1169451475143433, |
| "learning_rate": 0.00019841749758764879, |
| "loss": 6.1969, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08360128617363344, |
| "grad_norm": 1.1366708278656006, |
| "learning_rate": 0.00019840463171437762, |
| "loss": 6.2215, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0842443729903537, |
| "grad_norm": 1.1058257818222046, |
| "learning_rate": 0.00019839176584110648, |
| "loss": 6.1801, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08488745980707396, |
| "grad_norm": 1.1087491512298584, |
| "learning_rate": 0.0001983788999678353, |
| "loss": 6.1201, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08553054662379421, |
| "grad_norm": 1.0842581987380981, |
| "learning_rate": 0.00019836603409456417, |
| "loss": 6.1497, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08617363344051447, |
| "grad_norm": 1.12769615650177, |
| "learning_rate": 0.00019835316822129303, |
| "loss": 6.111, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08681672025723473, |
| "grad_norm": 1.0909464359283447, |
| "learning_rate": 0.00019834030234802186, |
| "loss": 6.0962, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08745980707395498, |
| "grad_norm": 1.0879305601119995, |
| "learning_rate": 0.00019832743647475075, |
| "loss": 6.0805, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08810289389067524, |
| "grad_norm": 1.0964096784591675, |
| "learning_rate": 0.00019831457060147958, |
| "loss": 6.0659, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08874598070739549, |
| "grad_norm": 1.059978723526001, |
| "learning_rate": 0.00019830170472820844, |
| "loss": 6.0938, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08938906752411575, |
| "grad_norm": 1.0322396755218506, |
| "learning_rate": 0.0001982888388549373, |
| "loss": 6.0523, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09003215434083602, |
| "grad_norm": 1.0639983415603638, |
| "learning_rate": 0.00019827597298166614, |
| "loss": 6.0221, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09067524115755626, |
| "grad_norm": 1.0435878038406372, |
| "learning_rate": 0.000198263107108395, |
| "loss": 6.0895, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09131832797427653, |
| "grad_norm": 1.070421814918518, |
| "learning_rate": 0.00019825024123512383, |
| "loss": 6.0178, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09196141479099679, |
| "grad_norm": 1.0622031688690186, |
| "learning_rate": 0.0001982373753618527, |
| "loss": 6.043, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.09260450160771704, |
| "grad_norm": 1.0540876388549805, |
| "learning_rate": 0.00019822450948858155, |
| "loss": 6.014, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0932475884244373, |
| "grad_norm": 1.0724049806594849, |
| "learning_rate": 0.00019821164361531038, |
| "loss": 5.9128, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09389067524115756, |
| "grad_norm": 1.066551685333252, |
| "learning_rate": 0.00019819877774203924, |
| "loss": 5.951, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.09453376205787781, |
| "grad_norm": 1.0557595491409302, |
| "learning_rate": 0.0001981859118687681, |
| "loss": 5.9189, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.09517684887459807, |
| "grad_norm": 1.0220601558685303, |
| "learning_rate": 0.00019817304599549696, |
| "loss": 5.9411, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09581993569131833, |
| "grad_norm": 1.0399051904678345, |
| "learning_rate": 0.00019816018012222582, |
| "loss": 5.9042, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.09646302250803858, |
| "grad_norm": 1.0221998691558838, |
| "learning_rate": 0.00019814731424895466, |
| "loss": 5.92, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09710610932475884, |
| "grad_norm": 1.019439935684204, |
| "learning_rate": 0.00019813444837568352, |
| "loss": 5.8743, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0977491961414791, |
| "grad_norm": 1.0221492052078247, |
| "learning_rate": 0.00019812158250241235, |
| "loss": 5.8733, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09839228295819935, |
| "grad_norm": 0.9593134522438049, |
| "learning_rate": 0.0001981087166291412, |
| "loss": 5.8632, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09903536977491961, |
| "grad_norm": 1.0003180503845215, |
| "learning_rate": 0.00019809585075587007, |
| "loss": 5.8589, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09967845659163987, |
| "grad_norm": 1.0050652027130127, |
| "learning_rate": 0.0001980829848825989, |
| "loss": 5.808, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.10032154340836012, |
| "grad_norm": 1.0235966444015503, |
| "learning_rate": 0.00019807011900932776, |
| "loss": 5.7926, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.10096463022508038, |
| "grad_norm": 1.0320799350738525, |
| "learning_rate": 0.00019805725313605662, |
| "loss": 5.7693, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.10160771704180065, |
| "grad_norm": 0.9429930448532104, |
| "learning_rate": 0.00019804438726278548, |
| "loss": 5.8678, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1022508038585209, |
| "grad_norm": 0.9937098622322083, |
| "learning_rate": 0.00019803152138951434, |
| "loss": 5.7777, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.10289389067524116, |
| "grad_norm": 0.9316997528076172, |
| "learning_rate": 0.00019801865551624317, |
| "loss": 5.8055, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10353697749196142, |
| "grad_norm": 0.9722780585289001, |
| "learning_rate": 0.00019800578964297203, |
| "loss": 5.7881, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.10418006430868167, |
| "grad_norm": 0.9671345353126526, |
| "learning_rate": 0.0001979929237697009, |
| "loss": 5.7469, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.10482315112540193, |
| "grad_norm": 0.9491237998008728, |
| "learning_rate": 0.00019798005789642973, |
| "loss": 5.7068, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.10546623794212219, |
| "grad_norm": 0.912465512752533, |
| "learning_rate": 0.0001979671920231586, |
| "loss": 5.7732, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.10610932475884244, |
| "grad_norm": 0.9776575565338135, |
| "learning_rate": 0.00019795432614988742, |
| "loss": 5.6843, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1067524115755627, |
| "grad_norm": 0.9189504384994507, |
| "learning_rate": 0.00019794146027661628, |
| "loss": 5.6807, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.10739549839228296, |
| "grad_norm": 0.9223593473434448, |
| "learning_rate": 0.00019792859440334514, |
| "loss": 5.6291, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.10803858520900321, |
| "grad_norm": 0.9045426249504089, |
| "learning_rate": 0.00019791572853007397, |
| "loss": 5.6484, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.10868167202572347, |
| "grad_norm": 0.9032799601554871, |
| "learning_rate": 0.00019790286265680283, |
| "loss": 5.6995, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.10932475884244373, |
| "grad_norm": 0.9077114462852478, |
| "learning_rate": 0.0001978899967835317, |
| "loss": 5.5522, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10996784565916398, |
| "grad_norm": 0.9258122444152832, |
| "learning_rate": 0.00019787713091026055, |
| "loss": 5.5694, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.11061093247588424, |
| "grad_norm": 0.9175561666488647, |
| "learning_rate": 0.00019786426503698941, |
| "loss": 5.5755, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1112540192926045, |
| "grad_norm": 0.9215130805969238, |
| "learning_rate": 0.00019785139916371825, |
| "loss": 5.5379, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.11189710610932475, |
| "grad_norm": 0.9266327023506165, |
| "learning_rate": 0.0001978385332904471, |
| "loss": 5.582, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.11254019292604502, |
| "grad_norm": 0.9182630777359009, |
| "learning_rate": 0.00019782566741717594, |
| "loss": 5.6012, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11318327974276528, |
| "grad_norm": 0.8782434463500977, |
| "learning_rate": 0.0001978128015439048, |
| "loss": 5.5496, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11382636655948553, |
| "grad_norm": 0.8495333194732666, |
| "learning_rate": 0.00019779993567063366, |
| "loss": 5.6246, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.11446945337620579, |
| "grad_norm": 0.8858991861343384, |
| "learning_rate": 0.0001977870697973625, |
| "loss": 5.4775, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.11511254019292605, |
| "grad_norm": 0.9661223292350769, |
| "learning_rate": 0.00019777420392409135, |
| "loss": 5.5474, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.1157556270096463, |
| "grad_norm": 0.9988970160484314, |
| "learning_rate": 0.0001977613380508202, |
| "loss": 5.596, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11639871382636656, |
| "grad_norm": 0.8391322493553162, |
| "learning_rate": 0.00019774847217754907, |
| "loss": 5.5846, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11704180064308682, |
| "grad_norm": 1.1805757284164429, |
| "learning_rate": 0.00019773560630427793, |
| "loss": 5.5573, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11768488745980707, |
| "grad_norm": 1.0904446840286255, |
| "learning_rate": 0.00019772274043100677, |
| "loss": 5.4494, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11832797427652733, |
| "grad_norm": 0.8829322457313538, |
| "learning_rate": 0.00019770987455773563, |
| "loss": 5.4855, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1189710610932476, |
| "grad_norm": 1.4423890113830566, |
| "learning_rate": 0.00019769700868446446, |
| "loss": 5.5052, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.11961414790996784, |
| "grad_norm": 0.8625758290290833, |
| "learning_rate": 0.00019768414281119332, |
| "loss": 5.4188, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1202572347266881, |
| "grad_norm": 1.0383747816085815, |
| "learning_rate": 0.00019767127693792218, |
| "loss": 5.4561, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.12090032154340837, |
| "grad_norm": 0.8543033003807068, |
| "learning_rate": 0.000197658411064651, |
| "loss": 5.427, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.12154340836012861, |
| "grad_norm": 1.1611664295196533, |
| "learning_rate": 0.00019764554519137987, |
| "loss": 5.4432, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.12218649517684887, |
| "grad_norm": 0.8035333156585693, |
| "learning_rate": 0.0001976326793181087, |
| "loss": 5.4058, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12282958199356914, |
| "grad_norm": 1.01143217086792, |
| "learning_rate": 0.00019761981344483756, |
| "loss": 5.4268, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.12347266881028938, |
| "grad_norm": 0.8253054022789001, |
| "learning_rate": 0.00019760694757156642, |
| "loss": 5.3515, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.12411575562700965, |
| "grad_norm": 1.043700098991394, |
| "learning_rate": 0.00019759408169829528, |
| "loss": 5.3555, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.12475884244372991, |
| "grad_norm": 0.8708436489105225, |
| "learning_rate": 0.00019758121582502414, |
| "loss": 5.4721, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12540192926045016, |
| "grad_norm": 0.80093914270401, |
| "learning_rate": 0.000197568349951753, |
| "loss": 5.4312, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.12604501607717042, |
| "grad_norm": 0.8914348483085632, |
| "learning_rate": 0.00019755548407848184, |
| "loss": 5.2986, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.12668810289389068, |
| "grad_norm": 0.8047232627868652, |
| "learning_rate": 0.0001975426182052107, |
| "loss": 5.3595, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12733118971061094, |
| "grad_norm": 0.8185480833053589, |
| "learning_rate": 0.00019752975233193953, |
| "loss": 5.3846, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12797427652733118, |
| "grad_norm": 0.8160484433174133, |
| "learning_rate": 0.0001975168864586684, |
| "loss": 5.2739, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "grad_norm": 0.8484665155410767, |
| "learning_rate": 0.00019750402058539725, |
| "loss": 5.4102, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1292604501607717, |
| "grad_norm": 0.8212493062019348, |
| "learning_rate": 0.00019749115471212608, |
| "loss": 5.338, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12990353697749196, |
| "grad_norm": 0.8622007966041565, |
| "learning_rate": 0.00019747828883885494, |
| "loss": 5.2163, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.13054662379421222, |
| "grad_norm": 0.7989214062690735, |
| "learning_rate": 0.0001974654229655838, |
| "loss": 5.3385, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.1311897106109325, |
| "grad_norm": 0.971836268901825, |
| "learning_rate": 0.00019745255709231266, |
| "loss": 5.3164, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.13183279742765272, |
| "grad_norm": 0.7155956029891968, |
| "learning_rate": 0.00019743969121904152, |
| "loss": 5.333, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.13247588424437298, |
| "grad_norm": 0.9477896690368652, |
| "learning_rate": 0.00019742682534577036, |
| "loss": 5.2987, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.13311897106109324, |
| "grad_norm": 0.7635351419448853, |
| "learning_rate": 0.00019741395947249922, |
| "loss": 5.2301, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1337620578778135, |
| "grad_norm": 0.8150351643562317, |
| "learning_rate": 0.00019740109359922805, |
| "loss": 5.3204, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.13440514469453377, |
| "grad_norm": 0.7588405013084412, |
| "learning_rate": 0.0001973882277259569, |
| "loss": 5.2901, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.13504823151125403, |
| "grad_norm": 0.7870814800262451, |
| "learning_rate": 0.00019737536185268577, |
| "loss": 5.1423, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.13569131832797426, |
| "grad_norm": 0.8244409561157227, |
| "learning_rate": 0.0001973624959794146, |
| "loss": 5.1665, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.13633440514469453, |
| "grad_norm": 0.7376208901405334, |
| "learning_rate": 0.00019734963010614346, |
| "loss": 5.248, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1369774919614148, |
| "grad_norm": 0.7543022632598877, |
| "learning_rate": 0.0001973367642328723, |
| "loss": 5.2257, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13762057877813505, |
| "grad_norm": 0.7643454074859619, |
| "learning_rate": 0.00019732389835960115, |
| "loss": 5.1449, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1382636655948553, |
| "grad_norm": 0.8774954676628113, |
| "learning_rate": 0.00019731103248633001, |
| "loss": 5.168, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.13890675241157557, |
| "grad_norm": 0.7466834187507629, |
| "learning_rate": 0.00019729816661305887, |
| "loss": 5.162, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.1395498392282958, |
| "grad_norm": 0.7911695837974548, |
| "learning_rate": 0.00019728530073978773, |
| "loss": 5.1449, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.14019292604501607, |
| "grad_norm": 0.7217370271682739, |
| "learning_rate": 0.00019727243486651657, |
| "loss": 5.2318, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.14083601286173633, |
| "grad_norm": 0.8801223039627075, |
| "learning_rate": 0.00019725956899324543, |
| "loss": 5.1067, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1414790996784566, |
| "grad_norm": 0.7971013188362122, |
| "learning_rate": 0.0001972467031199743, |
| "loss": 5.1875, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.14212218649517686, |
| "grad_norm": 0.8382704257965088, |
| "learning_rate": 0.00019723383724670312, |
| "loss": 5.092, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.14276527331189712, |
| "grad_norm": 0.8966850638389587, |
| "learning_rate": 0.00019722097137343198, |
| "loss": 5.1178, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.14340836012861735, |
| "grad_norm": 0.7162383794784546, |
| "learning_rate": 0.0001972081055001608, |
| "loss": 5.1642, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.1440514469453376, |
| "grad_norm": 0.7657078504562378, |
| "learning_rate": 0.00019719523962688967, |
| "loss": 5.102, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.14469453376205788, |
| "grad_norm": 0.8197327256202698, |
| "learning_rate": 0.00019718237375361853, |
| "loss": 5.0262, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14533762057877814, |
| "grad_norm": 0.7781340479850769, |
| "learning_rate": 0.0001971695078803474, |
| "loss": 5.039, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1459807073954984, |
| "grad_norm": 0.6628583073616028, |
| "learning_rate": 0.00019715664200707625, |
| "loss": 5.0771, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.14662379421221866, |
| "grad_norm": 0.7722617983818054, |
| "learning_rate": 0.00019714377613380509, |
| "loss": 4.9389, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.1472668810289389, |
| "grad_norm": 0.7367834448814392, |
| "learning_rate": 0.00019713091026053395, |
| "loss": 5.0164, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.14790996784565916, |
| "grad_norm": 0.6781874895095825, |
| "learning_rate": 0.0001971180443872628, |
| "loss": 5.0505, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14855305466237942, |
| "grad_norm": 0.7370553016662598, |
| "learning_rate": 0.00019710517851399164, |
| "loss": 5.027, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.14919614147909968, |
| "grad_norm": 0.6999610662460327, |
| "learning_rate": 0.0001970923126407205, |
| "loss": 4.9715, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.14983922829581994, |
| "grad_norm": 0.7564293146133423, |
| "learning_rate": 0.00019707944676744936, |
| "loss": 4.9666, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.1504823151125402, |
| "grad_norm": 0.6723111271858215, |
| "learning_rate": 0.0001970665808941782, |
| "loss": 4.986, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.15112540192926044, |
| "grad_norm": 0.6594771146774292, |
| "learning_rate": 0.00019705371502090705, |
| "loss": 5.0235, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.1517684887459807, |
| "grad_norm": 0.681059718132019, |
| "learning_rate": 0.00019704084914763588, |
| "loss": 5.014, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.15241157556270096, |
| "grad_norm": 0.7105944156646729, |
| "learning_rate": 0.00019702798327436477, |
| "loss": 4.9888, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.15305466237942122, |
| "grad_norm": 0.676173210144043, |
| "learning_rate": 0.00019701511740109363, |
| "loss": 4.963, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1536977491961415, |
| "grad_norm": 0.6689148545265198, |
| "learning_rate": 0.00019700225152782246, |
| "loss": 5.0294, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.15434083601286175, |
| "grad_norm": 0.6836123466491699, |
| "learning_rate": 0.00019698938565455132, |
| "loss": 4.8988, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.15498392282958198, |
| "grad_norm": 0.7466531991958618, |
| "learning_rate": 0.00019697651978128016, |
| "loss": 4.7996, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.15562700964630224, |
| "grad_norm": 0.6778351664543152, |
| "learning_rate": 0.00019696365390800902, |
| "loss": 4.9041, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1562700964630225, |
| "grad_norm": 0.6739835739135742, |
| "learning_rate": 0.00019695078803473788, |
| "loss": 4.9133, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.15691318327974277, |
| "grad_norm": 0.7548195123672485, |
| "learning_rate": 0.0001969379221614667, |
| "loss": 4.8105, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.15755627009646303, |
| "grad_norm": 0.753843367099762, |
| "learning_rate": 0.00019692505628819557, |
| "loss": 4.8267, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.1581993569131833, |
| "grad_norm": 0.7508904933929443, |
| "learning_rate": 0.0001969121904149244, |
| "loss": 4.94, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15884244372990353, |
| "grad_norm": 0.6692425012588501, |
| "learning_rate": 0.00019689932454165326, |
| "loss": 4.8711, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1594855305466238, |
| "grad_norm": 0.7251532077789307, |
| "learning_rate": 0.00019688645866838212, |
| "loss": 4.9862, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.16012861736334405, |
| "grad_norm": 0.8498165011405945, |
| "learning_rate": 0.00019687359279511098, |
| "loss": 4.8864, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1607717041800643, |
| "grad_norm": 0.6454371809959412, |
| "learning_rate": 0.00019686072692183984, |
| "loss": 4.7845, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16141479099678457, |
| "grad_norm": 0.7783510088920593, |
| "learning_rate": 0.00019684786104856868, |
| "loss": 4.8436, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.16205787781350484, |
| "grad_norm": 0.754112720489502, |
| "learning_rate": 0.00019683499517529754, |
| "loss": 4.8208, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.16270096463022507, |
| "grad_norm": 0.6148940324783325, |
| "learning_rate": 0.0001968221293020264, |
| "loss": 4.8404, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.16334405144694533, |
| "grad_norm": 0.754871129989624, |
| "learning_rate": 0.00019680926342875523, |
| "loss": 4.8534, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1639871382636656, |
| "grad_norm": 0.7887524962425232, |
| "learning_rate": 0.0001967963975554841, |
| "loss": 4.84, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.16463022508038586, |
| "grad_norm": 0.6893947124481201, |
| "learning_rate": 0.00019678353168221292, |
| "loss": 4.729, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.16527331189710612, |
| "grad_norm": 0.6434347629547119, |
| "learning_rate": 0.00019677066580894178, |
| "loss": 4.8239, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.16591639871382638, |
| "grad_norm": 0.650383472442627, |
| "learning_rate": 0.00019675779993567064, |
| "loss": 4.8329, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1665594855305466, |
| "grad_norm": 0.6655612587928772, |
| "learning_rate": 0.0001967449340623995, |
| "loss": 4.8266, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.16720257234726688, |
| "grad_norm": 0.6401621103286743, |
| "learning_rate": 0.00019673206818912836, |
| "loss": 4.7178, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16784565916398714, |
| "grad_norm": 0.7202311754226685, |
| "learning_rate": 0.0001967192023158572, |
| "loss": 4.7191, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1684887459807074, |
| "grad_norm": 0.6817168593406677, |
| "learning_rate": 0.00019670633644258605, |
| "loss": 4.7444, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.16913183279742766, |
| "grad_norm": 0.7872809767723083, |
| "learning_rate": 0.00019669347056931491, |
| "loss": 4.7297, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.16977491961414792, |
| "grad_norm": 0.9827927947044373, |
| "learning_rate": 0.00019668060469604375, |
| "loss": 4.6757, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.17041800643086816, |
| "grad_norm": 0.743180513381958, |
| "learning_rate": 0.0001966677388227726, |
| "loss": 4.7413, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.17106109324758842, |
| "grad_norm": 0.9192097783088684, |
| "learning_rate": 0.00019665487294950147, |
| "loss": 4.7543, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.17170418006430868, |
| "grad_norm": 1.0367995500564575, |
| "learning_rate": 0.0001966420070762303, |
| "loss": 4.7055, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.17234726688102894, |
| "grad_norm": 0.6918021440505981, |
| "learning_rate": 0.00019662914120295916, |
| "loss": 4.7372, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1729903536977492, |
| "grad_norm": 1.148046851158142, |
| "learning_rate": 0.000196616275329688, |
| "loss": 4.7336, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.17363344051446947, |
| "grad_norm": 0.8208276629447937, |
| "learning_rate": 0.00019660340945641685, |
| "loss": 4.7057, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1742765273311897, |
| "grad_norm": 0.8050063848495483, |
| "learning_rate": 0.0001965905435831457, |
| "loss": 4.6625, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.17491961414790996, |
| "grad_norm": 0.9354422092437744, |
| "learning_rate": 0.00019657767770987457, |
| "loss": 4.6884, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.17556270096463023, |
| "grad_norm": 0.6692061424255371, |
| "learning_rate": 0.00019656481183660343, |
| "loss": 4.6549, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1762057877813505, |
| "grad_norm": 1.0897949934005737, |
| "learning_rate": 0.00019655194596333227, |
| "loss": 4.6261, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.17684887459807075, |
| "grad_norm": 0.7366625666618347, |
| "learning_rate": 0.00019653908009006113, |
| "loss": 4.6679, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.17749196141479098, |
| "grad_norm": 0.8077712655067444, |
| "learning_rate": 0.00019652621421678999, |
| "loss": 4.6497, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.17813504823151124, |
| "grad_norm": 0.833954930305481, |
| "learning_rate": 0.00019651334834351882, |
| "loss": 4.6921, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1787781350482315, |
| "grad_norm": 0.7255340218544006, |
| "learning_rate": 0.00019650048247024768, |
| "loss": 4.5728, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.17942122186495177, |
| "grad_norm": 0.9826524257659912, |
| "learning_rate": 0.0001964876165969765, |
| "loss": 4.6316, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.18006430868167203, |
| "grad_norm": 0.6553508043289185, |
| "learning_rate": 0.00019647475072370537, |
| "loss": 4.6898, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1807073954983923, |
| "grad_norm": 0.7640435099601746, |
| "learning_rate": 0.00019646188485043423, |
| "loss": 4.67, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.18135048231511253, |
| "grad_norm": 0.8134769201278687, |
| "learning_rate": 0.0001964490189771631, |
| "loss": 4.5907, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.1819935691318328, |
| "grad_norm": 0.8734142780303955, |
| "learning_rate": 0.00019643615310389195, |
| "loss": 4.5712, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.18263665594855305, |
| "grad_norm": 0.75421541929245, |
| "learning_rate": 0.00019642328723062078, |
| "loss": 4.5915, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1832797427652733, |
| "grad_norm": 0.6870218515396118, |
| "learning_rate": 0.00019641042135734964, |
| "loss": 4.6099, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.18392282958199357, |
| "grad_norm": 0.754561722278595, |
| "learning_rate": 0.0001963975554840785, |
| "loss": 4.5375, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.18456591639871384, |
| "grad_norm": 0.6426610350608826, |
| "learning_rate": 0.00019638468961080734, |
| "loss": 4.5184, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.18520900321543407, |
| "grad_norm": 0.7033764123916626, |
| "learning_rate": 0.0001963718237375362, |
| "loss": 4.5226, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.18585209003215433, |
| "grad_norm": 0.683521032333374, |
| "learning_rate": 0.00019635895786426503, |
| "loss": 4.6466, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1864951768488746, |
| "grad_norm": 0.7819412350654602, |
| "learning_rate": 0.0001963460919909939, |
| "loss": 4.5982, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.18713826366559486, |
| "grad_norm": 0.7259661555290222, |
| "learning_rate": 0.00019633322611772275, |
| "loss": 4.6051, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.18778135048231512, |
| "grad_norm": 0.7699325084686279, |
| "learning_rate": 0.00019632036024445158, |
| "loss": 4.5585, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.18842443729903538, |
| "grad_norm": 0.7889232039451599, |
| "learning_rate": 0.00019630749437118044, |
| "loss": 4.5264, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.18906752411575561, |
| "grad_norm": 0.6296926736831665, |
| "learning_rate": 0.0001962946284979093, |
| "loss": 4.4552, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.18971061093247588, |
| "grad_norm": 0.8028717041015625, |
| "learning_rate": 0.00019628176262463816, |
| "loss": 4.5493, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.19035369774919614, |
| "grad_norm": 0.655351996421814, |
| "learning_rate": 0.00019626889675136702, |
| "loss": 4.5392, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1909967845659164, |
| "grad_norm": 0.7414562702178955, |
| "learning_rate": 0.00019625603087809586, |
| "loss": 4.4959, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.19163987138263666, |
| "grad_norm": 0.7951900362968445, |
| "learning_rate": 0.00019624316500482472, |
| "loss": 4.4467, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.19228295819935692, |
| "grad_norm": 0.6390551328659058, |
| "learning_rate": 0.00019623029913155358, |
| "loss": 4.5044, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "grad_norm": 0.9853672385215759, |
| "learning_rate": 0.0001962174332582824, |
| "loss": 4.4453, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19356913183279742, |
| "grad_norm": 0.7224156260490417, |
| "learning_rate": 0.00019620456738501127, |
| "loss": 4.5398, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.19421221864951768, |
| "grad_norm": 0.6318510174751282, |
| "learning_rate": 0.0001961917015117401, |
| "loss": 4.5053, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.19485530546623794, |
| "grad_norm": 0.6725063920021057, |
| "learning_rate": 0.00019617883563846896, |
| "loss": 4.3985, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1954983922829582, |
| "grad_norm": 0.5671389102935791, |
| "learning_rate": 0.00019616596976519782, |
| "loss": 4.4917, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.19614147909967847, |
| "grad_norm": 0.7262303233146667, |
| "learning_rate": 0.00019615310389192668, |
| "loss": 4.4999, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1967845659163987, |
| "grad_norm": 0.7654133439064026, |
| "learning_rate": 0.00019614023801865554, |
| "loss": 4.3863, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.19742765273311896, |
| "grad_norm": 0.6135159134864807, |
| "learning_rate": 0.00019612737214538437, |
| "loss": 4.4282, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.19807073954983923, |
| "grad_norm": 0.818827748298645, |
| "learning_rate": 0.00019611450627211323, |
| "loss": 4.3878, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.1987138263665595, |
| "grad_norm": 0.6961670517921448, |
| "learning_rate": 0.0001961016403988421, |
| "loss": 4.316, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.19935691318327975, |
| "grad_norm": 0.7855870127677917, |
| "learning_rate": 0.00019608877452557093, |
| "loss": 4.3459, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.7561706304550171, |
| "learning_rate": 0.0001960759086522998, |
| "loss": 4.4741, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.20064308681672025, |
| "grad_norm": 0.6723310351371765, |
| "learning_rate": 0.00019606304277902862, |
| "loss": 4.3894, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.2012861736334405, |
| "grad_norm": 0.8107967376708984, |
| "learning_rate": 0.00019605017690575748, |
| "loss": 4.4692, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.20192926045016077, |
| "grad_norm": 0.634990930557251, |
| "learning_rate": 0.00019603731103248634, |
| "loss": 4.4678, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.20257234726688103, |
| "grad_norm": 0.759669303894043, |
| "learning_rate": 0.00019602444515921517, |
| "loss": 4.358, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.2032154340836013, |
| "grad_norm": 0.651257336139679, |
| "learning_rate": 0.00019601157928594406, |
| "loss": 4.2959, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.20385852090032155, |
| "grad_norm": 0.9010285139083862, |
| "learning_rate": 0.0001959987134126729, |
| "loss": 4.4263, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.2045016077170418, |
| "grad_norm": 0.6414353251457214, |
| "learning_rate": 0.00019598584753940175, |
| "loss": 4.3515, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.20514469453376205, |
| "grad_norm": 1.069684624671936, |
| "learning_rate": 0.0001959729816661306, |
| "loss": 4.3952, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.2057877813504823, |
| "grad_norm": 0.9279118776321411, |
| "learning_rate": 0.00019596011579285945, |
| "loss": 4.3126, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.20643086816720257, |
| "grad_norm": 0.763932466506958, |
| "learning_rate": 0.0001959472499195883, |
| "loss": 4.3882, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.20707395498392284, |
| "grad_norm": 1.1243666410446167, |
| "learning_rate": 0.00019593438404631714, |
| "loss": 4.3228, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.2077170418006431, |
| "grad_norm": 0.7078732252120972, |
| "learning_rate": 0.000195921518173046, |
| "loss": 4.3726, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.20836012861736333, |
| "grad_norm": 0.9525623917579651, |
| "learning_rate": 0.00019590865229977486, |
| "loss": 4.3597, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2090032154340836, |
| "grad_norm": 0.7362571954727173, |
| "learning_rate": 0.0001958957864265037, |
| "loss": 4.3049, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.20964630225080386, |
| "grad_norm": 0.8940659165382385, |
| "learning_rate": 0.00019588292055323255, |
| "loss": 4.2689, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.21028938906752412, |
| "grad_norm": 0.6892198324203491, |
| "learning_rate": 0.0001958700546799614, |
| "loss": 4.2664, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.21093247588424438, |
| "grad_norm": 0.8762657046318054, |
| "learning_rate": 0.00019585718880669027, |
| "loss": 4.3218, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.21157556270096464, |
| "grad_norm": 0.7590584754943848, |
| "learning_rate": 0.00019584432293341913, |
| "loss": 4.3964, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.21221864951768488, |
| "grad_norm": 0.7163234949111938, |
| "learning_rate": 0.00019583145706014796, |
| "loss": 4.3316, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.21286173633440514, |
| "grad_norm": 0.7926210761070251, |
| "learning_rate": 0.00019581859118687682, |
| "loss": 4.2691, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.2135048231511254, |
| "grad_norm": 0.5673405528068542, |
| "learning_rate": 0.00019580572531360568, |
| "loss": 4.3685, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.21414790996784566, |
| "grad_norm": 0.690924882888794, |
| "learning_rate": 0.00019579285944033452, |
| "loss": 4.3116, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.21479099678456592, |
| "grad_norm": 0.5892926454544067, |
| "learning_rate": 0.00019577999356706338, |
| "loss": 4.4015, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.21543408360128619, |
| "grad_norm": 0.640165388584137, |
| "learning_rate": 0.0001957671276937922, |
| "loss": 4.3069, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.21607717041800642, |
| "grad_norm": 0.6696978807449341, |
| "learning_rate": 0.00019575426182052107, |
| "loss": 4.28, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.21672025723472668, |
| "grad_norm": 0.5902023911476135, |
| "learning_rate": 0.00019574139594724993, |
| "loss": 4.2424, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.21736334405144694, |
| "grad_norm": 0.8078274726867676, |
| "learning_rate": 0.0001957285300739788, |
| "loss": 4.198, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2180064308681672, |
| "grad_norm": 0.6141580939292908, |
| "learning_rate": 0.00019571566420070765, |
| "loss": 4.2204, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.21864951768488747, |
| "grad_norm": 0.8612291812896729, |
| "learning_rate": 0.00019570279832743648, |
| "loss": 4.2663, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.21929260450160773, |
| "grad_norm": 0.6782243251800537, |
| "learning_rate": 0.00019568993245416534, |
| "loss": 4.2699, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.21993569131832796, |
| "grad_norm": 0.6207177042961121, |
| "learning_rate": 0.0001956770665808942, |
| "loss": 4.2526, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.22057877813504823, |
| "grad_norm": 0.8165475726127625, |
| "learning_rate": 0.00019566420070762304, |
| "loss": 4.2142, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.2212218649517685, |
| "grad_norm": 0.6984496116638184, |
| "learning_rate": 0.0001956513348343519, |
| "loss": 4.3011, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.22186495176848875, |
| "grad_norm": 0.5956530570983887, |
| "learning_rate": 0.00019563846896108073, |
| "loss": 4.3658, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.222508038585209, |
| "grad_norm": 0.5854830145835876, |
| "learning_rate": 0.0001956256030878096, |
| "loss": 4.2409, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.22315112540192927, |
| "grad_norm": 0.6498653888702393, |
| "learning_rate": 0.00019561273721453845, |
| "loss": 4.3254, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2237942122186495, |
| "grad_norm": 0.7944484949111938, |
| "learning_rate": 0.00019559987134126728, |
| "loss": 4.2179, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.22443729903536977, |
| "grad_norm": 0.7387772798538208, |
| "learning_rate": 0.00019558700546799614, |
| "loss": 4.1393, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.22508038585209003, |
| "grad_norm": 0.8723940253257751, |
| "learning_rate": 0.000195574139594725, |
| "loss": 4.0767, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2257234726688103, |
| "grad_norm": 0.7291579842567444, |
| "learning_rate": 0.00019556127372145386, |
| "loss": 4.1707, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.22636655948553056, |
| "grad_norm": 0.688996434211731, |
| "learning_rate": 0.00019554840784818272, |
| "loss": 4.3018, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.22700964630225082, |
| "grad_norm": 0.7762923240661621, |
| "learning_rate": 0.00019553554197491155, |
| "loss": 4.1497, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.22765273311897105, |
| "grad_norm": 0.6835098266601562, |
| "learning_rate": 0.00019552267610164041, |
| "loss": 4.1906, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.2282958199356913, |
| "grad_norm": 0.8440699577331543, |
| "learning_rate": 0.00019550981022836925, |
| "loss": 4.2099, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.22893890675241158, |
| "grad_norm": 0.8429433107376099, |
| "learning_rate": 0.0001954969443550981, |
| "loss": 4.2266, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.22958199356913184, |
| "grad_norm": 0.6459960341453552, |
| "learning_rate": 0.00019548407848182697, |
| "loss": 4.2333, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.2302250803858521, |
| "grad_norm": 0.9635197520256042, |
| "learning_rate": 0.0001954712126085558, |
| "loss": 4.1193, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.23086816720257236, |
| "grad_norm": 0.6429690718650818, |
| "learning_rate": 0.00019545834673528466, |
| "loss": 4.1826, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2315112540192926, |
| "grad_norm": 0.8262260556221008, |
| "learning_rate": 0.00019544548086201352, |
| "loss": 4.1671, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23215434083601286, |
| "grad_norm": 0.6780098676681519, |
| "learning_rate": 0.00019543261498874238, |
| "loss": 4.2259, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.23279742765273312, |
| "grad_norm": 0.7936576008796692, |
| "learning_rate": 0.00019541974911547124, |
| "loss": 4.0532, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.23344051446945338, |
| "grad_norm": 0.8148082494735718, |
| "learning_rate": 0.00019540688324220007, |
| "loss": 4.2186, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.23408360128617364, |
| "grad_norm": 0.9641815423965454, |
| "learning_rate": 0.00019539401736892893, |
| "loss": 4.1883, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2347266881028939, |
| "grad_norm": 0.8979797959327698, |
| "learning_rate": 0.0001953811514956578, |
| "loss": 4.0844, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.23536977491961414, |
| "grad_norm": 0.9056326746940613, |
| "learning_rate": 0.00019536828562238663, |
| "loss": 4.1042, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2360128617363344, |
| "grad_norm": 1.0736181735992432, |
| "learning_rate": 0.00019535541974911549, |
| "loss": 4.1356, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.23665594855305466, |
| "grad_norm": 0.8122031688690186, |
| "learning_rate": 0.00019534255387584432, |
| "loss": 4.096, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.23729903536977492, |
| "grad_norm": 1.0321471691131592, |
| "learning_rate": 0.00019532968800257318, |
| "loss": 4.0537, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.2379421221864952, |
| "grad_norm": 0.6910458207130432, |
| "learning_rate": 0.00019531682212930204, |
| "loss": 3.9984, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.23858520900321542, |
| "grad_norm": 0.9629825949668884, |
| "learning_rate": 0.00019530395625603087, |
| "loss": 4.1706, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.23922829581993568, |
| "grad_norm": 0.7162978649139404, |
| "learning_rate": 0.00019529109038275973, |
| "loss": 4.2802, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.23987138263665594, |
| "grad_norm": 0.7020505666732788, |
| "learning_rate": 0.0001952782245094886, |
| "loss": 4.1238, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2405144694533762, |
| "grad_norm": 1.0391113758087158, |
| "learning_rate": 0.00019526535863621745, |
| "loss": 4.1262, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.24115755627009647, |
| "grad_norm": 0.6830191016197205, |
| "learning_rate": 0.0001952524927629463, |
| "loss": 4.0751, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.24180064308681673, |
| "grad_norm": 0.9518135786056519, |
| "learning_rate": 0.00019523962688967515, |
| "loss": 4.0223, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.24244372990353696, |
| "grad_norm": 0.9503171443939209, |
| "learning_rate": 0.000195226761016404, |
| "loss": 4.1063, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.24308681672025723, |
| "grad_norm": 0.7507718205451965, |
| "learning_rate": 0.00019521389514313284, |
| "loss": 4.0348, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2437299035369775, |
| "grad_norm": 0.8583810329437256, |
| "learning_rate": 0.0001952010292698617, |
| "loss": 4.0744, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.24437299035369775, |
| "grad_norm": 0.9669693112373352, |
| "learning_rate": 0.00019518816339659056, |
| "loss": 4.0018, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.245016077170418, |
| "grad_norm": 0.8933892846107483, |
| "learning_rate": 0.0001951752975233194, |
| "loss": 3.9325, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.24565916398713827, |
| "grad_norm": 0.9541077613830566, |
| "learning_rate": 0.00019516243165004825, |
| "loss": 3.9337, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.2463022508038585, |
| "grad_norm": 0.9491611123085022, |
| "learning_rate": 0.0001951495657767771, |
| "loss": 3.9845, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.24694533762057877, |
| "grad_norm": 0.8005610108375549, |
| "learning_rate": 0.00019513669990350597, |
| "loss": 4.0319, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.24758842443729903, |
| "grad_norm": 0.8354764580726624, |
| "learning_rate": 0.00019512383403023483, |
| "loss": 4.0569, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2482315112540193, |
| "grad_norm": 0.6670393943786621, |
| "learning_rate": 0.00019511096815696366, |
| "loss": 4.1072, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.24887459807073956, |
| "grad_norm": 0.7420167922973633, |
| "learning_rate": 0.00019509810228369252, |
| "loss": 3.9871, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.24951768488745982, |
| "grad_norm": 0.8710854649543762, |
| "learning_rate": 0.00019508523641042136, |
| "loss": 4.0891, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.25016077170418005, |
| "grad_norm": 0.7787466049194336, |
| "learning_rate": 0.00019507237053715022, |
| "loss": 3.9738, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.2508038585209003, |
| "grad_norm": 0.8813666105270386, |
| "learning_rate": 0.00019505950466387908, |
| "loss": 4.0161, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2514469453376206, |
| "grad_norm": 0.7082841992378235, |
| "learning_rate": 0.0001950466387906079, |
| "loss": 4.0657, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.25209003215434084, |
| "grad_norm": 0.9131672382354736, |
| "learning_rate": 0.00019503377291733677, |
| "loss": 3.8768, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.2527331189710611, |
| "grad_norm": 0.7093556523323059, |
| "learning_rate": 0.0001950209070440656, |
| "loss": 3.9413, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.25337620578778136, |
| "grad_norm": 0.6677314639091492, |
| "learning_rate": 0.00019500804117079446, |
| "loss": 4.06, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2540192926045016, |
| "grad_norm": 0.7473803758621216, |
| "learning_rate": 0.00019499517529752332, |
| "loss": 4.0565, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2546623794212219, |
| "grad_norm": 0.7619471549987793, |
| "learning_rate": 0.00019498230942425218, |
| "loss": 3.993, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.25530546623794215, |
| "grad_norm": 0.6803014278411865, |
| "learning_rate": 0.00019496944355098104, |
| "loss": 4.0404, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.25594855305466235, |
| "grad_norm": 0.7883164882659912, |
| "learning_rate": 0.00019495657767770988, |
| "loss": 3.9816, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.2565916398713826, |
| "grad_norm": 0.6208122968673706, |
| "learning_rate": 0.00019494371180443874, |
| "loss": 3.9774, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 0.7383295893669128, |
| "learning_rate": 0.0001949308459311676, |
| "loss": 3.9462, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.25787781350482314, |
| "grad_norm": 0.7095021605491638, |
| "learning_rate": 0.00019491798005789643, |
| "loss": 4.0581, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2585209003215434, |
| "grad_norm": 0.7832754254341125, |
| "learning_rate": 0.0001949051141846253, |
| "loss": 3.828, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.25916398713826366, |
| "grad_norm": 0.7597861289978027, |
| "learning_rate": 0.00019489224831135415, |
| "loss": 4.018, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.2598070739549839, |
| "grad_norm": 0.9110789895057678, |
| "learning_rate": 0.00019487938243808298, |
| "loss": 3.8421, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2604501607717042, |
| "grad_norm": 0.6640086770057678, |
| "learning_rate": 0.00019486651656481184, |
| "loss": 3.9569, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.26109324758842445, |
| "grad_norm": 0.6850053668022156, |
| "learning_rate": 0.0001948536506915407, |
| "loss": 3.9652, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.2617363344051447, |
| "grad_norm": 0.7008517980575562, |
| "learning_rate": 0.00019484078481826956, |
| "loss": 3.9146, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.262379421221865, |
| "grad_norm": 0.719175398349762, |
| "learning_rate": 0.00019482791894499842, |
| "loss": 3.9555, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.26302250803858523, |
| "grad_norm": 0.6538800001144409, |
| "learning_rate": 0.00019481505307172725, |
| "loss": 3.9122, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.26366559485530544, |
| "grad_norm": 0.7621102929115295, |
| "learning_rate": 0.00019480218719845611, |
| "loss": 4.0086, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2643086816720257, |
| "grad_norm": 0.7248497605323792, |
| "learning_rate": 0.00019478932132518495, |
| "loss": 3.9794, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.26495176848874596, |
| "grad_norm": 0.8252344131469727, |
| "learning_rate": 0.0001947764554519138, |
| "loss": 3.9337, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2655948553054662, |
| "grad_norm": 0.8331144452095032, |
| "learning_rate": 0.00019476358957864267, |
| "loss": 3.9903, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2662379421221865, |
| "grad_norm": 0.8301029801368713, |
| "learning_rate": 0.0001947507237053715, |
| "loss": 3.944, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.26688102893890675, |
| "grad_norm": 0.739193856716156, |
| "learning_rate": 0.00019473785783210036, |
| "loss": 3.9245, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.267524115755627, |
| "grad_norm": 0.7821977734565735, |
| "learning_rate": 0.0001947249919588292, |
| "loss": 3.9548, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2681672025723473, |
| "grad_norm": 0.8310061097145081, |
| "learning_rate": 0.00019471212608555808, |
| "loss": 3.9038, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.26881028938906754, |
| "grad_norm": 0.6713045835494995, |
| "learning_rate": 0.00019469926021228694, |
| "loss": 3.9723, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2694533762057878, |
| "grad_norm": 1.0380114316940308, |
| "learning_rate": 0.00019468639433901577, |
| "loss": 3.8221, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.27009646302250806, |
| "grad_norm": 0.8434408903121948, |
| "learning_rate": 0.00019467352846574463, |
| "loss": 4.0023, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2707395498392283, |
| "grad_norm": 0.9480482339859009, |
| "learning_rate": 0.00019466066259247347, |
| "loss": 3.9496, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.27138263665594853, |
| "grad_norm": 0.869701623916626, |
| "learning_rate": 0.00019464779671920233, |
| "loss": 3.9507, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2720257234726688, |
| "grad_norm": 0.9808540940284729, |
| "learning_rate": 0.00019463493084593119, |
| "loss": 3.8506, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.27266881028938905, |
| "grad_norm": 0.9635753035545349, |
| "learning_rate": 0.00019462206497266002, |
| "loss": 3.9128, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2733118971061093, |
| "grad_norm": 0.8993518352508545, |
| "learning_rate": 0.00019460919909938888, |
| "loss": 3.8746, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2739549839228296, |
| "grad_norm": 1.05186927318573, |
| "learning_rate": 0.0001945963332261177, |
| "loss": 4.0274, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.27459807073954984, |
| "grad_norm": 1.055508017539978, |
| "learning_rate": 0.00019458346735284657, |
| "loss": 3.9418, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2752411575562701, |
| "grad_norm": 1.0845950841903687, |
| "learning_rate": 0.00019457060147957543, |
| "loss": 3.8926, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.27588424437299036, |
| "grad_norm": 0.9033150672912598, |
| "learning_rate": 0.0001945577356063043, |
| "loss": 3.8776, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2765273311897106, |
| "grad_norm": 0.9415053129196167, |
| "learning_rate": 0.00019454486973303315, |
| "loss": 3.9336, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2771704180064309, |
| "grad_norm": 1.02390456199646, |
| "learning_rate": 0.00019453200385976198, |
| "loss": 3.7793, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.27781350482315115, |
| "grad_norm": 0.9444904327392578, |
| "learning_rate": 0.00019451913798649084, |
| "loss": 3.9075, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2784565916398714, |
| "grad_norm": 1.0796852111816406, |
| "learning_rate": 0.0001945062721132197, |
| "loss": 3.8586, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2790996784565916, |
| "grad_norm": 0.8367241621017456, |
| "learning_rate": 0.00019449340623994854, |
| "loss": 3.93, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.2797427652733119, |
| "grad_norm": 0.9733452796936035, |
| "learning_rate": 0.0001944805403666774, |
| "loss": 3.7786, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.28038585209003214, |
| "grad_norm": 0.7164201736450195, |
| "learning_rate": 0.00019446767449340626, |
| "loss": 3.8682, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.2810289389067524, |
| "grad_norm": 0.959409236907959, |
| "learning_rate": 0.0001944548086201351, |
| "loss": 3.8415, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.28167202572347266, |
| "grad_norm": 0.9369955658912659, |
| "learning_rate": 0.00019444194274686395, |
| "loss": 3.8418, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2823151125401929, |
| "grad_norm": 1.112101435661316, |
| "learning_rate": 0.0001944290768735928, |
| "loss": 3.8607, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2829581993569132, |
| "grad_norm": 0.9493166208267212, |
| "learning_rate": 0.00019441621100032167, |
| "loss": 3.9228, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.28360128617363345, |
| "grad_norm": 0.9076783061027527, |
| "learning_rate": 0.00019440334512705053, |
| "loss": 3.8624, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2842443729903537, |
| "grad_norm": 0.9614561200141907, |
| "learning_rate": 0.00019439047925377936, |
| "loss": 3.9938, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.284887459807074, |
| "grad_norm": 0.9035642743110657, |
| "learning_rate": 0.00019437761338050822, |
| "loss": 3.8205, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.28553054662379423, |
| "grad_norm": 0.8883315324783325, |
| "learning_rate": 0.00019436474750723706, |
| "loss": 3.8716, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2861736334405145, |
| "grad_norm": 0.7665774822235107, |
| "learning_rate": 0.00019435188163396592, |
| "loss": 3.8546, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.2868167202572347, |
| "grad_norm": 0.8760365843772888, |
| "learning_rate": 0.00019433901576069478, |
| "loss": 3.8498, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.28745980707395496, |
| "grad_norm": 0.9582746624946594, |
| "learning_rate": 0.0001943261498874236, |
| "loss": 3.7724, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.2881028938906752, |
| "grad_norm": 0.7356158494949341, |
| "learning_rate": 0.00019431328401415247, |
| "loss": 3.8751, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.2887459807073955, |
| "grad_norm": 0.8500118851661682, |
| "learning_rate": 0.0001943004181408813, |
| "loss": 3.8485, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.28938906752411575, |
| "grad_norm": 1.0538520812988281, |
| "learning_rate": 0.00019428755226761016, |
| "loss": 3.8594, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.290032154340836, |
| "grad_norm": 0.7139402031898499, |
| "learning_rate": 0.00019427468639433902, |
| "loss": 3.6391, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.2906752411575563, |
| "grad_norm": 1.1346955299377441, |
| "learning_rate": 0.00019426182052106788, |
| "loss": 3.8601, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.29131832797427654, |
| "grad_norm": 0.8908848762512207, |
| "learning_rate": 0.00019424895464779674, |
| "loss": 3.7756, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2919614147909968, |
| "grad_norm": 0.9202871918678284, |
| "learning_rate": 0.00019423608877452557, |
| "loss": 3.7043, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.29260450160771706, |
| "grad_norm": 1.1041529178619385, |
| "learning_rate": 0.00019422322290125443, |
| "loss": 3.8593, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2932475884244373, |
| "grad_norm": 0.8448892831802368, |
| "learning_rate": 0.0001942103570279833, |
| "loss": 3.7343, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2938906752411576, |
| "grad_norm": 1.2526445388793945, |
| "learning_rate": 0.00019419749115471213, |
| "loss": 3.7036, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2945337620578778, |
| "grad_norm": 0.9721910953521729, |
| "learning_rate": 0.000194184625281441, |
| "loss": 3.8348, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.29517684887459805, |
| "grad_norm": 1.0412633419036865, |
| "learning_rate": 0.00019417175940816982, |
| "loss": 3.7733, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2958199356913183, |
| "grad_norm": 1.1480042934417725, |
| "learning_rate": 0.00019415889353489868, |
| "loss": 3.8188, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2964630225080386, |
| "grad_norm": 0.8680059909820557, |
| "learning_rate": 0.00019414602766162754, |
| "loss": 3.8395, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.29710610932475884, |
| "grad_norm": 1.0635613203048706, |
| "learning_rate": 0.0001941331617883564, |
| "loss": 3.7715, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2977491961414791, |
| "grad_norm": 1.0242252349853516, |
| "learning_rate": 0.00019412029591508526, |
| "loss": 3.7702, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.29839228295819936, |
| "grad_norm": 0.841439962387085, |
| "learning_rate": 0.0001941074300418141, |
| "loss": 3.7487, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2990353697749196, |
| "grad_norm": 1.0437833070755005, |
| "learning_rate": 0.00019409456416854295, |
| "loss": 3.8626, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2996784565916399, |
| "grad_norm": 0.9704720377922058, |
| "learning_rate": 0.0001940816982952718, |
| "loss": 3.7011, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.30032154340836015, |
| "grad_norm": 1.080511212348938, |
| "learning_rate": 0.00019406883242200065, |
| "loss": 3.7024, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.3009646302250804, |
| "grad_norm": 1.121840000152588, |
| "learning_rate": 0.0001940559665487295, |
| "loss": 3.7532, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.3016077170418006, |
| "grad_norm": 0.9904775023460388, |
| "learning_rate": 0.00019404310067545837, |
| "loss": 3.7648, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3022508038585209, |
| "grad_norm": 1.0320771932601929, |
| "learning_rate": 0.0001940302348021872, |
| "loss": 3.6929, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.30289389067524114, |
| "grad_norm": 1.1518800258636475, |
| "learning_rate": 0.00019401736892891606, |
| "loss": 3.8161, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.3035369774919614, |
| "grad_norm": 0.9655705094337463, |
| "learning_rate": 0.0001940045030556449, |
| "loss": 3.7359, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.30418006430868166, |
| "grad_norm": 0.9549182653427124, |
| "learning_rate": 0.00019399163718237375, |
| "loss": 3.7, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3048231511254019, |
| "grad_norm": 0.914478063583374, |
| "learning_rate": 0.0001939787713091026, |
| "loss": 3.7204, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.3054662379421222, |
| "grad_norm": 0.8414446711540222, |
| "learning_rate": 0.00019396590543583147, |
| "loss": 3.8886, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.30610932475884245, |
| "grad_norm": 1.0151910781860352, |
| "learning_rate": 0.00019395303956256033, |
| "loss": 3.7341, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.3067524115755627, |
| "grad_norm": 0.9511162042617798, |
| "learning_rate": 0.00019394017368928916, |
| "loss": 3.6728, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.307395498392283, |
| "grad_norm": 0.8905565142631531, |
| "learning_rate": 0.00019392730781601802, |
| "loss": 3.7416, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.30803858520900324, |
| "grad_norm": 0.9920605421066284, |
| "learning_rate": 0.00019391444194274688, |
| "loss": 3.7493, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.3086816720257235, |
| "grad_norm": 0.7968128323554993, |
| "learning_rate": 0.00019390157606947572, |
| "loss": 3.7592, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3093247588424437, |
| "grad_norm": 1.2561795711517334, |
| "learning_rate": 0.00019388871019620458, |
| "loss": 3.7363, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.30996784565916397, |
| "grad_norm": 0.8600074648857117, |
| "learning_rate": 0.0001938758443229334, |
| "loss": 3.7102, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3106109324758842, |
| "grad_norm": 0.9748154282569885, |
| "learning_rate": 0.00019386297844966227, |
| "loss": 3.8, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.3112540192926045, |
| "grad_norm": 1.0293912887573242, |
| "learning_rate": 0.00019385011257639113, |
| "loss": 3.6432, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.31189710610932475, |
| "grad_norm": 0.7619652152061462, |
| "learning_rate": 0.00019383724670312, |
| "loss": 3.6905, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.312540192926045, |
| "grad_norm": 1.2902241945266724, |
| "learning_rate": 0.00019382438082984885, |
| "loss": 3.7061, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3131832797427653, |
| "grad_norm": 0.8420130610466003, |
| "learning_rate": 0.00019381151495657768, |
| "loss": 3.7971, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.31382636655948554, |
| "grad_norm": 1.0442159175872803, |
| "learning_rate": 0.00019379864908330654, |
| "loss": 3.6643, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3144694533762058, |
| "grad_norm": 0.7481032609939575, |
| "learning_rate": 0.0001937857832100354, |
| "loss": 3.6, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.31511254019292606, |
| "grad_norm": 0.8739355802536011, |
| "learning_rate": 0.00019377291733676424, |
| "loss": 3.6363, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3157556270096463, |
| "grad_norm": 0.7196866273880005, |
| "learning_rate": 0.0001937600514634931, |
| "loss": 3.7621, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3163987138263666, |
| "grad_norm": 0.7693118453025818, |
| "learning_rate": 0.00019374718559022193, |
| "loss": 3.7453, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3170418006430868, |
| "grad_norm": 0.8000239729881287, |
| "learning_rate": 0.0001937343197169508, |
| "loss": 3.697, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.31768488745980705, |
| "grad_norm": 0.6940252780914307, |
| "learning_rate": 0.00019372145384367965, |
| "loss": 3.6958, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3183279742765273, |
| "grad_norm": 0.8711187243461609, |
| "learning_rate": 0.00019370858797040848, |
| "loss": 3.8204, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.3189710610932476, |
| "grad_norm": 0.8630790114402771, |
| "learning_rate": 0.00019369572209713737, |
| "loss": 3.6836, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.31961414790996784, |
| "grad_norm": 0.8443520665168762, |
| "learning_rate": 0.0001936828562238662, |
| "loss": 3.7011, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3202572347266881, |
| "grad_norm": 0.9406055808067322, |
| "learning_rate": 0.00019366999035059506, |
| "loss": 3.6641, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.32090032154340836, |
| "grad_norm": 0.7495781779289246, |
| "learning_rate": 0.00019365712447732392, |
| "loss": 3.6973, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 0.7676818370819092, |
| "learning_rate": 0.00019364425860405275, |
| "loss": 3.7072, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3221864951768489, |
| "grad_norm": 0.9088597893714905, |
| "learning_rate": 0.00019363139273078161, |
| "loss": 3.7242, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.32282958199356915, |
| "grad_norm": 0.8267994523048401, |
| "learning_rate": 0.00019361852685751047, |
| "loss": 3.5553, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3234726688102894, |
| "grad_norm": 0.7666113376617432, |
| "learning_rate": 0.0001936056609842393, |
| "loss": 3.5928, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.32411575562700967, |
| "grad_norm": 0.7497265338897705, |
| "learning_rate": 0.00019359279511096817, |
| "loss": 3.673, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3247588424437299, |
| "grad_norm": 0.925579845905304, |
| "learning_rate": 0.000193579929237697, |
| "loss": 3.6482, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.32540192926045014, |
| "grad_norm": 1.0170692205429077, |
| "learning_rate": 0.00019356706336442586, |
| "loss": 3.6582, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3260450160771704, |
| "grad_norm": 0.8777898550033569, |
| "learning_rate": 0.00019355419749115472, |
| "loss": 3.6206, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.32668810289389066, |
| "grad_norm": 0.907271683216095, |
| "learning_rate": 0.00019354133161788358, |
| "loss": 3.6647, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3273311897106109, |
| "grad_norm": 1.1781278848648071, |
| "learning_rate": 0.00019352846574461244, |
| "loss": 3.6539, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3279742765273312, |
| "grad_norm": 0.8342487215995789, |
| "learning_rate": 0.00019351559987134127, |
| "loss": 3.6976, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.32861736334405145, |
| "grad_norm": 1.1345239877700806, |
| "learning_rate": 0.00019350273399807013, |
| "loss": 3.7446, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.3292604501607717, |
| "grad_norm": 1.1331250667572021, |
| "learning_rate": 0.000193489868124799, |
| "loss": 3.6652, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.329903536977492, |
| "grad_norm": 0.9640575647354126, |
| "learning_rate": 0.00019347700225152783, |
| "loss": 3.7036, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.33054662379421224, |
| "grad_norm": 0.947372555732727, |
| "learning_rate": 0.00019346413637825669, |
| "loss": 3.7328, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3311897106109325, |
| "grad_norm": 0.9356391429901123, |
| "learning_rate": 0.00019345127050498552, |
| "loss": 3.6027, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.33183279742765276, |
| "grad_norm": 0.8815325498580933, |
| "learning_rate": 0.00019343840463171438, |
| "loss": 3.69, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.33247588424437297, |
| "grad_norm": 0.9109652042388916, |
| "learning_rate": 0.00019342553875844324, |
| "loss": 3.7513, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3331189710610932, |
| "grad_norm": 0.9772228002548218, |
| "learning_rate": 0.0001934126728851721, |
| "loss": 3.6968, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3337620578778135, |
| "grad_norm": 0.992035448551178, |
| "learning_rate": 0.00019339980701190096, |
| "loss": 3.604, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.33440514469453375, |
| "grad_norm": 0.9157900810241699, |
| "learning_rate": 0.0001933869411386298, |
| "loss": 3.6083, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.335048231511254, |
| "grad_norm": 1.0214614868164062, |
| "learning_rate": 0.00019337407526535865, |
| "loss": 3.62, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.3356913183279743, |
| "grad_norm": 0.9241607785224915, |
| "learning_rate": 0.0001933612093920875, |
| "loss": 3.6061, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.33633440514469454, |
| "grad_norm": 1.1566587686538696, |
| "learning_rate": 0.00019334834351881634, |
| "loss": 3.6324, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3369774919614148, |
| "grad_norm": 0.8433347940444946, |
| "learning_rate": 0.0001933354776455452, |
| "loss": 3.548, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.33762057877813506, |
| "grad_norm": 0.9520887136459351, |
| "learning_rate": 0.00019332261177227404, |
| "loss": 3.6148, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3382636655948553, |
| "grad_norm": 0.974737823009491, |
| "learning_rate": 0.0001933097458990029, |
| "loss": 3.6365, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3389067524115756, |
| "grad_norm": 0.8349432945251465, |
| "learning_rate": 0.00019329688002573176, |
| "loss": 3.6434, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.33954983922829585, |
| "grad_norm": 1.0445458889007568, |
| "learning_rate": 0.0001932840141524606, |
| "loss": 3.4959, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.34019292604501605, |
| "grad_norm": 0.7769454121589661, |
| "learning_rate": 0.00019327114827918945, |
| "loss": 3.5066, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3408360128617363, |
| "grad_norm": 0.8699477910995483, |
| "learning_rate": 0.0001932582824059183, |
| "loss": 3.683, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3414790996784566, |
| "grad_norm": 0.834698498249054, |
| "learning_rate": 0.00019324541653264717, |
| "loss": 3.6224, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.34212218649517684, |
| "grad_norm": 0.8166964054107666, |
| "learning_rate": 0.00019323255065937603, |
| "loss": 3.7364, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3427652733118971, |
| "grad_norm": 0.8357145190238953, |
| "learning_rate": 0.00019321968478610486, |
| "loss": 3.5562, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.34340836012861736, |
| "grad_norm": 0.9249911904335022, |
| "learning_rate": 0.00019320681891283372, |
| "loss": 3.5898, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3440514469453376, |
| "grad_norm": 0.7656811475753784, |
| "learning_rate": 0.00019319395303956256, |
| "loss": 3.5595, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3446945337620579, |
| "grad_norm": 0.892476499080658, |
| "learning_rate": 0.00019318108716629142, |
| "loss": 3.681, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.34533762057877815, |
| "grad_norm": 0.8413558006286621, |
| "learning_rate": 0.00019316822129302028, |
| "loss": 3.7022, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3459807073954984, |
| "grad_norm": 0.7751324772834778, |
| "learning_rate": 0.0001931553554197491, |
| "loss": 3.5837, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.34662379421221867, |
| "grad_norm": 0.8419694304466248, |
| "learning_rate": 0.00019314248954647797, |
| "loss": 3.6111, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.34726688102893893, |
| "grad_norm": 1.005031943321228, |
| "learning_rate": 0.00019312962367320683, |
| "loss": 3.5626, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.34790996784565914, |
| "grad_norm": 0.9872586131095886, |
| "learning_rate": 0.0001931167577999357, |
| "loss": 3.5682, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.3485530546623794, |
| "grad_norm": 0.8768367767333984, |
| "learning_rate": 0.00019310389192666455, |
| "loss": 3.6085, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.34919614147909966, |
| "grad_norm": 0.9605547189712524, |
| "learning_rate": 0.00019309102605339338, |
| "loss": 3.6829, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.3498392282958199, |
| "grad_norm": 0.9464700222015381, |
| "learning_rate": 0.00019307816018012224, |
| "loss": 3.6298, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3504823151125402, |
| "grad_norm": 0.863048791885376, |
| "learning_rate": 0.0001930652943068511, |
| "loss": 3.4854, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.35112540192926045, |
| "grad_norm": 1.1198543310165405, |
| "learning_rate": 0.00019305242843357993, |
| "loss": 3.4904, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3517684887459807, |
| "grad_norm": 0.8883192539215088, |
| "learning_rate": 0.0001930395625603088, |
| "loss": 3.555, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.352411575562701, |
| "grad_norm": 0.9818452596664429, |
| "learning_rate": 0.00019302669668703763, |
| "loss": 3.5089, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.35305466237942124, |
| "grad_norm": 0.9535132646560669, |
| "learning_rate": 0.0001930138308137665, |
| "loss": 3.4332, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3536977491961415, |
| "grad_norm": 1.1151210069656372, |
| "learning_rate": 0.00019300096494049535, |
| "loss": 3.5434, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.35434083601286176, |
| "grad_norm": 1.3266632556915283, |
| "learning_rate": 0.00019298809906722418, |
| "loss": 3.5069, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.35498392282958197, |
| "grad_norm": 1.041902780532837, |
| "learning_rate": 0.00019297523319395304, |
| "loss": 3.5696, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.35562700964630223, |
| "grad_norm": 1.1225471496582031, |
| "learning_rate": 0.0001929623673206819, |
| "loss": 3.6051, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3562700964630225, |
| "grad_norm": 1.0280983448028564, |
| "learning_rate": 0.00019294950144741076, |
| "loss": 3.5618, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.35691318327974275, |
| "grad_norm": 1.1007379293441772, |
| "learning_rate": 0.00019293663557413962, |
| "loss": 3.6334, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.357556270096463, |
| "grad_norm": 1.260547161102295, |
| "learning_rate": 0.00019292376970086845, |
| "loss": 3.5079, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3581993569131833, |
| "grad_norm": 0.7994619607925415, |
| "learning_rate": 0.0001929109038275973, |
| "loss": 3.4662, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.35884244372990354, |
| "grad_norm": 1.1786779165267944, |
| "learning_rate": 0.00019289803795432615, |
| "loss": 3.5494, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.3594855305466238, |
| "grad_norm": 1.115881085395813, |
| "learning_rate": 0.000192885172081055, |
| "loss": 3.58, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.36012861736334406, |
| "grad_norm": 1.2896467447280884, |
| "learning_rate": 0.00019287230620778387, |
| "loss": 3.5807, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3607717041800643, |
| "grad_norm": 1.0027967691421509, |
| "learning_rate": 0.0001928594403345127, |
| "loss": 3.56, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3614147909967846, |
| "grad_norm": 1.3239651918411255, |
| "learning_rate": 0.00019284657446124156, |
| "loss": 3.4984, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.36205787781350485, |
| "grad_norm": 1.21293044090271, |
| "learning_rate": 0.00019283370858797042, |
| "loss": 3.5481, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.36270096463022505, |
| "grad_norm": 0.9017782807350159, |
| "learning_rate": 0.00019282084271469928, |
| "loss": 3.5069, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.3633440514469453, |
| "grad_norm": 1.3459876775741577, |
| "learning_rate": 0.00019280797684142814, |
| "loss": 3.5389, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3639871382636656, |
| "grad_norm": 1.2981561422348022, |
| "learning_rate": 0.00019279511096815697, |
| "loss": 3.4336, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.36463022508038584, |
| "grad_norm": 1.0287895202636719, |
| "learning_rate": 0.00019278224509488583, |
| "loss": 3.5067, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3652733118971061, |
| "grad_norm": 1.131381630897522, |
| "learning_rate": 0.00019276937922161467, |
| "loss": 3.5079, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.36591639871382636, |
| "grad_norm": 0.9513139724731445, |
| "learning_rate": 0.00019275651334834353, |
| "loss": 3.5221, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3665594855305466, |
| "grad_norm": 0.9313328266143799, |
| "learning_rate": 0.00019274364747507239, |
| "loss": 3.5944, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3672025723472669, |
| "grad_norm": 0.8763944506645203, |
| "learning_rate": 0.00019273078160180122, |
| "loss": 3.535, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.36784565916398715, |
| "grad_norm": 1.1983128786087036, |
| "learning_rate": 0.00019271791572853008, |
| "loss": 3.6055, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3684887459807074, |
| "grad_norm": 0.8382171392440796, |
| "learning_rate": 0.00019270504985525894, |
| "loss": 3.4601, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3691318327974277, |
| "grad_norm": 0.8634599447250366, |
| "learning_rate": 0.00019269218398198777, |
| "loss": 3.3993, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.36977491961414793, |
| "grad_norm": 1.220293402671814, |
| "learning_rate": 0.00019267931810871666, |
| "loss": 3.4932, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.37041800643086814, |
| "grad_norm": 1.0614768266677856, |
| "learning_rate": 0.0001926664522354455, |
| "loss": 3.5723, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3710610932475884, |
| "grad_norm": 1.0492634773254395, |
| "learning_rate": 0.00019265358636217435, |
| "loss": 3.5106, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.37170418006430866, |
| "grad_norm": 1.4759819507598877, |
| "learning_rate": 0.0001926407204889032, |
| "loss": 3.4732, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3723472668810289, |
| "grad_norm": 1.150651454925537, |
| "learning_rate": 0.00019262785461563204, |
| "loss": 3.5596, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3729903536977492, |
| "grad_norm": 1.0370066165924072, |
| "learning_rate": 0.0001926149887423609, |
| "loss": 3.4118, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.37363344051446945, |
| "grad_norm": 1.4111131429672241, |
| "learning_rate": 0.00019260212286908974, |
| "loss": 3.3447, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3742765273311897, |
| "grad_norm": 0.9858524203300476, |
| "learning_rate": 0.0001925892569958186, |
| "loss": 3.4701, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.37491961414791, |
| "grad_norm": 1.0511631965637207, |
| "learning_rate": 0.00019257639112254746, |
| "loss": 3.5845, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.37556270096463024, |
| "grad_norm": 1.1130648851394653, |
| "learning_rate": 0.0001925635252492763, |
| "loss": 3.3556, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3762057877813505, |
| "grad_norm": 1.1107404232025146, |
| "learning_rate": 0.00019255065937600515, |
| "loss": 3.4739, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.37684887459807076, |
| "grad_norm": 1.00481116771698, |
| "learning_rate": 0.000192537793502734, |
| "loss": 3.5032, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.377491961414791, |
| "grad_norm": 1.0136293172836304, |
| "learning_rate": 0.00019252492762946287, |
| "loss": 3.4813, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.37813504823151123, |
| "grad_norm": 0.810249388217926, |
| "learning_rate": 0.00019251206175619173, |
| "loss": 3.4907, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.3787781350482315, |
| "grad_norm": 1.0708262920379639, |
| "learning_rate": 0.00019249919588292056, |
| "loss": 3.492, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.37942122186495175, |
| "grad_norm": 1.0980275869369507, |
| "learning_rate": 0.00019248633000964942, |
| "loss": 3.45, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.380064308681672, |
| "grad_norm": 1.0108336210250854, |
| "learning_rate": 0.00019247346413637826, |
| "loss": 3.4736, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3807073954983923, |
| "grad_norm": 1.0177563428878784, |
| "learning_rate": 0.00019246059826310712, |
| "loss": 3.4808, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.38135048231511254, |
| "grad_norm": 1.3187386989593506, |
| "learning_rate": 0.00019244773238983598, |
| "loss": 3.4965, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3819935691318328, |
| "grad_norm": 0.9032576084136963, |
| "learning_rate": 0.0001924348665165648, |
| "loss": 3.4265, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.38263665594855306, |
| "grad_norm": 0.9487648606300354, |
| "learning_rate": 0.00019242200064329367, |
| "loss": 3.4562, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3832797427652733, |
| "grad_norm": 1.0431288480758667, |
| "learning_rate": 0.0001924091347700225, |
| "loss": 3.4572, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.3839228295819936, |
| "grad_norm": 1.07583487033844, |
| "learning_rate": 0.0001923962688967514, |
| "loss": 3.5218, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.38456591639871385, |
| "grad_norm": 0.9846882820129395, |
| "learning_rate": 0.00019238340302348025, |
| "loss": 3.4248, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3852090032154341, |
| "grad_norm": 1.018010139465332, |
| "learning_rate": 0.00019237053715020908, |
| "loss": 3.4861, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3858520900321543, |
| "grad_norm": 1.1103070974349976, |
| "learning_rate": 0.00019235767127693794, |
| "loss": 3.4394, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3864951768488746, |
| "grad_norm": 1.005889654159546, |
| "learning_rate": 0.00019234480540366677, |
| "loss": 3.503, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.38713826366559484, |
| "grad_norm": 1.321091651916504, |
| "learning_rate": 0.00019233193953039563, |
| "loss": 3.5028, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.3877813504823151, |
| "grad_norm": 1.2860043048858643, |
| "learning_rate": 0.0001923190736571245, |
| "loss": 3.5034, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.38842443729903536, |
| "grad_norm": 1.118884801864624, |
| "learning_rate": 0.00019230620778385333, |
| "loss": 3.456, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3890675241157556, |
| "grad_norm": 1.0882989168167114, |
| "learning_rate": 0.0001922933419105822, |
| "loss": 3.4183, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3897106109324759, |
| "grad_norm": 0.8584060072898865, |
| "learning_rate": 0.00019228047603731105, |
| "loss": 3.5375, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.39035369774919615, |
| "grad_norm": 0.9511725902557373, |
| "learning_rate": 0.00019226761016403988, |
| "loss": 3.4394, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.3909967845659164, |
| "grad_norm": 1.0036194324493408, |
| "learning_rate": 0.00019225474429076874, |
| "loss": 3.4764, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3916398713826367, |
| "grad_norm": 1.0174047946929932, |
| "learning_rate": 0.0001922418784174976, |
| "loss": 3.3898, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.39228295819935693, |
| "grad_norm": 1.1916203498840332, |
| "learning_rate": 0.00019222901254422646, |
| "loss": 3.2736, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3929260450160772, |
| "grad_norm": 1.096472144126892, |
| "learning_rate": 0.00019221614667095532, |
| "loss": 3.4986, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.3935691318327974, |
| "grad_norm": 1.0956705808639526, |
| "learning_rate": 0.00019220328079768415, |
| "loss": 3.3589, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.39421221864951767, |
| "grad_norm": 1.2517657279968262, |
| "learning_rate": 0.000192190414924413, |
| "loss": 3.4869, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.3948553054662379, |
| "grad_norm": 1.0856648683547974, |
| "learning_rate": 0.00019217754905114185, |
| "loss": 3.5259, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3954983922829582, |
| "grad_norm": 0.9672216773033142, |
| "learning_rate": 0.0001921646831778707, |
| "loss": 3.3918, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.39614147909967845, |
| "grad_norm": 1.1243929862976074, |
| "learning_rate": 0.00019215181730459957, |
| "loss": 3.3397, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3967845659163987, |
| "grad_norm": 1.1399009227752686, |
| "learning_rate": 0.0001921389514313284, |
| "loss": 3.4099, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.397427652733119, |
| "grad_norm": 1.0743727684020996, |
| "learning_rate": 0.00019212608555805726, |
| "loss": 3.3701, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.39807073954983924, |
| "grad_norm": 0.9858459830284119, |
| "learning_rate": 0.00019211321968478612, |
| "loss": 3.4865, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.3987138263665595, |
| "grad_norm": 1.0459102392196655, |
| "learning_rate": 0.00019210035381151498, |
| "loss": 3.4858, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.39935691318327976, |
| "grad_norm": 1.106679081916809, |
| "learning_rate": 0.00019208748793824384, |
| "loss": 3.4588, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.1907827854156494, |
| "learning_rate": 0.00019207462206497267, |
| "loss": 3.41, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4006430868167203, |
| "grad_norm": 1.1031006574630737, |
| "learning_rate": 0.00019206175619170153, |
| "loss": 3.4571, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.4012861736334405, |
| "grad_norm": 1.2004433870315552, |
| "learning_rate": 0.00019204889031843036, |
| "loss": 3.3627, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.40192926045016075, |
| "grad_norm": 1.3993326425552368, |
| "learning_rate": 0.00019203602444515922, |
| "loss": 3.5063, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.402572347266881, |
| "grad_norm": 1.4372771978378296, |
| "learning_rate": 0.00019202315857188808, |
| "loss": 3.3901, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.4032154340836013, |
| "grad_norm": 1.3181085586547852, |
| "learning_rate": 0.00019201029269861692, |
| "loss": 3.4757, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.40385852090032154, |
| "grad_norm": 1.3685520887374878, |
| "learning_rate": 0.00019199742682534578, |
| "loss": 3.3122, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.4045016077170418, |
| "grad_norm": 1.124450445175171, |
| "learning_rate": 0.0001919845609520746, |
| "loss": 3.3933, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.40514469453376206, |
| "grad_norm": 1.247326135635376, |
| "learning_rate": 0.00019197169507880347, |
| "loss": 3.434, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4057877813504823, |
| "grad_norm": 1.3969745635986328, |
| "learning_rate": 0.00019195882920553233, |
| "loss": 3.3692, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.4064308681672026, |
| "grad_norm": 1.0851151943206787, |
| "learning_rate": 0.0001919459633322612, |
| "loss": 3.3945, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.40707395498392285, |
| "grad_norm": 1.0287741422653198, |
| "learning_rate": 0.00019193309745899005, |
| "loss": 3.4111, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.4077170418006431, |
| "grad_norm": 1.1267411708831787, |
| "learning_rate": 0.00019192023158571888, |
| "loss": 3.395, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.40836012861736337, |
| "grad_norm": 0.9802685379981995, |
| "learning_rate": 0.00019190736571244774, |
| "loss": 3.3637, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.4090032154340836, |
| "grad_norm": 1.075246810913086, |
| "learning_rate": 0.0001918944998391766, |
| "loss": 3.3833, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.40964630225080384, |
| "grad_norm": 0.8767859935760498, |
| "learning_rate": 0.00019188163396590544, |
| "loss": 3.2365, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.4102893890675241, |
| "grad_norm": 1.2082061767578125, |
| "learning_rate": 0.0001918687680926343, |
| "loss": 3.4539, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.41093247588424436, |
| "grad_norm": 1.1632206439971924, |
| "learning_rate": 0.00019185590221936316, |
| "loss": 3.4249, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.4115755627009646, |
| "grad_norm": 0.8956990242004395, |
| "learning_rate": 0.000191843036346092, |
| "loss": 3.4036, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4122186495176849, |
| "grad_norm": 1.136662244796753, |
| "learning_rate": 0.00019183017047282085, |
| "loss": 3.2916, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.41286173633440515, |
| "grad_norm": 1.1098051071166992, |
| "learning_rate": 0.0001918173045995497, |
| "loss": 3.3685, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.4135048231511254, |
| "grad_norm": 1.0665825605392456, |
| "learning_rate": 0.00019180443872627857, |
| "loss": 3.3959, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.4141479099678457, |
| "grad_norm": 1.1620614528656006, |
| "learning_rate": 0.00019179157285300743, |
| "loss": 3.4566, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.41479099678456594, |
| "grad_norm": 0.905841588973999, |
| "learning_rate": 0.00019177870697973626, |
| "loss": 3.3252, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.4154340836012862, |
| "grad_norm": 1.4927425384521484, |
| "learning_rate": 0.00019176584110646512, |
| "loss": 3.5348, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4160771704180064, |
| "grad_norm": 1.1315077543258667, |
| "learning_rate": 0.00019175297523319395, |
| "loss": 3.3393, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.41672025723472667, |
| "grad_norm": 0.9733510613441467, |
| "learning_rate": 0.00019174010935992281, |
| "loss": 3.3689, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4173633440514469, |
| "grad_norm": 1.0324153900146484, |
| "learning_rate": 0.00019172724348665167, |
| "loss": 3.3592, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4180064308681672, |
| "grad_norm": 1.057694435119629, |
| "learning_rate": 0.0001917143776133805, |
| "loss": 3.4237, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.41864951768488745, |
| "grad_norm": 1.2631813287734985, |
| "learning_rate": 0.00019170151174010937, |
| "loss": 3.2269, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.4192926045016077, |
| "grad_norm": 0.9592750668525696, |
| "learning_rate": 0.0001916886458668382, |
| "loss": 3.2824, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.419935691318328, |
| "grad_norm": 1.085555911064148, |
| "learning_rate": 0.00019167577999356706, |
| "loss": 3.3289, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.42057877813504824, |
| "grad_norm": 1.005272388458252, |
| "learning_rate": 0.00019166291412029592, |
| "loss": 3.3593, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4212218649517685, |
| "grad_norm": 0.8905820846557617, |
| "learning_rate": 0.00019165004824702478, |
| "loss": 3.4101, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.42186495176848876, |
| "grad_norm": 1.3383442163467407, |
| "learning_rate": 0.00019163718237375364, |
| "loss": 3.4679, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.422508038585209, |
| "grad_norm": 1.0254385471343994, |
| "learning_rate": 0.00019162431650048247, |
| "loss": 3.411, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4231511254019293, |
| "grad_norm": 1.010944128036499, |
| "learning_rate": 0.00019161145062721133, |
| "loss": 3.2327, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4237942122186495, |
| "grad_norm": 1.0854414701461792, |
| "learning_rate": 0.0001915985847539402, |
| "loss": 3.4239, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.42443729903536975, |
| "grad_norm": 1.0793399810791016, |
| "learning_rate": 0.00019158571888066903, |
| "loss": 3.2859, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.42508038585209, |
| "grad_norm": 1.158903956413269, |
| "learning_rate": 0.00019157285300739789, |
| "loss": 3.388, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4257234726688103, |
| "grad_norm": 1.126036524772644, |
| "learning_rate": 0.00019155998713412672, |
| "loss": 3.3337, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.42636655948553054, |
| "grad_norm": 0.9668582677841187, |
| "learning_rate": 0.00019154712126085558, |
| "loss": 3.3274, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4270096463022508, |
| "grad_norm": 1.0506800413131714, |
| "learning_rate": 0.00019153425538758444, |
| "loss": 3.374, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.42765273311897106, |
| "grad_norm": 1.2126219272613525, |
| "learning_rate": 0.0001915213895143133, |
| "loss": 3.4406, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4282958199356913, |
| "grad_norm": 1.0302507877349854, |
| "learning_rate": 0.00019150852364104216, |
| "loss": 3.3207, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.4289389067524116, |
| "grad_norm": 1.025179386138916, |
| "learning_rate": 0.000191495657767771, |
| "loss": 3.4822, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.42958199356913185, |
| "grad_norm": 1.0189132690429688, |
| "learning_rate": 0.00019148279189449985, |
| "loss": 3.3718, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4302250803858521, |
| "grad_norm": 1.157392144203186, |
| "learning_rate": 0.0001914699260212287, |
| "loss": 3.3795, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.43086816720257237, |
| "grad_norm": 1.1980301141738892, |
| "learning_rate": 0.00019145706014795754, |
| "loss": 3.324, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4315112540192926, |
| "grad_norm": 1.1774390935897827, |
| "learning_rate": 0.0001914441942746864, |
| "loss": 3.2682, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.43215434083601284, |
| "grad_norm": 1.0564755201339722, |
| "learning_rate": 0.00019143132840141524, |
| "loss": 3.3632, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.4327974276527331, |
| "grad_norm": 1.2775535583496094, |
| "learning_rate": 0.0001914184625281441, |
| "loss": 3.2401, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.43344051446945336, |
| "grad_norm": 1.172121524810791, |
| "learning_rate": 0.00019140559665487296, |
| "loss": 3.3599, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.4340836012861736, |
| "grad_norm": 1.014290690422058, |
| "learning_rate": 0.0001913927307816018, |
| "loss": 3.3045, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4347266881028939, |
| "grad_norm": 1.083747386932373, |
| "learning_rate": 0.00019137986490833068, |
| "loss": 3.206, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.43536977491961415, |
| "grad_norm": 1.0449104309082031, |
| "learning_rate": 0.00019136699903505954, |
| "loss": 3.2687, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.4360128617363344, |
| "grad_norm": 1.0497163534164429, |
| "learning_rate": 0.00019135413316178837, |
| "loss": 3.2697, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4366559485530547, |
| "grad_norm": 1.0219502449035645, |
| "learning_rate": 0.00019134126728851723, |
| "loss": 3.3335, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.43729903536977494, |
| "grad_norm": 1.150263786315918, |
| "learning_rate": 0.00019132840141524606, |
| "loss": 3.3292, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4379421221864952, |
| "grad_norm": 1.0708996057510376, |
| "learning_rate": 0.00019131553554197492, |
| "loss": 3.3125, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.43858520900321546, |
| "grad_norm": 1.1313010454177856, |
| "learning_rate": 0.00019130266966870378, |
| "loss": 3.2571, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.43922829581993567, |
| "grad_norm": 1.1228160858154297, |
| "learning_rate": 0.00019128980379543262, |
| "loss": 3.3591, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4398713826366559, |
| "grad_norm": 1.2336113452911377, |
| "learning_rate": 0.00019127693792216148, |
| "loss": 3.3048, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.4405144694533762, |
| "grad_norm": 1.0371979475021362, |
| "learning_rate": 0.0001912640720488903, |
| "loss": 3.4398, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.44115755627009645, |
| "grad_norm": 1.0723671913146973, |
| "learning_rate": 0.00019125120617561917, |
| "loss": 3.3608, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.4418006430868167, |
| "grad_norm": 1.1435720920562744, |
| "learning_rate": 0.00019123834030234803, |
| "loss": 3.3603, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.442443729903537, |
| "grad_norm": 1.118101954460144, |
| "learning_rate": 0.0001912254744290769, |
| "loss": 3.3674, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.44308681672025724, |
| "grad_norm": 1.160780668258667, |
| "learning_rate": 0.00019121260855580575, |
| "loss": 3.2998, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.4437299035369775, |
| "grad_norm": 1.065692663192749, |
| "learning_rate": 0.00019119974268253458, |
| "loss": 3.3548, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.44437299035369776, |
| "grad_norm": 1.0315111875534058, |
| "learning_rate": 0.00019118687680926344, |
| "loss": 3.3054, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.445016077170418, |
| "grad_norm": 1.1071641445159912, |
| "learning_rate": 0.0001911740109359923, |
| "loss": 3.3202, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4456591639871383, |
| "grad_norm": 1.1420172452926636, |
| "learning_rate": 0.00019116114506272113, |
| "loss": 3.2928, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.44630225080385855, |
| "grad_norm": 1.2857017517089844, |
| "learning_rate": 0.00019114827918945, |
| "loss": 3.229, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.44694533762057875, |
| "grad_norm": 1.4348443746566772, |
| "learning_rate": 0.00019113541331617883, |
| "loss": 3.4344, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.447588424437299, |
| "grad_norm": 1.149869441986084, |
| "learning_rate": 0.0001911225474429077, |
| "loss": 3.3527, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4482315112540193, |
| "grad_norm": 1.4944316148757935, |
| "learning_rate": 0.00019110968156963655, |
| "loss": 3.3647, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.44887459807073954, |
| "grad_norm": 1.3225072622299194, |
| "learning_rate": 0.0001910968156963654, |
| "loss": 3.3557, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4495176848874598, |
| "grad_norm": 1.4342169761657715, |
| "learning_rate": 0.00019108394982309427, |
| "loss": 3.3011, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.45016077170418006, |
| "grad_norm": 1.2693394422531128, |
| "learning_rate": 0.0001910710839498231, |
| "loss": 3.4043, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4508038585209003, |
| "grad_norm": 1.1735270023345947, |
| "learning_rate": 0.00019105821807655196, |
| "loss": 3.2249, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.4514469453376206, |
| "grad_norm": 1.238094449043274, |
| "learning_rate": 0.00019104535220328082, |
| "loss": 3.3155, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.45209003215434085, |
| "grad_norm": 1.1719659566879272, |
| "learning_rate": 0.00019103248633000965, |
| "loss": 3.3839, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.4527331189710611, |
| "grad_norm": 0.9133745431900024, |
| "learning_rate": 0.0001910196204567385, |
| "loss": 3.3059, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.4533762057877814, |
| "grad_norm": 1.281079649925232, |
| "learning_rate": 0.00019100675458346735, |
| "loss": 3.2738, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.45401929260450163, |
| "grad_norm": 1.1078438758850098, |
| "learning_rate": 0.0001909938887101962, |
| "loss": 3.3192, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.45466237942122184, |
| "grad_norm": 1.1194933652877808, |
| "learning_rate": 0.00019098102283692507, |
| "loss": 3.3174, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.4553054662379421, |
| "grad_norm": 1.1717486381530762, |
| "learning_rate": 0.0001909681569636539, |
| "loss": 3.2079, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.45594855305466236, |
| "grad_norm": 1.0228320360183716, |
| "learning_rate": 0.00019095529109038276, |
| "loss": 3.2985, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4565916398713826, |
| "grad_norm": 1.071147084236145, |
| "learning_rate": 0.00019094242521711162, |
| "loss": 3.2786, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4572347266881029, |
| "grad_norm": 1.0266625881195068, |
| "learning_rate": 0.00019092955934384048, |
| "loss": 3.3508, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.45787781350482315, |
| "grad_norm": 1.0354539155960083, |
| "learning_rate": 0.00019091669347056934, |
| "loss": 3.2937, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4585209003215434, |
| "grad_norm": 1.1274703741073608, |
| "learning_rate": 0.00019090382759729817, |
| "loss": 3.3525, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.4591639871382637, |
| "grad_norm": 1.1178417205810547, |
| "learning_rate": 0.00019089096172402703, |
| "loss": 3.3228, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.45980707395498394, |
| "grad_norm": 1.040401816368103, |
| "learning_rate": 0.0001908780958507559, |
| "loss": 3.2619, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.4604501607717042, |
| "grad_norm": 1.083000659942627, |
| "learning_rate": 0.00019086522997748472, |
| "loss": 3.2896, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.46109324758842446, |
| "grad_norm": 1.0114821195602417, |
| "learning_rate": 0.00019085236410421358, |
| "loss": 3.1989, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.4617363344051447, |
| "grad_norm": 1.0420598983764648, |
| "learning_rate": 0.00019083949823094242, |
| "loss": 3.3671, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.46237942122186493, |
| "grad_norm": 1.2424354553222656, |
| "learning_rate": 0.00019082663235767128, |
| "loss": 3.2971, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.4630225080385852, |
| "grad_norm": 0.9966109395027161, |
| "learning_rate": 0.00019081376648440014, |
| "loss": 3.3125, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.46366559485530545, |
| "grad_norm": 1.2035448551177979, |
| "learning_rate": 0.000190800900611129, |
| "loss": 3.352, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4643086816720257, |
| "grad_norm": 1.3619177341461182, |
| "learning_rate": 0.00019078803473785786, |
| "loss": 3.2357, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.464951768488746, |
| "grad_norm": 1.0742976665496826, |
| "learning_rate": 0.0001907751688645867, |
| "loss": 3.2926, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.46559485530546624, |
| "grad_norm": 1.2764792442321777, |
| "learning_rate": 0.00019076230299131555, |
| "loss": 3.2803, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4662379421221865, |
| "grad_norm": 0.9125346541404724, |
| "learning_rate": 0.0001907494371180444, |
| "loss": 3.2938, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.46688102893890676, |
| "grad_norm": 1.1562446355819702, |
| "learning_rate": 0.00019073657124477324, |
| "loss": 3.4009, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.467524115755627, |
| "grad_norm": 1.0354193449020386, |
| "learning_rate": 0.0001907237053715021, |
| "loss": 3.2119, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.4681672025723473, |
| "grad_norm": 1.2904752492904663, |
| "learning_rate": 0.00019071083949823094, |
| "loss": 3.2445, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.46881028938906755, |
| "grad_norm": 1.183132290840149, |
| "learning_rate": 0.0001906979736249598, |
| "loss": 3.2195, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4694533762057878, |
| "grad_norm": 0.9867958426475525, |
| "learning_rate": 0.00019068510775168866, |
| "loss": 3.2712, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.470096463022508, |
| "grad_norm": 1.1032123565673828, |
| "learning_rate": 0.0001906722418784175, |
| "loss": 3.2709, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4707395498392283, |
| "grad_norm": 1.199021577835083, |
| "learning_rate": 0.00019065937600514635, |
| "loss": 3.2594, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.47138263665594854, |
| "grad_norm": 1.191476583480835, |
| "learning_rate": 0.0001906465101318752, |
| "loss": 3.293, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.4720257234726688, |
| "grad_norm": 1.2461153268814087, |
| "learning_rate": 0.00019063364425860407, |
| "loss": 3.2375, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.47266881028938906, |
| "grad_norm": 1.1664519309997559, |
| "learning_rate": 0.00019062077838533293, |
| "loss": 3.2176, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4733118971061093, |
| "grad_norm": 1.1912034749984741, |
| "learning_rate": 0.00019060791251206176, |
| "loss": 3.2735, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.4739549839228296, |
| "grad_norm": 1.369511604309082, |
| "learning_rate": 0.00019059504663879062, |
| "loss": 3.1804, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.47459807073954985, |
| "grad_norm": 1.230069875717163, |
| "learning_rate": 0.00019058218076551945, |
| "loss": 3.328, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4752411575562701, |
| "grad_norm": 1.3258293867111206, |
| "learning_rate": 0.00019056931489224831, |
| "loss": 3.2911, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.4758842443729904, |
| "grad_norm": 1.2006981372833252, |
| "learning_rate": 0.00019055644901897717, |
| "loss": 3.391, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.47652733118971063, |
| "grad_norm": 1.0535584688186646, |
| "learning_rate": 0.000190543583145706, |
| "loss": 3.2206, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.47717041800643084, |
| "grad_norm": 1.3122670650482178, |
| "learning_rate": 0.00019053071727243487, |
| "loss": 3.4187, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.4778135048231511, |
| "grad_norm": 1.1901317834854126, |
| "learning_rate": 0.00019051785139916373, |
| "loss": 3.2673, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.47845659163987136, |
| "grad_norm": 1.2190645933151245, |
| "learning_rate": 0.0001905049855258926, |
| "loss": 3.2902, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4790996784565916, |
| "grad_norm": 1.4672080278396606, |
| "learning_rate": 0.00019049211965262145, |
| "loss": 3.1481, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4797427652733119, |
| "grad_norm": 1.1337164640426636, |
| "learning_rate": 0.00019047925377935028, |
| "loss": 3.2713, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.48038585209003215, |
| "grad_norm": 1.2864869832992554, |
| "learning_rate": 0.00019046638790607914, |
| "loss": 3.2631, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4810289389067524, |
| "grad_norm": 1.163984775543213, |
| "learning_rate": 0.000190453522032808, |
| "loss": 3.2007, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.4816720257234727, |
| "grad_norm": 1.3512396812438965, |
| "learning_rate": 0.00019044065615953683, |
| "loss": 3.1754, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.48231511254019294, |
| "grad_norm": 1.2742228507995605, |
| "learning_rate": 0.0001904277902862657, |
| "loss": 3.2099, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4829581993569132, |
| "grad_norm": 1.090262532234192, |
| "learning_rate": 0.00019041492441299453, |
| "loss": 3.2087, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.48360128617363346, |
| "grad_norm": 1.0582116842269897, |
| "learning_rate": 0.00019040205853972339, |
| "loss": 3.2742, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.4842443729903537, |
| "grad_norm": 1.299381971359253, |
| "learning_rate": 0.00019038919266645225, |
| "loss": 3.3179, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.48488745980707393, |
| "grad_norm": 1.3819348812103271, |
| "learning_rate": 0.00019037632679318108, |
| "loss": 3.3102, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.4855305466237942, |
| "grad_norm": 1.135838270187378, |
| "learning_rate": 0.00019036346091990997, |
| "loss": 3.2429, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.48617363344051445, |
| "grad_norm": 1.0232223272323608, |
| "learning_rate": 0.0001903505950466388, |
| "loss": 3.1738, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.4868167202572347, |
| "grad_norm": 1.0841803550720215, |
| "learning_rate": 0.00019033772917336766, |
| "loss": 3.2514, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.487459807073955, |
| "grad_norm": 1.2285308837890625, |
| "learning_rate": 0.00019032486330009652, |
| "loss": 3.1662, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.48810289389067524, |
| "grad_norm": 1.1364984512329102, |
| "learning_rate": 0.00019031199742682535, |
| "loss": 3.2686, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.4887459807073955, |
| "grad_norm": 1.167251467704773, |
| "learning_rate": 0.0001902991315535542, |
| "loss": 3.2791, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.48938906752411576, |
| "grad_norm": 1.1167079210281372, |
| "learning_rate": 0.00019028626568028304, |
| "loss": 3.1558, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.490032154340836, |
| "grad_norm": 1.1595637798309326, |
| "learning_rate": 0.0001902733998070119, |
| "loss": 3.207, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.4906752411575563, |
| "grad_norm": 1.3299120664596558, |
| "learning_rate": 0.00019026053393374076, |
| "loss": 3.2207, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.49131832797427655, |
| "grad_norm": 1.2679451704025269, |
| "learning_rate": 0.0001902476680604696, |
| "loss": 3.2342, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.4919614147909968, |
| "grad_norm": 1.289936900138855, |
| "learning_rate": 0.00019023480218719846, |
| "loss": 3.3722, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.492604501607717, |
| "grad_norm": 1.2382489442825317, |
| "learning_rate": 0.00019022193631392732, |
| "loss": 3.2567, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.4932475884244373, |
| "grad_norm": 1.2640892267227173, |
| "learning_rate": 0.00019020907044065618, |
| "loss": 3.1951, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.49389067524115754, |
| "grad_norm": 1.2752370834350586, |
| "learning_rate": 0.00019019620456738504, |
| "loss": 3.2232, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.4945337620578778, |
| "grad_norm": 1.3053966760635376, |
| "learning_rate": 0.00019018333869411387, |
| "loss": 3.1693, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.49517684887459806, |
| "grad_norm": 1.3304706811904907, |
| "learning_rate": 0.00019017047282084273, |
| "loss": 3.1699, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4958199356913183, |
| "grad_norm": 1.0017753839492798, |
| "learning_rate": 0.00019015760694757156, |
| "loss": 3.1091, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4964630225080386, |
| "grad_norm": 1.1159201860427856, |
| "learning_rate": 0.00019014474107430042, |
| "loss": 3.222, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.49710610932475885, |
| "grad_norm": 1.049239158630371, |
| "learning_rate": 0.00019013187520102928, |
| "loss": 3.2, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.4977491961414791, |
| "grad_norm": 1.3383749723434448, |
| "learning_rate": 0.00019011900932775812, |
| "loss": 3.077, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.4983922829581994, |
| "grad_norm": 1.0949004888534546, |
| "learning_rate": 0.00019010614345448698, |
| "loss": 3.3314, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.49903536977491963, |
| "grad_norm": 1.262426733970642, |
| "learning_rate": 0.00019009327758121584, |
| "loss": 3.1419, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4996784565916399, |
| "grad_norm": 1.2074456214904785, |
| "learning_rate": 0.0001900804117079447, |
| "loss": 3.1357, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5003215434083601, |
| "grad_norm": 1.0343276262283325, |
| "learning_rate": 0.00019006754583467356, |
| "loss": 3.2589, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5009646302250804, |
| "grad_norm": 1.3059816360473633, |
| "learning_rate": 0.0001900546799614024, |
| "loss": 3.2158, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5016077170418006, |
| "grad_norm": 1.1477670669555664, |
| "learning_rate": 0.00019004181408813125, |
| "loss": 3.2286, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5022508038585209, |
| "grad_norm": 0.9345281720161438, |
| "learning_rate": 0.0001900289482148601, |
| "loss": 3.1478, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5028938906752412, |
| "grad_norm": 1.545401930809021, |
| "learning_rate": 0.00019001608234158894, |
| "loss": 3.2269, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5035369774919615, |
| "grad_norm": 1.1383672952651978, |
| "learning_rate": 0.0001900032164683178, |
| "loss": 3.2749, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5041800643086817, |
| "grad_norm": 1.1127848625183105, |
| "learning_rate": 0.00018999035059504664, |
| "loss": 3.1765, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5048231511254019, |
| "grad_norm": 1.338855504989624, |
| "learning_rate": 0.0001899774847217755, |
| "loss": 3.1888, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5054662379421222, |
| "grad_norm": 1.183184266090393, |
| "learning_rate": 0.00018996461884850436, |
| "loss": 3.2694, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5061093247588424, |
| "grad_norm": 1.0847487449645996, |
| "learning_rate": 0.0001899517529752332, |
| "loss": 3.2269, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5067524115755627, |
| "grad_norm": 1.4246736764907837, |
| "learning_rate": 0.00018993888710196205, |
| "loss": 3.2068, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5073954983922829, |
| "grad_norm": 1.0470788478851318, |
| "learning_rate": 0.0001899260212286909, |
| "loss": 3.1605, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5080385852090032, |
| "grad_norm": 1.329217791557312, |
| "learning_rate": 0.00018991315535541977, |
| "loss": 3.2064, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5086816720257235, |
| "grad_norm": 1.4735416173934937, |
| "learning_rate": 0.00018990028948214863, |
| "loss": 3.289, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5093247588424438, |
| "grad_norm": 1.110742449760437, |
| "learning_rate": 0.00018988742360887746, |
| "loss": 3.2186, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.509967845659164, |
| "grad_norm": 1.3622022867202759, |
| "learning_rate": 0.00018987455773560632, |
| "loss": 3.2241, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5106109324758843, |
| "grad_norm": 1.1964266300201416, |
| "learning_rate": 0.00018986169186233515, |
| "loss": 3.1842, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5112540192926045, |
| "grad_norm": 1.2105697393417358, |
| "learning_rate": 0.00018984882598906401, |
| "loss": 3.159, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5118971061093247, |
| "grad_norm": 1.2597590684890747, |
| "learning_rate": 0.00018983596011579287, |
| "loss": 3.184, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.512540192926045, |
| "grad_norm": 1.1780807971954346, |
| "learning_rate": 0.0001898230942425217, |
| "loss": 3.1621, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5131832797427652, |
| "grad_norm": 1.048012137413025, |
| "learning_rate": 0.00018981022836925057, |
| "loss": 3.2248, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5138263665594855, |
| "grad_norm": 1.4612892866134644, |
| "learning_rate": 0.00018979736249597943, |
| "loss": 3.1989, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5144694533762058, |
| "grad_norm": 1.5027645826339722, |
| "learning_rate": 0.0001897844966227083, |
| "loss": 3.1642, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5151125401929261, |
| "grad_norm": 1.2052314281463623, |
| "learning_rate": 0.00018977163074943715, |
| "loss": 3.1384, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5157556270096463, |
| "grad_norm": 1.327204704284668, |
| "learning_rate": 0.00018975876487616598, |
| "loss": 3.2679, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5163987138263666, |
| "grad_norm": 1.7540024518966675, |
| "learning_rate": 0.00018974589900289484, |
| "loss": 3.2228, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5170418006430868, |
| "grad_norm": 1.187525987625122, |
| "learning_rate": 0.00018973303312962367, |
| "loss": 3.1897, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.5176848874598071, |
| "grad_norm": 1.3195263147354126, |
| "learning_rate": 0.00018972016725635253, |
| "loss": 3.1717, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5183279742765273, |
| "grad_norm": 1.3006914854049683, |
| "learning_rate": 0.0001897073013830814, |
| "loss": 3.0372, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5189710610932476, |
| "grad_norm": 1.3825714588165283, |
| "learning_rate": 0.00018969443550981023, |
| "loss": 3.2407, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5196141479099678, |
| "grad_norm": 1.3513221740722656, |
| "learning_rate": 0.00018968156963653909, |
| "loss": 3.1957, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5202572347266881, |
| "grad_norm": 1.1686780452728271, |
| "learning_rate": 0.00018966870376326795, |
| "loss": 3.1793, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5209003215434084, |
| "grad_norm": 1.485041856765747, |
| "learning_rate": 0.00018965583788999678, |
| "loss": 3.1323, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5215434083601286, |
| "grad_norm": 1.2755258083343506, |
| "learning_rate": 0.00018964297201672564, |
| "loss": 3.0376, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5221864951768489, |
| "grad_norm": 1.2364903688430786, |
| "learning_rate": 0.0001896301061434545, |
| "loss": 3.1489, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5228295819935691, |
| "grad_norm": 1.2964539527893066, |
| "learning_rate": 0.00018961724027018336, |
| "loss": 3.1077, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5234726688102894, |
| "grad_norm": 1.259230375289917, |
| "learning_rate": 0.00018960437439691222, |
| "loss": 3.2041, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5241157556270096, |
| "grad_norm": 1.2545006275177002, |
| "learning_rate": 0.00018959150852364105, |
| "loss": 3.329, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.52475884244373, |
| "grad_norm": 1.2107785940170288, |
| "learning_rate": 0.0001895786426503699, |
| "loss": 3.1635, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5254019292604502, |
| "grad_norm": 1.251050591468811, |
| "learning_rate": 0.00018956577677709874, |
| "loss": 3.1996, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5260450160771705, |
| "grad_norm": 1.2920563220977783, |
| "learning_rate": 0.0001895529109038276, |
| "loss": 3.1743, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5266881028938907, |
| "grad_norm": 1.373834490776062, |
| "learning_rate": 0.00018954004503055646, |
| "loss": 3.1707, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5273311897106109, |
| "grad_norm": 1.1601290702819824, |
| "learning_rate": 0.0001895271791572853, |
| "loss": 3.1825, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5279742765273312, |
| "grad_norm": 1.1515223979949951, |
| "learning_rate": 0.00018951431328401416, |
| "loss": 3.1254, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.5286173633440514, |
| "grad_norm": 1.3081713914871216, |
| "learning_rate": 0.00018950144741074302, |
| "loss": 3.2704, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5292604501607717, |
| "grad_norm": 1.4287598133087158, |
| "learning_rate": 0.00018948858153747188, |
| "loss": 3.1797, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5299035369774919, |
| "grad_norm": 1.2460300922393799, |
| "learning_rate": 0.00018947571566420074, |
| "loss": 3.1673, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5305466237942122, |
| "grad_norm": 1.2695204019546509, |
| "learning_rate": 0.00018946284979092957, |
| "loss": 3.1102, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.5311897106109325, |
| "grad_norm": 1.490421175956726, |
| "learning_rate": 0.00018944998391765843, |
| "loss": 3.067, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5318327974276528, |
| "grad_norm": 1.4937459230422974, |
| "learning_rate": 0.00018943711804438726, |
| "loss": 3.3027, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.532475884244373, |
| "grad_norm": 1.1960488557815552, |
| "learning_rate": 0.00018942425217111612, |
| "loss": 3.1005, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5331189710610933, |
| "grad_norm": 1.1672232151031494, |
| "learning_rate": 0.00018941138629784498, |
| "loss": 3.1973, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5337620578778135, |
| "grad_norm": 1.3269081115722656, |
| "learning_rate": 0.00018939852042457382, |
| "loss": 3.0847, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5344051446945337, |
| "grad_norm": 1.2900030612945557, |
| "learning_rate": 0.00018938565455130268, |
| "loss": 3.0921, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.535048231511254, |
| "grad_norm": 1.076888084411621, |
| "learning_rate": 0.0001893727886780315, |
| "loss": 3.2275, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.5356913183279742, |
| "grad_norm": 1.106076717376709, |
| "learning_rate": 0.00018935992280476037, |
| "loss": 3.1484, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5363344051446945, |
| "grad_norm": 1.191945195198059, |
| "learning_rate": 0.00018934705693148923, |
| "loss": 3.2304, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5369774919614148, |
| "grad_norm": 1.2519569396972656, |
| "learning_rate": 0.0001893341910582181, |
| "loss": 3.1809, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5376205787781351, |
| "grad_norm": 1.303782343864441, |
| "learning_rate": 0.00018932132518494695, |
| "loss": 3.0748, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5382636655948553, |
| "grad_norm": 1.1886576414108276, |
| "learning_rate": 0.00018930845931167578, |
| "loss": 3.1439, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5389067524115756, |
| "grad_norm": 1.5747839212417603, |
| "learning_rate": 0.00018929559343840464, |
| "loss": 3.2817, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5395498392282958, |
| "grad_norm": 1.1393136978149414, |
| "learning_rate": 0.0001892827275651335, |
| "loss": 3.1519, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5401929260450161, |
| "grad_norm": 1.1961064338684082, |
| "learning_rate": 0.00018926986169186233, |
| "loss": 3.1477, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5408360128617363, |
| "grad_norm": 1.2031160593032837, |
| "learning_rate": 0.0001892569958185912, |
| "loss": 3.1408, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5414790996784566, |
| "grad_norm": 1.2001953125, |
| "learning_rate": 0.00018924412994532003, |
| "loss": 3.1901, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5421221864951768, |
| "grad_norm": 1.0908831357955933, |
| "learning_rate": 0.0001892312640720489, |
| "loss": 3.2398, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5427652733118971, |
| "grad_norm": 1.09163498878479, |
| "learning_rate": 0.00018921839819877775, |
| "loss": 3.2032, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.5434083601286174, |
| "grad_norm": 1.162726879119873, |
| "learning_rate": 0.0001892055323255066, |
| "loss": 3.1095, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5440514469453376, |
| "grad_norm": 1.0796010494232178, |
| "learning_rate": 0.00018919266645223547, |
| "loss": 3.1118, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5446945337620579, |
| "grad_norm": 1.1972142457962036, |
| "learning_rate": 0.00018917980057896433, |
| "loss": 3.1192, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5453376205787781, |
| "grad_norm": 0.996917724609375, |
| "learning_rate": 0.00018916693470569316, |
| "loss": 3.1339, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5459807073954984, |
| "grad_norm": 1.2327890396118164, |
| "learning_rate": 0.00018915406883242202, |
| "loss": 3.1469, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5466237942122186, |
| "grad_norm": 1.1841199398040771, |
| "learning_rate": 0.00018914120295915085, |
| "loss": 3.1122, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.547266881028939, |
| "grad_norm": 1.1988370418548584, |
| "learning_rate": 0.0001891283370858797, |
| "loss": 3.1059, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5479099678456592, |
| "grad_norm": 1.2205297946929932, |
| "learning_rate": 0.00018911547121260857, |
| "loss": 3.109, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5485530546623795, |
| "grad_norm": 1.1680948734283447, |
| "learning_rate": 0.0001891026053393374, |
| "loss": 3.1783, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5491961414790997, |
| "grad_norm": 1.4123858213424683, |
| "learning_rate": 0.00018908973946606627, |
| "loss": 3.093, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5498392282958199, |
| "grad_norm": 1.3545359373092651, |
| "learning_rate": 0.0001890768735927951, |
| "loss": 3.1441, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5504823151125402, |
| "grad_norm": 1.0951796770095825, |
| "learning_rate": 0.00018906400771952399, |
| "loss": 3.092, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5511254019292604, |
| "grad_norm": 1.2619158029556274, |
| "learning_rate": 0.00018905114184625285, |
| "loss": 3.1159, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5517684887459807, |
| "grad_norm": 1.3777707815170288, |
| "learning_rate": 0.00018903827597298168, |
| "loss": 3.1173, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5524115755627009, |
| "grad_norm": 1.1288398504257202, |
| "learning_rate": 0.00018902541009971054, |
| "loss": 3.1351, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5530546623794212, |
| "grad_norm": 1.0961090326309204, |
| "learning_rate": 0.00018901254422643937, |
| "loss": 3.1003, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5536977491961415, |
| "grad_norm": 1.383499026298523, |
| "learning_rate": 0.00018899967835316823, |
| "loss": 3.1626, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5543408360128618, |
| "grad_norm": 1.2608115673065186, |
| "learning_rate": 0.0001889868124798971, |
| "loss": 3.1251, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.554983922829582, |
| "grad_norm": 1.0015844106674194, |
| "learning_rate": 0.00018897394660662592, |
| "loss": 3.0936, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5556270096463023, |
| "grad_norm": 1.2425543069839478, |
| "learning_rate": 0.00018896108073335478, |
| "loss": 2.9914, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5562700964630225, |
| "grad_norm": 1.169270634651184, |
| "learning_rate": 0.00018894821486008362, |
| "loss": 3.0521, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5569131832797428, |
| "grad_norm": 1.1069300174713135, |
| "learning_rate": 0.00018893534898681248, |
| "loss": 3.1724, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.557556270096463, |
| "grad_norm": 1.0531796216964722, |
| "learning_rate": 0.00018892248311354134, |
| "loss": 3.1483, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5581993569131832, |
| "grad_norm": 1.192090630531311, |
| "learning_rate": 0.0001889096172402702, |
| "loss": 3.1941, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5588424437299035, |
| "grad_norm": 1.1669304370880127, |
| "learning_rate": 0.00018889675136699906, |
| "loss": 3.0172, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5594855305466238, |
| "grad_norm": 1.3154963254928589, |
| "learning_rate": 0.0001888838854937279, |
| "loss": 3.0505, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5601286173633441, |
| "grad_norm": 1.1110012531280518, |
| "learning_rate": 0.00018887101962045675, |
| "loss": 3.057, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5607717041800643, |
| "grad_norm": 1.350762128829956, |
| "learning_rate": 0.0001888581537471856, |
| "loss": 3.1815, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5614147909967846, |
| "grad_norm": 1.2955763339996338, |
| "learning_rate": 0.00018884528787391444, |
| "loss": 3.153, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5620578778135048, |
| "grad_norm": 1.4547818899154663, |
| "learning_rate": 0.0001888324220006433, |
| "loss": 3.0896, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5627009646302251, |
| "grad_norm": 1.323948621749878, |
| "learning_rate": 0.00018881955612737214, |
| "loss": 3.1684, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.5633440514469453, |
| "grad_norm": 0.9974737167358398, |
| "learning_rate": 0.000188806690254101, |
| "loss": 3.1328, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5639871382636656, |
| "grad_norm": 1.2835307121276855, |
| "learning_rate": 0.00018879382438082986, |
| "loss": 3.1713, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5646302250803859, |
| "grad_norm": 1.4593397378921509, |
| "learning_rate": 0.00018878095850755872, |
| "loss": 3.0995, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5652733118971061, |
| "grad_norm": 1.3302654027938843, |
| "learning_rate": 0.00018876809263428758, |
| "loss": 3.0794, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5659163987138264, |
| "grad_norm": 1.3593260049819946, |
| "learning_rate": 0.00018875522676101644, |
| "loss": 3.0851, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5665594855305466, |
| "grad_norm": 1.515762209892273, |
| "learning_rate": 0.00018874236088774527, |
| "loss": 3.1116, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.5672025723472669, |
| "grad_norm": 1.3611632585525513, |
| "learning_rate": 0.00018872949501447413, |
| "loss": 3.1126, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.5678456591639871, |
| "grad_norm": 1.281817078590393, |
| "learning_rate": 0.00018871662914120296, |
| "loss": 3.0904, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.5684887459807074, |
| "grad_norm": 1.5134152173995972, |
| "learning_rate": 0.00018870376326793182, |
| "loss": 3.1503, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.5691318327974276, |
| "grad_norm": 1.3870372772216797, |
| "learning_rate": 0.00018869089739466068, |
| "loss": 3.1883, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.569774919614148, |
| "grad_norm": 1.3908785581588745, |
| "learning_rate": 0.00018867803152138951, |
| "loss": 3.0711, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.5704180064308682, |
| "grad_norm": 1.5557795763015747, |
| "learning_rate": 0.00018866516564811837, |
| "loss": 3.2031, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.5710610932475885, |
| "grad_norm": 1.3231662511825562, |
| "learning_rate": 0.0001886522997748472, |
| "loss": 3.0175, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.5717041800643087, |
| "grad_norm": 1.1312745809555054, |
| "learning_rate": 0.00018863943390157607, |
| "loss": 3.1328, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.572347266881029, |
| "grad_norm": 1.6406269073486328, |
| "learning_rate": 0.00018862656802830493, |
| "loss": 3.0261, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5729903536977492, |
| "grad_norm": 1.5973625183105469, |
| "learning_rate": 0.0001886137021550338, |
| "loss": 3.0951, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.5736334405144694, |
| "grad_norm": 1.237862229347229, |
| "learning_rate": 0.00018860083628176265, |
| "loss": 3.0748, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.5742765273311897, |
| "grad_norm": 1.2180136442184448, |
| "learning_rate": 0.00018858797040849148, |
| "loss": 3.1109, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.5749196141479099, |
| "grad_norm": 1.3336851596832275, |
| "learning_rate": 0.00018857510453522034, |
| "loss": 3.0763, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.5755627009646302, |
| "grad_norm": 1.3121765851974487, |
| "learning_rate": 0.0001885622386619492, |
| "loss": 3.1751, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.5762057877813505, |
| "grad_norm": 1.292459487915039, |
| "learning_rate": 0.00018854937278867803, |
| "loss": 3.118, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.5768488745980708, |
| "grad_norm": 1.2852542400360107, |
| "learning_rate": 0.0001885365069154069, |
| "loss": 3.1566, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.577491961414791, |
| "grad_norm": 1.2023179531097412, |
| "learning_rate": 0.00018852364104213573, |
| "loss": 3.2088, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.5781350482315113, |
| "grad_norm": 1.2750616073608398, |
| "learning_rate": 0.00018851077516886459, |
| "loss": 3.0904, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.5787781350482315, |
| "grad_norm": 1.2595165967941284, |
| "learning_rate": 0.00018849790929559345, |
| "loss": 3.0601, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5794212218649518, |
| "grad_norm": 1.2735553979873657, |
| "learning_rate": 0.0001884850434223223, |
| "loss": 3.0751, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.580064308681672, |
| "grad_norm": 1.3524245023727417, |
| "learning_rate": 0.00018847217754905117, |
| "loss": 3.0462, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.5807073954983922, |
| "grad_norm": 1.335268259048462, |
| "learning_rate": 0.00018845931167578, |
| "loss": 3.0297, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.5813504823151125, |
| "grad_norm": 1.3870205879211426, |
| "learning_rate": 0.00018844644580250886, |
| "loss": 3.2041, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5819935691318328, |
| "grad_norm": 1.4512618780136108, |
| "learning_rate": 0.00018843357992923772, |
| "loss": 3.278, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5826366559485531, |
| "grad_norm": 1.4124186038970947, |
| "learning_rate": 0.00018842071405596655, |
| "loss": 3.1195, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5832797427652733, |
| "grad_norm": 1.5869147777557373, |
| "learning_rate": 0.0001884078481826954, |
| "loss": 3.0581, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5839228295819936, |
| "grad_norm": 1.358933687210083, |
| "learning_rate": 0.00018839498230942424, |
| "loss": 3.0677, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5845659163987138, |
| "grad_norm": 1.1760841608047485, |
| "learning_rate": 0.0001883821164361531, |
| "loss": 2.995, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5852090032154341, |
| "grad_norm": 1.4777272939682007, |
| "learning_rate": 0.00018836925056288196, |
| "loss": 2.9668, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5858520900321543, |
| "grad_norm": 1.1686232089996338, |
| "learning_rate": 0.0001883563846896108, |
| "loss": 3.1695, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5864951768488746, |
| "grad_norm": 1.0929896831512451, |
| "learning_rate": 0.00018834351881633966, |
| "loss": 3.1391, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5871382636655949, |
| "grad_norm": 1.4318856000900269, |
| "learning_rate": 0.00018833065294306852, |
| "loss": 3.0627, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5877813504823152, |
| "grad_norm": 1.3018511533737183, |
| "learning_rate": 0.00018831778706979738, |
| "loss": 3.1305, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5884244372990354, |
| "grad_norm": 1.395560622215271, |
| "learning_rate": 0.00018830492119652624, |
| "loss": 3.1234, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5890675241157556, |
| "grad_norm": 1.3363614082336426, |
| "learning_rate": 0.00018829205532325507, |
| "loss": 3.0128, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5897106109324759, |
| "grad_norm": 1.264737606048584, |
| "learning_rate": 0.00018827918944998393, |
| "loss": 3.1762, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5903536977491961, |
| "grad_norm": 1.3853096961975098, |
| "learning_rate": 0.0001882663235767128, |
| "loss": 3.1491, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5909967845659164, |
| "grad_norm": 1.5895792245864868, |
| "learning_rate": 0.00018825345770344162, |
| "loss": 3.1872, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5916398713826366, |
| "grad_norm": 1.3258469104766846, |
| "learning_rate": 0.00018824059183017048, |
| "loss": 3.0809, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.592282958199357, |
| "grad_norm": 1.3434430360794067, |
| "learning_rate": 0.00018822772595689932, |
| "loss": 3.0111, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5929260450160772, |
| "grad_norm": 1.3564894199371338, |
| "learning_rate": 0.00018821486008362818, |
| "loss": 3.0968, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5935691318327975, |
| "grad_norm": 1.4929382801055908, |
| "learning_rate": 0.00018820199421035704, |
| "loss": 3.1024, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5942122186495177, |
| "grad_norm": 1.380781888961792, |
| "learning_rate": 0.0001881891283370859, |
| "loss": 3.0029, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.594855305466238, |
| "grad_norm": 1.2817083597183228, |
| "learning_rate": 0.00018817626246381476, |
| "loss": 3.0043, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.5954983922829582, |
| "grad_norm": 1.182271957397461, |
| "learning_rate": 0.0001881633965905436, |
| "loss": 3.0496, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5961414790996784, |
| "grad_norm": 1.124681830406189, |
| "learning_rate": 0.00018815053071727245, |
| "loss": 3.0583, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5967845659163987, |
| "grad_norm": 1.037346601486206, |
| "learning_rate": 0.0001881376648440013, |
| "loss": 3.0009, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5974276527331189, |
| "grad_norm": 1.2296048402786255, |
| "learning_rate": 0.00018812479897073014, |
| "loss": 3.0378, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5980707395498392, |
| "grad_norm": 1.1884113550186157, |
| "learning_rate": 0.000188111933097459, |
| "loss": 3.0192, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5987138263665595, |
| "grad_norm": 1.3827930688858032, |
| "learning_rate": 0.00018809906722418783, |
| "loss": 3.0462, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5993569131832798, |
| "grad_norm": 1.4823634624481201, |
| "learning_rate": 0.0001880862013509167, |
| "loss": 3.1284, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.3943361043930054, |
| "learning_rate": 0.00018807333547764555, |
| "loss": 3.0216, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6006430868167203, |
| "grad_norm": 1.2865196466445923, |
| "learning_rate": 0.0001880604696043744, |
| "loss": 3.0187, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.6012861736334405, |
| "grad_norm": 1.4083055257797241, |
| "learning_rate": 0.00018804760373110327, |
| "loss": 3.1085, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6019292604501608, |
| "grad_norm": 1.317826509475708, |
| "learning_rate": 0.0001880347378578321, |
| "loss": 3.0967, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.602572347266881, |
| "grad_norm": 1.337525486946106, |
| "learning_rate": 0.00018802187198456097, |
| "loss": 3.072, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.6032154340836012, |
| "grad_norm": 1.328406810760498, |
| "learning_rate": 0.00018800900611128983, |
| "loss": 2.93, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6038585209003215, |
| "grad_norm": 1.381536841392517, |
| "learning_rate": 0.00018799614023801866, |
| "loss": 2.9831, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6045016077170418, |
| "grad_norm": 1.3098235130310059, |
| "learning_rate": 0.00018798327436474752, |
| "loss": 3.1046, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6051446945337621, |
| "grad_norm": 1.2065914869308472, |
| "learning_rate": 0.00018797040849147635, |
| "loss": 2.9651, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6057877813504823, |
| "grad_norm": 1.3447399139404297, |
| "learning_rate": 0.0001879575426182052, |
| "loss": 3.1248, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6064308681672026, |
| "grad_norm": 1.2985327243804932, |
| "learning_rate": 0.00018794467674493407, |
| "loss": 3.1018, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6070739549839228, |
| "grad_norm": 1.3033169507980347, |
| "learning_rate": 0.0001879318108716629, |
| "loss": 2.9986, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6077170418006431, |
| "grad_norm": 1.3129827976226807, |
| "learning_rate": 0.00018791894499839177, |
| "loss": 3.0661, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6083601286173633, |
| "grad_norm": 1.1535412073135376, |
| "learning_rate": 0.00018790607912512063, |
| "loss": 3.0727, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.6090032154340836, |
| "grad_norm": 1.2947921752929688, |
| "learning_rate": 0.00018789321325184949, |
| "loss": 2.997, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6096463022508039, |
| "grad_norm": 1.1528445482254028, |
| "learning_rate": 0.00018788034737857835, |
| "loss": 3.0687, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.6102893890675242, |
| "grad_norm": 1.1423406600952148, |
| "learning_rate": 0.00018786748150530718, |
| "loss": 3.0846, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.6109324758842444, |
| "grad_norm": 1.1380009651184082, |
| "learning_rate": 0.00018785461563203604, |
| "loss": 3.0174, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6115755627009646, |
| "grad_norm": 1.1382924318313599, |
| "learning_rate": 0.0001878417497587649, |
| "loss": 3.0143, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.6122186495176849, |
| "grad_norm": 1.2278555631637573, |
| "learning_rate": 0.00018782888388549373, |
| "loss": 3.0719, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.6128617363344051, |
| "grad_norm": 1.331308126449585, |
| "learning_rate": 0.0001878160180122226, |
| "loss": 3.0604, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.6135048231511254, |
| "grad_norm": 1.3055458068847656, |
| "learning_rate": 0.00018780315213895142, |
| "loss": 3.2065, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.6141479099678456, |
| "grad_norm": 1.0813393592834473, |
| "learning_rate": 0.00018779028626568028, |
| "loss": 3.0412, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.614790996784566, |
| "grad_norm": 1.3665434122085571, |
| "learning_rate": 0.00018777742039240914, |
| "loss": 3.1002, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.6154340836012862, |
| "grad_norm": 1.0831019878387451, |
| "learning_rate": 0.000187764554519138, |
| "loss": 2.9718, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.6160771704180065, |
| "grad_norm": 1.2719820737838745, |
| "learning_rate": 0.00018775168864586686, |
| "loss": 3.0464, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.6167202572347267, |
| "grad_norm": 1.290090560913086, |
| "learning_rate": 0.0001877388227725957, |
| "loss": 3.083, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.617363344051447, |
| "grad_norm": 1.1994895935058594, |
| "learning_rate": 0.00018772595689932456, |
| "loss": 2.9636, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6180064308681672, |
| "grad_norm": 1.330077886581421, |
| "learning_rate": 0.00018771309102605342, |
| "loss": 3.0226, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.6186495176848874, |
| "grad_norm": 1.1761059761047363, |
| "learning_rate": 0.00018770022515278225, |
| "loss": 3.0349, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.6192926045016077, |
| "grad_norm": 1.1384966373443604, |
| "learning_rate": 0.0001876873592795111, |
| "loss": 3.0769, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.6199356913183279, |
| "grad_norm": 1.1724135875701904, |
| "learning_rate": 0.00018767449340623994, |
| "loss": 3.0703, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.6205787781350482, |
| "grad_norm": 1.1943421363830566, |
| "learning_rate": 0.0001876616275329688, |
| "loss": 3.0188, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.6212218649517685, |
| "grad_norm": 1.1089471578598022, |
| "learning_rate": 0.00018764876165969766, |
| "loss": 3.088, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.6218649517684888, |
| "grad_norm": 1.1156290769577026, |
| "learning_rate": 0.0001876358957864265, |
| "loss": 3.0223, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.622508038585209, |
| "grad_norm": 1.3356914520263672, |
| "learning_rate": 0.00018762302991315536, |
| "loss": 3.0365, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.6231511254019293, |
| "grad_norm": 1.2664483785629272, |
| "learning_rate": 0.00018761016403988422, |
| "loss": 3.1077, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.6237942122186495, |
| "grad_norm": 1.211126685142517, |
| "learning_rate": 0.00018759729816661308, |
| "loss": 3.1502, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6244372990353698, |
| "grad_norm": 1.1281132698059082, |
| "learning_rate": 0.00018758443229334194, |
| "loss": 2.9797, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.62508038585209, |
| "grad_norm": 1.2974032163619995, |
| "learning_rate": 0.00018757156642007077, |
| "loss": 3.0456, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.6257234726688103, |
| "grad_norm": 1.1352269649505615, |
| "learning_rate": 0.00018755870054679963, |
| "loss": 3.0097, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6263665594855305, |
| "grad_norm": 1.256424903869629, |
| "learning_rate": 0.00018754583467352846, |
| "loss": 2.9262, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6270096463022508, |
| "grad_norm": 1.1779004335403442, |
| "learning_rate": 0.00018753296880025732, |
| "loss": 2.931, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6276527331189711, |
| "grad_norm": 1.3205631971359253, |
| "learning_rate": 0.00018752010292698618, |
| "loss": 2.996, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6282958199356913, |
| "grad_norm": 1.3165156841278076, |
| "learning_rate": 0.00018750723705371502, |
| "loss": 2.9635, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6289389067524116, |
| "grad_norm": 1.3659495115280151, |
| "learning_rate": 0.00018749437118044388, |
| "loss": 2.9997, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6295819935691318, |
| "grad_norm": 1.2277815341949463, |
| "learning_rate": 0.00018748150530717274, |
| "loss": 3.0684, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6302250803858521, |
| "grad_norm": 1.3968592882156372, |
| "learning_rate": 0.0001874686394339016, |
| "loss": 3.018, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6308681672025723, |
| "grad_norm": 1.1932144165039062, |
| "learning_rate": 0.00018745577356063046, |
| "loss": 2.9842, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6315112540192926, |
| "grad_norm": 1.1522760391235352, |
| "learning_rate": 0.0001874429076873593, |
| "loss": 3.0659, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6321543408360129, |
| "grad_norm": 1.3020713329315186, |
| "learning_rate": 0.00018743004181408815, |
| "loss": 2.915, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.6327974276527332, |
| "grad_norm": 1.2788695096969604, |
| "learning_rate": 0.000187417175940817, |
| "loss": 2.9683, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6334405144694534, |
| "grad_norm": 1.1550734043121338, |
| "learning_rate": 0.00018740431006754584, |
| "loss": 3.1207, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.6340836012861736, |
| "grad_norm": 1.3394757509231567, |
| "learning_rate": 0.0001873914441942747, |
| "loss": 3.0282, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6347266881028939, |
| "grad_norm": 1.2279311418533325, |
| "learning_rate": 0.00018737857832100353, |
| "loss": 2.9223, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6353697749196141, |
| "grad_norm": 1.3444873094558716, |
| "learning_rate": 0.0001873657124477324, |
| "loss": 2.9301, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6360128617363344, |
| "grad_norm": 1.389906883239746, |
| "learning_rate": 0.00018735284657446125, |
| "loss": 3.0233, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.6366559485530546, |
| "grad_norm": 1.3237117528915405, |
| "learning_rate": 0.0001873399807011901, |
| "loss": 3.0986, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.637299035369775, |
| "grad_norm": 1.294018268585205, |
| "learning_rate": 0.00018732711482791895, |
| "loss": 2.9491, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6379421221864952, |
| "grad_norm": 1.852734923362732, |
| "learning_rate": 0.0001873142489546478, |
| "loss": 3.0225, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6385852090032155, |
| "grad_norm": 1.240017294883728, |
| "learning_rate": 0.00018730138308137667, |
| "loss": 2.9521, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.6392282958199357, |
| "grad_norm": 1.3555036783218384, |
| "learning_rate": 0.00018728851720810553, |
| "loss": 3.013, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.639871382636656, |
| "grad_norm": 1.4047218561172485, |
| "learning_rate": 0.00018727565133483436, |
| "loss": 3.0727, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6405144694533762, |
| "grad_norm": 1.4910492897033691, |
| "learning_rate": 0.00018726278546156322, |
| "loss": 3.012, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6411575562700965, |
| "grad_norm": 1.2978317737579346, |
| "learning_rate": 0.00018724991958829205, |
| "loss": 3.0042, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6418006430868167, |
| "grad_norm": 1.492448329925537, |
| "learning_rate": 0.0001872370537150209, |
| "loss": 2.9069, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6424437299035369, |
| "grad_norm": 1.340957522392273, |
| "learning_rate": 0.00018722418784174977, |
| "loss": 2.9449, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "grad_norm": 1.2786818742752075, |
| "learning_rate": 0.0001872113219684786, |
| "loss": 2.9224, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "eval_loss": 2.9839203357696533, |
| "eval_runtime": 5.4642, |
| "eval_samples_per_second": 91.505, |
| "eval_steps_per_second": 45.753, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 15550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1051904803307520.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|