Invalid JSON:Unexpected token 'N', ..."ad_norm": NaN,
"... is not valid JSON
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 29.12, | |
| "eval_steps": 100, | |
| "global_step": 182000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 1.1145988702774048, | |
| "learning_rate": 5.94e-05, | |
| "loss": 129.2138, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.3314463794231415, | |
| "learning_rate": 0.0001194, | |
| "loss": 147.1265, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.30200499296188354, | |
| "learning_rate": 0.00017939999999999997, | |
| "loss": 147.1375, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.20890414714813232, | |
| "learning_rate": 0.0002394, | |
| "loss": 141.107, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.19977182149887085, | |
| "learning_rate": 0.00029939999999999996, | |
| "loss": 130.2311, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.1718936711549759, | |
| "learning_rate": 0.00029999762390495616, | |
| "loss": 116.9488, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.21659506857395172, | |
| "learning_rate": 0.00029999522380895233, | |
| "loss": 106.3702, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.19612713158130646, | |
| "learning_rate": 0.0002999928237129485, | |
| "loss": 98.8033, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.18958421051502228, | |
| "learning_rate": 0.00029999042361694467, | |
| "loss": 94.6761, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.25341877341270447, | |
| "learning_rate": 0.00029998802352094084, | |
| "loss": 88.2629, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.1762186735868454, | |
| "learning_rate": 0.000299985623424937, | |
| "loss": 87.4362, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.23407000303268433, | |
| "learning_rate": 0.0002999832233289331, | |
| "loss": 85.7211, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.23202084004878998, | |
| "learning_rate": 0.0002999808232329293, | |
| "loss": 81.4749, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.1819111853837967, | |
| "learning_rate": 0.00029997842313692546, | |
| "loss": 80.3999, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.16154050827026367, | |
| "learning_rate": 0.00029997602304092163, | |
| "loss": 80.5113, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.20147816836833954, | |
| "learning_rate": 0.0002999736229449178, | |
| "loss": 77.4306, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.2032860815525055, | |
| "learning_rate": 0.0002999712228489139, | |
| "loss": 76.3299, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.20103086531162262, | |
| "learning_rate": 0.0002999688227529101, | |
| "loss": 77.0755, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.1930929720401764, | |
| "learning_rate": 0.00029996642265690625, | |
| "loss": 74.2643, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.21013671159744263, | |
| "learning_rate": 0.0002999640225609024, | |
| "loss": 75.9168, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.2554585635662079, | |
| "learning_rate": 0.0002999616224648986, | |
| "loss": 75.2005, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.21000510454177856, | |
| "learning_rate": 0.00029995922236889476, | |
| "loss": 74.1565, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.2096049040555954, | |
| "learning_rate": 0.0002999568222728909, | |
| "loss": 73.3684, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.2806188464164734, | |
| "learning_rate": 0.00029995442217688705, | |
| "loss": 73.9772, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.17476481199264526, | |
| "learning_rate": 0.0002999520220808832, | |
| "loss": 73.7125, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.26867198944091797, | |
| "learning_rate": 0.0002999496219848794, | |
| "loss": 72.5119, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.1896703690290451, | |
| "learning_rate": 0.00029994722188887555, | |
| "loss": 72.6918, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.2521280348300934, | |
| "learning_rate": 0.00029994482179287167, | |
| "loss": 72.1229, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.20409554243087769, | |
| "learning_rate": 0.00029994242169686784, | |
| "loss": 72.3524, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.1911861002445221, | |
| "learning_rate": 0.000299940021600864, | |
| "loss": 70.9714, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.21338903903961182, | |
| "learning_rate": 0.0002999376215048602, | |
| "loss": 69.5716, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.20922720432281494, | |
| "learning_rate": 0.00029993522140885634, | |
| "loss": 70.1812, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.2678331434726715, | |
| "learning_rate": 0.0002999328213128525, | |
| "loss": 68.8041, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.25610026717185974, | |
| "learning_rate": 0.00029993042121684863, | |
| "loss": 71.186, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.23267875611782074, | |
| "learning_rate": 0.0002999280211208448, | |
| "loss": 68.9921, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.23876765370368958, | |
| "learning_rate": 0.00029992562102484097, | |
| "loss": 69.738, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.1865028291940689, | |
| "learning_rate": 0.00029992322092883714, | |
| "loss": 68.9813, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.21735595166683197, | |
| "learning_rate": 0.0002999208208328333, | |
| "loss": 67.5755, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.16909943521022797, | |
| "learning_rate": 0.0002999184207368294, | |
| "loss": 66.3015, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.19918648898601532, | |
| "learning_rate": 0.0002999160206408256, | |
| "loss": 67.3844, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.22282840311527252, | |
| "learning_rate": 0.00029991362054482176, | |
| "loss": 66.0008, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.19900047779083252, | |
| "learning_rate": 0.00029991122044881793, | |
| "loss": 66.029, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.2067142128944397, | |
| "learning_rate": 0.0002999088203528141, | |
| "loss": 65.7196, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.24062038958072662, | |
| "learning_rate": 0.00029990642025681027, | |
| "loss": 66.7571, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.2454902082681656, | |
| "learning_rate": 0.0002999040201608064, | |
| "loss": 65.7736, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.24499955773353577, | |
| "learning_rate": 0.00029990162006480255, | |
| "loss": 65.498, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.2421354055404663, | |
| "learning_rate": 0.0002998992199687987, | |
| "loss": 65.9207, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.1900254338979721, | |
| "learning_rate": 0.0002998968198727949, | |
| "loss": 63.4017, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.21995197236537933, | |
| "learning_rate": 0.00029989441977679106, | |
| "loss": 65.4319, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.2170778065919876, | |
| "learning_rate": 0.00029989201968078717, | |
| "loss": 64.1503, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.29141783714294434, | |
| "learning_rate": 0.00029988961958478334, | |
| "loss": 63.4509, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.2149534821510315, | |
| "learning_rate": 0.0002998872194887795, | |
| "loss": 63.8549, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.2090325504541397, | |
| "learning_rate": 0.0002998848193927757, | |
| "loss": 62.5135, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.19093327224254608, | |
| "learning_rate": 0.00029988241929677185, | |
| "loss": 64.1856, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.24676312506198883, | |
| "learning_rate": 0.000299880019200768, | |
| "loss": 62.8992, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.2047237902879715, | |
| "learning_rate": 0.00029987761910476413, | |
| "loss": 63.5, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.2169736623764038, | |
| "learning_rate": 0.0002998752190087603, | |
| "loss": 63.2706, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.2212333083152771, | |
| "learning_rate": 0.00029987281891275647, | |
| "loss": 62.8563, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.22105100750923157, | |
| "learning_rate": 0.00029987041881675264, | |
| "loss": 61.4049, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.21934692561626434, | |
| "learning_rate": 0.0002998680187207488, | |
| "loss": 61.2102, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.231471449136734, | |
| "learning_rate": 0.0002998656186247449, | |
| "loss": 61.161, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.20244845747947693, | |
| "learning_rate": 0.0002998632185287411, | |
| "loss": 61.5284, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.31659385561943054, | |
| "learning_rate": 0.00029986081843273726, | |
| "loss": 59.6197, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.22351042926311493, | |
| "learning_rate": 0.00029985841833673343, | |
| "loss": 60.8731, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.20470276474952698, | |
| "learning_rate": 0.0002998560182407296, | |
| "loss": 60.5648, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.17768125236034393, | |
| "learning_rate": 0.00029985361814472577, | |
| "loss": 59.2689, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.20775848627090454, | |
| "learning_rate": 0.0002998512180487219, | |
| "loss": 58.2776, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.2682810127735138, | |
| "learning_rate": 0.00029984881795271806, | |
| "loss": 60.5164, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.22458679974079132, | |
| "learning_rate": 0.0002998464178567142, | |
| "loss": 60.1217, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.22781415283679962, | |
| "learning_rate": 0.0002998440177607104, | |
| "loss": 58.191, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.2532273232936859, | |
| "learning_rate": 0.00029984161766470656, | |
| "loss": 58.8972, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.2014983743429184, | |
| "learning_rate": 0.00029983921756870273, | |
| "loss": 58.7748, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.19773030281066895, | |
| "learning_rate": 0.0002998368174726989, | |
| "loss": 57.9689, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.245356023311615, | |
| "learning_rate": 0.00029983441737669507, | |
| "loss": 57.855, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.2565186023712158, | |
| "learning_rate": 0.00029983201728069124, | |
| "loss": 56.8152, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.17781591415405273, | |
| "learning_rate": 0.00029982961718468735, | |
| "loss": 55.2139, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.21849973499774933, | |
| "learning_rate": 0.0002998272170886835, | |
| "loss": 55.9843, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.17623578011989594, | |
| "learning_rate": 0.0002998248169926797, | |
| "loss": 57.3084, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.22286267578601837, | |
| "learning_rate": 0.00029982241689667586, | |
| "loss": 56.4191, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.20891787111759186, | |
| "learning_rate": 0.00029982001680067203, | |
| "loss": 56.4775, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.19925983250141144, | |
| "learning_rate": 0.00029981761670466815, | |
| "loss": 55.0521, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.22015956044197083, | |
| "learning_rate": 0.0002998152166086643, | |
| "loss": 55.6771, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.24997876584529877, | |
| "learning_rate": 0.0002998128165126605, | |
| "loss": 53.8931, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.2933981418609619, | |
| "learning_rate": 0.00029981041641665665, | |
| "loss": 56.6028, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.1963578313589096, | |
| "learning_rate": 0.0002998080163206528, | |
| "loss": 54.5404, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.21487855911254883, | |
| "learning_rate": 0.000299805616224649, | |
| "loss": 54.2586, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 0.21776583790779114, | |
| "learning_rate": 0.0002998032161286451, | |
| "loss": 53.9896, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.2172229140996933, | |
| "learning_rate": 0.0002998008160326413, | |
| "loss": 53.8424, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.23105138540267944, | |
| "learning_rate": 0.00029979841593663745, | |
| "loss": 54.1874, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.18797878921031952, | |
| "learning_rate": 0.0002997960158406336, | |
| "loss": 53.3869, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.20597319304943085, | |
| "learning_rate": 0.0002997936157446298, | |
| "loss": 53.7132, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.21674391627311707, | |
| "learning_rate": 0.00029979121564862595, | |
| "loss": 52.2728, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.2250959277153015, | |
| "learning_rate": 0.00029978881555262207, | |
| "loss": 53.3457, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.19289842247962952, | |
| "learning_rate": 0.00029978641545661824, | |
| "loss": 52.898, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.2215307652950287, | |
| "learning_rate": 0.0002997840153606144, | |
| "loss": 52.8446, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.19949446618556976, | |
| "learning_rate": 0.0002997816152646106, | |
| "loss": 51.9649, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.1753661036491394, | |
| "learning_rate": 0.00029977921516860675, | |
| "loss": 51.5562, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.22938130795955658, | |
| "learning_rate": 0.00029977681507260286, | |
| "loss": 52.4538, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.255227655172348, | |
| "learning_rate": 0.00029977441497659903, | |
| "loss": 50.8902, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.24369871616363525, | |
| "learning_rate": 0.0002997720148805952, | |
| "loss": 50.8092, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.22126376628875732, | |
| "learning_rate": 0.0002997696387855514, | |
| "loss": 51.0513, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.199215367436409, | |
| "learning_rate": 0.00029976723868954756, | |
| "loss": 49.6234, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.22058773040771484, | |
| "learning_rate": 0.0002997648385935437, | |
| "loss": 51.2333, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.26106688380241394, | |
| "learning_rate": 0.0002997624384975399, | |
| "loss": 49.6582, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.23437049984931946, | |
| "learning_rate": 0.00029976003840153606, | |
| "loss": 49.6097, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.1709340363740921, | |
| "learning_rate": 0.00029975763830553223, | |
| "loss": 49.9149, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.2278878539800644, | |
| "learning_rate": 0.00029975523820952835, | |
| "loss": 50.2495, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.25324809551239014, | |
| "learning_rate": 0.0002997528381135245, | |
| "loss": 48.3701, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.21413564682006836, | |
| "learning_rate": 0.0002997504380175207, | |
| "loss": 48.8447, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.2975509464740753, | |
| "learning_rate": 0.00029974803792151686, | |
| "loss": 50.0095, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.19792191684246063, | |
| "learning_rate": 0.00029974566182647304, | |
| "loss": 49.2986, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.2350345253944397, | |
| "learning_rate": 0.0002997432617304692, | |
| "loss": 48.7027, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.19396322965621948, | |
| "learning_rate": 0.00029974086163446533, | |
| "loss": 47.9713, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.2414630949497223, | |
| "learning_rate": 0.0002997384615384615, | |
| "loss": 48.7363, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.2678147554397583, | |
| "learning_rate": 0.00029973606144245767, | |
| "loss": 48.4818, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.19563674926757812, | |
| "learning_rate": 0.00029973366134645384, | |
| "loss": 48.2693, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 0.22531713545322418, | |
| "learning_rate": 0.00029973126125045, | |
| "loss": 47.758, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.22199738025665283, | |
| "learning_rate": 0.0002997288611544461, | |
| "loss": 46.9644, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.253896027803421, | |
| "learning_rate": 0.0002997264610584423, | |
| "loss": 46.5968, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.18806882202625275, | |
| "learning_rate": 0.00029972406096243846, | |
| "loss": 48.2712, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.22023610770702362, | |
| "learning_rate": 0.00029972166086643463, | |
| "loss": 47.2612, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.213795468211174, | |
| "learning_rate": 0.0002997192607704308, | |
| "loss": 45.9592, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.19787845015525818, | |
| "learning_rate": 0.00029971686067442697, | |
| "loss": 47.5647, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.19648146629333496, | |
| "learning_rate": 0.0002997144605784231, | |
| "loss": 46.8397, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.1904546618461609, | |
| "learning_rate": 0.00029971206048241925, | |
| "loss": 46.2783, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.23515231907367706, | |
| "learning_rate": 0.0002997096603864154, | |
| "loss": 46.5475, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.21483579277992249, | |
| "learning_rate": 0.0002997072602904116, | |
| "loss": 44.2442, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.2563657760620117, | |
| "learning_rate": 0.00029970486019440776, | |
| "loss": 46.1955, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.20812326669692993, | |
| "learning_rate": 0.00029970246009840387, | |
| "loss": 45.5704, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.2190365344285965, | |
| "learning_rate": 0.00029970006000240004, | |
| "loss": 45.7909, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.2379041463136673, | |
| "learning_rate": 0.0002996976599063962, | |
| "loss": 46.2324, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.2170909345149994, | |
| "learning_rate": 0.0002996952598103924, | |
| "loss": 44.766, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.15927261114120483, | |
| "learning_rate": 0.00029969285971438855, | |
| "loss": 43.669, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.22271278500556946, | |
| "learning_rate": 0.0002996904596183847, | |
| "loss": 45.0739, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.17792785167694092, | |
| "learning_rate": 0.0002996880595223809, | |
| "loss": 43.8963, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.28457048535346985, | |
| "learning_rate": 0.00029968565942637706, | |
| "loss": 44.6317, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.19491800665855408, | |
| "learning_rate": 0.0002996832593303732, | |
| "loss": 43.8541, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.21633195877075195, | |
| "learning_rate": 0.00029968085923436934, | |
| "loss": 43.2844, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.2146127074956894, | |
| "learning_rate": 0.0002996784591383655, | |
| "loss": 45.0415, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.2204289436340332, | |
| "learning_rate": 0.0002996760590423617, | |
| "loss": 44.2757, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.3051868677139282, | |
| "learning_rate": 0.00029967365894635785, | |
| "loss": 42.7227, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.23641665279865265, | |
| "learning_rate": 0.000299671258850354, | |
| "loss": 44.0578, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.18554934859275818, | |
| "learning_rate": 0.0002996688587543502, | |
| "loss": 42.5159, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.24741467833518982, | |
| "learning_rate": 0.0002996664586583463, | |
| "loss": 42.9106, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.18483412265777588, | |
| "learning_rate": 0.00029966405856234247, | |
| "loss": 42.2459, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.24359823763370514, | |
| "learning_rate": 0.00029966165846633864, | |
| "loss": 42.6733, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.20456752181053162, | |
| "learning_rate": 0.0002996592583703348, | |
| "loss": 41.5754, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.24165822565555573, | |
| "learning_rate": 0.000299656858274331, | |
| "loss": 43.6988, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.20422741770744324, | |
| "learning_rate": 0.0002996544581783271, | |
| "loss": 41.9116, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.2413185089826584, | |
| "learning_rate": 0.00029965205808232326, | |
| "loss": 41.8573, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.20443005859851837, | |
| "learning_rate": 0.00029964968198727945, | |
| "loss": 42.3368, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.21270470321178436, | |
| "learning_rate": 0.0002996472818912756, | |
| "loss": 40.336, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.21689313650131226, | |
| "learning_rate": 0.0002996448817952718, | |
| "loss": 40.5125, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.25577059388160706, | |
| "learning_rate": 0.00029964248169926796, | |
| "loss": 40.5761, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.2624509930610657, | |
| "learning_rate": 0.0002996400816032641, | |
| "loss": 40.3047, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.225455641746521, | |
| "learning_rate": 0.00029963768150726024, | |
| "loss": 40.3576, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.18313691020011902, | |
| "learning_rate": 0.0002996352814112564, | |
| "loss": 41.113, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.21272344887256622, | |
| "learning_rate": 0.0002996328813152526, | |
| "loss": 41.2563, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.23525486886501312, | |
| "learning_rate": 0.00029963048121924875, | |
| "loss": 41.2227, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.226985365152359, | |
| "learning_rate": 0.00029962808112324487, | |
| "loss": 40.6251, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.20422585308551788, | |
| "learning_rate": 0.00029962568102724103, | |
| "loss": 40.6449, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.18906068801879883, | |
| "learning_rate": 0.0002996232809312372, | |
| "loss": 39.5927, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.21180450916290283, | |
| "learning_rate": 0.0002996208808352334, | |
| "loss": 39.7467, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.2399897575378418, | |
| "learning_rate": 0.00029961848073922954, | |
| "loss": 38.9522, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.1941596120595932, | |
| "learning_rate": 0.0002996160806432257, | |
| "loss": 39.5798, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.19715790450572968, | |
| "learning_rate": 0.0002996136805472218, | |
| "loss": 39.9061, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.22090336680412292, | |
| "learning_rate": 0.00029961128045121805, | |
| "loss": 39.6083, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.26035964488983154, | |
| "learning_rate": 0.00029960890435617424, | |
| "loss": 39.3414, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.21888568997383118, | |
| "learning_rate": 0.00029960650426017035, | |
| "loss": 38.3817, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.29924601316452026, | |
| "learning_rate": 0.0002996041041641665, | |
| "loss": 38.3896, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.20395514369010925, | |
| "learning_rate": 0.0002996017040681627, | |
| "loss": 38.8915, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.20730023086071014, | |
| "learning_rate": 0.00029959930397215886, | |
| "loss": 38.9281, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.23472309112548828, | |
| "learning_rate": 0.00029959690387615503, | |
| "loss": 39.371, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.2272721529006958, | |
| "learning_rate": 0.0002995945037801512, | |
| "loss": 38.7238, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.20280113816261292, | |
| "learning_rate": 0.0002995921036841473, | |
| "loss": 38.1639, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.21985846757888794, | |
| "learning_rate": 0.0002995897035881435, | |
| "loss": 38.2459, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.22791948914527893, | |
| "learning_rate": 0.00029958730349213965, | |
| "loss": 38.365, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.218161940574646, | |
| "learning_rate": 0.0002995849033961358, | |
| "loss": 37.7998, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.23389916121959686, | |
| "learning_rate": 0.000299582503300132, | |
| "loss": 38.0078, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.20153094828128815, | |
| "learning_rate": 0.0002995801032041281, | |
| "loss": 37.1053, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.231399804353714, | |
| "learning_rate": 0.0002995777031081243, | |
| "loss": 37.6589, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.19814245402812958, | |
| "learning_rate": 0.00029957530301212044, | |
| "loss": 36.8171, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.22390811145305634, | |
| "learning_rate": 0.0002995729029161166, | |
| "loss": 36.6616, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.19958479702472687, | |
| "learning_rate": 0.0002995705028201128, | |
| "loss": 36.0232, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.1972126066684723, | |
| "learning_rate": 0.00029956810272410895, | |
| "loss": 36.5331, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.18196193873882294, | |
| "learning_rate": 0.00029956570262810507, | |
| "loss": 36.8888, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.17047256231307983, | |
| "learning_rate": 0.00029956330253210124, | |
| "loss": 36.5987, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.22138766944408417, | |
| "learning_rate": 0.0002995609024360974, | |
| "loss": 36.2777, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.22713051736354828, | |
| "learning_rate": 0.0002995585023400936, | |
| "loss": 35.768, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.1997511237859726, | |
| "learning_rate": 0.00029955610224408974, | |
| "loss": 35.872, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.19796296954154968, | |
| "learning_rate": 0.00029955370214808586, | |
| "loss": 34.8971, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.1922471821308136, | |
| "learning_rate": 0.00029955130205208203, | |
| "loss": 35.4181, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.18493038415908813, | |
| "learning_rate": 0.0002995489019560782, | |
| "loss": 36.3712, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.22148194909095764, | |
| "learning_rate": 0.00029954650186007437, | |
| "loss": 34.5266, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.19701820611953735, | |
| "learning_rate": 0.00029954410176407054, | |
| "loss": 35.2642, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.1763058602809906, | |
| "learning_rate": 0.0002995417016680667, | |
| "loss": 36.1582, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.2792583107948303, | |
| "learning_rate": 0.0002995393015720628, | |
| "loss": 34.755, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.20418234169483185, | |
| "learning_rate": 0.00029953690147605904, | |
| "loss": 34.5373, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.24839259684085846, | |
| "learning_rate": 0.0002995345013800552, | |
| "loss": 34.5007, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.22200001776218414, | |
| "learning_rate": 0.00029953210128405133, | |
| "loss": 34.8183, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.2371726781129837, | |
| "learning_rate": 0.0002995297011880475, | |
| "loss": 34.0164, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.21370230615139008, | |
| "learning_rate": 0.00029952730109204367, | |
| "loss": 34.8268, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.20940592885017395, | |
| "learning_rate": 0.00029952490099603983, | |
| "loss": 33.8475, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.18580414354801178, | |
| "learning_rate": 0.000299522500900036, | |
| "loss": 33.8718, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.2200319468975067, | |
| "learning_rate": 0.0002995201008040322, | |
| "loss": 33.9083, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.18141067028045654, | |
| "learning_rate": 0.0002995177007080283, | |
| "loss": 33.2878, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.24104055762290955, | |
| "learning_rate": 0.00029951530061202446, | |
| "loss": 34.4549, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.22455894947052002, | |
| "learning_rate": 0.0002995129005160206, | |
| "loss": 33.2184, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.19662746787071228, | |
| "learning_rate": 0.0002995105244209768, | |
| "loss": 33.836, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.2322922796010971, | |
| "learning_rate": 0.000299508124324973, | |
| "loss": 33.1089, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.2140241116285324, | |
| "learning_rate": 0.0002995057482299292, | |
| "loss": 32.8205, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.19320878386497498, | |
| "learning_rate": 0.00029950334813392534, | |
| "loss": 32.8251, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.18298691511154175, | |
| "learning_rate": 0.0002995009480379215, | |
| "loss": 33.2469, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.22385163605213165, | |
| "learning_rate": 0.0002994985479419177, | |
| "loss": 32.4997, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.2047736793756485, | |
| "learning_rate": 0.0002994961478459138, | |
| "loss": 33.5516, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.242600679397583, | |
| "learning_rate": 0.00029949374774990996, | |
| "loss": 33.4754, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.21438950300216675, | |
| "learning_rate": 0.00029949134765390613, | |
| "loss": 33.2636, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.16991284489631653, | |
| "learning_rate": 0.0002994889475579023, | |
| "loss": 32.2435, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.21854659914970398, | |
| "learning_rate": 0.00029948654746189847, | |
| "loss": 32.986, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.22860901057720184, | |
| "learning_rate": 0.0002994841473658946, | |
| "loss": 32.1887, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.20433278381824493, | |
| "learning_rate": 0.00029948174726989076, | |
| "loss": 32.1502, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.19475246965885162, | |
| "learning_rate": 0.0002994793471738869, | |
| "loss": 32.0844, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.20006608963012695, | |
| "learning_rate": 0.0002994769470778831, | |
| "loss": 32.5956, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.17535006999969482, | |
| "learning_rate": 0.00029947454698187926, | |
| "loss": 32.1812, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.22252418100833893, | |
| "learning_rate": 0.00029947214688587543, | |
| "loss": 30.6041, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.18110983073711395, | |
| "learning_rate": 0.00029946974678987155, | |
| "loss": 31.7236, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.227754145860672, | |
| "learning_rate": 0.0002994673466938677, | |
| "loss": 31.2323, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.19320198893547058, | |
| "learning_rate": 0.0002994649465978639, | |
| "loss": 31.4608, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.17932754755020142, | |
| "learning_rate": 0.00029946254650186006, | |
| "loss": 31.9613, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.19677236676216125, | |
| "learning_rate": 0.0002994601464058562, | |
| "loss": 30.9284, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.22562915086746216, | |
| "learning_rate": 0.00029945774630985234, | |
| "loss": 30.7692, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.19202880561351776, | |
| "learning_rate": 0.0002994553462138485, | |
| "loss": 31.2991, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.22251880168914795, | |
| "learning_rate": 0.0002994529461178447, | |
| "loss": 29.574, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.18705110251903534, | |
| "learning_rate": 0.00029945054602184085, | |
| "loss": 30.2693, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.18061533570289612, | |
| "learning_rate": 0.000299448145925837, | |
| "loss": 30.0086, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.23449186980724335, | |
| "learning_rate": 0.0002994457458298332, | |
| "loss": 29.9262, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.20259559154510498, | |
| "learning_rate": 0.0002994433457338293, | |
| "loss": 30.0139, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.21019335091114044, | |
| "learning_rate": 0.00029944094563782547, | |
| "loss": 30.853, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.17927643656730652, | |
| "learning_rate": 0.00029943854554182164, | |
| "loss": 30.7392, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.18862564861774445, | |
| "learning_rate": 0.0002994361454458178, | |
| "loss": 29.3096, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.22294782102108002, | |
| "learning_rate": 0.000299433745349814, | |
| "loss": 30.2642, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.20843671262264252, | |
| "learning_rate": 0.0002994313452538101, | |
| "loss": 29.4115, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.19081708788871765, | |
| "learning_rate": 0.00029942894515780626, | |
| "loss": 30.0382, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.18849343061447144, | |
| "learning_rate": 0.00029942654506180243, | |
| "loss": 29.6371, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.2084178924560547, | |
| "learning_rate": 0.0002994241449657986, | |
| "loss": 29.5353, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.179380401968956, | |
| "learning_rate": 0.00029942174486979477, | |
| "loss": 29.1119, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 0.2312467098236084, | |
| "learning_rate": 0.00029941934477379094, | |
| "loss": 29.3352, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.19268761575222015, | |
| "learning_rate": 0.00029941694467778705, | |
| "loss": 29.1584, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.19523601233959198, | |
| "learning_rate": 0.0002994145445817832, | |
| "loss": 29.3122, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.18007320165634155, | |
| "learning_rate": 0.0002994121444857794, | |
| "loss": 29.1468, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.19717352092266083, | |
| "learning_rate": 0.00029940974438977556, | |
| "loss": 29.2291, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.18931248784065247, | |
| "learning_rate": 0.00029940736829473175, | |
| "loss": 28.4476, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.17574016749858856, | |
| "learning_rate": 0.0002994049681987279, | |
| "loss": 27.6189, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.19395378232002258, | |
| "learning_rate": 0.0002994025681027241, | |
| "loss": 28.3701, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.1916889250278473, | |
| "learning_rate": 0.00029940016800672026, | |
| "loss": 28.3605, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.229524627327919, | |
| "learning_rate": 0.0002993977679107164, | |
| "loss": 27.7045, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.191976860165596, | |
| "learning_rate": 0.00029939536781471254, | |
| "loss": 27.6015, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.20611730217933655, | |
| "learning_rate": 0.0002993929917196688, | |
| "loss": 27.3844, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.21954050660133362, | |
| "learning_rate": 0.00029939059162366495, | |
| "loss": 27.6474, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.23369371891021729, | |
| "learning_rate": 0.00029938819152766107, | |
| "loss": 27.0846, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.19088931381702423, | |
| "learning_rate": 0.00029938579143165724, | |
| "loss": 27.0919, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.16385389864444733, | |
| "learning_rate": 0.0002993833913356534, | |
| "loss": 26.7928, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.22816230356693268, | |
| "learning_rate": 0.0002993809912396496, | |
| "loss": 26.597, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.22640523314476013, | |
| "learning_rate": 0.00029937859114364574, | |
| "loss": 26.6011, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.18119996786117554, | |
| "learning_rate": 0.0002993761910476419, | |
| "loss": 26.8414, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.2026926428079605, | |
| "learning_rate": 0.00029937379095163803, | |
| "loss": 26.9172, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.20275373756885529, | |
| "learning_rate": 0.0002993713908556342, | |
| "loss": 26.6568, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.2261670082807541, | |
| "learning_rate": 0.00029936899075963037, | |
| "loss": 27.1839, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.18411505222320557, | |
| "learning_rate": 0.00029936659066362654, | |
| "loss": 26.4785, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.2916317582130432, | |
| "learning_rate": 0.0002993641905676227, | |
| "loss": 26.5309, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.18537244200706482, | |
| "learning_rate": 0.0002993617904716188, | |
| "loss": 27.1665, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.16285920143127441, | |
| "learning_rate": 0.000299359390375615, | |
| "loss": 27.2424, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.15773992240428925, | |
| "learning_rate": 0.00029935699027961116, | |
| "loss": 26.5359, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.18703384697437286, | |
| "learning_rate": 0.00029935459018360733, | |
| "loss": 27.342, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.18335498869419098, | |
| "learning_rate": 0.0002993521900876035, | |
| "loss": 27.0257, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.19414934515953064, | |
| "learning_rate": 0.00029934978999159967, | |
| "loss": 26.2998, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.20599210262298584, | |
| "learning_rate": 0.0002993473898955958, | |
| "loss": 25.9369, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.27044299244880676, | |
| "learning_rate": 0.00029934498979959195, | |
| "loss": 26.4132, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.22304300963878632, | |
| "learning_rate": 0.0002993425897035881, | |
| "loss": 26.2685, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.20784711837768555, | |
| "learning_rate": 0.0002993401896075843, | |
| "loss": 25.336, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.2017608880996704, | |
| "learning_rate": 0.00029933778951158046, | |
| "loss": 26.1331, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.18563418090343475, | |
| "learning_rate": 0.0002993353894155766, | |
| "loss": 25.6813, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.21515151858329773, | |
| "learning_rate": 0.00029933298931957274, | |
| "loss": 26.2951, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.20512834191322327, | |
| "learning_rate": 0.0002993305892235689, | |
| "loss": 25.2256, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.23129431903362274, | |
| "learning_rate": 0.0002993281891275651, | |
| "loss": 25.7071, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.18308007717132568, | |
| "learning_rate": 0.00029932578903156125, | |
| "loss": 25.5192, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.217178076505661, | |
| "learning_rate": 0.0002993233889355574, | |
| "loss": 25.349, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.18590569496154785, | |
| "learning_rate": 0.00029932098883955353, | |
| "loss": 25.2593, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.20052315294742584, | |
| "learning_rate": 0.0002993185887435497, | |
| "loss": 24.8334, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.21725590527057648, | |
| "learning_rate": 0.00029931621264850595, | |
| "loss": 24.6134, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.23973499238491058, | |
| "learning_rate": 0.00029931381255250206, | |
| "loss": 24.8209, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.20804470777511597, | |
| "learning_rate": 0.00029931141245649823, | |
| "loss": 25.0912, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.17555804550647736, | |
| "learning_rate": 0.0002993090363614544, | |
| "loss": 25.1723, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.17459039390087128, | |
| "learning_rate": 0.0002993066362654506, | |
| "loss": 24.5282, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.211078941822052, | |
| "learning_rate": 0.00029930423616944676, | |
| "loss": 24.6043, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.16957704722881317, | |
| "learning_rate": 0.0002993018360734429, | |
| "loss": 24.7947, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.2855212092399597, | |
| "learning_rate": 0.00029929943597743904, | |
| "loss": 24.5785, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.19777260720729828, | |
| "learning_rate": 0.0002992970358814352, | |
| "loss": 24.4989, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.17237554490566254, | |
| "learning_rate": 0.0002992946357854314, | |
| "loss": 24.6684, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.1824658066034317, | |
| "learning_rate": 0.00029929223568942755, | |
| "loss": 24.934, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.19774967432022095, | |
| "learning_rate": 0.0002992898355934237, | |
| "loss": 24.4343, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.2127138376235962, | |
| "learning_rate": 0.00029928743549741983, | |
| "loss": 24.7444, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.21794643998146057, | |
| "learning_rate": 0.000299285035401416, | |
| "loss": 25.2811, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.178062304854393, | |
| "learning_rate": 0.00029928263530541217, | |
| "loss": 24.9453, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.22796912491321564, | |
| "learning_rate": 0.00029928023520940834, | |
| "loss": 23.9367, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.18951456248760223, | |
| "learning_rate": 0.0002992778351134045, | |
| "loss": 23.7658, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.24202126264572144, | |
| "learning_rate": 0.0002992754350174007, | |
| "loss": 23.9004, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.19269002974033356, | |
| "learning_rate": 0.0002992730349213968, | |
| "loss": 23.2493, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.1657482087612152, | |
| "learning_rate": 0.00029927063482539296, | |
| "loss": 23.8883, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.151734858751297, | |
| "learning_rate": 0.00029926823472938913, | |
| "loss": 23.7884, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 0.2854020595550537, | |
| "learning_rate": 0.0002992658346333853, | |
| "loss": 24.1054, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.17750577628612518, | |
| "learning_rate": 0.00029926343453738147, | |
| "loss": 23.6583, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 5.008, | |
| "grad_norm": 0.17882367968559265, | |
| "learning_rate": 0.00029926103444137764, | |
| "loss": 23.4828, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 5.024, | |
| "grad_norm": 0.17182889580726624, | |
| "learning_rate": 0.0002992586343453738, | |
| "loss": 22.8774, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.20355378091335297, | |
| "learning_rate": 0.00029925623424937, | |
| "loss": 23.3064, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.056, | |
| "grad_norm": 0.21614141762256622, | |
| "learning_rate": 0.00029925383415336615, | |
| "loss": 22.8978, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 5.072, | |
| "grad_norm": 0.20654118061065674, | |
| "learning_rate": 0.00029925143405736226, | |
| "loss": 24.0182, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 5.088, | |
| "grad_norm": 0.17882691323757172, | |
| "learning_rate": 0.00029924903396135843, | |
| "loss": 22.8556, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 5.104, | |
| "grad_norm": 0.16477125883102417, | |
| "learning_rate": 0.0002992466338653546, | |
| "loss": 22.63, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.15241862833499908, | |
| "learning_rate": 0.00029924423376935077, | |
| "loss": 22.9513, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.136, | |
| "grad_norm": 0.17560409009456635, | |
| "learning_rate": 0.00029924183367334694, | |
| "loss": 22.808, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 5.152, | |
| "grad_norm": 0.18167634308338165, | |
| "learning_rate": 0.00029923943357734305, | |
| "loss": 23.0177, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 5.168, | |
| "grad_norm": 0.18328386545181274, | |
| "learning_rate": 0.0002992370334813392, | |
| "loss": 22.5144, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 5.184, | |
| "grad_norm": 0.20202048122882843, | |
| "learning_rate": 0.0002992346333853354, | |
| "loss": 23.1037, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.20026326179504395, | |
| "learning_rate": 0.00029923223328933156, | |
| "loss": 22.3593, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 5.216, | |
| "grad_norm": 0.1727285534143448, | |
| "learning_rate": 0.00029922983319332773, | |
| "loss": 22.214, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 5.232, | |
| "grad_norm": 0.1824960708618164, | |
| "learning_rate": 0.0002992274330973239, | |
| "loss": 22.2179, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 5.248, | |
| "grad_norm": 0.19371069967746735, | |
| "learning_rate": 0.00029922503300132, | |
| "loss": 22.453, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 5.264, | |
| "grad_norm": 0.22930407524108887, | |
| "learning_rate": 0.0002992226329053162, | |
| "loss": 22.1665, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.20372043550014496, | |
| "learning_rate": 0.00029922023280931235, | |
| "loss": 22.1181, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.296, | |
| "grad_norm": 0.20339564979076385, | |
| "learning_rate": 0.0002992178327133085, | |
| "loss": 22.5446, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 5.312, | |
| "grad_norm": 0.2182660847902298, | |
| "learning_rate": 0.0002992154326173047, | |
| "loss": 22.3062, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 5.328, | |
| "grad_norm": 0.18666419386863708, | |
| "learning_rate": 0.0002992130325213008, | |
| "loss": 22.0127, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 5.344, | |
| "grad_norm": 0.2193373292684555, | |
| "learning_rate": 0.000299210632425297, | |
| "loss": 22.1167, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.19642606377601624, | |
| "learning_rate": 0.00029920823232929315, | |
| "loss": 21.8393, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 5.376, | |
| "grad_norm": 0.24106252193450928, | |
| "learning_rate": 0.0002992058322332893, | |
| "loss": 21.7386, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 5.392, | |
| "grad_norm": 0.17611666023731232, | |
| "learning_rate": 0.0002992034321372855, | |
| "loss": 22.1787, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 5.408, | |
| "grad_norm": 0.23640978336334229, | |
| "learning_rate": 0.00029920103204128165, | |
| "loss": 21.5912, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 5.424, | |
| "grad_norm": 0.19579695165157318, | |
| "learning_rate": 0.00029919863194527777, | |
| "loss": 22.1147, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.18251273036003113, | |
| "learning_rate": 0.00029919623184927394, | |
| "loss": 21.8284, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.456, | |
| "grad_norm": 0.2099759876728058, | |
| "learning_rate": 0.0002991938317532701, | |
| "loss": 21.5234, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 5.4719999999999995, | |
| "grad_norm": 0.21391774713993073, | |
| "learning_rate": 0.0002991914316572663, | |
| "loss": 21.1876, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 5.4879999999999995, | |
| "grad_norm": 0.17656175792217255, | |
| "learning_rate": 0.00029918903156126244, | |
| "loss": 21.7905, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 5.504, | |
| "grad_norm": 0.1752483993768692, | |
| "learning_rate": 0.00029918663146525856, | |
| "loss": 20.9481, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.29879820346832275, | |
| "learning_rate": 0.00029918423136925473, | |
| "loss": 21.2073, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.536, | |
| "grad_norm": 0.1947035789489746, | |
| "learning_rate": 0.0002991818312732509, | |
| "loss": 21.0199, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 5.552, | |
| "grad_norm": 0.15402550995349884, | |
| "learning_rate": 0.00029917943117724707, | |
| "loss": 21.4862, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 5.568, | |
| "grad_norm": 0.21479055285453796, | |
| "learning_rate": 0.00029917703108124324, | |
| "loss": 20.3479, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 5.584, | |
| "grad_norm": 0.15968792140483856, | |
| "learning_rate": 0.0002991746309852394, | |
| "loss": 20.8151, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.16876402497291565, | |
| "learning_rate": 0.0002991722308892355, | |
| "loss": 21.8482, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.616, | |
| "grad_norm": 0.16191044449806213, | |
| "learning_rate": 0.0002991698307932317, | |
| "loss": 21.4486, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 5.632, | |
| "grad_norm": 0.20595960319042206, | |
| "learning_rate": 0.00029916743069722786, | |
| "loss": 21.7225, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 5.648, | |
| "grad_norm": 0.1939288079738617, | |
| "learning_rate": 0.00029916503060122403, | |
| "loss": 21.0107, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 5.664, | |
| "grad_norm": 0.20212168991565704, | |
| "learning_rate": 0.0002991626305052202, | |
| "loss": 20.4026, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.1956707388162613, | |
| "learning_rate": 0.0002991602544101764, | |
| "loss": 20.9491, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 5.696, | |
| "grad_norm": 0.22702528536319733, | |
| "learning_rate": 0.00029915785431417256, | |
| "loss": 21.12, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 5.712, | |
| "grad_norm": 0.19706673920154572, | |
| "learning_rate": 0.00029915547821912874, | |
| "loss": 21.5166, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 5.728, | |
| "grad_norm": 0.18108151853084564, | |
| "learning_rate": 0.0002991530781231249, | |
| "loss": 20.4059, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 5.744, | |
| "grad_norm": 0.1714268922805786, | |
| "learning_rate": 0.00029915067802712103, | |
| "loss": 20.2456, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.1415804773569107, | |
| "learning_rate": 0.0002991482779311172, | |
| "loss": 20.3176, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 5.776, | |
| "grad_norm": 0.1928543597459793, | |
| "learning_rate": 0.00029914587783511337, | |
| "loss": 20.797, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 5.792, | |
| "grad_norm": 0.17042042315006256, | |
| "learning_rate": 0.00029914347773910954, | |
| "loss": 20.2684, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 5.808, | |
| "grad_norm": 0.1929057389497757, | |
| "learning_rate": 0.0002991410776431057, | |
| "loss": 19.7169, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 5.824, | |
| "grad_norm": 0.19770380854606628, | |
| "learning_rate": 0.0002991386775471018, | |
| "loss": 20.3972, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.19927264750003815, | |
| "learning_rate": 0.000299136277451098, | |
| "loss": 20.3105, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 5.856, | |
| "grad_norm": 0.2222350686788559, | |
| "learning_rate": 0.00029913387735509416, | |
| "loss": 20.3396, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 5.872, | |
| "grad_norm": 0.15629681944847107, | |
| "learning_rate": 0.00029913147725909033, | |
| "loss": 19.7281, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 5.888, | |
| "grad_norm": 0.1714082509279251, | |
| "learning_rate": 0.0002991290771630865, | |
| "loss": 20.2121, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 5.904, | |
| "grad_norm": 0.19152860343456268, | |
| "learning_rate": 0.00029912667706708267, | |
| "loss": 20.3316, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.18097779154777527, | |
| "learning_rate": 0.0002991242769710788, | |
| "loss": 19.9225, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 5.936, | |
| "grad_norm": 0.21503089368343353, | |
| "learning_rate": 0.00029912187687507495, | |
| "loss": 20.3151, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.16976934671401978, | |
| "learning_rate": 0.0002991194767790711, | |
| "loss": 20.4782, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.1788826435804367, | |
| "learning_rate": 0.0002991170766830673, | |
| "loss": 19.616, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 0.17762643098831177, | |
| "learning_rate": 0.00029911467658706346, | |
| "loss": 19.4074, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.19231481850147247, | |
| "learning_rate": 0.0002991122764910596, | |
| "loss": 19.3966, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 6.016, | |
| "grad_norm": 0.2067825198173523, | |
| "learning_rate": 0.0002991098763950558, | |
| "loss": 19.6924, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 6.032, | |
| "grad_norm": 0.1930302083492279, | |
| "learning_rate": 0.00029910747629905196, | |
| "loss": 19.765, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 6.048, | |
| "grad_norm": 0.2076890915632248, | |
| "learning_rate": 0.00029910507620304813, | |
| "loss": 19.0516, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 6.064, | |
| "grad_norm": 0.2006111741065979, | |
| "learning_rate": 0.00029910267610704425, | |
| "loss": 19.1025, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 0.1836411952972412, | |
| "learning_rate": 0.0002991002760110404, | |
| "loss": 19.3714, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.096, | |
| "grad_norm": 0.1817934662103653, | |
| "learning_rate": 0.0002990978759150366, | |
| "loss": 19.1752, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 6.112, | |
| "grad_norm": 0.18150608241558075, | |
| "learning_rate": 0.00029909547581903276, | |
| "loss": 19.5865, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 6.128, | |
| "grad_norm": 0.3108033835887909, | |
| "learning_rate": 0.0002990930757230289, | |
| "loss": 19.3632, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 6.144, | |
| "grad_norm": 0.18861189484596252, | |
| "learning_rate": 0.00029909067562702504, | |
| "loss": 19.9617, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.16909874975681305, | |
| "learning_rate": 0.0002990882755310212, | |
| "loss": 19.8722, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 6.176, | |
| "grad_norm": 0.16401100158691406, | |
| "learning_rate": 0.0002990858754350174, | |
| "loss": 19.3652, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 6.192, | |
| "grad_norm": 0.17053301632404327, | |
| "learning_rate": 0.00029908347533901355, | |
| "loss": 19.4264, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 6.208, | |
| "grad_norm": 0.18607936799526215, | |
| "learning_rate": 0.0002990810752430097, | |
| "loss": 19.3128, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 6.224, | |
| "grad_norm": 0.2513495087623596, | |
| "learning_rate": 0.0002990786751470059, | |
| "loss": 20.1134, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.21938976645469666, | |
| "learning_rate": 0.000299076275051002, | |
| "loss": 19.5682, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.256, | |
| "grad_norm": 0.21253296732902527, | |
| "learning_rate": 0.00029907387495499817, | |
| "loss": 18.7325, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 6.272, | |
| "grad_norm": 0.21298116445541382, | |
| "learning_rate": 0.00029907147485899434, | |
| "loss": 19.0698, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 6.288, | |
| "grad_norm": 0.17804065346717834, | |
| "learning_rate": 0.0002990690747629905, | |
| "loss": 18.3022, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 6.304, | |
| "grad_norm": 0.31990084052085876, | |
| "learning_rate": 0.0002990666986679467, | |
| "loss": 18.9093, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 0.17742526531219482, | |
| "learning_rate": 0.0002990642985719428, | |
| "loss": 18.6614, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 6.336, | |
| "grad_norm": 0.20601534843444824, | |
| "learning_rate": 0.000299061898475939, | |
| "loss": 19.6871, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 6.352, | |
| "grad_norm": 0.16021846234798431, | |
| "learning_rate": 0.00029905949837993515, | |
| "loss": 18.6417, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 6.368, | |
| "grad_norm": 0.1588086634874344, | |
| "learning_rate": 0.0002990570982839313, | |
| "loss": 18.3146, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 6.384, | |
| "grad_norm": 0.21372877061367035, | |
| "learning_rate": 0.0002990546981879275, | |
| "loss": 19.0519, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.18066450953483582, | |
| "learning_rate": 0.00029905229809192366, | |
| "loss": 19.2848, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.416, | |
| "grad_norm": 0.23790153861045837, | |
| "learning_rate": 0.0002990498979959198, | |
| "loss": 18.7495, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 6.432, | |
| "grad_norm": 0.21764115989208221, | |
| "learning_rate": 0.00029904749789991594, | |
| "loss": 18.5835, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 6.448, | |
| "grad_norm": 0.18615952134132385, | |
| "learning_rate": 0.0002990450978039121, | |
| "loss": 17.9751, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 6.464, | |
| "grad_norm": 0.1657874882221222, | |
| "learning_rate": 0.0002990426977079083, | |
| "loss": 18.5635, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.3158019185066223, | |
| "learning_rate": 0.00029904029761190445, | |
| "loss": 18.6618, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.496, | |
| "grad_norm": 0.2320430427789688, | |
| "learning_rate": 0.0002990378975159006, | |
| "loss": 18.2968, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 6.5120000000000005, | |
| "grad_norm": 0.20868684351444244, | |
| "learning_rate": 0.0002990354974198968, | |
| "loss": 18.595, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 6.5280000000000005, | |
| "grad_norm": 0.2185734063386917, | |
| "learning_rate": 0.00029903309732389296, | |
| "loss": 17.9672, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 6.5440000000000005, | |
| "grad_norm": 0.22871826589107513, | |
| "learning_rate": 0.0002990306972278891, | |
| "loss": 18.0843, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 0.16801375150680542, | |
| "learning_rate": 0.00029902829713188524, | |
| "loss": 18.138, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.576, | |
| "grad_norm": 0.17401717603206635, | |
| "learning_rate": 0.0002990258970358814, | |
| "loss": 18.7431, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 6.592, | |
| "grad_norm": 0.17664673924446106, | |
| "learning_rate": 0.0002990234969398776, | |
| "loss": 17.966, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 6.608, | |
| "grad_norm": 0.2024875283241272, | |
| "learning_rate": 0.00029902109684387375, | |
| "loss": 17.9339, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 6.624, | |
| "grad_norm": 0.19322896003723145, | |
| "learning_rate": 0.0002990186967478699, | |
| "loss": 18.5554, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 0.2797154188156128, | |
| "learning_rate": 0.00029901629665186603, | |
| "loss": 17.5192, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 6.656, | |
| "grad_norm": 0.2197944074869156, | |
| "learning_rate": 0.0002990138965558622, | |
| "loss": 18.4582, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 6.672, | |
| "grad_norm": 0.18805234134197235, | |
| "learning_rate": 0.00029901149645985837, | |
| "loss": 17.9245, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 6.688, | |
| "grad_norm": 0.14986388385295868, | |
| "learning_rate": 0.00029900909636385454, | |
| "loss": 17.7746, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 6.704, | |
| "grad_norm": 0.26323381066322327, | |
| "learning_rate": 0.0002990066962678507, | |
| "loss": 17.6134, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 0.1791141778230667, | |
| "learning_rate": 0.0002990042961718469, | |
| "loss": 17.7648, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.736, | |
| "grad_norm": 0.22629794478416443, | |
| "learning_rate": 0.000299001920076803, | |
| "loss": 18.2337, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 6.752, | |
| "grad_norm": 0.17983581125736237, | |
| "learning_rate": 0.0002989995199807992, | |
| "loss": 17.4193, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 6.768, | |
| "grad_norm": 0.17379482090473175, | |
| "learning_rate": 0.00029899711988479535, | |
| "loss": 17.9815, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 6.784, | |
| "grad_norm": 0.2074684351682663, | |
| "learning_rate": 0.0002989947197887915, | |
| "loss": 17.898, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.16909289360046387, | |
| "learning_rate": 0.0002989923196927877, | |
| "loss": 17.7292, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 6.816, | |
| "grad_norm": 0.184371218085289, | |
| "learning_rate": 0.00029898991959678386, | |
| "loss": 18.0706, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 6.832, | |
| "grad_norm": 0.17724382877349854, | |
| "learning_rate": 0.00029898751950078, | |
| "loss": 17.9871, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 6.848, | |
| "grad_norm": 0.2286718785762787, | |
| "learning_rate": 0.00029898511940477614, | |
| "loss": 17.5911, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 6.864, | |
| "grad_norm": 0.2002006471157074, | |
| "learning_rate": 0.0002989827193087723, | |
| "loss": 17.4336, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 0.20236457884311676, | |
| "learning_rate": 0.0002989803192127685, | |
| "loss": 17.0849, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 6.896, | |
| "grad_norm": 0.23483681678771973, | |
| "learning_rate": 0.00029897791911676465, | |
| "loss": 17.7893, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 6.912, | |
| "grad_norm": 0.18751464784145355, | |
| "learning_rate": 0.00029897551902076077, | |
| "loss": 17.4798, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 6.928, | |
| "grad_norm": 0.17341011762619019, | |
| "learning_rate": 0.00029897311892475694, | |
| "loss": 17.7278, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 6.944, | |
| "grad_norm": 0.15160439908504486, | |
| "learning_rate": 0.0002989707188287531, | |
| "loss": 17.4948, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 0.19316324591636658, | |
| "learning_rate": 0.0002989683187327493, | |
| "loss": 17.3409, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 6.976, | |
| "grad_norm": 0.1800646036863327, | |
| "learning_rate": 0.00029896591863674544, | |
| "loss": 17.5152, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 6.992, | |
| "grad_norm": 0.19359643757343292, | |
| "learning_rate": 0.0002989635185407416, | |
| "loss": 17.2701, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 7.008, | |
| "grad_norm": 0.21103709936141968, | |
| "learning_rate": 0.0002989611184447378, | |
| "loss": 17.0028, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 7.024, | |
| "grad_norm": 0.18972234427928925, | |
| "learning_rate": 0.00029895871834873395, | |
| "loss": 16.8714, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.16335220634937286, | |
| "learning_rate": 0.0002989563182527301, | |
| "loss": 17.1409, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 7.056, | |
| "grad_norm": 0.16595561802387238, | |
| "learning_rate": 0.00029895391815672624, | |
| "loss": 17.1677, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 7.072, | |
| "grad_norm": 0.1885690540075302, | |
| "learning_rate": 0.0002989515180607224, | |
| "loss": 17.1327, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 7.088, | |
| "grad_norm": 0.16525697708129883, | |
| "learning_rate": 0.0002989491179647186, | |
| "loss": 17.0265, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 7.104, | |
| "grad_norm": 0.17798613011837006, | |
| "learning_rate": 0.00029894671786871474, | |
| "loss": 16.5858, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 0.17442761361598969, | |
| "learning_rate": 0.0002989443177727109, | |
| "loss": 16.7029, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 7.136, | |
| "grad_norm": 0.17014281451702118, | |
| "learning_rate": 0.0002989419176767071, | |
| "loss": 16.3283, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 7.152, | |
| "grad_norm": 0.21125547587871552, | |
| "learning_rate": 0.0002989395175807032, | |
| "loss": 17.0964, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 7.168, | |
| "grad_norm": 0.15473531186580658, | |
| "learning_rate": 0.00029893711748469937, | |
| "loss": 17.2634, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 7.184, | |
| "grad_norm": 0.22423428297042847, | |
| "learning_rate": 0.00029893471738869553, | |
| "loss": 16.6492, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 0.23651999235153198, | |
| "learning_rate": 0.0002989323172926917, | |
| "loss": 17.2672, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.216, | |
| "grad_norm": 0.18389280140399933, | |
| "learning_rate": 0.00029892991719668787, | |
| "loss": 16.3061, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 7.232, | |
| "grad_norm": 0.19786329567432404, | |
| "learning_rate": 0.000298927517100684, | |
| "loss": 16.7178, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 7.248, | |
| "grad_norm": 0.1748264580965042, | |
| "learning_rate": 0.00029892511700468016, | |
| "loss": 16.8728, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 7.264, | |
| "grad_norm": 0.17337900400161743, | |
| "learning_rate": 0.0002989227169086763, | |
| "loss": 16.143, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 0.1627172827720642, | |
| "learning_rate": 0.0002989203168126725, | |
| "loss": 16.677, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 7.296, | |
| "grad_norm": 0.18607047200202942, | |
| "learning_rate": 0.00029891791671666866, | |
| "loss": 16.6493, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 7.312, | |
| "grad_norm": 0.17733363807201385, | |
| "learning_rate": 0.00029891551662066483, | |
| "loss": 16.8518, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 7.328, | |
| "grad_norm": 0.17257067561149597, | |
| "learning_rate": 0.00029891311652466095, | |
| "loss": 16.7963, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 7.344, | |
| "grad_norm": 0.22989864647388458, | |
| "learning_rate": 0.0002989107164286571, | |
| "loss": 16.6846, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 0.1924850195646286, | |
| "learning_rate": 0.0002989083163326533, | |
| "loss": 16.7258, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.376, | |
| "grad_norm": 0.15162524580955505, | |
| "learning_rate": 0.00029890591623664946, | |
| "loss": 16.0529, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 7.392, | |
| "grad_norm": 0.19990018010139465, | |
| "learning_rate": 0.00029890354014160564, | |
| "loss": 16.3768, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 7.408, | |
| "grad_norm": 0.1724652647972107, | |
| "learning_rate": 0.00029890114004560176, | |
| "loss": 17.0495, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 7.424, | |
| "grad_norm": 0.1920676976442337, | |
| "learning_rate": 0.00029889873994959793, | |
| "loss": 16.1202, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 0.1957552433013916, | |
| "learning_rate": 0.00029889636385455417, | |
| "loss": 16.413, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 7.456, | |
| "grad_norm": 0.14071592688560486, | |
| "learning_rate": 0.00029889396375855034, | |
| "loss": 15.732, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 7.4719999999999995, | |
| "grad_norm": 0.1833236664533615, | |
| "learning_rate": 0.00029889156366254646, | |
| "loss": 16.7192, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 7.4879999999999995, | |
| "grad_norm": 0.2189483791589737, | |
| "learning_rate": 0.0002988891635665426, | |
| "loss": 16.0979, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 7.504, | |
| "grad_norm": 0.17360301315784454, | |
| "learning_rate": 0.0002988867634705388, | |
| "loss": 15.8968, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 0.1952562779188156, | |
| "learning_rate": 0.00029888436337453496, | |
| "loss": 15.9731, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.536, | |
| "grad_norm": 0.1601036638021469, | |
| "learning_rate": 0.00029888196327853113, | |
| "loss": 16.392, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 7.552, | |
| "grad_norm": 0.17277076840400696, | |
| "learning_rate": 0.00029887956318252725, | |
| "loss": 15.9779, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 7.568, | |
| "grad_norm": 0.1868811696767807, | |
| "learning_rate": 0.0002988771630865234, | |
| "loss": 15.5355, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 7.584, | |
| "grad_norm": 0.2078930139541626, | |
| "learning_rate": 0.00029887478699147966, | |
| "loss": 15.8833, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 0.17647911608219147, | |
| "learning_rate": 0.0002988723868954758, | |
| "loss": 16.0442, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 7.616, | |
| "grad_norm": 0.20268210768699646, | |
| "learning_rate": 0.00029886998679947194, | |
| "loss": 16.1957, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 7.632, | |
| "grad_norm": 0.1820913553237915, | |
| "learning_rate": 0.0002988675867034681, | |
| "loss": 15.8208, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 7.648, | |
| "grad_norm": 0.2001231610774994, | |
| "learning_rate": 0.0002988651866074643, | |
| "loss": 16.1706, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 7.664, | |
| "grad_norm": 0.18558456003665924, | |
| "learning_rate": 0.00029886278651146045, | |
| "loss": 15.9747, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 0.17034992575645447, | |
| "learning_rate": 0.0002988603864154566, | |
| "loss": 16.4537, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.696, | |
| "grad_norm": 0.16974206268787384, | |
| "learning_rate": 0.00029885798631945274, | |
| "loss": 15.5116, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 7.712, | |
| "grad_norm": 0.1771545112133026, | |
| "learning_rate": 0.0002988555862234489, | |
| "loss": 15.8605, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 7.728, | |
| "grad_norm": 0.17756806313991547, | |
| "learning_rate": 0.0002988531861274451, | |
| "loss": 15.8965, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 7.744, | |
| "grad_norm": 0.20773237943649292, | |
| "learning_rate": 0.00029885078603144124, | |
| "loss": 15.1184, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 0.18383237719535828, | |
| "learning_rate": 0.0002988483859354374, | |
| "loss": 16.0467, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 7.776, | |
| "grad_norm": 0.18748898804187775, | |
| "learning_rate": 0.0002988459858394336, | |
| "loss": 15.3286, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 7.792, | |
| "grad_norm": 0.2877133786678314, | |
| "learning_rate": 0.0002988435857434297, | |
| "loss": 15.8562, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 7.808, | |
| "grad_norm": 0.168177530169487, | |
| "learning_rate": 0.00029884118564742587, | |
| "loss": 15.8613, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 7.824, | |
| "grad_norm": 0.18536759912967682, | |
| "learning_rate": 0.00029883878555142203, | |
| "loss": 15.8204, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 0.15699341893196106, | |
| "learning_rate": 0.0002988363854554182, | |
| "loss": 15.6026, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 7.856, | |
| "grad_norm": 0.17730812728405, | |
| "learning_rate": 0.0002988339853594144, | |
| "loss": 15.5268, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 7.872, | |
| "grad_norm": 0.16140446066856384, | |
| "learning_rate": 0.0002988315852634105, | |
| "loss": 15.3766, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 7.888, | |
| "grad_norm": 0.16114762425422668, | |
| "learning_rate": 0.00029882918516740666, | |
| "loss": 15.8614, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 7.904, | |
| "grad_norm": 0.19132892787456512, | |
| "learning_rate": 0.0002988267850714028, | |
| "loss": 15.4026, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 0.190206840634346, | |
| "learning_rate": 0.000298824384975399, | |
| "loss": 15.42, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 7.936, | |
| "grad_norm": 0.18264752626419067, | |
| "learning_rate": 0.00029882198487939516, | |
| "loss": 15.5455, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 7.952, | |
| "grad_norm": 0.1774350255727768, | |
| "learning_rate": 0.00029881958478339133, | |
| "loss": 15.7328, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 7.968, | |
| "grad_norm": 0.1655503213405609, | |
| "learning_rate": 0.00029881718468738745, | |
| "loss": 15.5836, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 7.984, | |
| "grad_norm": 0.18890833854675293, | |
| "learning_rate": 0.0002988147845913836, | |
| "loss": 15.4838, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.1880652904510498, | |
| "learning_rate": 0.0002988123844953798, | |
| "loss": 15.2114, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.016, | |
| "grad_norm": 0.18285752832889557, | |
| "learning_rate": 0.00029880998439937596, | |
| "loss": 14.9511, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 8.032, | |
| "grad_norm": 0.19436243176460266, | |
| "learning_rate": 0.0002988075843033721, | |
| "loss": 15.4968, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 8.048, | |
| "grad_norm": 0.1822815239429474, | |
| "learning_rate": 0.00029880518420736824, | |
| "loss": 14.7632, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 8.064, | |
| "grad_norm": 0.16189494729042053, | |
| "learning_rate": 0.0002988027841113644, | |
| "loss": 14.937, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "grad_norm": 0.152993842959404, | |
| "learning_rate": 0.0002988003840153606, | |
| "loss": 14.676, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 8.096, | |
| "grad_norm": 0.2119678407907486, | |
| "learning_rate": 0.00029879798391935675, | |
| "loss": 15.725, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 8.112, | |
| "grad_norm": 0.22487041354179382, | |
| "learning_rate": 0.0002987955838233529, | |
| "loss": 15.0505, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 8.128, | |
| "grad_norm": 0.16072215139865875, | |
| "learning_rate": 0.0002987931837273491, | |
| "loss": 15.4103, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 8.144, | |
| "grad_norm": 0.16657765209674835, | |
| "learning_rate": 0.0002987907836313452, | |
| "loss": 14.7139, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 0.15327660739421844, | |
| "learning_rate": 0.00029878838353534137, | |
| "loss": 14.6325, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.176, | |
| "grad_norm": 0.20472773909568787, | |
| "learning_rate": 0.00029878598343933754, | |
| "loss": 14.7217, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 8.192, | |
| "grad_norm": 0.214088574051857, | |
| "learning_rate": 0.0002987835833433337, | |
| "loss": 14.121, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 8.208, | |
| "grad_norm": 0.20903360843658447, | |
| "learning_rate": 0.0002987811832473299, | |
| "loss": 15.1448, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 8.224, | |
| "grad_norm": 0.20621182024478912, | |
| "learning_rate": 0.000298778783151326, | |
| "loss": 14.7588, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 0.18515250086784363, | |
| "learning_rate": 0.00029877638305532216, | |
| "loss": 15.3639, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 8.256, | |
| "grad_norm": 0.17146657407283783, | |
| "learning_rate": 0.00029877398295931833, | |
| "loss": 14.4964, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 8.272, | |
| "grad_norm": 0.18953190743923187, | |
| "learning_rate": 0.0002987715828633145, | |
| "loss": 14.5639, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 8.288, | |
| "grad_norm": 0.17434297502040863, | |
| "learning_rate": 0.0002987692067682707, | |
| "loss": 15.2875, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 8.304, | |
| "grad_norm": 0.16686853766441345, | |
| "learning_rate": 0.00029876680667226686, | |
| "loss": 14.4679, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "grad_norm": 0.14394892752170563, | |
| "learning_rate": 0.00029876440657626303, | |
| "loss": 14.5162, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.336, | |
| "grad_norm": 0.20816083252429962, | |
| "learning_rate": 0.0002987620064802592, | |
| "loss": 15.2646, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 8.352, | |
| "grad_norm": 0.16660048067569733, | |
| "learning_rate": 0.00029875960638425537, | |
| "loss": 15.0214, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 8.368, | |
| "grad_norm": 0.16948403418064117, | |
| "learning_rate": 0.0002987572062882515, | |
| "loss": 14.7227, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 8.384, | |
| "grad_norm": 0.15360529720783234, | |
| "learning_rate": 0.00029875480619224765, | |
| "loss": 14.8453, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 0.1730951964855194, | |
| "learning_rate": 0.0002987524060962438, | |
| "loss": 14.6784, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 8.416, | |
| "grad_norm": 0.1714763641357422, | |
| "learning_rate": 0.00029875000600024, | |
| "loss": 14.3347, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 8.432, | |
| "grad_norm": 0.21991823613643646, | |
| "learning_rate": 0.00029874760590423616, | |
| "loss": 14.7373, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 8.448, | |
| "grad_norm": 0.26085495948791504, | |
| "learning_rate": 0.00029874520580823233, | |
| "loss": 14.4799, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 8.464, | |
| "grad_norm": 0.15623599290847778, | |
| "learning_rate": 0.00029874280571222844, | |
| "loss": 14.9737, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 0.14685533940792084, | |
| "learning_rate": 0.0002987404056162246, | |
| "loss": 14.4126, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 8.496, | |
| "grad_norm": 0.19048573076725006, | |
| "learning_rate": 0.0002987380055202208, | |
| "loss": 14.6049, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 8.512, | |
| "grad_norm": 0.15729829668998718, | |
| "learning_rate": 0.00029873560542421695, | |
| "loss": 14.8894, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 8.528, | |
| "grad_norm": 0.18257932364940643, | |
| "learning_rate": 0.0002987332053282131, | |
| "loss": 14.3249, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 8.544, | |
| "grad_norm": 0.20492464303970337, | |
| "learning_rate": 0.00029873080523220923, | |
| "loss": 15.0053, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 0.22026245296001434, | |
| "learning_rate": 0.0002987284051362054, | |
| "loss": 14.1141, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 8.576, | |
| "grad_norm": 0.16078276932239532, | |
| "learning_rate": 0.00029872600504020157, | |
| "loss": 14.3822, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 8.592, | |
| "grad_norm": 0.19619469344615936, | |
| "learning_rate": 0.00029872360494419774, | |
| "loss": 14.3099, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 8.608, | |
| "grad_norm": 0.15051692724227905, | |
| "learning_rate": 0.0002987212048481939, | |
| "loss": 13.7999, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 8.624, | |
| "grad_norm": 0.19525863230228424, | |
| "learning_rate": 0.0002987188047521901, | |
| "loss": 14.3567, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 0.16883693635463715, | |
| "learning_rate": 0.0002987164046561862, | |
| "loss": 13.3731, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.656, | |
| "grad_norm": 0.1703290492296219, | |
| "learning_rate": 0.00029871400456018236, | |
| "loss": 13.8462, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 8.672, | |
| "grad_norm": 0.18907932937145233, | |
| "learning_rate": 0.00029871160446417853, | |
| "loss": 14.5297, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 8.688, | |
| "grad_norm": 0.16260308027267456, | |
| "learning_rate": 0.0002987092043681747, | |
| "loss": 14.0573, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 8.704, | |
| "grad_norm": 0.1732938140630722, | |
| "learning_rate": 0.0002987068282731309, | |
| "loss": 14.1114, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "grad_norm": 0.20591895282268524, | |
| "learning_rate": 0.00029870442817712706, | |
| "loss": 13.7101, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 8.736, | |
| "grad_norm": 0.1871296912431717, | |
| "learning_rate": 0.00029870202808112323, | |
| "loss": 14.539, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 8.752, | |
| "grad_norm": 0.15711694955825806, | |
| "learning_rate": 0.0002986996279851194, | |
| "loss": 14.4353, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 8.768, | |
| "grad_norm": 0.1790015697479248, | |
| "learning_rate": 0.00029869722788911557, | |
| "loss": 14.4861, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 8.784, | |
| "grad_norm": 0.1903577744960785, | |
| "learning_rate": 0.0002986948277931117, | |
| "loss": 14.2582, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 0.18150964379310608, | |
| "learning_rate": 0.00029869242769710785, | |
| "loss": 13.9522, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 8.816, | |
| "grad_norm": 0.17604489624500275, | |
| "learning_rate": 0.000298690027601104, | |
| "loss": 14.4482, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 8.832, | |
| "grad_norm": 0.18487071990966797, | |
| "learning_rate": 0.0002986876275051002, | |
| "loss": 13.9656, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 8.848, | |
| "grad_norm": 0.15276212990283966, | |
| "learning_rate": 0.00029868522740909636, | |
| "loss": 14.2513, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 8.864, | |
| "grad_norm": 0.19339829683303833, | |
| "learning_rate": 0.0002986828273130925, | |
| "loss": 13.7151, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 0.14462265372276306, | |
| "learning_rate": 0.00029868042721708864, | |
| "loss": 13.8859, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 8.896, | |
| "grad_norm": 0.16163522005081177, | |
| "learning_rate": 0.0002986780271210848, | |
| "loss": 13.7567, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 8.912, | |
| "grad_norm": 0.15859289467334747, | |
| "learning_rate": 0.000298675627025081, | |
| "loss": 14.4693, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 8.928, | |
| "grad_norm": 0.1641652137041092, | |
| "learning_rate": 0.00029867322692907715, | |
| "loss": 13.6118, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 8.943999999999999, | |
| "grad_norm": 0.18410654366016388, | |
| "learning_rate": 0.0002986708268330733, | |
| "loss": 14.3033, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "grad_norm": 0.18847694993019104, | |
| "learning_rate": 0.00029866842673706944, | |
| "loss": 13.2935, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 8.975999999999999, | |
| "grad_norm": 0.15224353969097137, | |
| "learning_rate": 0.0002986660266410656, | |
| "loss": 13.6185, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 8.992, | |
| "grad_norm": 0.15307171642780304, | |
| "learning_rate": 0.0002986636265450618, | |
| "loss": 13.9229, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 9.008, | |
| "grad_norm": 0.1455143541097641, | |
| "learning_rate": 0.00029866122644905794, | |
| "loss": 13.9716, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 9.024, | |
| "grad_norm": 0.18889980018138885, | |
| "learning_rate": 0.0002986588263530541, | |
| "loss": 13.8509, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.19757011532783508, | |
| "learning_rate": 0.0002986564262570502, | |
| "loss": 14.0519, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 9.056, | |
| "grad_norm": 0.18008406460285187, | |
| "learning_rate": 0.00029865405016200647, | |
| "loss": 13.1833, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 9.072, | |
| "grad_norm": 0.1602972447872162, | |
| "learning_rate": 0.00029865165006600264, | |
| "loss": 13.2838, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 9.088, | |
| "grad_norm": 0.17582525312900543, | |
| "learning_rate": 0.0002986492499699988, | |
| "loss": 13.898, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 9.104, | |
| "grad_norm": 0.15762995183467865, | |
| "learning_rate": 0.0002986468498739949, | |
| "loss": 13.5733, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 0.1670118272304535, | |
| "learning_rate": 0.0002986444497779911, | |
| "loss": 13.5845, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 9.136, | |
| "grad_norm": 0.18542303144931793, | |
| "learning_rate": 0.00029864204968198726, | |
| "loss": 13.9615, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 9.152, | |
| "grad_norm": 0.18144281208515167, | |
| "learning_rate": 0.00029863964958598343, | |
| "loss": 13.0945, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 9.168, | |
| "grad_norm": 0.18359419703483582, | |
| "learning_rate": 0.0002986372494899796, | |
| "loss": 13.4529, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 9.184, | |
| "grad_norm": 0.2034582495689392, | |
| "learning_rate": 0.0002986348493939757, | |
| "loss": 13.2086, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 0.1561286300420761, | |
| "learning_rate": 0.0002986324492979719, | |
| "loss": 13.5699, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 9.216, | |
| "grad_norm": 0.2128494530916214, | |
| "learning_rate": 0.00029863004920196805, | |
| "loss": 13.7906, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 9.232, | |
| "grad_norm": 0.18951255083084106, | |
| "learning_rate": 0.0002986276491059642, | |
| "loss": 13.4684, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 9.248, | |
| "grad_norm": 0.14849476516246796, | |
| "learning_rate": 0.0002986252490099604, | |
| "loss": 13.6832, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 9.264, | |
| "grad_norm": 0.19169315695762634, | |
| "learning_rate": 0.00029862284891395656, | |
| "loss": 12.9751, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 0.219793900847435, | |
| "learning_rate": 0.0002986204488179527, | |
| "loss": 13.4069, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.296, | |
| "grad_norm": 0.2139630764722824, | |
| "learning_rate": 0.00029861804872194884, | |
| "loss": 12.9185, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 9.312, | |
| "grad_norm": 0.1722664088010788, | |
| "learning_rate": 0.000298615648625945, | |
| "loss": 13.4876, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 9.328, | |
| "grad_norm": 0.15841473639011383, | |
| "learning_rate": 0.0002986132485299412, | |
| "loss": 13.481, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 9.344, | |
| "grad_norm": 0.17484904825687408, | |
| "learning_rate": 0.00029861084843393735, | |
| "loss": 13.5925, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 0.20388108491897583, | |
| "learning_rate": 0.00029860844833793347, | |
| "loss": 13.2549, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 9.376, | |
| "grad_norm": 0.17959387600421906, | |
| "learning_rate": 0.00029860604824192964, | |
| "loss": 13.571, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 9.392, | |
| "grad_norm": 0.1830485612154007, | |
| "learning_rate": 0.0002986036481459258, | |
| "loss": 13.0808, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 9.408, | |
| "grad_norm": 0.1935325413942337, | |
| "learning_rate": 0.000298601248049922, | |
| "loss": 12.9193, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 9.424, | |
| "grad_norm": 0.22928985953330994, | |
| "learning_rate": 0.00029859884795391814, | |
| "loss": 12.9233, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 0.17562927305698395, | |
| "learning_rate": 0.0002985964478579143, | |
| "loss": 13.0933, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 9.456, | |
| "grad_norm": 0.21014900505542755, | |
| "learning_rate": 0.00029859404776191043, | |
| "loss": 12.9421, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 9.472, | |
| "grad_norm": 0.16698358952999115, | |
| "learning_rate": 0.0002985916476659066, | |
| "loss": 13.6465, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 9.488, | |
| "grad_norm": 0.15990376472473145, | |
| "learning_rate": 0.00029858924756990277, | |
| "loss": 12.9832, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 9.504, | |
| "grad_norm": 0.21185587346553802, | |
| "learning_rate": 0.00029858684747389894, | |
| "loss": 13.3695, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 0.16105149686336517, | |
| "learning_rate": 0.0002985844473778951, | |
| "loss": 13.0733, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 9.536, | |
| "grad_norm": 0.22624213993549347, | |
| "learning_rate": 0.0002985820472818912, | |
| "loss": 13.2586, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 9.552, | |
| "grad_norm": 0.1732643097639084, | |
| "learning_rate": 0.0002985796471858874, | |
| "loss": 12.9246, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 9.568, | |
| "grad_norm": 0.18406638503074646, | |
| "learning_rate": 0.00029857724708988356, | |
| "loss": 13.4556, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 9.584, | |
| "grad_norm": 0.18207241594791412, | |
| "learning_rate": 0.0002985748709948398, | |
| "loss": 12.8405, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 0.14808227121829987, | |
| "learning_rate": 0.0002985724708988359, | |
| "loss": 13.0075, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 9.616, | |
| "grad_norm": 0.1976134330034256, | |
| "learning_rate": 0.0002985700708028321, | |
| "loss": 12.687, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 9.632, | |
| "grad_norm": 0.1712380349636078, | |
| "learning_rate": 0.00029856767070682825, | |
| "loss": 13.003, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 9.648, | |
| "grad_norm": 0.1509382426738739, | |
| "learning_rate": 0.0002985652706108244, | |
| "loss": 13.0863, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 9.664, | |
| "grad_norm": 0.1992410570383072, | |
| "learning_rate": 0.0002985628705148206, | |
| "loss": 13.1396, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "grad_norm": 0.19914288818836212, | |
| "learning_rate": 0.0002985604704188167, | |
| "loss": 13.0716, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 9.696, | |
| "grad_norm": 0.17157557606697083, | |
| "learning_rate": 0.0002985580703228129, | |
| "loss": 12.5376, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 9.712, | |
| "grad_norm": 0.14820295572280884, | |
| "learning_rate": 0.00029855567022680905, | |
| "loss": 12.9209, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 9.728, | |
| "grad_norm": 0.17262442409992218, | |
| "learning_rate": 0.0002985532701308052, | |
| "loss": 13.3595, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 9.744, | |
| "grad_norm": 0.1804870218038559, | |
| "learning_rate": 0.0002985508700348014, | |
| "loss": 13.0037, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 0.1507444977760315, | |
| "learning_rate": 0.00029854846993879755, | |
| "loss": 12.5568, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 9.776, | |
| "grad_norm": 0.17809054255485535, | |
| "learning_rate": 0.00029854606984279367, | |
| "loss": 12.9826, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 9.792, | |
| "grad_norm": 0.25455987453460693, | |
| "learning_rate": 0.00029854366974678984, | |
| "loss": 12.5432, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 9.808, | |
| "grad_norm": 0.15175747871398926, | |
| "learning_rate": 0.000298541269650786, | |
| "loss": 12.9513, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 9.824, | |
| "grad_norm": 0.22233819961547852, | |
| "learning_rate": 0.0002985388695547822, | |
| "loss": 13.2744, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 0.1534196138381958, | |
| "learning_rate": 0.00029853646945877835, | |
| "loss": 12.4878, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 9.856, | |
| "grad_norm": 0.17612405121326447, | |
| "learning_rate": 0.00029853406936277446, | |
| "loss": 12.6281, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 9.872, | |
| "grad_norm": 0.14971201121807098, | |
| "learning_rate": 0.00029853166926677063, | |
| "loss": 12.4393, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 9.888, | |
| "grad_norm": 0.15717633068561554, | |
| "learning_rate": 0.0002985292691707668, | |
| "loss": 12.6903, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 9.904, | |
| "grad_norm": 0.1695670634508133, | |
| "learning_rate": 0.00029852686907476297, | |
| "loss": 12.9557, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "grad_norm": 0.16429013013839722, | |
| "learning_rate": 0.00029852446897875914, | |
| "loss": 12.9804, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 9.936, | |
| "grad_norm": 0.1919148713350296, | |
| "learning_rate": 0.0002985220688827553, | |
| "loss": 12.8735, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 9.952, | |
| "grad_norm": 0.1977461278438568, | |
| "learning_rate": 0.0002985196687867514, | |
| "loss": 12.6665, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 9.968, | |
| "grad_norm": 0.3409396708011627, | |
| "learning_rate": 0.0002985172686907476, | |
| "loss": 11.9422, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 9.984, | |
| "grad_norm": 0.1977001428604126, | |
| "learning_rate": 0.00029851486859474376, | |
| "loss": 13.392, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.19805894792079926, | |
| "learning_rate": 0.00029851246849873993, | |
| "loss": 12.3432, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 10.016, | |
| "grad_norm": 0.1851508915424347, | |
| "learning_rate": 0.0002985100684027361, | |
| "loss": 12.8953, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 10.032, | |
| "grad_norm": 0.15137746930122375, | |
| "learning_rate": 0.0002985076683067322, | |
| "loss": 12.8256, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 10.048, | |
| "grad_norm": 0.1815025508403778, | |
| "learning_rate": 0.00029850529221168846, | |
| "loss": 12.2427, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 10.064, | |
| "grad_norm": 0.282045841217041, | |
| "learning_rate": 0.0002985028921156846, | |
| "loss": 12.5777, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "grad_norm": 0.19669105112552643, | |
| "learning_rate": 0.0002985004920196808, | |
| "loss": 12.85, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 10.096, | |
| "grad_norm": 0.1557861566543579, | |
| "learning_rate": 0.0002984980919236769, | |
| "loss": 12.6325, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 10.112, | |
| "grad_norm": 0.16353458166122437, | |
| "learning_rate": 0.0002984956918276731, | |
| "loss": 12.5578, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 10.128, | |
| "grad_norm": 0.19124484062194824, | |
| "learning_rate": 0.00029849329173166925, | |
| "loss": 12.8784, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 10.144, | |
| "grad_norm": 0.16097944974899292, | |
| "learning_rate": 0.0002984908916356654, | |
| "loss": 11.7994, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "grad_norm": 0.155614972114563, | |
| "learning_rate": 0.0002984884915396616, | |
| "loss": 11.9617, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 10.176, | |
| "grad_norm": 0.19013510644435883, | |
| "learning_rate": 0.0002984860914436577, | |
| "loss": 12.1663, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 10.192, | |
| "grad_norm": 0.21610714495182037, | |
| "learning_rate": 0.00029848369134765387, | |
| "loss": 12.2304, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 10.208, | |
| "grad_norm": 0.15554966032505035, | |
| "learning_rate": 0.00029848129125165004, | |
| "loss": 11.9337, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 10.224, | |
| "grad_norm": 0.14373019337654114, | |
| "learning_rate": 0.0002984788911556462, | |
| "loss": 12.5049, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 10.24, | |
| "grad_norm": 0.197763592004776, | |
| "learning_rate": 0.0002984764910596424, | |
| "loss": 12.2087, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 10.256, | |
| "grad_norm": 0.1522061824798584, | |
| "learning_rate": 0.00029847409096363855, | |
| "loss": 12.475, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 10.272, | |
| "grad_norm": 0.15849411487579346, | |
| "learning_rate": 0.00029847169086763466, | |
| "loss": 12.1301, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 10.288, | |
| "grad_norm": 0.1680125594139099, | |
| "learning_rate": 0.00029846929077163083, | |
| "loss": 12.2041, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 10.304, | |
| "grad_norm": 0.17618972063064575, | |
| "learning_rate": 0.000298466890675627, | |
| "loss": 12.1634, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "grad_norm": 0.19345271587371826, | |
| "learning_rate": 0.00029846449057962317, | |
| "loss": 12.0509, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 10.336, | |
| "grad_norm": 0.15981802344322205, | |
| "learning_rate": 0.00029846209048361934, | |
| "loss": 11.879, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 10.352, | |
| "grad_norm": 0.1640341877937317, | |
| "learning_rate": 0.00029845969038761545, | |
| "loss": 12.3471, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 10.368, | |
| "grad_norm": 0.1751720905303955, | |
| "learning_rate": 0.0002984572902916116, | |
| "loss": 11.7085, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 10.384, | |
| "grad_norm": 0.15203487873077393, | |
| "learning_rate": 0.00029845491419656787, | |
| "loss": 11.9901, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "grad_norm": 0.1836910843849182, | |
| "learning_rate": 0.00029845251410056403, | |
| "loss": 11.5864, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 10.416, | |
| "grad_norm": 0.2329769879579544, | |
| "learning_rate": 0.00029845011400456015, | |
| "loss": 11.8386, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 10.432, | |
| "grad_norm": 0.25904643535614014, | |
| "learning_rate": 0.0002984477139085563, | |
| "loss": 11.6842, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 10.448, | |
| "grad_norm": 0.16373856365680695, | |
| "learning_rate": 0.0002984453138125525, | |
| "loss": 11.9861, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 10.464, | |
| "grad_norm": 0.1684304028749466, | |
| "learning_rate": 0.00029844291371654866, | |
| "loss": 12.1751, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "grad_norm": 0.1975129395723343, | |
| "learning_rate": 0.0002984405136205448, | |
| "loss": 11.9744, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 10.496, | |
| "grad_norm": 0.144730344414711, | |
| "learning_rate": 0.00029843811352454094, | |
| "loss": 11.7554, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 10.512, | |
| "grad_norm": 0.21416126191616058, | |
| "learning_rate": 0.0002984357134285371, | |
| "loss": 11.7885, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 10.528, | |
| "grad_norm": 0.1401461511850357, | |
| "learning_rate": 0.0002984333133325333, | |
| "loss": 12.2278, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 10.544, | |
| "grad_norm": 0.15199688076972961, | |
| "learning_rate": 0.00029843091323652945, | |
| "loss": 12.0611, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "grad_norm": 0.16079574823379517, | |
| "learning_rate": 0.0002984285131405256, | |
| "loss": 11.3473, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 10.576, | |
| "grad_norm": 0.14441320300102234, | |
| "learning_rate": 0.0002984261130445218, | |
| "loss": 11.5284, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 10.592, | |
| "grad_norm": 0.1676328480243683, | |
| "learning_rate": 0.0002984237129485179, | |
| "loss": 11.6487, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 10.608, | |
| "grad_norm": 0.13956011831760406, | |
| "learning_rate": 0.00029842131285251407, | |
| "loss": 11.772, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 10.624, | |
| "grad_norm": 0.17723798751831055, | |
| "learning_rate": 0.00029841891275651024, | |
| "loss": 11.7424, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "grad_norm": 0.18211066722869873, | |
| "learning_rate": 0.0002984165126605064, | |
| "loss": 11.9263, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 10.656, | |
| "grad_norm": 0.18465609848499298, | |
| "learning_rate": 0.0002984141125645026, | |
| "loss": 12.1533, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 10.672, | |
| "grad_norm": 0.15032535791397095, | |
| "learning_rate": 0.0002984117124684987, | |
| "loss": 11.8711, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 10.688, | |
| "grad_norm": 0.25048136711120605, | |
| "learning_rate": 0.00029840931237249486, | |
| "loss": 12.1925, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 10.704, | |
| "grad_norm": 0.17632503807544708, | |
| "learning_rate": 0.00029840691227649103, | |
| "loss": 12.0652, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 10.72, | |
| "grad_norm": 0.17492571473121643, | |
| "learning_rate": 0.0002984045121804872, | |
| "loss": 12.3961, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 10.736, | |
| "grad_norm": 0.17848367989063263, | |
| "learning_rate": 0.00029840211208448337, | |
| "loss": 12.0021, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 10.752, | |
| "grad_norm": 0.23175941407680511, | |
| "learning_rate": 0.00029839971198847954, | |
| "loss": 11.4583, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 10.768, | |
| "grad_norm": 0.24281519651412964, | |
| "learning_rate": 0.0002983973358934357, | |
| "loss": 12.0376, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 10.784, | |
| "grad_norm": 0.18129272758960724, | |
| "learning_rate": 0.00029839493579743184, | |
| "loss": 12.1892, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 0.1454136222600937, | |
| "learning_rate": 0.000298392535701428, | |
| "loss": 11.9333, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 10.816, | |
| "grad_norm": 0.12412439286708832, | |
| "learning_rate": 0.0002983901356054242, | |
| "loss": 11.0441, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 10.832, | |
| "grad_norm": 0.19814914464950562, | |
| "learning_rate": 0.00029838773550942035, | |
| "loss": 11.4348, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 10.848, | |
| "grad_norm": 0.2250308245420456, | |
| "learning_rate": 0.0002983853354134165, | |
| "loss": 11.723, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 10.864, | |
| "grad_norm": 0.1328551471233368, | |
| "learning_rate": 0.0002983829353174127, | |
| "loss": 11.4324, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "grad_norm": 0.2366170883178711, | |
| "learning_rate": 0.00029838053522140886, | |
| "loss": 12.1462, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 10.896, | |
| "grad_norm": 0.20911742746829987, | |
| "learning_rate": 0.00029837813512540503, | |
| "loss": 11.6067, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 10.912, | |
| "grad_norm": 0.1770290583372116, | |
| "learning_rate": 0.00029837573502940114, | |
| "loss": 11.9299, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 10.928, | |
| "grad_norm": 0.21429571509361267, | |
| "learning_rate": 0.0002983733349333973, | |
| "loss": 11.3683, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 10.943999999999999, | |
| "grad_norm": 0.1542270928621292, | |
| "learning_rate": 0.0002983709348373935, | |
| "loss": 11.3472, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "grad_norm": 0.2420985847711563, | |
| "learning_rate": 0.00029836853474138965, | |
| "loss": 11.5805, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 10.975999999999999, | |
| "grad_norm": 0.17665143311023712, | |
| "learning_rate": 0.0002983661346453858, | |
| "loss": 11.7406, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 10.992, | |
| "grad_norm": 0.26210835576057434, | |
| "learning_rate": 0.00029836373454938193, | |
| "loss": 11.7457, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 11.008, | |
| "grad_norm": 0.14472606778144836, | |
| "learning_rate": 0.0002983613344533781, | |
| "loss": 11.4662, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 11.024, | |
| "grad_norm": 0.17449091374874115, | |
| "learning_rate": 0.0002983589343573743, | |
| "loss": 11.0297, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 0.15488724410533905, | |
| "learning_rate": 0.00029835653426137044, | |
| "loss": 11.792, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 11.056, | |
| "grad_norm": 0.1447325348854065, | |
| "learning_rate": 0.0002983541341653666, | |
| "loss": 11.4483, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 11.072, | |
| "grad_norm": 0.17111489176750183, | |
| "learning_rate": 0.0002983517340693628, | |
| "loss": 11.1499, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 11.088, | |
| "grad_norm": 0.17446951568126678, | |
| "learning_rate": 0.0002983493339733589, | |
| "loss": 10.6961, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 11.104, | |
| "grad_norm": 0.1421278566122055, | |
| "learning_rate": 0.00029834693387735506, | |
| "loss": 11.4794, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "grad_norm": 0.17439322173595428, | |
| "learning_rate": 0.00029834455778231125, | |
| "loss": 11.0965, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 11.136, | |
| "grad_norm": 0.16200323402881622, | |
| "learning_rate": 0.0002983421576863074, | |
| "loss": 11.1367, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 11.152, | |
| "grad_norm": 0.3391527831554413, | |
| "learning_rate": 0.0002983397575903036, | |
| "loss": 10.7709, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 11.168, | |
| "grad_norm": 0.18793489038944244, | |
| "learning_rate": 0.0002983373574942997, | |
| "loss": 11.1479, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 11.184, | |
| "grad_norm": 0.1996636688709259, | |
| "learning_rate": 0.0002983349573982959, | |
| "loss": 11.8347, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "grad_norm": 0.166090190410614, | |
| "learning_rate": 0.00029833255730229205, | |
| "loss": 10.9514, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 11.216, | |
| "grad_norm": 0.17243006825447083, | |
| "learning_rate": 0.0002983301572062882, | |
| "loss": 11.2505, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 11.232, | |
| "grad_norm": 0.17860250174999237, | |
| "learning_rate": 0.0002983277571102844, | |
| "loss": 11.023, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 11.248, | |
| "grad_norm": 0.13896320760250092, | |
| "learning_rate": 0.00029832535701428055, | |
| "loss": 11.092, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 11.264, | |
| "grad_norm": 0.20008546113967896, | |
| "learning_rate": 0.00029832295691827667, | |
| "loss": 11.2161, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "grad_norm": 0.14014984667301178, | |
| "learning_rate": 0.00029832055682227284, | |
| "loss": 11.315, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 11.296, | |
| "grad_norm": 0.16158168017864227, | |
| "learning_rate": 0.000298318156726269, | |
| "loss": 11.3935, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 11.312, | |
| "grad_norm": 0.15444719791412354, | |
| "learning_rate": 0.0002983157566302652, | |
| "loss": 10.9662, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 11.328, | |
| "grad_norm": 0.21788270771503448, | |
| "learning_rate": 0.00029831335653426134, | |
| "loss": 11.4848, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 11.344, | |
| "grad_norm": 0.17685194313526154, | |
| "learning_rate": 0.0002983109564382575, | |
| "loss": 11.3436, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "grad_norm": 0.15553423762321472, | |
| "learning_rate": 0.0002983085563422537, | |
| "loss": 11.1136, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 11.376, | |
| "grad_norm": 0.1547129899263382, | |
| "learning_rate": 0.00029830615624624985, | |
| "loss": 10.7924, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 11.392, | |
| "grad_norm": 0.1907842457294464, | |
| "learning_rate": 0.000298303756150246, | |
| "loss": 10.9726, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 11.408, | |
| "grad_norm": 0.15053051710128784, | |
| "learning_rate": 0.00029830135605424214, | |
| "loss": 12.0626, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 11.424, | |
| "grad_norm": 0.14403216540813446, | |
| "learning_rate": 0.0002982989559582383, | |
| "loss": 11.428, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "grad_norm": 0.15850169956684113, | |
| "learning_rate": 0.0002982965558622345, | |
| "loss": 11.1033, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 11.456, | |
| "grad_norm": 0.18223829567432404, | |
| "learning_rate": 0.00029829417976719066, | |
| "loss": 11.5088, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 11.472, | |
| "grad_norm": 0.18121246993541718, | |
| "learning_rate": 0.00029829177967118683, | |
| "loss": 11.0869, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 11.488, | |
| "grad_norm": 0.1591707020998001, | |
| "learning_rate": 0.00029828937957518295, | |
| "loss": 10.5898, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 11.504, | |
| "grad_norm": 0.1652923971414566, | |
| "learning_rate": 0.0002982869794791791, | |
| "loss": 11.3647, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "grad_norm": 0.1930815577507019, | |
| "learning_rate": 0.0002982845793831753, | |
| "loss": 11.4873, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 11.536, | |
| "grad_norm": 0.1646055281162262, | |
| "learning_rate": 0.00029828217928717145, | |
| "loss": 11.3799, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 11.552, | |
| "grad_norm": 0.19326475262641907, | |
| "learning_rate": 0.0002982797791911676, | |
| "loss": 10.8387, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 11.568, | |
| "grad_norm": 0.23909342288970947, | |
| "learning_rate": 0.0002982773790951638, | |
| "loss": 10.757, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 11.584, | |
| "grad_norm": 0.1616702377796173, | |
| "learning_rate": 0.0002982749789991599, | |
| "loss": 10.7907, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "grad_norm": 0.16581912338733673, | |
| "learning_rate": 0.0002982725789031561, | |
| "loss": 10.8977, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 11.616, | |
| "grad_norm": 0.1478215605020523, | |
| "learning_rate": 0.00029827017880715225, | |
| "loss": 10.9325, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 11.632, | |
| "grad_norm": 0.2693212628364563, | |
| "learning_rate": 0.0002982677787111484, | |
| "loss": 11.2731, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 11.648, | |
| "grad_norm": 0.15163065493106842, | |
| "learning_rate": 0.0002982653786151446, | |
| "loss": 11.0141, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 11.664, | |
| "grad_norm": 0.15364685654640198, | |
| "learning_rate": 0.00029826297851914075, | |
| "loss": 10.6781, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "grad_norm": 0.1410771906375885, | |
| "learning_rate": 0.00029826057842313687, | |
| "loss": 11.0262, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 11.696, | |
| "grad_norm": 0.2245720773935318, | |
| "learning_rate": 0.00029825817832713304, | |
| "loss": 11.51, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 11.712, | |
| "grad_norm": 0.17434003949165344, | |
| "learning_rate": 0.0002982557782311292, | |
| "loss": 10.7819, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 11.728, | |
| "grad_norm": 0.13878166675567627, | |
| "learning_rate": 0.0002982534021360854, | |
| "loss": 10.8833, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 11.744, | |
| "grad_norm": 0.13650259375572205, | |
| "learning_rate": 0.00029825100204008157, | |
| "loss": 11.0158, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "grad_norm": 0.22818398475646973, | |
| "learning_rate": 0.00029824860194407773, | |
| "loss": 10.8819, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 11.776, | |
| "grad_norm": 0.14601178467273712, | |
| "learning_rate": 0.0002982462018480739, | |
| "loss": 10.0593, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 11.792, | |
| "grad_norm": 0.2245131880044937, | |
| "learning_rate": 0.00029824380175207007, | |
| "loss": 10.6634, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 11.808, | |
| "grad_norm": 1.000320553779602, | |
| "learning_rate": 0.0002982414016560662, | |
| "loss": 10.961, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 11.824, | |
| "grad_norm": 0.18026384711265564, | |
| "learning_rate": 0.00029823900156006236, | |
| "loss": 11.1536, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "grad_norm": 0.15758727490901947, | |
| "learning_rate": 0.0002982366014640585, | |
| "loss": 10.6586, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 11.856, | |
| "grad_norm": 0.19163353741168976, | |
| "learning_rate": 0.0002982342013680547, | |
| "loss": 11.0334, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 11.872, | |
| "grad_norm": 0.11467296630144119, | |
| "learning_rate": 0.00029823180127205086, | |
| "loss": 10.8224, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 11.888, | |
| "grad_norm": 0.15869416296482086, | |
| "learning_rate": 0.00029822940117604703, | |
| "loss": 10.4906, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 11.904, | |
| "grad_norm": 0.1966274380683899, | |
| "learning_rate": 0.00029822700108004315, | |
| "loss": 10.4152, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "grad_norm": 0.16446225345134735, | |
| "learning_rate": 0.0002982246009840393, | |
| "loss": 10.4887, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 11.936, | |
| "grad_norm": 0.16940893232822418, | |
| "learning_rate": 0.0002982222008880355, | |
| "loss": 10.39, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 11.952, | |
| "grad_norm": 0.1838199496269226, | |
| "learning_rate": 0.00029821980079203166, | |
| "loss": 10.384, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 11.968, | |
| "grad_norm": 0.17523860931396484, | |
| "learning_rate": 0.0002982174006960278, | |
| "loss": 10.8568, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 11.984, | |
| "grad_norm": 0.1432792991399765, | |
| "learning_rate": 0.000298215000600024, | |
| "loss": 10.3596, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.20020250976085663, | |
| "learning_rate": 0.0002982126005040201, | |
| "loss": 10.14, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 12.016, | |
| "grad_norm": 0.19777518510818481, | |
| "learning_rate": 0.0002982102004080163, | |
| "loss": 10.9224, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 12.032, | |
| "grad_norm": 0.17126210033893585, | |
| "learning_rate": 0.00029820780031201245, | |
| "loss": 10.5306, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 12.048, | |
| "grad_norm": 0.16797253489494324, | |
| "learning_rate": 0.0002982054002160086, | |
| "loss": 10.8089, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 12.064, | |
| "grad_norm": 0.20862014591693878, | |
| "learning_rate": 0.0002982030001200048, | |
| "loss": 10.4757, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "grad_norm": 0.18397895991802216, | |
| "learning_rate": 0.0002982006000240009, | |
| "loss": 9.9135, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 12.096, | |
| "grad_norm": 0.16641663014888763, | |
| "learning_rate": 0.00029819819992799707, | |
| "loss": 10.6077, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 12.112, | |
| "grad_norm": 0.16870319843292236, | |
| "learning_rate": 0.00029819579983199324, | |
| "loss": 10.5788, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 12.128, | |
| "grad_norm": 0.16674315929412842, | |
| "learning_rate": 0.0002981933997359894, | |
| "loss": 10.7791, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 12.144, | |
| "grad_norm": 0.1637590378522873, | |
| "learning_rate": 0.0002981909996399856, | |
| "loss": 10.0084, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 12.16, | |
| "grad_norm": 0.16165070235729218, | |
| "learning_rate": 0.00029818859954398175, | |
| "loss": 10.7957, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 12.176, | |
| "grad_norm": 0.1414174884557724, | |
| "learning_rate": 0.00029818619944797786, | |
| "loss": 9.8668, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 12.192, | |
| "grad_norm": 0.1490393877029419, | |
| "learning_rate": 0.00029818379935197403, | |
| "loss": 10.5844, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 12.208, | |
| "grad_norm": 0.15608841180801392, | |
| "learning_rate": 0.0002981813992559702, | |
| "loss": 10.7121, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 12.224, | |
| "grad_norm": 0.1658240258693695, | |
| "learning_rate": 0.00029817899915996637, | |
| "loss": 10.4018, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "grad_norm": 0.1533997803926468, | |
| "learning_rate": 0.00029817659906396254, | |
| "loss": 10.0445, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 12.256, | |
| "grad_norm": 0.14606164395809174, | |
| "learning_rate": 0.00029817419896795865, | |
| "loss": 10.8624, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 12.272, | |
| "grad_norm": 0.1926526576280594, | |
| "learning_rate": 0.0002981717988719548, | |
| "loss": 9.9639, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 12.288, | |
| "grad_norm": 0.16846922039985657, | |
| "learning_rate": 0.000298169398775951, | |
| "loss": 10.4076, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 12.304, | |
| "grad_norm": 0.1497686505317688, | |
| "learning_rate": 0.00029816699867994716, | |
| "loss": 10.3741, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 0.17146418988704681, | |
| "learning_rate": 0.00029816459858394333, | |
| "loss": 10.6163, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 12.336, | |
| "grad_norm": 0.169904425740242, | |
| "learning_rate": 0.0002981621984879395, | |
| "loss": 10.0631, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 12.352, | |
| "grad_norm": 0.15850874781608582, | |
| "learning_rate": 0.00029815979839193567, | |
| "loss": 10.0799, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 12.368, | |
| "grad_norm": 0.15920597314834595, | |
| "learning_rate": 0.00029815739829593184, | |
| "loss": 9.6119, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 12.384, | |
| "grad_norm": 0.2246374636888504, | |
| "learning_rate": 0.000298154998199928, | |
| "loss": 10.3029, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "grad_norm": 0.168796569108963, | |
| "learning_rate": 0.0002981525981039241, | |
| "loss": 10.3374, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 12.416, | |
| "grad_norm": 0.1864066869020462, | |
| "learning_rate": 0.0002981501980079203, | |
| "loss": 10.0087, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 12.432, | |
| "grad_norm": 0.14401012659072876, | |
| "learning_rate": 0.0002981478219128765, | |
| "loss": 10.1803, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 12.448, | |
| "grad_norm": 0.1375201791524887, | |
| "learning_rate": 0.00029814542181687265, | |
| "loss": 9.911, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 12.464, | |
| "grad_norm": 0.1398741900920868, | |
| "learning_rate": 0.0002981430217208688, | |
| "loss": 10.261, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "grad_norm": 0.15873165428638458, | |
| "learning_rate": 0.000298140621624865, | |
| "loss": 10.7101, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 12.496, | |
| "grad_norm": 0.1714644730091095, | |
| "learning_rate": 0.0002981382215288611, | |
| "loss": 10.1714, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 12.512, | |
| "grad_norm": 0.1591562181711197, | |
| "learning_rate": 0.00029813582143285727, | |
| "loss": 10.1645, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 12.528, | |
| "grad_norm": 0.18264716863632202, | |
| "learning_rate": 0.00029813342133685344, | |
| "loss": 10.3564, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 12.544, | |
| "grad_norm": 0.1514509618282318, | |
| "learning_rate": 0.0002981310212408496, | |
| "loss": 10.0476, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "grad_norm": 0.19021818041801453, | |
| "learning_rate": 0.0002981286211448458, | |
| "loss": 10.2492, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 12.576, | |
| "grad_norm": 0.21221980452537537, | |
| "learning_rate": 0.0002981262210488419, | |
| "loss": 9.7379, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 12.592, | |
| "grad_norm": 0.16575005650520325, | |
| "learning_rate": 0.00029812382095283806, | |
| "loss": 10.237, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 12.608, | |
| "grad_norm": 0.12602052092552185, | |
| "learning_rate": 0.00029812142085683423, | |
| "loss": 10.0729, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 12.624, | |
| "grad_norm": 0.23105710744857788, | |
| "learning_rate": 0.0002981190207608304, | |
| "loss": 9.8609, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "grad_norm": 0.29600638151168823, | |
| "learning_rate": 0.00029811662066482657, | |
| "loss": 9.8653, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 12.656, | |
| "grad_norm": 0.19172607362270355, | |
| "learning_rate": 0.00029811422056882274, | |
| "loss": 9.8614, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 12.672, | |
| "grad_norm": 0.1930418759584427, | |
| "learning_rate": 0.00029811182047281886, | |
| "loss": 10.0208, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 12.688, | |
| "grad_norm": 0.12393278628587723, | |
| "learning_rate": 0.000298109420376815, | |
| "loss": 10.349, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 12.704, | |
| "grad_norm": 0.1565830409526825, | |
| "learning_rate": 0.0002981070202808112, | |
| "loss": 10.5402, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "grad_norm": 0.13968247175216675, | |
| "learning_rate": 0.00029810462018480736, | |
| "loss": 9.9296, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 12.736, | |
| "grad_norm": 0.17765802145004272, | |
| "learning_rate": 0.00029810222008880353, | |
| "loss": 9.8002, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 12.752, | |
| "grad_norm": 0.23838719725608826, | |
| "learning_rate": 0.00029809981999279965, | |
| "loss": 9.8636, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 12.768, | |
| "grad_norm": 0.23086270689964294, | |
| "learning_rate": 0.0002980974438977559, | |
| "loss": 9.9585, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 12.784, | |
| "grad_norm": 0.14923255145549774, | |
| "learning_rate": 0.00029809504380175206, | |
| "loss": 9.5379, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 0.1599462628364563, | |
| "learning_rate": 0.00029809264370574823, | |
| "loss": 9.641, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 12.816, | |
| "grad_norm": 0.1716078370809555, | |
| "learning_rate": 0.00029809024360974434, | |
| "loss": 9.8697, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 12.832, | |
| "grad_norm": 0.19052661955356598, | |
| "learning_rate": 0.0002980878435137405, | |
| "loss": 9.6785, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 12.848, | |
| "grad_norm": 0.15575654804706573, | |
| "learning_rate": 0.0002980854434177367, | |
| "loss": 9.9394, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 12.864, | |
| "grad_norm": 0.19439518451690674, | |
| "learning_rate": 0.00029808304332173285, | |
| "loss": 9.5522, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "grad_norm": 0.17798827588558197, | |
| "learning_rate": 0.000298080643225729, | |
| "loss": 9.9453, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 12.896, | |
| "grad_norm": 0.16586044430732727, | |
| "learning_rate": 0.00029807824312972513, | |
| "loss": 9.8505, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 12.912, | |
| "grad_norm": 0.15794214606285095, | |
| "learning_rate": 0.0002980758430337213, | |
| "loss": 10.0497, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 12.928, | |
| "grad_norm": 0.1685098111629486, | |
| "learning_rate": 0.0002980734429377175, | |
| "loss": 10.2658, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 12.943999999999999, | |
| "grad_norm": 0.16599301993846893, | |
| "learning_rate": 0.00029807104284171364, | |
| "loss": 9.837, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "grad_norm": 0.14692434668540955, | |
| "learning_rate": 0.0002980686427457098, | |
| "loss": 10.1817, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 12.975999999999999, | |
| "grad_norm": 0.15374502539634705, | |
| "learning_rate": 0.000298066242649706, | |
| "loss": 10.1231, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 12.992, | |
| "grad_norm": 0.1369294375181198, | |
| "learning_rate": 0.0002980638425537021, | |
| "loss": 9.8245, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 13.008, | |
| "grad_norm": 0.20259645581245422, | |
| "learning_rate": 0.00029806144245769826, | |
| "loss": 9.7027, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 13.024, | |
| "grad_norm": 0.1258879452943802, | |
| "learning_rate": 0.00029805904236169443, | |
| "loss": 9.8863, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 0.14773085713386536, | |
| "learning_rate": 0.0002980566422656906, | |
| "loss": 9.4255, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 13.056, | |
| "grad_norm": 0.17212265729904175, | |
| "learning_rate": 0.00029805424216968677, | |
| "loss": 10.0506, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 13.072, | |
| "grad_norm": 0.179426372051239, | |
| "learning_rate": 0.0002980518420736829, | |
| "loss": 9.5137, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 13.088, | |
| "grad_norm": 0.15935377776622772, | |
| "learning_rate": 0.00029804944197767906, | |
| "loss": 9.3141, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 13.104, | |
| "grad_norm": 0.17460429668426514, | |
| "learning_rate": 0.0002980470418816752, | |
| "loss": 9.8005, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "grad_norm": 0.20005491375923157, | |
| "learning_rate": 0.0002980446417856714, | |
| "loss": 9.7239, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 13.136, | |
| "grad_norm": 0.15051016211509705, | |
| "learning_rate": 0.00029804224168966756, | |
| "loss": 10.214, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 13.152, | |
| "grad_norm": 0.16659046709537506, | |
| "learning_rate": 0.00029803984159366373, | |
| "loss": 9.4695, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 13.168, | |
| "grad_norm": 0.16346730291843414, | |
| "learning_rate": 0.00029803744149765985, | |
| "loss": 9.5839, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 13.184, | |
| "grad_norm": 0.16145597398281097, | |
| "learning_rate": 0.000298035041401656, | |
| "loss": 9.2663, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "grad_norm": 0.13834603130817413, | |
| "learning_rate": 0.00029803266530661226, | |
| "loss": 9.6926, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 13.216, | |
| "grad_norm": 0.17841538786888123, | |
| "learning_rate": 0.0002980302652106084, | |
| "loss": 9.4752, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 13.232, | |
| "grad_norm": 0.14639347791671753, | |
| "learning_rate": 0.00029802786511460454, | |
| "loss": 9.9606, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 13.248, | |
| "grad_norm": 0.15291540324687958, | |
| "learning_rate": 0.0002980254650186007, | |
| "loss": 9.9284, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 13.264, | |
| "grad_norm": 0.15908333659172058, | |
| "learning_rate": 0.0002980230649225969, | |
| "loss": 9.5464, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 13.28, | |
| "grad_norm": 0.16768860816955566, | |
| "learning_rate": 0.00029802066482659305, | |
| "loss": 10.2164, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 13.296, | |
| "grad_norm": 0.18221326172351837, | |
| "learning_rate": 0.0002980182647305892, | |
| "loss": 9.6566, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 13.312, | |
| "grad_norm": 0.13944192230701447, | |
| "learning_rate": 0.00029801586463458534, | |
| "loss": 9.4149, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 13.328, | |
| "grad_norm": 0.20090098679065704, | |
| "learning_rate": 0.0002980134645385815, | |
| "loss": 9.1968, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 13.344, | |
| "grad_norm": 0.17636704444885254, | |
| "learning_rate": 0.0002980110644425777, | |
| "loss": 9.4497, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "grad_norm": 0.19672048091888428, | |
| "learning_rate": 0.00029800866434657384, | |
| "loss": 9.3083, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 13.376, | |
| "grad_norm": 0.1991618573665619, | |
| "learning_rate": 0.00029800626425057, | |
| "loss": 9.59, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 13.392, | |
| "grad_norm": 0.17260773479938507, | |
| "learning_rate": 0.00029800386415456613, | |
| "loss": 9.9553, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 13.408, | |
| "grad_norm": 0.13101576268672943, | |
| "learning_rate": 0.0002980014640585623, | |
| "loss": 10.0732, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 13.424, | |
| "grad_norm": 0.16349157691001892, | |
| "learning_rate": 0.00029799906396255847, | |
| "loss": 9.8363, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "grad_norm": 0.1792200654745102, | |
| "learning_rate": 0.00029799666386655464, | |
| "loss": 9.9456, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 13.456, | |
| "grad_norm": 0.13476693630218506, | |
| "learning_rate": 0.0002979942637705508, | |
| "loss": 9.4642, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 13.472, | |
| "grad_norm": 0.17343075573444366, | |
| "learning_rate": 0.000297991863674547, | |
| "loss": 9.4041, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 13.488, | |
| "grad_norm": 0.16127794981002808, | |
| "learning_rate": 0.0002979894635785431, | |
| "loss": 9.2465, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 13.504, | |
| "grad_norm": 0.14993996918201447, | |
| "learning_rate": 0.00029798706348253926, | |
| "loss": 9.5946, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "grad_norm": 0.21931160986423492, | |
| "learning_rate": 0.0002979846873874955, | |
| "loss": 9.5796, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 13.536, | |
| "grad_norm": 0.15303994715213776, | |
| "learning_rate": 0.0002979822872914916, | |
| "loss": 9.4222, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 13.552, | |
| "grad_norm": 0.1905248612165451, | |
| "learning_rate": 0.0002979798871954878, | |
| "loss": 9.4192, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 13.568, | |
| "grad_norm": 0.17656217515468597, | |
| "learning_rate": 0.00029797748709948395, | |
| "loss": 9.685, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 13.584, | |
| "grad_norm": 0.31464865803718567, | |
| "learning_rate": 0.0002979750870034801, | |
| "loss": 9.4839, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 0.20140250027179718, | |
| "learning_rate": 0.0002979726869074763, | |
| "loss": 9.4393, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 13.616, | |
| "grad_norm": 0.1453031599521637, | |
| "learning_rate": 0.00029797028681147246, | |
| "loss": 9.4777, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 13.632, | |
| "grad_norm": 0.15559718012809753, | |
| "learning_rate": 0.0002979678867154686, | |
| "loss": 9.7772, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 13.648, | |
| "grad_norm": 0.16849826276302338, | |
| "learning_rate": 0.00029796548661946475, | |
| "loss": 9.0954, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 13.664, | |
| "grad_norm": 0.15798023343086243, | |
| "learning_rate": 0.0002979630865234609, | |
| "loss": 9.7756, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "grad_norm": 0.0940115824341774, | |
| "learning_rate": 0.0002979606864274571, | |
| "loss": 9.9294, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 13.696, | |
| "grad_norm": 0.18608032166957855, | |
| "learning_rate": 0.00029795828633145325, | |
| "loss": 9.4524, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 13.712, | |
| "grad_norm": 0.16172797977924347, | |
| "learning_rate": 0.00029795588623544937, | |
| "loss": 9.6146, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 13.728, | |
| "grad_norm": 0.1493913233280182, | |
| "learning_rate": 0.00029795348613944554, | |
| "loss": 8.8783, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 13.744, | |
| "grad_norm": 0.1365765631198883, | |
| "learning_rate": 0.0002979510860434417, | |
| "loss": 9.4707, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "grad_norm": 0.17814397811889648, | |
| "learning_rate": 0.0002979486859474379, | |
| "loss": 9.4121, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 13.776, | |
| "grad_norm": 0.16484831273555756, | |
| "learning_rate": 0.00029794628585143405, | |
| "loss": 9.0902, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 13.792, | |
| "grad_norm": 0.1435382217168808, | |
| "learning_rate": 0.0002979438857554302, | |
| "loss": 9.4565, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 13.808, | |
| "grad_norm": 0.1451929211616516, | |
| "learning_rate": 0.00029794148565942633, | |
| "loss": 9.6377, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 13.824, | |
| "grad_norm": 0.1439056396484375, | |
| "learning_rate": 0.0002979390855634225, | |
| "loss": 9.2624, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 13.84, | |
| "grad_norm": 0.1712324023246765, | |
| "learning_rate": 0.00029793668546741867, | |
| "loss": 9.2021, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 13.856, | |
| "grad_norm": 0.15382009744644165, | |
| "learning_rate": 0.00029793428537141484, | |
| "loss": 8.8688, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 13.872, | |
| "grad_norm": 0.14327426254749298, | |
| "learning_rate": 0.000297931885275411, | |
| "loss": 9.2336, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 13.888, | |
| "grad_norm": 0.21682417392730713, | |
| "learning_rate": 0.0002979294851794071, | |
| "loss": 8.9508, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 13.904, | |
| "grad_norm": 0.18012550473213196, | |
| "learning_rate": 0.0002979270850834033, | |
| "loss": 8.8259, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "grad_norm": 0.19224317371845245, | |
| "learning_rate": 0.00029792468498739946, | |
| "loss": 9.0594, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 13.936, | |
| "grad_norm": 0.14684438705444336, | |
| "learning_rate": 0.00029792228489139563, | |
| "loss": 8.6664, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 13.952, | |
| "grad_norm": 0.15808767080307007, | |
| "learning_rate": 0.0002979198847953918, | |
| "loss": 8.8133, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 13.968, | |
| "grad_norm": 0.1466471403837204, | |
| "learning_rate": 0.00029791748469938797, | |
| "loss": 9.2512, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 13.984, | |
| "grad_norm": 0.13929226994514465, | |
| "learning_rate": 0.0002979150846033841, | |
| "loss": 9.0263, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.1410779356956482, | |
| "learning_rate": 0.00029791268450738025, | |
| "loss": 9.0906, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 14.016, | |
| "grad_norm": 0.16633394360542297, | |
| "learning_rate": 0.0002979102844113764, | |
| "loss": 8.8764, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 14.032, | |
| "grad_norm": 0.19240239262580872, | |
| "learning_rate": 0.0002979078843153726, | |
| "loss": 8.6873, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 14.048, | |
| "grad_norm": 0.2285340428352356, | |
| "learning_rate": 0.00029790548421936876, | |
| "loss": 8.7636, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 14.064, | |
| "grad_norm": 0.16399361193180084, | |
| "learning_rate": 0.0002979030841233649, | |
| "loss": 9.3241, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "grad_norm": 0.14966578781604767, | |
| "learning_rate": 0.00029790068402736104, | |
| "loss": 9.0301, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 14.096, | |
| "grad_norm": 0.17241202294826508, | |
| "learning_rate": 0.0002978982839313572, | |
| "loss": 8.9678, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 14.112, | |
| "grad_norm": 0.13520659506320953, | |
| "learning_rate": 0.0002978958838353534, | |
| "loss": 9.0678, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 14.128, | |
| "grad_norm": 0.15996631979942322, | |
| "learning_rate": 0.00029789348373934955, | |
| "loss": 8.7807, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 14.144, | |
| "grad_norm": 0.14483466744422913, | |
| "learning_rate": 0.0002978910836433457, | |
| "loss": 8.6088, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "grad_norm": 0.15150679647922516, | |
| "learning_rate": 0.00029788868354734183, | |
| "loss": 9.2128, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 14.176, | |
| "grad_norm": 0.1668185293674469, | |
| "learning_rate": 0.0002978863074522981, | |
| "loss": 9.518, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 14.192, | |
| "grad_norm": 0.17209367454051971, | |
| "learning_rate": 0.00029788393135725427, | |
| "loss": 8.5952, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 14.208, | |
| "grad_norm": 0.15907296538352966, | |
| "learning_rate": 0.00029788155526221045, | |
| "loss": 8.7632, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 14.224, | |
| "grad_norm": 0.18298570811748505, | |
| "learning_rate": 0.0002978791551662066, | |
| "loss": 8.8021, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "grad_norm": 0.19813942909240723, | |
| "learning_rate": 0.0002978767550702028, | |
| "loss": 9.1381, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 14.256, | |
| "grad_norm": 0.1819518506526947, | |
| "learning_rate": 0.00029787435497419896, | |
| "loss": 9.3086, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 14.272, | |
| "grad_norm": 0.1506895273923874, | |
| "learning_rate": 0.0002978719548781951, | |
| "loss": 8.7471, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 14.288, | |
| "grad_norm": 0.1686287224292755, | |
| "learning_rate": 0.00029786955478219125, | |
| "loss": 8.8441, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 14.304, | |
| "grad_norm": 0.1486745923757553, | |
| "learning_rate": 0.0002978671546861874, | |
| "loss": 9.1216, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "grad_norm": 0.18762429058551788, | |
| "learning_rate": 0.0002978647545901836, | |
| "loss": 9.402, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 14.336, | |
| "grad_norm": 0.13964596390724182, | |
| "learning_rate": 0.00029786235449417975, | |
| "loss": 9.2773, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 14.352, | |
| "grad_norm": 0.2629782557487488, | |
| "learning_rate": 0.00029785995439817587, | |
| "loss": 9.05, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 14.368, | |
| "grad_norm": 0.12668898701667786, | |
| "learning_rate": 0.00029785755430217204, | |
| "loss": 8.8949, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 14.384, | |
| "grad_norm": 0.14362965524196625, | |
| "learning_rate": 0.0002978551542061682, | |
| "loss": 8.6261, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "grad_norm": 0.16700971126556396, | |
| "learning_rate": 0.0002978527541101644, | |
| "loss": 8.8621, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 14.416, | |
| "grad_norm": 0.1597680300474167, | |
| "learning_rate": 0.00029785035401416055, | |
| "loss": 9.1614, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 14.432, | |
| "grad_norm": 0.16268526017665863, | |
| "learning_rate": 0.0002978479539181567, | |
| "loss": 9.2429, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 14.448, | |
| "grad_norm": 0.19829140603542328, | |
| "learning_rate": 0.00029784555382215283, | |
| "loss": 8.6337, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 14.464, | |
| "grad_norm": 0.1362706571817398, | |
| "learning_rate": 0.000297843153726149, | |
| "loss": 8.5578, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 14.48, | |
| "grad_norm": 0.17475652694702148, | |
| "learning_rate": 0.00029784075363014517, | |
| "loss": 9.3407, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 14.496, | |
| "grad_norm": 0.139988973736763, | |
| "learning_rate": 0.00029783835353414134, | |
| "loss": 8.9509, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 14.512, | |
| "grad_norm": 0.15270425379276276, | |
| "learning_rate": 0.0002978359534381375, | |
| "loss": 8.6833, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 14.528, | |
| "grad_norm": 0.12172385305166245, | |
| "learning_rate": 0.0002978335533421336, | |
| "loss": 8.1913, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 14.544, | |
| "grad_norm": 0.18453091382980347, | |
| "learning_rate": 0.0002978311532461298, | |
| "loss": 9.0573, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 0.12650534510612488, | |
| "learning_rate": 0.00029782875315012596, | |
| "loss": 8.8951, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 14.576, | |
| "grad_norm": 0.19508056342601776, | |
| "learning_rate": 0.00029782635305412213, | |
| "loss": 8.8831, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 14.592, | |
| "grad_norm": 0.12826193869113922, | |
| "learning_rate": 0.0002978239529581183, | |
| "loss": 8.7428, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 14.608, | |
| "grad_norm": 0.16497032344341278, | |
| "learning_rate": 0.00029782155286211447, | |
| "loss": 9.226, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 14.624, | |
| "grad_norm": 0.1467789113521576, | |
| "learning_rate": 0.0002978191527661106, | |
| "loss": 8.56, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 14.64, | |
| "grad_norm": 0.13535846769809723, | |
| "learning_rate": 0.00029781675267010675, | |
| "loss": 9.2005, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 14.656, | |
| "grad_norm": 0.2261963039636612, | |
| "learning_rate": 0.000297814352574103, | |
| "loss": 8.9913, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 14.672, | |
| "grad_norm": 0.16329319775104523, | |
| "learning_rate": 0.0002978119524780991, | |
| "loss": 8.8455, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 14.688, | |
| "grad_norm": 0.14644941687583923, | |
| "learning_rate": 0.00029780955238209526, | |
| "loss": 8.8035, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 14.704, | |
| "grad_norm": 0.17719560861587524, | |
| "learning_rate": 0.00029780715228609143, | |
| "loss": 8.9548, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "grad_norm": 0.17204242944717407, | |
| "learning_rate": 0.0002978047521900876, | |
| "loss": 8.9065, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 14.736, | |
| "grad_norm": 0.15323054790496826, | |
| "learning_rate": 0.00029780235209408377, | |
| "loss": 8.642, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 14.752, | |
| "grad_norm": 0.12264496088027954, | |
| "learning_rate": 0.00029779995199807994, | |
| "loss": 8.7372, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 14.768, | |
| "grad_norm": 0.13607698678970337, | |
| "learning_rate": 0.00029779755190207605, | |
| "loss": 8.649, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 14.784, | |
| "grad_norm": 0.1529749035835266, | |
| "learning_rate": 0.0002977951518060722, | |
| "loss": 8.6928, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "grad_norm": 0.14829668402671814, | |
| "learning_rate": 0.0002977927517100684, | |
| "loss": 8.2178, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 14.816, | |
| "grad_norm": 0.15614420175552368, | |
| "learning_rate": 0.00029779035161406456, | |
| "loss": 8.4939, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 14.832, | |
| "grad_norm": 0.18708457052707672, | |
| "learning_rate": 0.00029778795151806073, | |
| "loss": 8.4044, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 14.848, | |
| "grad_norm": 0.1700950413942337, | |
| "learning_rate": 0.00029778555142205684, | |
| "loss": 9.142, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 14.864, | |
| "grad_norm": 0.17176997661590576, | |
| "learning_rate": 0.000297783151326053, | |
| "loss": 8.3459, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 14.88, | |
| "grad_norm": 0.17668530344963074, | |
| "learning_rate": 0.0002977807512300492, | |
| "loss": 8.4129, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 14.896, | |
| "grad_norm": 0.13708771765232086, | |
| "learning_rate": 0.00029777835113404535, | |
| "loss": 8.6625, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 14.912, | |
| "grad_norm": 0.2073189914226532, | |
| "learning_rate": 0.00029777597503900154, | |
| "loss": 8.8295, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 14.928, | |
| "grad_norm": 0.1584160029888153, | |
| "learning_rate": 0.0002977735749429977, | |
| "loss": 8.2892, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 14.943999999999999, | |
| "grad_norm": 0.13419002294540405, | |
| "learning_rate": 0.0002977711748469938, | |
| "loss": 8.6564, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "grad_norm": 0.12294425070285797, | |
| "learning_rate": 0.00029776877475099, | |
| "loss": 8.6937, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 14.975999999999999, | |
| "grad_norm": 0.12022320926189423, | |
| "learning_rate": 0.00029776637465498616, | |
| "loss": 8.6577, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 14.992, | |
| "grad_norm": 0.1635560393333435, | |
| "learning_rate": 0.00029776397455898233, | |
| "loss": 8.4075, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 15.008, | |
| "grad_norm": 0.12280473113059998, | |
| "learning_rate": 0.0002977615744629785, | |
| "loss": 8.3065, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 15.024, | |
| "grad_norm": 0.14091894030570984, | |
| "learning_rate": 0.0002977591743669746, | |
| "loss": 8.3845, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 0.16942408680915833, | |
| "learning_rate": 0.0002977567742709708, | |
| "loss": 8.2751, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 15.056, | |
| "grad_norm": 0.1858222782611847, | |
| "learning_rate": 0.00029775437417496695, | |
| "loss": 8.5152, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 15.072, | |
| "grad_norm": 0.15426284074783325, | |
| "learning_rate": 0.0002977519740789631, | |
| "loss": 8.2321, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 15.088, | |
| "grad_norm": 0.13960111141204834, | |
| "learning_rate": 0.0002977495739829593, | |
| "loss": 8.4343, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 15.104, | |
| "grad_norm": 0.1927483230829239, | |
| "learning_rate": 0.00029774717388695546, | |
| "loss": 8.26, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "grad_norm": 0.15174433588981628, | |
| "learning_rate": 0.0002977447737909516, | |
| "loss": 8.665, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 15.136, | |
| "grad_norm": 0.14686360955238342, | |
| "learning_rate": 0.00029774237369494774, | |
| "loss": 8.0608, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 15.152, | |
| "grad_norm": 0.15865716338157654, | |
| "learning_rate": 0.00029773997359894397, | |
| "loss": 8.4204, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 15.168, | |
| "grad_norm": 0.14913444221019745, | |
| "learning_rate": 0.0002977375735029401, | |
| "loss": 8.5544, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 15.184, | |
| "grad_norm": 0.12727545201778412, | |
| "learning_rate": 0.00029773517340693625, | |
| "loss": 7.9671, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 0.18612131476402283, | |
| "learning_rate": 0.0002977327733109324, | |
| "loss": 8.5797, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 15.216, | |
| "grad_norm": 0.1876545250415802, | |
| "learning_rate": 0.0002977303732149286, | |
| "loss": 8.3126, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 15.232, | |
| "grad_norm": 0.45961084961891174, | |
| "learning_rate": 0.00029772797311892476, | |
| "loss": 8.772, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 15.248, | |
| "grad_norm": 0.16763293743133545, | |
| "learning_rate": 0.00029772557302292093, | |
| "loss": 8.6089, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 15.264, | |
| "grad_norm": 0.17058174312114716, | |
| "learning_rate": 0.00029772317292691704, | |
| "loss": 8.5425, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 15.28, | |
| "grad_norm": 0.17006829380989075, | |
| "learning_rate": 0.0002977207728309132, | |
| "loss": 8.8057, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 15.296, | |
| "grad_norm": 0.09077399969100952, | |
| "learning_rate": 0.0002977183727349094, | |
| "loss": 8.343, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 15.312, | |
| "grad_norm": 0.0950964093208313, | |
| "learning_rate": 0.00029771599663986557, | |
| "loss": 8.3518, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 15.328, | |
| "grad_norm": 0.14622962474822998, | |
| "learning_rate": 0.00029771359654386174, | |
| "loss": 8.1654, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 15.344, | |
| "grad_norm": 0.16222132742404938, | |
| "learning_rate": 0.00029771119644785785, | |
| "loss": 8.6123, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 15.36, | |
| "grad_norm": 0.13185660541057587, | |
| "learning_rate": 0.000297708796351854, | |
| "loss": 8.6665, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 15.376, | |
| "grad_norm": 0.1910812258720398, | |
| "learning_rate": 0.0002977063962558502, | |
| "loss": 8.2323, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 15.392, | |
| "grad_norm": 0.18493321537971497, | |
| "learning_rate": 0.00029770399615984636, | |
| "loss": 8.2076, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 15.408, | |
| "grad_norm": 0.15737323462963104, | |
| "learning_rate": 0.00029770159606384253, | |
| "loss": 8.4031, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 15.424, | |
| "grad_norm": 0.1808168590068817, | |
| "learning_rate": 0.0002976991959678387, | |
| "loss": 8.0816, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 15.44, | |
| "grad_norm": 0.12530648708343506, | |
| "learning_rate": 0.0002976967958718348, | |
| "loss": 8.0609, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 15.456, | |
| "grad_norm": 0.12963543832302094, | |
| "learning_rate": 0.000297694395775831, | |
| "loss": 8.092, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 15.472, | |
| "grad_norm": 0.1329260617494583, | |
| "learning_rate": 0.00029769199567982715, | |
| "loss": 8.4219, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 15.488, | |
| "grad_norm": 0.1603865921497345, | |
| "learning_rate": 0.0002976895955838233, | |
| "loss": 7.8878, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 15.504, | |
| "grad_norm": 0.16902674734592438, | |
| "learning_rate": 0.0002976871954878195, | |
| "loss": 8.2197, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "grad_norm": 0.15807543694972992, | |
| "learning_rate": 0.0002976847953918156, | |
| "loss": 7.937, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 15.536, | |
| "grad_norm": 0.15132875740528107, | |
| "learning_rate": 0.0002976823952958118, | |
| "loss": 8.6177, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 15.552, | |
| "grad_norm": 0.1347590982913971, | |
| "learning_rate": 0.00029767999519980795, | |
| "loss": 8.7107, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 15.568, | |
| "grad_norm": 0.16151072084903717, | |
| "learning_rate": 0.0002976775951038041, | |
| "loss": 8.4782, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 15.584, | |
| "grad_norm": 0.194889098405838, | |
| "learning_rate": 0.0002976751950078003, | |
| "loss": 8.128, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "grad_norm": 0.18148979544639587, | |
| "learning_rate": 0.00029767279491179645, | |
| "loss": 8.3591, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 15.616, | |
| "grad_norm": 0.1610337197780609, | |
| "learning_rate": 0.00029767039481579257, | |
| "loss": 8.8492, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 15.632, | |
| "grad_norm": 0.15079425275325775, | |
| "learning_rate": 0.00029766799471978874, | |
| "loss": 8.2512, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 15.648, | |
| "grad_norm": 0.1274147629737854, | |
| "learning_rate": 0.0002976655946237849, | |
| "loss": 8.2239, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 15.664, | |
| "grad_norm": 0.14330662786960602, | |
| "learning_rate": 0.0002976631945277811, | |
| "loss": 8.3046, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 15.68, | |
| "grad_norm": 0.17394746840000153, | |
| "learning_rate": 0.00029766079443177725, | |
| "loss": 8.2542, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 15.696, | |
| "grad_norm": 0.15639960765838623, | |
| "learning_rate": 0.0002976583943357734, | |
| "loss": 8.3993, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 15.712, | |
| "grad_norm": 0.12845559418201447, | |
| "learning_rate": 0.0002976559942397696, | |
| "loss": 8.2055, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 15.728, | |
| "grad_norm": 0.1673252284526825, | |
| "learning_rate": 0.00029765359414376575, | |
| "loss": 8.2969, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 15.744, | |
| "grad_norm": 0.12345835566520691, | |
| "learning_rate": 0.0002976511940477619, | |
| "loss": 8.4381, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "grad_norm": 0.19648896157741547, | |
| "learning_rate": 0.00029764879395175804, | |
| "loss": 8.0932, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 15.776, | |
| "grad_norm": 0.14960013329982758, | |
| "learning_rate": 0.0002976463938557542, | |
| "loss": 8.4303, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 15.792, | |
| "grad_norm": 0.19554351270198822, | |
| "learning_rate": 0.0002976439937597504, | |
| "loss": 8.0159, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 15.808, | |
| "grad_norm": 0.1545807123184204, | |
| "learning_rate": 0.00029764159366374654, | |
| "loss": 8.0277, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 15.824, | |
| "grad_norm": 0.11705837398767471, | |
| "learning_rate": 0.0002976391935677427, | |
| "loss": 8.2474, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "grad_norm": 0.16222915053367615, | |
| "learning_rate": 0.00029763679347173883, | |
| "loss": 7.8129, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 15.856, | |
| "grad_norm": 0.18901053071022034, | |
| "learning_rate": 0.000297634393375735, | |
| "loss": 8.3068, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 15.872, | |
| "grad_norm": 0.13031688332557678, | |
| "learning_rate": 0.00029763199327973117, | |
| "loss": 8.1526, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 15.888, | |
| "grad_norm": 0.17539045214653015, | |
| "learning_rate": 0.00029762959318372734, | |
| "loss": 7.7545, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 15.904, | |
| "grad_norm": null, | |
| "learning_rate": 0.0002976271930877235, | |
| "loss": 7.8745, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 15.92, | |
| "grad_norm": 0.17992717027664185, | |
| "learning_rate": 0.0002976248169926797, | |
| "loss": 7.9663, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 15.936, | |
| "grad_norm": 0.40667879581451416, | |
| "learning_rate": 0.0002976224168966758, | |
| "loss": 8.1505, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 15.952, | |
| "grad_norm": 0.15805494785308838, | |
| "learning_rate": 0.000297620016800672, | |
| "loss": 8.4417, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 15.968, | |
| "grad_norm": 0.16626039147377014, | |
| "learning_rate": 0.00029761761670466815, | |
| "loss": 8.2951, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 15.984, | |
| "grad_norm": 0.14239948987960815, | |
| "learning_rate": 0.0002976152166086643, | |
| "loss": 8.3205, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.24553033709526062, | |
| "learning_rate": 0.0002976128165126605, | |
| "loss": 8.2056, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 16.016, | |
| "grad_norm": 0.18159309029579163, | |
| "learning_rate": 0.0002976104164166566, | |
| "loss": 7.9151, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 16.032, | |
| "grad_norm": 0.16968666017055511, | |
| "learning_rate": 0.00029760801632065277, | |
| "loss": 7.8903, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 16.048, | |
| "grad_norm": 0.1661410927772522, | |
| "learning_rate": 0.00029760561622464894, | |
| "loss": 8.3051, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 16.064, | |
| "grad_norm": 0.1526879370212555, | |
| "learning_rate": 0.0002976032161286451, | |
| "loss": 7.8435, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 16.08, | |
| "grad_norm": 0.14917099475860596, | |
| "learning_rate": 0.0002976008160326413, | |
| "loss": 8.0571, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 16.096, | |
| "grad_norm": 0.15157845616340637, | |
| "learning_rate": 0.00029759841593663745, | |
| "loss": 8.0002, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 16.112, | |
| "grad_norm": 0.1487221121788025, | |
| "learning_rate": 0.00029759601584063356, | |
| "loss": 7.864, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 16.128, | |
| "grad_norm": 0.1397908627986908, | |
| "learning_rate": 0.00029759361574462973, | |
| "loss": 8.0639, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 16.144, | |
| "grad_norm": 0.1495772898197174, | |
| "learning_rate": 0.0002975912156486259, | |
| "loss": 7.8346, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 16.16, | |
| "grad_norm": 0.17440412938594818, | |
| "learning_rate": 0.00029758881555262207, | |
| "loss": 8.1732, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 16.176, | |
| "grad_norm": 0.15802791714668274, | |
| "learning_rate": 0.00029758641545661824, | |
| "loss": 7.9528, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 16.192, | |
| "grad_norm": 0.15488143265247345, | |
| "learning_rate": 0.0002975840153606144, | |
| "loss": 7.8414, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 16.208, | |
| "grad_norm": 0.1365291178226471, | |
| "learning_rate": 0.0002975816152646106, | |
| "loss": 7.9363, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 16.224, | |
| "grad_norm": 0.13933680951595306, | |
| "learning_rate": 0.00029757921516860675, | |
| "loss": 7.5429, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "grad_norm": 0.19280196726322174, | |
| "learning_rate": 0.0002975768150726029, | |
| "loss": 7.913, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 16.256, | |
| "grad_norm": 0.11700501292943954, | |
| "learning_rate": 0.00029757441497659903, | |
| "loss": 8.0237, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 16.272, | |
| "grad_norm": 0.16518530249595642, | |
| "learning_rate": 0.0002975720388815552, | |
| "loss": 7.8771, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 16.288, | |
| "grad_norm": 0.14215916395187378, | |
| "learning_rate": 0.0002975696387855514, | |
| "loss": 8.2513, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 16.304, | |
| "grad_norm": 0.15119720995426178, | |
| "learning_rate": 0.00029756723868954756, | |
| "loss": 8.0416, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 16.32, | |
| "grad_norm": 0.17267923057079315, | |
| "learning_rate": 0.0002975648385935437, | |
| "loss": 7.7183, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 16.336, | |
| "grad_norm": 0.13659106194972992, | |
| "learning_rate": 0.00029756243849753984, | |
| "loss": 7.6539, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 16.352, | |
| "grad_norm": 0.13859499990940094, | |
| "learning_rate": 0.000297560038401536, | |
| "loss": 7.9309, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 16.368, | |
| "grad_norm": 0.16713272035121918, | |
| "learning_rate": 0.0002975576383055322, | |
| "loss": 7.7884, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 16.384, | |
| "grad_norm": 0.19469381868839264, | |
| "learning_rate": 0.00029755523820952835, | |
| "loss": 7.6944, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "grad_norm": 0.14082291722297668, | |
| "learning_rate": 0.0002975528381135245, | |
| "loss": 7.5828, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 16.416, | |
| "grad_norm": 0.12121783196926117, | |
| "learning_rate": 0.0002975504380175207, | |
| "loss": 7.813, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 16.432, | |
| "grad_norm": 0.22072196006774902, | |
| "learning_rate": 0.0002975480379215168, | |
| "loss": 8.2315, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 16.448, | |
| "grad_norm": 0.1469603329896927, | |
| "learning_rate": 0.00029754563782551297, | |
| "loss": 8.0137, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 16.464, | |
| "grad_norm": 0.11437113583087921, | |
| "learning_rate": 0.00029754323772950914, | |
| "loss": 7.3291, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 16.48, | |
| "grad_norm": 0.17373935878276825, | |
| "learning_rate": 0.0002975408376335053, | |
| "loss": 8.0078, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 16.496, | |
| "grad_norm": 0.12379905581474304, | |
| "learning_rate": 0.0002975384375375015, | |
| "loss": 8.0724, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 16.512, | |
| "grad_norm": 0.1540013700723648, | |
| "learning_rate": 0.00029753603744149765, | |
| "loss": 7.6953, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 16.528, | |
| "grad_norm": 0.21880146861076355, | |
| "learning_rate": 0.00029753363734549376, | |
| "loss": 8.0522, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 16.544, | |
| "grad_norm": 0.14410023391246796, | |
| "learning_rate": 0.00029753123724948993, | |
| "loss": 8.191, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 16.56, | |
| "grad_norm": 0.13037148118019104, | |
| "learning_rate": 0.0002975288371534861, | |
| "loss": 7.6117, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 16.576, | |
| "grad_norm": 0.16236849129199982, | |
| "learning_rate": 0.00029752643705748227, | |
| "loss": 7.9894, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 16.592, | |
| "grad_norm": 0.1502009928226471, | |
| "learning_rate": 0.00029752403696147844, | |
| "loss": 7.7302, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 16.608, | |
| "grad_norm": 0.18485447764396667, | |
| "learning_rate": 0.00029752163686547455, | |
| "loss": 7.8743, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 16.624, | |
| "grad_norm": 0.12873640656471252, | |
| "learning_rate": 0.0002975192367694707, | |
| "loss": 7.6197, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "grad_norm": 0.11517874896526337, | |
| "learning_rate": 0.0002975168366734669, | |
| "loss": 7.4887, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 16.656, | |
| "grad_norm": 0.11515144258737564, | |
| "learning_rate": 0.00029751443657746306, | |
| "loss": 7.706, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 16.672, | |
| "grad_norm": 0.15465959906578064, | |
| "learning_rate": 0.00029751203648145923, | |
| "loss": 7.3052, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 16.688, | |
| "grad_norm": 0.12962587177753448, | |
| "learning_rate": 0.0002975096603864154, | |
| "loss": 7.8117, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 16.704, | |
| "grad_norm": 0.18321260809898376, | |
| "learning_rate": 0.0002975072602904116, | |
| "loss": 7.4464, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 16.72, | |
| "grad_norm": 0.1769808679819107, | |
| "learning_rate": 0.00029750486019440776, | |
| "loss": 7.8639, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 16.736, | |
| "grad_norm": 0.15869227051734924, | |
| "learning_rate": 0.00029750246009840393, | |
| "loss": 7.7956, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 16.752, | |
| "grad_norm": 0.12134505808353424, | |
| "learning_rate": 0.00029750006000240004, | |
| "loss": 7.5809, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 16.768, | |
| "grad_norm": 0.13986830413341522, | |
| "learning_rate": 0.0002974976599063962, | |
| "loss": 7.4372, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 16.784, | |
| "grad_norm": 0.1761140078306198, | |
| "learning_rate": 0.0002974952598103924, | |
| "loss": 7.7486, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "grad_norm": 0.13163812458515167, | |
| "learning_rate": 0.00029749285971438855, | |
| "loss": 7.834, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 16.816, | |
| "grad_norm": 0.1813841462135315, | |
| "learning_rate": 0.0002974904596183847, | |
| "loss": 7.5974, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 16.832, | |
| "grad_norm": 0.15655750036239624, | |
| "learning_rate": 0.0002974880595223809, | |
| "loss": 7.4437, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 16.848, | |
| "grad_norm": 0.16123917698860168, | |
| "learning_rate": 0.000297485659426377, | |
| "loss": 7.347, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 16.864, | |
| "grad_norm": 0.18692290782928467, | |
| "learning_rate": 0.00029748325933037317, | |
| "loss": 7.8658, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "grad_norm": 0.15913629531860352, | |
| "learning_rate": 0.00029748085923436934, | |
| "loss": 7.9134, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 16.896, | |
| "grad_norm": 0.1343807876110077, | |
| "learning_rate": 0.0002974784591383655, | |
| "loss": 7.5983, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 16.912, | |
| "grad_norm": 0.2009182572364807, | |
| "learning_rate": 0.0002974760590423617, | |
| "loss": 7.3442, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 16.928, | |
| "grad_norm": 0.1569000780582428, | |
| "learning_rate": 0.0002974736589463578, | |
| "loss": 7.5953, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 16.944, | |
| "grad_norm": 0.1601628214120865, | |
| "learning_rate": 0.00029747125885035396, | |
| "loss": 7.5624, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "grad_norm": 0.14143775403499603, | |
| "learning_rate": 0.00029746885875435013, | |
| "loss": 7.579, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 16.976, | |
| "grad_norm": 0.2106146216392517, | |
| "learning_rate": 0.0002974664586583463, | |
| "loss": 7.5958, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 16.992, | |
| "grad_norm": 0.17329080402851105, | |
| "learning_rate": 0.00029746405856234247, | |
| "loss": 8.0935, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 17.008, | |
| "grad_norm": 0.19225256145000458, | |
| "learning_rate": 0.00029746165846633864, | |
| "loss": 6.8958, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 17.024, | |
| "grad_norm": 0.17550058662891388, | |
| "learning_rate": 0.00029745925837033476, | |
| "loss": 7.4002, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 0.16778625547885895, | |
| "learning_rate": 0.0002974568582743309, | |
| "loss": 7.698, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 17.056, | |
| "grad_norm": 0.14647962152957916, | |
| "learning_rate": 0.0002974544581783271, | |
| "loss": 7.5615, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 17.072, | |
| "grad_norm": 0.15024389326572418, | |
| "learning_rate": 0.00029745205808232326, | |
| "loss": 7.6671, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 17.088, | |
| "grad_norm": 0.11949127167463303, | |
| "learning_rate": 0.00029744965798631943, | |
| "loss": 7.6843, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 17.104, | |
| "grad_norm": 0.15480674803256989, | |
| "learning_rate": 0.00029744725789031555, | |
| "loss": 7.9465, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 17.12, | |
| "grad_norm": 0.14191922545433044, | |
| "learning_rate": 0.0002974448577943117, | |
| "loss": 7.7372, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 17.136, | |
| "grad_norm": 0.19336700439453125, | |
| "learning_rate": 0.0002974424576983079, | |
| "loss": 7.6904, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 17.152, | |
| "grad_norm": 0.17240415513515472, | |
| "learning_rate": 0.0002974400576023041, | |
| "loss": 7.4487, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 17.168, | |
| "grad_norm": 0.135718435049057, | |
| "learning_rate": 0.0002974376575063002, | |
| "loss": 7.5844, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 17.184, | |
| "grad_norm": 0.13594204187393188, | |
| "learning_rate": 0.0002974352574102964, | |
| "loss": 7.1186, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "grad_norm": 0.14997251331806183, | |
| "learning_rate": 0.00029743285731429256, | |
| "loss": 7.3525, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 17.216, | |
| "grad_norm": 0.1264813244342804, | |
| "learning_rate": 0.00029743045721828873, | |
| "loss": 7.8519, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 17.232, | |
| "grad_norm": 0.16751745343208313, | |
| "learning_rate": 0.0002974280571222849, | |
| "loss": 7.346, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 17.248, | |
| "grad_norm": 0.196015402674675, | |
| "learning_rate": 0.000297425657026281, | |
| "loss": 7.5401, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 17.264, | |
| "grad_norm": 0.14854785799980164, | |
| "learning_rate": 0.0002974232569302772, | |
| "loss": 7.3802, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "grad_norm": 0.1462150365114212, | |
| "learning_rate": 0.00029742085683427335, | |
| "loss": 7.56, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 17.296, | |
| "grad_norm": 0.18656545877456665, | |
| "learning_rate": 0.0002974184567382695, | |
| "loss": 7.4044, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 17.312, | |
| "grad_norm": 0.15170492231845856, | |
| "learning_rate": 0.0002974160566422657, | |
| "loss": 7.1246, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 17.328, | |
| "grad_norm": 0.13659091293811798, | |
| "learning_rate": 0.00029741365654626186, | |
| "loss": 7.5455, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 17.344, | |
| "grad_norm": 0.1527138650417328, | |
| "learning_rate": 0.000297411256450258, | |
| "loss": 7.5807, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 17.36, | |
| "grad_norm": 0.15352298319339752, | |
| "learning_rate": 0.00029740885635425415, | |
| "loss": 7.3586, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 17.376, | |
| "grad_norm": 0.16372795403003693, | |
| "learning_rate": 0.0002974065042601704, | |
| "loss": 7.5309, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 17.392, | |
| "grad_norm": 0.14718171954154968, | |
| "learning_rate": 0.0002974041041641665, | |
| "loss": 7.7871, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 17.408, | |
| "grad_norm": 0.13745012879371643, | |
| "learning_rate": 0.0002974017040681627, | |
| "loss": 7.4228, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 17.424, | |
| "grad_norm": 0.1310426890850067, | |
| "learning_rate": 0.00029739930397215886, | |
| "loss": 6.914, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 17.44, | |
| "grad_norm": 0.1291857808828354, | |
| "learning_rate": 0.00029739690387615503, | |
| "loss": 7.5163, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 17.456, | |
| "grad_norm": 0.1615869104862213, | |
| "learning_rate": 0.0002973945037801512, | |
| "loss": 6.9051, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 17.472, | |
| "grad_norm": 0.11409099400043488, | |
| "learning_rate": 0.00029739210368414737, | |
| "loss": 7.4919, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 17.488, | |
| "grad_norm": 0.12527474761009216, | |
| "learning_rate": 0.0002973897035881435, | |
| "loss": 7.5104, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 17.504, | |
| "grad_norm": 0.1936863362789154, | |
| "learning_rate": 0.00029738730349213965, | |
| "loss": 7.1046, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 17.52, | |
| "grad_norm": 0.12854978442192078, | |
| "learning_rate": 0.0002973849033961358, | |
| "loss": 7.4067, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 17.536, | |
| "grad_norm": 0.13116727769374847, | |
| "learning_rate": 0.000297382503300132, | |
| "loss": 7.2106, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 17.552, | |
| "grad_norm": 0.16138528287410736, | |
| "learning_rate": 0.00029738010320412816, | |
| "loss": 7.263, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 17.568, | |
| "grad_norm": 0.14999186992645264, | |
| "learning_rate": 0.0002973777031081243, | |
| "loss": 7.428, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 17.584, | |
| "grad_norm": 0.13564202189445496, | |
| "learning_rate": 0.00029737530301212045, | |
| "loss": 7.6592, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "grad_norm": 0.14535826444625854, | |
| "learning_rate": 0.0002973729029161166, | |
| "loss": 7.2886, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 17.616, | |
| "grad_norm": 0.13466519117355347, | |
| "learning_rate": 0.0002973705028201128, | |
| "loss": 7.4852, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 17.632, | |
| "grad_norm": 0.1622999757528305, | |
| "learning_rate": 0.00029736810272410895, | |
| "loss": 7.6437, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 17.648, | |
| "grad_norm": 0.15417474508285522, | |
| "learning_rate": 0.0002973657026281051, | |
| "loss": 7.4305, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 17.664, | |
| "grad_norm": 0.1484052836894989, | |
| "learning_rate": 0.00029736330253210124, | |
| "loss": 7.5558, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 17.68, | |
| "grad_norm": 0.15688396990299225, | |
| "learning_rate": 0.0002973609024360974, | |
| "loss": 7.4349, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 17.696, | |
| "grad_norm": 0.15338055789470673, | |
| "learning_rate": 0.0002973585023400936, | |
| "loss": 7.2818, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 17.712, | |
| "grad_norm": 0.1761266142129898, | |
| "learning_rate": 0.00029735610224408974, | |
| "loss": 7.2618, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 17.728, | |
| "grad_norm": 0.17337530851364136, | |
| "learning_rate": 0.0002973537021480859, | |
| "loss": 7.0263, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 17.744, | |
| "grad_norm": 0.14693669974803925, | |
| "learning_rate": 0.00029735130205208203, | |
| "loss": 6.9075, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "grad_norm": 0.14184145629405975, | |
| "learning_rate": 0.00029734892595703827, | |
| "loss": 7.1306, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 17.776, | |
| "grad_norm": 0.15281623601913452, | |
| "learning_rate": 0.00029734652586103444, | |
| "loss": 6.9965, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 17.792, | |
| "grad_norm": 0.30168259143829346, | |
| "learning_rate": 0.0002973441257650306, | |
| "loss": 7.3388, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 17.808, | |
| "grad_norm": 0.15365231037139893, | |
| "learning_rate": 0.0002973417256690267, | |
| "loss": 7.2799, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 17.824, | |
| "grad_norm": 0.1704150289297104, | |
| "learning_rate": 0.0002973393255730229, | |
| "loss": 7.3031, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 17.84, | |
| "grad_norm": 0.16025039553642273, | |
| "learning_rate": 0.00029733692547701906, | |
| "loss": 6.9446, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 17.856, | |
| "grad_norm": 0.14661014080047607, | |
| "learning_rate": 0.00029733452538101523, | |
| "loss": 7.4911, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 17.872, | |
| "grad_norm": 0.18997499346733093, | |
| "learning_rate": 0.0002973321252850114, | |
| "loss": 7.2489, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 17.888, | |
| "grad_norm": 0.16025018692016602, | |
| "learning_rate": 0.0002973297251890075, | |
| "loss": 7.4835, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 17.904, | |
| "grad_norm": 0.19556750357151031, | |
| "learning_rate": 0.0002973273250930037, | |
| "loss": 7.5087, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 17.92, | |
| "grad_norm": 0.14444762468338013, | |
| "learning_rate": 0.00029732492499699986, | |
| "loss": 7.3942, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 17.936, | |
| "grad_norm": 0.12939786911010742, | |
| "learning_rate": 0.000297322524900996, | |
| "loss": 7.0694, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 17.951999999999998, | |
| "grad_norm": 0.1845860481262207, | |
| "learning_rate": 0.0002973201248049922, | |
| "loss": 7.3517, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 17.968, | |
| "grad_norm": 0.1611936390399933, | |
| "learning_rate": 0.00029731772470898836, | |
| "loss": 7.3119, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 17.984, | |
| "grad_norm": 0.1410474330186844, | |
| "learning_rate": 0.0002973153246129845, | |
| "loss": 7.1857, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.14935807883739471, | |
| "learning_rate": 0.00029731292451698065, | |
| "loss": 7.2314, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 18.016, | |
| "grad_norm": 0.11792614310979843, | |
| "learning_rate": 0.0002973105244209768, | |
| "loss": 7.0182, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 18.032, | |
| "grad_norm": 0.19907847046852112, | |
| "learning_rate": 0.000297308124324973, | |
| "loss": 7.036, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 18.048, | |
| "grad_norm": 0.11814866214990616, | |
| "learning_rate": 0.00029730572422896915, | |
| "loss": 7.2484, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 18.064, | |
| "grad_norm": 0.16914184391498566, | |
| "learning_rate": 0.00029730332413296527, | |
| "loss": 7.0729, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 18.08, | |
| "grad_norm": 0.11930215358734131, | |
| "learning_rate": 0.00029730092403696144, | |
| "loss": 6.9642, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 18.096, | |
| "grad_norm": 0.14744411408901215, | |
| "learning_rate": 0.0002972985239409576, | |
| "loss": 7.1132, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 18.112, | |
| "grad_norm": 0.1400415003299713, | |
| "learning_rate": 0.0002972961238449538, | |
| "loss": 7.1415, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 18.128, | |
| "grad_norm": 0.1671387106180191, | |
| "learning_rate": 0.00029729374774990997, | |
| "loss": 7.2558, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 18.144, | |
| "grad_norm": 0.16554495692253113, | |
| "learning_rate": 0.00029729134765390613, | |
| "loss": 6.9987, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 18.16, | |
| "grad_norm": 0.1383550763130188, | |
| "learning_rate": 0.0002972889475579023, | |
| "loss": 7.0975, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 18.176, | |
| "grad_norm": 0.1566449999809265, | |
| "learning_rate": 0.0002972865474618985, | |
| "loss": 7.0562, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 18.192, | |
| "grad_norm": 0.19498635828495026, | |
| "learning_rate": 0.00029728414736589464, | |
| "loss": 6.6165, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 18.208, | |
| "grad_norm": 0.1640356481075287, | |
| "learning_rate": 0.00029728174726989076, | |
| "loss": 7.1794, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 18.224, | |
| "grad_norm": 0.11614058166742325, | |
| "learning_rate": 0.0002972793471738869, | |
| "loss": 7.285, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "grad_norm": 0.15918317437171936, | |
| "learning_rate": 0.0002972769470778831, | |
| "loss": 7.163, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 18.256, | |
| "grad_norm": 0.1565544754266739, | |
| "learning_rate": 0.00029727454698187926, | |
| "loss": 7.225, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 18.272, | |
| "grad_norm": 0.17850929498672485, | |
| "learning_rate": 0.00029727214688587543, | |
| "loss": 6.801, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 18.288, | |
| "grad_norm": 0.11589377373456955, | |
| "learning_rate": 0.0002972697467898716, | |
| "loss": 6.8754, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 18.304, | |
| "grad_norm": 0.13528980314731598, | |
| "learning_rate": 0.0002972673466938677, | |
| "loss": 7.1785, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 18.32, | |
| "grad_norm": 0.14462067186832428, | |
| "learning_rate": 0.0002972649465978639, | |
| "loss": 6.7743, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 18.336, | |
| "grad_norm": 0.11352884024381638, | |
| "learning_rate": 0.0002972625705028201, | |
| "loss": 7.195, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 18.352, | |
| "grad_norm": 0.15487293899059296, | |
| "learning_rate": 0.00029726017040681624, | |
| "loss": 6.9974, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 18.368, | |
| "grad_norm": 0.18302305042743683, | |
| "learning_rate": 0.0002972577703108124, | |
| "loss": 7.3688, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 18.384, | |
| "grad_norm": 0.13732467591762543, | |
| "learning_rate": 0.00029725537021480853, | |
| "loss": 7.1072, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "grad_norm": 0.16661597788333893, | |
| "learning_rate": 0.0002972529701188047, | |
| "loss": 6.9747, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 18.416, | |
| "grad_norm": 0.13797527551651, | |
| "learning_rate": 0.00029725057002280087, | |
| "loss": 6.9419, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 18.432, | |
| "grad_norm": 0.12859782576560974, | |
| "learning_rate": 0.00029724816992679704, | |
| "loss": 6.7853, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 18.448, | |
| "grad_norm": 0.14815713465213776, | |
| "learning_rate": 0.0002972457698307932, | |
| "loss": 7.2451, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 18.464, | |
| "grad_norm": 0.17937737703323364, | |
| "learning_rate": 0.0002972433697347894, | |
| "loss": 6.9378, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "grad_norm": 0.1678260713815689, | |
| "learning_rate": 0.0002972409696387855, | |
| "loss": 7.324, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 18.496, | |
| "grad_norm": 0.1482672095298767, | |
| "learning_rate": 0.0002972385695427817, | |
| "loss": 6.7464, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 18.512, | |
| "grad_norm": 0.13717281818389893, | |
| "learning_rate": 0.0002972361694467779, | |
| "loss": 6.9728, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 18.528, | |
| "grad_norm": 0.16356568038463593, | |
| "learning_rate": 0.000297233769350774, | |
| "loss": 6.4269, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 18.544, | |
| "grad_norm": 0.11255384981632233, | |
| "learning_rate": 0.00029723136925477017, | |
| "loss": 6.8938, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 18.56, | |
| "grad_norm": 0.18403998017311096, | |
| "learning_rate": 0.00029722896915876634, | |
| "loss": 7.5852, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 18.576, | |
| "grad_norm": 0.16399045288562775, | |
| "learning_rate": 0.0002972265690627625, | |
| "loss": 6.8499, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 18.592, | |
| "grad_norm": 0.1565336287021637, | |
| "learning_rate": 0.0002972241689667587, | |
| "loss": 6.7727, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 18.608, | |
| "grad_norm": 0.19689014554023743, | |
| "learning_rate": 0.00029722176887075484, | |
| "loss": 7.1385, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 18.624, | |
| "grad_norm": 0.13252195715904236, | |
| "learning_rate": 0.00029721936877475096, | |
| "loss": 6.6291, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 18.64, | |
| "grad_norm": 0.12019433081150055, | |
| "learning_rate": 0.00029721696867874713, | |
| "loss": 6.8913, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 18.656, | |
| "grad_norm": 0.16386528313159943, | |
| "learning_rate": 0.0002972145685827433, | |
| "loss": 6.7989, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 18.672, | |
| "grad_norm": 0.13716477155685425, | |
| "learning_rate": 0.00029721216848673947, | |
| "loss": 6.6763, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 18.688, | |
| "grad_norm": 0.13785770535469055, | |
| "learning_rate": 0.00029720976839073564, | |
| "loss": 6.6476, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 18.704, | |
| "grad_norm": 0.1605842560529709, | |
| "learning_rate": 0.00029720736829473175, | |
| "loss": 6.6566, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 18.72, | |
| "grad_norm": 0.19339755177497864, | |
| "learning_rate": 0.0002972049681987279, | |
| "loss": 6.9454, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 18.736, | |
| "grad_norm": 0.14963068068027496, | |
| "learning_rate": 0.0002972025681027241, | |
| "loss": 7.0718, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 18.752, | |
| "grad_norm": 0.1378934234380722, | |
| "learning_rate": 0.00029720016800672026, | |
| "loss": 6.7582, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 18.768, | |
| "grad_norm": 0.1546606719493866, | |
| "learning_rate": 0.0002971977679107164, | |
| "loss": 6.9278, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 18.784, | |
| "grad_norm": 0.13777601718902588, | |
| "learning_rate": 0.0002971953678147126, | |
| "loss": 6.821, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "grad_norm": 0.1833031326532364, | |
| "learning_rate": 0.0002971929677187087, | |
| "loss": 7.345, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 18.816, | |
| "grad_norm": 0.13752517104148865, | |
| "learning_rate": 0.0002971905676227049, | |
| "loss": 7.0435, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 18.832, | |
| "grad_norm": 0.14740273356437683, | |
| "learning_rate": 0.00029718816752670105, | |
| "loss": 7.0617, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 18.848, | |
| "grad_norm": 0.13207408785820007, | |
| "learning_rate": 0.0002971857674306972, | |
| "loss": 6.9374, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 18.864, | |
| "grad_norm": 0.14092418551445007, | |
| "learning_rate": 0.0002971833673346934, | |
| "loss": 6.5626, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "grad_norm": 0.19631852209568024, | |
| "learning_rate": 0.0002971809672386895, | |
| "loss": 7.162, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 18.896, | |
| "grad_norm": 0.12741628289222717, | |
| "learning_rate": 0.00029717856714268567, | |
| "loss": 6.8316, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 18.912, | |
| "grad_norm": 0.17144246399402618, | |
| "learning_rate": 0.00029717616704668184, | |
| "loss": 6.5714, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 18.928, | |
| "grad_norm": 0.1456017643213272, | |
| "learning_rate": 0.000297173766950678, | |
| "loss": 7.1563, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 18.944, | |
| "grad_norm": 0.17816682159900665, | |
| "learning_rate": 0.0002971713668546742, | |
| "loss": 7.1767, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 18.96, | |
| "grad_norm": 0.274588942527771, | |
| "learning_rate": 0.00029716896675867035, | |
| "loss": 6.9244, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 18.976, | |
| "grad_norm": 0.14686717092990875, | |
| "learning_rate": 0.00029716656666266646, | |
| "loss": 6.9108, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 18.992, | |
| "grad_norm": 0.1549716740846634, | |
| "learning_rate": 0.00029716416656666263, | |
| "loss": 7.1166, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 19.008, | |
| "grad_norm": 0.24241045117378235, | |
| "learning_rate": 0.0002971617664706588, | |
| "loss": 6.7128, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 19.024, | |
| "grad_norm": 0.14365893602371216, | |
| "learning_rate": 0.00029715936637465497, | |
| "loss": 6.5973, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "grad_norm": 0.1771174818277359, | |
| "learning_rate": 0.00029715696627865114, | |
| "loss": 6.8558, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 19.056, | |
| "grad_norm": 0.1703067272901535, | |
| "learning_rate": 0.00029715456618264726, | |
| "loss": 6.748, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 19.072, | |
| "grad_norm": 0.1466696858406067, | |
| "learning_rate": 0.0002971521660866434, | |
| "loss": 6.6093, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 19.088, | |
| "grad_norm": 0.16070063412189484, | |
| "learning_rate": 0.0002971497659906396, | |
| "loss": 6.7417, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 19.104, | |
| "grad_norm": 0.2056402564048767, | |
| "learning_rate": 0.00029714738989559584, | |
| "loss": 6.4175, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 19.12, | |
| "grad_norm": 0.207046240568161, | |
| "learning_rate": 0.00029714498979959195, | |
| "loss": 6.9465, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 19.136, | |
| "grad_norm": 0.12638603150844574, | |
| "learning_rate": 0.0002971425897035881, | |
| "loss": 6.882, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 19.152, | |
| "grad_norm": 0.17709197103977203, | |
| "learning_rate": 0.0002971401896075843, | |
| "loss": 6.5151, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 19.168, | |
| "grad_norm": 0.14313985407352448, | |
| "learning_rate": 0.00029713778951158046, | |
| "loss": 6.6897, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 19.184, | |
| "grad_norm": 0.14212185144424438, | |
| "learning_rate": 0.00029713538941557663, | |
| "loss": 7.0293, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "grad_norm": 0.14830344915390015, | |
| "learning_rate": 0.00029713298931957274, | |
| "loss": 6.8398, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 19.216, | |
| "grad_norm": 0.24165965616703033, | |
| "learning_rate": 0.0002971305892235689, | |
| "loss": 6.715, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 19.232, | |
| "grad_norm": 0.13292773067951202, | |
| "learning_rate": 0.0002971281891275651, | |
| "loss": 6.8165, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 19.248, | |
| "grad_norm": 0.1639406383037567, | |
| "learning_rate": 0.00029712578903156125, | |
| "loss": 6.9099, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 19.264, | |
| "grad_norm": 0.18321408331394196, | |
| "learning_rate": 0.0002971233889355574, | |
| "loss": 6.4805, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 19.28, | |
| "grad_norm": 0.18382756412029266, | |
| "learning_rate": 0.0002971209888395536, | |
| "loss": 6.8172, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 19.296, | |
| "grad_norm": 0.15303823351860046, | |
| "learning_rate": 0.0002971185887435497, | |
| "loss": 6.2661, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 19.312, | |
| "grad_norm": 0.1740507036447525, | |
| "learning_rate": 0.0002971161886475459, | |
| "loss": 6.6127, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 19.328, | |
| "grad_norm": 0.14414259791374207, | |
| "learning_rate": 0.00029711378855154204, | |
| "loss": 6.4442, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 19.344, | |
| "grad_norm": 0.14647360146045685, | |
| "learning_rate": 0.0002971113884555382, | |
| "loss": 6.6076, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 19.36, | |
| "grad_norm": 0.15991808474063873, | |
| "learning_rate": 0.0002971089883595344, | |
| "loss": 6.787, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 19.376, | |
| "grad_norm": 0.1332535594701767, | |
| "learning_rate": 0.0002971065882635305, | |
| "loss": 6.7092, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 19.392, | |
| "grad_norm": 0.14746126532554626, | |
| "learning_rate": 0.00029710418816752667, | |
| "loss": 6.7574, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 19.408, | |
| "grad_norm": 0.13268060982227325, | |
| "learning_rate": 0.00029710178807152283, | |
| "loss": 6.4729, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 19.424, | |
| "grad_norm": 0.18852052092552185, | |
| "learning_rate": 0.000297099387975519, | |
| "loss": 6.7246, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "grad_norm": 0.20590665936470032, | |
| "learning_rate": 0.00029709698787951517, | |
| "loss": 6.7032, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 19.456, | |
| "grad_norm": 0.18409046530723572, | |
| "learning_rate": 0.00029709458778351134, | |
| "loss": 6.9088, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 19.472, | |
| "grad_norm": 0.1330518126487732, | |
| "learning_rate": 0.00029709218768750746, | |
| "loss": 6.7912, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 19.488, | |
| "grad_norm": 0.17881762981414795, | |
| "learning_rate": 0.0002970897875915036, | |
| "loss": 6.6976, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 19.504, | |
| "grad_norm": 0.1952984780073166, | |
| "learning_rate": 0.0002970873874954998, | |
| "loss": 6.6684, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 19.52, | |
| "grad_norm": 0.10283193737268448, | |
| "learning_rate": 0.00029708498739949596, | |
| "loss": 6.8239, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 19.536, | |
| "grad_norm": 0.14318746328353882, | |
| "learning_rate": 0.00029708258730349213, | |
| "loss": 6.3829, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 19.552, | |
| "grad_norm": 0.27563196420669556, | |
| "learning_rate": 0.00029708018720748825, | |
| "loss": 6.5011, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 19.568, | |
| "grad_norm": 0.22338111698627472, | |
| "learning_rate": 0.0002970777871114844, | |
| "loss": 6.5485, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 19.584, | |
| "grad_norm": 0.12649616599082947, | |
| "learning_rate": 0.0002970753870154806, | |
| "loss": 6.7374, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "grad_norm": 0.15860269963741302, | |
| "learning_rate": 0.00029707298691947676, | |
| "loss": 6.3596, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 19.616, | |
| "grad_norm": 0.12358345836400986, | |
| "learning_rate": 0.00029707061082443294, | |
| "loss": 6.3242, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 19.632, | |
| "grad_norm": 0.16506068408489227, | |
| "learning_rate": 0.0002970682107284291, | |
| "loss": 6.5935, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 19.648, | |
| "grad_norm": 0.19951657950878143, | |
| "learning_rate": 0.0002970658106324253, | |
| "loss": 6.4781, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 19.664, | |
| "grad_norm": 0.16879688203334808, | |
| "learning_rate": 0.00029706341053642145, | |
| "loss": 6.4468, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 19.68, | |
| "grad_norm": 0.14565648138523102, | |
| "learning_rate": 0.0002970610104404176, | |
| "loss": 6.635, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 19.696, | |
| "grad_norm": 0.12739145755767822, | |
| "learning_rate": 0.00029705861034441374, | |
| "loss": 6.7823, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 19.712, | |
| "grad_norm": 0.1428256332874298, | |
| "learning_rate": 0.0002970562102484099, | |
| "loss": 6.3011, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 19.728, | |
| "grad_norm": 0.1541672646999359, | |
| "learning_rate": 0.0002970538101524061, | |
| "loss": 6.93, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 19.744, | |
| "grad_norm": 0.14009244740009308, | |
| "learning_rate": 0.00029705141005640224, | |
| "loss": 6.4553, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 19.76, | |
| "grad_norm": 0.1925840973854065, | |
| "learning_rate": 0.0002970490099603984, | |
| "loss": 6.812, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 19.776, | |
| "grad_norm": 0.1624009907245636, | |
| "learning_rate": 0.0002970466098643946, | |
| "loss": 6.644, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 19.792, | |
| "grad_norm": 0.12902632355690002, | |
| "learning_rate": 0.0002970442097683907, | |
| "loss": 6.8444, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 19.808, | |
| "grad_norm": 0.1572074443101883, | |
| "learning_rate": 0.00029704180967238687, | |
| "loss": 6.8285, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 19.824, | |
| "grad_norm": 0.17196834087371826, | |
| "learning_rate": 0.00029703940957638304, | |
| "loss": 6.318, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 19.84, | |
| "grad_norm": 0.14329147338867188, | |
| "learning_rate": 0.0002970370094803792, | |
| "loss": 6.5197, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 19.856, | |
| "grad_norm": 0.12039805948734283, | |
| "learning_rate": 0.0002970346093843754, | |
| "loss": 6.3033, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 19.872, | |
| "grad_norm": 0.1786791980266571, | |
| "learning_rate": 0.0002970322092883715, | |
| "loss": 6.669, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 19.888, | |
| "grad_norm": 0.12987840175628662, | |
| "learning_rate": 0.00029702980919236766, | |
| "loss": 6.2543, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 19.904, | |
| "grad_norm": 0.12259730696678162, | |
| "learning_rate": 0.00029702740909636383, | |
| "loss": 6.4946, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "grad_norm": 0.10069935768842697, | |
| "learning_rate": 0.00029702500900036, | |
| "loss": 6.7976, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 19.936, | |
| "grad_norm": 0.14555324614048004, | |
| "learning_rate": 0.00029702260890435617, | |
| "loss": 6.3994, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 19.951999999999998, | |
| "grad_norm": 0.15070566534996033, | |
| "learning_rate": 0.00029702020880835234, | |
| "loss": 6.3558, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 19.968, | |
| "grad_norm": 0.13936389982700348, | |
| "learning_rate": 0.00029701780871234845, | |
| "loss": 6.369, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 19.984, | |
| "grad_norm": 0.20414897799491882, | |
| "learning_rate": 0.0002970154086163446, | |
| "loss": 6.4591, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.17090056836605072, | |
| "learning_rate": 0.0002970130085203408, | |
| "loss": 6.6428, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 20.016, | |
| "grad_norm": 0.13628321886062622, | |
| "learning_rate": 0.00029701060842433696, | |
| "loss": 6.6142, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 20.032, | |
| "grad_norm": 0.1602114588022232, | |
| "learning_rate": 0.0002970082083283331, | |
| "loss": 6.2906, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 20.048, | |
| "grad_norm": 0.16529148817062378, | |
| "learning_rate": 0.00029700580823232924, | |
| "loss": 6.32, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 20.064, | |
| "grad_norm": 0.09591558575630188, | |
| "learning_rate": 0.0002970034081363254, | |
| "loss": 6.5236, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 20.08, | |
| "grad_norm": 0.16209086775779724, | |
| "learning_rate": 0.0002970010080403216, | |
| "loss": 6.0982, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 20.096, | |
| "grad_norm": 0.14823907613754272, | |
| "learning_rate": 0.00029699860794431775, | |
| "loss": 6.5177, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 20.112, | |
| "grad_norm": 0.14667312800884247, | |
| "learning_rate": 0.0002969962078483139, | |
| "loss": 6.2496, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 20.128, | |
| "grad_norm": 0.14101973176002502, | |
| "learning_rate": 0.0002969938077523101, | |
| "loss": 6.4982, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 20.144, | |
| "grad_norm": 0.15947328507900238, | |
| "learning_rate": 0.0002969914076563062, | |
| "loss": 6.2799, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "grad_norm": 0.1501172035932541, | |
| "learning_rate": 0.00029698900756030237, | |
| "loss": 6.3317, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 20.176, | |
| "grad_norm": 0.15825922787189484, | |
| "learning_rate": 0.00029698660746429854, | |
| "loss": 6.2838, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 20.192, | |
| "grad_norm": 0.14270856976509094, | |
| "learning_rate": 0.00029698423136925473, | |
| "loss": 6.2077, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 20.208, | |
| "grad_norm": 0.1994931846857071, | |
| "learning_rate": 0.0002969818312732509, | |
| "loss": 6.3276, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 20.224, | |
| "grad_norm": 0.2308851182460785, | |
| "learning_rate": 0.00029697943117724707, | |
| "loss": 6.3211, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 20.24, | |
| "grad_norm": 0.21615839004516602, | |
| "learning_rate": 0.00029697703108124324, | |
| "loss": 6.2481, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 20.256, | |
| "grad_norm": 0.14972296357154846, | |
| "learning_rate": 0.0002969746309852394, | |
| "loss": 6.3543, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 20.272, | |
| "grad_norm": 0.164517343044281, | |
| "learning_rate": 0.0002969722308892356, | |
| "loss": 6.3991, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 20.288, | |
| "grad_norm": 0.15623216331005096, | |
| "learning_rate": 0.0002969698307932317, | |
| "loss": 6.6786, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 20.304, | |
| "grad_norm": 0.1451660692691803, | |
| "learning_rate": 0.00029696743069722786, | |
| "loss": 6.2966, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 20.32, | |
| "grad_norm": 0.17200326919555664, | |
| "learning_rate": 0.00029696503060122403, | |
| "loss": 6.4685, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 20.336, | |
| "grad_norm": 0.15096783638000488, | |
| "learning_rate": 0.0002969626305052202, | |
| "loss": 6.2486, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 20.352, | |
| "grad_norm": 0.14257729053497314, | |
| "learning_rate": 0.00029696023040921637, | |
| "loss": 6.2078, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 20.368, | |
| "grad_norm": 0.21399612724781036, | |
| "learning_rate": 0.0002969578303132125, | |
| "loss": 6.0766, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 20.384, | |
| "grad_norm": 0.11737848818302155, | |
| "learning_rate": 0.00029695543021720865, | |
| "loss": 6.3663, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "grad_norm": 0.13575823605060577, | |
| "learning_rate": 0.0002969530301212048, | |
| "loss": 6.202, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 20.416, | |
| "grad_norm": 0.15899422764778137, | |
| "learning_rate": 0.000296950630025201, | |
| "loss": 6.0727, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 20.432, | |
| "grad_norm": 0.18363483250141144, | |
| "learning_rate": 0.00029694822992919716, | |
| "loss": 6.594, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 20.448, | |
| "grad_norm": 0.1325751096010208, | |
| "learning_rate": 0.00029694582983319333, | |
| "loss": 6.532, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 20.464, | |
| "grad_norm": 0.13950107991695404, | |
| "learning_rate": 0.00029694342973718944, | |
| "loss": 5.9695, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 20.48, | |
| "grad_norm": 0.09819541126489639, | |
| "learning_rate": 0.0002969410296411856, | |
| "loss": 6.3775, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 20.496, | |
| "grad_norm": 0.15788622200489044, | |
| "learning_rate": 0.0002969386295451818, | |
| "loss": 6.5626, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 20.512, | |
| "grad_norm": 0.1338583081960678, | |
| "learning_rate": 0.00029693622944917795, | |
| "loss": 6.3808, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 20.528, | |
| "grad_norm": 0.1711709052324295, | |
| "learning_rate": 0.0002969338293531741, | |
| "loss": 6.3297, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 20.544, | |
| "grad_norm": 0.10356644541025162, | |
| "learning_rate": 0.00029693142925717023, | |
| "loss": 6.2275, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "grad_norm": 0.17266201972961426, | |
| "learning_rate": 0.0002969290291611664, | |
| "loss": 6.399, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 20.576, | |
| "grad_norm": 0.1582164466381073, | |
| "learning_rate": 0.0002969266290651626, | |
| "loss": 6.186, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 20.592, | |
| "grad_norm": 0.15661326050758362, | |
| "learning_rate": 0.00029692422896915874, | |
| "loss": 6.3988, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 20.608, | |
| "grad_norm": 0.12148367613554001, | |
| "learning_rate": 0.00029692185287411493, | |
| "loss": 6.4026, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 20.624, | |
| "grad_norm": 0.15861108899116516, | |
| "learning_rate": 0.0002969194527781111, | |
| "loss": 6.1632, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 20.64, | |
| "grad_norm": 0.21511606872081757, | |
| "learning_rate": 0.00029691705268210727, | |
| "loss": 6.1254, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 20.656, | |
| "grad_norm": 0.17380183935165405, | |
| "learning_rate": 0.00029691465258610344, | |
| "loss": 5.8979, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 20.672, | |
| "grad_norm": 0.15295742452144623, | |
| "learning_rate": 0.0002969122524900996, | |
| "loss": 6.1504, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 20.688, | |
| "grad_norm": 0.14123979210853577, | |
| "learning_rate": 0.0002969098523940957, | |
| "loss": 6.3968, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 20.704, | |
| "grad_norm": 0.11941767483949661, | |
| "learning_rate": 0.0002969074522980919, | |
| "loss": 6.2761, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 20.72, | |
| "grad_norm": 0.1716291755437851, | |
| "learning_rate": 0.00029690505220208806, | |
| "loss": 6.1725, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 20.736, | |
| "grad_norm": 0.10485927015542984, | |
| "learning_rate": 0.00029690265210608423, | |
| "loss": 6.3992, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 20.752, | |
| "grad_norm": 0.14606288075447083, | |
| "learning_rate": 0.0002969002520100804, | |
| "loss": 6.3221, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 20.768, | |
| "grad_norm": 0.1599857658147812, | |
| "learning_rate": 0.00029689785191407657, | |
| "loss": 6.4159, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 20.784, | |
| "grad_norm": 0.1607884019613266, | |
| "learning_rate": 0.0002968954518180727, | |
| "loss": 6.2899, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "grad_norm": 0.17046970129013062, | |
| "learning_rate": 0.00029689305172206885, | |
| "loss": 6.195, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 20.816, | |
| "grad_norm": 0.17893536388874054, | |
| "learning_rate": 0.000296890651626065, | |
| "loss": 6.3987, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 20.832, | |
| "grad_norm": 0.15878397226333618, | |
| "learning_rate": 0.0002968882515300612, | |
| "loss": 6.8826, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 20.848, | |
| "grad_norm": 0.17702220380306244, | |
| "learning_rate": 0.00029688585143405736, | |
| "loss": 6.4912, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 20.864, | |
| "grad_norm": 0.1281166672706604, | |
| "learning_rate": 0.0002968834513380535, | |
| "loss": 6.5531, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 20.88, | |
| "grad_norm": 0.16799704730510712, | |
| "learning_rate": 0.00029688105124204964, | |
| "loss": 5.9929, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 20.896, | |
| "grad_norm": 0.1236133724451065, | |
| "learning_rate": 0.0002968786511460458, | |
| "loss": 6.0232, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 20.912, | |
| "grad_norm": 0.1369544267654419, | |
| "learning_rate": 0.000296876251050042, | |
| "loss": 6.5761, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 20.928, | |
| "grad_norm": 0.13266846537590027, | |
| "learning_rate": 0.00029687385095403815, | |
| "loss": 6.1677, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 20.944, | |
| "grad_norm": 0.11849372833967209, | |
| "learning_rate": 0.0002968714508580343, | |
| "loss": 6.0787, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 20.96, | |
| "grad_norm": 0.11395172029733658, | |
| "learning_rate": 0.00029686905076203044, | |
| "loss": 6.2634, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 20.976, | |
| "grad_norm": 0.11821906268596649, | |
| "learning_rate": 0.0002968666746669866, | |
| "loss": 6.388, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 20.992, | |
| "grad_norm": 0.12622199952602386, | |
| "learning_rate": 0.00029686427457098285, | |
| "loss": 6.0103, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 21.008, | |
| "grad_norm": 0.16676801443099976, | |
| "learning_rate": 0.00029686187447497896, | |
| "loss": 5.865, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 21.024, | |
| "grad_norm": 0.15502384305000305, | |
| "learning_rate": 0.00029685947437897513, | |
| "loss": 6.165, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 21.04, | |
| "grad_norm": 0.24440471827983856, | |
| "learning_rate": 0.0002968570742829713, | |
| "loss": 5.9314, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 21.056, | |
| "grad_norm": 0.1315223127603531, | |
| "learning_rate": 0.00029685467418696747, | |
| "loss": 6.0678, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 21.072, | |
| "grad_norm": 0.1865660399198532, | |
| "learning_rate": 0.00029685227409096364, | |
| "loss": 5.9805, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 21.088, | |
| "grad_norm": 0.2066924124956131, | |
| "learning_rate": 0.0002968498739949598, | |
| "loss": 6.1499, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 21.104, | |
| "grad_norm": 0.14284636080265045, | |
| "learning_rate": 0.0002968474738989559, | |
| "loss": 5.7731, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "grad_norm": 0.15058225393295288, | |
| "learning_rate": 0.0002968450738029521, | |
| "loss": 6.1113, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 21.136, | |
| "grad_norm": 0.12619538605213165, | |
| "learning_rate": 0.00029684267370694826, | |
| "loss": 5.9437, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 21.152, | |
| "grad_norm": 0.15766064822673798, | |
| "learning_rate": 0.00029684027361094443, | |
| "loss": 6.2503, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 21.168, | |
| "grad_norm": 0.14563268423080444, | |
| "learning_rate": 0.0002968378735149406, | |
| "loss": 5.96, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 21.184, | |
| "grad_norm": 0.14157824218273163, | |
| "learning_rate": 0.0002968354734189367, | |
| "loss": 6.1794, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "grad_norm": 0.18574143946170807, | |
| "learning_rate": 0.0002968330733229329, | |
| "loss": 6.3155, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 21.216, | |
| "grad_norm": 0.11855421960353851, | |
| "learning_rate": 0.00029683067322692905, | |
| "loss": 6.4108, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 21.232, | |
| "grad_norm": 0.12140708416700363, | |
| "learning_rate": 0.0002968282731309252, | |
| "loss": 6.0888, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 21.248, | |
| "grad_norm": 0.17192867398262024, | |
| "learning_rate": 0.0002968258730349214, | |
| "loss": 6.2884, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 21.264, | |
| "grad_norm": 0.13360394537448883, | |
| "learning_rate": 0.00029682347293891756, | |
| "loss": 6.1993, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 21.28, | |
| "grad_norm": 0.16163136065006256, | |
| "learning_rate": 0.0002968210968438737, | |
| "loss": 6.2262, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 21.296, | |
| "grad_norm": 0.12919676303863525, | |
| "learning_rate": 0.00029681869674786987, | |
| "loss": 5.8, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 21.312, | |
| "grad_norm": 0.1594499945640564, | |
| "learning_rate": 0.00029681629665186603, | |
| "loss": 5.8055, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 21.328, | |
| "grad_norm": 0.12262352555990219, | |
| "learning_rate": 0.0002968138965558622, | |
| "loss": 5.6412, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 21.344, | |
| "grad_norm": 0.16952601075172424, | |
| "learning_rate": 0.0002968114964598584, | |
| "loss": 6.0173, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 21.36, | |
| "grad_norm": 0.17378447949886322, | |
| "learning_rate": 0.0002968090963638545, | |
| "loss": 5.5105, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 21.376, | |
| "grad_norm": 0.12117540836334229, | |
| "learning_rate": 0.00029680669626785066, | |
| "loss": 6.5432, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 21.392, | |
| "grad_norm": 0.15760718286037445, | |
| "learning_rate": 0.0002968042961718468, | |
| "loss": 5.6998, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 21.408, | |
| "grad_norm": 0.20163291692733765, | |
| "learning_rate": 0.000296801896075843, | |
| "loss": 5.9457, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 21.424, | |
| "grad_norm": 0.1601804941892624, | |
| "learning_rate": 0.00029679949597983916, | |
| "loss": 5.7331, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 21.44, | |
| "grad_norm": 0.147283673286438, | |
| "learning_rate": 0.00029679709588383533, | |
| "loss": 6.034, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 21.456, | |
| "grad_norm": 0.1677253395318985, | |
| "learning_rate": 0.00029679469578783145, | |
| "loss": 6.4454, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 21.472, | |
| "grad_norm": 0.1402285099029541, | |
| "learning_rate": 0.0002967922956918276, | |
| "loss": 5.9842, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 21.488, | |
| "grad_norm": 0.185127392411232, | |
| "learning_rate": 0.00029678989559582384, | |
| "loss": 6.0976, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 21.504, | |
| "grad_norm": 0.17136482894420624, | |
| "learning_rate": 0.00029678749549981996, | |
| "loss": 6.3848, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 21.52, | |
| "grad_norm": 0.14343611896038055, | |
| "learning_rate": 0.0002967850954038161, | |
| "loss": 6.1087, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 21.536, | |
| "grad_norm": 0.13721515238285065, | |
| "learning_rate": 0.0002967826953078123, | |
| "loss": 6.0383, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 21.552, | |
| "grad_norm": 0.13419759273529053, | |
| "learning_rate": 0.00029678029521180846, | |
| "loss": 5.8767, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 21.568, | |
| "grad_norm": 0.18504373729228973, | |
| "learning_rate": 0.00029677789511580463, | |
| "loss": 6.0607, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 21.584, | |
| "grad_norm": 0.14880910515785217, | |
| "learning_rate": 0.0002967754950198008, | |
| "loss": 5.9108, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "grad_norm": 0.13054971396923065, | |
| "learning_rate": 0.0002967730949237969, | |
| "loss": 6.0197, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 21.616, | |
| "grad_norm": 0.16096660494804382, | |
| "learning_rate": 0.0002967706948277931, | |
| "loss": 5.8114, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 21.632, | |
| "grad_norm": 0.16552191972732544, | |
| "learning_rate": 0.00029676829473178926, | |
| "loss": 6.2389, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 21.648, | |
| "grad_norm": 0.13705958425998688, | |
| "learning_rate": 0.0002967658946357854, | |
| "loss": 6.2474, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 21.664, | |
| "grad_norm": 0.17535176873207092, | |
| "learning_rate": 0.0002967634945397816, | |
| "loss": 6.0806, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "grad_norm": 0.15185397863388062, | |
| "learning_rate": 0.0002967610944437777, | |
| "loss": 6.2673, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 21.696, | |
| "grad_norm": 0.1459989696741104, | |
| "learning_rate": 0.0002967586943477739, | |
| "loss": 6.1566, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 21.712, | |
| "grad_norm": 0.1216706857085228, | |
| "learning_rate": 0.00029675629425177005, | |
| "loss": 5.9801, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 21.728, | |
| "grad_norm": 0.1349131315946579, | |
| "learning_rate": 0.0002967538941557662, | |
| "loss": 5.8902, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 21.744, | |
| "grad_norm": 0.14793895184993744, | |
| "learning_rate": 0.0002967514940597624, | |
| "loss": 5.7143, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "grad_norm": 0.171220600605011, | |
| "learning_rate": 0.00029674909396375855, | |
| "loss": 5.7715, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 21.776, | |
| "grad_norm": 0.18677209317684174, | |
| "learning_rate": 0.00029674669386775467, | |
| "loss": 5.9996, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 21.792, | |
| "grad_norm": 0.153004989027977, | |
| "learning_rate": 0.00029674429377175084, | |
| "loss": 6.1678, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 21.808, | |
| "grad_norm": 0.12716227769851685, | |
| "learning_rate": 0.000296741893675747, | |
| "loss": 5.8525, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 21.824, | |
| "grad_norm": 0.15531957149505615, | |
| "learning_rate": 0.0002967394935797432, | |
| "loss": 5.703, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 21.84, | |
| "grad_norm": 0.16813132166862488, | |
| "learning_rate": 0.00029673709348373935, | |
| "loss": 5.7367, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 21.856, | |
| "grad_norm": 0.1366407722234726, | |
| "learning_rate": 0.0002967346933877355, | |
| "loss": 6.4011, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 21.872, | |
| "grad_norm": 0.1486620455980301, | |
| "learning_rate": 0.00029673229329173163, | |
| "loss": 6.0592, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 21.888, | |
| "grad_norm": 0.1474551409482956, | |
| "learning_rate": 0.0002967298931957278, | |
| "loss": 6.1269, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 21.904, | |
| "grad_norm": 0.1317261904478073, | |
| "learning_rate": 0.00029672749309972397, | |
| "loss": 6.2704, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 21.92, | |
| "grad_norm": 0.12736591696739197, | |
| "learning_rate": 0.00029672511700468016, | |
| "loss": 5.9018, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 21.936, | |
| "grad_norm": 0.17512458562850952, | |
| "learning_rate": 0.0002967227169086763, | |
| "loss": 6.1423, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 21.951999999999998, | |
| "grad_norm": 0.2035478949546814, | |
| "learning_rate": 0.0002967203408136325, | |
| "loss": 5.8421, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 21.968, | |
| "grad_norm": 0.15790584683418274, | |
| "learning_rate": 0.0002967179407176287, | |
| "loss": 5.6449, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 21.984, | |
| "grad_norm": 0.13050822913646698, | |
| "learning_rate": 0.00029671554062162485, | |
| "loss": 6.0866, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.1332990825176239, | |
| "learning_rate": 0.00029671314052562097, | |
| "loss": 5.8362, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 22.016, | |
| "grad_norm": 0.14409734308719635, | |
| "learning_rate": 0.00029671074042961714, | |
| "loss": 5.7401, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 22.032, | |
| "grad_norm": 0.1513838768005371, | |
| "learning_rate": 0.0002967083403336133, | |
| "loss": 5.8022, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 22.048, | |
| "grad_norm": 0.14416912198066711, | |
| "learning_rate": 0.0002967059402376095, | |
| "loss": 5.7687, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 22.064, | |
| "grad_norm": 0.13069897890090942, | |
| "learning_rate": 0.00029670354014160565, | |
| "loss": 5.7314, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 22.08, | |
| "grad_norm": 0.15089532732963562, | |
| "learning_rate": 0.0002967011400456018, | |
| "loss": 5.6511, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 22.096, | |
| "grad_norm": 0.1493406444787979, | |
| "learning_rate": 0.00029669873994959793, | |
| "loss": 5.7553, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 22.112, | |
| "grad_norm": 0.11403771489858627, | |
| "learning_rate": 0.0002966963398535941, | |
| "loss": 5.8785, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 22.128, | |
| "grad_norm": 0.1418454647064209, | |
| "learning_rate": 0.00029669393975759027, | |
| "loss": 5.906, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 22.144, | |
| "grad_norm": 0.14632883667945862, | |
| "learning_rate": 0.00029669153966158644, | |
| "loss": 5.7911, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 22.16, | |
| "grad_norm": 0.18317896127700806, | |
| "learning_rate": 0.0002966891395655826, | |
| "loss": 5.6022, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 22.176, | |
| "grad_norm": 0.14640462398529053, | |
| "learning_rate": 0.0002966867394695788, | |
| "loss": 5.6879, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 22.192, | |
| "grad_norm": 0.11322261393070221, | |
| "learning_rate": 0.0002966843393735749, | |
| "loss": 5.679, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 22.208, | |
| "grad_norm": 0.14412596821784973, | |
| "learning_rate": 0.00029668193927757106, | |
| "loss": 5.6202, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 22.224, | |
| "grad_norm": 0.14023444056510925, | |
| "learning_rate": 0.00029667953918156723, | |
| "loss": 6.0133, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 22.24, | |
| "grad_norm": 0.18092051148414612, | |
| "learning_rate": 0.0002966771390855634, | |
| "loss": 5.6881, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 22.256, | |
| "grad_norm": 0.13267236948013306, | |
| "learning_rate": 0.00029667473898955957, | |
| "loss": 5.742, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 22.272, | |
| "grad_norm": 0.1066688597202301, | |
| "learning_rate": 0.0002966723388935557, | |
| "loss": 5.9524, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 22.288, | |
| "grad_norm": 0.17234094440937042, | |
| "learning_rate": 0.00029666993879755185, | |
| "loss": 6.0385, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 22.304, | |
| "grad_norm": 0.1593136042356491, | |
| "learning_rate": 0.000296667538701548, | |
| "loss": 5.7894, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "grad_norm": 0.1161966621875763, | |
| "learning_rate": 0.0002966651386055442, | |
| "loss": 5.6333, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 22.336, | |
| "grad_norm": 0.16088221967220306, | |
| "learning_rate": 0.00029666273850954036, | |
| "loss": 5.3016, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 22.352, | |
| "grad_norm": 0.195027694106102, | |
| "learning_rate": 0.00029666033841353653, | |
| "loss": 5.8886, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 22.368, | |
| "grad_norm": 0.17010509967803955, | |
| "learning_rate": 0.00029665793831753264, | |
| "loss": 5.7462, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 22.384, | |
| "grad_norm": 0.15900500118732452, | |
| "learning_rate": 0.0002966555382215288, | |
| "loss": 6.1951, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "grad_norm": 0.20321440696716309, | |
| "learning_rate": 0.000296653138125525, | |
| "loss": 5.8264, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 22.416, | |
| "grad_norm": 0.21823586523532867, | |
| "learning_rate": 0.00029665073802952115, | |
| "loss": 5.7779, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 22.432, | |
| "grad_norm": 0.12739881873130798, | |
| "learning_rate": 0.0002966483379335173, | |
| "loss": 5.6477, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 22.448, | |
| "grad_norm": 0.1288122534751892, | |
| "learning_rate": 0.00029664593783751344, | |
| "loss": 5.5937, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 22.464, | |
| "grad_norm": 0.12690824270248413, | |
| "learning_rate": 0.0002966435377415096, | |
| "loss": 6.0249, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 22.48, | |
| "grad_norm": 0.16361913084983826, | |
| "learning_rate": 0.00029664113764550583, | |
| "loss": 5.8957, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 22.496, | |
| "grad_norm": 0.13729694485664368, | |
| "learning_rate": 0.000296638737549502, | |
| "loss": 5.8405, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 22.512, | |
| "grad_norm": 0.19917264580726624, | |
| "learning_rate": 0.0002966363374534981, | |
| "loss": 5.9084, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 22.528, | |
| "grad_norm": 0.15145164728164673, | |
| "learning_rate": 0.0002966339373574943, | |
| "loss": 5.4631, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 22.544, | |
| "grad_norm": 0.11967241019010544, | |
| "learning_rate": 0.00029663153726149045, | |
| "loss": 5.9098, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 22.56, | |
| "grad_norm": 0.15000027418136597, | |
| "learning_rate": 0.0002966291371654866, | |
| "loss": 5.7238, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 22.576, | |
| "grad_norm": 0.16883157193660736, | |
| "learning_rate": 0.0002966267370694828, | |
| "loss": 5.738, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 22.592, | |
| "grad_norm": 0.13367842137813568, | |
| "learning_rate": 0.0002966243369734789, | |
| "loss": 5.5043, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 22.608, | |
| "grad_norm": 0.15113677084445953, | |
| "learning_rate": 0.00029662193687747507, | |
| "loss": 5.6651, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 22.624, | |
| "grad_norm": 0.13519582152366638, | |
| "learning_rate": 0.00029661953678147124, | |
| "loss": 5.9082, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 22.64, | |
| "grad_norm": 0.15879906713962555, | |
| "learning_rate": 0.0002966171366854674, | |
| "loss": 6.094, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 22.656, | |
| "grad_norm": 0.16288715600967407, | |
| "learning_rate": 0.0002966147365894636, | |
| "loss": 5.5707, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 22.672, | |
| "grad_norm": 0.14412395656108856, | |
| "learning_rate": 0.00029661233649345975, | |
| "loss": 5.6827, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 22.688, | |
| "grad_norm": 0.14847436547279358, | |
| "learning_rate": 0.00029660993639745586, | |
| "loss": 5.4179, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 22.704, | |
| "grad_norm": 0.13256803154945374, | |
| "learning_rate": 0.00029660753630145203, | |
| "loss": 5.6927, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 22.72, | |
| "grad_norm": 0.13526926934719086, | |
| "learning_rate": 0.0002966051362054482, | |
| "loss": 5.7505, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 22.736, | |
| "grad_norm": 0.2226150929927826, | |
| "learning_rate": 0.00029660273610944437, | |
| "loss": 5.6683, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 22.752, | |
| "grad_norm": 0.12251828610897064, | |
| "learning_rate": 0.00029660033601344054, | |
| "loss": 5.4908, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 22.768, | |
| "grad_norm": 0.15432491898536682, | |
| "learning_rate": 0.00029659793591743666, | |
| "loss": 5.5662, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 22.784, | |
| "grad_norm": 0.13890361785888672, | |
| "learning_rate": 0.0002965955358214328, | |
| "loss": 5.6202, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "grad_norm": 0.10568337142467499, | |
| "learning_rate": 0.000296593135725429, | |
| "loss": 5.7232, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 22.816, | |
| "grad_norm": 0.14877153933048248, | |
| "learning_rate": 0.00029659073562942516, | |
| "loss": 5.4585, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 22.832, | |
| "grad_norm": 0.1703936904668808, | |
| "learning_rate": 0.00029658833553342133, | |
| "loss": 5.8294, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 22.848, | |
| "grad_norm": 0.12574242055416107, | |
| "learning_rate": 0.0002965859594383775, | |
| "loss": 6.0963, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 22.864, | |
| "grad_norm": 0.1556757390499115, | |
| "learning_rate": 0.00029658355934237364, | |
| "loss": 5.6681, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 22.88, | |
| "grad_norm": 0.14058822393417358, | |
| "learning_rate": 0.0002965811592463698, | |
| "loss": 5.6148, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 22.896, | |
| "grad_norm": 0.1746063232421875, | |
| "learning_rate": 0.000296578759150366, | |
| "loss": 5.698, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 22.912, | |
| "grad_norm": 0.14458870887756348, | |
| "learning_rate": 0.00029657635905436214, | |
| "loss": 5.439, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 22.928, | |
| "grad_norm": 0.1708308756351471, | |
| "learning_rate": 0.0002965739589583583, | |
| "loss": 5.8077, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 22.944, | |
| "grad_norm": 0.1382734328508377, | |
| "learning_rate": 0.00029657155886235443, | |
| "loss": 5.603, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 22.96, | |
| "grad_norm": 0.15728691220283508, | |
| "learning_rate": 0.0002965691587663506, | |
| "loss": 5.8985, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 22.976, | |
| "grad_norm": 0.12880076467990875, | |
| "learning_rate": 0.00029656675867034677, | |
| "loss": 5.7958, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 22.992, | |
| "grad_norm": 0.130670964717865, | |
| "learning_rate": 0.000296564358574343, | |
| "loss": 5.6226, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 23.008, | |
| "grad_norm": 0.1519329994916916, | |
| "learning_rate": 0.0002965619584783391, | |
| "loss": 5.5619, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 23.024, | |
| "grad_norm": 0.11900737136602402, | |
| "learning_rate": 0.0002965595583823353, | |
| "loss": 5.5148, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 23.04, | |
| "grad_norm": 0.13805437088012695, | |
| "learning_rate": 0.00029655715828633144, | |
| "loss": 5.1992, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 23.056, | |
| "grad_norm": 0.15381775796413422, | |
| "learning_rate": 0.0002965547581903276, | |
| "loss": 5.6994, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 23.072, | |
| "grad_norm": 0.17571000754833221, | |
| "learning_rate": 0.0002965523580943238, | |
| "loss": 5.4076, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 23.088, | |
| "grad_norm": 0.1299617439508438, | |
| "learning_rate": 0.0002965499579983199, | |
| "loss": 5.5817, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 23.104, | |
| "grad_norm": 0.1709066480398178, | |
| "learning_rate": 0.00029654755790231607, | |
| "loss": 5.6442, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "grad_norm": 0.11673315614461899, | |
| "learning_rate": 0.00029654515780631224, | |
| "loss": 5.4461, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 23.136, | |
| "grad_norm": 0.17694547772407532, | |
| "learning_rate": 0.0002965427577103084, | |
| "loss": 5.4203, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 23.152, | |
| "grad_norm": 0.1397058516740799, | |
| "learning_rate": 0.0002965403576143046, | |
| "loss": 5.6535, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 23.168, | |
| "grad_norm": 0.14913706481456757, | |
| "learning_rate": 0.00029653795751830074, | |
| "loss": 5.327, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 23.184, | |
| "grad_norm": 0.0980440080165863, | |
| "learning_rate": 0.0002965355814232569, | |
| "loss": 5.6265, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "grad_norm": 0.14519555866718292, | |
| "learning_rate": 0.00029653318132725305, | |
| "loss": 5.5968, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 23.216, | |
| "grad_norm": 0.14121969044208527, | |
| "learning_rate": 0.0002965307812312492, | |
| "loss": 5.3419, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 23.232, | |
| "grad_norm": 0.14867204427719116, | |
| "learning_rate": 0.0002965283811352454, | |
| "loss": 5.5432, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 23.248, | |
| "grad_norm": 0.14526410400867462, | |
| "learning_rate": 0.00029652598103924155, | |
| "loss": 5.4119, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 23.264, | |
| "grad_norm": 0.16068951785564423, | |
| "learning_rate": 0.00029652358094323767, | |
| "loss": 5.6084, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 23.28, | |
| "grad_norm": 0.1540200263261795, | |
| "learning_rate": 0.00029652118084723384, | |
| "loss": 5.3346, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 23.296, | |
| "grad_norm": 0.1306939572095871, | |
| "learning_rate": 0.00029651878075123, | |
| "loss": 5.4401, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 23.312, | |
| "grad_norm": 0.19503143429756165, | |
| "learning_rate": 0.0002965163806552262, | |
| "loss": 5.5145, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 23.328, | |
| "grad_norm": 0.16698400676250458, | |
| "learning_rate": 0.00029651398055922235, | |
| "loss": 5.4459, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 23.344, | |
| "grad_norm": 0.14990036189556122, | |
| "learning_rate": 0.0002965115804632185, | |
| "loss": 5.9844, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 23.36, | |
| "grad_norm": 0.12152257561683655, | |
| "learning_rate": 0.00029650918036721463, | |
| "loss": 5.4034, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 23.376, | |
| "grad_norm": 0.12588883936405182, | |
| "learning_rate": 0.0002965067802712108, | |
| "loss": 5.6587, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 23.392, | |
| "grad_norm": 0.13769680261611938, | |
| "learning_rate": 0.00029650438017520697, | |
| "loss": 5.6661, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 23.408, | |
| "grad_norm": 0.18270593881607056, | |
| "learning_rate": 0.00029650198007920314, | |
| "loss": 5.4772, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 23.424, | |
| "grad_norm": 0.16988155245780945, | |
| "learning_rate": 0.0002964995799831993, | |
| "loss": 5.861, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "grad_norm": 0.15813444554805756, | |
| "learning_rate": 0.0002964971798871954, | |
| "loss": 5.5742, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 23.456, | |
| "grad_norm": 0.20319218933582306, | |
| "learning_rate": 0.0002964947797911916, | |
| "loss": 5.5046, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 23.472, | |
| "grad_norm": 0.1794954091310501, | |
| "learning_rate": 0.00029649237969518776, | |
| "loss": 5.4266, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 23.488, | |
| "grad_norm": 0.18233439326286316, | |
| "learning_rate": 0.000296489979599184, | |
| "loss": 5.7988, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 23.504, | |
| "grad_norm": 0.24476204812526703, | |
| "learning_rate": 0.0002964875795031801, | |
| "loss": 5.5573, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 23.52, | |
| "grad_norm": 0.12210160493850708, | |
| "learning_rate": 0.00029648517940717627, | |
| "loss": 5.3991, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 23.536, | |
| "grad_norm": 0.18380597233772278, | |
| "learning_rate": 0.00029648277931117244, | |
| "loss": 5.7061, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 23.552, | |
| "grad_norm": 0.14776001870632172, | |
| "learning_rate": 0.0002964803792151686, | |
| "loss": 5.6827, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 23.568, | |
| "grad_norm": 0.13290056586265564, | |
| "learning_rate": 0.0002964779791191648, | |
| "loss": 5.6598, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 23.584, | |
| "grad_norm": 0.12878666818141937, | |
| "learning_rate": 0.0002964755790231609, | |
| "loss": 5.4732, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "grad_norm": 0.11875222623348236, | |
| "learning_rate": 0.00029647317892715706, | |
| "loss": 5.9345, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 23.616, | |
| "grad_norm": 0.1489972323179245, | |
| "learning_rate": 0.00029647077883115323, | |
| "loss": 5.5631, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 23.632, | |
| "grad_norm": 0.22594046592712402, | |
| "learning_rate": 0.0002964683787351494, | |
| "loss": 5.2854, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 23.648, | |
| "grad_norm": 0.14621250331401825, | |
| "learning_rate": 0.00029646597863914557, | |
| "loss": 5.2938, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 23.664, | |
| "grad_norm": 0.14641734957695007, | |
| "learning_rate": 0.00029646357854314174, | |
| "loss": 5.7265, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 23.68, | |
| "grad_norm": 0.14452804625034332, | |
| "learning_rate": 0.00029646117844713785, | |
| "loss": 5.3081, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 23.696, | |
| "grad_norm": 0.1696479767560959, | |
| "learning_rate": 0.000296458778351134, | |
| "loss": 5.7359, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 23.712, | |
| "grad_norm": 0.1629931777715683, | |
| "learning_rate": 0.0002964563782551302, | |
| "loss": 5.8091, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 23.728, | |
| "grad_norm": 0.1588413119316101, | |
| "learning_rate": 0.00029645397815912636, | |
| "loss": 5.8185, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 23.744, | |
| "grad_norm": 0.1528206616640091, | |
| "learning_rate": 0.00029645157806312253, | |
| "loss": 5.6945, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 23.76, | |
| "grad_norm": 0.16446250677108765, | |
| "learning_rate": 0.00029644917796711864, | |
| "loss": 5.1739, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 23.776, | |
| "grad_norm": 0.14487922191619873, | |
| "learning_rate": 0.00029644680187207483, | |
| "loss": 5.5836, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 23.792, | |
| "grad_norm": 0.297879159450531, | |
| "learning_rate": 0.0002964444257770311, | |
| "loss": 5.5247, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 23.808, | |
| "grad_norm": 0.1171737089753151, | |
| "learning_rate": 0.00029644202568102724, | |
| "loss": 5.3085, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 23.824, | |
| "grad_norm": 0.1464715600013733, | |
| "learning_rate": 0.00029643962558502336, | |
| "loss": 5.3029, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 23.84, | |
| "grad_norm": 0.16126649081707, | |
| "learning_rate": 0.0002964372254890195, | |
| "loss": 5.7273, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 23.856, | |
| "grad_norm": 0.10824692994356155, | |
| "learning_rate": 0.0002964348253930157, | |
| "loss": 5.3296, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 23.872, | |
| "grad_norm": 0.14661309123039246, | |
| "learning_rate": 0.00029643242529701187, | |
| "loss": 5.828, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 23.888, | |
| "grad_norm": 0.16918961703777313, | |
| "learning_rate": 0.00029643002520100803, | |
| "loss": 5.359, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 23.904, | |
| "grad_norm": 0.14028948545455933, | |
| "learning_rate": 0.00029642762510500415, | |
| "loss": 5.5027, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "grad_norm": 0.15497733652591705, | |
| "learning_rate": 0.0002964252250090003, | |
| "loss": 5.7539, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 23.936, | |
| "grad_norm": 0.12349986284971237, | |
| "learning_rate": 0.0002964228249129965, | |
| "loss": 5.1582, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 23.951999999999998, | |
| "grad_norm": 0.1359599381685257, | |
| "learning_rate": 0.00029642042481699266, | |
| "loss": 5.4394, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 23.968, | |
| "grad_norm": 0.18629401922225952, | |
| "learning_rate": 0.0002964180247209888, | |
| "loss": 5.4743, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 23.984, | |
| "grad_norm": 0.1438770890235901, | |
| "learning_rate": 0.000296415624624985, | |
| "loss": 5.4707, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.11876608431339264, | |
| "learning_rate": 0.0002964132245289811, | |
| "loss": 5.2108, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 24.016, | |
| "grad_norm": 0.1379069685935974, | |
| "learning_rate": 0.0002964108244329773, | |
| "loss": 5.5858, | |
| "step": 150100 | |
| }, | |
| { | |
| "epoch": 24.032, | |
| "grad_norm": 0.15197959542274475, | |
| "learning_rate": 0.00029640842433697345, | |
| "loss": 5.3452, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 24.048, | |
| "grad_norm": 0.16093584895133972, | |
| "learning_rate": 0.0002964060242409696, | |
| "loss": 5.1725, | |
| "step": 150300 | |
| }, | |
| { | |
| "epoch": 24.064, | |
| "grad_norm": 0.14459937810897827, | |
| "learning_rate": 0.0002964036241449658, | |
| "loss": 5.529, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 24.08, | |
| "grad_norm": 0.15908825397491455, | |
| "learning_rate": 0.0002964012240489619, | |
| "loss": 5.0667, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 24.096, | |
| "grad_norm": 0.14320479333400726, | |
| "learning_rate": 0.00029639882395295807, | |
| "loss": 5.4541, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 24.112, | |
| "grad_norm": 0.1382274329662323, | |
| "learning_rate": 0.00029639642385695424, | |
| "loss": 5.4337, | |
| "step": 150700 | |
| }, | |
| { | |
| "epoch": 24.128, | |
| "grad_norm": 0.09485090523958206, | |
| "learning_rate": 0.0002963940237609504, | |
| "loss": 5.5169, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 24.144, | |
| "grad_norm": 0.1434488147497177, | |
| "learning_rate": 0.0002963916236649466, | |
| "loss": 5.1838, | |
| "step": 150900 | |
| }, | |
| { | |
| "epoch": 24.16, | |
| "grad_norm": 0.172550767660141, | |
| "learning_rate": 0.00029638922356894275, | |
| "loss": 5.4995, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 24.176, | |
| "grad_norm": 0.17296665906906128, | |
| "learning_rate": 0.00029638682347293886, | |
| "loss": 5.3814, | |
| "step": 151100 | |
| }, | |
| { | |
| "epoch": 24.192, | |
| "grad_norm": 0.13183431327342987, | |
| "learning_rate": 0.00029638442337693503, | |
| "loss": 5.4961, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 24.208, | |
| "grad_norm": 0.11805009096860886, | |
| "learning_rate": 0.0002963820472818913, | |
| "loss": 5.3575, | |
| "step": 151300 | |
| }, | |
| { | |
| "epoch": 24.224, | |
| "grad_norm": 0.1694483608007431, | |
| "learning_rate": 0.0002963796471858874, | |
| "loss": 5.4198, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 24.24, | |
| "grad_norm": 0.14694049954414368, | |
| "learning_rate": 0.00029637724708988356, | |
| "loss": 5.2369, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 24.256, | |
| "grad_norm": 0.14818693697452545, | |
| "learning_rate": 0.00029637484699387973, | |
| "loss": 5.5989, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 24.272, | |
| "grad_norm": 0.12142101675271988, | |
| "learning_rate": 0.0002963724468978759, | |
| "loss": 5.5808, | |
| "step": 151700 | |
| }, | |
| { | |
| "epoch": 24.288, | |
| "grad_norm": 0.1072693020105362, | |
| "learning_rate": 0.00029637004680187207, | |
| "loss": 5.2257, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 24.304, | |
| "grad_norm": 0.20452247560024261, | |
| "learning_rate": 0.00029636764670586824, | |
| "loss": 4.9512, | |
| "step": 151900 | |
| }, | |
| { | |
| "epoch": 24.32, | |
| "grad_norm": 0.13785667717456818, | |
| "learning_rate": 0.00029636524660986435, | |
| "loss": 5.3486, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 24.336, | |
| "grad_norm": 0.16348302364349365, | |
| "learning_rate": 0.0002963628465138605, | |
| "loss": 5.416, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 24.352, | |
| "grad_norm": 0.12873555719852448, | |
| "learning_rate": 0.0002963604464178567, | |
| "loss": 5.4854, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 24.368, | |
| "grad_norm": 0.14430370926856995, | |
| "learning_rate": 0.00029635804632185286, | |
| "loss": 5.083, | |
| "step": 152300 | |
| }, | |
| { | |
| "epoch": 24.384, | |
| "grad_norm": 0.14247077703475952, | |
| "learning_rate": 0.00029635564622584903, | |
| "loss": 5.2926, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "grad_norm": 0.12942449748516083, | |
| "learning_rate": 0.00029635324612984514, | |
| "loss": 5.2287, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 24.416, | |
| "grad_norm": 0.1290571689605713, | |
| "learning_rate": 0.0002963508460338413, | |
| "loss": 5.1295, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 24.432, | |
| "grad_norm": 0.14392858743667603, | |
| "learning_rate": 0.0002963484459378375, | |
| "loss": 5.2795, | |
| "step": 152700 | |
| }, | |
| { | |
| "epoch": 24.448, | |
| "grad_norm": 0.10403969883918762, | |
| "learning_rate": 0.00029634604584183365, | |
| "loss": 5.4616, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 24.464, | |
| "grad_norm": 0.1357210874557495, | |
| "learning_rate": 0.0002963436457458298, | |
| "loss": 5.0671, | |
| "step": 152900 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "grad_norm": 0.162188321352005, | |
| "learning_rate": 0.000296341245649826, | |
| "loss": 5.1244, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 24.496, | |
| "grad_norm": 0.1423524171113968, | |
| "learning_rate": 0.0002963388455538221, | |
| "loss": 5.2658, | |
| "step": 153100 | |
| }, | |
| { | |
| "epoch": 24.512, | |
| "grad_norm": 0.15725597739219666, | |
| "learning_rate": 0.00029633644545781827, | |
| "loss": 5.4486, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 24.528, | |
| "grad_norm": 0.10184895247220993, | |
| "learning_rate": 0.00029633404536181444, | |
| "loss": 5.1975, | |
| "step": 153300 | |
| }, | |
| { | |
| "epoch": 24.544, | |
| "grad_norm": 0.11968593299388885, | |
| "learning_rate": 0.0002963316452658106, | |
| "loss": 5.0282, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 24.56, | |
| "grad_norm": 0.15125450491905212, | |
| "learning_rate": 0.0002963292451698068, | |
| "loss": 5.0548, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 24.576, | |
| "grad_norm": 0.1498018205165863, | |
| "learning_rate": 0.0002963268450738029, | |
| "loss": 5.2235, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 24.592, | |
| "grad_norm": 0.14961381256580353, | |
| "learning_rate": 0.00029632444497779906, | |
| "loss": 5.282, | |
| "step": 153700 | |
| }, | |
| { | |
| "epoch": 24.608, | |
| "grad_norm": 0.10805343836545944, | |
| "learning_rate": 0.00029632204488179523, | |
| "loss": 5.2164, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 24.624, | |
| "grad_norm": 0.1407497674226761, | |
| "learning_rate": 0.0002963196447857914, | |
| "loss": 5.8793, | |
| "step": 153900 | |
| }, | |
| { | |
| "epoch": 24.64, | |
| "grad_norm": 0.15589803457260132, | |
| "learning_rate": 0.00029631724468978757, | |
| "loss": 5.2803, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 24.656, | |
| "grad_norm": 0.15549539029598236, | |
| "learning_rate": 0.00029631484459378374, | |
| "loss": 5.5255, | |
| "step": 154100 | |
| }, | |
| { | |
| "epoch": 24.672, | |
| "grad_norm": 0.14697429537773132, | |
| "learning_rate": 0.00029631244449777986, | |
| "loss": 5.2088, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 24.688, | |
| "grad_norm": 0.14445632696151733, | |
| "learning_rate": 0.000296310044401776, | |
| "loss": 5.314, | |
| "step": 154300 | |
| }, | |
| { | |
| "epoch": 24.704, | |
| "grad_norm": 0.13264203071594238, | |
| "learning_rate": 0.0002963076443057722, | |
| "loss": 5.1363, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 24.72, | |
| "grad_norm": 0.14595112204551697, | |
| "learning_rate": 0.00029630524420976836, | |
| "loss": 5.1834, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 24.736, | |
| "grad_norm": 0.15063650906085968, | |
| "learning_rate": 0.00029630284411376453, | |
| "loss": 5.2409, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 24.752, | |
| "grad_norm": 0.1531144678592682, | |
| "learning_rate": 0.00029630044401776065, | |
| "loss": 5.3414, | |
| "step": 154700 | |
| }, | |
| { | |
| "epoch": 24.768, | |
| "grad_norm": 0.15418265759944916, | |
| "learning_rate": 0.0002962980439217568, | |
| "loss": 5.3579, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 24.784, | |
| "grad_norm": 0.13664741814136505, | |
| "learning_rate": 0.000296295643825753, | |
| "loss": 5.4855, | |
| "step": 154900 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "grad_norm": 0.15261198580265045, | |
| "learning_rate": 0.00029629324372974916, | |
| "loss": 5.5078, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 24.816, | |
| "grad_norm": 0.1436208039522171, | |
| "learning_rate": 0.0002962908436337453, | |
| "loss": 5.2359, | |
| "step": 155100 | |
| }, | |
| { | |
| "epoch": 24.832, | |
| "grad_norm": 0.1557721495628357, | |
| "learning_rate": 0.0002962884435377415, | |
| "loss": 5.1472, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 24.848, | |
| "grad_norm": 0.1639142483472824, | |
| "learning_rate": 0.0002962860434417376, | |
| "loss": 5.1701, | |
| "step": 155300 | |
| }, | |
| { | |
| "epoch": 24.864, | |
| "grad_norm": 0.1857120245695114, | |
| "learning_rate": 0.0002962836433457338, | |
| "loss": 5.3149, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 24.88, | |
| "grad_norm": 0.1384589672088623, | |
| "learning_rate": 0.00029628124324972995, | |
| "loss": 5.1655, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 24.896, | |
| "grad_norm": 0.16934780776500702, | |
| "learning_rate": 0.0002962788431537261, | |
| "loss": 5.0212, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 24.912, | |
| "grad_norm": 0.14011263847351074, | |
| "learning_rate": 0.0002962764430577223, | |
| "loss": 5.3506, | |
| "step": 155700 | |
| }, | |
| { | |
| "epoch": 24.928, | |
| "grad_norm": 0.12232084572315216, | |
| "learning_rate": 0.0002962740429617184, | |
| "loss": 4.9836, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 24.944, | |
| "grad_norm": 0.1219339519739151, | |
| "learning_rate": 0.00029627164286571457, | |
| "loss": 5.337, | |
| "step": 155900 | |
| }, | |
| { | |
| "epoch": 24.96, | |
| "grad_norm": 0.13951101899147034, | |
| "learning_rate": 0.0002962692667706708, | |
| "loss": 5.6947, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 24.976, | |
| "grad_norm": 0.15717874467372894, | |
| "learning_rate": 0.000296266866674667, | |
| "loss": 5.0598, | |
| "step": 156100 | |
| }, | |
| { | |
| "epoch": 24.992, | |
| "grad_norm": 0.16753438115119934, | |
| "learning_rate": 0.0002962644665786631, | |
| "loss": 5.1918, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 25.008, | |
| "grad_norm": 0.11955256760120392, | |
| "learning_rate": 0.00029626206648265927, | |
| "loss": 5.1705, | |
| "step": 156300 | |
| }, | |
| { | |
| "epoch": 25.024, | |
| "grad_norm": 0.11964499950408936, | |
| "learning_rate": 0.00029625966638665544, | |
| "loss": 5.3443, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 25.04, | |
| "grad_norm": 0.123370461165905, | |
| "learning_rate": 0.0002962572662906516, | |
| "loss": 4.9845, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 25.056, | |
| "grad_norm": 0.12556427717208862, | |
| "learning_rate": 0.0002962548661946478, | |
| "loss": 4.9369, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 25.072, | |
| "grad_norm": 0.15033285319805145, | |
| "learning_rate": 0.0002962524660986439, | |
| "loss": 5.1891, | |
| "step": 156700 | |
| }, | |
| { | |
| "epoch": 25.088, | |
| "grad_norm": 0.157626673579216, | |
| "learning_rate": 0.00029625006600264006, | |
| "loss": 5.0871, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 25.104, | |
| "grad_norm": 0.12489177286624908, | |
| "learning_rate": 0.0002962476659066362, | |
| "loss": 4.9887, | |
| "step": 156900 | |
| }, | |
| { | |
| "epoch": 25.12, | |
| "grad_norm": 0.17784586548805237, | |
| "learning_rate": 0.0002962452658106324, | |
| "loss": 4.9263, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 25.136, | |
| "grad_norm": 0.26584434509277344, | |
| "learning_rate": 0.00029624286571462857, | |
| "loss": 5.1268, | |
| "step": 157100 | |
| }, | |
| { | |
| "epoch": 25.152, | |
| "grad_norm": 0.14168865978717804, | |
| "learning_rate": 0.00029624046561862473, | |
| "loss": 5.4578, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 25.168, | |
| "grad_norm": 0.1289631426334381, | |
| "learning_rate": 0.00029623806552262085, | |
| "loss": 5.2466, | |
| "step": 157300 | |
| }, | |
| { | |
| "epoch": 25.184, | |
| "grad_norm": 0.12273957580327988, | |
| "learning_rate": 0.000296235665426617, | |
| "loss": 4.7845, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "grad_norm": 0.24651670455932617, | |
| "learning_rate": 0.0002962332653306132, | |
| "loss": 5.0988, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 25.216, | |
| "grad_norm": 0.1415649801492691, | |
| "learning_rate": 0.00029623086523460936, | |
| "loss": 5.0998, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 25.232, | |
| "grad_norm": 0.1132798045873642, | |
| "learning_rate": 0.0002962284651386055, | |
| "loss": 5.2229, | |
| "step": 157700 | |
| }, | |
| { | |
| "epoch": 25.248, | |
| "grad_norm": 0.10961470752954483, | |
| "learning_rate": 0.00029622606504260164, | |
| "loss": 4.9959, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 25.264, | |
| "grad_norm": 0.16054928302764893, | |
| "learning_rate": 0.0002962236649465978, | |
| "loss": 4.989, | |
| "step": 157900 | |
| }, | |
| { | |
| "epoch": 25.28, | |
| "grad_norm": 0.16918180882930756, | |
| "learning_rate": 0.000296221264850594, | |
| "loss": 5.0824, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 25.296, | |
| "grad_norm": 0.12880262732505798, | |
| "learning_rate": 0.00029621886475459015, | |
| "loss": 4.6069, | |
| "step": 158100 | |
| }, | |
| { | |
| "epoch": 25.312, | |
| "grad_norm": 0.16930246353149414, | |
| "learning_rate": 0.0002962164646585863, | |
| "loss": 5.0421, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 25.328, | |
| "grad_norm": 0.15791450440883636, | |
| "learning_rate": 0.0002962140645625825, | |
| "loss": 5.1324, | |
| "step": 158300 | |
| }, | |
| { | |
| "epoch": 25.344, | |
| "grad_norm": 0.12896622717380524, | |
| "learning_rate": 0.0002962116644665786, | |
| "loss": 4.8697, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 25.36, | |
| "grad_norm": 0.15522588789463043, | |
| "learning_rate": 0.00029620926437057477, | |
| "loss": 5.112, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 25.376, | |
| "grad_norm": 0.15994909405708313, | |
| "learning_rate": 0.00029620686427457094, | |
| "loss": 5.1186, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 25.392, | |
| "grad_norm": 0.16203735768795013, | |
| "learning_rate": 0.0002962044641785671, | |
| "loss": 5.2136, | |
| "step": 158700 | |
| }, | |
| { | |
| "epoch": 25.408, | |
| "grad_norm": 0.14830628037452698, | |
| "learning_rate": 0.0002962020640825633, | |
| "loss": 4.8028, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 25.424, | |
| "grad_norm": 0.17855019867420197, | |
| "learning_rate": 0.00029619966398655945, | |
| "loss": 5.2293, | |
| "step": 158900 | |
| }, | |
| { | |
| "epoch": 25.44, | |
| "grad_norm": 0.13485394418239594, | |
| "learning_rate": 0.00029619728789151564, | |
| "loss": 5.1688, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 25.456, | |
| "grad_norm": 0.15001603960990906, | |
| "learning_rate": 0.0002961948877955118, | |
| "loss": 5.2429, | |
| "step": 159100 | |
| }, | |
| { | |
| "epoch": 25.472, | |
| "grad_norm": 0.15747343003749847, | |
| "learning_rate": 0.000296192487699508, | |
| "loss": 5.0648, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 25.488, | |
| "grad_norm": 0.11709601432085037, | |
| "learning_rate": 0.0002961900876035041, | |
| "loss": 4.9424, | |
| "step": 159300 | |
| }, | |
| { | |
| "epoch": 25.504, | |
| "grad_norm": 0.14115624129772186, | |
| "learning_rate": 0.00029618768750750026, | |
| "loss": 5.2824, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 25.52, | |
| "grad_norm": 0.13271014392375946, | |
| "learning_rate": 0.00029618528741149643, | |
| "loss": 5.2082, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 25.536, | |
| "grad_norm": 0.13927429914474487, | |
| "learning_rate": 0.0002961828873154926, | |
| "loss": 5.0302, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 25.552, | |
| "grad_norm": 0.1625901609659195, | |
| "learning_rate": 0.00029618048721948877, | |
| "loss": 5.2649, | |
| "step": 159700 | |
| }, | |
| { | |
| "epoch": 25.568, | |
| "grad_norm": 0.1242537572979927, | |
| "learning_rate": 0.0002961780871234849, | |
| "loss": 5.3638, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 25.584, | |
| "grad_norm": 0.22442211210727692, | |
| "learning_rate": 0.00029617568702748105, | |
| "loss": 4.7374, | |
| "step": 159900 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "grad_norm": 0.1424286961555481, | |
| "learning_rate": 0.0002961732869314772, | |
| "loss": 5.0878, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 25.616, | |
| "grad_norm": 0.16174399852752686, | |
| "learning_rate": 0.0002961708868354734, | |
| "loss": 5.4059, | |
| "step": 160100 | |
| }, | |
| { | |
| "epoch": 25.632, | |
| "grad_norm": 0.12529495358467102, | |
| "learning_rate": 0.00029616848673946956, | |
| "loss": 5.1528, | |
| "step": 160200 | |
| }, | |
| { | |
| "epoch": 25.648, | |
| "grad_norm": 0.14766289293766022, | |
| "learning_rate": 0.00029616608664346573, | |
| "loss": 5.2453, | |
| "step": 160300 | |
| }, | |
| { | |
| "epoch": 25.664, | |
| "grad_norm": 0.12722782790660858, | |
| "learning_rate": 0.00029616368654746184, | |
| "loss": 5.1237, | |
| "step": 160400 | |
| }, | |
| { | |
| "epoch": 25.68, | |
| "grad_norm": 0.1653498262166977, | |
| "learning_rate": 0.000296161286451458, | |
| "loss": 5.2606, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 25.696, | |
| "grad_norm": 0.15743720531463623, | |
| "learning_rate": 0.0002961588863554542, | |
| "loss": 5.3842, | |
| "step": 160600 | |
| }, | |
| { | |
| "epoch": 25.712, | |
| "grad_norm": 0.11641506105661392, | |
| "learning_rate": 0.00029615648625945035, | |
| "loss": 5.0112, | |
| "step": 160700 | |
| }, | |
| { | |
| "epoch": 25.728, | |
| "grad_norm": 0.1600313037633896, | |
| "learning_rate": 0.0002961540861634465, | |
| "loss": 5.1207, | |
| "step": 160800 | |
| }, | |
| { | |
| "epoch": 25.744, | |
| "grad_norm": 0.1792784333229065, | |
| "learning_rate": 0.0002961516860674427, | |
| "loss": 5.1801, | |
| "step": 160900 | |
| }, | |
| { | |
| "epoch": 25.76, | |
| "grad_norm": 0.12263203412294388, | |
| "learning_rate": 0.0002961492859714388, | |
| "loss": 5.1875, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 25.776, | |
| "grad_norm": 0.1638142168521881, | |
| "learning_rate": 0.00029614690987639505, | |
| "loss": 5.5503, | |
| "step": 161100 | |
| }, | |
| { | |
| "epoch": 25.792, | |
| "grad_norm": 0.12107832729816437, | |
| "learning_rate": 0.0002961445097803912, | |
| "loss": 5.312, | |
| "step": 161200 | |
| }, | |
| { | |
| "epoch": 25.808, | |
| "grad_norm": 0.1593557745218277, | |
| "learning_rate": 0.00029614210968438733, | |
| "loss": 5.0444, | |
| "step": 161300 | |
| }, | |
| { | |
| "epoch": 25.824, | |
| "grad_norm": 0.14629554748535156, | |
| "learning_rate": 0.0002961397095883835, | |
| "loss": 5.2007, | |
| "step": 161400 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "grad_norm": 0.14022816717624664, | |
| "learning_rate": 0.00029613730949237967, | |
| "loss": 5.1234, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 25.856, | |
| "grad_norm": 0.15026092529296875, | |
| "learning_rate": 0.00029613490939637584, | |
| "loss": 5.1459, | |
| "step": 161600 | |
| }, | |
| { | |
| "epoch": 25.872, | |
| "grad_norm": 0.16642487049102783, | |
| "learning_rate": 0.000296132509300372, | |
| "loss": 5.074, | |
| "step": 161700 | |
| }, | |
| { | |
| "epoch": 25.888, | |
| "grad_norm": 0.16100358963012695, | |
| "learning_rate": 0.0002961301092043681, | |
| "loss": 4.8445, | |
| "step": 161800 | |
| }, | |
| { | |
| "epoch": 25.904, | |
| "grad_norm": 0.14411258697509766, | |
| "learning_rate": 0.0002961277091083643, | |
| "loss": 4.7157, | |
| "step": 161900 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "grad_norm": 0.10813727974891663, | |
| "learning_rate": 0.00029612530901236046, | |
| "loss": 5.0682, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 25.936, | |
| "grad_norm": 0.14450779557228088, | |
| "learning_rate": 0.00029612290891635663, | |
| "loss": 5.241, | |
| "step": 162100 | |
| }, | |
| { | |
| "epoch": 25.951999999999998, | |
| "grad_norm": 0.16171583533287048, | |
| "learning_rate": 0.0002961205088203528, | |
| "loss": 5.1133, | |
| "step": 162200 | |
| }, | |
| { | |
| "epoch": 25.968, | |
| "grad_norm": 0.12712721526622772, | |
| "learning_rate": 0.00029611810872434897, | |
| "loss": 5.0328, | |
| "step": 162300 | |
| }, | |
| { | |
| "epoch": 25.984, | |
| "grad_norm": 0.12672489881515503, | |
| "learning_rate": 0.0002961157086283451, | |
| "loss": 4.8169, | |
| "step": 162400 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.15172095596790314, | |
| "learning_rate": 0.00029611330853234125, | |
| "loss": 5.092, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 26.016, | |
| "grad_norm": 0.18036304414272308, | |
| "learning_rate": 0.0002961109084363374, | |
| "loss": 4.7511, | |
| "step": 162600 | |
| }, | |
| { | |
| "epoch": 26.032, | |
| "grad_norm": 0.16676302254199982, | |
| "learning_rate": 0.0002961085083403336, | |
| "loss": 4.9628, | |
| "step": 162700 | |
| }, | |
| { | |
| "epoch": 26.048, | |
| "grad_norm": 0.1724889576435089, | |
| "learning_rate": 0.00029610610824432976, | |
| "loss": 4.8742, | |
| "step": 162800 | |
| }, | |
| { | |
| "epoch": 26.064, | |
| "grad_norm": 0.1280188113451004, | |
| "learning_rate": 0.00029610370814832593, | |
| "loss": 5.3059, | |
| "step": 162900 | |
| }, | |
| { | |
| "epoch": 26.08, | |
| "grad_norm": 0.15785780549049377, | |
| "learning_rate": 0.00029610130805232204, | |
| "loss": 4.8671, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 26.096, | |
| "grad_norm": 0.14080898463726044, | |
| "learning_rate": 0.0002960989079563182, | |
| "loss": 5.1418, | |
| "step": 163100 | |
| }, | |
| { | |
| "epoch": 26.112, | |
| "grad_norm": 0.13095679879188538, | |
| "learning_rate": 0.0002960965078603144, | |
| "loss": 4.7194, | |
| "step": 163200 | |
| }, | |
| { | |
| "epoch": 26.128, | |
| "grad_norm": 0.1574213057756424, | |
| "learning_rate": 0.00029609410776431055, | |
| "loss": 4.9184, | |
| "step": 163300 | |
| }, | |
| { | |
| "epoch": 26.144, | |
| "grad_norm": 0.13669663667678833, | |
| "learning_rate": 0.0002960917076683067, | |
| "loss": 5.0563, | |
| "step": 163400 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "grad_norm": 0.15946930646896362, | |
| "learning_rate": 0.00029608930757230284, | |
| "loss": 4.7656, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 26.176, | |
| "grad_norm": 0.1457744687795639, | |
| "learning_rate": 0.000296086907476299, | |
| "loss": 4.894, | |
| "step": 163600 | |
| }, | |
| { | |
| "epoch": 26.192, | |
| "grad_norm": 0.10747674852609634, | |
| "learning_rate": 0.0002960845073802952, | |
| "loss": 5.1462, | |
| "step": 163700 | |
| }, | |
| { | |
| "epoch": 26.208, | |
| "grad_norm": 0.22094644606113434, | |
| "learning_rate": 0.00029608210728429134, | |
| "loss": 5.3243, | |
| "step": 163800 | |
| }, | |
| { | |
| "epoch": 26.224, | |
| "grad_norm": 0.12370151281356812, | |
| "learning_rate": 0.0002960797071882875, | |
| "loss": 4.8294, | |
| "step": 163900 | |
| }, | |
| { | |
| "epoch": 26.24, | |
| "grad_norm": 0.1479647010564804, | |
| "learning_rate": 0.0002960773070922837, | |
| "loss": 5.0416, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 26.256, | |
| "grad_norm": 0.15605013072490692, | |
| "learning_rate": 0.0002960749069962798, | |
| "loss": 5.2773, | |
| "step": 164100 | |
| }, | |
| { | |
| "epoch": 26.272, | |
| "grad_norm": 0.1911146342754364, | |
| "learning_rate": 0.00029607250690027597, | |
| "loss": 4.939, | |
| "step": 164200 | |
| }, | |
| { | |
| "epoch": 26.288, | |
| "grad_norm": 0.12012562155723572, | |
| "learning_rate": 0.0002960701308052322, | |
| "loss": 4.8719, | |
| "step": 164300 | |
| }, | |
| { | |
| "epoch": 26.304, | |
| "grad_norm": 0.12493129819631577, | |
| "learning_rate": 0.0002960677307092283, | |
| "loss": 4.7802, | |
| "step": 164400 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "grad_norm": 0.12632489204406738, | |
| "learning_rate": 0.0002960653306132245, | |
| "loss": 4.8725, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 26.336, | |
| "grad_norm": 0.15591692924499512, | |
| "learning_rate": 0.00029606293051722066, | |
| "loss": 5.2183, | |
| "step": 164600 | |
| }, | |
| { | |
| "epoch": 26.352, | |
| "grad_norm": 0.12113320082426071, | |
| "learning_rate": 0.00029606053042121683, | |
| "loss": 4.981, | |
| "step": 164700 | |
| }, | |
| { | |
| "epoch": 26.368, | |
| "grad_norm": 0.12973067164421082, | |
| "learning_rate": 0.000296058130325213, | |
| "loss": 5.1433, | |
| "step": 164800 | |
| }, | |
| { | |
| "epoch": 26.384, | |
| "grad_norm": 0.15297859907150269, | |
| "learning_rate": 0.00029605573022920917, | |
| "loss": 4.9628, | |
| "step": 164900 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "grad_norm": 0.13537169992923737, | |
| "learning_rate": 0.0002960533301332053, | |
| "loss": 4.6621, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 26.416, | |
| "grad_norm": 0.12161804735660553, | |
| "learning_rate": 0.00029605093003720145, | |
| "loss": 4.9027, | |
| "step": 165100 | |
| }, | |
| { | |
| "epoch": 26.432, | |
| "grad_norm": 0.14561276137828827, | |
| "learning_rate": 0.0002960485299411976, | |
| "loss": 4.7497, | |
| "step": 165200 | |
| }, | |
| { | |
| "epoch": 26.448, | |
| "grad_norm": 0.1523263305425644, | |
| "learning_rate": 0.0002960461298451938, | |
| "loss": 4.7575, | |
| "step": 165300 | |
| }, | |
| { | |
| "epoch": 26.464, | |
| "grad_norm": 0.13894937932491302, | |
| "learning_rate": 0.00029604372974918996, | |
| "loss": 5.1487, | |
| "step": 165400 | |
| }, | |
| { | |
| "epoch": 26.48, | |
| "grad_norm": 0.1122347041964531, | |
| "learning_rate": 0.0002960413296531861, | |
| "loss": 4.8517, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 26.496, | |
| "grad_norm": 0.12737123668193817, | |
| "learning_rate": 0.00029603892955718225, | |
| "loss": 4.8187, | |
| "step": 165600 | |
| }, | |
| { | |
| "epoch": 26.512, | |
| "grad_norm": 0.1302328109741211, | |
| "learning_rate": 0.0002960365294611784, | |
| "loss": 4.6812, | |
| "step": 165700 | |
| }, | |
| { | |
| "epoch": 26.528, | |
| "grad_norm": 0.14844807982444763, | |
| "learning_rate": 0.0002960341293651746, | |
| "loss": 4.9271, | |
| "step": 165800 | |
| }, | |
| { | |
| "epoch": 26.544, | |
| "grad_norm": 0.17675945162773132, | |
| "learning_rate": 0.00029603172926917075, | |
| "loss": 4.7797, | |
| "step": 165900 | |
| }, | |
| { | |
| "epoch": 26.56, | |
| "grad_norm": 0.18416370451450348, | |
| "learning_rate": 0.0002960293291731669, | |
| "loss": 5.1626, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 26.576, | |
| "grad_norm": 0.12005133926868439, | |
| "learning_rate": 0.00029602692907716304, | |
| "loss": 4.7074, | |
| "step": 166100 | |
| }, | |
| { | |
| "epoch": 26.592, | |
| "grad_norm": 0.185636967420578, | |
| "learning_rate": 0.0002960245289811592, | |
| "loss": 5.175, | |
| "step": 166200 | |
| }, | |
| { | |
| "epoch": 26.608, | |
| "grad_norm": 0.11722932010889053, | |
| "learning_rate": 0.0002960221288851554, | |
| "loss": 4.9977, | |
| "step": 166300 | |
| }, | |
| { | |
| "epoch": 26.624, | |
| "grad_norm": 0.13763803243637085, | |
| "learning_rate": 0.00029601972878915154, | |
| "loss": 4.732, | |
| "step": 166400 | |
| }, | |
| { | |
| "epoch": 26.64, | |
| "grad_norm": 0.13912682235240936, | |
| "learning_rate": 0.0002960173286931477, | |
| "loss": 4.877, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 26.656, | |
| "grad_norm": 0.10087449103593826, | |
| "learning_rate": 0.00029601492859714383, | |
| "loss": 4.7994, | |
| "step": 166600 | |
| }, | |
| { | |
| "epoch": 26.672, | |
| "grad_norm": 0.1845891773700714, | |
| "learning_rate": 0.00029601252850114, | |
| "loss": 5.4515, | |
| "step": 166700 | |
| }, | |
| { | |
| "epoch": 26.688, | |
| "grad_norm": 0.14900504052639008, | |
| "learning_rate": 0.00029601012840513617, | |
| "loss": 5.0709, | |
| "step": 166800 | |
| }, | |
| { | |
| "epoch": 26.704, | |
| "grad_norm": 0.19447046518325806, | |
| "learning_rate": 0.00029600772830913234, | |
| "loss": 4.8345, | |
| "step": 166900 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "grad_norm": 0.15507912635803223, | |
| "learning_rate": 0.0002960053282131285, | |
| "loss": 4.909, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 26.736, | |
| "grad_norm": 0.12142092734575272, | |
| "learning_rate": 0.0002960029281171247, | |
| "loss": 4.8017, | |
| "step": 167100 | |
| }, | |
| { | |
| "epoch": 26.752, | |
| "grad_norm": 0.12530605494976044, | |
| "learning_rate": 0.0002960005280211208, | |
| "loss": 5.1347, | |
| "step": 167200 | |
| }, | |
| { | |
| "epoch": 26.768, | |
| "grad_norm": 0.14327798783779144, | |
| "learning_rate": 0.00029599812792511696, | |
| "loss": 4.7235, | |
| "step": 167300 | |
| }, | |
| { | |
| "epoch": 26.784, | |
| "grad_norm": 0.14647874236106873, | |
| "learning_rate": 0.00029599572782911313, | |
| "loss": 4.9018, | |
| "step": 167400 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "grad_norm": 0.13197900354862213, | |
| "learning_rate": 0.0002959933277331093, | |
| "loss": 5.1885, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 26.816, | |
| "grad_norm": 0.13953787088394165, | |
| "learning_rate": 0.00029599092763710547, | |
| "loss": 4.8121, | |
| "step": 167600 | |
| }, | |
| { | |
| "epoch": 26.832, | |
| "grad_norm": 0.16823934018611908, | |
| "learning_rate": 0.0002959885275411016, | |
| "loss": 4.7129, | |
| "step": 167700 | |
| }, | |
| { | |
| "epoch": 26.848, | |
| "grad_norm": 0.1557362824678421, | |
| "learning_rate": 0.00029598612744509775, | |
| "loss": 5.2257, | |
| "step": 167800 | |
| }, | |
| { | |
| "epoch": 26.864, | |
| "grad_norm": 0.16123229265213013, | |
| "learning_rate": 0.000295983751350054, | |
| "loss": 4.8921, | |
| "step": 167900 | |
| }, | |
| { | |
| "epoch": 26.88, | |
| "grad_norm": 0.1613980084657669, | |
| "learning_rate": 0.00029598135125405016, | |
| "loss": 5.0361, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 26.896, | |
| "grad_norm": 0.1302555948495865, | |
| "learning_rate": 0.0002959789511580463, | |
| "loss": 5.0077, | |
| "step": 168100 | |
| }, | |
| { | |
| "epoch": 26.912, | |
| "grad_norm": 0.15182837843894958, | |
| "learning_rate": 0.00029597655106204245, | |
| "loss": 5.0202, | |
| "step": 168200 | |
| }, | |
| { | |
| "epoch": 26.928, | |
| "grad_norm": 0.13955193758010864, | |
| "learning_rate": 0.0002959741509660386, | |
| "loss": 4.9305, | |
| "step": 168300 | |
| }, | |
| { | |
| "epoch": 26.944, | |
| "grad_norm": 0.1417885273694992, | |
| "learning_rate": 0.0002959717508700348, | |
| "loss": 5.0889, | |
| "step": 168400 | |
| }, | |
| { | |
| "epoch": 26.96, | |
| "grad_norm": 0.14792856574058533, | |
| "learning_rate": 0.00029596935077403095, | |
| "loss": 4.8685, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 26.976, | |
| "grad_norm": 0.14266085624694824, | |
| "learning_rate": 0.00029596695067802707, | |
| "loss": 5.1578, | |
| "step": 168600 | |
| }, | |
| { | |
| "epoch": 26.992, | |
| "grad_norm": 0.11925966292619705, | |
| "learning_rate": 0.00029596455058202324, | |
| "loss": 4.6746, | |
| "step": 168700 | |
| }, | |
| { | |
| "epoch": 27.008, | |
| "grad_norm": 0.13332228362560272, | |
| "learning_rate": 0.0002959621504860194, | |
| "loss": 5.1295, | |
| "step": 168800 | |
| }, | |
| { | |
| "epoch": 27.024, | |
| "grad_norm": 0.13257551193237305, | |
| "learning_rate": 0.0002959597503900156, | |
| "loss": 5.0958, | |
| "step": 168900 | |
| }, | |
| { | |
| "epoch": 27.04, | |
| "grad_norm": 0.11077175289392471, | |
| "learning_rate": 0.00029595735029401175, | |
| "loss": 4.6509, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 27.056, | |
| "grad_norm": 0.1581268608570099, | |
| "learning_rate": 0.0002959549501980079, | |
| "loss": 4.7619, | |
| "step": 169100 | |
| }, | |
| { | |
| "epoch": 27.072, | |
| "grad_norm": 0.15108828246593475, | |
| "learning_rate": 0.00029595255010200403, | |
| "loss": 4.7792, | |
| "step": 169200 | |
| }, | |
| { | |
| "epoch": 27.088, | |
| "grad_norm": 0.15362246334552765, | |
| "learning_rate": 0.0002959501500060002, | |
| "loss": 5.189, | |
| "step": 169300 | |
| }, | |
| { | |
| "epoch": 27.104, | |
| "grad_norm": 0.1353999823331833, | |
| "learning_rate": 0.00029594774990999637, | |
| "loss": 4.7698, | |
| "step": 169400 | |
| }, | |
| { | |
| "epoch": 27.12, | |
| "grad_norm": 0.15684208273887634, | |
| "learning_rate": 0.00029594534981399254, | |
| "loss": 4.8111, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 27.136, | |
| "grad_norm": 0.17176128923892975, | |
| "learning_rate": 0.0002959429497179887, | |
| "loss": 4.8735, | |
| "step": 169600 | |
| }, | |
| { | |
| "epoch": 27.152, | |
| "grad_norm": 0.12857766449451447, | |
| "learning_rate": 0.0002959405496219848, | |
| "loss": 4.5602, | |
| "step": 169700 | |
| }, | |
| { | |
| "epoch": 27.168, | |
| "grad_norm": 0.2216508835554123, | |
| "learning_rate": 0.000295938149525981, | |
| "loss": 4.6848, | |
| "step": 169800 | |
| }, | |
| { | |
| "epoch": 27.184, | |
| "grad_norm": 0.18342281877994537, | |
| "learning_rate": 0.00029593577343093723, | |
| "loss": 4.9973, | |
| "step": 169900 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "grad_norm": 0.2726237177848816, | |
| "learning_rate": 0.0002959333733349334, | |
| "loss": 4.8341, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 27.216, | |
| "grad_norm": 0.1373586356639862, | |
| "learning_rate": 0.0002959309732389295, | |
| "loss": 4.914, | |
| "step": 170100 | |
| }, | |
| { | |
| "epoch": 27.232, | |
| "grad_norm": 0.13454484939575195, | |
| "learning_rate": 0.0002959285731429257, | |
| "loss": 5.0239, | |
| "step": 170200 | |
| }, | |
| { | |
| "epoch": 27.248, | |
| "grad_norm": 0.146050363779068, | |
| "learning_rate": 0.00029592617304692186, | |
| "loss": 4.7314, | |
| "step": 170300 | |
| }, | |
| { | |
| "epoch": 27.264, | |
| "grad_norm": 0.14222508668899536, | |
| "learning_rate": 0.000295923772950918, | |
| "loss": 4.6159, | |
| "step": 170400 | |
| }, | |
| { | |
| "epoch": 27.28, | |
| "grad_norm": 0.14632238447666168, | |
| "learning_rate": 0.0002959213728549142, | |
| "loss": 4.4062, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 27.296, | |
| "grad_norm": 0.16428226232528687, | |
| "learning_rate": 0.0002959189727589103, | |
| "loss": 5.1747, | |
| "step": 170600 | |
| }, | |
| { | |
| "epoch": 27.312, | |
| "grad_norm": 0.1323370337486267, | |
| "learning_rate": 0.0002959165726629065, | |
| "loss": 4.5199, | |
| "step": 170700 | |
| }, | |
| { | |
| "epoch": 27.328, | |
| "grad_norm": 0.14235830307006836, | |
| "learning_rate": 0.00029591417256690265, | |
| "loss": 4.9103, | |
| "step": 170800 | |
| }, | |
| { | |
| "epoch": 27.344, | |
| "grad_norm": 0.13216975331306458, | |
| "learning_rate": 0.0002959117724708988, | |
| "loss": 4.8293, | |
| "step": 170900 | |
| }, | |
| { | |
| "epoch": 27.36, | |
| "grad_norm": 0.15071095526218414, | |
| "learning_rate": 0.000295909372374895, | |
| "loss": 4.9801, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 27.376, | |
| "grad_norm": 0.1272030919790268, | |
| "learning_rate": 0.00029590697227889116, | |
| "loss": 4.9456, | |
| "step": 171100 | |
| }, | |
| { | |
| "epoch": 27.392, | |
| "grad_norm": 0.13579507172107697, | |
| "learning_rate": 0.00029590457218288727, | |
| "loss": 4.8712, | |
| "step": 171200 | |
| }, | |
| { | |
| "epoch": 27.408, | |
| "grad_norm": 0.12844951450824738, | |
| "learning_rate": 0.00029590217208688344, | |
| "loss": 4.679, | |
| "step": 171300 | |
| }, | |
| { | |
| "epoch": 27.424, | |
| "grad_norm": 0.10488644242286682, | |
| "learning_rate": 0.0002958997719908796, | |
| "loss": 4.8333, | |
| "step": 171400 | |
| }, | |
| { | |
| "epoch": 27.44, | |
| "grad_norm": 0.1397544890642166, | |
| "learning_rate": 0.0002958973718948758, | |
| "loss": 4.9637, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 27.456, | |
| "grad_norm": 0.17122800648212433, | |
| "learning_rate": 0.00029589497179887195, | |
| "loss": 4.5042, | |
| "step": 171600 | |
| }, | |
| { | |
| "epoch": 27.472, | |
| "grad_norm": 0.1432805061340332, | |
| "learning_rate": 0.00029589257170286806, | |
| "loss": 4.9236, | |
| "step": 171700 | |
| }, | |
| { | |
| "epoch": 27.488, | |
| "grad_norm": 0.2430882304906845, | |
| "learning_rate": 0.00029589017160686423, | |
| "loss": 4.6134, | |
| "step": 171800 | |
| }, | |
| { | |
| "epoch": 27.504, | |
| "grad_norm": 0.12965236604213715, | |
| "learning_rate": 0.0002958877715108604, | |
| "loss": 4.8867, | |
| "step": 171900 | |
| }, | |
| { | |
| "epoch": 27.52, | |
| "grad_norm": 0.13079382479190826, | |
| "learning_rate": 0.00029588537141485657, | |
| "loss": 4.7196, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 27.536, | |
| "grad_norm": 0.16515448689460754, | |
| "learning_rate": 0.00029588299531981276, | |
| "loss": 4.6995, | |
| "step": 172100 | |
| }, | |
| { | |
| "epoch": 27.552, | |
| "grad_norm": 0.12594960629940033, | |
| "learning_rate": 0.00029588059522380893, | |
| "loss": 4.8708, | |
| "step": 172200 | |
| }, | |
| { | |
| "epoch": 27.568, | |
| "grad_norm": 0.1570487916469574, | |
| "learning_rate": 0.0002958781951278051, | |
| "loss": 4.8169, | |
| "step": 172300 | |
| }, | |
| { | |
| "epoch": 27.584, | |
| "grad_norm": 0.13092289865016937, | |
| "learning_rate": 0.00029587579503180127, | |
| "loss": 4.695, | |
| "step": 172400 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "grad_norm": 0.14942535758018494, | |
| "learning_rate": 0.00029587339493579744, | |
| "loss": 4.7415, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 27.616, | |
| "grad_norm": 0.12075886875391006, | |
| "learning_rate": 0.00029587099483979355, | |
| "loss": 4.4839, | |
| "step": 172600 | |
| }, | |
| { | |
| "epoch": 27.632, | |
| "grad_norm": 0.11725221574306488, | |
| "learning_rate": 0.0002958685947437897, | |
| "loss": 4.8162, | |
| "step": 172700 | |
| }, | |
| { | |
| "epoch": 27.648, | |
| "grad_norm": 0.20893152058124542, | |
| "learning_rate": 0.0002958661946477859, | |
| "loss": 4.78, | |
| "step": 172800 | |
| }, | |
| { | |
| "epoch": 27.664, | |
| "grad_norm": 0.14231526851654053, | |
| "learning_rate": 0.00029586379455178206, | |
| "loss": 4.7212, | |
| "step": 172900 | |
| }, | |
| { | |
| "epoch": 27.68, | |
| "grad_norm": 0.1261710226535797, | |
| "learning_rate": 0.0002958613944557782, | |
| "loss": 4.96, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 27.696, | |
| "grad_norm": 0.1408015638589859, | |
| "learning_rate": 0.0002958589943597744, | |
| "loss": 4.7388, | |
| "step": 173100 | |
| }, | |
| { | |
| "epoch": 27.712, | |
| "grad_norm": 0.14422334730625153, | |
| "learning_rate": 0.0002958565942637705, | |
| "loss": 4.5018, | |
| "step": 173200 | |
| }, | |
| { | |
| "epoch": 27.728, | |
| "grad_norm": 0.17371025681495667, | |
| "learning_rate": 0.0002958541941677667, | |
| "loss": 4.792, | |
| "step": 173300 | |
| }, | |
| { | |
| "epoch": 27.744, | |
| "grad_norm": 0.21515819430351257, | |
| "learning_rate": 0.00029585179407176285, | |
| "loss": 4.8225, | |
| "step": 173400 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "grad_norm": 0.1557329297065735, | |
| "learning_rate": 0.000295849393975759, | |
| "loss": 4.6305, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 27.776, | |
| "grad_norm": 0.13870660960674286, | |
| "learning_rate": 0.0002958469938797552, | |
| "loss": 4.5486, | |
| "step": 173600 | |
| }, | |
| { | |
| "epoch": 27.792, | |
| "grad_norm": 0.13383133709430695, | |
| "learning_rate": 0.0002958445937837513, | |
| "loss": 4.6136, | |
| "step": 173700 | |
| }, | |
| { | |
| "epoch": 27.808, | |
| "grad_norm": 0.1399243175983429, | |
| "learning_rate": 0.00029584219368774747, | |
| "loss": 4.9352, | |
| "step": 173800 | |
| }, | |
| { | |
| "epoch": 27.824, | |
| "grad_norm": 0.11231095343828201, | |
| "learning_rate": 0.00029583979359174364, | |
| "loss": 4.9996, | |
| "step": 173900 | |
| }, | |
| { | |
| "epoch": 27.84, | |
| "grad_norm": 0.16128210723400116, | |
| "learning_rate": 0.0002958373934957398, | |
| "loss": 4.7546, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 27.856, | |
| "grad_norm": 0.15589210391044617, | |
| "learning_rate": 0.000295834993399736, | |
| "loss": 4.8234, | |
| "step": 174100 | |
| }, | |
| { | |
| "epoch": 27.872, | |
| "grad_norm": 0.22979894280433655, | |
| "learning_rate": 0.00029583259330373215, | |
| "loss": 4.8117, | |
| "step": 174200 | |
| }, | |
| { | |
| "epoch": 27.888, | |
| "grad_norm": 0.14024117588996887, | |
| "learning_rate": 0.00029583019320772826, | |
| "loss": 4.5712, | |
| "step": 174300 | |
| }, | |
| { | |
| "epoch": 27.904, | |
| "grad_norm": 0.16881561279296875, | |
| "learning_rate": 0.00029582779311172443, | |
| "loss": 4.8696, | |
| "step": 174400 | |
| }, | |
| { | |
| "epoch": 27.92, | |
| "grad_norm": 0.14194153249263763, | |
| "learning_rate": 0.0002958253930157206, | |
| "loss": 4.7792, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 27.936, | |
| "grad_norm": 0.16409501433372498, | |
| "learning_rate": 0.00029582299291971677, | |
| "loss": 4.862, | |
| "step": 174600 | |
| }, | |
| { | |
| "epoch": 27.951999999999998, | |
| "grad_norm": 0.21548931300640106, | |
| "learning_rate": 0.00029582059282371294, | |
| "loss": 4.6556, | |
| "step": 174700 | |
| }, | |
| { | |
| "epoch": 27.968, | |
| "grad_norm": 0.15370036661624908, | |
| "learning_rate": 0.00029581819272770906, | |
| "loss": 4.7855, | |
| "step": 174800 | |
| }, | |
| { | |
| "epoch": 27.984, | |
| "grad_norm": 0.1505698263645172, | |
| "learning_rate": 0.0002958157926317052, | |
| "loss": 4.5333, | |
| "step": 174900 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.13952812552452087, | |
| "learning_rate": 0.0002958133925357014, | |
| "loss": 5.0827, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 28.016, | |
| "grad_norm": 0.14113423228263855, | |
| "learning_rate": 0.00029581099243969756, | |
| "loss": 4.4652, | |
| "step": 175100 | |
| }, | |
| { | |
| "epoch": 28.032, | |
| "grad_norm": 0.13563218712806702, | |
| "learning_rate": 0.00029580859234369373, | |
| "loss": 4.4769, | |
| "step": 175200 | |
| }, | |
| { | |
| "epoch": 28.048, | |
| "grad_norm": 0.16485312581062317, | |
| "learning_rate": 0.0002958061922476899, | |
| "loss": 4.7196, | |
| "step": 175300 | |
| }, | |
| { | |
| "epoch": 28.064, | |
| "grad_norm": 0.1928679645061493, | |
| "learning_rate": 0.000295803792151686, | |
| "loss": 4.5181, | |
| "step": 175400 | |
| }, | |
| { | |
| "epoch": 28.08, | |
| "grad_norm": 0.16406244039535522, | |
| "learning_rate": 0.00029580141605664226, | |
| "loss": 4.5547, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 28.096, | |
| "grad_norm": 0.12744209170341492, | |
| "learning_rate": 0.00029579901596063843, | |
| "loss": 4.6802, | |
| "step": 175600 | |
| }, | |
| { | |
| "epoch": 28.112, | |
| "grad_norm": 0.15242663025856018, | |
| "learning_rate": 0.00029579661586463454, | |
| "loss": 4.7076, | |
| "step": 175700 | |
| }, | |
| { | |
| "epoch": 28.128, | |
| "grad_norm": 0.1231980100274086, | |
| "learning_rate": 0.0002957942157686307, | |
| "loss": 4.7097, | |
| "step": 175800 | |
| }, | |
| { | |
| "epoch": 28.144, | |
| "grad_norm": 0.1742876172065735, | |
| "learning_rate": 0.0002957918156726269, | |
| "loss": 4.8166, | |
| "step": 175900 | |
| }, | |
| { | |
| "epoch": 28.16, | |
| "grad_norm": 0.15425816178321838, | |
| "learning_rate": 0.00029578941557662305, | |
| "loss": 4.6306, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 28.176, | |
| "grad_norm": 0.1423932909965515, | |
| "learning_rate": 0.0002957870154806192, | |
| "loss": 4.7671, | |
| "step": 176100 | |
| }, | |
| { | |
| "epoch": 28.192, | |
| "grad_norm": 0.13283143937587738, | |
| "learning_rate": 0.0002957846153846154, | |
| "loss": 4.5074, | |
| "step": 176200 | |
| }, | |
| { | |
| "epoch": 28.208, | |
| "grad_norm": 0.1560533046722412, | |
| "learning_rate": 0.0002957822152886115, | |
| "loss": 4.8514, | |
| "step": 176300 | |
| }, | |
| { | |
| "epoch": 28.224, | |
| "grad_norm": 0.12814775109291077, | |
| "learning_rate": 0.0002957798151926077, | |
| "loss": 4.7173, | |
| "step": 176400 | |
| }, | |
| { | |
| "epoch": 28.24, | |
| "grad_norm": 0.1441114842891693, | |
| "learning_rate": 0.00029577741509660384, | |
| "loss": 4.7003, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 28.256, | |
| "grad_norm": 0.13554996252059937, | |
| "learning_rate": 0.0002957750150006, | |
| "loss": 4.6206, | |
| "step": 176600 | |
| }, | |
| { | |
| "epoch": 28.272, | |
| "grad_norm": 0.21647945046424866, | |
| "learning_rate": 0.0002957726149045962, | |
| "loss": 4.9289, | |
| "step": 176700 | |
| }, | |
| { | |
| "epoch": 28.288, | |
| "grad_norm": 0.1216735765337944, | |
| "learning_rate": 0.0002957702148085923, | |
| "loss": 4.7441, | |
| "step": 176800 | |
| }, | |
| { | |
| "epoch": 28.304, | |
| "grad_norm": 0.12911395728588104, | |
| "learning_rate": 0.00029576781471258847, | |
| "loss": 4.6493, | |
| "step": 176900 | |
| }, | |
| { | |
| "epoch": 28.32, | |
| "grad_norm": 0.12240692973136902, | |
| "learning_rate": 0.00029576541461658463, | |
| "loss": 4.7305, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 28.336, | |
| "grad_norm": 0.17344659566879272, | |
| "learning_rate": 0.0002957630145205808, | |
| "loss": 4.5246, | |
| "step": 177100 | |
| }, | |
| { | |
| "epoch": 28.352, | |
| "grad_norm": 0.12759949266910553, | |
| "learning_rate": 0.00029576061442457697, | |
| "loss": 4.6852, | |
| "step": 177200 | |
| }, | |
| { | |
| "epoch": 28.368, | |
| "grad_norm": 0.12402662634849548, | |
| "learning_rate": 0.00029575821432857314, | |
| "loss": 4.5194, | |
| "step": 177300 | |
| }, | |
| { | |
| "epoch": 28.384, | |
| "grad_norm": 0.19976910948753357, | |
| "learning_rate": 0.00029575581423256926, | |
| "loss": 4.5166, | |
| "step": 177400 | |
| }, | |
| { | |
| "epoch": 28.4, | |
| "grad_norm": 0.14362084865570068, | |
| "learning_rate": 0.0002957534141365654, | |
| "loss": 4.5147, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 28.416, | |
| "grad_norm": 0.13851560652256012, | |
| "learning_rate": 0.0002957510140405616, | |
| "loss": 4.5473, | |
| "step": 177600 | |
| }, | |
| { | |
| "epoch": 28.432, | |
| "grad_norm": 0.13696688413619995, | |
| "learning_rate": 0.00029574861394455776, | |
| "loss": 4.7163, | |
| "step": 177700 | |
| }, | |
| { | |
| "epoch": 28.448, | |
| "grad_norm": 0.1331932544708252, | |
| "learning_rate": 0.00029574621384855393, | |
| "loss": 5.0066, | |
| "step": 177800 | |
| }, | |
| { | |
| "epoch": 28.464, | |
| "grad_norm": 0.13118359446525574, | |
| "learning_rate": 0.00029574381375255005, | |
| "loss": 4.7009, | |
| "step": 177900 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "grad_norm": 0.11460904031991959, | |
| "learning_rate": 0.0002957414136565462, | |
| "loss": 4.5525, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 28.496, | |
| "grad_norm": 0.11112211644649506, | |
| "learning_rate": 0.0002957390135605424, | |
| "loss": 4.8012, | |
| "step": 178100 | |
| }, | |
| { | |
| "epoch": 28.512, | |
| "grad_norm": 0.1618378460407257, | |
| "learning_rate": 0.00029573661346453856, | |
| "loss": 4.8419, | |
| "step": 178200 | |
| }, | |
| { | |
| "epoch": 28.528, | |
| "grad_norm": 0.13665986061096191, | |
| "learning_rate": 0.0002957342133685347, | |
| "loss": 4.6129, | |
| "step": 178300 | |
| }, | |
| { | |
| "epoch": 28.544, | |
| "grad_norm": 0.10059978067874908, | |
| "learning_rate": 0.0002957318132725309, | |
| "loss": 4.7326, | |
| "step": 178400 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "grad_norm": 0.1575680524110794, | |
| "learning_rate": 0.000295729413176527, | |
| "loss": 5.0102, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 28.576, | |
| "grad_norm": 0.10887812077999115, | |
| "learning_rate": 0.0002957270130805232, | |
| "loss": 4.7228, | |
| "step": 178600 | |
| }, | |
| { | |
| "epoch": 28.592, | |
| "grad_norm": 0.08943487703800201, | |
| "learning_rate": 0.0002957246369854794, | |
| "loss": 4.4294, | |
| "step": 178700 | |
| }, | |
| { | |
| "epoch": 28.608, | |
| "grad_norm": 0.14149336516857147, | |
| "learning_rate": 0.00029572223688947554, | |
| "loss": 4.6056, | |
| "step": 178800 | |
| }, | |
| { | |
| "epoch": 28.624, | |
| "grad_norm": 0.12872636318206787, | |
| "learning_rate": 0.0002957198367934717, | |
| "loss": 4.8457, | |
| "step": 178900 | |
| }, | |
| { | |
| "epoch": 28.64, | |
| "grad_norm": 0.15382656455039978, | |
| "learning_rate": 0.0002957174366974679, | |
| "loss": 4.7641, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 28.656, | |
| "grad_norm": 0.15484744310379028, | |
| "learning_rate": 0.00029571503660146404, | |
| "loss": 4.7261, | |
| "step": 179100 | |
| }, | |
| { | |
| "epoch": 28.672, | |
| "grad_norm": 0.1385447382926941, | |
| "learning_rate": 0.0002957126365054602, | |
| "loss": 4.8178, | |
| "step": 179200 | |
| }, | |
| { | |
| "epoch": 28.688, | |
| "grad_norm": 0.09416704624891281, | |
| "learning_rate": 0.0002957102364094564, | |
| "loss": 4.462, | |
| "step": 179300 | |
| }, | |
| { | |
| "epoch": 28.704, | |
| "grad_norm": 0.11756269633769989, | |
| "learning_rate": 0.0002957078363134525, | |
| "loss": 4.9817, | |
| "step": 179400 | |
| }, | |
| { | |
| "epoch": 28.72, | |
| "grad_norm": 0.16298645734786987, | |
| "learning_rate": 0.00029570543621744867, | |
| "loss": 4.7884, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 28.736, | |
| "grad_norm": 0.1666107177734375, | |
| "learning_rate": 0.00029570303612144484, | |
| "loss": 4.5478, | |
| "step": 179600 | |
| }, | |
| { | |
| "epoch": 28.752, | |
| "grad_norm": 0.14432166516780853, | |
| "learning_rate": 0.000295700636025441, | |
| "loss": 4.5671, | |
| "step": 179700 | |
| }, | |
| { | |
| "epoch": 28.768, | |
| "grad_norm": 0.14455050230026245, | |
| "learning_rate": 0.0002956982359294372, | |
| "loss": 4.4565, | |
| "step": 179800 | |
| }, | |
| { | |
| "epoch": 28.784, | |
| "grad_norm": 0.11911621689796448, | |
| "learning_rate": 0.0002956958358334333, | |
| "loss": 4.8298, | |
| "step": 179900 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "grad_norm": 0.11492261290550232, | |
| "learning_rate": 0.00029569343573742946, | |
| "loss": 4.8744, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 28.816, | |
| "grad_norm": 0.11532367020845413, | |
| "learning_rate": 0.00029569103564142563, | |
| "loss": 4.9461, | |
| "step": 180100 | |
| }, | |
| { | |
| "epoch": 28.832, | |
| "grad_norm": 0.11335845291614532, | |
| "learning_rate": 0.0002956886355454218, | |
| "loss": 4.6438, | |
| "step": 180200 | |
| }, | |
| { | |
| "epoch": 28.848, | |
| "grad_norm": 0.13290923833847046, | |
| "learning_rate": 0.00029568623544941797, | |
| "loss": 4.5029, | |
| "step": 180300 | |
| }, | |
| { | |
| "epoch": 28.864, | |
| "grad_norm": 0.12123245000839233, | |
| "learning_rate": 0.00029568383535341414, | |
| "loss": 5.002, | |
| "step": 180400 | |
| }, | |
| { | |
| "epoch": 28.88, | |
| "grad_norm": 0.1688774973154068, | |
| "learning_rate": 0.00029568143525741025, | |
| "loss": 4.5888, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 28.896, | |
| "grad_norm": 0.12593814730644226, | |
| "learning_rate": 0.0002956790351614064, | |
| "loss": 4.5949, | |
| "step": 180600 | |
| }, | |
| { | |
| "epoch": 28.912, | |
| "grad_norm": 0.13134326040744781, | |
| "learning_rate": 0.0002956766350654026, | |
| "loss": 4.3431, | |
| "step": 180700 | |
| }, | |
| { | |
| "epoch": 28.928, | |
| "grad_norm": 0.14252367615699768, | |
| "learning_rate": 0.00029567423496939876, | |
| "loss": 4.1599, | |
| "step": 180800 | |
| }, | |
| { | |
| "epoch": 28.944, | |
| "grad_norm": 0.13371191918849945, | |
| "learning_rate": 0.0002956718348733949, | |
| "loss": 4.4618, | |
| "step": 180900 | |
| }, | |
| { | |
| "epoch": 28.96, | |
| "grad_norm": 0.2305118888616562, | |
| "learning_rate": 0.00029566943477739104, | |
| "loss": 4.7324, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 28.976, | |
| "grad_norm": 0.17778520286083221, | |
| "learning_rate": 0.0002956670346813872, | |
| "loss": 4.5895, | |
| "step": 181100 | |
| }, | |
| { | |
| "epoch": 28.992, | |
| "grad_norm": 0.16209328174591064, | |
| "learning_rate": 0.0002956646345853834, | |
| "loss": 4.5924, | |
| "step": 181200 | |
| }, | |
| { | |
| "epoch": 29.008, | |
| "grad_norm": 0.13874457776546478, | |
| "learning_rate": 0.0002956622584903396, | |
| "loss": 4.5032, | |
| "step": 181300 | |
| }, | |
| { | |
| "epoch": 29.024, | |
| "grad_norm": 0.13318394124507904, | |
| "learning_rate": 0.00029565985839433574, | |
| "loss": 4.3979, | |
| "step": 181400 | |
| }, | |
| { | |
| "epoch": 29.04, | |
| "grad_norm": 0.1424497812986374, | |
| "learning_rate": 0.0002956574582983319, | |
| "loss": 4.6121, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 29.056, | |
| "grad_norm": 0.1274562031030655, | |
| "learning_rate": 0.0002956550582023281, | |
| "loss": 4.6716, | |
| "step": 181600 | |
| }, | |
| { | |
| "epoch": 29.072, | |
| "grad_norm": 0.15418770909309387, | |
| "learning_rate": 0.00029565265810632425, | |
| "loss": 4.4586, | |
| "step": 181700 | |
| }, | |
| { | |
| "epoch": 29.088, | |
| "grad_norm": 0.1679641753435135, | |
| "learning_rate": 0.0002956502580103204, | |
| "loss": 4.4676, | |
| "step": 181800 | |
| }, | |
| { | |
| "epoch": 29.104, | |
| "grad_norm": 0.10988187789916992, | |
| "learning_rate": 0.00029564788191527655, | |
| "loss": 4.4074, | |
| "step": 181900 | |
| }, | |
| { | |
| "epoch": 29.12, | |
| "grad_norm": 0.13705100119113922, | |
| "learning_rate": 0.0002956454818192727, | |
| "loss": 4.5681, | |
| "step": 182000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 12500000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2000, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.600452986732544e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |