| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.515237104206927, | |
| "global_step": 32000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5e-05, | |
| "loss": 3.5407, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0001, | |
| "loss": 3.2075, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00015, | |
| "loss": 3.0286, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8212, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025, | |
| "loss": 2.3586, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0003, | |
| "loss": 1.676, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00035, | |
| "loss": 1.3696, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004, | |
| "loss": 1.2677, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00045000000000000004, | |
| "loss": 1.2271, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0005, | |
| "loss": 1.2006, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000498467667790377, | |
| "loss": 1.1846, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0004969353355807539, | |
| "loss": 1.1663, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0004954030033711309, | |
| "loss": 1.1429, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0004938706711615078, | |
| "loss": 1.1384, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0004923383389518848, | |
| "loss": 1.1353, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0004908060067422617, | |
| "loss": 1.1384, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004892736745326388, | |
| "loss": 1.1461, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00048774134232301567, | |
| "loss": 1.1333, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0004862090101133926, | |
| "loss": 1.1205, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00048467667790376954, | |
| "loss": 1.1141, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004831443456941465, | |
| "loss": 1.1078, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004816120134845234, | |
| "loss": 1.1006, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00048007968127490044, | |
| "loss": 1.0978, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00047854734906527735, | |
| "loss": 1.0894, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0004770150168556543, | |
| "loss": 1.0861, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0004754826846460313, | |
| "loss": 1.083, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00047395035243640824, | |
| "loss": 1.0758, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00047241802022678515, | |
| "loss": 1.076, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00047088568801716217, | |
| "loss": 1.0794, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0004693533558075391, | |
| "loss": 1.0706, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00046782102359791604, | |
| "loss": 1.0725, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.000466288691388293, | |
| "loss": 1.069, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0004647563591786699, | |
| "loss": 1.0674, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00046322402696904693, | |
| "loss": 1.066, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00046169169475942384, | |
| "loss": 1.0569, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0004601593625498008, | |
| "loss": 1.0579, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00045862703034017777, | |
| "loss": 1.0615, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00045709469813055473, | |
| "loss": 1.055, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00045556236592093164, | |
| "loss": 1.0583, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0004540300337113086, | |
| "loss": 1.0537, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00045249770150168557, | |
| "loss": 1.0531, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00045096536929206254, | |
| "loss": 1.0507, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.0004494330370824395, | |
| "loss": 1.0449, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.0004479007048728164, | |
| "loss": 1.0463, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00044636837266319343, | |
| "loss": 1.0495, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00044483604045357034, | |
| "loss": 1.0489, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0004433037082439473, | |
| "loss": 1.043, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00044177137603432427, | |
| "loss": 1.0404, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0004402390438247012, | |
| "loss": 1.0448, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00043870671161507814, | |
| "loss": 1.0378, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0004371743794054551, | |
| "loss": 1.0359, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00043564204719583207, | |
| "loss": 1.0419, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.000434109714986209, | |
| "loss": 1.0332, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.000432577382776586, | |
| "loss": 1.0382, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.0004310450505669629, | |
| "loss": 1.0312, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.0004295127183573399, | |
| "loss": 1.0377, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00042798038614771683, | |
| "loss": 1.0296, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00042644805393809374, | |
| "loss": 1.0316, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00042491572172847076, | |
| "loss": 1.0322, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.0004233833895188477, | |
| "loss": 1.0325, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00042185105730922464, | |
| "loss": 1.0307, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.0004203187250996016, | |
| "loss": 1.0297, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00041878639288997857, | |
| "loss": 1.031, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.0004172540606803555, | |
| "loss": 1.0304, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.0004157217284707325, | |
| "loss": 1.0278, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0004141893962611094, | |
| "loss": 1.0211, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.0004126570640514864, | |
| "loss": 1.0248, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.00041112473184186333, | |
| "loss": 1.0319, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.00040959239963224024, | |
| "loss": 1.0301, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.00040806006742261726, | |
| "loss": 1.0295, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 0.00040652773521299417, | |
| "loss": 1.0247, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.00040499540300337113, | |
| "loss": 1.0205, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 0.0004034630707937481, | |
| "loss": 1.0221, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.00040193073858412506, | |
| "loss": 1.0251, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.00040039840637450197, | |
| "loss": 1.0164, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.000398866074164879, | |
| "loss": 1.019, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.0003973337419552559, | |
| "loss": 1.0167, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.0003958014097456328, | |
| "loss": 1.0202, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.00039426907753600983, | |
| "loss": 1.0183, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.00039273674532638674, | |
| "loss": 1.0234, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 0.00039120441311676376, | |
| "loss": 1.0103, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.00038967208090714067, | |
| "loss": 1.0196, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.00038813974869751763, | |
| "loss": 1.0147, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.0003866074164878946, | |
| "loss": 1.0138, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.00038507508427827156, | |
| "loss": 1.0151, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.00038354275206864847, | |
| "loss": 1.0118, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 0.0003820104198590255, | |
| "loss": 1.014, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.0003804780876494024, | |
| "loss": 1.0096, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.0003789457554397793, | |
| "loss": 1.0092, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.0003774134232301563, | |
| "loss": 1.0096, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.00037588109102053323, | |
| "loss": 1.0148, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00037434875881091025, | |
| "loss": 1.0102, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00037281642660128716, | |
| "loss": 1.0095, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.0003712840943916641, | |
| "loss": 1.0099, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.0003697517621820411, | |
| "loss": 1.0083, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.00036821942997241805, | |
| "loss": 1.0093, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00036668709776279496, | |
| "loss": 1.0023, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.00036515476555317193, | |
| "loss": 1.0058, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.0003636224333435489, | |
| "loss": 1.0088, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.0003620901011339258, | |
| "loss": 1.0046, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0003605577689243028, | |
| "loss": 1.0142, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00035902543671467973, | |
| "loss": 1.0031, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.0003574931045050567, | |
| "loss": 1.006, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 0.00035596077229543366, | |
| "loss": 1.0019, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.0003544284400858106, | |
| "loss": 1.0023, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.0003528961078761876, | |
| "loss": 0.9993, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.0003513637756665645, | |
| "loss": 0.9987, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00034983144345694146, | |
| "loss": 0.9987, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.0003482991112473184, | |
| "loss": 1.005, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 0.0003467667790376954, | |
| "loss": 0.9966, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.0003452344468280723, | |
| "loss": 0.9986, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.0003437021146184493, | |
| "loss": 0.9973, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 0.00034216978240882623, | |
| "loss": 1.0011, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 0.0003406374501992032, | |
| "loss": 0.9944, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 0.00033910511798958016, | |
| "loss": 0.996, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 0.00033757278577995707, | |
| "loss": 0.9976, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 0.0003360404535703341, | |
| "loss": 0.9931, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 0.000334508121360711, | |
| "loss": 0.9921, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 0.00033297578915108796, | |
| "loss": 0.9911, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 0.0003314434569414649, | |
| "loss": 0.9916, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 0.0003299111247318419, | |
| "loss": 0.9921, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 0.0003283787925222188, | |
| "loss": 0.991, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 0.0003268464603125958, | |
| "loss": 0.9971, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 0.0003253141281029727, | |
| "loss": 0.995, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.0003237817958933497, | |
| "loss": 0.9891, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 0.00032224946368372665, | |
| "loss": 0.9907, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 0.00032071713147410356, | |
| "loss": 0.9912, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 0.0003191847992644805, | |
| "loss": 0.9873, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 0.0003176524670548575, | |
| "loss": 0.9868, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 0.00031612013484523445, | |
| "loss": 0.9845, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 0.0003145878026356114, | |
| "loss": 0.9836, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 0.0003130554704259884, | |
| "loss": 0.986, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 0.0003115231382163653, | |
| "loss": 0.9902, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.0003099908060067423, | |
| "loss": 0.983, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 0.0003084584737971192, | |
| "loss": 0.9872, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 0.00030692614158749613, | |
| "loss": 0.9844, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 0.00030539380937787315, | |
| "loss": 0.9867, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 0.00030386147716825006, | |
| "loss": 0.9821, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 0.000302329144958627, | |
| "loss": 0.9809, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.000300796812749004, | |
| "loss": 0.984, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 0.00029926448053938095, | |
| "loss": 0.9767, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 0.0002977321483297579, | |
| "loss": 0.9819, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 0.0002961998161201349, | |
| "loss": 0.9811, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 0.0002946674839105118, | |
| "loss": 0.9791, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 0.00029313515170088875, | |
| "loss": 0.9783, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 0.0002916028194912657, | |
| "loss": 0.9878, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 0.00029007048728164263, | |
| "loss": 0.975, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.00028853815507201965, | |
| "loss": 0.9775, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 0.00028700582286239656, | |
| "loss": 0.9775, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 0.0002854734906527735, | |
| "loss": 0.9786, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 0.0002839411584431505, | |
| "loss": 0.9753, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 0.00028240882623352745, | |
| "loss": 0.9841, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 0.00028087649402390436, | |
| "loss": 0.9716, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.0002793441618142814, | |
| "loss": 0.9774, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 0.0002778118296046583, | |
| "loss": 0.9723, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 0.00027627949739503525, | |
| "loss": 0.9702, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 0.0002747471651854122, | |
| "loss": 0.9766, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 0.0002732148329757891, | |
| "loss": 0.9843, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 0.00027168250076616614, | |
| "loss": 0.9701, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 0.00027015016855654305, | |
| "loss": 0.9715, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 0.00026861783634692, | |
| "loss": 0.9695, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 0.000267085504137297, | |
| "loss": 0.9699, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 0.00026555317192767394, | |
| "loss": 0.9665, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 0.00026402083971805085, | |
| "loss": 0.9681, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 0.0002624885075084278, | |
| "loss": 0.9697, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 0.0002609561752988048, | |
| "loss": 0.9662, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 0.00025942384308918175, | |
| "loss": 0.965, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0002578915108795587, | |
| "loss": 0.9655, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 0.0002563591786699356, | |
| "loss": 0.9689, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 0.00025482684646031264, | |
| "loss": 0.9641, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 0.00025329451425068955, | |
| "loss": 0.9612, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 0.0002517621820410665, | |
| "loss": 0.9667, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.0002502298498314435, | |
| "loss": 0.9623, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 0.0002486975176218204, | |
| "loss": 0.9611, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 0.00024716518541219735, | |
| "loss": 0.956, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 0.0002456328532025743, | |
| "loss": 0.9623, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 0.00024410052099295128, | |
| "loss": 0.9577, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 0.00024256818878332824, | |
| "loss": 0.9584, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 0.00024103585657370518, | |
| "loss": 0.9595, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 0.00023950352436408212, | |
| "loss": 0.954, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 0.00023797119215445908, | |
| "loss": 0.958, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 0.00023643885994483605, | |
| "loss": 0.9575, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 0.000234906527735213, | |
| "loss": 0.9499, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 0.00023337419552558995, | |
| "loss": 0.9583, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 0.0002318418633159669, | |
| "loss": 0.9547, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 0.00023030953110634387, | |
| "loss": 0.9531, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 0.0002287771988967208, | |
| "loss": 0.9566, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 0.00022724486668709778, | |
| "loss": 0.9519, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 0.0002257125344774747, | |
| "loss": 0.9473, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 0.00022418020226785168, | |
| "loss": 0.9496, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 0.00022264787005822861, | |
| "loss": 0.9469, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 0.00022111553784860558, | |
| "loss": 0.9509, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.00021958320563898254, | |
| "loss": 0.9466, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 0.0002180508734293595, | |
| "loss": 0.9499, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 0.00021651854121973644, | |
| "loss": 0.9498, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 0.0002149862090101134, | |
| "loss": 0.9483, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 0.00021345387680049037, | |
| "loss": 0.9522, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 0.00021192154459086728, | |
| "loss": 0.9441, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 0.00021038921238124425, | |
| "loss": 0.9492, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 0.0002088568801716212, | |
| "loss": 0.9421, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 0.00020732454796199817, | |
| "loss": 0.9432, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 0.0002057922157523751, | |
| "loss": 0.9542, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 0.00020425988354275207, | |
| "loss": 0.9426, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 0.00020272755133312904, | |
| "loss": 0.9484, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 0.00020119521912350598, | |
| "loss": 0.9473, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 0.00019966288691388294, | |
| "loss": 0.9413, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.0001981305547042599, | |
| "loss": 0.9438, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 0.00019659822249463684, | |
| "loss": 0.9421, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 0.00019506589028501378, | |
| "loss": 0.9406, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 0.00019353355807539074, | |
| "loss": 0.9384, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 0.0001920012258657677, | |
| "loss": 0.9397, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 0.00019046889365614464, | |
| "loss": 0.9367, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 0.0001889365614465216, | |
| "loss": 0.9402, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 0.00018740422923689857, | |
| "loss": 0.9319, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 0.00018587189702727554, | |
| "loss": 0.9385, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 0.00018433956481765247, | |
| "loss": 0.939, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 0.0001828072326080294, | |
| "loss": 0.9399, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 0.00018127490039840637, | |
| "loss": 0.9407, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 0.00017974256818878334, | |
| "loss": 0.94, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 0.00017821023597916027, | |
| "loss": 0.9407, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 0.00017667790376953724, | |
| "loss": 0.9353, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 0.0001751455715599142, | |
| "loss": 0.9405, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 0.00017361323935029114, | |
| "loss": 0.9305, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 0.0001720809071406681, | |
| "loss": 0.938, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 0.00017054857493104507, | |
| "loss": 0.9311, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 0.00016901624272142203, | |
| "loss": 0.9343, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 0.00016748391051179894, | |
| "loss": 0.9312, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 0.0001659515783021759, | |
| "loss": 0.9353, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 0.00016441924609255287, | |
| "loss": 0.9341, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 0.0001628869138829298, | |
| "loss": 0.9338, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 0.00016135458167330677, | |
| "loss": 0.9318, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 0.00015982224946368373, | |
| "loss": 0.9309, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 0.0001582899172540607, | |
| "loss": 0.9291, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 0.00015675758504443764, | |
| "loss": 0.9307, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 0.0001552252528348146, | |
| "loss": 0.9325, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 0.00015369292062519156, | |
| "loss": 0.9363, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 0.00015216058841556847, | |
| "loss": 0.9325, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 0.00015062825620594544, | |
| "loss": 0.9276, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 0.0001490959239963224, | |
| "loss": 0.9328, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 0.00014756359178669937, | |
| "loss": 0.9304, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 0.0001460312595770763, | |
| "loss": 0.9274, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 0.00014449892736745327, | |
| "loss": 0.9261, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 0.00014296659515783023, | |
| "loss": 0.9245, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 0.0001414342629482072, | |
| "loss": 0.9233, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 0.00013990193073858413, | |
| "loss": 0.9275, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 0.00013836959852896107, | |
| "loss": 0.9265, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 0.00013683726631933803, | |
| "loss": 0.9276, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 0.00013530493410971497, | |
| "loss": 0.9252, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 0.00013377260190009193, | |
| "loss": 0.9224, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 0.0001322402696904689, | |
| "loss": 0.9216, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.00013070793748084586, | |
| "loss": 0.9233, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 0.0001291756052712228, | |
| "loss": 0.9275, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 0.00012764327306159976, | |
| "loss": 0.9229, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 0.00012611094085197673, | |
| "loss": 0.922, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 0.00012457860864235367, | |
| "loss": 0.9255, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 0.0001230462764327306, | |
| "loss": 0.9196, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 0.00012151394422310758, | |
| "loss": 0.9198, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 0.00011998161201348452, | |
| "loss": 0.9226, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.00011844927980386148, | |
| "loss": 0.9174, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 0.00011691694759423843, | |
| "loss": 0.9191, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 0.0001153846153846154, | |
| "loss": 0.9207, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 0.00011385228317499235, | |
| "loss": 0.9225, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 0.00011231995096536928, | |
| "loss": 0.9198, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 0.00011078761875574625, | |
| "loss": 0.9183, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 0.0001092552865461232, | |
| "loss": 0.919, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 0.00010772295433650016, | |
| "loss": 0.9193, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 0.00010619062212687711, | |
| "loss": 0.9205, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 0.00010465828991725406, | |
| "loss": 0.92, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.00010312595770763101, | |
| "loss": 0.9192, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 0.00010159362549800798, | |
| "loss": 0.9186, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 0.00010006129328838493, | |
| "loss": 0.9176, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "learning_rate": 9.852896107876188e-05, | |
| "loss": 0.9136, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.699662886913883e-05, | |
| "loss": 0.9116, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 9.546429665951578e-05, | |
| "loss": 0.9174, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 9.393196444989274e-05, | |
| "loss": 0.9156, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 9.23996322402697e-05, | |
| "loss": 0.912, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 9.086730003064666e-05, | |
| "loss": 0.9142, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 8.93349678210236e-05, | |
| "loss": 0.9099, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 8.780263561140055e-05, | |
| "loss": 0.9129, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 8.627030340177751e-05, | |
| "loss": 0.9145, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 8.473797119215446e-05, | |
| "loss": 0.9117, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 8.320563898253141e-05, | |
| "loss": 0.9112, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 8.167330677290836e-05, | |
| "loss": 0.9128, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 8.014097456328533e-05, | |
| "loss": 0.9122, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 7.860864235366228e-05, | |
| "loss": 0.9113, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 7.707631014403924e-05, | |
| "loss": 0.9115, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 7.554397793441618e-05, | |
| "loss": 0.9098, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.401164572479313e-05, | |
| "loss": 0.9101, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 7.247931351517009e-05, | |
| "loss": 0.9063, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 7.094698130554704e-05, | |
| "loss": 0.913, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 6.9414649095924e-05, | |
| "loss": 0.9092, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 6.788231688630094e-05, | |
| "loss": 0.9101, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 6.634998467667791e-05, | |
| "loss": 0.9089, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 6.481765246705486e-05, | |
| "loss": 0.9108, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 6.328532025743182e-05, | |
| "loss": 0.9065, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 6.175298804780877e-05, | |
| "loss": 0.9129, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 6.022065583818572e-05, | |
| "loss": 0.9097, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 5.8688323628562674e-05, | |
| "loss": 0.9115, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5.715599141893963e-05, | |
| "loss": 0.9088, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 5.5623659209316575e-05, | |
| "loss": 0.9112, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 5.409132699969353e-05, | |
| "loss": 0.9086, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.255899479007049e-05, | |
| "loss": 0.9106, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 5.102666258044744e-05, | |
| "loss": 0.9104, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 4.94943303708244e-05, | |
| "loss": 0.9037, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 4.796199816120135e-05, | |
| "loss": 0.9082, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "learning_rate": 4.6429665951578305e-05, | |
| "loss": 0.9041, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 4.489733374195526e-05, | |
| "loss": 0.9025, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 4.3365001532332206e-05, | |
| "loss": 0.9006, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 4.1832669322709164e-05, | |
| "loss": 0.9072, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 4.0300337113086114e-05, | |
| "loss": 0.9038, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 3.876800490346307e-05, | |
| "loss": 0.9072, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 3.723567269384002e-05, | |
| "loss": 0.9017, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 3.570334048421698e-05, | |
| "loss": 0.9032, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 3.4171008274593937e-05, | |
| "loss": 0.9026, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 3.263867606497089e-05, | |
| "loss": 0.9008, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 3.110634385534784e-05, | |
| "loss": 0.9065, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 2.9574011645724795e-05, | |
| "loss": 0.9026, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 2.804167943610175e-05, | |
| "loss": 0.9011, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 2.65093472264787e-05, | |
| "loss": 0.9023, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 2.4977015016855653e-05, | |
| "loss": 0.9007, | |
| "step": 32000 | |
| } | |
| ], | |
| "max_steps": 33630, | |
| "num_train_epochs": 10, | |
| "total_flos": 8.418135066885916e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |