| { |
| "best_metric": 1.0445035696029663, |
| "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-05-09_21-49-53_experiment/checkpoint-248650", |
| "epoch": 49.99748655305886, |
| "global_step": 248650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0054293183189222e-05, |
| "loss": 4.306, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.0108586366378444e-05, |
| "loss": 3.8922, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.016287954956767e-05, |
| "loss": 3.6512, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.021717273275689e-05, |
| "loss": 3.44, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 5.027146591594611e-05, |
| "loss": 3.2164, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 6.032575909913534e-05, |
| "loss": 3.0072, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 7.038005228232456e-05, |
| "loss": 2.8143, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 8.043434546551378e-05, |
| "loss": 2.6472, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.048863864870299e-05, |
| "loss": 2.51, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00010054293183189222, |
| "loss": 2.3891, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00011059722501508144, |
| "loss": 2.2905, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00012065151819827068, |
| "loss": 2.2056, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00013070581138145988, |
| "loss": 2.137, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00014076010456464912, |
| "loss": 2.0792, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00015081439774783834, |
| "loss": 2.0301, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00016086869093102755, |
| "loss": 1.9879, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00017092298411421677, |
| "loss": 1.9499, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00018097727729740598, |
| "loss": 1.9279, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0001910315704805952, |
| "loss": 1.8925, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00020108586366378444, |
| "loss": 1.8673, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00021114015684697366, |
| "loss": 1.8431, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00022119445003016287, |
| "loss": 1.8236, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0002312487432133521, |
| "loss": 1.8023, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00024130303639654136, |
| "loss": 1.7889, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00025135732957973054, |
| "loss": 1.7671, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00026141162276291976, |
| "loss": 1.7506, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00027146591594610903, |
| "loss": 1.737, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00028152020912929824, |
| "loss": 1.7272, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00029157450231248746, |
| "loss": 1.7112, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0003016287954956767, |
| "loss": 1.699, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0003116830886788659, |
| "loss": 1.6933, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0003217373818620551, |
| "loss": 1.6788, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0003317916750452443, |
| "loss": 1.6694, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00034184596822843354, |
| "loss": 1.6607, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00035190026141162275, |
| "loss": 1.6534, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00036195455459481197, |
| "loss": 1.645, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003720088477780012, |
| "loss": 1.632, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0003820631409611904, |
| "loss": 1.6229, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00039211743414437967, |
| "loss": 1.6161, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0004021717273275689, |
| "loss": 1.6061, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0004122260205107581, |
| "loss": 1.6006, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0004222803136939473, |
| "loss": 1.5875, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00043233460687713653, |
| "loss": 1.5828, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00044238890006032574, |
| "loss": 1.5849, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00045244319324351496, |
| "loss": 1.572, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0004624974864267042, |
| "loss": 1.565, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0004725517796098934, |
| "loss": 1.5589, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0004826060727930827, |
| "loss": 1.5519, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0004926603659762719, |
| "loss": 1.5489, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.38639571407117423, |
| "eval_loss": 1.490514874458313, |
| "eval_runtime": 19.6663, |
| "eval_samples_per_second": 4045.908, |
| "eval_steps_per_second": 15.814, |
| "step": 4973 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0005027146591594611, |
| "loss": 1.5386, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0005127689523426503, |
| "loss": 1.5361, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0005228232455258395, |
| "loss": 1.5279, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0005328775387090288, |
| "loss": 1.5273, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0005429318318922181, |
| "loss": 1.5192, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0005529861250754073, |
| "loss": 1.5175, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0005630404182585965, |
| "loss": 1.5165, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0005730947114417857, |
| "loss": 1.5102, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0005831490046249749, |
| "loss": 1.5006, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0005932032978081641, |
| "loss": 1.4952, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0006032575909913533, |
| "loss": 1.4956, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.0006133118841745426, |
| "loss": 1.4906, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.0006233661773577318, |
| "loss": 1.4845, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0006334204705409209, |
| "loss": 1.4826, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0006434747637241102, |
| "loss": 1.4844, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.0006535290569072995, |
| "loss": 1.4789, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0006635833500904886, |
| "loss": 1.474, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.000673637643273678, |
| "loss": 1.4716, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.0006836919364568671, |
| "loss": 1.471, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0006937462296400564, |
| "loss": 1.4675, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0007038005228232455, |
| "loss": 1.4627, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0007138548160064348, |
| "loss": 1.4575, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0007239091091896239, |
| "loss": 1.4615, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.0007339634023728133, |
| "loss": 1.451, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0007440176955560024, |
| "loss": 1.4533, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.0007540719887391917, |
| "loss": 1.4469, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.0007641262819223808, |
| "loss": 1.4471, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0007741805751055701, |
| "loss": 1.4433, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 0.0007842348682887593, |
| "loss": 1.4447, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 0.0007942891614719485, |
| "loss": 1.4389, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 0.0008043434546551378, |
| "loss": 1.4352, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 0.000814397747838327, |
| "loss": 1.432, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 0.0008244520410215162, |
| "loss": 1.4366, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0008345063342047054, |
| "loss": 1.4331, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 0.0008445606273878946, |
| "loss": 1.4331, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 0.0008546149205710838, |
| "loss": 1.4229, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 0.0008646692137542731, |
| "loss": 1.4255, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 0.0008747235069374624, |
| "loss": 1.4223, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 0.0008847778001206515, |
| "loss": 1.4239, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 0.0008948320933038408, |
| "loss": 1.4214, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0009048863864870299, |
| "loss": 1.4187, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 0.0009149406796702192, |
| "loss": 1.4181, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 0.0009249949728534083, |
| "loss": 1.4188, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 0.0009350492660365977, |
| "loss": 1.4166, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 0.0009451035592197868, |
| "loss": 1.4089, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 0.0009551578524029761, |
| "loss": 1.4085, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 0.0009652121455861654, |
| "loss": 1.4111, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0009752664387693545, |
| "loss": 1.4065, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 0.0009853207319525437, |
| "loss": 1.4065, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.000995375025135733, |
| "loss": 1.4079, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.39677088556687123, |
| "eval_loss": 1.357865333557129, |
| "eval_runtime": 19.5955, |
| "eval_samples_per_second": 4060.518, |
| "eval_steps_per_second": 15.871, |
| "step": 9946 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 0.0010054293183189222, |
| "loss": 1.4024, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 0.0010154836115021115, |
| "loss": 1.3983, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 0.0010255379046853006, |
| "loss": 1.3965, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 0.00103559219786849, |
| "loss": 1.3945, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0010455459481198471, |
| "loss": 1.3989, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 0.0010556002413030364, |
| "loss": 1.3975, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.0010655539915543938, |
| "loss": 1.3924, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 0.001075608284737583, |
| "loss": 1.3933, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 0.0010856625779207723, |
| "loss": 1.394, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 0.0010957168711039614, |
| "loss": 1.3945, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 0.0011057711642871507, |
| "loss": 1.3929, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.0011158254574703398, |
| "loss": 1.3943, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 0.0011258797506535291, |
| "loss": 1.3893, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 0.0011359340438367182, |
| "loss": 1.3908, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 0.0011459883370199076, |
| "loss": 1.3887, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 0.0011560426302030967, |
| "loss": 1.3902, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 0.001166096923386286, |
| "loss": 1.3934, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 0.001176151216569475, |
| "loss": 1.3857, |
| "step": 11700 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.0011862055097526644, |
| "loss": 1.3885, |
| "step": 11800 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 0.0011962598029358535, |
| "loss": 1.3847, |
| "step": 11900 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 0.0012063140961190428, |
| "loss": 1.3863, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 0.001216368389302232, |
| "loss": 1.3846, |
| "step": 12100 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 0.0012264226824854213, |
| "loss": 1.3815, |
| "step": 12200 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 0.0012364769756686104, |
| "loss": 1.3876, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 0.0012465312688517997, |
| "loss": 1.3812, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.0012565855620349888, |
| "loss": 1.3838, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 0.0012666398552181781, |
| "loss": 1.3891, |
| "step": 12600 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 0.0012766941484013675, |
| "loss": 1.3852, |
| "step": 12700 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 0.0012867484415845566, |
| "loss": 1.3856, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 0.0012968027347677459, |
| "loss": 1.3816, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 0.001306857027950935, |
| "loss": 1.3808, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 0.0013169113211341243, |
| "loss": 1.3808, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0013269656143173134, |
| "loss": 1.3802, |
| "step": 13200 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 0.0013370199075005028, |
| "loss": 1.3781, |
| "step": 13300 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 0.0013470742006836919, |
| "loss": 1.3813, |
| "step": 13400 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.0013571284938668812, |
| "loss": 1.3789, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 0.0013671827870500705, |
| "loss": 1.3808, |
| "step": 13600 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 0.0013772370802332596, |
| "loss": 1.3793, |
| "step": 13700 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 0.001387291373416449, |
| "loss": 1.3839, |
| "step": 13800 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.001397345666599638, |
| "loss": 1.3746, |
| "step": 13900 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 0.0014072994168509952, |
| "loss": 1.3719, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 0.0014173537100341845, |
| "loss": 1.3761, |
| "step": 14100 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 0.0014274080032173739, |
| "loss": 1.3775, |
| "step": 14200 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 0.0014374622964005632, |
| "loss": 1.3781, |
| "step": 14300 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 0.0014475165895837523, |
| "loss": 1.376, |
| "step": 14400 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 0.0014575708827669414, |
| "loss": 1.3759, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 0.0014676251759501307, |
| "loss": 1.3709, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 0.00147767946913332, |
| "loss": 1.3724, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 0.0014877337623165092, |
| "loss": 1.3792, |
| "step": 14800 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0014977880554996985, |
| "loss": 1.3705, |
| "step": 14900 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.3990531464846882, |
| "eval_loss": 1.3305182456970215, |
| "eval_runtime": 19.8107, |
| "eval_samples_per_second": 4016.413, |
| "eval_steps_per_second": 15.699, |
| "step": 14919 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 0.0015078423486828876, |
| "loss": 1.3671, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 0.001517896641866077, |
| "loss": 1.3701, |
| "step": 15100 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 0.001527950935049266, |
| "loss": 1.3686, |
| "step": 15200 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 0.0015380052282324553, |
| "loss": 1.3682, |
| "step": 15300 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 0.0015480595214156445, |
| "loss": 1.3661, |
| "step": 15400 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 0.0015581138145988338, |
| "loss": 1.3605, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 0.0015681681077820229, |
| "loss": 1.3689, |
| "step": 15600 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 0.0015782224009652122, |
| "loss": 1.3741, |
| "step": 15700 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 0.0015882766941484015, |
| "loss": 1.3674, |
| "step": 15800 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 0.0015983309873315906, |
| "loss": 1.367, |
| "step": 15900 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 0.0016083852805147797, |
| "loss": 1.3694, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 0.001618439573697969, |
| "loss": 1.3683, |
| "step": 16100 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 0.0016284938668811584, |
| "loss": 1.3669, |
| "step": 16200 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 0.0016385481600643475, |
| "loss": 1.3713, |
| "step": 16300 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 0.0016486024532475366, |
| "loss": 1.3663, |
| "step": 16400 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 0.001658656746430726, |
| "loss": 1.3708, |
| "step": 16500 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 0.0016687110396139153, |
| "loss": 1.3693, |
| "step": 16600 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 0.0016787653327971046, |
| "loss": 1.3685, |
| "step": 16700 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 0.0016888196259802935, |
| "loss": 1.3692, |
| "step": 16800 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 0.0016988739191634828, |
| "loss": 1.3682, |
| "step": 16900 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 0.0017089282123466721, |
| "loss": 1.3657, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 0.0017189825055298614, |
| "loss": 1.3685, |
| "step": 17100 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 0.0017290367987130505, |
| "loss": 1.3683, |
| "step": 17200 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 0.0017390910918962397, |
| "loss": 1.368, |
| "step": 17300 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 0.001749145385079429, |
| "loss": 1.368, |
| "step": 17400 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 0.0017591996782626183, |
| "loss": 1.3668, |
| "step": 17500 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 0.0017692539714458076, |
| "loss": 1.363, |
| "step": 17600 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 0.0017793082646289965, |
| "loss": 1.3651, |
| "step": 17700 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 0.0017893625578121858, |
| "loss": 1.363, |
| "step": 17800 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 0.0017994168509953752, |
| "loss": 1.3652, |
| "step": 17900 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 0.0018094711441785645, |
| "loss": 1.3666, |
| "step": 18000 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 0.0018194248944299217, |
| "loss": 1.3653, |
| "step": 18100 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 0.001829479187613111, |
| "loss": 1.3685, |
| "step": 18200 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 0.0018395334807963, |
| "loss": 1.3641, |
| "step": 18300 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 0.0018495877739794892, |
| "loss": 1.3642, |
| "step": 18400 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 0.0018596420671626785, |
| "loss": 1.3668, |
| "step": 18500 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 0.0018696963603458678, |
| "loss": 1.3662, |
| "step": 18600 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 0.001879650110597225, |
| "loss": 1.3659, |
| "step": 18700 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 0.0018897044037804143, |
| "loss": 1.368, |
| "step": 18800 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 0.0018997586969636035, |
| "loss": 1.3631, |
| "step": 18900 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 0.0019098129901467928, |
| "loss": 1.3629, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 0.0019198672833299819, |
| "loss": 1.3609, |
| "step": 19100 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 0.0019299215765131712, |
| "loss": 1.368, |
| "step": 19200 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 0.0019399758696963603, |
| "loss": 1.3663, |
| "step": 19300 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 0.0019500301628795496, |
| "loss": 1.3684, |
| "step": 19400 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 0.0019600844560627387, |
| "loss": 1.3653, |
| "step": 19500 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 0.001970138749245928, |
| "loss": 1.3657, |
| "step": 19600 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 0.0019801930424291174, |
| "loss": 1.3619, |
| "step": 19700 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 0.0019902473356123067, |
| "loss": 1.3648, |
| "step": 19800 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.39993003357370305, |
| "eval_loss": 1.3229814767837524, |
| "eval_runtime": 19.7901, |
| "eval_samples_per_second": 4020.592, |
| "eval_steps_per_second": 15.715, |
| "step": 19893 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.0020003016287954956, |
| "loss": 1.3611, |
| "step": 19900 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 0.002010355921978685, |
| "loss": 1.3592, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 0.0020204102151618743, |
| "loss": 1.3543, |
| "step": 20100 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 0.0020304645083450636, |
| "loss": 1.3601, |
| "step": 20200 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 0.0020405188015282525, |
| "loss": 1.3539, |
| "step": 20300 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 0.002050573094711442, |
| "loss": 1.3579, |
| "step": 20400 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 0.002060627387894631, |
| "loss": 1.3596, |
| "step": 20500 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 0.0020706816810778204, |
| "loss": 1.3603, |
| "step": 20600 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 0.0020807359742610093, |
| "loss": 1.3594, |
| "step": 20700 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 0.0020907902674441987, |
| "loss": 1.3609, |
| "step": 20800 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 0.002100844560627388, |
| "loss": 1.3648, |
| "step": 20900 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 0.0021108988538105773, |
| "loss": 1.3616, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 0.0021209531469937666, |
| "loss": 1.3631, |
| "step": 21100 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 0.0021310074401769555, |
| "loss": 1.3639, |
| "step": 21200 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 0.002140961190428313, |
| "loss": 1.3646, |
| "step": 21300 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 0.0021510154836115025, |
| "loss": 1.3593, |
| "step": 21400 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 0.0021610697767946913, |
| "loss": 1.3604, |
| "step": 21500 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 0.0021711240699778807, |
| "loss": 1.362, |
| "step": 21600 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 0.00218117836316107, |
| "loss": 1.3583, |
| "step": 21700 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 0.0021912326563442593, |
| "loss": 1.364, |
| "step": 21800 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 0.002201286949527448, |
| "loss": 1.3625, |
| "step": 21900 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 0.0022113412427106375, |
| "loss": 1.367, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 0.002221395535893827, |
| "loss": 1.3646, |
| "step": 22100 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 0.002231449829077016, |
| "loss": 1.3625, |
| "step": 22200 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 0.002241504122260205, |
| "loss": 1.3617, |
| "step": 22300 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 0.0022515584154433944, |
| "loss": 1.3616, |
| "step": 22400 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 0.0022616127086265837, |
| "loss": 1.3624, |
| "step": 22500 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 0.002271667001809773, |
| "loss": 1.3635, |
| "step": 22600 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 0.002281721294992962, |
| "loss": 1.36, |
| "step": 22700 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 0.0022917755881761512, |
| "loss": 1.3599, |
| "step": 22800 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 0.0023018298813593406, |
| "loss": 1.3674, |
| "step": 22900 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 0.00231188417454253, |
| "loss": 1.3667, |
| "step": 23000 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 0.002321938467725719, |
| "loss": 1.3555, |
| "step": 23100 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 0.002331992760908908, |
| "loss": 1.3594, |
| "step": 23200 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 0.0023420470540920974, |
| "loss": 1.3629, |
| "step": 23300 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 0.0023521013472752868, |
| "loss": 1.3641, |
| "step": 23400 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 0.0023621556404584756, |
| "loss": 1.3594, |
| "step": 23500 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 0.002372209933641665, |
| "loss": 1.3594, |
| "step": 23600 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 0.0023821636838930226, |
| "loss": 1.3639, |
| "step": 23700 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 0.0023922179770762115, |
| "loss": 1.3609, |
| "step": 23800 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 0.002402272270259401, |
| "loss": 1.3605, |
| "step": 23900 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 0.00241232656344259, |
| "loss": 1.3608, |
| "step": 24000 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 0.0024223808566257794, |
| "loss": 1.3646, |
| "step": 24100 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 0.0024324351498089683, |
| "loss": 1.3599, |
| "step": 24200 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 0.0024424894429921577, |
| "loss": 1.3586, |
| "step": 24300 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 0.002452543736175347, |
| "loss": 1.3608, |
| "step": 24400 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 0.0024625980293585363, |
| "loss": 1.3615, |
| "step": 24500 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 0.002472652322541725, |
| "loss": 1.3629, |
| "step": 24600 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 0.0024827066157249145, |
| "loss": 1.3638, |
| "step": 24700 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 0.002492760908908104, |
| "loss": 1.3652, |
| "step": 24800 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.39963747273250233, |
| "eval_loss": 1.3215824365615845, |
| "eval_runtime": 19.5099, |
| "eval_samples_per_second": 4078.343, |
| "eval_steps_per_second": 15.941, |
| "step": 24866 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 0.002499999903431858, |
| "loss": 1.3594, |
| "step": 24900 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 0.0024999979819234353, |
| "loss": 1.3548, |
| "step": 25000 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 0.0024999935969463615, |
| "loss": 1.3572, |
| "step": 25100 |
| }, |
| { |
| "epoch": 5.07, |
| "learning_rate": 0.0024999867485092793, |
| "loss": 1.3579, |
| "step": 25200 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 0.002499977436625685, |
| "loss": 1.3553, |
| "step": 25300 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 0.0024999656613139305, |
| "loss": 1.3558, |
| "step": 25400 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 0.0024999514225972227, |
| "loss": 1.3556, |
| "step": 25500 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 0.002499934720503622, |
| "loss": 1.356, |
| "step": 25600 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 0.0024999155550660458, |
| "loss": 1.3537, |
| "step": 25700 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 0.002499893926322264, |
| "loss": 1.3582, |
| "step": 25800 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 0.0024998698343149024, |
| "loss": 1.3528, |
| "step": 25900 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 0.0024998432790914404, |
| "loss": 1.3531, |
| "step": 26000 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 0.0024998145630805626, |
| "loss": 1.3578, |
| "step": 26100 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 0.002499783106217526, |
| "loss": 1.3548, |
| "step": 26200 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 0.0024997491863093103, |
| "loss": 1.3516, |
| "step": 26300 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 0.002499712803422766, |
| "loss": 1.3549, |
| "step": 26400 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 0.0024996739576295945, |
| "loss": 1.3539, |
| "step": 26500 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 0.0024996326490063525, |
| "loss": 1.356, |
| "step": 26600 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 0.0024995888776344504, |
| "loss": 1.3513, |
| "step": 26700 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 0.002499542643600152, |
| "loss": 1.354, |
| "step": 26800 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 0.0024994939469945737, |
| "loss": 1.3535, |
| "step": 26900 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 0.0024994427879136854, |
| "loss": 1.3483, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 0.002499389166458312, |
| "loss": 1.354, |
| "step": 27100 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 0.0024993330827341276, |
| "loss": 1.3512, |
| "step": 27200 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 0.0024992745368516618, |
| "loss": 1.3492, |
| "step": 27300 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 0.0024992135289262953, |
| "loss": 1.3521, |
| "step": 27400 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 0.002499150059078261, |
| "loss": 1.3494, |
| "step": 27500 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 0.0024990841274326442, |
| "loss": 1.3497, |
| "step": 27600 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 0.0024990157341193814, |
| "loss": 1.3494, |
| "step": 27700 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 0.0024989448792732604, |
| "loss": 1.3487, |
| "step": 27800 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 0.0024988715630339213, |
| "loss": 1.3468, |
| "step": 27900 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 0.002498795785545853, |
| "loss": 1.3465, |
| "step": 28000 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 0.002498718341526211, |
| "loss": 1.3505, |
| "step": 28100 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 0.00249863766660223, |
| "loss": 1.3512, |
| "step": 28200 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 0.0024985545308904788, |
| "loss": 1.3497, |
| "step": 28300 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 0.0024984689345547983, |
| "loss": 1.3495, |
| "step": 28400 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 0.002498380877763881, |
| "loss": 1.3484, |
| "step": 28500 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 0.0024982903606912666, |
| "loss": 1.3465, |
| "step": 28600 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 0.0024981973835153442, |
| "loss": 1.3494, |
| "step": 28700 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 0.0024981019464193513, |
| "loss": 1.3448, |
| "step": 28800 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 0.002498004049591373, |
| "loss": 1.3447, |
| "step": 28900 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 0.002497903693224343, |
| "loss": 1.3489, |
| "step": 29000 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 0.0024978008775160404, |
| "loss": 1.3491, |
| "step": 29100 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 0.002497695602669093, |
| "loss": 1.3511, |
| "step": 29200 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 0.002497587868890974, |
| "loss": 1.344, |
| "step": 29300 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 0.002497477676394002, |
| "loss": 1.3442, |
| "step": 29400 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 0.002497365025395343, |
| "loss": 1.3518, |
| "step": 29500 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 0.0024972499161170065, |
| "loss": 1.3396, |
| "step": 29600 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 0.0024971323487858474, |
| "loss": 1.3427, |
| "step": 29700 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.002497012323633565, |
| "loss": 1.3434, |
| "step": 29800 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.4013078853657854, |
| "eval_loss": 1.3029085397720337, |
| "eval_runtime": 20.0428, |
| "eval_samples_per_second": 3969.911, |
| "eval_steps_per_second": 15.517, |
| "step": 29839 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 0.002496889840896702, |
| "loss": 1.3447, |
| "step": 29900 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 0.0024967649008166455, |
| "loss": 1.3366, |
| "step": 30000 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 0.0024966375036396234, |
| "loss": 1.3399, |
| "step": 30100 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 0.0024965076496167083, |
| "loss": 1.3375, |
| "step": 30200 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 0.00249637667426921, |
| "loss": 1.3349, |
| "step": 30300 |
| }, |
| { |
| "epoch": 6.11, |
| "learning_rate": 0.0024962419318890756, |
| "loss": 1.3401, |
| "step": 30400 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 0.0024961047334426316, |
| "loss": 1.339, |
| "step": 30500 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 0.0024959650792002663, |
| "loss": 1.3457, |
| "step": 30600 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 0.0024958229694372063, |
| "loss": 1.34, |
| "step": 30700 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 0.00249567840443352, |
| "loss": 1.3389, |
| "step": 30800 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 0.0024955313844741115, |
| "loss": 1.3385, |
| "step": 30900 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 0.002495381909848725, |
| "loss": 1.3409, |
| "step": 31000 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 0.002495229980851942, |
| "loss": 1.3394, |
| "step": 31100 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 0.002495075597783181, |
| "loss": 1.3392, |
| "step": 31200 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 0.0024949187609466963, |
| "loss": 1.3401, |
| "step": 31300 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 0.002494759470651578, |
| "loss": 1.3375, |
| "step": 31400 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 0.0024945977272117534, |
| "loss": 1.342, |
| "step": 31500 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 0.002494433530945982, |
| "loss": 1.335, |
| "step": 31600 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 0.002494266882177858, |
| "loss": 1.3347, |
| "step": 31700 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 0.0024940977812358094, |
| "loss": 1.3392, |
| "step": 31800 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 0.002493926228453097, |
| "loss": 1.3365, |
| "step": 31900 |
| }, |
| { |
| "epoch": 6.43, |
| "learning_rate": 0.0024937522241678133, |
| "loss": 1.3374, |
| "step": 32000 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 0.0024935757687228814, |
| "loss": 1.3372, |
| "step": 32100 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 0.002493396862466056, |
| "loss": 1.3375, |
| "step": 32200 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 0.0024932173314456876, |
| "loss": 1.3388, |
| "step": 32300 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 0.002493033549126893, |
| "loss": 1.3394, |
| "step": 32400 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 0.002492847317064799, |
| "loss": 1.3353, |
| "step": 32500 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 0.002492658635626429, |
| "loss": 1.334, |
| "step": 32600 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 0.0024924675051836314, |
| "loss": 1.3385, |
| "step": 32700 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 0.0024922739261130818, |
| "loss": 1.3378, |
| "step": 32800 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 0.002492077898796282, |
| "loss": 1.3403, |
| "step": 32900 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 0.002491879423619558, |
| "loss": 1.3408, |
| "step": 33000 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 0.002491678500974061, |
| "loss": 1.3345, |
| "step": 33100 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 0.002491475131255764, |
| "loss": 1.3331, |
| "step": 33200 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 0.002491269314865464, |
| "loss": 1.3382, |
| "step": 33300 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 0.00249106105220878, |
| "loss": 1.3372, |
| "step": 33400 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 0.0024908503436961503, |
| "loss": 1.3308, |
| "step": 33500 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 0.002490637189742836, |
| "loss": 1.3346, |
| "step": 33600 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 0.002490421590768915, |
| "loss": 1.3356, |
| "step": 33700 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 0.0024902035471992857, |
| "loss": 1.3314, |
| "step": 33800 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 0.002489983059463664, |
| "loss": 1.3342, |
| "step": 33900 |
| }, |
| { |
| "epoch": 6.84, |
| "learning_rate": 0.002489760127996581, |
| "loss": 1.3349, |
| "step": 34000 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 0.0024895347532373864, |
| "loss": 1.336, |
| "step": 34100 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 0.002489306935630243, |
| "loss": 1.3345, |
| "step": 34200 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 0.0024890766756241293, |
| "loss": 1.3351, |
| "step": 34300 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 0.0024888463127784766, |
| "loss": 1.3308, |
| "step": 34400 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 0.0024886111937531884, |
| "loss": 1.3369, |
| "step": 34500 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 0.0024883736337000827, |
| "loss": 1.3351, |
| "step": 34600 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 0.0024881336330873373, |
| "loss": 1.333, |
| "step": 34700 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 0.0024878911923879405, |
| "loss": 1.3314, |
| "step": 34800 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.4019466193393693, |
| "eval_loss": 1.2937954664230347, |
| "eval_runtime": 19.8606, |
| "eval_samples_per_second": 4006.315, |
| "eval_steps_per_second": 15.659, |
| "step": 34812 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 0.0024876463120796894, |
| "loss": 1.3277, |
| "step": 34900 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 0.002487398992645188, |
| "loss": 1.3304, |
| "step": 35000 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 0.002487149234571848, |
| "loss": 1.327, |
| "step": 35100 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 0.002486897038351888, |
| "loss": 1.3317, |
| "step": 35200 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 0.0024866424044823303, |
| "loss": 1.3264, |
| "step": 35300 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 0.0024863853334650015, |
| "loss": 1.3287, |
| "step": 35400 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 0.002486125825806532, |
| "loss": 1.3266, |
| "step": 35500 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 0.002485863882018354, |
| "loss": 1.3311, |
| "step": 35600 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 0.0024855995026166994, |
| "loss": 1.3303, |
| "step": 35700 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 0.0024853326881226026, |
| "loss": 1.328, |
| "step": 35800 |
| }, |
| { |
| "epoch": 7.22, |
| "learning_rate": 0.002485063439061895, |
| "loss": 1.3339, |
| "step": 35900 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 0.0024847917559652067, |
| "loss": 1.3296, |
| "step": 36000 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 0.002484517639367966, |
| "loss": 1.3294, |
| "step": 36100 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 0.0024842410898103947, |
| "loss": 1.3311, |
| "step": 36200 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 0.0024839621078375103, |
| "loss": 1.3282, |
| "step": 36300 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 0.002483680693999126, |
| "loss": 1.3248, |
| "step": 36400 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 0.0024833996993344934, |
| "loss": 1.3279, |
| "step": 36500 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 0.002483113447738441, |
| "loss": 1.3267, |
| "step": 36600 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 0.0024828247659494087, |
| "loss": 1.3265, |
| "step": 36700 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 0.0024825336545363243, |
| "loss": 1.3287, |
| "step": 36800 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 0.0024822401140729027, |
| "loss": 1.3274, |
| "step": 36900 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 0.0024819441451376482, |
| "loss": 1.3317, |
| "step": 37000 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 0.00248164574831385, |
| "loss": 1.3274, |
| "step": 37100 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 0.002481344924189581, |
| "loss": 1.3275, |
| "step": 37200 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 0.0024810416733576997, |
| "loss": 1.3314, |
| "step": 37300 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 0.002480735996415845, |
| "loss": 1.3308, |
| "step": 37400 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 0.00248042789396644, |
| "loss": 1.3268, |
| "step": 37500 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 0.002480117366616685, |
| "loss": 1.3238, |
| "step": 37600 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 0.002479804414978561, |
| "loss": 1.3264, |
| "step": 37700 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 0.0024794890396688256, |
| "loss": 1.3302, |
| "step": 37800 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 0.002479171241309015, |
| "loss": 1.3292, |
| "step": 37900 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 0.002478851020525438, |
| "loss": 1.3292, |
| "step": 38000 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 0.0024785283779491787, |
| "loss": 1.3286, |
| "step": 38100 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 0.0024782033142160946, |
| "loss": 1.3253, |
| "step": 38200 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 0.002477875829966814, |
| "loss": 1.3284, |
| "step": 38300 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 0.0024775459258467355, |
| "loss": 1.3302, |
| "step": 38400 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 0.00247721693771243, |
| "loss": 1.3224, |
| "step": 38500 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 0.0024768822199884265, |
| "loss": 1.3232, |
| "step": 38600 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 0.00247654508435181, |
| "loss": 1.3267, |
| "step": 38700 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 0.002476205531467, |
| "loss": 1.3268, |
| "step": 38800 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 0.0024758635620031806, |
| "loss": 1.319, |
| "step": 38900 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 0.0024755191766342964, |
| "loss": 1.3272, |
| "step": 39000 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 0.0024751723760390552, |
| "loss": 1.3269, |
| "step": 39100 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 0.0024748231609009247, |
| "loss": 1.3272, |
| "step": 39200 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 0.0024744715319081293, |
| "loss": 1.3293, |
| "step": 39300 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 0.002474117489753652, |
| "loss": 1.3235, |
| "step": 39400 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 0.002473761035135232, |
| "loss": 1.3279, |
| "step": 39500 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 0.0024734021687553617, |
| "loss": 1.3226, |
| "step": 39600 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 0.002473040891321287, |
| "loss": 1.3244, |
| "step": 39700 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.4026586923819002, |
| "eval_loss": 1.2859399318695068, |
| "eval_runtime": 19.8804, |
| "eval_samples_per_second": 4002.332, |
| "eval_steps_per_second": 15.644, |
| "step": 39786 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.002472677203545006, |
| "loss": 1.3265, |
| "step": 39800 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 0.002472311106143266, |
| "loss": 1.3222, |
| "step": 39900 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 0.0024719425998375646, |
| "loss": 1.3229, |
| "step": 40000 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 0.002471571685354145, |
| "loss": 1.3203, |
| "step": 40100 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 0.002471198363423998, |
| "loss": 1.324, |
| "step": 40200 |
| }, |
| { |
| "epoch": 8.1, |
| "learning_rate": 0.002470822634782858, |
| "loss": 1.3205, |
| "step": 40300 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 0.0024704445001712027, |
| "loss": 1.3261, |
| "step": 40400 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 0.002470067777636028, |
| "loss": 1.3198, |
| "step": 40500 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 0.0024696848573647666, |
| "loss": 1.319, |
| "step": 40600 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 0.0024692995333652967, |
| "loss": 1.3251, |
| "step": 40700 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 0.0024689118063970067, |
| "loss": 1.3224, |
| "step": 40800 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 0.0024685216772240203, |
| "loss": 1.3196, |
| "step": 40900 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 0.0024681291466151956, |
| "loss": 1.3172, |
| "step": 41000 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 0.0024677342153441232, |
| "loss": 1.3211, |
| "step": 41100 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 0.002467336884189125, |
| "loss": 1.3166, |
| "step": 41200 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 0.0024669371539332526, |
| "loss": 1.3225, |
| "step": 41300 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 0.0024665350253642855, |
| "loss": 1.3187, |
| "step": 41400 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 0.00246613049927473, |
| "loss": 1.3188, |
| "step": 41500 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 0.002465723576461818, |
| "loss": 1.3194, |
| "step": 41600 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 0.0024653142577275022, |
| "loss": 1.322, |
| "step": 41700 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 0.002464902543878461, |
| "loss": 1.3192, |
| "step": 41800 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 0.00246448843572609, |
| "loss": 1.3229, |
| "step": 41900 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 0.0024640719340865043, |
| "loss": 1.3235, |
| "step": 42000 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 0.0024636530397805366, |
| "loss": 1.3213, |
| "step": 42100 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 0.002463231753633735, |
| "loss": 1.3187, |
| "step": 42200 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 0.0024628080764763603, |
| "loss": 1.3226, |
| "step": 42300 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 0.002462382009143387, |
| "loss": 1.3192, |
| "step": 42400 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 0.002461953552474498, |
| "loss": 1.3222, |
| "step": 42500 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 0.0024615270275859403, |
| "loss": 1.3172, |
| "step": 42600 |
| }, |
| { |
| "epoch": 8.59, |
| "learning_rate": 0.002461093818655314, |
| "loss": 1.3191, |
| "step": 42700 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 0.0024606582229275095, |
| "loss": 1.3203, |
| "step": 42800 |
| }, |
| { |
| "epoch": 8.63, |
| "learning_rate": 0.0024602202412609907, |
| "loss": 1.3171, |
| "step": 42900 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 0.0024597798745189217, |
| "loss": 1.3239, |
| "step": 43000 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 0.0024593371235691673, |
| "loss": 1.3189, |
| "step": 43100 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 0.0024588919892842924, |
| "loss": 1.3168, |
| "step": 43200 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 0.002458444472541557, |
| "loss": 1.3214, |
| "step": 43300 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 0.0024579945742229177, |
| "loss": 1.321, |
| "step": 43400 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 0.0024575422952150235, |
| "loss": 1.3202, |
| "step": 43500 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 0.002457087636409217, |
| "loss": 1.3188, |
| "step": 43600 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 0.0024566305987015298, |
| "loss": 1.3166, |
| "step": 43700 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 0.002456171182992681, |
| "loss": 1.3244, |
| "step": 43800 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 0.002455709390188077, |
| "loss": 1.3186, |
| "step": 43900 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 0.00245524522119781, |
| "loss": 1.3191, |
| "step": 44000 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 0.002454778676936653, |
| "loss": 1.3218, |
| "step": 44100 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 0.0024543097583240615, |
| "loss": 1.3217, |
| "step": 44200 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 0.0024538384662841704, |
| "loss": 1.3169, |
| "step": 44300 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 0.0024533648017457917, |
| "loss": 1.3201, |
| "step": 44400 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 0.002452888765642413, |
| "loss": 1.3189, |
| "step": 44500 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 0.0024524103589121955, |
| "loss": 1.3206, |
| "step": 44600 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 0.002451934401988944, |
| "loss": 1.3163, |
| "step": 44700 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.4035565789440192, |
| "eval_loss": 1.2766544818878174, |
| "eval_runtime": 19.7806, |
| "eval_samples_per_second": 4022.534, |
| "eval_steps_per_second": 15.723, |
| "step": 44759 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 0.0024514512805208794, |
| "loss": 1.3158, |
| "step": 44800 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 0.0024509657912589406, |
| "loss": 1.3109, |
| "step": 44900 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 0.0024504779351599195, |
| "loss": 1.312, |
| "step": 45000 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 0.002449987713185271, |
| "loss": 1.3134, |
| "step": 45100 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 0.002449495126301115, |
| "loss": 1.3156, |
| "step": 45200 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 0.002449000175478231, |
| "loss": 1.3188, |
| "step": 45300 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 0.0024485028616920562, |
| "loss": 1.3159, |
| "step": 45400 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 0.0024480031859226863, |
| "loss": 1.3154, |
| "step": 45500 |
| }, |
| { |
| "epoch": 9.17, |
| "learning_rate": 0.0024475011491548715, |
| "loss": 1.3151, |
| "step": 45600 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 0.002446996752378015, |
| "loss": 1.3152, |
| "step": 45700 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 0.0024464899965861704, |
| "loss": 1.3159, |
| "step": 45800 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 0.002445980882778041, |
| "loss": 1.3189, |
| "step": 45900 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 0.0024454694119569777, |
| "loss": 1.3132, |
| "step": 46000 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 0.0024449555851309753, |
| "loss": 1.3136, |
| "step": 46100 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 0.0024444394033126733, |
| "loss": 1.3154, |
| "step": 46200 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 0.0024439208675193502, |
| "loss": 1.3148, |
| "step": 46300 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 0.0024433999787729266, |
| "loss": 1.3119, |
| "step": 46400 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 0.002442876738099957, |
| "loss": 1.3143, |
| "step": 46500 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 0.002442351146531634, |
| "loss": 1.316, |
| "step": 46600 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 0.0024418284961464525, |
| "loss": 1.3157, |
| "step": 46700 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 0.002441298229382551, |
| "loss": 1.3142, |
| "step": 46800 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 0.002440765614834186, |
| "loss": 1.3125, |
| "step": 46900 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 0.0024402306535510226, |
| "loss": 1.3187, |
| "step": 47000 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 0.0024396933465873506, |
| "loss": 1.316, |
| "step": 47100 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 0.002439153695002083, |
| "loss": 1.3129, |
| "step": 47200 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 0.0024386116998587537, |
| "loss": 1.3164, |
| "step": 47300 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 0.002438067362225514, |
| "loss": 1.3156, |
| "step": 47400 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 0.0024375206831751335, |
| "loss": 1.3151, |
| "step": 47500 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 0.0024369716637849944, |
| "loss": 1.3131, |
| "step": 47600 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 0.0024364203051370926, |
| "loss": 1.3154, |
| "step": 47700 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 0.0024358666083180336, |
| "loss": 1.3141, |
| "step": 47800 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 0.0024353105744190314, |
| "loss": 1.3131, |
| "step": 47900 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 0.0024347522045359044, |
| "loss": 1.3147, |
| "step": 48000 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 0.0024341914997690764, |
| "loss": 1.3159, |
| "step": 48100 |
| }, |
| { |
| "epoch": 9.69, |
| "learning_rate": 0.002433628461223572, |
| "loss": 1.3135, |
| "step": 48200 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 0.0024330630900090155, |
| "loss": 1.3133, |
| "step": 48300 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 0.0024324953872396277, |
| "loss": 1.3126, |
| "step": 48400 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 0.0024319253540342253, |
| "loss": 1.3144, |
| "step": 48500 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 0.0024313529915162163, |
| "loss": 1.315, |
| "step": 48600 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 0.002430784059241426, |
| "loss": 1.315, |
| "step": 48700 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 0.0024302070647516908, |
| "loss": 1.3129, |
| "step": 48800 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 0.002429627744335717, |
| "loss": 1.3163, |
| "step": 48900 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 0.0024290460991352155, |
| "loss": 1.3101, |
| "step": 49000 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 0.0024284621302964804, |
| "loss": 1.3152, |
| "step": 49100 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 0.0024278758389703853, |
| "loss": 1.3163, |
| "step": 49200 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 0.002427287226312379, |
| "loss": 1.3089, |
| "step": 49300 |
| }, |
| { |
| "epoch": 9.93, |
| "learning_rate": 0.002426696293482488, |
| "loss": 1.3141, |
| "step": 49400 |
| }, |
| { |
| "epoch": 9.95, |
| "learning_rate": 0.002426103041645309, |
| "loss": 1.3113, |
| "step": 49500 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 0.0024255074719700094, |
| "loss": 1.3098, |
| "step": 49600 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 0.0024249095856303253, |
| "loss": 1.3112, |
| "step": 49700 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.4034808535713104, |
| "eval_loss": 1.2766634225845337, |
| "eval_runtime": 19.8931, |
| "eval_samples_per_second": 3999.773, |
| "eval_steps_per_second": 15.634, |
| "step": 49732 |
| }, |
| { |
| "epoch": 10.01, |
| "learning_rate": 0.0024243093838045584, |
| "loss": 1.3131, |
| "step": 49800 |
| }, |
| { |
| "epoch": 10.03, |
| "learning_rate": 0.002423706867675572, |
| "loss": 1.3102, |
| "step": 49900 |
| }, |
| { |
| "epoch": 10.05, |
| "learning_rate": 0.002423102038430793, |
| "loss": 1.3043, |
| "step": 50000 |
| }, |
| { |
| "epoch": 10.07, |
| "learning_rate": 0.0024224948972622054, |
| "loss": 1.3073, |
| "step": 50100 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 0.002421885445366349, |
| "loss": 1.3097, |
| "step": 50200 |
| }, |
| { |
| "epoch": 10.11, |
| "learning_rate": 0.002421273683944319, |
| "loss": 1.3065, |
| "step": 50300 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 0.002420659614201761, |
| "loss": 1.3096, |
| "step": 50400 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 0.002420043237348871, |
| "loss": 1.3118, |
| "step": 50500 |
| }, |
| { |
| "epoch": 10.17, |
| "learning_rate": 0.0024194245546003894, |
| "loss": 1.3081, |
| "step": 50600 |
| }, |
| { |
| "epoch": 10.19, |
| "learning_rate": 0.002418803567175604, |
| "loss": 1.3087, |
| "step": 50700 |
| }, |
| { |
| "epoch": 10.21, |
| "learning_rate": 0.002418186520605176, |
| "loss": 1.3084, |
| "step": 50800 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 0.0024175609505199523, |
| "loss": 1.3071, |
| "step": 50900 |
| }, |
| { |
| "epoch": 10.25, |
| "learning_rate": 0.002416933079431175, |
| "loss": 1.3071, |
| "step": 51000 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 0.0024163029085762376, |
| "loss": 1.3126, |
| "step": 51100 |
| }, |
| { |
| "epoch": 10.3, |
| "learning_rate": 0.0024156704391970684, |
| "loss": 1.3117, |
| "step": 51200 |
| }, |
| { |
| "epoch": 10.32, |
| "learning_rate": 0.0024150356725401233, |
| "loss": 1.3088, |
| "step": 51300 |
| }, |
| { |
| "epoch": 10.34, |
| "learning_rate": 0.002414404991844438, |
| "loss": 1.3101, |
| "step": 51400 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 0.0024137656573309015, |
| "loss": 1.3126, |
| "step": 51500 |
| }, |
| { |
| "epoch": 10.38, |
| "learning_rate": 0.002413124029293492, |
| "loss": 1.3098, |
| "step": 51600 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 0.0024124801089967156, |
| "loss": 1.3148, |
| "step": 51700 |
| }, |
| { |
| "epoch": 10.42, |
| "learning_rate": 0.0024118338977095963, |
| "loss": 1.3099, |
| "step": 51800 |
| }, |
| { |
| "epoch": 10.44, |
| "learning_rate": 0.0024111853967056732, |
| "loss": 1.3077, |
| "step": 51900 |
| }, |
| { |
| "epoch": 10.46, |
| "learning_rate": 0.002410534607262998, |
| "loss": 1.3099, |
| "step": 52000 |
| }, |
| { |
| "epoch": 10.48, |
| "learning_rate": 0.0024098815306641316, |
| "loss": 1.3082, |
| "step": 52100 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 0.002409226168196144, |
| "loss": 1.3068, |
| "step": 52200 |
| }, |
| { |
| "epoch": 10.52, |
| "learning_rate": 0.0024085685211506086, |
| "loss": 1.3094, |
| "step": 52300 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 0.002407908590823602, |
| "loss": 1.3076, |
| "step": 52400 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 0.0024072463785157006, |
| "loss": 1.3075, |
| "step": 52500 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 0.0024065818855319773, |
| "loss": 1.3118, |
| "step": 52600 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 0.0024059151131820013, |
| "loss": 1.3101, |
| "step": 52700 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 0.002405246062779832, |
| "loss": 1.3058, |
| "step": 52800 |
| }, |
| { |
| "epoch": 10.64, |
| "learning_rate": 0.0024045747356440197, |
| "loss": 1.3079, |
| "step": 52900 |
| }, |
| { |
| "epoch": 10.66, |
| "learning_rate": 0.0024039011330976015, |
| "loss": 1.309, |
| "step": 53000 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 0.0024032252564680978, |
| "loss": 1.3076, |
| "step": 53100 |
| }, |
| { |
| "epoch": 10.7, |
| "learning_rate": 0.0024025471070875117, |
| "loss": 1.3102, |
| "step": 53200 |
| }, |
| { |
| "epoch": 10.72, |
| "learning_rate": 0.002401866686292326, |
| "loss": 1.3088, |
| "step": 53300 |
| }, |
| { |
| "epoch": 10.74, |
| "learning_rate": 0.0024011839954234983, |
| "loss": 1.3089, |
| "step": 53400 |
| }, |
| { |
| "epoch": 10.76, |
| "learning_rate": 0.0024004990358264607, |
| "loss": 1.3063, |
| "step": 53500 |
| }, |
| { |
| "epoch": 10.78, |
| "learning_rate": 0.0023998118088511167, |
| "loss": 1.3066, |
| "step": 53600 |
| }, |
| { |
| "epoch": 10.8, |
| "learning_rate": 0.0023991223158518387, |
| "loss": 1.3071, |
| "step": 53700 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 0.0023984305581874637, |
| "loss": 1.3097, |
| "step": 53800 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 0.002397736537221293, |
| "loss": 1.3097, |
| "step": 53900 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 0.002397040254321088, |
| "loss": 1.3088, |
| "step": 54000 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 0.0023963417108590675, |
| "loss": 1.3071, |
| "step": 54100 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 0.0023956409082119055, |
| "loss": 1.3055, |
| "step": 54200 |
| }, |
| { |
| "epoch": 10.92, |
| "learning_rate": 0.002394937847760729, |
| "loss": 1.3059, |
| "step": 54300 |
| }, |
| { |
| "epoch": 10.94, |
| "learning_rate": 0.002394232530891114, |
| "loss": 1.3016, |
| "step": 54400 |
| }, |
| { |
| "epoch": 10.96, |
| "learning_rate": 0.0023935249589930835, |
| "loss": 1.308, |
| "step": 54500 |
| }, |
| { |
| "epoch": 10.98, |
| "learning_rate": 0.0023928151334611045, |
| "loss": 1.3063, |
| "step": 54600 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 0.002392103055694086, |
| "loss": 1.3058, |
| "step": 54700 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.4042048581326505, |
| "eval_loss": 1.2673618793487549, |
| "eval_runtime": 19.7081, |
| "eval_samples_per_second": 4037.327, |
| "eval_steps_per_second": 15.78, |
| "step": 54705 |
| }, |
| { |
| "epoch": 11.02, |
| "learning_rate": 0.0023913887270953743, |
| "loss": 1.2989, |
| "step": 54800 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 0.002390672149072754, |
| "loss": 1.302, |
| "step": 54900 |
| }, |
| { |
| "epoch": 11.06, |
| "learning_rate": 0.0023899605224217933, |
| "loss": 1.2996, |
| "step": 55000 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 0.002389239472251357, |
| "loss": 1.2986, |
| "step": 55100 |
| }, |
| { |
| "epoch": 11.1, |
| "learning_rate": 0.0023885161768927156, |
| "loss": 1.3003, |
| "step": 55200 |
| }, |
| { |
| "epoch": 11.12, |
| "learning_rate": 0.002387790637771324, |
| "loss": 1.3006, |
| "step": 55300 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 0.0023870628563170586, |
| "loss": 1.3063, |
| "step": 55400 |
| }, |
| { |
| "epoch": 11.16, |
| "learning_rate": 0.0023863328339642155, |
| "loss": 1.3082, |
| "step": 55500 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 0.002385600572151506, |
| "loss": 1.3049, |
| "step": 55600 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 0.002384866072322057, |
| "loss": 1.3041, |
| "step": 55700 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 0.0023841293359234033, |
| "loss": 1.305, |
| "step": 55800 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 0.00238339036440749, |
| "loss": 1.3048, |
| "step": 55900 |
| }, |
| { |
| "epoch": 11.26, |
| "learning_rate": 0.002382649159230665, |
| "loss": 1.3019, |
| "step": 56000 |
| }, |
| { |
| "epoch": 11.28, |
| "learning_rate": 0.00238190572185368, |
| "loss": 1.3049, |
| "step": 56100 |
| }, |
| { |
| "epoch": 11.3, |
| "learning_rate": 0.002381160053741684, |
| "loss": 1.306, |
| "step": 56200 |
| }, |
| { |
| "epoch": 11.32, |
| "learning_rate": 0.0023804121563642247, |
| "loss": 1.3058, |
| "step": 56300 |
| }, |
| { |
| "epoch": 11.34, |
| "learning_rate": 0.0023796620311952415, |
| "loss": 1.305, |
| "step": 56400 |
| }, |
| { |
| "epoch": 11.36, |
| "learning_rate": 0.0023789096797130643, |
| "loss": 1.3037, |
| "step": 56500 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 0.002378155103400411, |
| "loss": 1.3064, |
| "step": 56600 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 0.002377398303744384, |
| "loss": 1.3038, |
| "step": 56700 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 0.002376639282236468, |
| "loss": 1.3057, |
| "step": 56800 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 0.0023758780403725257, |
| "loss": 1.3044, |
| "step": 56900 |
| }, |
| { |
| "epoch": 11.46, |
| "learning_rate": 0.0023751145796527956, |
| "loss": 1.3057, |
| "step": 57000 |
| }, |
| { |
| "epoch": 11.48, |
| "learning_rate": 0.00237434890158189, |
| "loss": 1.3034, |
| "step": 57100 |
| }, |
| { |
| "epoch": 11.5, |
| "learning_rate": 0.0023735810076687893, |
| "loss": 1.3055, |
| "step": 57200 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 0.0023728108994268433, |
| "loss": 1.3021, |
| "step": 57300 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 0.0023720385783737637, |
| "loss": 1.3051, |
| "step": 57400 |
| }, |
| { |
| "epoch": 11.56, |
| "learning_rate": 0.0023712640460316244, |
| "loss": 1.3066, |
| "step": 57500 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 0.0023704873039268565, |
| "loss": 1.306, |
| "step": 57600 |
| }, |
| { |
| "epoch": 11.6, |
| "learning_rate": 0.0023697083535902464, |
| "loss": 1.2988, |
| "step": 57700 |
| }, |
| { |
| "epoch": 11.62, |
| "learning_rate": 0.0023689271965569325, |
| "loss": 1.3044, |
| "step": 57800 |
| }, |
| { |
| "epoch": 11.64, |
| "learning_rate": 0.0023681438343664016, |
| "loss": 1.3054, |
| "step": 57900 |
| }, |
| { |
| "epoch": 11.66, |
| "learning_rate": 0.002367358268562487, |
| "loss": 1.3035, |
| "step": 58000 |
| }, |
| { |
| "epoch": 11.68, |
| "learning_rate": 0.0023665705006933645, |
| "loss": 1.2999, |
| "step": 58100 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 0.00236578053231155, |
| "loss": 1.3027, |
| "step": 58200 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 0.0023649883649738954, |
| "loss": 1.3035, |
| "step": 58300 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 0.0023641940002415876, |
| "loss": 1.3023, |
| "step": 58400 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 0.002363397439680142, |
| "loss": 1.2999, |
| "step": 58500 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 0.002362598684859403, |
| "loss": 1.2994, |
| "step": 58600 |
| }, |
| { |
| "epoch": 11.8, |
| "learning_rate": 0.0023617977373535403, |
| "loss": 1.3024, |
| "step": 58700 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 0.0023609945987410432, |
| "loss": 1.302, |
| "step": 58800 |
| }, |
| { |
| "epoch": 11.84, |
| "learning_rate": 0.0023601892706047187, |
| "loss": 1.3044, |
| "step": 58900 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 0.002359381754531691, |
| "loss": 1.3026, |
| "step": 59000 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 0.0023585801599547503, |
| "loss": 1.3061, |
| "step": 59100 |
| }, |
| { |
| "epoch": 11.9, |
| "learning_rate": 0.002357768294626511, |
| "loss": 1.2997, |
| "step": 59200 |
| }, |
| { |
| "epoch": 11.92, |
| "learning_rate": 0.0023569542461327744, |
| "loss": 1.2999, |
| "step": 59300 |
| }, |
| { |
| "epoch": 11.94, |
| "learning_rate": 0.0023561380160778494, |
| "loss": 1.3, |
| "step": 59400 |
| }, |
| { |
| "epoch": 11.96, |
| "learning_rate": 0.0023553196060703448, |
| "loss": 1.3009, |
| "step": 59500 |
| }, |
| { |
| "epoch": 11.98, |
| "learning_rate": 0.0023544990177231644, |
| "loss": 1.3043, |
| "step": 59600 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.40439273978216544, |
| "eval_loss": 1.2642629146575928, |
| "eval_runtime": 19.7374, |
| "eval_samples_per_second": 4031.338, |
| "eval_steps_per_second": 15.757, |
| "step": 59679 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 0.0023536762526535065, |
| "loss": 1.3005, |
| "step": 59700 |
| }, |
| { |
| "epoch": 12.02, |
| "learning_rate": 0.002352851312482858, |
| "loss": 1.2972, |
| "step": 59800 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 0.0023520241988369927, |
| "loss": 1.2979, |
| "step": 59900 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 0.0023511949133459688, |
| "loss": 1.2933, |
| "step": 60000 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 0.0023503634576441234, |
| "loss": 1.2949, |
| "step": 60100 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 0.0023495298333700712, |
| "loss": 1.3001, |
| "step": 60200 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 0.0023486940421667012, |
| "loss": 1.2958, |
| "step": 60300 |
| }, |
| { |
| "epoch": 12.14, |
| "learning_rate": 0.0023478560856811715, |
| "loss": 1.2996, |
| "step": 60400 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 0.0023470159655649093, |
| "loss": 1.2979, |
| "step": 60500 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 0.0023461736834736045, |
| "loss": 1.2997, |
| "step": 60600 |
| }, |
| { |
| "epoch": 12.21, |
| "learning_rate": 0.0023453292410672085, |
| "loss": 1.3016, |
| "step": 60700 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 0.002344482640009929, |
| "loss": 1.299, |
| "step": 60800 |
| }, |
| { |
| "epoch": 12.25, |
| "learning_rate": 0.00234363388197023, |
| "loss": 1.3009, |
| "step": 60900 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 0.0023427829686208245, |
| "loss": 1.3003, |
| "step": 61000 |
| }, |
| { |
| "epoch": 12.29, |
| "learning_rate": 0.002341938442963461, |
| "loss": 1.303, |
| "step": 61100 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 0.0023410832455409507, |
| "loss": 1.2995, |
| "step": 61200 |
| }, |
| { |
| "epoch": 12.33, |
| "learning_rate": 0.002340225897835473, |
| "loss": 1.2984, |
| "step": 61300 |
| }, |
| { |
| "epoch": 12.35, |
| "learning_rate": 0.0023393664015366696, |
| "loss": 1.3007, |
| "step": 61400 |
| }, |
| { |
| "epoch": 12.37, |
| "learning_rate": 0.0023385047583384167, |
| "loss": 1.3039, |
| "step": 61500 |
| }, |
| { |
| "epoch": 12.39, |
| "learning_rate": 0.0023376582667187836, |
| "loss": 1.2987, |
| "step": 61600 |
| }, |
| { |
| "epoch": 12.41, |
| "learning_rate": 0.0023367923776734523, |
| "loss": 1.295, |
| "step": 61700 |
| }, |
| { |
| "epoch": 12.43, |
| "learning_rate": 0.0023359243468015017, |
| "loss": 1.3033, |
| "step": 61800 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 0.002335054175813628, |
| "loss": 1.3007, |
| "step": 61900 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 0.002334181866424745, |
| "loss": 1.3005, |
| "step": 62000 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 0.002333307420353981, |
| "loss": 1.3006, |
| "step": 62100 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 0.0023324308393246752, |
| "loss": 1.2948, |
| "step": 62200 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 0.002331552125064374, |
| "loss": 1.2972, |
| "step": 62300 |
| }, |
| { |
| "epoch": 12.55, |
| "learning_rate": 0.0023306712793048287, |
| "loss": 1.3009, |
| "step": 62400 |
| }, |
| { |
| "epoch": 12.57, |
| "learning_rate": 0.0023297883037819906, |
| "loss": 1.2992, |
| "step": 62500 |
| }, |
| { |
| "epoch": 12.59, |
| "learning_rate": 0.002328903200236008, |
| "loss": 1.3011, |
| "step": 62600 |
| }, |
| { |
| "epoch": 12.61, |
| "learning_rate": 0.0023280159704112255, |
| "loss": 1.3016, |
| "step": 62700 |
| }, |
| { |
| "epoch": 12.63, |
| "learning_rate": 0.0023271266160561736, |
| "loss": 1.2949, |
| "step": 62800 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 0.0023262351389235743, |
| "loss": 1.2994, |
| "step": 62900 |
| }, |
| { |
| "epoch": 12.67, |
| "learning_rate": 0.00232534154077033, |
| "loss": 1.3009, |
| "step": 63000 |
| }, |
| { |
| "epoch": 12.69, |
| "learning_rate": 0.002324445823357525, |
| "loss": 1.2988, |
| "step": 63100 |
| }, |
| { |
| "epoch": 12.71, |
| "learning_rate": 0.0023235479884504196, |
| "loss": 1.2966, |
| "step": 63200 |
| }, |
| { |
| "epoch": 12.73, |
| "learning_rate": 0.0023226480378184472, |
| "loss": 1.2976, |
| "step": 63300 |
| }, |
| { |
| "epoch": 12.75, |
| "learning_rate": 0.0023217459732352104, |
| "loss": 1.2981, |
| "step": 63400 |
| }, |
| { |
| "epoch": 12.77, |
| "learning_rate": 0.002320841796478479, |
| "loss": 1.2969, |
| "step": 63500 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 0.0023199355093301835, |
| "loss": 1.2999, |
| "step": 63600 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 0.0023190271135764165, |
| "loss": 1.295, |
| "step": 63700 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 0.0023181166110074224, |
| "loss": 1.2908, |
| "step": 63800 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 0.002317204003417602, |
| "loss": 1.2966, |
| "step": 63900 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 0.0023162892926055006, |
| "loss": 1.2965, |
| "step": 64000 |
| }, |
| { |
| "epoch": 12.89, |
| "learning_rate": 0.002315372480373811, |
| "loss": 1.2946, |
| "step": 64100 |
| }, |
| { |
| "epoch": 12.91, |
| "learning_rate": 0.0023144535685293665, |
| "loss": 1.2988, |
| "step": 64200 |
| }, |
| { |
| "epoch": 12.93, |
| "learning_rate": 0.0023135325588831386, |
| "loss": 1.3009, |
| "step": 64300 |
| }, |
| { |
| "epoch": 12.95, |
| "learning_rate": 0.0023126094532502327, |
| "loss": 1.2955, |
| "step": 64400 |
| }, |
| { |
| "epoch": 12.97, |
| "learning_rate": 0.0023116842534498857, |
| "loss": 1.3004, |
| "step": 64500 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.00231075696130546, |
| "loss": 1.2943, |
| "step": 64600 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.4050755408318846, |
| "eval_loss": 1.25895094871521, |
| "eval_runtime": 19.7319, |
| "eval_samples_per_second": 4032.464, |
| "eval_steps_per_second": 15.761, |
| "step": 64652 |
| }, |
| { |
| "epoch": 13.01, |
| "learning_rate": 0.0023098275786444435, |
| "loss": 1.2935, |
| "step": 64700 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 0.002308896107298443, |
| "loss": 1.2893, |
| "step": 64800 |
| }, |
| { |
| "epoch": 13.05, |
| "learning_rate": 0.0023079625491031824, |
| "loss": 1.2912, |
| "step": 64900 |
| }, |
| { |
| "epoch": 13.07, |
| "learning_rate": 0.002307026905898497, |
| "loss": 1.2944, |
| "step": 65000 |
| }, |
| { |
| "epoch": 13.09, |
| "learning_rate": 0.002306089179528332, |
| "loss": 1.2949, |
| "step": 65100 |
| }, |
| { |
| "epoch": 13.11, |
| "learning_rate": 0.002305149371840738, |
| "loss": 1.2898, |
| "step": 65200 |
| }, |
| { |
| "epoch": 13.13, |
| "learning_rate": 0.002304207484687868, |
| "loss": 1.2934, |
| "step": 65300 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 0.002303263519925972, |
| "loss": 1.2922, |
| "step": 65400 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 0.002302317479415395, |
| "loss": 1.2913, |
| "step": 65500 |
| }, |
| { |
| "epoch": 13.19, |
| "learning_rate": 0.002301369365020573, |
| "loss": 1.2921, |
| "step": 65600 |
| }, |
| { |
| "epoch": 13.21, |
| "learning_rate": 0.0023004191786100297, |
| "loss": 1.2935, |
| "step": 65700 |
| }, |
| { |
| "epoch": 13.23, |
| "learning_rate": 0.002299466922056371, |
| "loss": 1.298, |
| "step": 65800 |
| }, |
| { |
| "epoch": 13.25, |
| "learning_rate": 0.0022985125972362834, |
| "loss": 1.2949, |
| "step": 65900 |
| }, |
| { |
| "epoch": 13.27, |
| "learning_rate": 0.0022975562060305295, |
| "loss": 1.2926, |
| "step": 66000 |
| }, |
| { |
| "epoch": 13.29, |
| "learning_rate": 0.002296597750323944, |
| "loss": 1.293, |
| "step": 66100 |
| }, |
| { |
| "epoch": 13.31, |
| "learning_rate": 0.0022956372320054306, |
| "loss": 1.2942, |
| "step": 66200 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 0.0022946746529679575, |
| "loss": 1.2944, |
| "step": 66300 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 0.002293710015108555, |
| "loss": 1.297, |
| "step": 66400 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 0.0022927433203283093, |
| "loss": 1.2982, |
| "step": 66500 |
| }, |
| { |
| "epoch": 13.39, |
| "learning_rate": 0.0022917745705323617, |
| "loss": 1.2904, |
| "step": 66600 |
| }, |
| { |
| "epoch": 13.41, |
| "learning_rate": 0.0022908037676299038, |
| "loss": 1.2948, |
| "step": 66700 |
| }, |
| { |
| "epoch": 13.43, |
| "learning_rate": 0.0022898309135341716, |
| "loss": 1.2965, |
| "step": 66800 |
| }, |
| { |
| "epoch": 13.45, |
| "learning_rate": 0.0022888560101624452, |
| "loss": 1.2892, |
| "step": 66900 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 0.002287888839071394, |
| "loss": 1.2942, |
| "step": 67000 |
| }, |
| { |
| "epoch": 13.49, |
| "learning_rate": 0.0022869098633604185, |
| "loss": 1.2886, |
| "step": 67100 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 0.0022859288441301902, |
| "loss": 1.2901, |
| "step": 67200 |
| }, |
| { |
| "epoch": 13.53, |
| "learning_rate": 0.0022849457833140803, |
| "loss": 1.2941, |
| "step": 67300 |
| }, |
| { |
| "epoch": 13.55, |
| "learning_rate": 0.0022839606828494842, |
| "loss": 1.2947, |
| "step": 67400 |
| }, |
| { |
| "epoch": 13.57, |
| "learning_rate": 0.002282973544677816, |
| "loss": 1.2952, |
| "step": 67500 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 0.0022819843707445058, |
| "loss": 1.2945, |
| "step": 67600 |
| }, |
| { |
| "epoch": 13.61, |
| "learning_rate": 0.002280993162998996, |
| "loss": 1.2935, |
| "step": 67700 |
| }, |
| { |
| "epoch": 13.63, |
| "learning_rate": 0.002279999923394737, |
| "loss": 1.2948, |
| "step": 67800 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 0.0022790046538891844, |
| "loss": 1.2895, |
| "step": 67900 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 0.0022780073564437927, |
| "loss": 1.2952, |
| "step": 68000 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 0.002277008033024014, |
| "loss": 1.2946, |
| "step": 68100 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 0.002276006685599293, |
| "loss": 1.298, |
| "step": 68200 |
| }, |
| { |
| "epoch": 13.73, |
| "learning_rate": 0.002275003316143064, |
| "loss": 1.2942, |
| "step": 68300 |
| }, |
| { |
| "epoch": 13.75, |
| "learning_rate": 0.0022739979266327448, |
| "loss": 1.2936, |
| "step": 68400 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 0.002272990519049735, |
| "loss": 1.2937, |
| "step": 68500 |
| }, |
| { |
| "epoch": 13.79, |
| "learning_rate": 0.0022719810953794116, |
| "loss": 1.2991, |
| "step": 68600 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 0.002270969657611125, |
| "loss": 1.2924, |
| "step": 68700 |
| }, |
| { |
| "epoch": 13.83, |
| "learning_rate": 0.0022699562077381943, |
| "loss": 1.2924, |
| "step": 68800 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 0.002268940747757904, |
| "loss": 1.2936, |
| "step": 68900 |
| }, |
| { |
| "epoch": 13.87, |
| "learning_rate": 0.002267923279671501, |
| "loss": 1.2963, |
| "step": 69000 |
| }, |
| { |
| "epoch": 13.89, |
| "learning_rate": 0.0022669038054841887, |
| "loss": 1.2875, |
| "step": 69100 |
| }, |
| { |
| "epoch": 13.91, |
| "learning_rate": 0.0022658823272051244, |
| "loss": 1.2912, |
| "step": 69200 |
| }, |
| { |
| "epoch": 13.93, |
| "learning_rate": 0.0022648588468474158, |
| "loss": 1.2943, |
| "step": 69300 |
| }, |
| { |
| "epoch": 13.95, |
| "learning_rate": 0.002263833366428115, |
| "loss": 1.2974, |
| "step": 69400 |
| }, |
| { |
| "epoch": 13.97, |
| "learning_rate": 0.002262816172636471, |
| "loss": 1.2929, |
| "step": 69500 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 0.002261786718111027, |
| "loss": 1.2926, |
| "step": 69600 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.40531926198942647, |
| "eval_loss": 1.2532228231430054, |
| "eval_runtime": 19.5692, |
| "eval_samples_per_second": 4065.991, |
| "eval_steps_per_second": 15.892, |
| "step": 69625 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 0.002260755269578475, |
| "loss": 1.2829, |
| "step": 69700 |
| }, |
| { |
| "epoch": 14.04, |
| "learning_rate": 0.0022597218290715715, |
| "loss": 1.2825, |
| "step": 69800 |
| }, |
| { |
| "epoch": 14.06, |
| "learning_rate": 0.002258686398626998, |
| "loss": 1.2912, |
| "step": 69900 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 0.0022576489802853578, |
| "loss": 1.2877, |
| "step": 70000 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 0.002256609576091173, |
| "loss": 1.2876, |
| "step": 70100 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 0.0022555681880928784, |
| "loss": 1.2921, |
| "step": 70200 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 0.0022545248183428184, |
| "loss": 1.2898, |
| "step": 70300 |
| }, |
| { |
| "epoch": 14.16, |
| "learning_rate": 0.0022534794688972436, |
| "loss": 1.2881, |
| "step": 70400 |
| }, |
| { |
| "epoch": 14.18, |
| "learning_rate": 0.0022524321418163056, |
| "loss": 1.2887, |
| "step": 70500 |
| }, |
| { |
| "epoch": 14.2, |
| "learning_rate": 0.0022513828391640535, |
| "loss": 1.2835, |
| "step": 70600 |
| }, |
| { |
| "epoch": 14.22, |
| "learning_rate": 0.00225033156300843, |
| "loss": 1.2945, |
| "step": 70700 |
| }, |
| { |
| "epoch": 14.24, |
| "learning_rate": 0.0022492783154212676, |
| "loss": 1.2912, |
| "step": 70800 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 0.002248223098478284, |
| "loss": 1.2864, |
| "step": 70900 |
| }, |
| { |
| "epoch": 14.28, |
| "learning_rate": 0.0022471659142590758, |
| "loss": 1.2914, |
| "step": 71000 |
| }, |
| { |
| "epoch": 14.3, |
| "learning_rate": 0.00224610676484712, |
| "loss": 1.2877, |
| "step": 71100 |
| }, |
| { |
| "epoch": 14.32, |
| "learning_rate": 0.0022450456523297646, |
| "loss": 1.288, |
| "step": 71200 |
| }, |
| { |
| "epoch": 14.34, |
| "learning_rate": 0.0022439825787982275, |
| "loss": 1.2901, |
| "step": 71300 |
| }, |
| { |
| "epoch": 14.36, |
| "learning_rate": 0.0022429175463475897, |
| "loss": 1.2885, |
| "step": 71400 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 0.002241861236648861, |
| "loss": 1.2915, |
| "step": 71500 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 0.00224079231219746, |
| "loss": 1.2862, |
| "step": 71600 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 0.002239721435114266, |
| "loss": 1.2887, |
| "step": 71700 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 0.0022386486075097406, |
| "loss": 1.2953, |
| "step": 71800 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 0.0022375738314981885, |
| "loss": 1.2835, |
| "step": 71900 |
| }, |
| { |
| "epoch": 14.48, |
| "learning_rate": 0.0022364971091977555, |
| "loss": 1.2931, |
| "step": 72000 |
| }, |
| { |
| "epoch": 14.5, |
| "learning_rate": 0.002235418442730422, |
| "loss": 1.2878, |
| "step": 72100 |
| }, |
| { |
| "epoch": 14.52, |
| "learning_rate": 0.0022343378342220004, |
| "loss": 1.2877, |
| "step": 72200 |
| }, |
| { |
| "epoch": 14.54, |
| "learning_rate": 0.0022332552858021298, |
| "loss": 1.2856, |
| "step": 72300 |
| }, |
| { |
| "epoch": 14.56, |
| "learning_rate": 0.002232170799604273, |
| "loss": 1.2855, |
| "step": 72400 |
| }, |
| { |
| "epoch": 14.58, |
| "learning_rate": 0.002231084377765712, |
| "loss": 1.2872, |
| "step": 72500 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 0.002229996022427543, |
| "loss": 1.2863, |
| "step": 72600 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 0.002228905735734673, |
| "loss": 1.2894, |
| "step": 72700 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 0.002227813519835815, |
| "loss": 1.2877, |
| "step": 72800 |
| }, |
| { |
| "epoch": 14.66, |
| "learning_rate": 0.0022267193768834843, |
| "loss": 1.2847, |
| "step": 72900 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 0.002225623309033993, |
| "loss": 1.2925, |
| "step": 73000 |
| }, |
| { |
| "epoch": 14.7, |
| "learning_rate": 0.0022245253184474496, |
| "loss": 1.2866, |
| "step": 73100 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 0.002223425407287748, |
| "loss": 1.2889, |
| "step": 73200 |
| }, |
| { |
| "epoch": 14.74, |
| "learning_rate": 0.00222232357772257, |
| "loss": 1.2886, |
| "step": 73300 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 0.0022212198319233765, |
| "loss": 1.2896, |
| "step": 73400 |
| }, |
| { |
| "epoch": 14.78, |
| "learning_rate": 0.0022201141720654062, |
| "loss": 1.2915, |
| "step": 73500 |
| }, |
| { |
| "epoch": 14.8, |
| "learning_rate": 0.002219006600327669, |
| "loss": 1.287, |
| "step": 73600 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 0.002217897118892943, |
| "loss": 1.2942, |
| "step": 73700 |
| }, |
| { |
| "epoch": 14.84, |
| "learning_rate": 0.0022167857299477704, |
| "loss": 1.2878, |
| "step": 73800 |
| }, |
| { |
| "epoch": 14.86, |
| "learning_rate": 0.0022156724356824516, |
| "loss": 1.285, |
| "step": 73900 |
| }, |
| { |
| "epoch": 14.88, |
| "learning_rate": 0.0022145683996782177, |
| "loss": 1.2891, |
| "step": 74000 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 0.0022134513203569167, |
| "loss": 1.29, |
| "step": 74100 |
| }, |
| { |
| "epoch": 14.92, |
| "learning_rate": 0.0022123323422868512, |
| "loss": 1.2884, |
| "step": 74200 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 0.002211211467673279, |
| "loss": 1.2901, |
| "step": 74300 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 0.0022100886987251943, |
| "loss": 1.2901, |
| "step": 74400 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 0.0022089640376553267, |
| "loss": 1.2917, |
| "step": 74500 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.40564348112720083, |
| "eval_loss": 1.2492839097976685, |
| "eval_runtime": 19.7618, |
| "eval_samples_per_second": 4026.36, |
| "eval_steps_per_second": 15.737, |
| "step": 74598 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 0.0022078374866801326, |
| "loss": 1.2901, |
| "step": 74600 |
| }, |
| { |
| "epoch": 15.02, |
| "learning_rate": 0.002206709048019794, |
| "loss": 1.2815, |
| "step": 74700 |
| }, |
| { |
| "epoch": 15.04, |
| "learning_rate": 0.0022055787238982145, |
| "loss": 1.2824, |
| "step": 74800 |
| }, |
| { |
| "epoch": 15.06, |
| "learning_rate": 0.0022044465165430107, |
| "loss": 1.2836, |
| "step": 74900 |
| }, |
| { |
| "epoch": 15.08, |
| "learning_rate": 0.002203312428185512, |
| "loss": 1.2848, |
| "step": 75000 |
| }, |
| { |
| "epoch": 15.1, |
| "learning_rate": 0.002202176461060756, |
| "loss": 1.2856, |
| "step": 75100 |
| }, |
| { |
| "epoch": 15.12, |
| "learning_rate": 0.0022010386174074813, |
| "loss": 1.2858, |
| "step": 75200 |
| }, |
| { |
| "epoch": 15.14, |
| "learning_rate": 0.002199898899468125, |
| "loss": 1.2827, |
| "step": 75300 |
| }, |
| { |
| "epoch": 15.16, |
| "learning_rate": 0.0021987573094888182, |
| "loss": 1.2816, |
| "step": 75400 |
| }, |
| { |
| "epoch": 15.18, |
| "learning_rate": 0.0021976138497193823, |
| "loss": 1.2845, |
| "step": 75500 |
| }, |
| { |
| "epoch": 15.2, |
| "learning_rate": 0.0021964685224133214, |
| "loss": 1.2876, |
| "step": 75600 |
| }, |
| { |
| "epoch": 15.22, |
| "learning_rate": 0.0021953213298278223, |
| "loss": 1.2815, |
| "step": 75700 |
| }, |
| { |
| "epoch": 15.24, |
| "learning_rate": 0.0021941837739942976, |
| "loss": 1.2838, |
| "step": 75800 |
| }, |
| { |
| "epoch": 15.26, |
| "learning_rate": 0.002193032876232499, |
| "loss": 1.2826, |
| "step": 75900 |
| }, |
| { |
| "epoch": 15.28, |
| "learning_rate": 0.002191880119962158, |
| "loss": 1.2794, |
| "step": 76000 |
| }, |
| { |
| "epoch": 15.3, |
| "learning_rate": 0.0021907255074551024, |
| "loss": 1.2845, |
| "step": 76100 |
| }, |
| { |
| "epoch": 15.32, |
| "learning_rate": 0.0021895690409868165, |
| "loss": 1.2861, |
| "step": 76200 |
| }, |
| { |
| "epoch": 15.34, |
| "learning_rate": 0.0021884107228364387, |
| "loss": 1.2845, |
| "step": 76300 |
| }, |
| { |
| "epoch": 15.36, |
| "learning_rate": 0.002187250555286758, |
| "loss": 1.2845, |
| "step": 76400 |
| }, |
| { |
| "epoch": 15.38, |
| "learning_rate": 0.0021860885406242065, |
| "loss": 1.2879, |
| "step": 76500 |
| }, |
| { |
| "epoch": 15.4, |
| "learning_rate": 0.002184924681138858, |
| "loss": 1.2798, |
| "step": 76600 |
| }, |
| { |
| "epoch": 15.42, |
| "learning_rate": 0.0021837589791244205, |
| "loss": 1.2823, |
| "step": 76700 |
| }, |
| { |
| "epoch": 15.44, |
| "learning_rate": 0.0021825914368782343, |
| "loss": 1.282, |
| "step": 76800 |
| }, |
| { |
| "epoch": 15.46, |
| "learning_rate": 0.002181422056701266, |
| "loss": 1.2844, |
| "step": 76900 |
| }, |
| { |
| "epoch": 15.48, |
| "learning_rate": 0.002180250840898105, |
| "loss": 1.2854, |
| "step": 77000 |
| }, |
| { |
| "epoch": 15.5, |
| "learning_rate": 0.002179077791776957, |
| "loss": 1.287, |
| "step": 77100 |
| }, |
| { |
| "epoch": 15.52, |
| "learning_rate": 0.0021779029116496423, |
| "loss": 1.284, |
| "step": 77200 |
| }, |
| { |
| "epoch": 15.54, |
| "learning_rate": 0.002176726202831588, |
| "loss": 1.2866, |
| "step": 77300 |
| }, |
| { |
| "epoch": 15.56, |
| "learning_rate": 0.0021755476676418267, |
| "loss": 1.29, |
| "step": 77400 |
| }, |
| { |
| "epoch": 15.58, |
| "learning_rate": 0.0021743673084029897, |
| "loss": 1.2852, |
| "step": 77500 |
| }, |
| { |
| "epoch": 15.6, |
| "learning_rate": 0.002173185127441303, |
| "loss": 1.28, |
| "step": 77600 |
| }, |
| { |
| "epoch": 15.62, |
| "learning_rate": 0.0021720129760884666, |
| "loss": 1.2861, |
| "step": 77700 |
| }, |
| { |
| "epoch": 15.64, |
| "learning_rate": 0.0021708271768331494, |
| "loss": 1.2843, |
| "step": 77800 |
| }, |
| { |
| "epoch": 15.66, |
| "learning_rate": 0.002169639562831796, |
| "loss": 1.2825, |
| "step": 77900 |
| }, |
| { |
| "epoch": 15.68, |
| "learning_rate": 0.002168450136424931, |
| "loss": 1.2862, |
| "step": 78000 |
| }, |
| { |
| "epoch": 15.7, |
| "learning_rate": 0.0021672588999566487, |
| "loss": 1.2842, |
| "step": 78100 |
| }, |
| { |
| "epoch": 15.72, |
| "learning_rate": 0.0021660658557746126, |
| "loss": 1.2826, |
| "step": 78200 |
| }, |
| { |
| "epoch": 15.74, |
| "learning_rate": 0.0021648710062300482, |
| "loss": 1.2851, |
| "step": 78300 |
| }, |
| { |
| "epoch": 15.76, |
| "learning_rate": 0.002163674353677738, |
| "loss": 1.28, |
| "step": 78400 |
| }, |
| { |
| "epoch": 15.78, |
| "learning_rate": 0.0021624759004760198, |
| "loss": 1.2835, |
| "step": 78500 |
| }, |
| { |
| "epoch": 15.8, |
| "learning_rate": 0.0021612756489867773, |
| "loss": 1.282, |
| "step": 78600 |
| }, |
| { |
| "epoch": 15.82, |
| "learning_rate": 0.002160073601575442, |
| "loss": 1.2815, |
| "step": 78700 |
| }, |
| { |
| "epoch": 15.84, |
| "learning_rate": 0.0021588697606109808, |
| "loss": 1.2854, |
| "step": 78800 |
| }, |
| { |
| "epoch": 15.86, |
| "learning_rate": 0.0021576641284658978, |
| "loss": 1.2859, |
| "step": 78900 |
| }, |
| { |
| "epoch": 15.88, |
| "learning_rate": 0.0021564567075162263, |
| "loss": 1.2845, |
| "step": 79000 |
| }, |
| { |
| "epoch": 15.91, |
| "learning_rate": 0.002155247500141525, |
| "loss": 1.286, |
| "step": 79100 |
| }, |
| { |
| "epoch": 15.93, |
| "learning_rate": 0.0021540365087248737, |
| "loss": 1.2849, |
| "step": 79200 |
| }, |
| { |
| "epoch": 15.95, |
| "learning_rate": 0.0021528237356528675, |
| "loss": 1.2842, |
| "step": 79300 |
| }, |
| { |
| "epoch": 15.97, |
| "learning_rate": 0.002151609183315613, |
| "loss": 1.283, |
| "step": 79400 |
| }, |
| { |
| "epoch": 15.99, |
| "learning_rate": 0.0021503928541067234, |
| "loss": 1.2873, |
| "step": 79500 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.40597676821927015, |
| "eval_loss": 1.2457048892974854, |
| "eval_runtime": 19.9932, |
| "eval_samples_per_second": 3979.752, |
| "eval_steps_per_second": 15.555, |
| "step": 79572 |
| }, |
| { |
| "epoch": 16.01, |
| "learning_rate": 0.002149174750423314, |
| "loss": 1.2817, |
| "step": 79600 |
| }, |
| { |
| "epoch": 16.03, |
| "learning_rate": 0.0021479548746659964, |
| "loss": 1.2792, |
| "step": 79700 |
| }, |
| { |
| "epoch": 16.05, |
| "learning_rate": 0.002146733229238875, |
| "loss": 1.2774, |
| "step": 79800 |
| }, |
| { |
| "epoch": 16.07, |
| "learning_rate": 0.002145509816549542, |
| "loss": 1.2808, |
| "step": 79900 |
| }, |
| { |
| "epoch": 16.09, |
| "learning_rate": 0.0021442846390090724, |
| "loss": 1.2771, |
| "step": 80000 |
| }, |
| { |
| "epoch": 16.11, |
| "learning_rate": 0.0021430576990320196, |
| "loss": 1.2769, |
| "step": 80100 |
| }, |
| { |
| "epoch": 16.13, |
| "learning_rate": 0.002141828999036409, |
| "loss": 1.2819, |
| "step": 80200 |
| }, |
| { |
| "epoch": 16.15, |
| "learning_rate": 0.0021405985414437367, |
| "loss": 1.2789, |
| "step": 80300 |
| }, |
| { |
| "epoch": 16.17, |
| "learning_rate": 0.002139366328678961, |
| "loss": 1.2777, |
| "step": 80400 |
| }, |
| { |
| "epoch": 16.19, |
| "learning_rate": 0.0021381323631705002, |
| "loss": 1.2804, |
| "step": 80500 |
| }, |
| { |
| "epoch": 16.21, |
| "learning_rate": 0.002136896647350226, |
| "loss": 1.2802, |
| "step": 80600 |
| }, |
| { |
| "epoch": 16.23, |
| "learning_rate": 0.0021356591836534607, |
| "loss": 1.2759, |
| "step": 80700 |
| }, |
| { |
| "epoch": 16.25, |
| "learning_rate": 0.00213441997451897, |
| "loss": 1.2834, |
| "step": 80800 |
| }, |
| { |
| "epoch": 16.27, |
| "learning_rate": 0.002133179022388961, |
| "loss": 1.2772, |
| "step": 80900 |
| }, |
| { |
| "epoch": 16.29, |
| "learning_rate": 0.0021319363297090744, |
| "loss": 1.2805, |
| "step": 81000 |
| }, |
| { |
| "epoch": 16.31, |
| "learning_rate": 0.0021306918989283815, |
| "loss": 1.2752, |
| "step": 81100 |
| }, |
| { |
| "epoch": 16.33, |
| "learning_rate": 0.0021294457324993795, |
| "loss": 1.279, |
| "step": 81200 |
| }, |
| { |
| "epoch": 16.35, |
| "learning_rate": 0.0021281978328779863, |
| "loss": 1.2825, |
| "step": 81300 |
| }, |
| { |
| "epoch": 16.37, |
| "learning_rate": 0.0021269482025235344, |
| "loss": 1.2824, |
| "step": 81400 |
| }, |
| { |
| "epoch": 16.39, |
| "learning_rate": 0.002125696843898769, |
| "loss": 1.2801, |
| "step": 81500 |
| }, |
| { |
| "epoch": 16.41, |
| "learning_rate": 0.0021244437594698383, |
| "loss": 1.2782, |
| "step": 81600 |
| }, |
| { |
| "epoch": 16.43, |
| "learning_rate": 0.0021231889517062965, |
| "loss": 1.2817, |
| "step": 81700 |
| }, |
| { |
| "epoch": 16.45, |
| "learning_rate": 0.0021219324230810884, |
| "loss": 1.2788, |
| "step": 81800 |
| }, |
| { |
| "epoch": 16.47, |
| "learning_rate": 0.002120674176070555, |
| "loss": 1.2796, |
| "step": 81900 |
| }, |
| { |
| "epoch": 16.49, |
| "learning_rate": 0.0021194142131544212, |
| "loss": 1.2799, |
| "step": 82000 |
| }, |
| { |
| "epoch": 16.51, |
| "learning_rate": 0.002118152536815795, |
| "loss": 1.2786, |
| "step": 82100 |
| }, |
| { |
| "epoch": 16.53, |
| "learning_rate": 0.0021168891495411592, |
| "loss": 1.2794, |
| "step": 82200 |
| }, |
| { |
| "epoch": 16.55, |
| "learning_rate": 0.0021156240538203713, |
| "loss": 1.2814, |
| "step": 82300 |
| }, |
| { |
| "epoch": 16.57, |
| "learning_rate": 0.0021143572521466533, |
| "loss": 1.2811, |
| "step": 82400 |
| }, |
| { |
| "epoch": 16.59, |
| "learning_rate": 0.00211308874701659, |
| "loss": 1.2821, |
| "step": 82500 |
| }, |
| { |
| "epoch": 16.61, |
| "learning_rate": 0.002111818540930124, |
| "loss": 1.2811, |
| "step": 82600 |
| }, |
| { |
| "epoch": 16.63, |
| "learning_rate": 0.00211054663639055, |
| "loss": 1.2759, |
| "step": 82700 |
| }, |
| { |
| "epoch": 16.65, |
| "learning_rate": 0.0021092730359045086, |
| "loss": 1.2797, |
| "step": 82800 |
| }, |
| { |
| "epoch": 16.67, |
| "learning_rate": 0.0021079977419819853, |
| "loss": 1.2784, |
| "step": 82900 |
| }, |
| { |
| "epoch": 16.69, |
| "learning_rate": 0.0021067207571362997, |
| "loss": 1.2816, |
| "step": 83000 |
| }, |
| { |
| "epoch": 16.71, |
| "learning_rate": 0.0021054420838841066, |
| "loss": 1.2801, |
| "step": 83100 |
| }, |
| { |
| "epoch": 16.73, |
| "learning_rate": 0.0021041617247453863, |
| "loss": 1.284, |
| "step": 83200 |
| }, |
| { |
| "epoch": 16.75, |
| "learning_rate": 0.0021028796822434442, |
| "loss": 1.2809, |
| "step": 83300 |
| }, |
| { |
| "epoch": 16.77, |
| "learning_rate": 0.0021015959589049003, |
| "loss": 1.2786, |
| "step": 83400 |
| }, |
| { |
| "epoch": 16.79, |
| "learning_rate": 0.0021003105572596887, |
| "loss": 1.2824, |
| "step": 83500 |
| }, |
| { |
| "epoch": 16.81, |
| "learning_rate": 0.0020990234798410508, |
| "loss": 1.2771, |
| "step": 83600 |
| }, |
| { |
| "epoch": 16.83, |
| "learning_rate": 0.0020977347291855314, |
| "loss": 1.2829, |
| "step": 83700 |
| }, |
| { |
| "epoch": 16.85, |
| "learning_rate": 0.0020964443078329703, |
| "loss": 1.2805, |
| "step": 83800 |
| }, |
| { |
| "epoch": 16.87, |
| "learning_rate": 0.0020951522183265034, |
| "loss": 1.2761, |
| "step": 83900 |
| }, |
| { |
| "epoch": 16.89, |
| "learning_rate": 0.0020938584632125513, |
| "loss": 1.2776, |
| "step": 84000 |
| }, |
| { |
| "epoch": 16.91, |
| "learning_rate": 0.002092563045040819, |
| "loss": 1.2783, |
| "step": 84100 |
| }, |
| { |
| "epoch": 16.93, |
| "learning_rate": 0.0020912659663642878, |
| "loss": 1.2802, |
| "step": 84200 |
| }, |
| { |
| "epoch": 16.95, |
| "learning_rate": 0.002089967229739212, |
| "loss": 1.2749, |
| "step": 84300 |
| }, |
| { |
| "epoch": 16.97, |
| "learning_rate": 0.0020886668377251135, |
| "loss": 1.2798, |
| "step": 84400 |
| }, |
| { |
| "epoch": 16.99, |
| "learning_rate": 0.002087364792884776, |
| "loss": 1.2791, |
| "step": 84500 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.40644209745282756, |
| "eval_loss": 1.2400243282318115, |
| "eval_runtime": 19.8275, |
| "eval_samples_per_second": 4013.004, |
| "eval_steps_per_second": 15.685, |
| "step": 84545 |
| }, |
| { |
| "epoch": 17.01, |
| "learning_rate": 0.0020860610977842414, |
| "loss": 1.2757, |
| "step": 84600 |
| }, |
| { |
| "epoch": 17.03, |
| "learning_rate": 0.0020847557549928037, |
| "loss": 1.2693, |
| "step": 84700 |
| }, |
| { |
| "epoch": 17.05, |
| "learning_rate": 0.002083448767083003, |
| "loss": 1.2748, |
| "step": 84800 |
| }, |
| { |
| "epoch": 17.07, |
| "learning_rate": 0.002082140136630623, |
| "loss": 1.2713, |
| "step": 84900 |
| }, |
| { |
| "epoch": 17.09, |
| "learning_rate": 0.002080829866214684, |
| "loss": 1.2743, |
| "step": 85000 |
| }, |
| { |
| "epoch": 17.11, |
| "learning_rate": 0.0020795310855919614, |
| "loss": 1.2762, |
| "step": 85100 |
| }, |
| { |
| "epoch": 17.13, |
| "learning_rate": 0.0020782175593340372, |
| "loss": 1.2779, |
| "step": 85200 |
| }, |
| { |
| "epoch": 17.15, |
| "learning_rate": 0.0020769024008430834, |
| "loss": 1.272, |
| "step": 85300 |
| }, |
| { |
| "epoch": 17.17, |
| "learning_rate": 0.0020755856127109857, |
| "loss": 1.2736, |
| "step": 85400 |
| }, |
| { |
| "epoch": 17.19, |
| "learning_rate": 0.0020742671975328406, |
| "loss": 1.2722, |
| "step": 85500 |
| }, |
| { |
| "epoch": 17.21, |
| "learning_rate": 0.0020729471579069526, |
| "loss": 1.2711, |
| "step": 85600 |
| }, |
| { |
| "epoch": 17.23, |
| "learning_rate": 0.002071625496434827, |
| "loss": 1.2761, |
| "step": 85700 |
| }, |
| { |
| "epoch": 17.25, |
| "learning_rate": 0.0020703022157211644, |
| "loss": 1.2755, |
| "step": 85800 |
| }, |
| { |
| "epoch": 17.27, |
| "learning_rate": 0.002068977318373858, |
| "loss": 1.28, |
| "step": 85900 |
| }, |
| { |
| "epoch": 17.29, |
| "learning_rate": 0.002067650807003987, |
| "loss": 1.2775, |
| "step": 86000 |
| }, |
| { |
| "epoch": 17.31, |
| "learning_rate": 0.00206632268422581, |
| "loss": 1.2762, |
| "step": 86100 |
| }, |
| { |
| "epoch": 17.33, |
| "learning_rate": 0.002064992952656763, |
| "loss": 1.2791, |
| "step": 86200 |
| }, |
| { |
| "epoch": 17.35, |
| "learning_rate": 0.0020636616149174508, |
| "loss": 1.2801, |
| "step": 86300 |
| }, |
| { |
| "epoch": 17.37, |
| "learning_rate": 0.002062328673631646, |
| "loss": 1.2765, |
| "step": 86400 |
| }, |
| { |
| "epoch": 17.39, |
| "learning_rate": 0.002060994131426279, |
| "loss": 1.271, |
| "step": 86500 |
| }, |
| { |
| "epoch": 17.41, |
| "learning_rate": 0.0020596713602392754, |
| "loss": 1.2774, |
| "step": 86600 |
| }, |
| { |
| "epoch": 17.43, |
| "learning_rate": 0.002058333640031713, |
| "loss": 1.2742, |
| "step": 86700 |
| }, |
| { |
| "epoch": 17.45, |
| "learning_rate": 0.002056994326777913, |
| "loss": 1.2761, |
| "step": 86800 |
| }, |
| { |
| "epoch": 17.47, |
| "learning_rate": 0.0020556534231173655, |
| "loss": 1.2749, |
| "step": 86900 |
| }, |
| { |
| "epoch": 17.49, |
| "learning_rate": 0.002054310931692694, |
| "loss": 1.2757, |
| "step": 87000 |
| }, |
| { |
| "epoch": 17.51, |
| "learning_rate": 0.0020529668551496506, |
| "loss": 1.2755, |
| "step": 87100 |
| }, |
| { |
| "epoch": 17.53, |
| "learning_rate": 0.0020516211961371126, |
| "loss": 1.2766, |
| "step": 87200 |
| }, |
| { |
| "epoch": 17.55, |
| "learning_rate": 0.0020502739573070757, |
| "loss": 1.2719, |
| "step": 87300 |
| }, |
| { |
| "epoch": 17.57, |
| "learning_rate": 0.002048925141314647, |
| "loss": 1.2771, |
| "step": 87400 |
| }, |
| { |
| "epoch": 17.59, |
| "learning_rate": 0.0020475747508180457, |
| "loss": 1.2732, |
| "step": 87500 |
| }, |
| { |
| "epoch": 17.61, |
| "learning_rate": 0.00204622278847859, |
| "loss": 1.2741, |
| "step": 87600 |
| }, |
| { |
| "epoch": 17.63, |
| "learning_rate": 0.0020448692569606988, |
| "loss": 1.275, |
| "step": 87700 |
| }, |
| { |
| "epoch": 17.65, |
| "learning_rate": 0.0020435141589318817, |
| "loss": 1.2755, |
| "step": 87800 |
| }, |
| { |
| "epoch": 17.67, |
| "learning_rate": 0.0020421574970627366, |
| "loss": 1.2727, |
| "step": 87900 |
| }, |
| { |
| "epoch": 17.69, |
| "learning_rate": 0.002040799274026943, |
| "loss": 1.2753, |
| "step": 88000 |
| }, |
| { |
| "epoch": 17.71, |
| "learning_rate": 0.0020394394925012565, |
| "loss": 1.2749, |
| "step": 88100 |
| }, |
| { |
| "epoch": 17.73, |
| "learning_rate": 0.002038078155165506, |
| "loss": 1.2776, |
| "step": 88200 |
| }, |
| { |
| "epoch": 17.75, |
| "learning_rate": 0.002036715264702584, |
| "loss": 1.2752, |
| "step": 88300 |
| }, |
| { |
| "epoch": 17.78, |
| "learning_rate": 0.0020353508237984466, |
| "loss": 1.2771, |
| "step": 88400 |
| }, |
| { |
| "epoch": 17.8, |
| "learning_rate": 0.002033984835142102, |
| "loss": 1.2761, |
| "step": 88500 |
| }, |
| { |
| "epoch": 17.82, |
| "learning_rate": 0.002032617301425613, |
| "loss": 1.2707, |
| "step": 88600 |
| }, |
| { |
| "epoch": 17.84, |
| "learning_rate": 0.0020312482253440835, |
| "loss": 1.2749, |
| "step": 88700 |
| }, |
| { |
| "epoch": 17.86, |
| "learning_rate": 0.0020298776095956594, |
| "loss": 1.2735, |
| "step": 88800 |
| }, |
| { |
| "epoch": 17.88, |
| "learning_rate": 0.00202850545688152, |
| "loss": 1.2736, |
| "step": 88900 |
| }, |
| { |
| "epoch": 17.9, |
| "learning_rate": 0.002027131769905874, |
| "loss": 1.2763, |
| "step": 89000 |
| }, |
| { |
| "epoch": 17.92, |
| "learning_rate": 0.002025756551375953, |
| "loss": 1.2728, |
| "step": 89100 |
| }, |
| { |
| "epoch": 17.94, |
| "learning_rate": 0.0020243798040020084, |
| "loss": 1.2773, |
| "step": 89200 |
| }, |
| { |
| "epoch": 17.96, |
| "learning_rate": 0.002023001530497303, |
| "loss": 1.2736, |
| "step": 89300 |
| }, |
| { |
| "epoch": 17.98, |
| "learning_rate": 0.0020216217335781088, |
| "loss": 1.2755, |
| "step": 89400 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 0.002020240415963699, |
| "loss": 1.2724, |
| "step": 89500 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.40673402194635844, |
| "eval_loss": 1.2363520860671997, |
| "eval_runtime": 19.8757, |
| "eval_samples_per_second": 4003.284, |
| "eval_steps_per_second": 15.647, |
| "step": 89518 |
| }, |
| { |
| "epoch": 18.02, |
| "learning_rate": 0.0020188575803763435, |
| "loss": 1.2653, |
| "step": 89600 |
| }, |
| { |
| "epoch": 18.04, |
| "learning_rate": 0.0020174732295413058, |
| "loss": 1.265, |
| "step": 89700 |
| }, |
| { |
| "epoch": 18.06, |
| "learning_rate": 0.0020160873661868328, |
| "loss": 1.2724, |
| "step": 89800 |
| }, |
| { |
| "epoch": 18.08, |
| "learning_rate": 0.0020146999930441547, |
| "loss": 1.2692, |
| "step": 89900 |
| }, |
| { |
| "epoch": 18.1, |
| "learning_rate": 0.002013311112847475, |
| "loss": 1.2678, |
| "step": 90000 |
| }, |
| { |
| "epoch": 18.12, |
| "learning_rate": 0.0020119207283339695, |
| "loss": 1.2709, |
| "step": 90100 |
| }, |
| { |
| "epoch": 18.14, |
| "learning_rate": 0.0020105288422437775, |
| "loss": 1.2674, |
| "step": 90200 |
| }, |
| { |
| "epoch": 18.16, |
| "learning_rate": 0.0020091354573199965, |
| "loss": 1.2694, |
| "step": 90300 |
| }, |
| { |
| "epoch": 18.18, |
| "learning_rate": 0.00200774057630868, |
| "loss": 1.2691, |
| "step": 90400 |
| }, |
| { |
| "epoch": 18.2, |
| "learning_rate": 0.0020063442019588283, |
| "loss": 1.2665, |
| "step": 90500 |
| }, |
| { |
| "epoch": 18.22, |
| "learning_rate": 0.002004946337022386, |
| "loss": 1.2669, |
| "step": 90600 |
| }, |
| { |
| "epoch": 18.24, |
| "learning_rate": 0.0020035469842542347, |
| "loss": 1.272, |
| "step": 90700 |
| }, |
| { |
| "epoch": 18.26, |
| "learning_rate": 0.002002146146412188, |
| "loss": 1.2658, |
| "step": 90800 |
| }, |
| { |
| "epoch": 18.28, |
| "learning_rate": 0.002000743826256986, |
| "loss": 1.2719, |
| "step": 90900 |
| }, |
| { |
| "epoch": 18.3, |
| "learning_rate": 0.0019993400265522917, |
| "loss": 1.2716, |
| "step": 91000 |
| }, |
| { |
| "epoch": 18.32, |
| "learning_rate": 0.001997934750064681, |
| "loss": 1.2676, |
| "step": 91100 |
| }, |
| { |
| "epoch": 18.34, |
| "learning_rate": 0.0019965279995636438, |
| "loss": 1.2705, |
| "step": 91200 |
| }, |
| { |
| "epoch": 18.36, |
| "learning_rate": 0.001995119777821572, |
| "loss": 1.2741, |
| "step": 91300 |
| }, |
| { |
| "epoch": 18.38, |
| "learning_rate": 0.0019937100876137592, |
| "loss": 1.2688, |
| "step": 91400 |
| }, |
| { |
| "epoch": 18.4, |
| "learning_rate": 0.001992298931718391, |
| "loss": 1.2709, |
| "step": 91500 |
| }, |
| { |
| "epoch": 18.42, |
| "learning_rate": 0.0019908863129165432, |
| "loss": 1.2719, |
| "step": 91600 |
| }, |
| { |
| "epoch": 18.44, |
| "learning_rate": 0.0019894722339921737, |
| "loss": 1.2726, |
| "step": 91700 |
| }, |
| { |
| "epoch": 18.46, |
| "learning_rate": 0.0019880566977321184, |
| "loss": 1.2734, |
| "step": 91800 |
| }, |
| { |
| "epoch": 18.48, |
| "learning_rate": 0.001986639706926085, |
| "loss": 1.2703, |
| "step": 91900 |
| }, |
| { |
| "epoch": 18.5, |
| "learning_rate": 0.0019852354559692456, |
| "loss": 1.2692, |
| "step": 92000 |
| }, |
| { |
| "epoch": 18.52, |
| "learning_rate": 0.0019838155789275737, |
| "loss": 1.2722, |
| "step": 92100 |
| }, |
| { |
| "epoch": 18.54, |
| "learning_rate": 0.0019823942556982275, |
| "loss": 1.2754, |
| "step": 92200 |
| }, |
| { |
| "epoch": 18.56, |
| "learning_rate": 0.001980971489082321, |
| "loss": 1.2689, |
| "step": 92300 |
| }, |
| { |
| "epoch": 18.58, |
| "learning_rate": 0.00197954728188381, |
| "loss": 1.2756, |
| "step": 92400 |
| }, |
| { |
| "epoch": 18.6, |
| "learning_rate": 0.0019781216369094915, |
| "loss": 1.2725, |
| "step": 92500 |
| }, |
| { |
| "epoch": 18.62, |
| "learning_rate": 0.001976694556968995, |
| "loss": 1.2689, |
| "step": 92600 |
| }, |
| { |
| "epoch": 18.64, |
| "learning_rate": 0.0019752660448747795, |
| "loss": 1.2701, |
| "step": 92700 |
| }, |
| { |
| "epoch": 18.66, |
| "learning_rate": 0.001973836103442124, |
| "loss": 1.2694, |
| "step": 92800 |
| }, |
| { |
| "epoch": 18.68, |
| "learning_rate": 0.0019724047354891263, |
| "loss": 1.2722, |
| "step": 92900 |
| }, |
| { |
| "epoch": 18.7, |
| "learning_rate": 0.001970971943836695, |
| "loss": 1.2724, |
| "step": 93000 |
| }, |
| { |
| "epoch": 18.72, |
| "learning_rate": 0.0019695377313085453, |
| "loss": 1.2683, |
| "step": 93100 |
| }, |
| { |
| "epoch": 18.74, |
| "learning_rate": 0.0019681021007311905, |
| "loss": 1.2739, |
| "step": 93200 |
| }, |
| { |
| "epoch": 18.76, |
| "learning_rate": 0.001966665054933941, |
| "loss": 1.2722, |
| "step": 93300 |
| }, |
| { |
| "epoch": 18.78, |
| "learning_rate": 0.0019652265967488943, |
| "loss": 1.2705, |
| "step": 93400 |
| }, |
| { |
| "epoch": 18.8, |
| "learning_rate": 0.0019637867290109327, |
| "loss": 1.2687, |
| "step": 93500 |
| }, |
| { |
| "epoch": 18.82, |
| "learning_rate": 0.001962345454557716, |
| "loss": 1.2698, |
| "step": 93600 |
| }, |
| { |
| "epoch": 18.84, |
| "learning_rate": 0.0019609027762296765, |
| "loss": 1.2722, |
| "step": 93700 |
| }, |
| { |
| "epoch": 18.86, |
| "learning_rate": 0.001959458696870013, |
| "loss": 1.274, |
| "step": 93800 |
| }, |
| { |
| "epoch": 18.88, |
| "learning_rate": 0.0019580132193246854, |
| "loss": 1.2683, |
| "step": 93900 |
| }, |
| { |
| "epoch": 18.9, |
| "learning_rate": 0.001956566346442409, |
| "loss": 1.268, |
| "step": 94000 |
| }, |
| { |
| "epoch": 18.92, |
| "learning_rate": 0.001955118081074649, |
| "loss": 1.2674, |
| "step": 94100 |
| }, |
| { |
| "epoch": 18.94, |
| "learning_rate": 0.001953668426075616, |
| "loss": 1.267, |
| "step": 94200 |
| }, |
| { |
| "epoch": 18.96, |
| "learning_rate": 0.0019522173843022578, |
| "loss": 1.2677, |
| "step": 94300 |
| }, |
| { |
| "epoch": 18.98, |
| "learning_rate": 0.0019507649586142553, |
| "loss": 1.2669, |
| "step": 94400 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.4069155401191752, |
| "eval_loss": 1.2321081161499023, |
| "eval_runtime": 19.6805, |
| "eval_samples_per_second": 4042.992, |
| "eval_steps_per_second": 15.802, |
| "step": 94491 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 0.0019493111518740181, |
| "loss": 1.2724, |
| "step": 94500 |
| }, |
| { |
| "epoch": 19.02, |
| "learning_rate": 0.001947855966946676, |
| "loss": 1.2629, |
| "step": 94600 |
| }, |
| { |
| "epoch": 19.04, |
| "learning_rate": 0.0019463994067000763, |
| "loss": 1.2614, |
| "step": 94700 |
| }, |
| { |
| "epoch": 19.06, |
| "learning_rate": 0.001944941474004775, |
| "loss": 1.2603, |
| "step": 94800 |
| }, |
| { |
| "epoch": 19.08, |
| "learning_rate": 0.0019434821717340346, |
| "loss": 1.2643, |
| "step": 94900 |
| }, |
| { |
| "epoch": 19.1, |
| "learning_rate": 0.001942021502763816, |
| "loss": 1.2619, |
| "step": 95000 |
| }, |
| { |
| "epoch": 19.12, |
| "learning_rate": 0.001940559469972774, |
| "loss": 1.2676, |
| "step": 95100 |
| }, |
| { |
| "epoch": 19.14, |
| "learning_rate": 0.0019390960762422499, |
| "loss": 1.2606, |
| "step": 95200 |
| }, |
| { |
| "epoch": 19.16, |
| "learning_rate": 0.0019376313244562687, |
| "loss": 1.2628, |
| "step": 95300 |
| }, |
| { |
| "epoch": 19.18, |
| "learning_rate": 0.0019361798852696786, |
| "loss": 1.2691, |
| "step": 95400 |
| }, |
| { |
| "epoch": 19.2, |
| "learning_rate": 0.0019347124395440384, |
| "loss": 1.2652, |
| "step": 95500 |
| }, |
| { |
| "epoch": 19.22, |
| "learning_rate": 0.0019332436444021162, |
| "loss": 1.2672, |
| "step": 95600 |
| }, |
| { |
| "epoch": 19.24, |
| "learning_rate": 0.0019317735027385814, |
| "loss": 1.2668, |
| "step": 95700 |
| }, |
| { |
| "epoch": 19.26, |
| "learning_rate": 0.0019303020174507568, |
| "loss": 1.2604, |
| "step": 95800 |
| }, |
| { |
| "epoch": 19.28, |
| "learning_rate": 0.001928829191438613, |
| "loss": 1.2699, |
| "step": 95900 |
| }, |
| { |
| "epoch": 19.3, |
| "learning_rate": 0.0019273550276047641, |
| "loss": 1.2617, |
| "step": 96000 |
| }, |
| { |
| "epoch": 19.32, |
| "learning_rate": 0.0019258795288544595, |
| "loss": 1.2651, |
| "step": 96100 |
| }, |
| { |
| "epoch": 19.34, |
| "learning_rate": 0.0019244026980955796, |
| "loss": 1.2651, |
| "step": 96200 |
| }, |
| { |
| "epoch": 19.36, |
| "learning_rate": 0.0019229245382386302, |
| "loss": 1.2654, |
| "step": 96300 |
| }, |
| { |
| "epoch": 19.38, |
| "learning_rate": 0.0019214450521967369, |
| "loss": 1.2645, |
| "step": 96400 |
| }, |
| { |
| "epoch": 19.4, |
| "learning_rate": 0.0019199642428856373, |
| "loss": 1.2674, |
| "step": 96500 |
| }, |
| { |
| "epoch": 19.42, |
| "learning_rate": 0.0019184821132236796, |
| "loss": 1.2707, |
| "step": 96600 |
| }, |
| { |
| "epoch": 19.44, |
| "learning_rate": 0.0019169986661318106, |
| "loss": 1.2653, |
| "step": 96700 |
| }, |
| { |
| "epoch": 19.46, |
| "learning_rate": 0.0019155139045335771, |
| "loss": 1.2664, |
| "step": 96800 |
| }, |
| { |
| "epoch": 19.48, |
| "learning_rate": 0.0019140278313551134, |
| "loss": 1.2657, |
| "step": 96900 |
| }, |
| { |
| "epoch": 19.5, |
| "learning_rate": 0.0019125404495251408, |
| "loss": 1.2665, |
| "step": 97000 |
| }, |
| { |
| "epoch": 19.52, |
| "learning_rate": 0.001911066655304144, |
| "loss": 1.2674, |
| "step": 97100 |
| }, |
| { |
| "epoch": 19.54, |
| "learning_rate": 0.0019095766779809568, |
| "loss": 1.2644, |
| "step": 97200 |
| }, |
| { |
| "epoch": 19.56, |
| "learning_rate": 0.0019080854007784964, |
| "loss": 1.2648, |
| "step": 97300 |
| }, |
| { |
| "epoch": 19.58, |
| "learning_rate": 0.0019065928266357385, |
| "loss": 1.2669, |
| "step": 97400 |
| }, |
| { |
| "epoch": 19.6, |
| "learning_rate": 0.001905098958494216, |
| "loss": 1.2676, |
| "step": 97500 |
| }, |
| { |
| "epoch": 19.62, |
| "learning_rate": 0.001903618757271021, |
| "loss": 1.2662, |
| "step": 97600 |
| }, |
| { |
| "epoch": 19.65, |
| "learning_rate": 0.001902122322833248, |
| "loss": 1.2677, |
| "step": 97700 |
| }, |
| { |
| "epoch": 19.67, |
| "learning_rate": 0.0019006246032070807, |
| "loss": 1.2659, |
| "step": 97800 |
| }, |
| { |
| "epoch": 19.69, |
| "learning_rate": 0.0018991256013441932, |
| "loss": 1.2669, |
| "step": 97900 |
| }, |
| { |
| "epoch": 19.71, |
| "learning_rate": 0.0018976253201987848, |
| "loss": 1.2649, |
| "step": 98000 |
| }, |
| { |
| "epoch": 19.73, |
| "learning_rate": 0.0018961237627275773, |
| "loss": 1.2679, |
| "step": 98100 |
| }, |
| { |
| "epoch": 19.75, |
| "learning_rate": 0.001894620931889807, |
| "loss": 1.2692, |
| "step": 98200 |
| }, |
| { |
| "epoch": 19.77, |
| "learning_rate": 0.0018931168306472199, |
| "loss": 1.2655, |
| "step": 98300 |
| }, |
| { |
| "epoch": 19.79, |
| "learning_rate": 0.0018916114619640656, |
| "loss": 1.2646, |
| "step": 98400 |
| }, |
| { |
| "epoch": 19.81, |
| "learning_rate": 0.0018901048288070927, |
| "loss": 1.2647, |
| "step": 98500 |
| }, |
| { |
| "epoch": 19.83, |
| "learning_rate": 0.0018885969341455395, |
| "loss": 1.2651, |
| "step": 98600 |
| }, |
| { |
| "epoch": 19.85, |
| "learning_rate": 0.0018870877809511327, |
| "loss": 1.2642, |
| "step": 98700 |
| }, |
| { |
| "epoch": 19.87, |
| "learning_rate": 0.001885577372198078, |
| "loss": 1.2656, |
| "step": 98800 |
| }, |
| { |
| "epoch": 19.89, |
| "learning_rate": 0.001884065710863056, |
| "loss": 1.2627, |
| "step": 98900 |
| }, |
| { |
| "epoch": 19.91, |
| "learning_rate": 0.0018825527999252157, |
| "loss": 1.266, |
| "step": 99000 |
| }, |
| { |
| "epoch": 19.93, |
| "learning_rate": 0.0018810386423661694, |
| "loss": 1.2654, |
| "step": 99100 |
| }, |
| { |
| "epoch": 19.95, |
| "learning_rate": 0.0018795232411699847, |
| "loss": 1.264, |
| "step": 99200 |
| }, |
| { |
| "epoch": 19.97, |
| "learning_rate": 0.0018780065993231816, |
| "loss": 1.2633, |
| "step": 99300 |
| }, |
| { |
| "epoch": 19.99, |
| "learning_rate": 0.0018764887198147245, |
| "loss": 1.2683, |
| "step": 99400 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.4075025708445861, |
| "eval_loss": 1.2268297672271729, |
| "eval_runtime": 19.801, |
| "eval_samples_per_second": 4018.383, |
| "eval_steps_per_second": 15.706, |
| "step": 99465 |
| }, |
| { |
| "epoch": 20.01, |
| "learning_rate": 0.0018749696056360177, |
| "loss": 1.2603, |
| "step": 99500 |
| }, |
| { |
| "epoch": 20.03, |
| "learning_rate": 0.0018734492597808972, |
| "loss": 1.2609, |
| "step": 99600 |
| }, |
| { |
| "epoch": 20.05, |
| "learning_rate": 0.001871927685245628, |
| "loss": 1.2587, |
| "step": 99700 |
| }, |
| { |
| "epoch": 20.07, |
| "learning_rate": 0.0018704048850288952, |
| "loss": 1.2575, |
| "step": 99800 |
| }, |
| { |
| "epoch": 20.09, |
| "learning_rate": 0.001868880862131801, |
| "loss": 1.2582, |
| "step": 99900 |
| }, |
| { |
| "epoch": 20.11, |
| "learning_rate": 0.0018673556195578558, |
| "loss": 1.2595, |
| "step": 100000 |
| }, |
| { |
| "epoch": 20.13, |
| "learning_rate": 0.0018658291603129745, |
| "loss": 1.2615, |
| "step": 100100 |
| }, |
| { |
| "epoch": 20.15, |
| "learning_rate": 0.0018643014874054691, |
| "loss": 1.2603, |
| "step": 100200 |
| }, |
| { |
| "epoch": 20.17, |
| "learning_rate": 0.0018627726038460447, |
| "loss": 1.2634, |
| "step": 100300 |
| }, |
| { |
| "epoch": 20.19, |
| "learning_rate": 0.001861242512647791, |
| "loss": 1.2634, |
| "step": 100400 |
| }, |
| { |
| "epoch": 20.21, |
| "learning_rate": 0.0018597112168261781, |
| "loss": 1.2646, |
| "step": 100500 |
| }, |
| { |
| "epoch": 20.23, |
| "learning_rate": 0.0018581787193990508, |
| "loss": 1.2591, |
| "step": 100600 |
| }, |
| { |
| "epoch": 20.25, |
| "learning_rate": 0.0018566450233866208, |
| "loss": 1.2609, |
| "step": 100700 |
| }, |
| { |
| "epoch": 20.27, |
| "learning_rate": 0.0018551101318114629, |
| "loss": 1.2647, |
| "step": 100800 |
| }, |
| { |
| "epoch": 20.29, |
| "learning_rate": 0.0018535740476985083, |
| "loss": 1.2634, |
| "step": 100900 |
| }, |
| { |
| "epoch": 20.31, |
| "learning_rate": 0.0018520367740750374, |
| "loss": 1.2589, |
| "step": 101000 |
| }, |
| { |
| "epoch": 20.33, |
| "learning_rate": 0.0018504983139706755, |
| "loss": 1.2615, |
| "step": 101100 |
| }, |
| { |
| "epoch": 20.35, |
| "learning_rate": 0.001848958670417386, |
| "loss": 1.2561, |
| "step": 101200 |
| }, |
| { |
| "epoch": 20.37, |
| "learning_rate": 0.0018474178464494648, |
| "loss": 1.2627, |
| "step": 101300 |
| }, |
| { |
| "epoch": 20.39, |
| "learning_rate": 0.001845875845103534, |
| "loss": 1.2583, |
| "step": 101400 |
| }, |
| { |
| "epoch": 20.41, |
| "learning_rate": 0.0018443326694185364, |
| "loss": 1.2633, |
| "step": 101500 |
| }, |
| { |
| "epoch": 20.43, |
| "learning_rate": 0.0018427883224357284, |
| "loss": 1.2611, |
| "step": 101600 |
| }, |
| { |
| "epoch": 20.45, |
| "learning_rate": 0.0018412428071986754, |
| "loss": 1.2625, |
| "step": 101700 |
| }, |
| { |
| "epoch": 20.47, |
| "learning_rate": 0.001839711599315474, |
| "loss": 1.2607, |
| "step": 101800 |
| }, |
| { |
| "epoch": 20.49, |
| "learning_rate": 0.001838163768316336, |
| "loss": 1.2623, |
| "step": 101900 |
| }, |
| { |
| "epoch": 20.51, |
| "learning_rate": 0.0018366147781769232, |
| "loss": 1.2569, |
| "step": 102000 |
| }, |
| { |
| "epoch": 20.53, |
| "learning_rate": 0.001835064631949951, |
| "loss": 1.2585, |
| "step": 102100 |
| }, |
| { |
| "epoch": 20.55, |
| "learning_rate": 0.0018335133326904144, |
| "loss": 1.2631, |
| "step": 102200 |
| }, |
| { |
| "epoch": 20.57, |
| "learning_rate": 0.001831960883455579, |
| "loss": 1.2592, |
| "step": 102300 |
| }, |
| { |
| "epoch": 20.59, |
| "learning_rate": 0.0018304072873049785, |
| "loss": 1.2613, |
| "step": 102400 |
| }, |
| { |
| "epoch": 20.61, |
| "learning_rate": 0.0018288525473004055, |
| "loss": 1.2601, |
| "step": 102500 |
| }, |
| { |
| "epoch": 20.63, |
| "learning_rate": 0.0018272966665059086, |
| "loss": 1.2608, |
| "step": 102600 |
| }, |
| { |
| "epoch": 20.65, |
| "learning_rate": 0.0018257396479877822, |
| "loss": 1.2608, |
| "step": 102700 |
| }, |
| { |
| "epoch": 20.67, |
| "learning_rate": 0.0018241814948145656, |
| "loss": 1.2619, |
| "step": 102800 |
| }, |
| { |
| "epoch": 20.69, |
| "learning_rate": 0.0018226222100570322, |
| "loss": 1.2599, |
| "step": 102900 |
| }, |
| { |
| "epoch": 20.71, |
| "learning_rate": 0.0018210617967881865, |
| "loss": 1.2589, |
| "step": 103000 |
| }, |
| { |
| "epoch": 20.73, |
| "learning_rate": 0.0018195002580832567, |
| "loss": 1.2573, |
| "step": 103100 |
| }, |
| { |
| "epoch": 20.75, |
| "learning_rate": 0.0018179375970196893, |
| "loss": 1.2582, |
| "step": 103200 |
| }, |
| { |
| "epoch": 20.77, |
| "learning_rate": 0.0018163738166771422, |
| "loss": 1.2581, |
| "step": 103300 |
| }, |
| { |
| "epoch": 20.79, |
| "learning_rate": 0.0018148089201374795, |
| "loss": 1.2628, |
| "step": 103400 |
| }, |
| { |
| "epoch": 20.81, |
| "learning_rate": 0.0018132429104847653, |
| "loss": 1.2598, |
| "step": 103500 |
| }, |
| { |
| "epoch": 20.83, |
| "learning_rate": 0.0018116757908052572, |
| "loss": 1.2618, |
| "step": 103600 |
| }, |
| { |
| "epoch": 20.85, |
| "learning_rate": 0.0018101075641874, |
| "loss": 1.266, |
| "step": 103700 |
| }, |
| { |
| "epoch": 20.87, |
| "learning_rate": 0.0018085382337218203, |
| "loss": 1.2608, |
| "step": 103800 |
| }, |
| { |
| "epoch": 20.89, |
| "learning_rate": 0.0018069678025013202, |
| "loss": 1.2583, |
| "step": 103900 |
| }, |
| { |
| "epoch": 20.91, |
| "learning_rate": 0.0018053962736208717, |
| "loss": 1.2578, |
| "step": 104000 |
| }, |
| { |
| "epoch": 20.93, |
| "learning_rate": 0.0018038236501776092, |
| "loss": 1.2621, |
| "step": 104100 |
| }, |
| { |
| "epoch": 20.95, |
| "learning_rate": 0.0018022499352708247, |
| "loss": 1.2574, |
| "step": 104200 |
| }, |
| { |
| "epoch": 20.97, |
| "learning_rate": 0.0018006751320019604, |
| "loss": 1.2589, |
| "step": 104300 |
| }, |
| { |
| "epoch": 20.99, |
| "learning_rate": 0.001799099243474605, |
| "loss": 1.2632, |
| "step": 104400 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.40797378629408937, |
| "eval_loss": 1.2228056192398071, |
| "eval_runtime": 19.5365, |
| "eval_samples_per_second": 4072.794, |
| "eval_steps_per_second": 15.919, |
| "step": 104438 |
| }, |
| { |
| "epoch": 21.01, |
| "learning_rate": 0.0017975222727944844, |
| "loss": 1.2558, |
| "step": 104500 |
| }, |
| { |
| "epoch": 21.03, |
| "learning_rate": 0.0017959442230694584, |
| "loss": 1.2493, |
| "step": 104600 |
| }, |
| { |
| "epoch": 21.05, |
| "learning_rate": 0.0017943650974095123, |
| "loss": 1.2564, |
| "step": 104700 |
| }, |
| { |
| "epoch": 21.07, |
| "learning_rate": 0.001792784898926753, |
| "loss": 1.2552, |
| "step": 104800 |
| }, |
| { |
| "epoch": 21.09, |
| "learning_rate": 0.0017912036307354006, |
| "loss": 1.2495, |
| "step": 104900 |
| }, |
| { |
| "epoch": 21.11, |
| "learning_rate": 0.0017896212959517843, |
| "loss": 1.2554, |
| "step": 105000 |
| }, |
| { |
| "epoch": 21.13, |
| "learning_rate": 0.0017880378976943344, |
| "loss": 1.2587, |
| "step": 105100 |
| }, |
| { |
| "epoch": 21.15, |
| "learning_rate": 0.0017864534390835776, |
| "loss": 1.2574, |
| "step": 105200 |
| }, |
| { |
| "epoch": 21.17, |
| "learning_rate": 0.00178486792324213, |
| "loss": 1.2543, |
| "step": 105300 |
| }, |
| { |
| "epoch": 21.19, |
| "learning_rate": 0.0017832813532946924, |
| "loss": 1.2566, |
| "step": 105400 |
| }, |
| { |
| "epoch": 21.21, |
| "learning_rate": 0.001781693732368041, |
| "loss": 1.2563, |
| "step": 105500 |
| }, |
| { |
| "epoch": 21.23, |
| "learning_rate": 0.001780105063591025, |
| "loss": 1.259, |
| "step": 105600 |
| }, |
| { |
| "epoch": 21.25, |
| "learning_rate": 0.0017785153500945576, |
| "loss": 1.2568, |
| "step": 105700 |
| }, |
| { |
| "epoch": 21.27, |
| "learning_rate": 0.001776924595011612, |
| "loss": 1.2594, |
| "step": 105800 |
| }, |
| { |
| "epoch": 21.29, |
| "learning_rate": 0.0017753328014772126, |
| "loss": 1.2547, |
| "step": 105900 |
| }, |
| { |
| "epoch": 21.31, |
| "learning_rate": 0.0017737399726284325, |
| "loss": 1.2585, |
| "step": 106000 |
| }, |
| { |
| "epoch": 21.33, |
| "learning_rate": 0.0017721461116043825, |
| "loss": 1.2554, |
| "step": 106100 |
| }, |
| { |
| "epoch": 21.35, |
| "learning_rate": 0.00177055122154621, |
| "loss": 1.2586, |
| "step": 106200 |
| }, |
| { |
| "epoch": 21.37, |
| "learning_rate": 0.001768955305597089, |
| "loss": 1.2551, |
| "step": 106300 |
| }, |
| { |
| "epoch": 21.39, |
| "learning_rate": 0.0017673583669022158, |
| "loss": 1.256, |
| "step": 106400 |
| }, |
| { |
| "epoch": 21.41, |
| "learning_rate": 0.0017657604086088023, |
| "loss": 1.2537, |
| "step": 106500 |
| }, |
| { |
| "epoch": 21.43, |
| "learning_rate": 0.0017641614338660694, |
| "loss": 1.2552, |
| "step": 106600 |
| }, |
| { |
| "epoch": 21.45, |
| "learning_rate": 0.0017625614458252417, |
| "loss": 1.2555, |
| "step": 106700 |
| }, |
| { |
| "epoch": 21.47, |
| "learning_rate": 0.0017609604476395407, |
| "loss": 1.2555, |
| "step": 106800 |
| }, |
| { |
| "epoch": 21.49, |
| "learning_rate": 0.0017593584424641785, |
| "loss": 1.253, |
| "step": 106900 |
| }, |
| { |
| "epoch": 21.52, |
| "learning_rate": 0.0017577714685050292, |
| "loss": 1.2582, |
| "step": 107000 |
| }, |
| { |
| "epoch": 21.54, |
| "learning_rate": 0.0017561674688150015, |
| "loss": 1.2545, |
| "step": 107100 |
| }, |
| { |
| "epoch": 21.56, |
| "learning_rate": 0.0017545624715812104, |
| "loss": 1.2584, |
| "step": 107200 |
| }, |
| { |
| "epoch": 21.58, |
| "learning_rate": 0.0017529564799667488, |
| "loss": 1.2594, |
| "step": 107300 |
| }, |
| { |
| "epoch": 21.6, |
| "learning_rate": 0.001751365571861091, |
| "loss": 1.2576, |
| "step": 107400 |
| }, |
| { |
| "epoch": 21.62, |
| "learning_rate": 0.0017497576108472055, |
| "loss": 1.256, |
| "step": 107500 |
| }, |
| { |
| "epoch": 21.64, |
| "learning_rate": 0.0017481486649219638, |
| "loss": 1.2531, |
| "step": 107600 |
| }, |
| { |
| "epoch": 21.66, |
| "learning_rate": 0.001746538737256242, |
| "loss": 1.255, |
| "step": 107700 |
| }, |
| { |
| "epoch": 21.68, |
| "learning_rate": 0.0017449278310228496, |
| "loss": 1.258, |
| "step": 107800 |
| }, |
| { |
| "epoch": 21.7, |
| "learning_rate": 0.0017433159493965259, |
| "loss": 1.2543, |
| "step": 107900 |
| }, |
| { |
| "epoch": 21.72, |
| "learning_rate": 0.0017417030955539316, |
| "loss": 1.2588, |
| "step": 108000 |
| }, |
| { |
| "epoch": 21.74, |
| "learning_rate": 0.0017400892726736443, |
| "loss": 1.2568, |
| "step": 108100 |
| }, |
| { |
| "epoch": 21.76, |
| "learning_rate": 0.0017384744839361499, |
| "loss": 1.254, |
| "step": 108200 |
| }, |
| { |
| "epoch": 21.78, |
| "learning_rate": 0.0017368587325238393, |
| "loss": 1.2562, |
| "step": 108300 |
| }, |
| { |
| "epoch": 21.8, |
| "learning_rate": 0.0017352420216209996, |
| "loss": 1.2515, |
| "step": 108400 |
| }, |
| { |
| "epoch": 21.82, |
| "learning_rate": 0.0017336243544138097, |
| "loss": 1.2532, |
| "step": 108500 |
| }, |
| { |
| "epoch": 21.84, |
| "learning_rate": 0.0017320057340903326, |
| "loss": 1.2579, |
| "step": 108600 |
| }, |
| { |
| "epoch": 21.86, |
| "learning_rate": 0.0017303861638405097, |
| "loss": 1.2541, |
| "step": 108700 |
| }, |
| { |
| "epoch": 21.88, |
| "learning_rate": 0.001728765646856154, |
| "loss": 1.2553, |
| "step": 108800 |
| }, |
| { |
| "epoch": 21.9, |
| "learning_rate": 0.0017271441863309462, |
| "loss": 1.2541, |
| "step": 108900 |
| }, |
| { |
| "epoch": 21.92, |
| "learning_rate": 0.0017255217854604242, |
| "loss": 1.2558, |
| "step": 109000 |
| }, |
| { |
| "epoch": 21.94, |
| "learning_rate": 0.0017238984474419804, |
| "loss": 1.2548, |
| "step": 109100 |
| }, |
| { |
| "epoch": 21.96, |
| "learning_rate": 0.0017222741754748536, |
| "loss": 1.2568, |
| "step": 109200 |
| }, |
| { |
| "epoch": 21.98, |
| "learning_rate": 0.0017206489727601237, |
| "loss": 1.2524, |
| "step": 109300 |
| }, |
| { |
| "epoch": 22.0, |
| "learning_rate": 0.0017190228425007044, |
| "loss": 1.2563, |
| "step": 109400 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.40835639033056986, |
| "eval_loss": 1.2162704467773438, |
| "eval_runtime": 19.8119, |
| "eval_samples_per_second": 4016.173, |
| "eval_steps_per_second": 15.698, |
| "step": 109411 |
| }, |
| { |
| "epoch": 22.02, |
| "learning_rate": 0.001717395787901338, |
| "loss": 1.2459, |
| "step": 109500 |
| }, |
| { |
| "epoch": 22.04, |
| "learning_rate": 0.0017157678121685874, |
| "loss": 1.2485, |
| "step": 109600 |
| }, |
| { |
| "epoch": 22.06, |
| "learning_rate": 0.0017141389185108325, |
| "loss": 1.25, |
| "step": 109700 |
| }, |
| { |
| "epoch": 22.08, |
| "learning_rate": 0.0017125091101382601, |
| "loss": 1.2473, |
| "step": 109800 |
| }, |
| { |
| "epoch": 22.1, |
| "learning_rate": 0.0017108783902628616, |
| "loss": 1.2489, |
| "step": 109900 |
| }, |
| { |
| "epoch": 22.12, |
| "learning_rate": 0.0017092467620984239, |
| "loss": 1.2498, |
| "step": 110000 |
| }, |
| { |
| "epoch": 22.14, |
| "learning_rate": 0.0017076142288605242, |
| "loss": 1.2485, |
| "step": 110100 |
| }, |
| { |
| "epoch": 22.16, |
| "learning_rate": 0.0017059807937665227, |
| "loss": 1.2542, |
| "step": 110200 |
| }, |
| { |
| "epoch": 22.18, |
| "learning_rate": 0.0017043464600355585, |
| "loss": 1.2515, |
| "step": 110300 |
| }, |
| { |
| "epoch": 22.2, |
| "learning_rate": 0.0017027112308885396, |
| "loss": 1.2534, |
| "step": 110400 |
| }, |
| { |
| "epoch": 22.22, |
| "learning_rate": 0.0017010751095481403, |
| "loss": 1.2504, |
| "step": 110500 |
| }, |
| { |
| "epoch": 22.24, |
| "learning_rate": 0.001699438099238793, |
| "loss": 1.25, |
| "step": 110600 |
| }, |
| { |
| "epoch": 22.26, |
| "learning_rate": 0.0016978002031866812, |
| "loss": 1.2516, |
| "step": 110700 |
| }, |
| { |
| "epoch": 22.28, |
| "learning_rate": 0.0016961614246197348, |
| "loss": 1.2541, |
| "step": 110800 |
| }, |
| { |
| "epoch": 22.3, |
| "learning_rate": 0.001694521766767623, |
| "loss": 1.2501, |
| "step": 110900 |
| }, |
| { |
| "epoch": 22.32, |
| "learning_rate": 0.001692881232861747, |
| "loss": 1.2477, |
| "step": 111000 |
| }, |
| { |
| "epoch": 22.34, |
| "learning_rate": 0.001691239826135236, |
| "loss": 1.2528, |
| "step": 111100 |
| }, |
| { |
| "epoch": 22.36, |
| "learning_rate": 0.0016895975498229378, |
| "loss": 1.2535, |
| "step": 111200 |
| }, |
| { |
| "epoch": 22.38, |
| "learning_rate": 0.0016879544071614144, |
| "loss": 1.2523, |
| "step": 111300 |
| }, |
| { |
| "epoch": 22.4, |
| "learning_rate": 0.0016863104013889359, |
| "loss": 1.2528, |
| "step": 111400 |
| }, |
| { |
| "epoch": 22.42, |
| "learning_rate": 0.001684681988647626, |
| "loss": 1.248, |
| "step": 111500 |
| }, |
| { |
| "epoch": 22.44, |
| "learning_rate": 0.0016830362749250844, |
| "loss": 1.2526, |
| "step": 111600 |
| }, |
| { |
| "epoch": 22.46, |
| "learning_rate": 0.0016813897077841348, |
| "loss": 1.2497, |
| "step": 111700 |
| }, |
| { |
| "epoch": 22.48, |
| "learning_rate": 0.0016797422904697957, |
| "loss": 1.2518, |
| "step": 111800 |
| }, |
| { |
| "epoch": 22.5, |
| "learning_rate": 0.001678094026228761, |
| "loss": 1.2476, |
| "step": 111900 |
| }, |
| { |
| "epoch": 22.52, |
| "learning_rate": 0.0016764614135541252, |
| "loss": 1.2516, |
| "step": 112000 |
| }, |
| { |
| "epoch": 22.54, |
| "learning_rate": 0.001674811473594641, |
| "loss": 1.2452, |
| "step": 112100 |
| }, |
| { |
| "epoch": 22.56, |
| "learning_rate": 0.0016731606964260073, |
| "loss": 1.2497, |
| "step": 112200 |
| }, |
| { |
| "epoch": 22.58, |
| "learning_rate": 0.0016715090853015398, |
| "loss": 1.2523, |
| "step": 112300 |
| }, |
| { |
| "epoch": 22.6, |
| "learning_rate": 0.0016698566434761963, |
| "loss": 1.2479, |
| "step": 112400 |
| }, |
| { |
| "epoch": 22.62, |
| "learning_rate": 0.0016682033742065746, |
| "loss": 1.2541, |
| "step": 112500 |
| }, |
| { |
| "epoch": 22.64, |
| "learning_rate": 0.0016665492807509006, |
| "loss": 1.2487, |
| "step": 112600 |
| }, |
| { |
| "epoch": 22.66, |
| "learning_rate": 0.0016648943663690257, |
| "loss": 1.2482, |
| "step": 112700 |
| }, |
| { |
| "epoch": 22.68, |
| "learning_rate": 0.0016632386343224186, |
| "loss": 1.2489, |
| "step": 112800 |
| }, |
| { |
| "epoch": 22.7, |
| "learning_rate": 0.0016615820878741606, |
| "loss": 1.2516, |
| "step": 112900 |
| }, |
| { |
| "epoch": 22.72, |
| "learning_rate": 0.0016599247302889358, |
| "loss": 1.2487, |
| "step": 113000 |
| }, |
| { |
| "epoch": 22.74, |
| "learning_rate": 0.0016582665648330298, |
| "loss": 1.2538, |
| "step": 113100 |
| }, |
| { |
| "epoch": 22.76, |
| "learning_rate": 0.0016566075947743175, |
| "loss": 1.2529, |
| "step": 113200 |
| }, |
| { |
| "epoch": 22.78, |
| "learning_rate": 0.0016549478233822618, |
| "loss": 1.2498, |
| "step": 113300 |
| }, |
| { |
| "epoch": 22.8, |
| "learning_rate": 0.0016532872539279028, |
| "loss": 1.2516, |
| "step": 113400 |
| }, |
| { |
| "epoch": 22.82, |
| "learning_rate": 0.0016516258896838553, |
| "loss": 1.2495, |
| "step": 113500 |
| }, |
| { |
| "epoch": 22.84, |
| "learning_rate": 0.0016499637339242989, |
| "loss": 1.2488, |
| "step": 113600 |
| }, |
| { |
| "epoch": 22.86, |
| "learning_rate": 0.0016483007899249745, |
| "loss": 1.2488, |
| "step": 113700 |
| }, |
| { |
| "epoch": 22.88, |
| "learning_rate": 0.0016466370609631749, |
| "loss": 1.2482, |
| "step": 113800 |
| }, |
| { |
| "epoch": 22.9, |
| "learning_rate": 0.0016449725503177412, |
| "loss": 1.2518, |
| "step": 113900 |
| }, |
| { |
| "epoch": 22.92, |
| "learning_rate": 0.0016433072612690542, |
| "loss": 1.2484, |
| "step": 114000 |
| }, |
| { |
| "epoch": 22.94, |
| "learning_rate": 0.0016416411970990297, |
| "loss": 1.2538, |
| "step": 114100 |
| }, |
| { |
| "epoch": 22.96, |
| "learning_rate": 0.0016399743610911097, |
| "loss": 1.2527, |
| "step": 114200 |
| }, |
| { |
| "epoch": 22.98, |
| "learning_rate": 0.0016383067565302588, |
| "loss": 1.2523, |
| "step": 114300 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.4088067653938865, |
| "eval_loss": 1.212856650352478, |
| "eval_runtime": 19.7647, |
| "eval_samples_per_second": 4025.761, |
| "eval_steps_per_second": 15.735, |
| "step": 114384 |
| }, |
| { |
| "epoch": 23.0, |
| "learning_rate": 0.001636638386702955, |
| "loss": 1.2473, |
| "step": 114400 |
| }, |
| { |
| "epoch": 23.02, |
| "learning_rate": 0.0016349692548971854, |
| "loss": 1.2428, |
| "step": 114500 |
| }, |
| { |
| "epoch": 23.04, |
| "learning_rate": 0.001633299364402438, |
| "loss": 1.2422, |
| "step": 114600 |
| }, |
| { |
| "epoch": 23.06, |
| "learning_rate": 0.0016316287185096973, |
| "loss": 1.2432, |
| "step": 114700 |
| }, |
| { |
| "epoch": 23.08, |
| "learning_rate": 0.0016299573205114343, |
| "loss": 1.2439, |
| "step": 114800 |
| }, |
| { |
| "epoch": 23.1, |
| "learning_rate": 0.001628301898865501, |
| "loss": 1.2479, |
| "step": 114900 |
| }, |
| { |
| "epoch": 23.12, |
| "learning_rate": 0.0016266290139783787, |
| "loss": 1.2494, |
| "step": 115000 |
| }, |
| { |
| "epoch": 23.14, |
| "learning_rate": 0.0016249553868390434, |
| "loss": 1.2441, |
| "step": 115100 |
| }, |
| { |
| "epoch": 23.16, |
| "learning_rate": 0.0016232810207458424, |
| "loss": 1.245, |
| "step": 115200 |
| }, |
| { |
| "epoch": 23.18, |
| "learning_rate": 0.0016216059189985796, |
| "loss": 1.2475, |
| "step": 115300 |
| }, |
| { |
| "epoch": 23.2, |
| "learning_rate": 0.0016199300848985091, |
| "loss": 1.2451, |
| "step": 115400 |
| }, |
| { |
| "epoch": 23.22, |
| "learning_rate": 0.0016182535217483282, |
| "loss": 1.247, |
| "step": 115500 |
| }, |
| { |
| "epoch": 23.24, |
| "learning_rate": 0.0016165762328521703, |
| "loss": 1.2434, |
| "step": 115600 |
| }, |
| { |
| "epoch": 23.26, |
| "learning_rate": 0.0016148982215156002, |
| "loss": 1.2469, |
| "step": 115700 |
| }, |
| { |
| "epoch": 23.28, |
| "learning_rate": 0.0016132194910456056, |
| "loss": 1.245, |
| "step": 115800 |
| }, |
| { |
| "epoch": 23.3, |
| "learning_rate": 0.0016115400447505918, |
| "loss": 1.2507, |
| "step": 115900 |
| }, |
| { |
| "epoch": 23.32, |
| "learning_rate": 0.0016098598859403746, |
| "loss": 1.2434, |
| "step": 116000 |
| }, |
| { |
| "epoch": 23.34, |
| "learning_rate": 0.0016081790179261746, |
| "loss": 1.2418, |
| "step": 116100 |
| }, |
| { |
| "epoch": 23.37, |
| "learning_rate": 0.0016064974440206093, |
| "loss": 1.2476, |
| "step": 116200 |
| }, |
| { |
| "epoch": 23.39, |
| "learning_rate": 0.0016048151675376878, |
| "loss": 1.2492, |
| "step": 116300 |
| }, |
| { |
| "epoch": 23.41, |
| "learning_rate": 0.001603132191792804, |
| "loss": 1.2383, |
| "step": 116400 |
| }, |
| { |
| "epoch": 23.43, |
| "learning_rate": 0.0016014485201027297, |
| "loss": 1.2473, |
| "step": 116500 |
| }, |
| { |
| "epoch": 23.45, |
| "learning_rate": 0.0015997641557856073, |
| "loss": 1.2489, |
| "step": 116600 |
| }, |
| { |
| "epoch": 23.47, |
| "learning_rate": 0.0015980791021609464, |
| "loss": 1.2463, |
| "step": 116700 |
| }, |
| { |
| "epoch": 23.49, |
| "learning_rate": 0.001596393362549613, |
| "loss": 1.2469, |
| "step": 116800 |
| }, |
| { |
| "epoch": 23.51, |
| "learning_rate": 0.0015947069402738262, |
| "loss": 1.2443, |
| "step": 116900 |
| }, |
| { |
| "epoch": 23.53, |
| "learning_rate": 0.0015930367130251378, |
| "loss": 1.2448, |
| "step": 117000 |
| }, |
| { |
| "epoch": 23.55, |
| "learning_rate": 0.0015913489421361716, |
| "loss": 1.2469, |
| "step": 117100 |
| }, |
| { |
| "epoch": 23.57, |
| "learning_rate": 0.0015896604985241852, |
| "loss": 1.2453, |
| "step": 117200 |
| }, |
| { |
| "epoch": 23.59, |
| "learning_rate": 0.0015879713855167263, |
| "loss": 1.2467, |
| "step": 117300 |
| }, |
| { |
| "epoch": 23.61, |
| "learning_rate": 0.0015862816064426619, |
| "loss": 1.2437, |
| "step": 117400 |
| }, |
| { |
| "epoch": 23.63, |
| "learning_rate": 0.0015845911646321712, |
| "loss": 1.2453, |
| "step": 117500 |
| }, |
| { |
| "epoch": 23.65, |
| "learning_rate": 0.00158290006341674, |
| "loss": 1.2459, |
| "step": 117600 |
| }, |
| { |
| "epoch": 23.67, |
| "learning_rate": 0.0015812083061291539, |
| "loss": 1.2456, |
| "step": 117700 |
| }, |
| { |
| "epoch": 23.69, |
| "learning_rate": 0.0015795158961034905, |
| "loss": 1.2425, |
| "step": 117800 |
| }, |
| { |
| "epoch": 23.71, |
| "learning_rate": 0.0015778228366751152, |
| "loss": 1.2451, |
| "step": 117900 |
| }, |
| { |
| "epoch": 23.73, |
| "learning_rate": 0.001576129131180672, |
| "loss": 1.246, |
| "step": 118000 |
| }, |
| { |
| "epoch": 23.75, |
| "learning_rate": 0.001574434782958078, |
| "loss": 1.2433, |
| "step": 118100 |
| }, |
| { |
| "epoch": 23.77, |
| "learning_rate": 0.001572739795346519, |
| "loss": 1.2463, |
| "step": 118200 |
| }, |
| { |
| "epoch": 23.79, |
| "learning_rate": 0.001571044171686438, |
| "loss": 1.2468, |
| "step": 118300 |
| }, |
| { |
| "epoch": 23.81, |
| "learning_rate": 0.001569347915319534, |
| "loss": 1.2445, |
| "step": 118400 |
| }, |
| { |
| "epoch": 23.83, |
| "learning_rate": 0.0015676510295887514, |
| "loss": 1.246, |
| "step": 118500 |
| }, |
| { |
| "epoch": 23.85, |
| "learning_rate": 0.0015659535178382759, |
| "loss": 1.2459, |
| "step": 118600 |
| }, |
| { |
| "epoch": 23.87, |
| "learning_rate": 0.0015642553834135256, |
| "loss": 1.2422, |
| "step": 118700 |
| }, |
| { |
| "epoch": 23.89, |
| "learning_rate": 0.0015625566296611475, |
| "loss": 1.2426, |
| "step": 118800 |
| }, |
| { |
| "epoch": 23.91, |
| "learning_rate": 0.0015608572599290084, |
| "loss": 1.2443, |
| "step": 118900 |
| }, |
| { |
| "epoch": 23.93, |
| "learning_rate": 0.001559157277566188, |
| "loss": 1.245, |
| "step": 119000 |
| }, |
| { |
| "epoch": 23.95, |
| "learning_rate": 0.001557456685922975, |
| "loss": 1.2447, |
| "step": 119100 |
| }, |
| { |
| "epoch": 23.97, |
| "learning_rate": 0.0015557554883508585, |
| "loss": 1.245, |
| "step": 119200 |
| }, |
| { |
| "epoch": 23.99, |
| "learning_rate": 0.0015540536882025203, |
| "loss": 1.2394, |
| "step": 119300 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.408813924305172, |
| "eval_loss": 1.208786129951477, |
| "eval_runtime": 19.9606, |
| "eval_samples_per_second": 3986.246, |
| "eval_steps_per_second": 15.581, |
| "step": 119358 |
| }, |
| { |
| "epoch": 24.01, |
| "learning_rate": 0.001552351288831832, |
| "loss": 1.239, |
| "step": 119400 |
| }, |
| { |
| "epoch": 24.03, |
| "learning_rate": 0.0015506482935938443, |
| "loss": 1.2356, |
| "step": 119500 |
| }, |
| { |
| "epoch": 24.05, |
| "learning_rate": 0.0015489447058447835, |
| "loss": 1.2393, |
| "step": 119600 |
| }, |
| { |
| "epoch": 24.07, |
| "learning_rate": 0.0015472405289420421, |
| "loss": 1.2373, |
| "step": 119700 |
| }, |
| { |
| "epoch": 24.09, |
| "learning_rate": 0.0015455357662441758, |
| "loss": 1.2384, |
| "step": 119800 |
| }, |
| { |
| "epoch": 24.11, |
| "learning_rate": 0.0015438304211108924, |
| "loss": 1.2399, |
| "step": 119900 |
| }, |
| { |
| "epoch": 24.13, |
| "learning_rate": 0.00154212449690305, |
| "loss": 1.238, |
| "step": 120000 |
| }, |
| { |
| "epoch": 24.15, |
| "learning_rate": 0.0015404179969826454, |
| "loss": 1.2407, |
| "step": 120100 |
| }, |
| { |
| "epoch": 24.17, |
| "learning_rate": 0.0015387109247128126, |
| "loss": 1.2427, |
| "step": 120200 |
| }, |
| { |
| "epoch": 24.19, |
| "learning_rate": 0.001537003283457811, |
| "loss": 1.2432, |
| "step": 120300 |
| }, |
| { |
| "epoch": 24.21, |
| "learning_rate": 0.0015352950765830234, |
| "loss": 1.2396, |
| "step": 120400 |
| }, |
| { |
| "epoch": 24.23, |
| "learning_rate": 0.001533586307454946, |
| "loss": 1.2404, |
| "step": 120500 |
| }, |
| { |
| "epoch": 24.25, |
| "learning_rate": 0.0015318769794411841, |
| "loss": 1.2383, |
| "step": 120600 |
| }, |
| { |
| "epoch": 24.27, |
| "learning_rate": 0.0015301670959104435, |
| "loss": 1.2408, |
| "step": 120700 |
| }, |
| { |
| "epoch": 24.29, |
| "learning_rate": 0.0015284566602325254, |
| "loss": 1.2389, |
| "step": 120800 |
| }, |
| { |
| "epoch": 24.31, |
| "learning_rate": 0.0015267456757783189, |
| "loss": 1.2397, |
| "step": 120900 |
| }, |
| { |
| "epoch": 24.33, |
| "learning_rate": 0.0015250341459197947, |
| "loss": 1.2415, |
| "step": 121000 |
| }, |
| { |
| "epoch": 24.35, |
| "learning_rate": 0.0015233563207578788, |
| "loss": 1.2356, |
| "step": 121100 |
| }, |
| { |
| "epoch": 24.37, |
| "learning_rate": 0.0015216437209509943, |
| "loss": 1.2364, |
| "step": 121200 |
| }, |
| { |
| "epoch": 24.39, |
| "learning_rate": 0.001519930585794613, |
| "loss": 1.2443, |
| "step": 121300 |
| }, |
| { |
| "epoch": 24.41, |
| "learning_rate": 0.0015182169186649438, |
| "loss": 1.2386, |
| "step": 121400 |
| }, |
| { |
| "epoch": 24.43, |
| "learning_rate": 0.001516502722939245, |
| "loss": 1.2404, |
| "step": 121500 |
| }, |
| { |
| "epoch": 24.45, |
| "learning_rate": 0.0015147880019958154, |
| "loss": 1.2416, |
| "step": 121600 |
| }, |
| { |
| "epoch": 24.47, |
| "learning_rate": 0.0015130727592139904, |
| "loss": 1.2418, |
| "step": 121700 |
| }, |
| { |
| "epoch": 24.49, |
| "learning_rate": 0.0015113741581417957, |
| "loss": 1.2432, |
| "step": 121800 |
| }, |
| { |
| "epoch": 24.51, |
| "learning_rate": 0.001509657886959314, |
| "loss": 1.2408, |
| "step": 121900 |
| }, |
| { |
| "epoch": 24.53, |
| "learning_rate": 0.0015079411040487545, |
| "loss": 1.242, |
| "step": 122000 |
| }, |
| { |
| "epoch": 24.55, |
| "learning_rate": 0.0015062238127935158, |
| "loss": 1.2398, |
| "step": 122100 |
| }, |
| { |
| "epoch": 24.57, |
| "learning_rate": 0.0015045060165779975, |
| "loss": 1.2379, |
| "step": 122200 |
| }, |
| { |
| "epoch": 24.59, |
| "learning_rate": 0.001502787718787595, |
| "loss": 1.2364, |
| "step": 122300 |
| }, |
| { |
| "epoch": 24.61, |
| "learning_rate": 0.0015010689228086916, |
| "loss": 1.2375, |
| "step": 122400 |
| }, |
| { |
| "epoch": 24.63, |
| "learning_rate": 0.0014993496320286532, |
| "loss": 1.2353, |
| "step": 122500 |
| }, |
| { |
| "epoch": 24.65, |
| "learning_rate": 0.00149762984983582, |
| "loss": 1.2391, |
| "step": 122600 |
| }, |
| { |
| "epoch": 24.67, |
| "learning_rate": 0.0014959095796195018, |
| "loss": 1.2424, |
| "step": 122700 |
| }, |
| { |
| "epoch": 24.69, |
| "learning_rate": 0.0014941888247699687, |
| "loss": 1.2423, |
| "step": 122800 |
| }, |
| { |
| "epoch": 24.71, |
| "learning_rate": 0.0014924675886784473, |
| "loss": 1.2402, |
| "step": 122900 |
| }, |
| { |
| "epoch": 24.73, |
| "learning_rate": 0.001490745874737111, |
| "loss": 1.2396, |
| "step": 123000 |
| }, |
| { |
| "epoch": 24.75, |
| "learning_rate": 0.001489023686339077, |
| "loss": 1.2386, |
| "step": 123100 |
| }, |
| { |
| "epoch": 24.77, |
| "learning_rate": 0.001487301026878396, |
| "loss": 1.2402, |
| "step": 123200 |
| }, |
| { |
| "epoch": 24.79, |
| "learning_rate": 0.001485577899750048, |
| "loss": 1.2379, |
| "step": 123300 |
| }, |
| { |
| "epoch": 24.81, |
| "learning_rate": 0.0014838543083499334, |
| "loss": 1.2432, |
| "step": 123400 |
| }, |
| { |
| "epoch": 24.83, |
| "learning_rate": 0.001482130256074869, |
| "loss": 1.24, |
| "step": 123500 |
| }, |
| { |
| "epoch": 24.85, |
| "learning_rate": 0.001480405746322579, |
| "loss": 1.2389, |
| "step": 123600 |
| }, |
| { |
| "epoch": 24.87, |
| "learning_rate": 0.0014786807824916897, |
| "loss": 1.2393, |
| "step": 123700 |
| }, |
| { |
| "epoch": 24.89, |
| "learning_rate": 0.0014769553679817215, |
| "loss": 1.237, |
| "step": 123800 |
| }, |
| { |
| "epoch": 24.91, |
| "learning_rate": 0.0014752295061930846, |
| "loss": 1.2358, |
| "step": 123900 |
| }, |
| { |
| "epoch": 24.93, |
| "learning_rate": 0.0014735032005270684, |
| "loss": 1.2392, |
| "step": 124000 |
| }, |
| { |
| "epoch": 24.95, |
| "learning_rate": 0.0014717764543858392, |
| "loss": 1.2389, |
| "step": 124100 |
| }, |
| { |
| "epoch": 24.97, |
| "learning_rate": 0.00147004927117243, |
| "loss": 1.2392, |
| "step": 124200 |
| }, |
| { |
| "epoch": 24.99, |
| "learning_rate": 0.001468321654290736, |
| "loss": 1.2387, |
| "step": 124300 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.40966806196499067, |
| "eval_loss": 1.2022136449813843, |
| "eval_runtime": 19.7693, |
| "eval_samples_per_second": 4024.827, |
| "eval_steps_per_second": 15.731, |
| "step": 124331 |
| }, |
| { |
| "epoch": 25.01, |
| "learning_rate": 0.0014665936071455062, |
| "loss": 1.2317, |
| "step": 124400 |
| }, |
| { |
| "epoch": 25.03, |
| "learning_rate": 0.0014648651331423384, |
| "loss": 1.23, |
| "step": 124500 |
| }, |
| { |
| "epoch": 25.05, |
| "learning_rate": 0.0014631362356876715, |
| "loss": 1.2321, |
| "step": 124600 |
| }, |
| { |
| "epoch": 25.07, |
| "learning_rate": 0.0014614069181887784, |
| "loss": 1.2331, |
| "step": 124700 |
| }, |
| { |
| "epoch": 25.09, |
| "learning_rate": 0.0014596771840537605, |
| "loss": 1.2315, |
| "step": 124800 |
| }, |
| { |
| "epoch": 25.11, |
| "learning_rate": 0.0014579470366915396, |
| "loss": 1.2357, |
| "step": 124900 |
| }, |
| { |
| "epoch": 25.13, |
| "learning_rate": 0.0014562164795118523, |
| "loss": 1.2365, |
| "step": 125000 |
| }, |
| { |
| "epoch": 25.15, |
| "learning_rate": 0.0014544855159252432, |
| "loss": 1.2316, |
| "step": 125100 |
| }, |
| { |
| "epoch": 25.17, |
| "learning_rate": 0.001452754149343057, |
| "loss": 1.2337, |
| "step": 125200 |
| }, |
| { |
| "epoch": 25.19, |
| "learning_rate": 0.001451022383177433, |
| "loss": 1.235, |
| "step": 125300 |
| }, |
| { |
| "epoch": 25.21, |
| "learning_rate": 0.0014492902208412984, |
| "loss": 1.2352, |
| "step": 125400 |
| }, |
| { |
| "epoch": 25.24, |
| "learning_rate": 0.0014475576657483604, |
| "loss": 1.231, |
| "step": 125500 |
| }, |
| { |
| "epoch": 25.26, |
| "learning_rate": 0.001445824721313101, |
| "loss": 1.2361, |
| "step": 125600 |
| }, |
| { |
| "epoch": 25.28, |
| "learning_rate": 0.0014441087261535155, |
| "loss": 1.2323, |
| "step": 125700 |
| }, |
| { |
| "epoch": 25.3, |
| "learning_rate": 0.0014423750170883193, |
| "loss": 1.2378, |
| "step": 125800 |
| }, |
| { |
| "epoch": 25.32, |
| "learning_rate": 0.001440640928894652, |
| "loss": 1.2323, |
| "step": 125900 |
| }, |
| { |
| "epoch": 25.34, |
| "learning_rate": 0.0014389064649900165, |
| "loss": 1.2387, |
| "step": 126000 |
| }, |
| { |
| "epoch": 25.36, |
| "learning_rate": 0.001437171628792657, |
| "loss": 1.2401, |
| "step": 126100 |
| }, |
| { |
| "epoch": 25.38, |
| "learning_rate": 0.0014354364237215494, |
| "loss": 1.2334, |
| "step": 126200 |
| }, |
| { |
| "epoch": 25.4, |
| "learning_rate": 0.0014337008531963994, |
| "loss": 1.2348, |
| "step": 126300 |
| }, |
| { |
| "epoch": 25.42, |
| "learning_rate": 0.0014319649206376301, |
| "loss": 1.236, |
| "step": 126400 |
| }, |
| { |
| "epoch": 25.44, |
| "learning_rate": 0.00143022862946638, |
| "loss": 1.2343, |
| "step": 126500 |
| }, |
| { |
| "epoch": 25.46, |
| "learning_rate": 0.0014284919831044935, |
| "loss": 1.2348, |
| "step": 126600 |
| }, |
| { |
| "epoch": 25.48, |
| "learning_rate": 0.0014267549849745155, |
| "loss": 1.2337, |
| "step": 126700 |
| }, |
| { |
| "epoch": 25.5, |
| "learning_rate": 0.0014250176384996832, |
| "loss": 1.2328, |
| "step": 126800 |
| }, |
| { |
| "epoch": 25.52, |
| "learning_rate": 0.0014232799471039221, |
| "loss": 1.2345, |
| "step": 126900 |
| }, |
| { |
| "epoch": 25.54, |
| "learning_rate": 0.0014215419142118353, |
| "loss": 1.2391, |
| "step": 127000 |
| }, |
| { |
| "epoch": 25.56, |
| "learning_rate": 0.0014198035432487004, |
| "loss": 1.2323, |
| "step": 127100 |
| }, |
| { |
| "epoch": 25.58, |
| "learning_rate": 0.0014180648376404608, |
| "loss": 1.2352, |
| "step": 127200 |
| }, |
| { |
| "epoch": 25.6, |
| "learning_rate": 0.0014163258008137198, |
| "loss": 1.2319, |
| "step": 127300 |
| }, |
| { |
| "epoch": 25.62, |
| "learning_rate": 0.0014145864361957325, |
| "loss": 1.2329, |
| "step": 127400 |
| }, |
| { |
| "epoch": 25.64, |
| "learning_rate": 0.001412846747214401, |
| "loss": 1.2351, |
| "step": 127500 |
| }, |
| { |
| "epoch": 25.66, |
| "learning_rate": 0.001411106737298266, |
| "loss": 1.2341, |
| "step": 127600 |
| }, |
| { |
| "epoch": 25.68, |
| "learning_rate": 0.001409366409876501, |
| "loss": 1.2328, |
| "step": 127700 |
| }, |
| { |
| "epoch": 25.7, |
| "learning_rate": 0.001407625768378905, |
| "loss": 1.235, |
| "step": 127800 |
| }, |
| { |
| "epoch": 25.72, |
| "learning_rate": 0.0014058848162358966, |
| "loss": 1.2304, |
| "step": 127900 |
| }, |
| { |
| "epoch": 25.74, |
| "learning_rate": 0.001404160970981524, |
| "loss": 1.2324, |
| "step": 128000 |
| }, |
| { |
| "epoch": 25.76, |
| "learning_rate": 0.0014024194108622253, |
| "loss": 1.2356, |
| "step": 128100 |
| }, |
| { |
| "epoch": 25.78, |
| "learning_rate": 0.0014006775503580902, |
| "loss": 1.2296, |
| "step": 128200 |
| }, |
| { |
| "epoch": 25.8, |
| "learning_rate": 0.0013989353929019378, |
| "loss": 1.2354, |
| "step": 128300 |
| }, |
| { |
| "epoch": 25.82, |
| "learning_rate": 0.0013971929419271745, |
| "loss": 1.2314, |
| "step": 128400 |
| }, |
| { |
| "epoch": 25.84, |
| "learning_rate": 0.0013954502008677843, |
| "loss": 1.2336, |
| "step": 128500 |
| }, |
| { |
| "epoch": 25.86, |
| "learning_rate": 0.0013937071731583237, |
| "loss": 1.2356, |
| "step": 128600 |
| }, |
| { |
| "epoch": 25.88, |
| "learning_rate": 0.001391963862233913, |
| "loss": 1.2314, |
| "step": 128700 |
| }, |
| { |
| "epoch": 25.9, |
| "learning_rate": 0.0013902202715302314, |
| "loss": 1.2336, |
| "step": 128800 |
| }, |
| { |
| "epoch": 25.92, |
| "learning_rate": 0.0013884764044835088, |
| "loss": 1.234, |
| "step": 128900 |
| }, |
| { |
| "epoch": 25.94, |
| "learning_rate": 0.001386732264530521, |
| "loss": 1.2355, |
| "step": 129000 |
| }, |
| { |
| "epoch": 25.96, |
| "learning_rate": 0.0013849878551085804, |
| "loss": 1.2334, |
| "step": 129100 |
| }, |
| { |
| "epoch": 25.98, |
| "learning_rate": 0.0013832431796555308, |
| "loss": 1.2339, |
| "step": 129200 |
| }, |
| { |
| "epoch": 26.0, |
| "learning_rate": 0.0013814982416097406, |
| "loss": 1.234, |
| "step": 129300 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.4099652363267976, |
| "eval_loss": 1.1980103254318237, |
| "eval_runtime": 19.8186, |
| "eval_samples_per_second": 4014.806, |
| "eval_steps_per_second": 15.692, |
| "step": 129304 |
| }, |
| { |
| "epoch": 26.02, |
| "learning_rate": 0.0013797530444100952, |
| "loss": 1.2258, |
| "step": 129400 |
| }, |
| { |
| "epoch": 26.04, |
| "learning_rate": 0.0013780075914959912, |
| "loss": 1.2242, |
| "step": 129500 |
| }, |
| { |
| "epoch": 26.06, |
| "learning_rate": 0.001376261886307329, |
| "loss": 1.2273, |
| "step": 129600 |
| }, |
| { |
| "epoch": 26.08, |
| "learning_rate": 0.001374515932284506, |
| "loss": 1.2251, |
| "step": 129700 |
| }, |
| { |
| "epoch": 26.1, |
| "learning_rate": 0.001372787196065969, |
| "loss": 1.23, |
| "step": 129800 |
| }, |
| { |
| "epoch": 26.12, |
| "learning_rate": 0.0013710407571004548, |
| "loss": 1.2325, |
| "step": 129900 |
| }, |
| { |
| "epoch": 26.14, |
| "learning_rate": 0.0013692940795904673, |
| "loss": 1.2274, |
| "step": 130000 |
| }, |
| { |
| "epoch": 26.16, |
| "learning_rate": 0.0013675471669783198, |
| "loss": 1.2278, |
| "step": 130100 |
| }, |
| { |
| "epoch": 26.18, |
| "learning_rate": 0.0013658000227067901, |
| "loss": 1.2245, |
| "step": 130200 |
| }, |
| { |
| "epoch": 26.2, |
| "learning_rate": 0.0013640526502191111, |
| "loss": 1.2266, |
| "step": 130300 |
| }, |
| { |
| "epoch": 26.22, |
| "learning_rate": 0.0013623050529589667, |
| "loss": 1.2315, |
| "step": 130400 |
| }, |
| { |
| "epoch": 26.24, |
| "learning_rate": 0.001360557234370483, |
| "loss": 1.2302, |
| "step": 130500 |
| }, |
| { |
| "epoch": 26.26, |
| "learning_rate": 0.001358809197898223, |
| "loss": 1.2324, |
| "step": 130600 |
| }, |
| { |
| "epoch": 26.28, |
| "learning_rate": 0.0013570609469871781, |
| "loss": 1.2305, |
| "step": 130700 |
| }, |
| { |
| "epoch": 26.3, |
| "learning_rate": 0.001355312485082764, |
| "loss": 1.2294, |
| "step": 130800 |
| }, |
| { |
| "epoch": 26.32, |
| "learning_rate": 0.0013535638156308098, |
| "loss": 1.2278, |
| "step": 130900 |
| }, |
| { |
| "epoch": 26.34, |
| "learning_rate": 0.0013518149420775557, |
| "loss": 1.224, |
| "step": 131000 |
| }, |
| { |
| "epoch": 26.36, |
| "learning_rate": 0.0013500658678696432, |
| "loss": 1.2296, |
| "step": 131100 |
| }, |
| { |
| "epoch": 26.38, |
| "learning_rate": 0.00134831659645411, |
| "loss": 1.228, |
| "step": 131200 |
| }, |
| { |
| "epoch": 26.4, |
| "learning_rate": 0.0013465671312783809, |
| "loss": 1.2245, |
| "step": 131300 |
| }, |
| { |
| "epoch": 26.42, |
| "learning_rate": 0.0013448174757902645, |
| "loss": 1.2305, |
| "step": 131400 |
| }, |
| { |
| "epoch": 26.44, |
| "learning_rate": 0.0013430676334379426, |
| "loss": 1.2305, |
| "step": 131500 |
| }, |
| { |
| "epoch": 26.46, |
| "learning_rate": 0.0013413176076699674, |
| "loss": 1.2294, |
| "step": 131600 |
| }, |
| { |
| "epoch": 26.48, |
| "learning_rate": 0.0013395674019352503, |
| "loss": 1.2312, |
| "step": 131700 |
| }, |
| { |
| "epoch": 26.5, |
| "learning_rate": 0.0013378170196830588, |
| "loss": 1.2278, |
| "step": 131800 |
| }, |
| { |
| "epoch": 26.52, |
| "learning_rate": 0.001336066464363008, |
| "loss": 1.2289, |
| "step": 131900 |
| }, |
| { |
| "epoch": 26.54, |
| "learning_rate": 0.0013343157394250537, |
| "loss": 1.2242, |
| "step": 132000 |
| }, |
| { |
| "epoch": 26.56, |
| "learning_rate": 0.0013325648483194865, |
| "loss": 1.2275, |
| "step": 132100 |
| }, |
| { |
| "epoch": 26.58, |
| "learning_rate": 0.0013308137944969248, |
| "loss": 1.2262, |
| "step": 132200 |
| }, |
| { |
| "epoch": 26.6, |
| "learning_rate": 0.001329062581408306, |
| "loss": 1.2289, |
| "step": 132300 |
| }, |
| { |
| "epoch": 26.62, |
| "learning_rate": 0.0013273112125048833, |
| "loss": 1.2341, |
| "step": 132400 |
| }, |
| { |
| "epoch": 26.64, |
| "learning_rate": 0.0013255596912382156, |
| "loss": 1.2288, |
| "step": 132500 |
| }, |
| { |
| "epoch": 26.66, |
| "learning_rate": 0.0013238080210601635, |
| "loss": 1.2267, |
| "step": 132600 |
| }, |
| { |
| "epoch": 26.68, |
| "learning_rate": 0.0013220562054228793, |
| "loss": 1.2267, |
| "step": 132700 |
| }, |
| { |
| "epoch": 26.7, |
| "learning_rate": 0.0013203042477788038, |
| "loss": 1.2287, |
| "step": 132800 |
| }, |
| { |
| "epoch": 26.72, |
| "learning_rate": 0.001318552151580656, |
| "loss": 1.226, |
| "step": 132900 |
| }, |
| { |
| "epoch": 26.74, |
| "learning_rate": 0.001316799920281429, |
| "loss": 1.2294, |
| "step": 133000 |
| }, |
| { |
| "epoch": 26.76, |
| "learning_rate": 0.0013150475573343817, |
| "loss": 1.2272, |
| "step": 133100 |
| }, |
| { |
| "epoch": 26.78, |
| "learning_rate": 0.0013132950661930332, |
| "loss": 1.23, |
| "step": 133200 |
| }, |
| { |
| "epoch": 26.8, |
| "learning_rate": 0.001311542450311154, |
| "loss": 1.2282, |
| "step": 133300 |
| }, |
| { |
| "epoch": 26.82, |
| "learning_rate": 0.0013097897131427616, |
| "loss": 1.2297, |
| "step": 133400 |
| }, |
| { |
| "epoch": 26.84, |
| "learning_rate": 0.0013080368581421117, |
| "loss": 1.2269, |
| "step": 133500 |
| }, |
| { |
| "epoch": 26.86, |
| "learning_rate": 0.0013062838887636927, |
| "loss": 1.2283, |
| "step": 133600 |
| }, |
| { |
| "epoch": 26.88, |
| "learning_rate": 0.001304530808462218, |
| "loss": 1.2292, |
| "step": 133700 |
| }, |
| { |
| "epoch": 26.9, |
| "learning_rate": 0.0013027776206926205, |
| "loss": 1.2306, |
| "step": 133800 |
| }, |
| { |
| "epoch": 26.92, |
| "learning_rate": 0.0013010243289100437, |
| "loss": 1.2264, |
| "step": 133900 |
| }, |
| { |
| "epoch": 26.94, |
| "learning_rate": 0.0012992709365698368, |
| "loss": 1.2249, |
| "step": 134000 |
| }, |
| { |
| "epoch": 26.96, |
| "learning_rate": 0.001297517447127547, |
| "loss": 1.2279, |
| "step": 134100 |
| }, |
| { |
| "epoch": 26.98, |
| "learning_rate": 0.001295763864038913, |
| "loss": 1.2272, |
| "step": 134200 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.41072249005388595, |
| "eval_loss": 1.1898874044418335, |
| "eval_runtime": 19.6468, |
| "eval_samples_per_second": 4049.93, |
| "eval_steps_per_second": 15.83, |
| "step": 134277 |
| }, |
| { |
| "epoch": 27.0, |
| "learning_rate": 0.0012940101907598575, |
| "loss": 1.2222, |
| "step": 134300 |
| }, |
| { |
| "epoch": 27.02, |
| "learning_rate": 0.0012922564307464824, |
| "loss": 1.2168, |
| "step": 134400 |
| }, |
| { |
| "epoch": 27.04, |
| "learning_rate": 0.0012905025874550586, |
| "loss": 1.2195, |
| "step": 134500 |
| }, |
| { |
| "epoch": 27.06, |
| "learning_rate": 0.0012887486643420223, |
| "loss": 1.2188, |
| "step": 134600 |
| }, |
| { |
| "epoch": 27.08, |
| "learning_rate": 0.001286994664863967, |
| "loss": 1.2162, |
| "step": 134700 |
| }, |
| { |
| "epoch": 27.11, |
| "learning_rate": 0.0012852405924776362, |
| "loss": 1.2236, |
| "step": 134800 |
| }, |
| { |
| "epoch": 27.13, |
| "learning_rate": 0.0012834864506399174, |
| "loss": 1.2245, |
| "step": 134900 |
| }, |
| { |
| "epoch": 27.15, |
| "learning_rate": 0.001281732242807835, |
| "loss": 1.2193, |
| "step": 135000 |
| }, |
| { |
| "epoch": 27.17, |
| "learning_rate": 0.0012799779724385432, |
| "loss": 1.2235, |
| "step": 135100 |
| }, |
| { |
| "epoch": 27.19, |
| "learning_rate": 0.0012782236429893202, |
| "loss": 1.2252, |
| "step": 135200 |
| }, |
| { |
| "epoch": 27.21, |
| "learning_rate": 0.0012764692579175594, |
| "loss": 1.2253, |
| "step": 135300 |
| }, |
| { |
| "epoch": 27.23, |
| "learning_rate": 0.0012747148206807646, |
| "loss": 1.2226, |
| "step": 135400 |
| }, |
| { |
| "epoch": 27.25, |
| "learning_rate": 0.0012729603347365424, |
| "loss": 1.2217, |
| "step": 135500 |
| }, |
| { |
| "epoch": 27.27, |
| "learning_rate": 0.0012712058035425956, |
| "loss": 1.2222, |
| "step": 135600 |
| }, |
| { |
| "epoch": 27.29, |
| "learning_rate": 0.0012694512305567152, |
| "loss": 1.2209, |
| "step": 135700 |
| }, |
| { |
| "epoch": 27.31, |
| "learning_rate": 0.0012677141655283745, |
| "loss": 1.2256, |
| "step": 135800 |
| }, |
| { |
| "epoch": 27.33, |
| "learning_rate": 0.0012659595196639695, |
| "loss": 1.2215, |
| "step": 135900 |
| }, |
| { |
| "epoch": 27.35, |
| "learning_rate": 0.0012642048423468924, |
| "loss": 1.2238, |
| "step": 136000 |
| }, |
| { |
| "epoch": 27.37, |
| "learning_rate": 0.0012624501370352233, |
| "loss": 1.222, |
| "step": 136100 |
| }, |
| { |
| "epoch": 27.39, |
| "learning_rate": 0.0012606954071870964, |
| "loss": 1.2196, |
| "step": 136200 |
| }, |
| { |
| "epoch": 27.41, |
| "learning_rate": 0.0012589406562606954, |
| "loss": 1.2271, |
| "step": 136300 |
| }, |
| { |
| "epoch": 27.43, |
| "learning_rate": 0.0012571858877142449, |
| "loss": 1.2228, |
| "step": 136400 |
| }, |
| { |
| "epoch": 27.45, |
| "learning_rate": 0.001255431105006004, |
| "loss": 1.2225, |
| "step": 136500 |
| }, |
| { |
| "epoch": 27.47, |
| "learning_rate": 0.0012536763115942604, |
| "loss": 1.2253, |
| "step": 136600 |
| }, |
| { |
| "epoch": 27.49, |
| "learning_rate": 0.0012519215109373229, |
| "loss": 1.222, |
| "step": 136700 |
| }, |
| { |
| "epoch": 27.51, |
| "learning_rate": 0.001250166706493513, |
| "loss": 1.2234, |
| "step": 136800 |
| }, |
| { |
| "epoch": 27.53, |
| "learning_rate": 0.001248411901721162, |
| "loss": 1.2235, |
| "step": 136900 |
| }, |
| { |
| "epoch": 27.55, |
| "learning_rate": 0.0012466571000786, |
| "loss": 1.2218, |
| "step": 137000 |
| }, |
| { |
| "epoch": 27.57, |
| "learning_rate": 0.001244902305024152, |
| "loss": 1.2255, |
| "step": 137100 |
| }, |
| { |
| "epoch": 27.59, |
| "learning_rate": 0.0012431475200161302, |
| "loss": 1.2253, |
| "step": 137200 |
| }, |
| { |
| "epoch": 27.61, |
| "learning_rate": 0.0012413927485128253, |
| "loss": 1.2223, |
| "step": 137300 |
| }, |
| { |
| "epoch": 27.63, |
| "learning_rate": 0.001239637993972503, |
| "loss": 1.2251, |
| "step": 137400 |
| }, |
| { |
| "epoch": 27.65, |
| "learning_rate": 0.0012378832598533957, |
| "loss": 1.2225, |
| "step": 137500 |
| }, |
| { |
| "epoch": 27.67, |
| "learning_rate": 0.0012361285496136948, |
| "loss": 1.2206, |
| "step": 137600 |
| }, |
| { |
| "epoch": 27.69, |
| "learning_rate": 0.001234373866711544, |
| "loss": 1.2238, |
| "step": 137700 |
| }, |
| { |
| "epoch": 27.71, |
| "learning_rate": 0.0012326192146050346, |
| "loss": 1.2259, |
| "step": 137800 |
| }, |
| { |
| "epoch": 27.73, |
| "learning_rate": 0.0012308645967521966, |
| "loss": 1.2215, |
| "step": 137900 |
| }, |
| { |
| "epoch": 27.75, |
| "learning_rate": 0.0012291100166109926, |
| "loss": 1.2229, |
| "step": 138000 |
| }, |
| { |
| "epoch": 27.77, |
| "learning_rate": 0.0012273554776393101, |
| "loss": 1.2194, |
| "step": 138100 |
| }, |
| { |
| "epoch": 27.79, |
| "learning_rate": 0.0012256009832949562, |
| "loss": 1.2188, |
| "step": 138200 |
| }, |
| { |
| "epoch": 27.81, |
| "learning_rate": 0.0012238465370356501, |
| "loss": 1.2199, |
| "step": 138300 |
| }, |
| { |
| "epoch": 27.83, |
| "learning_rate": 0.0012220921423190167, |
| "loss": 1.2211, |
| "step": 138400 |
| }, |
| { |
| "epoch": 27.85, |
| "learning_rate": 0.0012203378026025777, |
| "loss": 1.2184, |
| "step": 138500 |
| }, |
| { |
| "epoch": 27.87, |
| "learning_rate": 0.0012185835213437478, |
| "loss": 1.2187, |
| "step": 138600 |
| }, |
| { |
| "epoch": 27.89, |
| "learning_rate": 0.0012168293019998258, |
| "loss": 1.2232, |
| "step": 138700 |
| }, |
| { |
| "epoch": 27.91, |
| "learning_rate": 0.0012150751480279897, |
| "loss": 1.2241, |
| "step": 138800 |
| }, |
| { |
| "epoch": 27.93, |
| "learning_rate": 0.0012133210628852867, |
| "loss": 1.2215, |
| "step": 138900 |
| }, |
| { |
| "epoch": 27.95, |
| "learning_rate": 0.0012115670500286294, |
| "loss": 1.2231, |
| "step": 139000 |
| }, |
| { |
| "epoch": 27.97, |
| "learning_rate": 0.0012098131129147888, |
| "loss": 1.2214, |
| "step": 139100 |
| }, |
| { |
| "epoch": 27.99, |
| "learning_rate": 0.0012080767931761424, |
| "loss": 1.2187, |
| "step": 139200 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.4111801831154055, |
| "eval_loss": 1.1840488910675049, |
| "eval_runtime": 19.7999, |
| "eval_samples_per_second": 4018.599, |
| "eval_steps_per_second": 15.707, |
| "step": 139251 |
| }, |
| { |
| "epoch": 28.01, |
| "learning_rate": 0.0012063230170739731, |
| "loss": 1.2143, |
| "step": 139300 |
| }, |
| { |
| "epoch": 28.03, |
| "learning_rate": 0.0012045693270494448, |
| "loss": 1.211, |
| "step": 139400 |
| }, |
| { |
| "epoch": 28.05, |
| "learning_rate": 0.0012028157265586918, |
| "loss": 1.2165, |
| "step": 139500 |
| }, |
| { |
| "epoch": 28.07, |
| "learning_rate": 0.0012010622190576717, |
| "loss": 1.217, |
| "step": 139600 |
| }, |
| { |
| "epoch": 28.09, |
| "learning_rate": 0.001199308808002159, |
| "loss": 1.2124, |
| "step": 139700 |
| }, |
| { |
| "epoch": 28.11, |
| "learning_rate": 0.001197555496847737, |
| "loss": 1.2182, |
| "step": 139800 |
| }, |
| { |
| "epoch": 28.13, |
| "learning_rate": 0.0011958022890497934, |
| "loss": 1.2156, |
| "step": 139900 |
| }, |
| { |
| "epoch": 28.15, |
| "learning_rate": 0.0011940491880635118, |
| "loss": 1.2185, |
| "step": 140000 |
| }, |
| { |
| "epoch": 28.17, |
| "learning_rate": 0.0011922961973438657, |
| "loss": 1.2143, |
| "step": 140100 |
| }, |
| { |
| "epoch": 28.19, |
| "learning_rate": 0.0011905433203456097, |
| "loss": 1.2148, |
| "step": 140200 |
| }, |
| { |
| "epoch": 28.21, |
| "learning_rate": 0.001188790560523276, |
| "loss": 1.2149, |
| "step": 140300 |
| }, |
| { |
| "epoch": 28.23, |
| "learning_rate": 0.0011870379213311652, |
| "loss": 1.2188, |
| "step": 140400 |
| }, |
| { |
| "epoch": 28.25, |
| "learning_rate": 0.0011852854062233409, |
| "loss": 1.2153, |
| "step": 140500 |
| }, |
| { |
| "epoch": 28.27, |
| "learning_rate": 0.0011835330186536204, |
| "loss": 1.2163, |
| "step": 140600 |
| }, |
| { |
| "epoch": 28.29, |
| "learning_rate": 0.0011817807620755712, |
| "loss": 1.2149, |
| "step": 140700 |
| }, |
| { |
| "epoch": 28.31, |
| "learning_rate": 0.001180028639942502, |
| "loss": 1.2188, |
| "step": 140800 |
| }, |
| { |
| "epoch": 28.33, |
| "learning_rate": 0.0011782766557074578, |
| "loss": 1.2193, |
| "step": 140900 |
| }, |
| { |
| "epoch": 28.35, |
| "learning_rate": 0.0011765248128232095, |
| "loss": 1.2181, |
| "step": 141000 |
| }, |
| { |
| "epoch": 28.37, |
| "learning_rate": 0.001174773114742251, |
| "loss": 1.2133, |
| "step": 141100 |
| }, |
| { |
| "epoch": 28.39, |
| "learning_rate": 0.0011730215649167904, |
| "loss": 1.2198, |
| "step": 141200 |
| }, |
| { |
| "epoch": 28.41, |
| "learning_rate": 0.001171270166798745, |
| "loss": 1.2163, |
| "step": 141300 |
| }, |
| { |
| "epoch": 28.43, |
| "learning_rate": 0.00116951892383973, |
| "loss": 1.219, |
| "step": 141400 |
| }, |
| { |
| "epoch": 28.45, |
| "learning_rate": 0.0011677678394910577, |
| "loss": 1.2176, |
| "step": 141500 |
| }, |
| { |
| "epoch": 28.47, |
| "learning_rate": 0.0011660169172037266, |
| "loss": 1.2186, |
| "step": 141600 |
| }, |
| { |
| "epoch": 28.49, |
| "learning_rate": 0.0011642661604284164, |
| "loss": 1.2152, |
| "step": 141700 |
| }, |
| { |
| "epoch": 28.51, |
| "learning_rate": 0.0011625155726154794, |
| "loss": 1.2113, |
| "step": 141800 |
| }, |
| { |
| "epoch": 28.53, |
| "learning_rate": 0.0011607651572149362, |
| "loss": 1.2138, |
| "step": 141900 |
| }, |
| { |
| "epoch": 28.55, |
| "learning_rate": 0.001159014917676467, |
| "loss": 1.2151, |
| "step": 142000 |
| }, |
| { |
| "epoch": 28.57, |
| "learning_rate": 0.0011572648574494063, |
| "loss": 1.217, |
| "step": 142100 |
| }, |
| { |
| "epoch": 28.59, |
| "learning_rate": 0.001155514979982733, |
| "loss": 1.2148, |
| "step": 142200 |
| }, |
| { |
| "epoch": 28.61, |
| "learning_rate": 0.0011537652887250683, |
| "loss": 1.2149, |
| "step": 142300 |
| }, |
| { |
| "epoch": 28.63, |
| "learning_rate": 0.0011520157871246655, |
| "loss": 1.2161, |
| "step": 142400 |
| }, |
| { |
| "epoch": 28.65, |
| "learning_rate": 0.001150266478629404, |
| "loss": 1.2111, |
| "step": 142500 |
| }, |
| { |
| "epoch": 28.67, |
| "learning_rate": 0.001148517366686782, |
| "loss": 1.2201, |
| "step": 142600 |
| }, |
| { |
| "epoch": 28.69, |
| "learning_rate": 0.0011467684547439116, |
| "loss": 1.2148, |
| "step": 142700 |
| }, |
| { |
| "epoch": 28.71, |
| "learning_rate": 0.00114501974624751, |
| "loss": 1.2185, |
| "step": 142800 |
| }, |
| { |
| "epoch": 28.73, |
| "learning_rate": 0.0011432887286244955, |
| "loss": 1.2136, |
| "step": 142900 |
| }, |
| { |
| "epoch": 28.75, |
| "learning_rate": 0.0011415404352391302, |
| "loss": 1.2153, |
| "step": 143000 |
| }, |
| { |
| "epoch": 28.77, |
| "learning_rate": 0.0011397923556035006, |
| "loss": 1.2159, |
| "step": 143100 |
| }, |
| { |
| "epoch": 28.79, |
| "learning_rate": 0.0011380444931626827, |
| "loss": 1.2155, |
| "step": 143200 |
| }, |
| { |
| "epoch": 28.81, |
| "learning_rate": 0.0011362968513613262, |
| "loss": 1.2182, |
| "step": 143300 |
| }, |
| { |
| "epoch": 28.83, |
| "learning_rate": 0.001134549433643645, |
| "loss": 1.2113, |
| "step": 143400 |
| }, |
| { |
| "epoch": 28.85, |
| "learning_rate": 0.0011328022434534126, |
| "loss": 1.2165, |
| "step": 143500 |
| }, |
| { |
| "epoch": 28.87, |
| "learning_rate": 0.0011310552842339516, |
| "loss": 1.2119, |
| "step": 143600 |
| }, |
| { |
| "epoch": 28.89, |
| "learning_rate": 0.001129308559428132, |
| "loss": 1.2145, |
| "step": 143700 |
| }, |
| { |
| "epoch": 28.91, |
| "learning_rate": 0.0011275620724783605, |
| "loss": 1.2153, |
| "step": 143800 |
| }, |
| { |
| "epoch": 28.93, |
| "learning_rate": 0.001125815826826576, |
| "loss": 1.2151, |
| "step": 143900 |
| }, |
| { |
| "epoch": 28.95, |
| "learning_rate": 0.0011240698259142399, |
| "loss": 1.219, |
| "step": 144000 |
| }, |
| { |
| "epoch": 28.98, |
| "learning_rate": 0.0011223240731823335, |
| "loss": 1.2149, |
| "step": 144100 |
| }, |
| { |
| "epoch": 29.0, |
| "learning_rate": 0.0011205785720713479, |
| "loss": 1.2162, |
| "step": 144200 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.41123713623185454, |
| "eval_loss": 1.1828089952468872, |
| "eval_runtime": 19.8923, |
| "eval_samples_per_second": 3999.934, |
| "eval_steps_per_second": 15.634, |
| "step": 144224 |
| }, |
| { |
| "epoch": 29.02, |
| "learning_rate": 0.0011188333260212788, |
| "loss": 1.2079, |
| "step": 144300 |
| }, |
| { |
| "epoch": 29.04, |
| "learning_rate": 0.001117088338471618, |
| "loss": 1.2047, |
| "step": 144400 |
| }, |
| { |
| "epoch": 29.06, |
| "learning_rate": 0.001115343612861349, |
| "loss": 1.2053, |
| "step": 144500 |
| }, |
| { |
| "epoch": 29.08, |
| "learning_rate": 0.0011135991526289393, |
| "loss": 1.2075, |
| "step": 144600 |
| }, |
| { |
| "epoch": 29.1, |
| "learning_rate": 0.0011118549612123333, |
| "loss": 1.2061, |
| "step": 144700 |
| }, |
| { |
| "epoch": 29.12, |
| "learning_rate": 0.0011101110420489442, |
| "loss": 1.2101, |
| "step": 144800 |
| }, |
| { |
| "epoch": 29.14, |
| "learning_rate": 0.0011083673985756498, |
| "loss": 1.2117, |
| "step": 144900 |
| }, |
| { |
| "epoch": 29.16, |
| "learning_rate": 0.001106624034228785, |
| "loss": 1.2104, |
| "step": 145000 |
| }, |
| { |
| "epoch": 29.18, |
| "learning_rate": 0.0011048809524441346, |
| "loss": 1.2105, |
| "step": 145100 |
| }, |
| { |
| "epoch": 29.2, |
| "learning_rate": 0.0011031381566569247, |
| "loss": 1.2052, |
| "step": 145200 |
| }, |
| { |
| "epoch": 29.22, |
| "learning_rate": 0.0011013956503018196, |
| "loss": 1.2104, |
| "step": 145300 |
| }, |
| { |
| "epoch": 29.24, |
| "learning_rate": 0.0010996534368129128, |
| "loss": 1.2103, |
| "step": 145400 |
| }, |
| { |
| "epoch": 29.26, |
| "learning_rate": 0.0010979115196237208, |
| "loss": 1.2142, |
| "step": 145500 |
| }, |
| { |
| "epoch": 29.28, |
| "learning_rate": 0.001096169902167175, |
| "loss": 1.2038, |
| "step": 145600 |
| }, |
| { |
| "epoch": 29.3, |
| "learning_rate": 0.0010944285878756177, |
| "loss": 1.2115, |
| "step": 145700 |
| }, |
| { |
| "epoch": 29.32, |
| "learning_rate": 0.0010926875801807927, |
| "loss": 1.2122, |
| "step": 145800 |
| }, |
| { |
| "epoch": 29.34, |
| "learning_rate": 0.0010909468825138404, |
| "loss": 1.2085, |
| "step": 145900 |
| }, |
| { |
| "epoch": 29.36, |
| "learning_rate": 0.0010892064983052884, |
| "loss": 1.2095, |
| "step": 146000 |
| }, |
| { |
| "epoch": 29.38, |
| "learning_rate": 0.0010874664309850487, |
| "loss": 1.2102, |
| "step": 146100 |
| }, |
| { |
| "epoch": 29.4, |
| "learning_rate": 0.0010857266839824074, |
| "loss": 1.2116, |
| "step": 146200 |
| }, |
| { |
| "epoch": 29.42, |
| "learning_rate": 0.0010839872607260209, |
| "loss": 1.2121, |
| "step": 146300 |
| }, |
| { |
| "epoch": 29.44, |
| "learning_rate": 0.0010822481646439047, |
| "loss": 1.2101, |
| "step": 146400 |
| }, |
| { |
| "epoch": 29.46, |
| "learning_rate": 0.0010805093991634325, |
| "loss": 1.2077, |
| "step": 146500 |
| }, |
| { |
| "epoch": 29.48, |
| "learning_rate": 0.001078770967711325, |
| "loss": 1.2103, |
| "step": 146600 |
| }, |
| { |
| "epoch": 29.5, |
| "learning_rate": 0.001077032873713645, |
| "loss": 1.2079, |
| "step": 146700 |
| }, |
| { |
| "epoch": 29.52, |
| "learning_rate": 0.0010752951205957896, |
| "loss": 1.2105, |
| "step": 146800 |
| }, |
| { |
| "epoch": 29.54, |
| "learning_rate": 0.0010735577117824847, |
| "loss": 1.2118, |
| "step": 146900 |
| }, |
| { |
| "epoch": 29.56, |
| "learning_rate": 0.0010718206506977778, |
| "loss": 1.2105, |
| "step": 147000 |
| }, |
| { |
| "epoch": 29.58, |
| "learning_rate": 0.0010700839407650313, |
| "loss": 1.2045, |
| "step": 147100 |
| }, |
| { |
| "epoch": 29.6, |
| "learning_rate": 0.001068347585406914, |
| "loss": 1.2123, |
| "step": 147200 |
| }, |
| { |
| "epoch": 29.62, |
| "learning_rate": 0.0010666115880453974, |
| "loss": 1.2078, |
| "step": 147300 |
| }, |
| { |
| "epoch": 29.64, |
| "learning_rate": 0.0010648759521017476, |
| "loss": 1.2099, |
| "step": 147400 |
| }, |
| { |
| "epoch": 29.66, |
| "learning_rate": 0.0010631406809965178, |
| "loss": 1.2085, |
| "step": 147500 |
| }, |
| { |
| "epoch": 29.68, |
| "learning_rate": 0.0010614057781495414, |
| "loss": 1.2119, |
| "step": 147600 |
| }, |
| { |
| "epoch": 29.7, |
| "learning_rate": 0.001059671246979928, |
| "loss": 1.2093, |
| "step": 147700 |
| }, |
| { |
| "epoch": 29.72, |
| "learning_rate": 0.001057937090906053, |
| "loss": 1.2063, |
| "step": 147800 |
| }, |
| { |
| "epoch": 29.74, |
| "learning_rate": 0.001056203313345554, |
| "loss": 1.2055, |
| "step": 147900 |
| }, |
| { |
| "epoch": 29.76, |
| "learning_rate": 0.0010544699177153208, |
| "loss": 1.2086, |
| "step": 148000 |
| }, |
| { |
| "epoch": 29.78, |
| "learning_rate": 0.0010527369074314922, |
| "loss": 1.2115, |
| "step": 148100 |
| }, |
| { |
| "epoch": 29.8, |
| "learning_rate": 0.0010510042859094464, |
| "loss": 1.2102, |
| "step": 148200 |
| }, |
| { |
| "epoch": 29.82, |
| "learning_rate": 0.0010492720565637972, |
| "loss": 1.2079, |
| "step": 148300 |
| }, |
| { |
| "epoch": 29.84, |
| "learning_rate": 0.001047540222808383, |
| "loss": 1.2114, |
| "step": 148400 |
| }, |
| { |
| "epoch": 29.86, |
| "learning_rate": 0.001045808788056264, |
| "loss": 1.2092, |
| "step": 148500 |
| }, |
| { |
| "epoch": 29.88, |
| "learning_rate": 0.001044077755719714, |
| "loss": 1.2106, |
| "step": 148600 |
| }, |
| { |
| "epoch": 29.9, |
| "learning_rate": 0.0010423471292102147, |
| "loss": 1.2099, |
| "step": 148700 |
| }, |
| { |
| "epoch": 29.92, |
| "learning_rate": 0.0010406169119384452, |
| "loss": 1.2035, |
| "step": 148800 |
| }, |
| { |
| "epoch": 29.94, |
| "learning_rate": 0.0010388871073142806, |
| "loss": 1.2079, |
| "step": 148900 |
| }, |
| { |
| "epoch": 29.96, |
| "learning_rate": 0.0010371577187467818, |
| "loss": 1.2114, |
| "step": 149000 |
| }, |
| { |
| "epoch": 29.98, |
| "learning_rate": 0.001035428749644191, |
| "loss": 1.2087, |
| "step": 149100 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.41176053219028325, |
| "eval_loss": 1.1731864213943481, |
| "eval_runtime": 19.7738, |
| "eval_samples_per_second": 4023.916, |
| "eval_steps_per_second": 15.728, |
| "step": 149197 |
| }, |
| { |
| "epoch": 30.0, |
| "learning_rate": 0.001033700203413921, |
| "loss": 1.2107, |
| "step": 149200 |
| }, |
| { |
| "epoch": 30.02, |
| "learning_rate": 0.0010319893625408032, |
| "loss": 1.204, |
| "step": 149300 |
| }, |
| { |
| "epoch": 30.04, |
| "learning_rate": 0.0010302616679603773, |
| "loss": 1.2, |
| "step": 149400 |
| }, |
| { |
| "epoch": 30.06, |
| "learning_rate": 0.0010285344064354445, |
| "loss": 1.2002, |
| "step": 149500 |
| }, |
| { |
| "epoch": 30.08, |
| "learning_rate": 0.0010268075813700541, |
| "loss": 1.1993, |
| "step": 149600 |
| }, |
| { |
| "epoch": 30.1, |
| "learning_rate": 0.0010250811961673946, |
| "loss": 1.2016, |
| "step": 149700 |
| }, |
| { |
| "epoch": 30.12, |
| "learning_rate": 0.0010233552542297884, |
| "loss": 1.2029, |
| "step": 149800 |
| }, |
| { |
| "epoch": 30.14, |
| "learning_rate": 0.001021629758958684, |
| "loss": 1.2027, |
| "step": 149900 |
| }, |
| { |
| "epoch": 30.16, |
| "learning_rate": 0.0010199047137546503, |
| "loss": 1.2043, |
| "step": 150000 |
| }, |
| { |
| "epoch": 30.18, |
| "learning_rate": 0.0010181801220173676, |
| "loss": 1.2028, |
| "step": 150100 |
| }, |
| { |
| "epoch": 30.2, |
| "learning_rate": 0.0010164559871456242, |
| "loss": 1.2031, |
| "step": 150200 |
| }, |
| { |
| "epoch": 30.22, |
| "learning_rate": 0.0010147323125373072, |
| "loss": 1.2036, |
| "step": 150300 |
| }, |
| { |
| "epoch": 30.24, |
| "learning_rate": 0.0010130263313926052, |
| "loss": 1.2056, |
| "step": 150400 |
| }, |
| { |
| "epoch": 30.26, |
| "learning_rate": 0.001011303582813796, |
| "loss": 1.2015, |
| "step": 150500 |
| }, |
| { |
| "epoch": 30.28, |
| "learning_rate": 0.0010095813046526582, |
| "loss": 1.2047, |
| "step": 150600 |
| }, |
| { |
| "epoch": 30.3, |
| "learning_rate": 0.0010078595003034205, |
| "loss": 1.2056, |
| "step": 150700 |
| }, |
| { |
| "epoch": 30.32, |
| "learning_rate": 0.0010061381731593774, |
| "loss": 1.2017, |
| "step": 150800 |
| }, |
| { |
| "epoch": 30.34, |
| "learning_rate": 0.0010044345326882533, |
| "loss": 1.2007, |
| "step": 150900 |
| }, |
| { |
| "epoch": 30.36, |
| "learning_rate": 0.0010027141652740393, |
| "loss": 1.1998, |
| "step": 151000 |
| }, |
| { |
| "epoch": 30.38, |
| "learning_rate": 0.0010009942852053342, |
| "loss": 1.2035, |
| "step": 151100 |
| }, |
| { |
| "epoch": 30.4, |
| "learning_rate": 0.0009992748958716382, |
| "loss": 1.1996, |
| "step": 151200 |
| }, |
| { |
| "epoch": 30.42, |
| "learning_rate": 0.0009975560006614873, |
| "loss": 1.2052, |
| "step": 151300 |
| }, |
| { |
| "epoch": 30.44, |
| "learning_rate": 0.0009958376029624422, |
| "loss": 1.2017, |
| "step": 151400 |
| }, |
| { |
| "epoch": 30.46, |
| "learning_rate": 0.0009941197061610842, |
| "loss": 1.2044, |
| "step": 151500 |
| }, |
| { |
| "epoch": 30.48, |
| "learning_rate": 0.0009924023136430055, |
| "loss": 1.2078, |
| "step": 151600 |
| }, |
| { |
| "epoch": 30.5, |
| "learning_rate": 0.000990685428792806, |
| "loss": 1.2025, |
| "step": 151700 |
| }, |
| { |
| "epoch": 30.52, |
| "learning_rate": 0.0009889690549940852, |
| "loss": 1.2001, |
| "step": 151800 |
| }, |
| { |
| "epoch": 30.54, |
| "learning_rate": 0.0009872531956294354, |
| "loss": 1.1996, |
| "step": 151900 |
| }, |
| { |
| "epoch": 30.56, |
| "learning_rate": 0.0009855378540804332, |
| "loss": 1.2012, |
| "step": 152000 |
| }, |
| { |
| "epoch": 30.58, |
| "learning_rate": 0.0009838230337276372, |
| "loss": 1.2039, |
| "step": 152100 |
| }, |
| { |
| "epoch": 30.6, |
| "learning_rate": 0.0009821087379505776, |
| "loss": 1.2026, |
| "step": 152200 |
| }, |
| { |
| "epoch": 30.62, |
| "learning_rate": 0.0009803949701277515, |
| "loss": 1.2004, |
| "step": 152300 |
| }, |
| { |
| "epoch": 30.64, |
| "learning_rate": 0.0009786817336366138, |
| "loss": 1.2015, |
| "step": 152400 |
| }, |
| { |
| "epoch": 30.66, |
| "learning_rate": 0.0009769690318535743, |
| "loss": 1.2026, |
| "step": 152500 |
| }, |
| { |
| "epoch": 30.68, |
| "learning_rate": 0.0009752739871163907, |
| "loss": 1.2037, |
| "step": 152600 |
| }, |
| { |
| "epoch": 30.7, |
| "learning_rate": 0.0009735623594432755, |
| "loss": 1.2016, |
| "step": 152700 |
| }, |
| { |
| "epoch": 30.72, |
| "learning_rate": 0.0009718512765674095, |
| "loss": 1.2024, |
| "step": 152800 |
| }, |
| { |
| "epoch": 30.74, |
| "learning_rate": 0.0009701407418609562, |
| "loss": 1.202, |
| "step": 152900 |
| }, |
| { |
| "epoch": 30.76, |
| "learning_rate": 0.0009684307586950005, |
| "loss": 1.206, |
| "step": 153000 |
| }, |
| { |
| "epoch": 30.78, |
| "learning_rate": 0.0009667213304395399, |
| "loss": 1.1989, |
| "step": 153100 |
| }, |
| { |
| "epoch": 30.8, |
| "learning_rate": 0.0009650124604634786, |
| "loss": 1.2081, |
| "step": 153200 |
| }, |
| { |
| "epoch": 30.82, |
| "learning_rate": 0.0009633041521346189, |
| "loss": 1.2015, |
| "step": 153300 |
| }, |
| { |
| "epoch": 30.85, |
| "learning_rate": 0.0009615964088196581, |
| "loss": 1.1989, |
| "step": 153400 |
| }, |
| { |
| "epoch": 30.87, |
| "learning_rate": 0.0009598892338841794, |
| "loss": 1.2006, |
| "step": 153500 |
| }, |
| { |
| "epoch": 30.89, |
| "learning_rate": 0.0009581826306926464, |
| "loss": 1.1991, |
| "step": 153600 |
| }, |
| { |
| "epoch": 30.91, |
| "learning_rate": 0.000956493660031415, |
| "loss": 1.2033, |
| "step": 153700 |
| }, |
| { |
| "epoch": 30.93, |
| "learning_rate": 0.0009547882046153125, |
| "loss": 1.2024, |
| "step": 153800 |
| }, |
| { |
| "epoch": 30.95, |
| "learning_rate": 0.000953083330996152, |
| "loss": 1.2042, |
| "step": 153900 |
| }, |
| { |
| "epoch": 30.97, |
| "learning_rate": 0.0009513790425338609, |
| "loss": 1.2005, |
| "step": 154000 |
| }, |
| { |
| "epoch": 30.99, |
| "learning_rate": 0.000949675342587214, |
| "loss": 1.2005, |
| "step": 154100 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.41267003209737646, |
| "eval_loss": 1.1657705307006836, |
| "eval_runtime": 19.6203, |
| "eval_samples_per_second": 4055.399, |
| "eval_steps_per_second": 15.851, |
| "step": 154170 |
| }, |
| { |
| "epoch": 31.01, |
| "learning_rate": 0.0009479722345138251, |
| "loss": 1.1976, |
| "step": 154200 |
| }, |
| { |
| "epoch": 31.03, |
| "learning_rate": 0.0009462697216701424, |
| "loss": 1.1919, |
| "step": 154300 |
| }, |
| { |
| "epoch": 31.05, |
| "learning_rate": 0.0009445678074114414, |
| "loss": 1.1973, |
| "step": 154400 |
| }, |
| { |
| "epoch": 31.07, |
| "learning_rate": 0.0009428664950918177, |
| "loss": 1.1941, |
| "step": 154500 |
| }, |
| { |
| "epoch": 31.09, |
| "learning_rate": 0.0009411657880641792, |
| "loss": 1.1909, |
| "step": 154600 |
| }, |
| { |
| "epoch": 31.11, |
| "learning_rate": 0.0009394656896802428, |
| "loss": 1.1942, |
| "step": 154700 |
| }, |
| { |
| "epoch": 31.13, |
| "learning_rate": 0.0009377662032905253, |
| "loss": 1.1925, |
| "step": 154800 |
| }, |
| { |
| "epoch": 31.15, |
| "learning_rate": 0.0009360673322443375, |
| "loss": 1.1955, |
| "step": 154900 |
| }, |
| { |
| "epoch": 31.17, |
| "learning_rate": 0.0009343690798897762, |
| "loss": 1.1939, |
| "step": 155000 |
| }, |
| { |
| "epoch": 31.19, |
| "learning_rate": 0.0009326714495737206, |
| "loss": 1.1936, |
| "step": 155100 |
| }, |
| { |
| "epoch": 31.21, |
| "learning_rate": 0.0009309744446418236, |
| "loss": 1.195, |
| "step": 155200 |
| }, |
| { |
| "epoch": 31.23, |
| "learning_rate": 0.0009292780684385055, |
| "loss": 1.1938, |
| "step": 155300 |
| }, |
| { |
| "epoch": 31.25, |
| "learning_rate": 0.0009275823243069464, |
| "loss": 1.1969, |
| "step": 155400 |
| }, |
| { |
| "epoch": 31.27, |
| "learning_rate": 0.0009258872155890821, |
| "loss": 1.1971, |
| "step": 155500 |
| }, |
| { |
| "epoch": 31.29, |
| "learning_rate": 0.0009241927456255962, |
| "loss": 1.1924, |
| "step": 155600 |
| }, |
| { |
| "epoch": 31.31, |
| "learning_rate": 0.0009224989177559132, |
| "loss": 1.197, |
| "step": 155700 |
| }, |
| { |
| "epoch": 31.33, |
| "learning_rate": 0.0009208057353181909, |
| "loss": 1.1955, |
| "step": 155800 |
| }, |
| { |
| "epoch": 31.35, |
| "learning_rate": 0.0009191132016493168, |
| "loss": 1.1927, |
| "step": 155900 |
| }, |
| { |
| "epoch": 31.37, |
| "learning_rate": 0.0009174213200848991, |
| "loss": 1.1954, |
| "step": 156000 |
| }, |
| { |
| "epoch": 31.39, |
| "learning_rate": 0.0009157300939592614, |
| "loss": 1.1945, |
| "step": 156100 |
| }, |
| { |
| "epoch": 31.41, |
| "learning_rate": 0.0009140395266054343, |
| "loss": 1.1989, |
| "step": 156200 |
| }, |
| { |
| "epoch": 31.43, |
| "learning_rate": 0.0009123496213551513, |
| "loss": 1.1914, |
| "step": 156300 |
| }, |
| { |
| "epoch": 31.45, |
| "learning_rate": 0.0009106603815388409, |
| "loss": 1.1953, |
| "step": 156400 |
| }, |
| { |
| "epoch": 31.47, |
| "learning_rate": 0.0009089718104856201, |
| "loss": 1.1952, |
| "step": 156500 |
| }, |
| { |
| "epoch": 31.49, |
| "learning_rate": 0.0009072839115232867, |
| "loss": 1.1936, |
| "step": 156600 |
| }, |
| { |
| "epoch": 31.51, |
| "learning_rate": 0.0009055966879783159, |
| "loss": 1.197, |
| "step": 156700 |
| }, |
| { |
| "epoch": 31.53, |
| "learning_rate": 0.0009039101431758506, |
| "loss": 1.1957, |
| "step": 156800 |
| }, |
| { |
| "epoch": 31.55, |
| "learning_rate": 0.0009022242804396972, |
| "loss": 1.1971, |
| "step": 156900 |
| }, |
| { |
| "epoch": 31.57, |
| "learning_rate": 0.0009005391030923156, |
| "loss": 1.1941, |
| "step": 157000 |
| }, |
| { |
| "epoch": 31.59, |
| "learning_rate": 0.0008988546144548173, |
| "loss": 1.1935, |
| "step": 157100 |
| }, |
| { |
| "epoch": 31.61, |
| "learning_rate": 0.0008971708178469554, |
| "loss": 1.1977, |
| "step": 157200 |
| }, |
| { |
| "epoch": 31.63, |
| "learning_rate": 0.00089548771658712, |
| "loss": 1.1978, |
| "step": 157300 |
| }, |
| { |
| "epoch": 31.65, |
| "learning_rate": 0.0008938053139923291, |
| "loss": 1.1932, |
| "step": 157400 |
| }, |
| { |
| "epoch": 31.67, |
| "learning_rate": 0.0008921236133782254, |
| "loss": 1.1928, |
| "step": 157500 |
| }, |
| { |
| "epoch": 31.69, |
| "learning_rate": 0.0008904426180590678, |
| "loss": 1.1976, |
| "step": 157600 |
| }, |
| { |
| "epoch": 31.71, |
| "learning_rate": 0.0008887623313477256, |
| "loss": 1.1946, |
| "step": 157700 |
| }, |
| { |
| "epoch": 31.73, |
| "learning_rate": 0.0008870827565556696, |
| "loss": 1.1975, |
| "step": 157800 |
| }, |
| { |
| "epoch": 31.75, |
| "learning_rate": 0.0008854038969929701, |
| "loss": 1.195, |
| "step": 157900 |
| }, |
| { |
| "epoch": 31.77, |
| "learning_rate": 0.0008837257559682865, |
| "loss": 1.1947, |
| "step": 158000 |
| }, |
| { |
| "epoch": 31.79, |
| "learning_rate": 0.0008820483367888628, |
| "loss": 1.1963, |
| "step": 158100 |
| }, |
| { |
| "epoch": 31.81, |
| "learning_rate": 0.0008803716427605191, |
| "loss": 1.1963, |
| "step": 158200 |
| }, |
| { |
| "epoch": 31.83, |
| "learning_rate": 0.0008786956771876478, |
| "loss": 1.1923, |
| "step": 158300 |
| }, |
| { |
| "epoch": 31.85, |
| "learning_rate": 0.000877020443373205, |
| "loss": 1.1971, |
| "step": 158400 |
| }, |
| { |
| "epoch": 31.87, |
| "learning_rate": 0.0008753459446187053, |
| "loss": 1.1951, |
| "step": 158500 |
| }, |
| { |
| "epoch": 31.89, |
| "learning_rate": 0.0008736721842242136, |
| "loss": 1.1981, |
| "step": 158600 |
| }, |
| { |
| "epoch": 31.91, |
| "learning_rate": 0.0008719991654883402, |
| "loss": 1.1991, |
| "step": 158700 |
| }, |
| { |
| "epoch": 31.93, |
| "learning_rate": 0.0008703268917082342, |
| "loss": 1.1949, |
| "step": 158800 |
| }, |
| { |
| "epoch": 31.95, |
| "learning_rate": 0.0008686553661795765, |
| "loss": 1.1921, |
| "step": 158900 |
| }, |
| { |
| "epoch": 31.97, |
| "learning_rate": 0.0008669845921965718, |
| "loss": 1.1974, |
| "step": 159000 |
| }, |
| { |
| "epoch": 31.99, |
| "learning_rate": 0.0008653145730519456, |
| "loss": 1.1944, |
| "step": 159100 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.4131173845092614, |
| "eval_loss": 1.1602274179458618, |
| "eval_runtime": 19.7773, |
| "eval_samples_per_second": 4023.208, |
| "eval_steps_per_second": 15.725, |
| "step": 159144 |
| }, |
| { |
| "epoch": 32.01, |
| "learning_rate": 0.000863645312036935, |
| "loss": 1.1882, |
| "step": 159200 |
| }, |
| { |
| "epoch": 32.03, |
| "learning_rate": 0.0008619768124412836, |
| "loss": 1.1821, |
| "step": 159300 |
| }, |
| { |
| "epoch": 32.05, |
| "learning_rate": 0.000860309077553233, |
| "loss": 1.1848, |
| "step": 159400 |
| }, |
| { |
| "epoch": 32.07, |
| "learning_rate": 0.0008586421106595186, |
| "loss": 1.1857, |
| "step": 159500 |
| }, |
| { |
| "epoch": 32.09, |
| "learning_rate": 0.0008569759150453628, |
| "loss": 1.1859, |
| "step": 159600 |
| }, |
| { |
| "epoch": 32.11, |
| "learning_rate": 0.0008553104939944677, |
| "loss": 1.189, |
| "step": 159700 |
| }, |
| { |
| "epoch": 32.13, |
| "learning_rate": 0.0008536458507890077, |
| "loss": 1.189, |
| "step": 159800 |
| }, |
| { |
| "epoch": 32.15, |
| "learning_rate": 0.0008519819887096256, |
| "loss": 1.1908, |
| "step": 159900 |
| }, |
| { |
| "epoch": 32.17, |
| "learning_rate": 0.0008503189110354243, |
| "loss": 1.1872, |
| "step": 160000 |
| }, |
| { |
| "epoch": 32.19, |
| "learning_rate": 0.0008486566210439614, |
| "loss": 1.1861, |
| "step": 160100 |
| }, |
| { |
| "epoch": 32.21, |
| "learning_rate": 0.0008470117330755695, |
| "loss": 1.1838, |
| "step": 160200 |
| }, |
| { |
| "epoch": 32.23, |
| "learning_rate": 0.0008453510203175021, |
| "loss": 1.1876, |
| "step": 160300 |
| }, |
| { |
| "epoch": 32.25, |
| "learning_rate": 0.0008436911050327827, |
| "loss": 1.1843, |
| "step": 160400 |
| }, |
| { |
| "epoch": 32.27, |
| "learning_rate": 0.0008420319904927365, |
| "loss": 1.1884, |
| "step": 160500 |
| }, |
| { |
| "epoch": 32.29, |
| "learning_rate": 0.0008403736799671097, |
| "loss": 1.191, |
| "step": 160600 |
| }, |
| { |
| "epoch": 32.31, |
| "learning_rate": 0.000838716176724065, |
| "loss": 1.1915, |
| "step": 160700 |
| }, |
| { |
| "epoch": 32.33, |
| "learning_rate": 0.0008370594840301723, |
| "loss": 1.188, |
| "step": 160800 |
| }, |
| { |
| "epoch": 32.35, |
| "learning_rate": 0.000835403605150406, |
| "loss": 1.1913, |
| "step": 160900 |
| }, |
| { |
| "epoch": 32.37, |
| "learning_rate": 0.000833748543348136, |
| "loss": 1.1905, |
| "step": 161000 |
| }, |
| { |
| "epoch": 32.39, |
| "learning_rate": 0.0008320943018851221, |
| "loss": 1.1876, |
| "step": 161100 |
| }, |
| { |
| "epoch": 32.41, |
| "learning_rate": 0.0008304408840215062, |
| "loss": 1.1882, |
| "step": 161200 |
| }, |
| { |
| "epoch": 32.43, |
| "learning_rate": 0.0008287882930158088, |
| "loss": 1.1874, |
| "step": 161300 |
| }, |
| { |
| "epoch": 32.45, |
| "learning_rate": 0.0008271365321249197, |
| "loss": 1.1908, |
| "step": 161400 |
| }, |
| { |
| "epoch": 32.47, |
| "learning_rate": 0.0008254856046040937, |
| "loss": 1.1903, |
| "step": 161500 |
| }, |
| { |
| "epoch": 32.49, |
| "learning_rate": 0.0008238355137069418, |
| "loss": 1.192, |
| "step": 161600 |
| }, |
| { |
| "epoch": 32.51, |
| "learning_rate": 0.0008221862626854274, |
| "loss": 1.1883, |
| "step": 161700 |
| }, |
| { |
| "epoch": 32.53, |
| "learning_rate": 0.0008205378547898581, |
| "loss": 1.1918, |
| "step": 161800 |
| }, |
| { |
| "epoch": 32.55, |
| "learning_rate": 0.0008188902932688807, |
| "loss": 1.1885, |
| "step": 161900 |
| }, |
| { |
| "epoch": 32.57, |
| "learning_rate": 0.0008172435813694726, |
| "loss": 1.1887, |
| "step": 162000 |
| }, |
| { |
| "epoch": 32.59, |
| "learning_rate": 0.0008155977223369379, |
| "loss": 1.1873, |
| "step": 162100 |
| }, |
| { |
| "epoch": 32.61, |
| "learning_rate": 0.0008139527194148993, |
| "loss": 1.1863, |
| "step": 162200 |
| }, |
| { |
| "epoch": 32.63, |
| "learning_rate": 0.0008123085758452935, |
| "loss": 1.1897, |
| "step": 162300 |
| }, |
| { |
| "epoch": 32.65, |
| "learning_rate": 0.0008106652948683613, |
| "loss": 1.1856, |
| "step": 162400 |
| }, |
| { |
| "epoch": 32.67, |
| "learning_rate": 0.0008090392995776086, |
| "loss": 1.1922, |
| "step": 162500 |
| }, |
| { |
| "epoch": 32.69, |
| "learning_rate": 0.0008073977447932476, |
| "loss": 1.1861, |
| "step": 162600 |
| }, |
| { |
| "epoch": 32.72, |
| "learning_rate": 0.0008057570622797192, |
| "loss": 1.1881, |
| "step": 162700 |
| }, |
| { |
| "epoch": 32.74, |
| "learning_rate": 0.0008041336489961784, |
| "loss": 1.1852, |
| "step": 162800 |
| }, |
| { |
| "epoch": 32.76, |
| "learning_rate": 0.0008024947119195025, |
| "loss": 1.1851, |
| "step": 162900 |
| }, |
| { |
| "epoch": 32.78, |
| "learning_rate": 0.000800856656776449, |
| "loss": 1.1819, |
| "step": 163000 |
| }, |
| { |
| "epoch": 32.8, |
| "learning_rate": 0.0007992194867952607, |
| "loss": 1.1882, |
| "step": 163100 |
| }, |
| { |
| "epoch": 32.82, |
| "learning_rate": 0.0007975832052024367, |
| "loss": 1.1864, |
| "step": 163200 |
| }, |
| { |
| "epoch": 32.84, |
| "learning_rate": 0.0007959478152227251, |
| "loss": 1.1914, |
| "step": 163300 |
| }, |
| { |
| "epoch": 32.86, |
| "learning_rate": 0.0007943133200791164, |
| "loss": 1.1888, |
| "step": 163400 |
| }, |
| { |
| "epoch": 32.88, |
| "learning_rate": 0.0007926797229928376, |
| "loss": 1.1831, |
| "step": 163500 |
| }, |
| { |
| "epoch": 32.9, |
| "learning_rate": 0.0007910470271833464, |
| "loss": 1.1878, |
| "step": 163600 |
| }, |
| { |
| "epoch": 32.92, |
| "learning_rate": 0.0007894152358683243, |
| "loss": 1.1889, |
| "step": 163700 |
| }, |
| { |
| "epoch": 32.94, |
| "learning_rate": 0.0007877843522636694, |
| "loss": 1.1872, |
| "step": 163800 |
| }, |
| { |
| "epoch": 32.96, |
| "learning_rate": 0.0007861543795834913, |
| "loss": 1.1851, |
| "step": 163900 |
| }, |
| { |
| "epoch": 32.98, |
| "learning_rate": 0.0007845253210401045, |
| "loss": 1.1905, |
| "step": 164000 |
| }, |
| { |
| "epoch": 33.0, |
| "learning_rate": 0.0007828971798440226, |
| "loss": 1.1887, |
| "step": 164100 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.41392475061534817, |
| "eval_loss": 1.1511569023132324, |
| "eval_runtime": 19.7992, |
| "eval_samples_per_second": 4018.746, |
| "eval_steps_per_second": 15.708, |
| "step": 164117 |
| }, |
| { |
| "epoch": 33.02, |
| "learning_rate": 0.0007812699592039499, |
| "loss": 1.1783, |
| "step": 164200 |
| }, |
| { |
| "epoch": 33.04, |
| "learning_rate": 0.0007796436623267771, |
| "loss": 1.1784, |
| "step": 164300 |
| }, |
| { |
| "epoch": 33.06, |
| "learning_rate": 0.0007780182924175748, |
| "loss": 1.178, |
| "step": 164400 |
| }, |
| { |
| "epoch": 33.08, |
| "learning_rate": 0.0007763938526795867, |
| "loss": 1.177, |
| "step": 164500 |
| }, |
| { |
| "epoch": 33.1, |
| "learning_rate": 0.0007747703463142225, |
| "loss": 1.1826, |
| "step": 164600 |
| }, |
| { |
| "epoch": 33.12, |
| "learning_rate": 0.0007731477765210531, |
| "loss": 1.1774, |
| "step": 164700 |
| }, |
| { |
| "epoch": 33.14, |
| "learning_rate": 0.0007715261464978038, |
| "loss": 1.1816, |
| "step": 164800 |
| }, |
| { |
| "epoch": 33.16, |
| "learning_rate": 0.0007699054594403476, |
| "loss": 1.1792, |
| "step": 164900 |
| }, |
| { |
| "epoch": 33.18, |
| "learning_rate": 0.0007682857185426989, |
| "loss": 1.1801, |
| "step": 165000 |
| }, |
| { |
| "epoch": 33.2, |
| "learning_rate": 0.0007666669269970071, |
| "loss": 1.1801, |
| "step": 165100 |
| }, |
| { |
| "epoch": 33.22, |
| "learning_rate": 0.0007650490879935517, |
| "loss": 1.1833, |
| "step": 165200 |
| }, |
| { |
| "epoch": 33.24, |
| "learning_rate": 0.0007634322047207346, |
| "loss": 1.1818, |
| "step": 165300 |
| }, |
| { |
| "epoch": 33.26, |
| "learning_rate": 0.0007618162803650734, |
| "loss": 1.1805, |
| "step": 165400 |
| }, |
| { |
| "epoch": 33.28, |
| "learning_rate": 0.0007602013181111966, |
| "loss": 1.179, |
| "step": 165500 |
| }, |
| { |
| "epoch": 33.3, |
| "learning_rate": 0.0007585873211418363, |
| "loss": 1.1792, |
| "step": 165600 |
| }, |
| { |
| "epoch": 33.32, |
| "learning_rate": 0.000756974292637823, |
| "loss": 1.1775, |
| "step": 165700 |
| }, |
| { |
| "epoch": 33.34, |
| "learning_rate": 0.0007553622357780774, |
| "loss": 1.183, |
| "step": 165800 |
| }, |
| { |
| "epoch": 33.36, |
| "learning_rate": 0.0007537511537396058, |
| "loss": 1.1838, |
| "step": 165900 |
| }, |
| { |
| "epoch": 33.38, |
| "learning_rate": 0.0007521410496974935, |
| "loss": 1.182, |
| "step": 166000 |
| }, |
| { |
| "epoch": 33.4, |
| "learning_rate": 0.0007505319268248988, |
| "loss": 1.1809, |
| "step": 166100 |
| }, |
| { |
| "epoch": 33.42, |
| "learning_rate": 0.0007489237882930453, |
| "loss": 1.1785, |
| "step": 166200 |
| }, |
| { |
| "epoch": 33.44, |
| "learning_rate": 0.0007473166372712171, |
| "loss": 1.184, |
| "step": 166300 |
| }, |
| { |
| "epoch": 33.46, |
| "learning_rate": 0.0007457104769267522, |
| "loss": 1.1809, |
| "step": 166400 |
| }, |
| { |
| "epoch": 33.48, |
| "learning_rate": 0.000744105310425037, |
| "loss": 1.1809, |
| "step": 166500 |
| }, |
| { |
| "epoch": 33.5, |
| "learning_rate": 0.0007425011409294979, |
| "loss": 1.1788, |
| "step": 166600 |
| }, |
| { |
| "epoch": 33.52, |
| "learning_rate": 0.0007408979716015968, |
| "loss": 1.1821, |
| "step": 166700 |
| }, |
| { |
| "epoch": 33.54, |
| "learning_rate": 0.000739295805600825, |
| "loss": 1.1827, |
| "step": 166800 |
| }, |
| { |
| "epoch": 33.56, |
| "learning_rate": 0.0007376946460846965, |
| "loss": 1.1819, |
| "step": 166900 |
| }, |
| { |
| "epoch": 33.58, |
| "learning_rate": 0.0007360944962087409, |
| "loss": 1.1793, |
| "step": 167000 |
| }, |
| { |
| "epoch": 33.6, |
| "learning_rate": 0.0007344953591264986, |
| "loss": 1.1827, |
| "step": 167100 |
| }, |
| { |
| "epoch": 33.62, |
| "learning_rate": 0.000732897237989514, |
| "loss": 1.1813, |
| "step": 167200 |
| }, |
| { |
| "epoch": 33.64, |
| "learning_rate": 0.0007313001359473295, |
| "loss": 1.1835, |
| "step": 167300 |
| }, |
| { |
| "epoch": 33.66, |
| "learning_rate": 0.0007297040561474782, |
| "loss": 1.1772, |
| "step": 167400 |
| }, |
| { |
| "epoch": 33.68, |
| "learning_rate": 0.0007281090017354799, |
| "loss": 1.1828, |
| "step": 167500 |
| }, |
| { |
| "epoch": 33.7, |
| "learning_rate": 0.0007265149758548325, |
| "loss": 1.182, |
| "step": 167600 |
| }, |
| { |
| "epoch": 33.72, |
| "learning_rate": 0.0007249219816470082, |
| "loss": 1.1799, |
| "step": 167700 |
| }, |
| { |
| "epoch": 33.74, |
| "learning_rate": 0.0007233300222514435, |
| "loss": 1.1794, |
| "step": 167800 |
| }, |
| { |
| "epoch": 33.76, |
| "learning_rate": 0.0007217391008055382, |
| "loss": 1.1759, |
| "step": 167900 |
| }, |
| { |
| "epoch": 33.78, |
| "learning_rate": 0.0007201492204446453, |
| "loss": 1.1799, |
| "step": 168000 |
| }, |
| { |
| "epoch": 33.8, |
| "learning_rate": 0.0007185603843020663, |
| "loss": 1.1757, |
| "step": 168100 |
| }, |
| { |
| "epoch": 33.82, |
| "learning_rate": 0.0007169725955090442, |
| "loss": 1.1791, |
| "step": 168200 |
| }, |
| { |
| "epoch": 33.84, |
| "learning_rate": 0.0007153858571947587, |
| "loss": 1.1777, |
| "step": 168300 |
| }, |
| { |
| "epoch": 33.86, |
| "learning_rate": 0.0007138160241077916, |
| "loss": 1.1769, |
| "step": 168400 |
| }, |
| { |
| "epoch": 33.88, |
| "learning_rate": 0.0007122313855474593, |
| "loss": 1.1821, |
| "step": 168500 |
| }, |
| { |
| "epoch": 33.9, |
| "learning_rate": 0.0007106478068097369, |
| "loss": 1.1786, |
| "step": 168600 |
| }, |
| { |
| "epoch": 33.92, |
| "learning_rate": 0.0007090652910155055, |
| "loss": 1.1816, |
| "step": 168700 |
| }, |
| { |
| "epoch": 33.94, |
| "learning_rate": 0.0007074838412835532, |
| "loss": 1.1778, |
| "step": 168800 |
| }, |
| { |
| "epoch": 33.96, |
| "learning_rate": 0.0007059034607305667, |
| "loss": 1.1819, |
| "step": 168900 |
| }, |
| { |
| "epoch": 33.98, |
| "learning_rate": 0.0007043241524711256, |
| "loss": 1.1795, |
| "step": 169000 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.4142251067155042, |
| "eval_loss": 1.1453089714050293, |
| "eval_runtime": 19.8806, |
| "eval_samples_per_second": 4002.288, |
| "eval_steps_per_second": 15.643, |
| "step": 169090 |
| }, |
| { |
| "epoch": 34.0, |
| "learning_rate": 0.000702745919617695, |
| "loss": 1.1752, |
| "step": 169100 |
| }, |
| { |
| "epoch": 34.02, |
| "learning_rate": 0.0007011687652806225, |
| "loss": 1.1677, |
| "step": 169200 |
| }, |
| { |
| "epoch": 34.04, |
| "learning_rate": 0.000699592692568129, |
| "loss": 1.17, |
| "step": 169300 |
| }, |
| { |
| "epoch": 34.06, |
| "learning_rate": 0.0006980177045863047, |
| "loss": 1.1725, |
| "step": 169400 |
| }, |
| { |
| "epoch": 34.08, |
| "learning_rate": 0.0006964438044391006, |
| "loss": 1.1729, |
| "step": 169500 |
| }, |
| { |
| "epoch": 34.1, |
| "learning_rate": 0.0006948709952283247, |
| "loss": 1.1716, |
| "step": 169600 |
| }, |
| { |
| "epoch": 34.12, |
| "learning_rate": 0.0006932992800536353, |
| "loss": 1.174, |
| "step": 169700 |
| }, |
| { |
| "epoch": 34.14, |
| "learning_rate": 0.0006917286620125349, |
| "loss": 1.1747, |
| "step": 169800 |
| }, |
| { |
| "epoch": 34.16, |
| "learning_rate": 0.0006901591442003616, |
| "loss": 1.1702, |
| "step": 169900 |
| }, |
| { |
| "epoch": 34.18, |
| "learning_rate": 0.0006885907297102878, |
| "loss": 1.1726, |
| "step": 170000 |
| }, |
| { |
| "epoch": 34.2, |
| "learning_rate": 0.0006870234216333101, |
| "loss": 1.1726, |
| "step": 170100 |
| }, |
| { |
| "epoch": 34.22, |
| "learning_rate": 0.0006854572230582455, |
| "loss": 1.1732, |
| "step": 170200 |
| }, |
| { |
| "epoch": 34.24, |
| "learning_rate": 0.000683892137071723, |
| "loss": 1.1736, |
| "step": 170300 |
| }, |
| { |
| "epoch": 34.26, |
| "learning_rate": 0.00068232816675818, |
| "loss": 1.1748, |
| "step": 170400 |
| }, |
| { |
| "epoch": 34.28, |
| "learning_rate": 0.0006807653151998552, |
| "loss": 1.1725, |
| "step": 170500 |
| }, |
| { |
| "epoch": 34.3, |
| "learning_rate": 0.0006792035854767827, |
| "loss": 1.1689, |
| "step": 170600 |
| }, |
| { |
| "epoch": 34.32, |
| "learning_rate": 0.0006776429806667841, |
| "loss": 1.1719, |
| "step": 170700 |
| }, |
| { |
| "epoch": 34.34, |
| "learning_rate": 0.0006760835038454657, |
| "loss": 1.1711, |
| "step": 170800 |
| }, |
| { |
| "epoch": 34.36, |
| "learning_rate": 0.0006745407359349601, |
| "loss": 1.1753, |
| "step": 170900 |
| }, |
| { |
| "epoch": 34.38, |
| "learning_rate": 0.0006729835129523944, |
| "loss": 1.1734, |
| "step": 171000 |
| }, |
| { |
| "epoch": 34.4, |
| "learning_rate": 0.0006714274271412859, |
| "loss": 1.174, |
| "step": 171100 |
| }, |
| { |
| "epoch": 34.42, |
| "learning_rate": 0.0006698724815683352, |
| "loss": 1.1727, |
| "step": 171200 |
| }, |
| { |
| "epoch": 34.44, |
| "learning_rate": 0.0006683186792979937, |
| "loss": 1.1714, |
| "step": 171300 |
| }, |
| { |
| "epoch": 34.46, |
| "learning_rate": 0.0006667660233924612, |
| "loss": 1.1702, |
| "step": 171400 |
| }, |
| { |
| "epoch": 34.48, |
| "learning_rate": 0.0006652145169116783, |
| "loss": 1.1687, |
| "step": 171500 |
| }, |
| { |
| "epoch": 34.5, |
| "learning_rate": 0.0006636641629133204, |
| "loss": 1.1715, |
| "step": 171600 |
| }, |
| { |
| "epoch": 34.52, |
| "learning_rate": 0.0006621149644527902, |
| "loss": 1.1716, |
| "step": 171700 |
| }, |
| { |
| "epoch": 34.54, |
| "learning_rate": 0.0006605669245832145, |
| "loss": 1.1719, |
| "step": 171800 |
| }, |
| { |
| "epoch": 34.56, |
| "learning_rate": 0.0006590200463554366, |
| "loss": 1.1726, |
| "step": 171900 |
| }, |
| { |
| "epoch": 34.59, |
| "learning_rate": 0.0006574743328180105, |
| "loss": 1.1751, |
| "step": 172000 |
| }, |
| { |
| "epoch": 34.61, |
| "learning_rate": 0.0006559297870171938, |
| "loss": 1.1714, |
| "step": 172100 |
| }, |
| { |
| "epoch": 34.63, |
| "learning_rate": 0.0006543864119969438, |
| "loss": 1.1752, |
| "step": 172200 |
| }, |
| { |
| "epoch": 34.65, |
| "learning_rate": 0.0006528442107989105, |
| "loss": 1.1756, |
| "step": 172300 |
| }, |
| { |
| "epoch": 34.67, |
| "learning_rate": 0.0006513031864624303, |
| "loss": 1.1723, |
| "step": 172400 |
| }, |
| { |
| "epoch": 34.69, |
| "learning_rate": 0.0006497633420245197, |
| "loss": 1.1723, |
| "step": 172500 |
| }, |
| { |
| "epoch": 34.71, |
| "learning_rate": 0.0006482246805198708, |
| "loss": 1.1735, |
| "step": 172600 |
| }, |
| { |
| "epoch": 34.73, |
| "learning_rate": 0.0006466872049808438, |
| "loss": 1.1732, |
| "step": 172700 |
| }, |
| { |
| "epoch": 34.75, |
| "learning_rate": 0.0006451509184374624, |
| "loss": 1.1732, |
| "step": 172800 |
| }, |
| { |
| "epoch": 34.77, |
| "learning_rate": 0.0006436158239174055, |
| "loss": 1.1751, |
| "step": 172900 |
| }, |
| { |
| "epoch": 34.79, |
| "learning_rate": 0.0006420819244460042, |
| "loss": 1.1721, |
| "step": 173000 |
| }, |
| { |
| "epoch": 34.81, |
| "learning_rate": 0.0006405492230462343, |
| "loss": 1.1709, |
| "step": 173100 |
| }, |
| { |
| "epoch": 34.83, |
| "learning_rate": 0.0006390177227387101, |
| "loss": 1.1696, |
| "step": 173200 |
| }, |
| { |
| "epoch": 34.85, |
| "learning_rate": 0.0006374874265416783, |
| "loss": 1.17, |
| "step": 173300 |
| }, |
| { |
| "epoch": 34.87, |
| "learning_rate": 0.0006359583374710134, |
| "loss": 1.1774, |
| "step": 173400 |
| }, |
| { |
| "epoch": 34.89, |
| "learning_rate": 0.0006344304585402111, |
| "loss": 1.1666, |
| "step": 173500 |
| }, |
| { |
| "epoch": 34.91, |
| "learning_rate": 0.0006329037927603816, |
| "loss": 1.1701, |
| "step": 173600 |
| }, |
| { |
| "epoch": 34.93, |
| "learning_rate": 0.0006313783431402438, |
| "loss": 1.1708, |
| "step": 173700 |
| }, |
| { |
| "epoch": 34.95, |
| "learning_rate": 0.0006298541126861209, |
| "loss": 1.1724, |
| "step": 173800 |
| }, |
| { |
| "epoch": 34.97, |
| "learning_rate": 0.0006283311044019327, |
| "loss": 1.1727, |
| "step": 173900 |
| }, |
| { |
| "epoch": 34.99, |
| "learning_rate": 0.0006268093212891912, |
| "loss": 1.1685, |
| "step": 174000 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.41499333743989697, |
| "eval_loss": 1.1372462511062622, |
| "eval_runtime": 19.9974, |
| "eval_samples_per_second": 3978.919, |
| "eval_steps_per_second": 15.552, |
| "step": 174063 |
| }, |
| { |
| "epoch": 35.01, |
| "learning_rate": 0.0006253039658071285, |
| "loss": 1.1616, |
| "step": 174100 |
| }, |
| { |
| "epoch": 35.03, |
| "learning_rate": 0.0006237846297056513, |
| "loss": 1.1594, |
| "step": 174200 |
| }, |
| { |
| "epoch": 35.05, |
| "learning_rate": 0.0006222665277357129, |
| "loss": 1.1619, |
| "step": 174300 |
| }, |
| { |
| "epoch": 35.07, |
| "learning_rate": 0.0006207496628891555, |
| "loss": 1.1655, |
| "step": 174400 |
| }, |
| { |
| "epoch": 35.09, |
| "learning_rate": 0.0006192340381553838, |
| "loss": 1.1588, |
| "step": 174500 |
| }, |
| { |
| "epoch": 35.11, |
| "learning_rate": 0.0006177196565213567, |
| "loss": 1.1639, |
| "step": 174600 |
| }, |
| { |
| "epoch": 35.13, |
| "learning_rate": 0.0006162065209715849, |
| "loss": 1.1594, |
| "step": 174700 |
| }, |
| { |
| "epoch": 35.15, |
| "learning_rate": 0.0006146946344881228, |
| "loss": 1.164, |
| "step": 174800 |
| }, |
| { |
| "epoch": 35.17, |
| "learning_rate": 0.0006131840000505637, |
| "loss": 1.1602, |
| "step": 174900 |
| }, |
| { |
| "epoch": 35.19, |
| "learning_rate": 0.0006116746206360317, |
| "loss": 1.1629, |
| "step": 175000 |
| }, |
| { |
| "epoch": 35.21, |
| "learning_rate": 0.0006101664992191795, |
| "loss": 1.1656, |
| "step": 175100 |
| }, |
| { |
| "epoch": 35.23, |
| "learning_rate": 0.0006086596387721796, |
| "loss": 1.166, |
| "step": 175200 |
| }, |
| { |
| "epoch": 35.25, |
| "learning_rate": 0.0006071540422647201, |
| "loss": 1.1632, |
| "step": 175300 |
| }, |
| { |
| "epoch": 35.27, |
| "learning_rate": 0.0006056497126639966, |
| "loss": 1.1649, |
| "step": 175400 |
| }, |
| { |
| "epoch": 35.29, |
| "learning_rate": 0.0006041466529347094, |
| "loss": 1.1621, |
| "step": 175500 |
| }, |
| { |
| "epoch": 35.31, |
| "learning_rate": 0.0006026448660390557, |
| "loss": 1.1657, |
| "step": 175600 |
| }, |
| { |
| "epoch": 35.33, |
| "learning_rate": 0.0006011443549367248, |
| "loss": 1.1663, |
| "step": 175700 |
| }, |
| { |
| "epoch": 35.35, |
| "learning_rate": 0.0005996451225848903, |
| "loss": 1.1682, |
| "step": 175800 |
| }, |
| { |
| "epoch": 35.37, |
| "learning_rate": 0.0005981471719382066, |
| "loss": 1.1618, |
| "step": 175900 |
| }, |
| { |
| "epoch": 35.39, |
| "learning_rate": 0.0005966505059488022, |
| "loss": 1.1687, |
| "step": 176000 |
| }, |
| { |
| "epoch": 35.41, |
| "learning_rate": 0.0005951551275662743, |
| "loss": 1.162, |
| "step": 176100 |
| }, |
| { |
| "epoch": 35.43, |
| "learning_rate": 0.0005936610397376806, |
| "loss": 1.1656, |
| "step": 176200 |
| }, |
| { |
| "epoch": 35.45, |
| "learning_rate": 0.0005921682454075374, |
| "loss": 1.1663, |
| "step": 176300 |
| }, |
| { |
| "epoch": 35.47, |
| "learning_rate": 0.0005906767475178108, |
| "loss": 1.163, |
| "step": 176400 |
| }, |
| { |
| "epoch": 35.49, |
| "learning_rate": 0.0005891865490079131, |
| "loss": 1.1663, |
| "step": 176500 |
| }, |
| { |
| "epoch": 35.51, |
| "learning_rate": 0.0005876976528146937, |
| "loss": 1.1646, |
| "step": 176600 |
| }, |
| { |
| "epoch": 35.53, |
| "learning_rate": 0.000586210061872437, |
| "loss": 1.1609, |
| "step": 176700 |
| }, |
| { |
| "epoch": 35.55, |
| "learning_rate": 0.0005847237791128547, |
| "loss": 1.1629, |
| "step": 176800 |
| }, |
| { |
| "epoch": 35.57, |
| "learning_rate": 0.0005832388074650808, |
| "loss": 1.1615, |
| "step": 176900 |
| }, |
| { |
| "epoch": 35.59, |
| "learning_rate": 0.0005817551498556642, |
| "loss": 1.1688, |
| "step": 177000 |
| }, |
| { |
| "epoch": 35.61, |
| "learning_rate": 0.0005802728092085649, |
| "loss": 1.1621, |
| "step": 177100 |
| }, |
| { |
| "epoch": 35.63, |
| "learning_rate": 0.0005787917884451475, |
| "loss": 1.1638, |
| "step": 177200 |
| }, |
| { |
| "epoch": 35.65, |
| "learning_rate": 0.000577312090484176, |
| "loss": 1.1656, |
| "step": 177300 |
| }, |
| { |
| "epoch": 35.67, |
| "learning_rate": 0.0005758337182418055, |
| "loss": 1.1625, |
| "step": 177400 |
| }, |
| { |
| "epoch": 35.69, |
| "learning_rate": 0.0005743566746315804, |
| "loss": 1.1613, |
| "step": 177500 |
| }, |
| { |
| "epoch": 35.71, |
| "learning_rate": 0.0005728809625644257, |
| "loss": 1.167, |
| "step": 177600 |
| }, |
| { |
| "epoch": 35.73, |
| "learning_rate": 0.0005714065849486429, |
| "loss": 1.1632, |
| "step": 177700 |
| }, |
| { |
| "epoch": 35.75, |
| "learning_rate": 0.0005699335446899022, |
| "loss": 1.1662, |
| "step": 177800 |
| }, |
| { |
| "epoch": 35.77, |
| "learning_rate": 0.0005684618446912396, |
| "loss": 1.1628, |
| "step": 177900 |
| }, |
| { |
| "epoch": 35.79, |
| "learning_rate": 0.0005669914878530493, |
| "loss": 1.1689, |
| "step": 178000 |
| }, |
| { |
| "epoch": 35.81, |
| "learning_rate": 0.0005655224770730786, |
| "loss": 1.1652, |
| "step": 178100 |
| }, |
| { |
| "epoch": 35.83, |
| "learning_rate": 0.0005640694851778694, |
| "loss": 1.1631, |
| "step": 178200 |
| }, |
| { |
| "epoch": 35.85, |
| "learning_rate": 0.0005626031616641948, |
| "loss": 1.1611, |
| "step": 178300 |
| }, |
| { |
| "epoch": 35.87, |
| "learning_rate": 0.0005611381928571567, |
| "loss": 1.1598, |
| "step": 178400 |
| }, |
| { |
| "epoch": 35.89, |
| "learning_rate": 0.0005596745816438834, |
| "loss": 1.1639, |
| "step": 178500 |
| }, |
| { |
| "epoch": 35.91, |
| "learning_rate": 0.0005582123309088284, |
| "loss": 1.1606, |
| "step": 178600 |
| }, |
| { |
| "epoch": 35.93, |
| "learning_rate": 0.0005567514435337624, |
| "loss": 1.1622, |
| "step": 178700 |
| }, |
| { |
| "epoch": 35.95, |
| "learning_rate": 0.000555291922397771, |
| "loss": 1.162, |
| "step": 178800 |
| }, |
| { |
| "epoch": 35.97, |
| "learning_rate": 0.000553833770377246, |
| "loss": 1.164, |
| "step": 178900 |
| }, |
| { |
| "epoch": 35.99, |
| "learning_rate": 0.0005523769903458824, |
| "loss": 1.1658, |
| "step": 179000 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.4156597525371182, |
| "eval_loss": 1.1301237344741821, |
| "eval_runtime": 19.5705, |
| "eval_samples_per_second": 4065.719, |
| "eval_steps_per_second": 15.891, |
| "step": 179037 |
| }, |
| { |
| "epoch": 36.01, |
| "learning_rate": 0.0005509215851746693, |
| "loss": 1.1566, |
| "step": 179100 |
| }, |
| { |
| "epoch": 36.03, |
| "learning_rate": 0.0005494675577318875, |
| "loss": 1.1495, |
| "step": 179200 |
| }, |
| { |
| "epoch": 36.05, |
| "learning_rate": 0.0005480149108831029, |
| "loss": 1.1467, |
| "step": 179300 |
| }, |
| { |
| "epoch": 36.07, |
| "learning_rate": 0.0005465636474911603, |
| "loss": 1.1519, |
| "step": 179400 |
| }, |
| { |
| "epoch": 36.09, |
| "learning_rate": 0.000545113770416177, |
| "loss": 1.1524, |
| "step": 179500 |
| }, |
| { |
| "epoch": 36.11, |
| "learning_rate": 0.0005436652825155394, |
| "loss": 1.1548, |
| "step": 179600 |
| }, |
| { |
| "epoch": 36.13, |
| "learning_rate": 0.0005422181866438958, |
| "loss": 1.1576, |
| "step": 179700 |
| }, |
| { |
| "epoch": 36.15, |
| "learning_rate": 0.0005407724856531514, |
| "loss": 1.1558, |
| "step": 179800 |
| }, |
| { |
| "epoch": 36.17, |
| "learning_rate": 0.0005393281823924612, |
| "loss": 1.1539, |
| "step": 179900 |
| }, |
| { |
| "epoch": 36.19, |
| "learning_rate": 0.000537885279708227, |
| "loss": 1.1562, |
| "step": 180000 |
| }, |
| { |
| "epoch": 36.21, |
| "learning_rate": 0.0005364437804440896, |
| "loss": 1.1551, |
| "step": 180100 |
| }, |
| { |
| "epoch": 36.23, |
| "learning_rate": 0.0005350036874409248, |
| "loss": 1.1554, |
| "step": 180200 |
| }, |
| { |
| "epoch": 36.25, |
| "learning_rate": 0.0005335650035368354, |
| "loss": 1.1547, |
| "step": 180300 |
| }, |
| { |
| "epoch": 36.27, |
| "learning_rate": 0.0005321277315671485, |
| "loss": 1.159, |
| "step": 180400 |
| }, |
| { |
| "epoch": 36.29, |
| "learning_rate": 0.0005306918743644085, |
| "loss": 1.1551, |
| "step": 180500 |
| }, |
| { |
| "epoch": 36.31, |
| "learning_rate": 0.0005292574347583714, |
| "loss": 1.1557, |
| "step": 180600 |
| }, |
| { |
| "epoch": 36.33, |
| "learning_rate": 0.0005278244155759988, |
| "loss": 1.1572, |
| "step": 180700 |
| }, |
| { |
| "epoch": 36.35, |
| "learning_rate": 0.0005263928196414538, |
| "loss": 1.1536, |
| "step": 180800 |
| }, |
| { |
| "epoch": 36.37, |
| "learning_rate": 0.0005249626497760943, |
| "loss": 1.1544, |
| "step": 180900 |
| }, |
| { |
| "epoch": 36.39, |
| "learning_rate": 0.0005235339087984682, |
| "loss": 1.1571, |
| "step": 181000 |
| }, |
| { |
| "epoch": 36.41, |
| "learning_rate": 0.0005221065995243063, |
| "loss": 1.1575, |
| "step": 181100 |
| }, |
| { |
| "epoch": 36.43, |
| "learning_rate": 0.0005206807247665185, |
| "loss": 1.1507, |
| "step": 181200 |
| }, |
| { |
| "epoch": 36.46, |
| "learning_rate": 0.0005192562873351877, |
| "loss": 1.1566, |
| "step": 181300 |
| }, |
| { |
| "epoch": 36.48, |
| "learning_rate": 0.0005178332900375647, |
| "loss": 1.1561, |
| "step": 181400 |
| }, |
| { |
| "epoch": 36.5, |
| "learning_rate": 0.0005164117356780603, |
| "loss": 1.1568, |
| "step": 181500 |
| }, |
| { |
| "epoch": 36.52, |
| "learning_rate": 0.000514991627058243, |
| "loss": 1.1599, |
| "step": 181600 |
| }, |
| { |
| "epoch": 36.54, |
| "learning_rate": 0.0005135729669768321, |
| "loss": 1.1601, |
| "step": 181700 |
| }, |
| { |
| "epoch": 36.56, |
| "learning_rate": 0.0005121557582296915, |
| "loss": 1.1561, |
| "step": 181800 |
| }, |
| { |
| "epoch": 36.58, |
| "learning_rate": 0.0005107400036098259, |
| "loss": 1.1579, |
| "step": 181900 |
| }, |
| { |
| "epoch": 36.6, |
| "learning_rate": 0.0005093257059073723, |
| "loss": 1.1532, |
| "step": 182000 |
| }, |
| { |
| "epoch": 36.62, |
| "learning_rate": 0.0005079128679095983, |
| "loss": 1.159, |
| "step": 182100 |
| }, |
| { |
| "epoch": 36.64, |
| "learning_rate": 0.0005065014924008942, |
| "loss": 1.1549, |
| "step": 182200 |
| }, |
| { |
| "epoch": 36.66, |
| "learning_rate": 0.0005051056740029337, |
| "loss": 1.1537, |
| "step": 182300 |
| }, |
| { |
| "epoch": 36.68, |
| "learning_rate": 0.000503697217119769, |
| "loss": 1.1526, |
| "step": 182400 |
| }, |
| { |
| "epoch": 36.7, |
| "learning_rate": 0.0005022902310337858, |
| "loss": 1.1616, |
| "step": 182500 |
| }, |
| { |
| "epoch": 36.72, |
| "learning_rate": 0.000500884718517842, |
| "loss": 1.1551, |
| "step": 182600 |
| }, |
| { |
| "epoch": 36.74, |
| "learning_rate": 0.0004994806823418908, |
| "loss": 1.1537, |
| "step": 182700 |
| }, |
| { |
| "epoch": 36.76, |
| "learning_rate": 0.0004980781252729766, |
| "loss": 1.1562, |
| "step": 182800 |
| }, |
| { |
| "epoch": 36.78, |
| "learning_rate": 0.000496677050075227, |
| "loss": 1.1569, |
| "step": 182900 |
| }, |
| { |
| "epoch": 36.8, |
| "learning_rate": 0.000495277459509851, |
| "loss": 1.1549, |
| "step": 183000 |
| }, |
| { |
| "epoch": 36.82, |
| "learning_rate": 0.0004938793563351308, |
| "loss": 1.1525, |
| "step": 183100 |
| }, |
| { |
| "epoch": 36.84, |
| "learning_rate": 0.0004924827433064183, |
| "loss": 1.1588, |
| "step": 183200 |
| }, |
| { |
| "epoch": 36.86, |
| "learning_rate": 0.0004910876231761266, |
| "loss": 1.1492, |
| "step": 183300 |
| }, |
| { |
| "epoch": 36.88, |
| "learning_rate": 0.0004896939986937287, |
| "loss": 1.1494, |
| "step": 183400 |
| }, |
| { |
| "epoch": 36.9, |
| "learning_rate": 0.000488301872605749, |
| "loss": 1.1526, |
| "step": 183500 |
| }, |
| { |
| "epoch": 36.92, |
| "learning_rate": 0.00048691124765575953, |
| "loss": 1.1514, |
| "step": 183600 |
| }, |
| { |
| "epoch": 36.94, |
| "learning_rate": 0.0004855221265843726, |
| "loss": 1.1562, |
| "step": 183700 |
| }, |
| { |
| "epoch": 36.96, |
| "learning_rate": 0.00048413451212923826, |
| "loss": 1.1559, |
| "step": 183800 |
| }, |
| { |
| "epoch": 36.98, |
| "learning_rate": 0.0004827622605958196, |
| "loss": 1.1564, |
| "step": 183900 |
| }, |
| { |
| "epoch": 37.0, |
| "learning_rate": 0.0004813776524399174, |
| "loss": 1.1529, |
| "step": 184000 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.416447232778523, |
| "eval_loss": 1.121274709701538, |
| "eval_runtime": 19.5197, |
| "eval_samples_per_second": 4076.295, |
| "eval_steps_per_second": 15.933, |
| "step": 184010 |
| }, |
| { |
| "epoch": 37.02, |
| "learning_rate": 0.0004799945590681072, |
| "loss": 1.1429, |
| "step": 184100 |
| }, |
| { |
| "epoch": 37.04, |
| "learning_rate": 0.0004786129832061578, |
| "loss": 1.143, |
| "step": 184200 |
| }, |
| { |
| "epoch": 37.06, |
| "learning_rate": 0.00047723292757684944, |
| "loss": 1.1453, |
| "step": 184300 |
| }, |
| { |
| "epoch": 37.08, |
| "learning_rate": 0.00047585439489996554, |
| "loss": 1.1486, |
| "step": 184400 |
| }, |
| { |
| "epoch": 37.1, |
| "learning_rate": 0.0004744773878922883, |
| "loss": 1.1424, |
| "step": 184500 |
| }, |
| { |
| "epoch": 37.12, |
| "learning_rate": 0.0004731019092675921, |
| "loss": 1.1469, |
| "step": 184600 |
| }, |
| { |
| "epoch": 37.14, |
| "learning_rate": 0.00047172796173664076, |
| "loss": 1.1446, |
| "step": 184700 |
| }, |
| { |
| "epoch": 37.16, |
| "learning_rate": 0.0004703555480071799, |
| "loss": 1.1482, |
| "step": 184800 |
| }, |
| { |
| "epoch": 37.18, |
| "learning_rate": 0.00046898467078393294, |
| "loss": 1.1476, |
| "step": 184900 |
| }, |
| { |
| "epoch": 37.2, |
| "learning_rate": 0.00046761533276859366, |
| "loss": 1.1463, |
| "step": 185000 |
| }, |
| { |
| "epoch": 37.22, |
| "learning_rate": 0.0004662475366598239, |
| "loss": 1.1455, |
| "step": 185100 |
| }, |
| { |
| "epoch": 37.24, |
| "learning_rate": 0.00046488128515324634, |
| "loss": 1.1469, |
| "step": 185200 |
| }, |
| { |
| "epoch": 37.26, |
| "learning_rate": 0.00046351658094144005, |
| "loss": 1.1439, |
| "step": 185300 |
| }, |
| { |
| "epoch": 37.28, |
| "learning_rate": 0.0004621534267139332, |
| "loss": 1.1458, |
| "step": 185400 |
| }, |
| { |
| "epoch": 37.3, |
| "learning_rate": 0.00046079182515720076, |
| "loss": 1.1454, |
| "step": 185500 |
| }, |
| { |
| "epoch": 37.32, |
| "learning_rate": 0.00045943177895465734, |
| "loss": 1.1487, |
| "step": 185600 |
| }, |
| { |
| "epoch": 37.34, |
| "learning_rate": 0.0004580732907866525, |
| "loss": 1.1434, |
| "step": 185700 |
| }, |
| { |
| "epoch": 37.36, |
| "learning_rate": 0.00045671636333046426, |
| "loss": 1.152, |
| "step": 185800 |
| }, |
| { |
| "epoch": 37.38, |
| "learning_rate": 0.00045536099926029585, |
| "loss": 1.1452, |
| "step": 185900 |
| }, |
| { |
| "epoch": 37.4, |
| "learning_rate": 0.00045400720124726915, |
| "loss": 1.1455, |
| "step": 186000 |
| }, |
| { |
| "epoch": 37.42, |
| "learning_rate": 0.00045265497195942, |
| "loss": 1.146, |
| "step": 186100 |
| }, |
| { |
| "epoch": 37.44, |
| "learning_rate": 0.00045130431406169156, |
| "loss": 1.1436, |
| "step": 186200 |
| }, |
| { |
| "epoch": 37.46, |
| "learning_rate": 0.00044995523021593126, |
| "loss": 1.1471, |
| "step": 186300 |
| }, |
| { |
| "epoch": 37.48, |
| "learning_rate": 0.0004486077230808838, |
| "loss": 1.1461, |
| "step": 186400 |
| }, |
| { |
| "epoch": 37.5, |
| "learning_rate": 0.00044726179531218727, |
| "loss": 1.1437, |
| "step": 186500 |
| }, |
| { |
| "epoch": 37.52, |
| "learning_rate": 0.00044591744956236595, |
| "loss": 1.1465, |
| "step": 186600 |
| }, |
| { |
| "epoch": 37.54, |
| "learning_rate": 0.00044457468848082713, |
| "loss": 1.1502, |
| "step": 186700 |
| }, |
| { |
| "epoch": 37.56, |
| "learning_rate": 0.00044323351471385527, |
| "loss": 1.1474, |
| "step": 186800 |
| }, |
| { |
| "epoch": 37.58, |
| "learning_rate": 0.0004418939309046065, |
| "loss": 1.1465, |
| "step": 186900 |
| }, |
| { |
| "epoch": 37.6, |
| "learning_rate": 0.00044056931171319787, |
| "loss": 1.1458, |
| "step": 187000 |
| }, |
| { |
| "epoch": 37.62, |
| "learning_rate": 0.0004392328997709341, |
| "loss": 1.1453, |
| "step": 187100 |
| }, |
| { |
| "epoch": 37.64, |
| "learning_rate": 0.00043789808567071725, |
| "loss": 1.1459, |
| "step": 187200 |
| }, |
| { |
| "epoch": 37.66, |
| "learning_rate": 0.0004365648720431699, |
| "loss": 1.144, |
| "step": 187300 |
| }, |
| { |
| "epoch": 37.68, |
| "learning_rate": 0.0004352332615157606, |
| "loss": 1.1478, |
| "step": 187400 |
| }, |
| { |
| "epoch": 37.7, |
| "learning_rate": 0.00043390325671279736, |
| "loss": 1.1437, |
| "step": 187500 |
| }, |
| { |
| "epoch": 37.72, |
| "learning_rate": 0.00043257486025542497, |
| "loss": 1.1467, |
| "step": 187600 |
| }, |
| { |
| "epoch": 37.74, |
| "learning_rate": 0.0004312480747616181, |
| "loss": 1.1486, |
| "step": 187700 |
| }, |
| { |
| "epoch": 37.76, |
| "learning_rate": 0.00042992290284617695, |
| "loss": 1.1459, |
| "step": 187800 |
| }, |
| { |
| "epoch": 37.78, |
| "learning_rate": 0.00042859934712072045, |
| "loss": 1.1466, |
| "step": 187900 |
| }, |
| { |
| "epoch": 37.8, |
| "learning_rate": 0.00042727741019368354, |
| "loss": 1.147, |
| "step": 188000 |
| }, |
| { |
| "epoch": 37.82, |
| "learning_rate": 0.0004259570946703109, |
| "loss": 1.1458, |
| "step": 188100 |
| }, |
| { |
| "epoch": 37.84, |
| "learning_rate": 0.00042463840315265153, |
| "loss": 1.1443, |
| "step": 188200 |
| }, |
| { |
| "epoch": 37.86, |
| "learning_rate": 0.00042332133823955317, |
| "loss": 1.1469, |
| "step": 188300 |
| }, |
| { |
| "epoch": 37.88, |
| "learning_rate": 0.00042200590252665886, |
| "loss": 1.1453, |
| "step": 188400 |
| }, |
| { |
| "epoch": 37.9, |
| "learning_rate": 0.0004206920986064004, |
| "loss": 1.1479, |
| "step": 188500 |
| }, |
| { |
| "epoch": 37.92, |
| "learning_rate": 0.00041937992906799436, |
| "loss": 1.1438, |
| "step": 188600 |
| }, |
| { |
| "epoch": 37.94, |
| "learning_rate": 0.00041806939649743484, |
| "loss": 1.1443, |
| "step": 188700 |
| }, |
| { |
| "epoch": 37.96, |
| "learning_rate": 0.00041676050347749116, |
| "loss": 1.1442, |
| "step": 188800 |
| }, |
| { |
| "epoch": 37.98, |
| "learning_rate": 0.00041545325258770095, |
| "loss": 1.1463, |
| "step": 188900 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.41712748843756287, |
| "eval_loss": 1.113813042640686, |
| "eval_runtime": 19.8226, |
| "eval_samples_per_second": 4013.998, |
| "eval_steps_per_second": 15.689, |
| "step": 188983 |
| }, |
| { |
| "epoch": 38.0, |
| "learning_rate": 0.00041414764640436614, |
| "loss": 1.1435, |
| "step": 189000 |
| }, |
| { |
| "epoch": 38.02, |
| "learning_rate": 0.0004128436875005459, |
| "loss": 1.1341, |
| "step": 189100 |
| }, |
| { |
| "epoch": 38.04, |
| "learning_rate": 0.0004115413784460545, |
| "loss": 1.1316, |
| "step": 189200 |
| }, |
| { |
| "epoch": 38.06, |
| "learning_rate": 0.00041024072180745373, |
| "loss": 1.1341, |
| "step": 189300 |
| }, |
| { |
| "epoch": 38.08, |
| "learning_rate": 0.00040894172014805, |
| "loss": 1.1296, |
| "step": 189400 |
| }, |
| { |
| "epoch": 38.1, |
| "learning_rate": 0.00040764437602788627, |
| "loss": 1.1366, |
| "step": 189500 |
| }, |
| { |
| "epoch": 38.12, |
| "learning_rate": 0.00040634869200374056, |
| "loss": 1.1403, |
| "step": 189600 |
| }, |
| { |
| "epoch": 38.14, |
| "learning_rate": 0.00040505467062911833, |
| "loss": 1.1389, |
| "step": 189700 |
| }, |
| { |
| "epoch": 38.16, |
| "learning_rate": 0.00040376231445424904, |
| "loss": 1.1316, |
| "step": 189800 |
| }, |
| { |
| "epoch": 38.18, |
| "learning_rate": 0.00040247162602607923, |
| "loss": 1.1382, |
| "step": 189900 |
| }, |
| { |
| "epoch": 38.2, |
| "learning_rate": 0.00040118260788826965, |
| "loss": 1.1355, |
| "step": 190000 |
| }, |
| { |
| "epoch": 38.22, |
| "learning_rate": 0.00039989526258118915, |
| "loss": 1.1365, |
| "step": 190100 |
| }, |
| { |
| "epoch": 38.24, |
| "learning_rate": 0.00039860959264191007, |
| "loss": 1.136, |
| "step": 190200 |
| }, |
| { |
| "epoch": 38.26, |
| "learning_rate": 0.0003973256006042017, |
| "loss": 1.1405, |
| "step": 190300 |
| }, |
| { |
| "epoch": 38.28, |
| "learning_rate": 0.00039604328899852786, |
| "loss": 1.1402, |
| "step": 190400 |
| }, |
| { |
| "epoch": 38.3, |
| "learning_rate": 0.00039476266035204003, |
| "loss": 1.1375, |
| "step": 190500 |
| }, |
| { |
| "epoch": 38.33, |
| "learning_rate": 0.00039348371718857386, |
| "loss": 1.1352, |
| "step": 190600 |
| }, |
| { |
| "epoch": 38.35, |
| "learning_rate": 0.0003922064620286414, |
| "loss": 1.1363, |
| "step": 190700 |
| }, |
| { |
| "epoch": 38.37, |
| "learning_rate": 0.00039093089738942975, |
| "loss": 1.1377, |
| "step": 190800 |
| }, |
| { |
| "epoch": 38.39, |
| "learning_rate": 0.0003896570257847937, |
| "loss": 1.1385, |
| "step": 190900 |
| }, |
| { |
| "epoch": 38.41, |
| "learning_rate": 0.00038839756308466366, |
| "loss": 1.1418, |
| "step": 191000 |
| }, |
| { |
| "epoch": 38.43, |
| "learning_rate": 0.0003871270680844698, |
| "loss": 1.1369, |
| "step": 191100 |
| }, |
| { |
| "epoch": 38.45, |
| "learning_rate": 0.0003858582736153553, |
| "loss": 1.1408, |
| "step": 191200 |
| }, |
| { |
| "epoch": 38.47, |
| "learning_rate": 0.00038459118217783184, |
| "loss": 1.1376, |
| "step": 191300 |
| }, |
| { |
| "epoch": 38.49, |
| "learning_rate": 0.000383325796269056, |
| "loss": 1.1401, |
| "step": 191400 |
| }, |
| { |
| "epoch": 38.51, |
| "learning_rate": 0.0003820621183828224, |
| "loss": 1.1371, |
| "step": 191500 |
| }, |
| { |
| "epoch": 38.53, |
| "learning_rate": 0.0003808001510095603, |
| "loss": 1.1346, |
| "step": 191600 |
| }, |
| { |
| "epoch": 38.55, |
| "learning_rate": 0.0003795398966363266, |
| "loss": 1.1439, |
| "step": 191700 |
| }, |
| { |
| "epoch": 38.57, |
| "learning_rate": 0.0003782813577468032, |
| "loss": 1.1426, |
| "step": 191800 |
| }, |
| { |
| "epoch": 38.59, |
| "learning_rate": 0.00037702453682129114, |
| "loss": 1.1383, |
| "step": 191900 |
| }, |
| { |
| "epoch": 38.61, |
| "learning_rate": 0.00037576943633670605, |
| "loss": 1.1362, |
| "step": 192000 |
| }, |
| { |
| "epoch": 38.63, |
| "learning_rate": 0.0003745160587665715, |
| "loss": 1.1356, |
| "step": 192100 |
| }, |
| { |
| "epoch": 38.65, |
| "learning_rate": 0.000373264406581017, |
| "loss": 1.1333, |
| "step": 192200 |
| }, |
| { |
| "epoch": 38.67, |
| "learning_rate": 0.00037201448224677153, |
| "loss": 1.14, |
| "step": 192300 |
| }, |
| { |
| "epoch": 38.69, |
| "learning_rate": 0.0003707662882271585, |
| "loss": 1.1349, |
| "step": 192400 |
| }, |
| { |
| "epoch": 38.71, |
| "learning_rate": 0.00036951982698209094, |
| "loss": 1.1371, |
| "step": 192500 |
| }, |
| { |
| "epoch": 38.73, |
| "learning_rate": 0.0003682751009680678, |
| "loss": 1.1391, |
| "step": 192600 |
| }, |
| { |
| "epoch": 38.75, |
| "learning_rate": 0.0003670321126381676, |
| "loss": 1.134, |
| "step": 192700 |
| }, |
| { |
| "epoch": 38.77, |
| "learning_rate": 0.000365790864442045, |
| "loss": 1.1358, |
| "step": 192800 |
| }, |
| { |
| "epoch": 38.79, |
| "learning_rate": 0.0003645513588259242, |
| "loss": 1.1331, |
| "step": 192900 |
| }, |
| { |
| "epoch": 38.81, |
| "learning_rate": 0.0003633135982325958, |
| "loss": 1.1376, |
| "step": 193000 |
| }, |
| { |
| "epoch": 38.83, |
| "learning_rate": 0.0003620775851014114, |
| "loss": 1.1353, |
| "step": 193100 |
| }, |
| { |
| "epoch": 38.85, |
| "learning_rate": 0.00036084332186827895, |
| "loss": 1.1376, |
| "step": 193200 |
| }, |
| { |
| "epoch": 38.87, |
| "learning_rate": 0.0003596231273926687, |
| "loss": 1.1346, |
| "step": 193300 |
| }, |
| { |
| "epoch": 38.89, |
| "learning_rate": 0.0003583923536899551, |
| "loss": 1.1375, |
| "step": 193400 |
| }, |
| { |
| "epoch": 38.91, |
| "learning_rate": 0.0003571633371480666, |
| "loss": 1.1387, |
| "step": 193500 |
| }, |
| { |
| "epoch": 38.93, |
| "learning_rate": 0.0003559360801891219, |
| "loss": 1.1375, |
| "step": 193600 |
| }, |
| { |
| "epoch": 38.95, |
| "learning_rate": 0.00035471058523177265, |
| "loss": 1.1357, |
| "step": 193700 |
| }, |
| { |
| "epoch": 38.97, |
| "learning_rate": 0.00035348685469119664, |
| "loss": 1.136, |
| "step": 193800 |
| }, |
| { |
| "epoch": 38.99, |
| "learning_rate": 0.0003522648909790957, |
| "loss": 1.1352, |
| "step": 193900 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.41790908246302183, |
| "eval_loss": 1.1047524213790894, |
| "eval_runtime": 19.8895, |
| "eval_samples_per_second": 4000.506, |
| "eval_steps_per_second": 15.636, |
| "step": 193956 |
| }, |
| { |
| "epoch": 39.01, |
| "learning_rate": 0.000351044696503689, |
| "loss": 1.1315, |
| "step": 194000 |
| }, |
| { |
| "epoch": 39.03, |
| "learning_rate": 0.0003498262736697096, |
| "loss": 1.1213, |
| "step": 194100 |
| }, |
| { |
| "epoch": 39.05, |
| "learning_rate": 0.00034860962487839767, |
| "loss": 1.126, |
| "step": 194200 |
| }, |
| { |
| "epoch": 39.07, |
| "learning_rate": 0.00034739475252749854, |
| "loss": 1.125, |
| "step": 194300 |
| }, |
| { |
| "epoch": 39.09, |
| "learning_rate": 0.0003461816590112558, |
| "loss": 1.1259, |
| "step": 194400 |
| }, |
| { |
| "epoch": 39.11, |
| "learning_rate": 0.00034497034672040824, |
| "loss": 1.1266, |
| "step": 194500 |
| }, |
| { |
| "epoch": 39.13, |
| "learning_rate": 0.00034376081804218263, |
| "loss": 1.1286, |
| "step": 194600 |
| }, |
| { |
| "epoch": 39.15, |
| "learning_rate": 0.00034255307536029206, |
| "loss": 1.1286, |
| "step": 194700 |
| }, |
| { |
| "epoch": 39.17, |
| "learning_rate": 0.00034134712105492946, |
| "loss": 1.1294, |
| "step": 194800 |
| }, |
| { |
| "epoch": 39.19, |
| "learning_rate": 0.00034014295750276326, |
| "loss": 1.1256, |
| "step": 194900 |
| }, |
| { |
| "epoch": 39.21, |
| "learning_rate": 0.0003389405870769323, |
| "loss": 1.1315, |
| "step": 195000 |
| }, |
| { |
| "epoch": 39.23, |
| "learning_rate": 0.00033774001214704184, |
| "loss": 1.1328, |
| "step": 195100 |
| }, |
| { |
| "epoch": 39.25, |
| "learning_rate": 0.00033654123507915887, |
| "loss": 1.1278, |
| "step": 195200 |
| }, |
| { |
| "epoch": 39.27, |
| "learning_rate": 0.0003353562190853805, |
| "loss": 1.1284, |
| "step": 195300 |
| }, |
| { |
| "epoch": 39.29, |
| "learning_rate": 0.00033416102678803547, |
| "loss": 1.1292, |
| "step": 195400 |
| }, |
| { |
| "epoch": 39.31, |
| "learning_rate": 0.00033296763940608335, |
| "loss": 1.1335, |
| "step": 195500 |
| }, |
| { |
| "epoch": 39.33, |
| "learning_rate": 0.0003317760592914265, |
| "loss": 1.1264, |
| "step": 195600 |
| }, |
| { |
| "epoch": 39.35, |
| "learning_rate": 0.00033058628879240477, |
| "loss": 1.1293, |
| "step": 195700 |
| }, |
| { |
| "epoch": 39.37, |
| "learning_rate": 0.0003293983302537923, |
| "loss": 1.1275, |
| "step": 195800 |
| }, |
| { |
| "epoch": 39.39, |
| "learning_rate": 0.00032821218601679066, |
| "loss": 1.127, |
| "step": 195900 |
| }, |
| { |
| "epoch": 39.41, |
| "learning_rate": 0.00032702785841902783, |
| "loss": 1.1287, |
| "step": 196000 |
| }, |
| { |
| "epoch": 39.43, |
| "learning_rate": 0.0003258453497945503, |
| "loss": 1.1278, |
| "step": 196100 |
| }, |
| { |
| "epoch": 39.45, |
| "learning_rate": 0.0003246646624738207, |
| "loss": 1.1296, |
| "step": 196200 |
| }, |
| { |
| "epoch": 39.47, |
| "learning_rate": 0.00032348579878371114, |
| "loss": 1.1309, |
| "step": 196300 |
| }, |
| { |
| "epoch": 39.49, |
| "learning_rate": 0.00032230876104750046, |
| "loss": 1.1274, |
| "step": 196400 |
| }, |
| { |
| "epoch": 39.51, |
| "learning_rate": 0.000321133551584869, |
| "loss": 1.1295, |
| "step": 196500 |
| }, |
| { |
| "epoch": 39.53, |
| "learning_rate": 0.00031996017271189437, |
| "loss": 1.1271, |
| "step": 196600 |
| }, |
| { |
| "epoch": 39.55, |
| "learning_rate": 0.0003187886267410448, |
| "loss": 1.1254, |
| "step": 196700 |
| }, |
| { |
| "epoch": 39.57, |
| "learning_rate": 0.0003176189159811782, |
| "loss": 1.1303, |
| "step": 196800 |
| }, |
| { |
| "epoch": 39.59, |
| "learning_rate": 0.00031645104273753474, |
| "loss": 1.1273, |
| "step": 196900 |
| }, |
| { |
| "epoch": 39.61, |
| "learning_rate": 0.0003152850093117338, |
| "loss": 1.13, |
| "step": 197000 |
| }, |
| { |
| "epoch": 39.63, |
| "learning_rate": 0.0003141208180017678, |
| "loss": 1.1253, |
| "step": 197100 |
| }, |
| { |
| "epoch": 39.65, |
| "learning_rate": 0.00031295847110199976, |
| "loss": 1.1286, |
| "step": 197200 |
| }, |
| { |
| "epoch": 39.67, |
| "learning_rate": 0.0003117979709031578, |
| "loss": 1.1295, |
| "step": 197300 |
| }, |
| { |
| "epoch": 39.69, |
| "learning_rate": 0.0003106393196923304, |
| "loss": 1.1279, |
| "step": 197400 |
| }, |
| { |
| "epoch": 39.71, |
| "learning_rate": 0.00030949407858107237, |
| "loss": 1.1279, |
| "step": 197500 |
| }, |
| { |
| "epoch": 39.73, |
| "learning_rate": 0.00030833911364617274, |
| "loss": 1.1268, |
| "step": 197600 |
| }, |
| { |
| "epoch": 39.75, |
| "learning_rate": 0.0003071860045159272, |
| "loss": 1.1229, |
| "step": 197700 |
| }, |
| { |
| "epoch": 39.77, |
| "learning_rate": 0.00030603475346285824, |
| "loss": 1.1263, |
| "step": 197800 |
| }, |
| { |
| "epoch": 39.79, |
| "learning_rate": 0.0003048853627558264, |
| "loss": 1.1254, |
| "step": 197900 |
| }, |
| { |
| "epoch": 39.81, |
| "learning_rate": 0.0003037378346600259, |
| "loss": 1.1294, |
| "step": 198000 |
| }, |
| { |
| "epoch": 39.83, |
| "learning_rate": 0.0003025921714369792, |
| "loss": 1.1278, |
| "step": 198100 |
| }, |
| { |
| "epoch": 39.85, |
| "learning_rate": 0.0003014483753445349, |
| "loss": 1.1253, |
| "step": 198200 |
| }, |
| { |
| "epoch": 39.87, |
| "learning_rate": 0.000300306448636861, |
| "loss": 1.1272, |
| "step": 198300 |
| }, |
| { |
| "epoch": 39.89, |
| "learning_rate": 0.0002991663935644423, |
| "loss": 1.1225, |
| "step": 198400 |
| }, |
| { |
| "epoch": 39.91, |
| "learning_rate": 0.0002980282123740735, |
| "loss": 1.1233, |
| "step": 198500 |
| }, |
| { |
| "epoch": 39.93, |
| "learning_rate": 0.00029689190730885747, |
| "loss": 1.1229, |
| "step": 198600 |
| }, |
| { |
| "epoch": 39.95, |
| "learning_rate": 0.00029575748060819946, |
| "loss": 1.1273, |
| "step": 198700 |
| }, |
| { |
| "epoch": 39.97, |
| "learning_rate": 0.00029462493450780307, |
| "loss": 1.1271, |
| "step": 198800 |
| }, |
| { |
| "epoch": 39.99, |
| "learning_rate": 0.0002934942712396647, |
| "loss": 1.1259, |
| "step": 198900 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.41850995375025135, |
| "eval_loss": 1.096311330795288, |
| "eval_runtime": 19.5694, |
| "eval_samples_per_second": 4065.946, |
| "eval_steps_per_second": 15.892, |
| "step": 198930 |
| }, |
| { |
| "epoch": 40.01, |
| "learning_rate": 0.0002923654930320711, |
| "loss": 1.116, |
| "step": 199000 |
| }, |
| { |
| "epoch": 40.03, |
| "learning_rate": 0.0002912386021095936, |
| "loss": 1.1166, |
| "step": 199100 |
| }, |
| { |
| "epoch": 40.05, |
| "learning_rate": 0.00029011360069308446, |
| "loss": 1.1156, |
| "step": 199200 |
| }, |
| { |
| "epoch": 40.07, |
| "learning_rate": 0.0002889904909996713, |
| "loss": 1.1193, |
| "step": 199300 |
| }, |
| { |
| "epoch": 40.09, |
| "learning_rate": 0.0002878692752427543, |
| "loss": 1.1196, |
| "step": 199400 |
| }, |
| { |
| "epoch": 40.11, |
| "learning_rate": 0.0002867611394349388, |
| "loss": 1.123, |
| "step": 199500 |
| }, |
| { |
| "epoch": 40.13, |
| "learning_rate": 0.0002856436991818517, |
| "loss": 1.117, |
| "step": 199600 |
| }, |
| { |
| "epoch": 40.15, |
| "learning_rate": 0.00028452815946104537, |
| "loss": 1.1189, |
| "step": 199700 |
| }, |
| { |
| "epoch": 40.17, |
| "learning_rate": 0.00028341452247099996, |
| "loss": 1.1162, |
| "step": 199800 |
| }, |
| { |
| "epoch": 40.2, |
| "learning_rate": 0.0002823027904064475, |
| "loss": 1.1149, |
| "step": 199900 |
| }, |
| { |
| "epoch": 40.22, |
| "learning_rate": 0.00028119296545836515, |
| "loss": 1.1151, |
| "step": 200000 |
| }, |
| { |
| "epoch": 40.24, |
| "learning_rate": 0.000280085049813972, |
| "loss": 1.1168, |
| "step": 200100 |
| }, |
| { |
| "epoch": 40.26, |
| "learning_rate": 0.00027897904565672314, |
| "loss": 1.1182, |
| "step": 200200 |
| }, |
| { |
| "epoch": 40.28, |
| "learning_rate": 0.00027787495516630776, |
| "loss": 1.1149, |
| "step": 200300 |
| }, |
| { |
| "epoch": 40.3, |
| "learning_rate": 0.0002767727805186432, |
| "loss": 1.1155, |
| "step": 200400 |
| }, |
| { |
| "epoch": 40.32, |
| "learning_rate": 0.0002756725238858715, |
| "loss": 1.1182, |
| "step": 200500 |
| }, |
| { |
| "epoch": 40.34, |
| "learning_rate": 0.00027457418743635374, |
| "loss": 1.1201, |
| "step": 200600 |
| }, |
| { |
| "epoch": 40.36, |
| "learning_rate": 0.00027347777333466746, |
| "loss": 1.1188, |
| "step": 200700 |
| }, |
| { |
| "epoch": 40.38, |
| "learning_rate": 0.0002723832837416017, |
| "loss": 1.1222, |
| "step": 200800 |
| }, |
| { |
| "epoch": 40.4, |
| "learning_rate": 0.0002712907208141528, |
| "loss": 1.1186, |
| "step": 200900 |
| }, |
| { |
| "epoch": 40.42, |
| "learning_rate": 0.00027020008670551935, |
| "loss": 1.121, |
| "step": 201000 |
| }, |
| { |
| "epoch": 40.44, |
| "learning_rate": 0.0002691113835650995, |
| "loss": 1.1172, |
| "step": 201100 |
| }, |
| { |
| "epoch": 40.46, |
| "learning_rate": 0.0002680246135384853, |
| "loss": 1.1223, |
| "step": 201200 |
| }, |
| { |
| "epoch": 40.48, |
| "learning_rate": 0.00026693977876746, |
| "loss": 1.118, |
| "step": 201300 |
| }, |
| { |
| "epoch": 40.5, |
| "learning_rate": 0.0002658568813899914, |
| "loss": 1.1155, |
| "step": 201400 |
| }, |
| { |
| "epoch": 40.52, |
| "learning_rate": 0.00026478672351106777, |
| "loss": 1.1191, |
| "step": 201500 |
| }, |
| { |
| "epoch": 40.54, |
| "learning_rate": 0.00026370768789222914, |
| "loss": 1.1167, |
| "step": 201600 |
| }, |
| { |
| "epoch": 40.56, |
| "learning_rate": 0.0002626305960366815, |
| "loss": 1.1194, |
| "step": 201700 |
| }, |
| { |
| "epoch": 40.58, |
| "learning_rate": 0.00026155545006713424, |
| "loss": 1.1171, |
| "step": 201800 |
| }, |
| { |
| "epoch": 40.6, |
| "learning_rate": 0.00026048225210246063, |
| "loss": 1.1199, |
| "step": 201900 |
| }, |
| { |
| "epoch": 40.62, |
| "learning_rate": 0.0002594110042576962, |
| "loss": 1.1161, |
| "step": 202000 |
| }, |
| { |
| "epoch": 40.64, |
| "learning_rate": 0.00025834170864403287, |
| "loss": 1.1146, |
| "step": 202100 |
| }, |
| { |
| "epoch": 40.66, |
| "learning_rate": 0.00025727436736881505, |
| "loss": 1.117, |
| "step": 202200 |
| }, |
| { |
| "epoch": 40.68, |
| "learning_rate": 0.00025620898253553515, |
| "loss": 1.1194, |
| "step": 202300 |
| }, |
| { |
| "epoch": 40.7, |
| "learning_rate": 0.00025514555624383053, |
| "loss": 1.1188, |
| "step": 202400 |
| }, |
| { |
| "epoch": 40.72, |
| "learning_rate": 0.0002540840905894784, |
| "loss": 1.1166, |
| "step": 202500 |
| }, |
| { |
| "epoch": 40.74, |
| "learning_rate": 0.0002530245876643923, |
| "loss": 1.1188, |
| "step": 202600 |
| }, |
| { |
| "epoch": 40.76, |
| "learning_rate": 0.0002519670495566169, |
| "loss": 1.1191, |
| "step": 202700 |
| }, |
| { |
| "epoch": 40.78, |
| "learning_rate": 0.00025091147835032526, |
| "loss": 1.1151, |
| "step": 202800 |
| }, |
| { |
| "epoch": 40.8, |
| "learning_rate": 0.00024985787612581423, |
| "loss": 1.1194, |
| "step": 202900 |
| }, |
| { |
| "epoch": 40.82, |
| "learning_rate": 0.00024880624495950024, |
| "loss": 1.1167, |
| "step": 203000 |
| }, |
| { |
| "epoch": 40.84, |
| "learning_rate": 0.00024775658692391416, |
| "loss": 1.1208, |
| "step": 203100 |
| }, |
| { |
| "epoch": 40.86, |
| "learning_rate": 0.0002467089040876995, |
| "loss": 1.1195, |
| "step": 203200 |
| }, |
| { |
| "epoch": 40.88, |
| "learning_rate": 0.00024566319851560675, |
| "loss": 1.1142, |
| "step": 203300 |
| }, |
| { |
| "epoch": 40.9, |
| "learning_rate": 0.00024461947226848984, |
| "loss": 1.1159, |
| "step": 203400 |
| }, |
| { |
| "epoch": 40.92, |
| "learning_rate": 0.00024357772740330076, |
| "loss": 1.1104, |
| "step": 203500 |
| }, |
| { |
| "epoch": 40.94, |
| "learning_rate": 0.00024254835376265563, |
| "loss": 1.1169, |
| "step": 203600 |
| }, |
| { |
| "epoch": 40.96, |
| "learning_rate": 0.00024151055795158666, |
| "loss": 1.1174, |
| "step": 203700 |
| }, |
| { |
| "epoch": 40.98, |
| "learning_rate": 0.00024047474964942626, |
| "loss": 1.1196, |
| "step": 203800 |
| }, |
| { |
| "epoch": 41.0, |
| "learning_rate": 0.00023944093089752302, |
| "loss": 1.1194, |
| "step": 203900 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.41941595374516055, |
| "eval_loss": 1.0883480310440063, |
| "eval_runtime": 19.8865, |
| "eval_samples_per_second": 4001.112, |
| "eval_steps_per_second": 15.639, |
| "step": 203903 |
| }, |
| { |
| "epoch": 41.02, |
| "learning_rate": 0.00023840910373330374, |
| "loss": 1.1042, |
| "step": 204000 |
| }, |
| { |
| "epoch": 41.04, |
| "learning_rate": 0.00023737927019027105, |
| "loss": 1.1048, |
| "step": 204100 |
| }, |
| { |
| "epoch": 41.06, |
| "learning_rate": 0.00023635143229799844, |
| "loss": 1.1014, |
| "step": 204200 |
| }, |
| { |
| "epoch": 41.08, |
| "learning_rate": 0.0002353255920821265, |
| "loss": 1.1069, |
| "step": 204300 |
| }, |
| { |
| "epoch": 41.1, |
| "learning_rate": 0.0002343017515643582, |
| "loss": 1.107, |
| "step": 204400 |
| }, |
| { |
| "epoch": 41.12, |
| "learning_rate": 0.00023327991276245617, |
| "loss": 1.1049, |
| "step": 204500 |
| }, |
| { |
| "epoch": 41.14, |
| "learning_rate": 0.00023226007769023795, |
| "loss": 1.106, |
| "step": 204600 |
| }, |
| { |
| "epoch": 41.16, |
| "learning_rate": 0.00023124224835757254, |
| "loss": 1.109, |
| "step": 204700 |
| }, |
| { |
| "epoch": 41.18, |
| "learning_rate": 0.00023022642677037505, |
| "loss": 1.1107, |
| "step": 204800 |
| }, |
| { |
| "epoch": 41.2, |
| "learning_rate": 0.0002292126149306048, |
| "loss": 1.1064, |
| "step": 204900 |
| }, |
| { |
| "epoch": 41.22, |
| "learning_rate": 0.00022820081483625993, |
| "loss": 1.1077, |
| "step": 205000 |
| }, |
| { |
| "epoch": 41.24, |
| "learning_rate": 0.00022719102848137426, |
| "loss": 1.1066, |
| "step": 205100 |
| }, |
| { |
| "epoch": 41.26, |
| "learning_rate": 0.0002261832578560119, |
| "loss": 1.1104, |
| "step": 205200 |
| }, |
| { |
| "epoch": 41.28, |
| "learning_rate": 0.00022517750494626537, |
| "loss": 1.1109, |
| "step": 205300 |
| }, |
| { |
| "epoch": 41.3, |
| "learning_rate": 0.00022417377173425068, |
| "loss": 1.1095, |
| "step": 205400 |
| }, |
| { |
| "epoch": 41.32, |
| "learning_rate": 0.00022317206019810355, |
| "loss": 1.1118, |
| "step": 205500 |
| }, |
| { |
| "epoch": 41.34, |
| "learning_rate": 0.00022218235916729556, |
| "loss": 1.1085, |
| "step": 205600 |
| }, |
| { |
| "epoch": 41.36, |
| "learning_rate": 0.00022118467663540713, |
| "loss": 1.1091, |
| "step": 205700 |
| }, |
| { |
| "epoch": 41.38, |
| "learning_rate": 0.00022018902167022874, |
| "loss": 1.1113, |
| "step": 205800 |
| }, |
| { |
| "epoch": 41.4, |
| "learning_rate": 0.00021919539623397572, |
| "loss": 1.1041, |
| "step": 205900 |
| }, |
| { |
| "epoch": 41.42, |
| "learning_rate": 0.00021820380228486328, |
| "loss": 1.1076, |
| "step": 206000 |
| }, |
| { |
| "epoch": 41.44, |
| "learning_rate": 0.00021721424177710335, |
| "loss": 1.1081, |
| "step": 206100 |
| }, |
| { |
| "epoch": 41.46, |
| "learning_rate": 0.00021622671666089957, |
| "loss": 1.1087, |
| "step": 206200 |
| }, |
| { |
| "epoch": 41.48, |
| "learning_rate": 0.00021524122888244514, |
| "loss": 1.1065, |
| "step": 206300 |
| }, |
| { |
| "epoch": 41.5, |
| "learning_rate": 0.00021425778038391768, |
| "loss": 1.1096, |
| "step": 206400 |
| }, |
| { |
| "epoch": 41.52, |
| "learning_rate": 0.00021327637310347648, |
| "loss": 1.1105, |
| "step": 206500 |
| }, |
| { |
| "epoch": 41.54, |
| "learning_rate": 0.0002122970089752567, |
| "loss": 1.109, |
| "step": 206600 |
| }, |
| { |
| "epoch": 41.56, |
| "learning_rate": 0.00021131968992936804, |
| "loss": 1.1114, |
| "step": 206700 |
| }, |
| { |
| "epoch": 41.58, |
| "learning_rate": 0.00021034441789188953, |
| "loss": 1.1096, |
| "step": 206800 |
| }, |
| { |
| "epoch": 41.6, |
| "learning_rate": 0.00020937119478486616, |
| "loss": 1.1161, |
| "step": 206900 |
| }, |
| { |
| "epoch": 41.62, |
| "learning_rate": 0.00020840002252630428, |
| "loss": 1.1108, |
| "step": 207000 |
| }, |
| { |
| "epoch": 41.64, |
| "learning_rate": 0.00020743090303016923, |
| "loss": 1.1092, |
| "step": 207100 |
| }, |
| { |
| "epoch": 41.66, |
| "learning_rate": 0.00020646383820638022, |
| "loss": 1.1069, |
| "step": 207200 |
| }, |
| { |
| "epoch": 41.68, |
| "learning_rate": 0.00020549882996080793, |
| "loss": 1.108, |
| "step": 207300 |
| }, |
| { |
| "epoch": 41.7, |
| "learning_rate": 0.0002045358801952689, |
| "loss": 1.1072, |
| "step": 207400 |
| }, |
| { |
| "epoch": 41.72, |
| "learning_rate": 0.00020357499080752366, |
| "loss": 1.108, |
| "step": 207500 |
| }, |
| { |
| "epoch": 41.74, |
| "learning_rate": 0.00020261616369127183, |
| "loss": 1.1074, |
| "step": 207600 |
| }, |
| { |
| "epoch": 41.76, |
| "learning_rate": 0.0002016689581419083, |
| "loss": 1.1075, |
| "step": 207700 |
| }, |
| { |
| "epoch": 41.78, |
| "learning_rate": 0.00020071424056369538, |
| "loss": 1.1085, |
| "step": 207800 |
| }, |
| { |
| "epoch": 41.8, |
| "learning_rate": 0.00019976159089487967, |
| "loss": 1.1082, |
| "step": 207900 |
| }, |
| { |
| "epoch": 41.82, |
| "learning_rate": 0.0001988110110129225, |
| "loss": 1.1102, |
| "step": 208000 |
| }, |
| { |
| "epoch": 41.84, |
| "learning_rate": 0.00019786250279120606, |
| "loss": 1.1108, |
| "step": 208100 |
| }, |
| { |
| "epoch": 41.86, |
| "learning_rate": 0.00019691606809902964, |
| "loss": 1.108, |
| "step": 208200 |
| }, |
| { |
| "epoch": 41.88, |
| "learning_rate": 0.00019597170880160568, |
| "loss": 1.1053, |
| "step": 208300 |
| }, |
| { |
| "epoch": 41.9, |
| "learning_rate": 0.00019502942676005705, |
| "loss": 1.1093, |
| "step": 208400 |
| }, |
| { |
| "epoch": 41.92, |
| "learning_rate": 0.00019408922383141235, |
| "loss": 1.109, |
| "step": 208500 |
| }, |
| { |
| "epoch": 41.94, |
| "learning_rate": 0.0001931511018686033, |
| "loss": 1.1064, |
| "step": 208600 |
| }, |
| { |
| "epoch": 41.96, |
| "learning_rate": 0.0001922150627204597, |
| "loss": 1.1058, |
| "step": 208700 |
| }, |
| { |
| "epoch": 41.98, |
| "learning_rate": 0.00019128110823170717, |
| "loss": 1.1035, |
| "step": 208800 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.42008968684058473, |
| "eval_loss": 1.080112099647522, |
| "eval_runtime": 19.7792, |
| "eval_samples_per_second": 4022.819, |
| "eval_steps_per_second": 15.724, |
| "step": 208876 |
| }, |
| { |
| "epoch": 42.0, |
| "learning_rate": 0.00019034924024296282, |
| "loss": 1.1021, |
| "step": 208900 |
| }, |
| { |
| "epoch": 42.02, |
| "learning_rate": 0.00018941946059073192, |
| "loss": 1.097, |
| "step": 209000 |
| }, |
| { |
| "epoch": 42.04, |
| "learning_rate": 0.0001884917711074033, |
| "loss": 1.0949, |
| "step": 209100 |
| }, |
| { |
| "epoch": 42.07, |
| "learning_rate": 0.00018756617362124722, |
| "loss": 1.094, |
| "step": 209200 |
| }, |
| { |
| "epoch": 42.09, |
| "learning_rate": 0.00018664266995641125, |
| "loss": 1.0974, |
| "step": 209300 |
| }, |
| { |
| "epoch": 42.11, |
| "learning_rate": 0.00018572126193291613, |
| "loss": 1.1003, |
| "step": 209400 |
| }, |
| { |
| "epoch": 42.13, |
| "learning_rate": 0.00018480195136665227, |
| "loss": 1.0985, |
| "step": 209500 |
| }, |
| { |
| "epoch": 42.15, |
| "learning_rate": 0.00018388474006937703, |
| "loss": 1.0984, |
| "step": 209600 |
| }, |
| { |
| "epoch": 42.17, |
| "learning_rate": 0.0001829787705446627, |
| "loss": 1.0988, |
| "step": 209700 |
| }, |
| { |
| "epoch": 42.19, |
| "learning_rate": 0.0001820657421663692, |
| "loss": 1.0996, |
| "step": 209800 |
| }, |
| { |
| "epoch": 42.21, |
| "learning_rate": 0.00018115481844952584, |
| "loss": 1.099, |
| "step": 209900 |
| }, |
| { |
| "epoch": 42.23, |
| "learning_rate": 0.00018024600118936058, |
| "loss": 1.0987, |
| "step": 210000 |
| }, |
| { |
| "epoch": 42.25, |
| "learning_rate": 0.0001793392921769506, |
| "loss": 1.1014, |
| "step": 210100 |
| }, |
| { |
| "epoch": 42.27, |
| "learning_rate": 0.0001784346931992184, |
| "loss": 1.1018, |
| "step": 210200 |
| }, |
| { |
| "epoch": 42.29, |
| "learning_rate": 0.0001775322060389281, |
| "loss": 1.0979, |
| "step": 210300 |
| }, |
| { |
| "epoch": 42.31, |
| "learning_rate": 0.000176631832474681, |
| "loss": 1.1027, |
| "step": 210400 |
| }, |
| { |
| "epoch": 42.33, |
| "learning_rate": 0.00017573357428091394, |
| "loss": 1.0946, |
| "step": 210500 |
| }, |
| { |
| "epoch": 42.35, |
| "learning_rate": 0.0001748374332278946, |
| "loss": 1.0991, |
| "step": 210600 |
| }, |
| { |
| "epoch": 42.37, |
| "learning_rate": 0.00017394341108171812, |
| "loss": 1.1001, |
| "step": 210700 |
| }, |
| { |
| "epoch": 42.39, |
| "learning_rate": 0.00017305150960430338, |
| "loss": 1.0999, |
| "step": 210800 |
| }, |
| { |
| "epoch": 42.41, |
| "learning_rate": 0.00017216173055339067, |
| "loss": 1.1005, |
| "step": 210900 |
| }, |
| { |
| "epoch": 42.43, |
| "learning_rate": 0.0001712740756825369, |
| "loss": 1.0971, |
| "step": 211000 |
| }, |
| { |
| "epoch": 42.45, |
| "learning_rate": 0.0001703885467411133, |
| "loss": 1.1002, |
| "step": 211100 |
| }, |
| { |
| "epoch": 42.47, |
| "learning_rate": 0.00016950514547430003, |
| "loss": 1.1011, |
| "step": 211200 |
| }, |
| { |
| "epoch": 42.49, |
| "learning_rate": 0.00016862387362308565, |
| "loss": 1.1001, |
| "step": 211300 |
| }, |
| { |
| "epoch": 42.51, |
| "learning_rate": 0.00016774473292426124, |
| "loss": 1.0965, |
| "step": 211400 |
| }, |
| { |
| "epoch": 42.53, |
| "learning_rate": 0.00016686772511041823, |
| "loss": 1.0978, |
| "step": 211500 |
| }, |
| { |
| "epoch": 42.55, |
| "learning_rate": 0.00016599285190994393, |
| "loss": 1.098, |
| "step": 211600 |
| }, |
| { |
| "epoch": 42.57, |
| "learning_rate": 0.00016512011504701954, |
| "loss": 1.0967, |
| "step": 211700 |
| }, |
| { |
| "epoch": 42.59, |
| "learning_rate": 0.00016425821164064812, |
| "loss": 1.0975, |
| "step": 211800 |
| }, |
| { |
| "epoch": 42.61, |
| "learning_rate": 0.00016338973120231013, |
| "loss": 1.1008, |
| "step": 211900 |
| }, |
| { |
| "epoch": 42.63, |
| "learning_rate": 0.00016252339223169542, |
| "loss": 1.1004, |
| "step": 212000 |
| }, |
| { |
| "epoch": 42.65, |
| "learning_rate": 0.0001616591964361662, |
| "loss": 1.0986, |
| "step": 212100 |
| }, |
| { |
| "epoch": 42.67, |
| "learning_rate": 0.00016079714551885991, |
| "loss": 1.0978, |
| "step": 212200 |
| }, |
| { |
| "epoch": 42.69, |
| "learning_rate": 0.00015993724117868786, |
| "loss": 1.1004, |
| "step": 212300 |
| }, |
| { |
| "epoch": 42.71, |
| "learning_rate": 0.00015907948511033082, |
| "loss": 1.0987, |
| "step": 212400 |
| }, |
| { |
| "epoch": 42.73, |
| "learning_rate": 0.00015822387900423591, |
| "loss": 1.1042, |
| "step": 212500 |
| }, |
| { |
| "epoch": 42.75, |
| "learning_rate": 0.00015737042454661232, |
| "loss": 1.0976, |
| "step": 212600 |
| }, |
| { |
| "epoch": 42.77, |
| "learning_rate": 0.00015652762576620364, |
| "loss": 1.1006, |
| "step": 212700 |
| }, |
| { |
| "epoch": 42.79, |
| "learning_rate": 0.00015567845808881514, |
| "loss": 1.1024, |
| "step": 212800 |
| }, |
| { |
| "epoch": 42.81, |
| "learning_rate": 0.00015483144707635803, |
| "loss": 1.0976, |
| "step": 212900 |
| }, |
| { |
| "epoch": 42.83, |
| "learning_rate": 0.0001539865943981035, |
| "loss": 1.0966, |
| "step": 213000 |
| }, |
| { |
| "epoch": 42.85, |
| "learning_rate": 0.0001531439017190679, |
| "loss": 1.1029, |
| "step": 213100 |
| }, |
| { |
| "epoch": 42.87, |
| "learning_rate": 0.0001523033707000121, |
| "loss": 1.0952, |
| "step": 213200 |
| }, |
| { |
| "epoch": 42.89, |
| "learning_rate": 0.00015146500299743584, |
| "loss": 1.0991, |
| "step": 213300 |
| }, |
| { |
| "epoch": 42.91, |
| "learning_rate": 0.0001506288002635764, |
| "loss": 1.0977, |
| "step": 213400 |
| }, |
| { |
| "epoch": 42.93, |
| "learning_rate": 0.00014979476414640313, |
| "loss": 1.095, |
| "step": 213500 |
| }, |
| { |
| "epoch": 42.95, |
| "learning_rate": 0.00014896289628961654, |
| "loss": 1.0932, |
| "step": 213600 |
| }, |
| { |
| "epoch": 42.97, |
| "learning_rate": 0.00014813319833264339, |
| "loss": 1.0971, |
| "step": 213700 |
| }, |
| { |
| "epoch": 42.99, |
| "learning_rate": 0.00014730567191063472, |
| "loss": 1.0962, |
| "step": 213800 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_accuracy": 0.4207685107173674, |
| "eval_loss": 1.071601152420044, |
| "eval_runtime": 19.6492, |
| "eval_samples_per_second": 4049.421, |
| "eval_steps_per_second": 15.828, |
| "step": 213849 |
| }, |
| { |
| "epoch": 43.01, |
| "learning_rate": 0.00014648031865446083, |
| "loss": 1.0912, |
| "step": 213900 |
| }, |
| { |
| "epoch": 43.03, |
| "learning_rate": 0.00014565714019071016, |
| "loss": 1.0872, |
| "step": 214000 |
| }, |
| { |
| "epoch": 43.05, |
| "learning_rate": 0.00014483613814168474, |
| "loss": 1.0874, |
| "step": 214100 |
| }, |
| { |
| "epoch": 43.07, |
| "learning_rate": 0.0001440173141253978, |
| "loss": 1.0936, |
| "step": 214200 |
| }, |
| { |
| "epoch": 43.09, |
| "learning_rate": 0.00014320066975556919, |
| "loss": 1.0912, |
| "step": 214300 |
| }, |
| { |
| "epoch": 43.11, |
| "learning_rate": 0.000142386206641624, |
| "loss": 1.0885, |
| "step": 214400 |
| }, |
| { |
| "epoch": 43.13, |
| "learning_rate": 0.00014157392638868823, |
| "loss": 1.0879, |
| "step": 214500 |
| }, |
| { |
| "epoch": 43.15, |
| "learning_rate": 0.00014076383059758642, |
| "loss": 1.0882, |
| "step": 214600 |
| }, |
| { |
| "epoch": 43.17, |
| "learning_rate": 0.0001399559208648371, |
| "loss": 1.0894, |
| "step": 214700 |
| }, |
| { |
| "epoch": 43.19, |
| "learning_rate": 0.00013915019878265114, |
| "loss": 1.0883, |
| "step": 214800 |
| }, |
| { |
| "epoch": 43.21, |
| "learning_rate": 0.0001383466659389282, |
| "loss": 1.0865, |
| "step": 214900 |
| }, |
| { |
| "epoch": 43.23, |
| "learning_rate": 0.00013754532391725333, |
| "loss": 1.0899, |
| "step": 215000 |
| }, |
| { |
| "epoch": 43.25, |
| "learning_rate": 0.0001367461742968934, |
| "loss": 1.09, |
| "step": 215100 |
| }, |
| { |
| "epoch": 43.27, |
| "learning_rate": 0.0001359492186527951, |
| "loss": 1.0892, |
| "step": 215200 |
| }, |
| { |
| "epoch": 43.29, |
| "learning_rate": 0.0001351544585555814, |
| "loss": 1.0893, |
| "step": 215300 |
| }, |
| { |
| "epoch": 43.31, |
| "learning_rate": 0.00013436189557154823, |
| "loss": 1.0895, |
| "step": 215400 |
| }, |
| { |
| "epoch": 43.33, |
| "learning_rate": 0.0001335715312626608, |
| "loss": 1.0932, |
| "step": 215500 |
| }, |
| { |
| "epoch": 43.35, |
| "learning_rate": 0.00013278336718655206, |
| "loss": 1.0897, |
| "step": 215600 |
| }, |
| { |
| "epoch": 43.37, |
| "learning_rate": 0.00013199740489651862, |
| "loss": 1.0899, |
| "step": 215700 |
| }, |
| { |
| "epoch": 43.39, |
| "learning_rate": 0.0001312136459415178, |
| "loss": 1.0911, |
| "step": 215800 |
| }, |
| { |
| "epoch": 43.41, |
| "learning_rate": 0.00013043209186616432, |
| "loss": 1.0859, |
| "step": 215900 |
| }, |
| { |
| "epoch": 43.43, |
| "learning_rate": 0.000129652744210728, |
| "loss": 1.0889, |
| "step": 216000 |
| }, |
| { |
| "epoch": 43.45, |
| "learning_rate": 0.00012887560451113, |
| "loss": 1.0887, |
| "step": 216100 |
| }, |
| { |
| "epoch": 43.47, |
| "learning_rate": 0.0001281006742989406, |
| "loss": 1.0937, |
| "step": 216200 |
| }, |
| { |
| "epoch": 43.49, |
| "learning_rate": 0.0001273279551013748, |
| "loss": 1.0875, |
| "step": 216300 |
| }, |
| { |
| "epoch": 43.51, |
| "learning_rate": 0.00012655744844129082, |
| "loss": 1.0894, |
| "step": 216400 |
| }, |
| { |
| "epoch": 43.53, |
| "learning_rate": 0.0001257891558371864, |
| "loss": 1.088, |
| "step": 216500 |
| }, |
| { |
| "epoch": 43.55, |
| "learning_rate": 0.00012502307880319613, |
| "loss": 1.0907, |
| "step": 216600 |
| }, |
| { |
| "epoch": 43.57, |
| "learning_rate": 0.00012426684646913629, |
| "loss": 1.0884, |
| "step": 216700 |
| }, |
| { |
| "epoch": 43.59, |
| "learning_rate": 0.0001235051829070172, |
| "loss": 1.0873, |
| "step": 216800 |
| }, |
| { |
| "epoch": 43.61, |
| "learning_rate": 0.00012274573941621584, |
| "loss": 1.0884, |
| "step": 216900 |
| }, |
| { |
| "epoch": 43.63, |
| "learning_rate": 0.00012198851749342654, |
| "loss": 1.0917, |
| "step": 217000 |
| }, |
| { |
| "epoch": 43.65, |
| "learning_rate": 0.00012123351863096627, |
| "loss": 1.0934, |
| "step": 217100 |
| }, |
| { |
| "epoch": 43.67, |
| "learning_rate": 0.0001204807443167695, |
| "loss": 1.0904, |
| "step": 217200 |
| }, |
| { |
| "epoch": 43.69, |
| "learning_rate": 0.00011973019603438773, |
| "loss": 1.0906, |
| "step": 217300 |
| }, |
| { |
| "epoch": 43.71, |
| "learning_rate": 0.00011898187526298495, |
| "loss": 1.0889, |
| "step": 217400 |
| }, |
| { |
| "epoch": 43.73, |
| "learning_rate": 0.00011823578347733579, |
| "loss": 1.09, |
| "step": 217500 |
| }, |
| { |
| "epoch": 43.75, |
| "learning_rate": 0.00011749192214782101, |
| "loss": 1.0912, |
| "step": 217600 |
| }, |
| { |
| "epoch": 43.77, |
| "learning_rate": 0.00011675029274042654, |
| "loss": 1.0905, |
| "step": 217700 |
| }, |
| { |
| "epoch": 43.79, |
| "learning_rate": 0.0001160108967167392, |
| "loss": 1.0874, |
| "step": 217800 |
| }, |
| { |
| "epoch": 43.81, |
| "learning_rate": 0.0001152737355339449, |
| "loss": 1.0902, |
| "step": 217900 |
| }, |
| { |
| "epoch": 43.83, |
| "learning_rate": 0.00011453881064482418, |
| "loss": 1.088, |
| "step": 218000 |
| }, |
| { |
| "epoch": 43.85, |
| "learning_rate": 0.0001138061234977511, |
| "loss": 1.0878, |
| "step": 218100 |
| }, |
| { |
| "epoch": 43.87, |
| "learning_rate": 0.00011307567553668963, |
| "loss": 1.0927, |
| "step": 218200 |
| }, |
| { |
| "epoch": 43.89, |
| "learning_rate": 0.00011234746820119087, |
| "loss": 1.0882, |
| "step": 218300 |
| }, |
| { |
| "epoch": 43.91, |
| "learning_rate": 0.00011162150292638924, |
| "loss": 1.0927, |
| "step": 218400 |
| }, |
| { |
| "epoch": 43.94, |
| "learning_rate": 0.00011089778114300134, |
| "loss": 1.0885, |
| "step": 218500 |
| }, |
| { |
| "epoch": 43.96, |
| "learning_rate": 0.00011017630427732253, |
| "loss": 1.0884, |
| "step": 218600 |
| }, |
| { |
| "epoch": 43.98, |
| "learning_rate": 0.00010945707375122376, |
| "loss": 1.0878, |
| "step": 218700 |
| }, |
| { |
| "epoch": 44.0, |
| "learning_rate": 0.0001087472496787982, |
| "loss": 1.0855, |
| "step": 218800 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_accuracy": 0.4214934698002123, |
| "eval_loss": 1.0645169019699097, |
| "eval_runtime": 19.7931, |
| "eval_samples_per_second": 4019.996, |
| "eval_steps_per_second": 15.713, |
| "step": 218823 |
| }, |
| { |
| "epoch": 44.02, |
| "learning_rate": 0.00010803249358108017, |
| "loss": 1.0767, |
| "step": 218900 |
| }, |
| { |
| "epoch": 44.04, |
| "learning_rate": 0.00010731998804791671, |
| "loss": 1.0805, |
| "step": 219000 |
| }, |
| { |
| "epoch": 44.06, |
| "learning_rate": 0.00010660973448349847, |
| "loss": 1.0767, |
| "step": 219100 |
| }, |
| { |
| "epoch": 44.08, |
| "learning_rate": 0.00010590173428757774, |
| "loss": 1.0796, |
| "step": 219200 |
| }, |
| { |
| "epoch": 44.1, |
| "learning_rate": 0.00010519598885546585, |
| "loss": 1.0782, |
| "step": 219300 |
| }, |
| { |
| "epoch": 44.12, |
| "learning_rate": 0.00010449249957803011, |
| "loss": 1.0765, |
| "step": 219400 |
| }, |
| { |
| "epoch": 44.14, |
| "learning_rate": 0.00010379126784169191, |
| "loss": 1.0828, |
| "step": 219500 |
| }, |
| { |
| "epoch": 44.16, |
| "learning_rate": 0.00010309229502842355, |
| "loss": 1.077, |
| "step": 219600 |
| }, |
| { |
| "epoch": 44.18, |
| "learning_rate": 0.00010239558251574535, |
| "loss": 1.0754, |
| "step": 219700 |
| }, |
| { |
| "epoch": 44.2, |
| "learning_rate": 0.00010170113167672274, |
| "loss": 1.0823, |
| "step": 219800 |
| }, |
| { |
| "epoch": 44.22, |
| "learning_rate": 0.00010100894387996454, |
| "loss": 1.082, |
| "step": 219900 |
| }, |
| { |
| "epoch": 44.24, |
| "learning_rate": 0.00010031902048961913, |
| "loss": 1.0795, |
| "step": 220000 |
| }, |
| { |
| "epoch": 44.26, |
| "learning_rate": 9.963136286537278e-05, |
| "loss": 1.0816, |
| "step": 220100 |
| }, |
| { |
| "epoch": 44.28, |
| "learning_rate": 9.894597236244558e-05, |
| "loss": 1.079, |
| "step": 220200 |
| }, |
| { |
| "epoch": 44.3, |
| "learning_rate": 9.826285033159035e-05, |
| "loss": 1.0816, |
| "step": 220300 |
| }, |
| { |
| "epoch": 44.32, |
| "learning_rate": 9.758199811908924e-05, |
| "loss": 1.0845, |
| "step": 220400 |
| }, |
| { |
| "epoch": 44.34, |
| "learning_rate": 9.690341706675043e-05, |
| "loss": 1.0838, |
| "step": 220500 |
| }, |
| { |
| "epoch": 44.36, |
| "learning_rate": 9.622710851190694e-05, |
| "loss": 1.0781, |
| "step": 220600 |
| }, |
| { |
| "epoch": 44.38, |
| "learning_rate": 9.555307378741259e-05, |
| "loss": 1.0841, |
| "step": 220700 |
| }, |
| { |
| "epoch": 44.4, |
| "learning_rate": 9.488802055091186e-05, |
| "loss": 1.0813, |
| "step": 220800 |
| }, |
| { |
| "epoch": 44.42, |
| "learning_rate": 9.421851469638642e-05, |
| "loss": 1.0834, |
| "step": 220900 |
| }, |
| { |
| "epoch": 44.44, |
| "learning_rate": 9.355128663070311e-05, |
| "loss": 1.0805, |
| "step": 221000 |
| }, |
| { |
| "epoch": 44.46, |
| "learning_rate": 9.288633766882021e-05, |
| "loss": 1.0837, |
| "step": 221100 |
| }, |
| { |
| "epoch": 44.48, |
| "learning_rate": 9.222366912120445e-05, |
| "loss": 1.079, |
| "step": 221200 |
| }, |
| { |
| "epoch": 44.5, |
| "learning_rate": 9.156328229382879e-05, |
| "loss": 1.082, |
| "step": 221300 |
| }, |
| { |
| "epoch": 44.52, |
| "learning_rate": 9.090517848816912e-05, |
| "loss": 1.0806, |
| "step": 221400 |
| }, |
| { |
| "epoch": 44.54, |
| "learning_rate": 9.024935900120185e-05, |
| "loss": 1.082, |
| "step": 221500 |
| }, |
| { |
| "epoch": 44.56, |
| "learning_rate": 8.95958251254017e-05, |
| "loss": 1.0818, |
| "step": 221600 |
| }, |
| { |
| "epoch": 44.58, |
| "learning_rate": 8.894457814873885e-05, |
| "loss": 1.0804, |
| "step": 221700 |
| }, |
| { |
| "epoch": 44.6, |
| "learning_rate": 8.829561935467689e-05, |
| "loss": 1.0799, |
| "step": 221800 |
| }, |
| { |
| "epoch": 44.62, |
| "learning_rate": 8.76489500221693e-05, |
| "loss": 1.0803, |
| "step": 221900 |
| }, |
| { |
| "epoch": 44.64, |
| "learning_rate": 8.700457142565774e-05, |
| "loss": 1.0811, |
| "step": 222000 |
| }, |
| { |
| "epoch": 44.66, |
| "learning_rate": 8.63624848350697e-05, |
| "loss": 1.0835, |
| "step": 222100 |
| }, |
| { |
| "epoch": 44.68, |
| "learning_rate": 8.57226915158156e-05, |
| "loss": 1.0782, |
| "step": 222200 |
| }, |
| { |
| "epoch": 44.7, |
| "learning_rate": 8.508519272878545e-05, |
| "loss": 1.0845, |
| "step": 222300 |
| }, |
| { |
| "epoch": 44.72, |
| "learning_rate": 8.444998973034829e-05, |
| "loss": 1.0814, |
| "step": 222400 |
| }, |
| { |
| "epoch": 44.74, |
| "learning_rate": 8.381708377234828e-05, |
| "loss": 1.0828, |
| "step": 222500 |
| }, |
| { |
| "epoch": 44.76, |
| "learning_rate": 8.318647610210284e-05, |
| "loss": 1.0845, |
| "step": 222600 |
| }, |
| { |
| "epoch": 44.78, |
| "learning_rate": 8.25581679623992e-05, |
| "loss": 1.0805, |
| "step": 222700 |
| }, |
| { |
| "epoch": 44.8, |
| "learning_rate": 8.193216059149355e-05, |
| "loss": 1.0768, |
| "step": 222800 |
| }, |
| { |
| "epoch": 44.82, |
| "learning_rate": 8.131468087783922e-05, |
| "loss": 1.0814, |
| "step": 222900 |
| }, |
| { |
| "epoch": 44.84, |
| "learning_rate": 8.06932557027712e-05, |
| "loss": 1.0778, |
| "step": 223000 |
| }, |
| { |
| "epoch": 44.86, |
| "learning_rate": 8.007413497182939e-05, |
| "loss": 1.083, |
| "step": 223100 |
| }, |
| { |
| "epoch": 44.88, |
| "learning_rate": 7.94573199051636e-05, |
| "loss": 1.075, |
| "step": 223200 |
| }, |
| { |
| "epoch": 44.9, |
| "learning_rate": 7.884281171837912e-05, |
| "loss": 1.0767, |
| "step": 223300 |
| }, |
| { |
| "epoch": 44.92, |
| "learning_rate": 7.82306116225355e-05, |
| "loss": 1.0833, |
| "step": 223400 |
| }, |
| { |
| "epoch": 44.94, |
| "learning_rate": 7.762072082414307e-05, |
| "loss": 1.0793, |
| "step": 223500 |
| }, |
| { |
| "epoch": 44.96, |
| "learning_rate": 7.701314052516146e-05, |
| "loss": 1.0785, |
| "step": 223600 |
| }, |
| { |
| "epoch": 44.98, |
| "learning_rate": 7.640787192299645e-05, |
| "loss": 1.0801, |
| "step": 223700 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_accuracy": 0.4221078634754255, |
| "eval_loss": 1.0577867031097412, |
| "eval_runtime": 19.8376, |
| "eval_samples_per_second": 4010.968, |
| "eval_steps_per_second": 15.677, |
| "step": 223796 |
| }, |
| { |
| "epoch": 45.0, |
| "learning_rate": 7.580491621049817e-05, |
| "loss": 1.0812, |
| "step": 223800 |
| }, |
| { |
| "epoch": 45.02, |
| "learning_rate": 7.52042745759586e-05, |
| "loss": 1.0668, |
| "step": 223900 |
| }, |
| { |
| "epoch": 45.04, |
| "learning_rate": 7.460594820310931e-05, |
| "loss": 1.0678, |
| "step": 224000 |
| }, |
| { |
| "epoch": 45.06, |
| "learning_rate": 7.400993827111833e-05, |
| "loss": 1.0686, |
| "step": 224100 |
| }, |
| { |
| "epoch": 45.08, |
| "learning_rate": 7.341624595458923e-05, |
| "loss": 1.0737, |
| "step": 224200 |
| }, |
| { |
| "epoch": 45.1, |
| "learning_rate": 7.282487242355806e-05, |
| "loss": 1.0732, |
| "step": 224300 |
| }, |
| { |
| "epoch": 45.12, |
| "learning_rate": 7.223581884349059e-05, |
| "loss": 1.0704, |
| "step": 224400 |
| }, |
| { |
| "epoch": 45.14, |
| "learning_rate": 7.16490863752807e-05, |
| "loss": 1.0695, |
| "step": 224500 |
| }, |
| { |
| "epoch": 45.16, |
| "learning_rate": 7.106467617524787e-05, |
| "loss": 1.0738, |
| "step": 224600 |
| }, |
| { |
| "epoch": 45.18, |
| "learning_rate": 7.048258939513497e-05, |
| "loss": 1.07, |
| "step": 224700 |
| }, |
| { |
| "epoch": 45.2, |
| "learning_rate": 6.990282718210616e-05, |
| "loss": 1.0724, |
| "step": 224800 |
| }, |
| { |
| "epoch": 45.22, |
| "learning_rate": 6.93311535277738e-05, |
| "loss": 1.071, |
| "step": 224900 |
| }, |
| { |
| "epoch": 45.24, |
| "learning_rate": 6.875602059798258e-05, |
| "loss": 1.0762, |
| "step": 225000 |
| }, |
| { |
| "epoch": 45.26, |
| "learning_rate": 6.818321563795887e-05, |
| "loss": 1.074, |
| "step": 225100 |
| }, |
| { |
| "epoch": 45.28, |
| "learning_rate": 6.761273977657439e-05, |
| "loss": 1.069, |
| "step": 225200 |
| }, |
| { |
| "epoch": 45.3, |
| "learning_rate": 6.704459413811065e-05, |
| "loss": 1.0751, |
| "step": 225300 |
| }, |
| { |
| "epoch": 45.32, |
| "learning_rate": 6.647877984225656e-05, |
| "loss": 1.0755, |
| "step": 225400 |
| }, |
| { |
| "epoch": 45.34, |
| "learning_rate": 6.59152980041064e-05, |
| "loss": 1.0733, |
| "step": 225500 |
| }, |
| { |
| "epoch": 45.36, |
| "learning_rate": 6.535414973415796e-05, |
| "loss": 1.0741, |
| "step": 225600 |
| }, |
| { |
| "epoch": 45.38, |
| "learning_rate": 6.479533613830982e-05, |
| "loss": 1.0741, |
| "step": 225700 |
| }, |
| { |
| "epoch": 45.4, |
| "learning_rate": 6.423885831785992e-05, |
| "loss": 1.076, |
| "step": 225800 |
| }, |
| { |
| "epoch": 45.42, |
| "learning_rate": 6.368471736950224e-05, |
| "loss": 1.073, |
| "step": 225900 |
| }, |
| { |
| "epoch": 45.44, |
| "learning_rate": 6.313291438532556e-05, |
| "loss": 1.075, |
| "step": 226000 |
| }, |
| { |
| "epoch": 45.46, |
| "learning_rate": 6.258345045281138e-05, |
| "loss": 1.0719, |
| "step": 226100 |
| }, |
| { |
| "epoch": 45.48, |
| "learning_rate": 6.203632665483128e-05, |
| "loss": 1.0774, |
| "step": 226200 |
| }, |
| { |
| "epoch": 45.5, |
| "learning_rate": 6.149154406964438e-05, |
| "loss": 1.0707, |
| "step": 226300 |
| }, |
| { |
| "epoch": 45.52, |
| "learning_rate": 6.09491037708966e-05, |
| "loss": 1.0762, |
| "step": 226400 |
| }, |
| { |
| "epoch": 45.54, |
| "learning_rate": 6.040900682761738e-05, |
| "loss": 1.07, |
| "step": 226500 |
| }, |
| { |
| "epoch": 45.56, |
| "learning_rate": 5.987125430421819e-05, |
| "loss": 1.0754, |
| "step": 226600 |
| }, |
| { |
| "epoch": 45.58, |
| "learning_rate": 5.9335847260489354e-05, |
| "loss": 1.0736, |
| "step": 226700 |
| }, |
| { |
| "epoch": 45.6, |
| "learning_rate": 5.8802786751599724e-05, |
| "loss": 1.0737, |
| "step": 226800 |
| }, |
| { |
| "epoch": 45.62, |
| "learning_rate": 5.827207382809313e-05, |
| "loss": 1.0699, |
| "step": 226900 |
| }, |
| { |
| "epoch": 45.64, |
| "learning_rate": 5.774370953588723e-05, |
| "loss": 1.0726, |
| "step": 227000 |
| }, |
| { |
| "epoch": 45.66, |
| "learning_rate": 5.72176949162706e-05, |
| "loss": 1.0727, |
| "step": 227100 |
| }, |
| { |
| "epoch": 45.68, |
| "learning_rate": 5.669403100590123e-05, |
| "loss": 1.0772, |
| "step": 227200 |
| }, |
| { |
| "epoch": 45.7, |
| "learning_rate": 5.617792031399907e-05, |
| "loss": 1.0748, |
| "step": 227300 |
| }, |
| { |
| "epoch": 45.72, |
| "learning_rate": 5.565893738080891e-05, |
| "loss": 1.0718, |
| "step": 227400 |
| }, |
| { |
| "epoch": 45.74, |
| "learning_rate": 5.5142308228831315e-05, |
| "loss": 1.0739, |
| "step": 227500 |
| }, |
| { |
| "epoch": 45.76, |
| "learning_rate": 5.4628033876227974e-05, |
| "loss": 1.075, |
| "step": 227600 |
| }, |
| { |
| "epoch": 45.78, |
| "learning_rate": 5.411611533651911e-05, |
| "loss": 1.0738, |
| "step": 227700 |
| }, |
| { |
| "epoch": 45.81, |
| "learning_rate": 5.3606553618582714e-05, |
| "loss": 1.0752, |
| "step": 227800 |
| }, |
| { |
| "epoch": 45.83, |
| "learning_rate": 5.309934972665201e-05, |
| "loss": 1.0727, |
| "step": 227900 |
| }, |
| { |
| "epoch": 45.85, |
| "learning_rate": 5.259450466031324e-05, |
| "loss": 1.071, |
| "step": 228000 |
| }, |
| { |
| "epoch": 45.87, |
| "learning_rate": 5.2092019414504e-05, |
| "loss": 1.074, |
| "step": 228100 |
| }, |
| { |
| "epoch": 45.89, |
| "learning_rate": 5.1591894979511055e-05, |
| "loss": 1.0699, |
| "step": 228200 |
| }, |
| { |
| "epoch": 45.91, |
| "learning_rate": 5.109413234096888e-05, |
| "loss": 1.0727, |
| "step": 228300 |
| }, |
| { |
| "epoch": 45.93, |
| "learning_rate": 5.059873247985722e-05, |
| "loss": 1.0721, |
| "step": 228400 |
| }, |
| { |
| "epoch": 45.95, |
| "learning_rate": 5.010569637249912e-05, |
| "loss": 1.0708, |
| "step": 228500 |
| }, |
| { |
| "epoch": 45.97, |
| "learning_rate": 4.961502499055928e-05, |
| "loss": 1.0701, |
| "step": 228600 |
| }, |
| { |
| "epoch": 45.99, |
| "learning_rate": 4.912671930104237e-05, |
| "loss": 1.072, |
| "step": 228700 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_accuracy": 0.42260501009247403, |
| "eval_loss": 1.052234172821045, |
| "eval_runtime": 19.8604, |
| "eval_samples_per_second": 4006.368, |
| "eval_steps_per_second": 15.659, |
| "step": 228769 |
| }, |
| { |
| "epoch": 46.01, |
| "learning_rate": 4.864078026629054e-05, |
| "loss": 1.0673, |
| "step": 228800 |
| }, |
| { |
| "epoch": 46.03, |
| "learning_rate": 4.8157208843981476e-05, |
| "loss": 1.0645, |
| "step": 228900 |
| }, |
| { |
| "epoch": 46.05, |
| "learning_rate": 4.767600598712743e-05, |
| "loss": 1.0635, |
| "step": 229000 |
| }, |
| { |
| "epoch": 46.07, |
| "learning_rate": 4.719717264407245e-05, |
| "loss": 1.0651, |
| "step": 229100 |
| }, |
| { |
| "epoch": 46.09, |
| "learning_rate": 4.672070975849069e-05, |
| "loss": 1.0623, |
| "step": 229200 |
| }, |
| { |
| "epoch": 46.11, |
| "learning_rate": 4.625134744279142e-05, |
| "loss": 1.0643, |
| "step": 229300 |
| }, |
| { |
| "epoch": 46.13, |
| "learning_rate": 4.5779604556573094e-05, |
| "loss": 1.0661, |
| "step": 229400 |
| }, |
| { |
| "epoch": 46.15, |
| "learning_rate": 4.5310234921539935e-05, |
| "loss": 1.0668, |
| "step": 229500 |
| }, |
| { |
| "epoch": 46.17, |
| "learning_rate": 4.4843239462715455e-05, |
| "loss": 1.0682, |
| "step": 229600 |
| }, |
| { |
| "epoch": 46.19, |
| "learning_rate": 4.437861910044444e-05, |
| "loss": 1.0681, |
| "step": 229700 |
| }, |
| { |
| "epoch": 46.21, |
| "learning_rate": 4.3916374750390256e-05, |
| "loss": 1.0666, |
| "step": 229800 |
| }, |
| { |
| "epoch": 46.23, |
| "learning_rate": 4.345650732353393e-05, |
| "loss": 1.0656, |
| "step": 229900 |
| }, |
| { |
| "epoch": 46.25, |
| "learning_rate": 4.299901772617215e-05, |
| "loss": 1.0653, |
| "step": 230000 |
| }, |
| { |
| "epoch": 46.27, |
| "learning_rate": 4.2543906859915384e-05, |
| "loss": 1.0678, |
| "step": 230100 |
| }, |
| { |
| "epoch": 46.29, |
| "learning_rate": 4.209117562168643e-05, |
| "loss": 1.064, |
| "step": 230200 |
| }, |
| { |
| "epoch": 46.31, |
| "learning_rate": 4.1640824903717566e-05, |
| "loss": 1.071, |
| "step": 230300 |
| }, |
| { |
| "epoch": 46.33, |
| "learning_rate": 4.119285559355049e-05, |
| "loss": 1.0661, |
| "step": 230400 |
| }, |
| { |
| "epoch": 46.35, |
| "learning_rate": 4.0747268574033294e-05, |
| "loss": 1.0667, |
| "step": 230500 |
| }, |
| { |
| "epoch": 46.37, |
| "learning_rate": 4.0304064723319104e-05, |
| "loss": 1.0685, |
| "step": 230600 |
| }, |
| { |
| "epoch": 46.39, |
| "learning_rate": 3.986324491486421e-05, |
| "loss": 1.0662, |
| "step": 230700 |
| }, |
| { |
| "epoch": 46.41, |
| "learning_rate": 3.942481001742673e-05, |
| "loss": 1.0661, |
| "step": 230800 |
| }, |
| { |
| "epoch": 46.43, |
| "learning_rate": 3.8988760895064675e-05, |
| "loss": 1.0645, |
| "step": 230900 |
| }, |
| { |
| "epoch": 46.45, |
| "learning_rate": 3.8555098407134085e-05, |
| "loss": 1.0628, |
| "step": 231000 |
| }, |
| { |
| "epoch": 46.47, |
| "learning_rate": 3.8123823408287294e-05, |
| "loss": 1.0712, |
| "step": 231100 |
| }, |
| { |
| "epoch": 46.49, |
| "learning_rate": 3.7694936748471633e-05, |
| "loss": 1.0657, |
| "step": 231200 |
| }, |
| { |
| "epoch": 46.51, |
| "learning_rate": 3.726843927292778e-05, |
| "loss": 1.0666, |
| "step": 231300 |
| }, |
| { |
| "epoch": 46.53, |
| "learning_rate": 3.6848561063323876e-05, |
| "loss": 1.0681, |
| "step": 231400 |
| }, |
| { |
| "epoch": 46.55, |
| "learning_rate": 3.6426820560480634e-05, |
| "loss": 1.0666, |
| "step": 231500 |
| }, |
| { |
| "epoch": 46.57, |
| "learning_rate": 3.600747174108493e-05, |
| "loss": 1.0647, |
| "step": 231600 |
| }, |
| { |
| "epoch": 46.59, |
| "learning_rate": 3.5590515431579846e-05, |
| "loss": 1.0647, |
| "step": 231700 |
| }, |
| { |
| "epoch": 46.61, |
| "learning_rate": 3.51759524536939e-05, |
| "loss": 1.0643, |
| "step": 231800 |
| }, |
| { |
| "epoch": 46.63, |
| "learning_rate": 3.476378362443869e-05, |
| "loss": 1.0623, |
| "step": 231900 |
| }, |
| { |
| "epoch": 46.65, |
| "learning_rate": 3.435400975610778e-05, |
| "loss": 1.0654, |
| "step": 232000 |
| }, |
| { |
| "epoch": 46.67, |
| "learning_rate": 3.394663165627407e-05, |
| "loss": 1.0645, |
| "step": 232100 |
| }, |
| { |
| "epoch": 46.69, |
| "learning_rate": 3.3541650127789646e-05, |
| "loss": 1.0678, |
| "step": 232200 |
| }, |
| { |
| "epoch": 46.71, |
| "learning_rate": 3.31390659687833e-05, |
| "loss": 1.0657, |
| "step": 232300 |
| }, |
| { |
| "epoch": 46.73, |
| "learning_rate": 3.2738879972659116e-05, |
| "loss": 1.068, |
| "step": 232400 |
| }, |
| { |
| "epoch": 46.75, |
| "learning_rate": 3.234109292809456e-05, |
| "loss": 1.0686, |
| "step": 232500 |
| }, |
| { |
| "epoch": 46.77, |
| "learning_rate": 3.194570561904003e-05, |
| "loss": 1.0659, |
| "step": 232600 |
| }, |
| { |
| "epoch": 46.79, |
| "learning_rate": 3.1552718824715834e-05, |
| "loss": 1.0674, |
| "step": 232700 |
| }, |
| { |
| "epoch": 46.81, |
| "learning_rate": 3.116213331961215e-05, |
| "loss": 1.0668, |
| "step": 232800 |
| }, |
| { |
| "epoch": 46.83, |
| "learning_rate": 3.077394987348589e-05, |
| "loss": 1.0674, |
| "step": 232900 |
| }, |
| { |
| "epoch": 46.85, |
| "learning_rate": 3.0388169251360788e-05, |
| "loss": 1.0628, |
| "step": 233000 |
| }, |
| { |
| "epoch": 46.87, |
| "learning_rate": 3.0004792213524645e-05, |
| "loss": 1.0639, |
| "step": 233100 |
| }, |
| { |
| "epoch": 46.89, |
| "learning_rate": 2.9623819515528783e-05, |
| "loss": 1.0649, |
| "step": 233200 |
| }, |
| { |
| "epoch": 46.91, |
| "learning_rate": 2.9245251908185526e-05, |
| "loss": 1.0651, |
| "step": 233300 |
| }, |
| { |
| "epoch": 46.93, |
| "learning_rate": 2.8872839843944315e-05, |
| "loss": 1.067, |
| "step": 233400 |
| }, |
| { |
| "epoch": 46.95, |
| "learning_rate": 2.8499060581948928e-05, |
| "loss": 1.0659, |
| "step": 233500 |
| }, |
| { |
| "epoch": 46.97, |
| "learning_rate": 2.812768862725676e-05, |
| "loss": 1.0669, |
| "step": 233600 |
| }, |
| { |
| "epoch": 46.99, |
| "learning_rate": 2.7758724711759303e-05, |
| "loss": 1.0625, |
| "step": 233700 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_accuracy": 0.42298172791300875, |
| "eval_loss": 1.0480923652648926, |
| "eval_runtime": 19.887, |
| "eval_samples_per_second": 4001.005, |
| "eval_steps_per_second": 15.638, |
| "step": 233742 |
| }, |
| { |
| "epoch": 47.01, |
| "learning_rate": 2.7392169562602682e-05, |
| "loss": 1.0617, |
| "step": 233800 |
| }, |
| { |
| "epoch": 47.03, |
| "learning_rate": 2.7028023902185562e-05, |
| "loss": 1.0595, |
| "step": 233900 |
| }, |
| { |
| "epoch": 47.05, |
| "learning_rate": 2.6666288448158464e-05, |
| "loss": 1.0587, |
| "step": 234000 |
| }, |
| { |
| "epoch": 47.07, |
| "learning_rate": 2.6306963913421827e-05, |
| "loss": 1.0602, |
| "step": 234100 |
| }, |
| { |
| "epoch": 47.09, |
| "learning_rate": 2.5950051006124048e-05, |
| "loss": 1.0601, |
| "step": 234200 |
| }, |
| { |
| "epoch": 47.11, |
| "learning_rate": 2.5595550429661775e-05, |
| "loss": 1.0611, |
| "step": 234300 |
| }, |
| { |
| "epoch": 47.13, |
| "learning_rate": 2.5243462882676703e-05, |
| "loss": 1.0591, |
| "step": 234400 |
| }, |
| { |
| "epoch": 47.15, |
| "learning_rate": 2.4893789059055454e-05, |
| "loss": 1.0635, |
| "step": 234500 |
| }, |
| { |
| "epoch": 47.17, |
| "learning_rate": 2.4546529647927335e-05, |
| "loss": 1.059, |
| "step": 234600 |
| }, |
| { |
| "epoch": 47.19, |
| "learning_rate": 2.4201685333663654e-05, |
| "loss": 1.0587, |
| "step": 234700 |
| }, |
| { |
| "epoch": 47.21, |
| "learning_rate": 2.3859256795876057e-05, |
| "loss": 1.0579, |
| "step": 234800 |
| }, |
| { |
| "epoch": 47.23, |
| "learning_rate": 2.351924470941541e-05, |
| "loss": 1.0601, |
| "step": 234900 |
| }, |
| { |
| "epoch": 47.25, |
| "learning_rate": 2.318164974436987e-05, |
| "loss": 1.06, |
| "step": 235000 |
| }, |
| { |
| "epoch": 47.27, |
| "learning_rate": 2.2846472566064037e-05, |
| "loss": 1.0604, |
| "step": 235100 |
| }, |
| { |
| "epoch": 47.29, |
| "learning_rate": 2.2513713835058124e-05, |
| "loss": 1.058, |
| "step": 235200 |
| }, |
| { |
| "epoch": 47.31, |
| "learning_rate": 2.2183374207145472e-05, |
| "loss": 1.0605, |
| "step": 235300 |
| }, |
| { |
| "epoch": 47.33, |
| "learning_rate": 2.1858721552181353e-05, |
| "loss": 1.0602, |
| "step": 235400 |
| }, |
| { |
| "epoch": 47.35, |
| "learning_rate": 2.153319787157798e-05, |
| "loss": 1.0587, |
| "step": 235500 |
| }, |
| { |
| "epoch": 47.37, |
| "learning_rate": 2.1210095226447403e-05, |
| "loss": 1.0582, |
| "step": 235600 |
| }, |
| { |
| "epoch": 47.39, |
| "learning_rate": 2.0889414253553036e-05, |
| "loss": 1.0639, |
| "step": 235700 |
| }, |
| { |
| "epoch": 47.41, |
| "learning_rate": 2.057115558488601e-05, |
| "loss": 1.0615, |
| "step": 235800 |
| }, |
| { |
| "epoch": 47.43, |
| "learning_rate": 2.0255319847663906e-05, |
| "loss": 1.0598, |
| "step": 235900 |
| }, |
| { |
| "epoch": 47.45, |
| "learning_rate": 1.9941907664328407e-05, |
| "loss": 1.0658, |
| "step": 236000 |
| }, |
| { |
| "epoch": 47.47, |
| "learning_rate": 1.9630919652545432e-05, |
| "loss": 1.0611, |
| "step": 236100 |
| }, |
| { |
| "epoch": 47.49, |
| "learning_rate": 1.9322356425203607e-05, |
| "loss": 1.0624, |
| "step": 236200 |
| }, |
| { |
| "epoch": 47.51, |
| "learning_rate": 1.901621859041247e-05, |
| "loss": 1.0621, |
| "step": 236300 |
| }, |
| { |
| "epoch": 47.53, |
| "learning_rate": 1.8712506751501767e-05, |
| "loss": 1.0599, |
| "step": 236400 |
| }, |
| { |
| "epoch": 47.55, |
| "learning_rate": 1.841122150702007e-05, |
| "loss": 1.0564, |
| "step": 236500 |
| }, |
| { |
| "epoch": 47.57, |
| "learning_rate": 1.8112363450733667e-05, |
| "loss": 1.0621, |
| "step": 236600 |
| }, |
| { |
| "epoch": 47.59, |
| "learning_rate": 1.781593317162572e-05, |
| "loss": 1.0629, |
| "step": 236700 |
| }, |
| { |
| "epoch": 47.61, |
| "learning_rate": 1.7521931253894342e-05, |
| "loss": 1.0607, |
| "step": 236800 |
| }, |
| { |
| "epoch": 47.63, |
| "learning_rate": 1.7230358276952156e-05, |
| "loss": 1.0589, |
| "step": 236900 |
| }, |
| { |
| "epoch": 47.65, |
| "learning_rate": 1.6941214815424793e-05, |
| "loss": 1.0609, |
| "step": 237000 |
| }, |
| { |
| "epoch": 47.68, |
| "learning_rate": 1.66545014391499e-05, |
| "loss": 1.0602, |
| "step": 237100 |
| }, |
| { |
| "epoch": 47.7, |
| "learning_rate": 1.6370218713175762e-05, |
| "loss": 1.066, |
| "step": 237200 |
| }, |
| { |
| "epoch": 47.72, |
| "learning_rate": 1.6088367197760607e-05, |
| "loss": 1.0584, |
| "step": 237300 |
| }, |
| { |
| "epoch": 47.74, |
| "learning_rate": 1.5808947448371218e-05, |
| "loss": 1.0585, |
| "step": 237400 |
| }, |
| { |
| "epoch": 47.76, |
| "learning_rate": 1.553471784824498e-05, |
| "loss": 1.0647, |
| "step": 237500 |
| }, |
| { |
| "epoch": 47.78, |
| "learning_rate": 1.5260138946824596e-05, |
| "loss": 1.0597, |
| "step": 237600 |
| }, |
| { |
| "epoch": 47.8, |
| "learning_rate": 1.4987993443684039e-05, |
| "loss": 1.0618, |
| "step": 237700 |
| }, |
| { |
| "epoch": 47.82, |
| "learning_rate": 1.4718281875161916e-05, |
| "loss": 1.0587, |
| "step": 237800 |
| }, |
| { |
| "epoch": 47.84, |
| "learning_rate": 1.4451004772799565e-05, |
| "loss": 1.0562, |
| "step": 237900 |
| }, |
| { |
| "epoch": 47.86, |
| "learning_rate": 1.418616266334133e-05, |
| "loss": 1.0588, |
| "step": 238000 |
| }, |
| { |
| "epoch": 47.88, |
| "learning_rate": 1.392375606873178e-05, |
| "loss": 1.0633, |
| "step": 238100 |
| }, |
| { |
| "epoch": 47.9, |
| "learning_rate": 1.3663785506116133e-05, |
| "loss": 1.0609, |
| "step": 238200 |
| }, |
| { |
| "epoch": 47.92, |
| "learning_rate": 1.3406251487839e-05, |
| "loss": 1.0614, |
| "step": 238300 |
| }, |
| { |
| "epoch": 47.94, |
| "learning_rate": 1.3151154521442582e-05, |
| "loss": 1.0631, |
| "step": 238400 |
| }, |
| { |
| "epoch": 47.96, |
| "learning_rate": 1.2898495109666397e-05, |
| "loss": 1.06, |
| "step": 238500 |
| }, |
| { |
| "epoch": 47.98, |
| "learning_rate": 1.2648273750446026e-05, |
| "loss": 1.0606, |
| "step": 238600 |
| }, |
| { |
| "epoch": 48.0, |
| "learning_rate": 1.2400490936912284e-05, |
| "loss": 1.0639, |
| "step": 238700 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_accuracy": 0.4231766093868917, |
| "eval_loss": 1.045819640159607, |
| "eval_runtime": 19.861, |
| "eval_samples_per_second": 4006.252, |
| "eval_steps_per_second": 15.659, |
| "step": 238716 |
| }, |
| { |
| "epoch": 48.02, |
| "learning_rate": 1.2155147157390245e-05, |
| "loss": 1.0555, |
| "step": 238800 |
| }, |
| { |
| "epoch": 48.04, |
| "learning_rate": 1.1912242895397857e-05, |
| "loss": 1.057, |
| "step": 238900 |
| }, |
| { |
| "epoch": 48.06, |
| "learning_rate": 1.1671778629645525e-05, |
| "loss": 1.0555, |
| "step": 239000 |
| }, |
| { |
| "epoch": 48.08, |
| "learning_rate": 1.1433754834035137e-05, |
| "loss": 1.0566, |
| "step": 239100 |
| }, |
| { |
| "epoch": 48.1, |
| "learning_rate": 1.1198171977658822e-05, |
| "loss": 1.0548, |
| "step": 239200 |
| }, |
| { |
| "epoch": 48.12, |
| "learning_rate": 1.096503052479783e-05, |
| "loss": 1.0572, |
| "step": 239300 |
| }, |
| { |
| "epoch": 48.14, |
| "learning_rate": 1.0734330934922404e-05, |
| "loss": 1.0535, |
| "step": 239400 |
| }, |
| { |
| "epoch": 48.16, |
| "learning_rate": 1.0508344144459226e-05, |
| "loss": 1.0572, |
| "step": 239500 |
| }, |
| { |
| "epoch": 48.18, |
| "learning_rate": 1.0282505209828901e-05, |
| "loss": 1.0567, |
| "step": 239600 |
| }, |
| { |
| "epoch": 48.2, |
| "learning_rate": 1.0059109483290113e-05, |
| "loss": 1.061, |
| "step": 239700 |
| }, |
| { |
| "epoch": 48.22, |
| "learning_rate": 9.838157405106102e-06, |
| "loss": 1.0535, |
| "step": 239800 |
| }, |
| { |
| "epoch": 48.24, |
| "learning_rate": 9.619649410724658e-06, |
| "loss": 1.0575, |
| "step": 239900 |
| }, |
| { |
| "epoch": 48.26, |
| "learning_rate": 9.40358593077631e-06, |
| "loss": 1.0578, |
| "step": 240000 |
| }, |
| { |
| "epoch": 48.28, |
| "learning_rate": 9.189967391074332e-06, |
| "loss": 1.0559, |
| "step": 240100 |
| }, |
| { |
| "epoch": 48.3, |
| "learning_rate": 8.978794212613355e-06, |
| "loss": 1.0572, |
| "step": 240200 |
| }, |
| { |
| "epoch": 48.32, |
| "learning_rate": 8.770066811569083e-06, |
| "loss": 1.0544, |
| "step": 240300 |
| }, |
| { |
| "epoch": 48.34, |
| "learning_rate": 8.563785599296769e-06, |
| "loss": 1.0586, |
| "step": 240400 |
| }, |
| { |
| "epoch": 48.36, |
| "learning_rate": 8.359950982330805e-06, |
| "loss": 1.0592, |
| "step": 240500 |
| }, |
| { |
| "epoch": 48.38, |
| "learning_rate": 8.158563362384158e-06, |
| "loss": 1.0586, |
| "step": 240600 |
| }, |
| { |
| "epoch": 48.4, |
| "learning_rate": 7.959623136347128e-06, |
| "loss": 1.0547, |
| "step": 240700 |
| }, |
| { |
| "epoch": 48.42, |
| "learning_rate": 7.763130696286647e-06, |
| "loss": 1.0525, |
| "step": 240800 |
| }, |
| { |
| "epoch": 48.44, |
| "learning_rate": 7.56908642944587e-06, |
| "loss": 1.0569, |
| "step": 240900 |
| }, |
| { |
| "epoch": 48.46, |
| "learning_rate": 7.3774907182427856e-06, |
| "loss": 1.055, |
| "step": 241000 |
| }, |
| { |
| "epoch": 48.48, |
| "learning_rate": 7.18834394027007e-06, |
| "loss": 1.0579, |
| "step": 241100 |
| }, |
| { |
| "epoch": 48.5, |
| "learning_rate": 7.001646468294265e-06, |
| "loss": 1.0554, |
| "step": 241200 |
| }, |
| { |
| "epoch": 48.52, |
| "learning_rate": 6.817398670254382e-06, |
| "loss": 1.0551, |
| "step": 241300 |
| }, |
| { |
| "epoch": 48.54, |
| "learning_rate": 6.635600909262185e-06, |
| "loss": 1.0599, |
| "step": 241400 |
| }, |
| { |
| "epoch": 48.56, |
| "learning_rate": 6.456253543600521e-06, |
| "loss": 1.0554, |
| "step": 241500 |
| }, |
| { |
| "epoch": 48.58, |
| "learning_rate": 6.281113760537027e-06, |
| "loss": 1.0548, |
| "step": 241600 |
| }, |
| { |
| "epoch": 48.6, |
| "learning_rate": 6.106643728384375e-06, |
| "loss": 1.0615, |
| "step": 241700 |
| }, |
| { |
| "epoch": 48.62, |
| "learning_rate": 5.934625134019766e-06, |
| "loss": 1.0538, |
| "step": 241800 |
| }, |
| { |
| "epoch": 48.64, |
| "learning_rate": 5.76505831645327e-06, |
| "loss": 1.0599, |
| "step": 241900 |
| }, |
| { |
| "epoch": 48.66, |
| "learning_rate": 5.597943609863821e-06, |
| "loss": 1.0539, |
| "step": 242000 |
| }, |
| { |
| "epoch": 48.68, |
| "learning_rate": 5.433281343597135e-06, |
| "loss": 1.0565, |
| "step": 242100 |
| }, |
| { |
| "epoch": 48.7, |
| "learning_rate": 5.2710718421662696e-06, |
| "loss": 1.0596, |
| "step": 242200 |
| }, |
| { |
| "epoch": 48.72, |
| "learning_rate": 5.111315425249952e-06, |
| "loss": 1.0548, |
| "step": 242300 |
| }, |
| { |
| "epoch": 48.74, |
| "learning_rate": 4.954012407692721e-06, |
| "loss": 1.0547, |
| "step": 242400 |
| }, |
| { |
| "epoch": 48.76, |
| "learning_rate": 4.799163099503956e-06, |
| "loss": 1.0514, |
| "step": 242500 |
| }, |
| { |
| "epoch": 48.78, |
| "learning_rate": 4.646767805857183e-06, |
| "loss": 1.0587, |
| "step": 242600 |
| }, |
| { |
| "epoch": 48.8, |
| "learning_rate": 4.496826827089795e-06, |
| "loss": 1.0602, |
| "step": 242700 |
| }, |
| { |
| "epoch": 48.82, |
| "learning_rate": 4.349340458702084e-06, |
| "loss": 1.0576, |
| "step": 242800 |
| }, |
| { |
| "epoch": 48.84, |
| "learning_rate": 4.204308991357098e-06, |
| "loss": 1.0576, |
| "step": 242900 |
| }, |
| { |
| "epoch": 48.86, |
| "learning_rate": 4.061732710879673e-06, |
| "loss": 1.0592, |
| "step": 243000 |
| }, |
| { |
| "epoch": 48.88, |
| "learning_rate": 3.921611898255878e-06, |
| "loss": 1.0601, |
| "step": 243100 |
| }, |
| { |
| "epoch": 48.9, |
| "learning_rate": 3.783946829632734e-06, |
| "loss": 1.0588, |
| "step": 243200 |
| }, |
| { |
| "epoch": 48.92, |
| "learning_rate": 3.648737776317801e-06, |
| "loss": 1.0558, |
| "step": 243300 |
| }, |
| { |
| "epoch": 48.94, |
| "learning_rate": 3.5159850047777885e-06, |
| "loss": 1.057, |
| "step": 243400 |
| }, |
| { |
| "epoch": 48.96, |
| "learning_rate": 3.3856887766392507e-06, |
| "loss": 1.0593, |
| "step": 243500 |
| }, |
| { |
| "epoch": 48.98, |
| "learning_rate": 3.259115580974137e-06, |
| "loss": 1.0585, |
| "step": 243600 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_accuracy": 0.4232847884907615, |
| "eval_loss": 1.044702172279358, |
| "eval_runtime": 19.8772, |
| "eval_samples_per_second": 4002.975, |
| "eval_steps_per_second": 15.646, |
| "step": 243689 |
| }, |
| { |
| "epoch": 49.0, |
| "learning_rate": 3.1337086333987908e-06, |
| "loss": 1.0534, |
| "step": 243700 |
| }, |
| { |
| "epoch": 49.02, |
| "learning_rate": 3.0107589826065816e-06, |
| "loss": 1.0535, |
| "step": 243800 |
| }, |
| { |
| "epoch": 49.04, |
| "learning_rate": 2.8902668709041013e-06, |
| "loss": 1.0535, |
| "step": 243900 |
| }, |
| { |
| "epoch": 49.06, |
| "learning_rate": 2.772232535754593e-06, |
| "loss": 1.0536, |
| "step": 244000 |
| }, |
| { |
| "epoch": 49.08, |
| "learning_rate": 2.6566562097773973e-06, |
| "loss": 1.0529, |
| "step": 244100 |
| }, |
| { |
| "epoch": 49.1, |
| "learning_rate": 2.5435381207479514e-06, |
| "loss": 1.057, |
| "step": 244200 |
| }, |
| { |
| "epoch": 49.12, |
| "learning_rate": 2.432878491596957e-06, |
| "loss": 1.0544, |
| "step": 244300 |
| }, |
| { |
| "epoch": 49.14, |
| "learning_rate": 2.3246775404098252e-06, |
| "loss": 1.0568, |
| "step": 244400 |
| }, |
| { |
| "epoch": 49.16, |
| "learning_rate": 2.218935480426676e-06, |
| "loss": 1.0521, |
| "step": 244500 |
| }, |
| { |
| "epoch": 49.18, |
| "learning_rate": 2.1156525200416444e-06, |
| "loss": 1.0557, |
| "step": 244600 |
| }, |
| { |
| "epoch": 49.2, |
| "learning_rate": 2.0148288628026036e-06, |
| "loss": 1.0553, |
| "step": 244700 |
| }, |
| { |
| "epoch": 49.22, |
| "learning_rate": 1.9164647074104702e-06, |
| "loss": 1.0539, |
| "step": 244800 |
| }, |
| { |
| "epoch": 49.24, |
| "learning_rate": 1.8205602477193439e-06, |
| "loss": 1.0517, |
| "step": 244900 |
| }, |
| { |
| "epoch": 49.26, |
| "learning_rate": 1.7271156727355353e-06, |
| "loss": 1.0535, |
| "step": 245000 |
| }, |
| { |
| "epoch": 49.28, |
| "learning_rate": 1.6361311666174272e-06, |
| "loss": 1.0565, |
| "step": 245100 |
| }, |
| { |
| "epoch": 49.3, |
| "learning_rate": 1.5476069086756139e-06, |
| "loss": 1.0513, |
| "step": 245200 |
| }, |
| { |
| "epoch": 49.32, |
| "learning_rate": 1.4615430733713742e-06, |
| "loss": 1.0539, |
| "step": 245300 |
| }, |
| { |
| "epoch": 49.34, |
| "learning_rate": 1.3779398303177814e-06, |
| "loss": 1.0565, |
| "step": 245400 |
| }, |
| { |
| "epoch": 49.36, |
| "learning_rate": 1.296797344278039e-06, |
| "loss": 1.0572, |
| "step": 245500 |
| }, |
| { |
| "epoch": 49.38, |
| "learning_rate": 1.218115775166173e-06, |
| "loss": 1.0538, |
| "step": 245600 |
| }, |
| { |
| "epoch": 49.4, |
| "learning_rate": 1.1426453002136294e-06, |
| "loss": 1.0577, |
| "step": 245700 |
| }, |
| { |
| "epoch": 49.42, |
| "learning_rate": 1.0688614123491314e-06, |
| "loss": 1.0547, |
| "step": 245800 |
| }, |
| { |
| "epoch": 49.44, |
| "learning_rate": 9.97538890623456e-07, |
| "loss": 1.0539, |
| "step": 245900 |
| }, |
| { |
| "epoch": 49.46, |
| "learning_rate": 9.286778755976388e-07, |
| "loss": 1.0524, |
| "step": 246000 |
| }, |
| { |
| "epoch": 49.48, |
| "learning_rate": 8.622785029814562e-07, |
| "loss": 1.0569, |
| "step": 246100 |
| }, |
| { |
| "epoch": 49.5, |
| "learning_rate": 7.983409036331491e-07, |
| "loss": 1.0525, |
| "step": 246200 |
| }, |
| { |
| "epoch": 49.52, |
| "learning_rate": 7.368652035597001e-07, |
| "loss": 1.0557, |
| "step": 246300 |
| }, |
| { |
| "epoch": 49.55, |
| "learning_rate": 6.778515239161398e-07, |
| "loss": 1.0568, |
| "step": 246400 |
| }, |
| { |
| "epoch": 49.57, |
| "learning_rate": 6.212999810051301e-07, |
| "loss": 1.058, |
| "step": 246500 |
| }, |
| { |
| "epoch": 49.59, |
| "learning_rate": 5.672106862772419e-07, |
| "loss": 1.0585, |
| "step": 246600 |
| }, |
| { |
| "epoch": 49.61, |
| "learning_rate": 5.155837463306778e-07, |
| "loss": 1.0561, |
| "step": 246700 |
| }, |
| { |
| "epoch": 49.63, |
| "learning_rate": 4.664192629104391e-07, |
| "loss": 1.057, |
| "step": 246800 |
| }, |
| { |
| "epoch": 49.65, |
| "learning_rate": 4.201721622633381e-07, |
| "loss": 1.0523, |
| "step": 246900 |
| }, |
| { |
| "epoch": 49.67, |
| "learning_rate": 3.7590825082547965e-07, |
| "loss": 1.0533, |
| "step": 247000 |
| }, |
| { |
| "epoch": 49.69, |
| "learning_rate": 3.3410707118347595e-07, |
| "loss": 1.0538, |
| "step": 247100 |
| }, |
| { |
| "epoch": 49.71, |
| "learning_rate": 2.951498981840217e-07, |
| "loss": 1.0602, |
| "step": 247200 |
| }, |
| { |
| "epoch": 49.73, |
| "learning_rate": 2.5824979513769386e-07, |
| "loss": 1.0534, |
| "step": 247300 |
| }, |
| { |
| "epoch": 49.75, |
| "learning_rate": 2.238126557659037e-07, |
| "loss": 1.0558, |
| "step": 247400 |
| }, |
| { |
| "epoch": 49.77, |
| "learning_rate": 1.9183854793672352e-07, |
| "loss": 1.0555, |
| "step": 247500 |
| }, |
| { |
| "epoch": 49.79, |
| "learning_rate": 1.6232753466377536e-07, |
| "loss": 1.0578, |
| "step": 247600 |
| }, |
| { |
| "epoch": 49.81, |
| "learning_rate": 1.352796741069251e-07, |
| "loss": 1.0576, |
| "step": 247700 |
| }, |
| { |
| "epoch": 49.83, |
| "learning_rate": 1.1069501957144956e-07, |
| "loss": 1.0551, |
| "step": 247800 |
| }, |
| { |
| "epoch": 49.85, |
| "learning_rate": 8.857361950831422e-08, |
| "loss": 1.0557, |
| "step": 247900 |
| }, |
| { |
| "epoch": 49.87, |
| "learning_rate": 6.89155175137568e-08, |
| "loss": 1.0538, |
| "step": 248000 |
| }, |
| { |
| "epoch": 49.89, |
| "learning_rate": 5.172075232956486e-08, |
| "loss": 1.0543, |
| "step": 248100 |
| }, |
| { |
| "epoch": 49.91, |
| "learning_rate": 3.698935784279822e-08, |
| "loss": 1.056, |
| "step": 248200 |
| }, |
| { |
| "epoch": 49.93, |
| "learning_rate": 2.472136308592776e-08, |
| "loss": 1.055, |
| "step": 248300 |
| }, |
| { |
| "epoch": 49.95, |
| "learning_rate": 1.4916792236141507e-08, |
| "loss": 1.0544, |
| "step": 248400 |
| }, |
| { |
| "epoch": 49.97, |
| "learning_rate": 7.575664616454869e-09, |
| "loss": 1.0545, |
| "step": 248500 |
| }, |
| { |
| "epoch": 49.99, |
| "learning_rate": 2.6979946943228584e-09, |
| "loss": 1.0521, |
| "step": 248600 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_accuracy": 0.42330467435544344, |
| "eval_loss": 1.0445035696029663, |
| "eval_runtime": 19.4961, |
| "eval_samples_per_second": 4081.225, |
| "eval_steps_per_second": 15.952, |
| "step": 248650 |
| }, |
| { |
| "epoch": 50.0, |
| "step": 248650, |
| "total_flos": 6.912086038408397e+16, |
| "train_loss": 1.2392261905236766, |
| "train_runtime": 25350.8633, |
| "train_samples_per_second": 2510.968, |
| "train_steps_per_second": 9.808 |
| } |
| ], |
| "max_steps": 248650, |
| "num_train_epochs": 50, |
| "total_flos": 6.912086038408397e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|