{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 80650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31,
      "learning_rate": 6.195786864931847e-05,
      "loss": 7.3347,
      "step": 500
    },
    {
      "epoch": 0.62,
      "learning_rate": 9.97582756158962e-05,
      "loss": 4.6943,
      "step": 1000
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.913204664153402e-05,
      "loss": 1.9617,
      "step": 1500
    },
    {
      "epoch": 1.24,
      "learning_rate": 9.850581766717182e-05,
      "loss": 1.4872,
      "step": 2000
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.787958869280964e-05,
      "loss": 1.3048,
      "step": 2500
    },
    {
      "epoch": 1.86,
      "learning_rate": 9.725335971844745e-05,
      "loss": 1.1881,
      "step": 3000
    },
    {
      "epoch": 2.17,
      "learning_rate": 9.662713074408527e-05,
      "loss": 1.105,
      "step": 3500
    },
    {
      "epoch": 2.48,
      "learning_rate": 9.600090176972308e-05,
      "loss": 1.0428,
      "step": 4000
    },
    {
      "epoch": 2.79,
      "learning_rate": 9.53746727953609e-05,
      "loss": 0.9918,
      "step": 4500
    },
    {
      "epoch": 3.1,
      "learning_rate": 9.47484438209987e-05,
      "loss": 0.9489,
      "step": 5000
    },
    {
      "epoch": 3.41,
      "learning_rate": 9.412221484663653e-05,
      "loss": 0.9121,
      "step": 5500
    },
    {
      "epoch": 3.72,
      "learning_rate": 9.349598587227433e-05,
      "loss": 0.8837,
      "step": 6000
    },
    {
      "epoch": 4.03,
      "learning_rate": 9.286975689791215e-05,
      "loss": 0.8581,
      "step": 6500
    },
    {
      "epoch": 4.34,
      "learning_rate": 9.224352792354997e-05,
      "loss": 0.8315,
      "step": 7000
    },
    {
      "epoch": 4.65,
      "learning_rate": 9.161729894918779e-05,
      "loss": 0.8117,
      "step": 7500
    },
    {
      "epoch": 4.96,
      "learning_rate": 9.09910699748256e-05,
      "loss": 0.795,
      "step": 8000
    },
    {
      "epoch": 5.27,
      "learning_rate": 9.036484100046342e-05,
      "loss": 0.776,
      "step": 8500
    },
    {
      "epoch": 5.58,
      "learning_rate": 8.973861202610123e-05,
      "loss": 0.76,
      "step": 9000
    },
    {
      "epoch": 5.89,
      "learning_rate": 8.911238305173905e-05,
      "loss": 0.7464,
      "step": 9500
    },
    {
      "epoch": 6.2,
      "learning_rate": 8.848615407737685e-05,
      "loss": 0.732,
      "step": 10000
    },
    {
      "epoch": 6.51,
      "learning_rate": 8.785992510301467e-05,
      "loss": 0.7206,
      "step": 10500
    },
    {
      "epoch": 6.82,
      "learning_rate": 8.723369612865248e-05,
      "loss": 0.709,
      "step": 11000
    },
    {
      "epoch": 7.13,
      "learning_rate": 8.66074671542903e-05,
      "loss": 0.7003,
      "step": 11500
    },
    {
      "epoch": 7.44,
      "learning_rate": 8.598123817992811e-05,
      "loss": 0.6869,
      "step": 12000
    },
    {
      "epoch": 7.75,
      "learning_rate": 8.535500920556593e-05,
      "loss": 0.6808,
      "step": 12500
    },
    {
      "epoch": 8.06,
      "learning_rate": 8.472878023120375e-05,
      "loss": 0.671,
      "step": 13000
    },
    {
      "epoch": 8.37,
      "learning_rate": 8.410255125684155e-05,
      "loss": 0.6615,
      "step": 13500
    },
    {
      "epoch": 8.68,
      "learning_rate": 8.347632228247937e-05,
      "loss": 0.6552,
      "step": 14000
    },
    {
      "epoch": 8.99,
      "learning_rate": 8.285009330811718e-05,
      "loss": 0.6484,
      "step": 14500
    },
    {
      "epoch": 9.3,
      "learning_rate": 8.2223864333755e-05,
      "loss": 0.6395,
      "step": 15000
    },
    {
      "epoch": 9.61,
      "learning_rate": 8.159763535939281e-05,
      "loss": 0.6351,
      "step": 15500
    },
    {
      "epoch": 9.92,
      "learning_rate": 8.097140638503063e-05,
      "loss": 0.6297,
      "step": 16000
    },
    {
      "epoch": 10.23,
      "learning_rate": 8.034642986861716e-05,
      "loss": 0.6214,
      "step": 16500
    },
    {
      "epoch": 10.54,
      "learning_rate": 7.972020089425498e-05,
      "loss": 0.617,
      "step": 17000
    },
    {
      "epoch": 10.85,
      "learning_rate": 7.909397191989279e-05,
      "loss": 0.6122,
      "step": 17500
    },
    {
      "epoch": 11.16,
      "learning_rate": 7.846774294553061e-05,
      "loss": 0.6055,
      "step": 18000
    },
    {
      "epoch": 11.47,
      "learning_rate": 7.784276642911714e-05,
      "loss": 0.5992,
      "step": 18500
    },
    {
      "epoch": 11.78,
      "learning_rate": 7.721653745475495e-05,
      "loss": 0.5957,
      "step": 19000
    },
    {
      "epoch": 12.09,
      "learning_rate": 7.65915609383415e-05,
      "loss": 0.5919,
      "step": 19500
    },
    {
      "epoch": 12.4,
      "learning_rate": 7.596533196397932e-05,
      "loss": 0.5857,
      "step": 20000
    },
    {
      "epoch": 12.71,
      "learning_rate": 7.533910298961712e-05,
      "loss": 0.5825,
      "step": 20500
    },
    {
      "epoch": 13.02,
      "learning_rate": 7.471287401525494e-05,
      "loss": 0.5787,
      "step": 21000
    },
    {
      "epoch": 13.33,
      "learning_rate": 7.408664504089275e-05,
      "loss": 0.5721,
      "step": 21500
    },
    {
      "epoch": 13.64,
      "learning_rate": 7.346041606653057e-05,
      "loss": 0.5694,
      "step": 22000
    },
    {
      "epoch": 13.95,
      "learning_rate": 7.283418709216838e-05,
      "loss": 0.5672,
      "step": 22500
    },
    {
      "epoch": 14.26,
      "learning_rate": 7.22079581178062e-05,
      "loss": 0.5619,
      "step": 23000
    },
    {
      "epoch": 14.57,
      "learning_rate": 7.158298160139275e-05,
      "loss": 0.5586,
      "step": 23500
    },
    {
      "epoch": 14.88,
      "learning_rate": 7.095800508497928e-05,
      "loss": 0.5559,
      "step": 24000
    },
    {
      "epoch": 15.19,
      "learning_rate": 7.033177611061709e-05,
      "loss": 0.5519,
      "step": 24500
    },
    {
      "epoch": 15.5,
      "learning_rate": 6.97055471362549e-05,
      "loss": 0.5485,
      "step": 25000
    },
    {
      "epoch": 15.81,
      "learning_rate": 6.907931816189271e-05,
      "loss": 0.5459,
      "step": 25500
    },
    {
      "epoch": 16.12,
      "learning_rate": 6.845308918753053e-05,
      "loss": 0.5423,
      "step": 26000
    },
    {
      "epoch": 16.43,
      "learning_rate": 6.782686021316834e-05,
      "loss": 0.538,
      "step": 26500
    },
    {
      "epoch": 16.74,
      "learning_rate": 6.720063123880616e-05,
      "loss": 0.5366,
      "step": 27000
    },
    {
      "epoch": 17.05,
      "learning_rate": 6.657440226444397e-05,
      "loss": 0.5351,
      "step": 27500
    },
    {
      "epoch": 17.36,
      "learning_rate": 6.594817329008179e-05,
      "loss": 0.5299,
      "step": 28000
    },
    {
      "epoch": 17.67,
      "learning_rate": 6.532194431571961e-05,
      "loss": 0.5283,
      "step": 28500
    },
    {
      "epoch": 17.98,
      "learning_rate": 6.469571534135743e-05,
      "loss": 0.5259,
      "step": 29000
    },
    {
      "epoch": 18.29,
      "learning_rate": 6.406948636699523e-05,
      "loss": 0.5221,
      "step": 29500
    },
    {
      "epoch": 18.6,
      "learning_rate": 6.344325739263305e-05,
      "loss": 0.5197,
      "step": 30000
    },
    {
      "epoch": 18.91,
      "learning_rate": 6.281702841827086e-05,
      "loss": 0.5185,
      "step": 30500
    },
    {
      "epoch": 19.22,
      "learning_rate": 6.219079944390868e-05,
      "loss": 0.5138,
      "step": 31000
    },
    {
      "epoch": 19.53,
      "learning_rate": 6.156457046954649e-05,
      "loss": 0.5121,
      "step": 31500
    },
    {
      "epoch": 19.84,
      "learning_rate": 6.09383414951843e-05,
      "loss": 0.5112,
      "step": 32000
    },
    {
      "epoch": 20.15,
      "learning_rate": 6.0312112520822115e-05,
      "loss": 0.5072,
      "step": 32500
    },
    {
      "epoch": 20.46,
      "learning_rate": 5.968588354645993e-05,
      "loss": 0.5049,
      "step": 33000
    },
    {
      "epoch": 20.77,
      "learning_rate": 5.905965457209774e-05,
      "loss": 0.5043,
      "step": 33500
    },
    {
      "epoch": 21.08,
      "learning_rate": 5.8433425597735556e-05,
      "loss": 0.5013,
      "step": 34000
    },
    {
      "epoch": 21.39,
      "learning_rate": 5.780719662337337e-05,
      "loss": 0.4981,
      "step": 34500
    },
    {
      "epoch": 21.7,
      "learning_rate": 5.718096764901118e-05,
      "loss": 0.4975,
      "step": 35000
    },
    {
      "epoch": 22.01,
      "learning_rate": 5.6554738674648996e-05,
      "loss": 0.4959,
      "step": 35500
    },
    {
      "epoch": 22.32,
      "learning_rate": 5.5928509700286816e-05,
      "loss": 0.4921,
      "step": 36000
    },
    {
      "epoch": 22.63,
      "learning_rate": 5.530228072592463e-05,
      "loss": 0.4921,
      "step": 36500
    },
    {
      "epoch": 22.94,
      "learning_rate": 5.467605175156244e-05,
      "loss": 0.4886,
      "step": 37000
    },
    {
      "epoch": 23.25,
      "learning_rate": 5.404982277720026e-05,
      "loss": 0.4856,
      "step": 37500
    },
    {
      "epoch": 23.56,
      "learning_rate": 5.342359380283807e-05,
      "loss": 0.4847,
      "step": 38000
    },
    {
      "epoch": 23.87,
      "learning_rate": 5.2797364828475884e-05,
      "loss": 0.4839,
      "step": 38500
    },
    {
      "epoch": 24.18,
      "learning_rate": 5.21711358541137e-05,
      "loss": 0.4814,
      "step": 39000
    },
    {
      "epoch": 24.49,
      "learning_rate": 5.154490687975152e-05,
      "loss": 0.4796,
      "step": 39500
    },
    {
      "epoch": 24.8,
      "learning_rate": 5.091867790538933e-05,
      "loss": 0.4784,
      "step": 40000
    },
    {
      "epoch": 25.11,
      "learning_rate": 5.0292448931027144e-05,
      "loss": 0.4757,
      "step": 40500
    },
    {
      "epoch": 25.42,
      "learning_rate": 4.966621995666496e-05,
      "loss": 0.4744,
      "step": 41000
    },
    {
      "epoch": 25.73,
      "learning_rate": 4.903999098230277e-05,
      "loss": 0.4734,
      "step": 41500
    },
    {
      "epoch": 26.04,
      "learning_rate": 4.841376200794059e-05,
      "loss": 0.4715,
      "step": 42000
    },
    {
      "epoch": 26.35,
      "learning_rate": 4.7787533033578405e-05,
      "loss": 0.4682,
      "step": 42500
    },
    {
      "epoch": 26.66,
      "learning_rate": 4.716130405921622e-05,
      "loss": 0.4683,
      "step": 43000
    },
    {
      "epoch": 26.97,
      "learning_rate": 4.653507508485403e-05,
      "loss": 0.4671,
      "step": 43500
    },
    {
      "epoch": 27.28,
      "learning_rate": 4.5908846110491845e-05,
      "loss": 0.4637,
      "step": 44000
    },
    {
      "epoch": 27.59,
      "learning_rate": 4.528261713612966e-05,
      "loss": 0.4633,
      "step": 44500
    },
    {
      "epoch": 27.9,
      "learning_rate": 4.465638816176747e-05,
      "loss": 0.4623,
      "step": 45000
    },
    {
      "epoch": 28.21,
      "learning_rate": 4.4030159187405286e-05,
      "loss": 0.4595,
      "step": 45500
    },
    {
      "epoch": 28.52,
      "learning_rate": 4.34039302130431e-05,
      "loss": 0.4593,
      "step": 46000
    },
    {
      "epoch": 28.83,
      "learning_rate": 4.277770123868091e-05,
      "loss": 0.4581,
      "step": 46500
    },
    {
      "epoch": 29.14,
      "learning_rate": 4.2151472264318726e-05,
      "loss": 0.4558,
      "step": 47000
    },
    {
      "epoch": 29.45,
      "learning_rate": 4.152524328995654e-05,
      "loss": 0.4534,
      "step": 47500
    },
    {
      "epoch": 29.76,
      "learning_rate": 4.089901431559436e-05,
      "loss": 0.4538,
      "step": 48000
    },
    {
      "epoch": 30.07,
      "learning_rate": 4.0274037799180894e-05,
      "loss": 0.4518,
      "step": 48500
    },
    {
      "epoch": 30.38,
      "learning_rate": 3.9647808824818714e-05,
      "loss": 0.4499,
      "step": 49000
    },
    {
      "epoch": 30.69,
      "learning_rate": 3.902157985045653e-05,
      "loss": 0.4503,
      "step": 49500
    },
    {
      "epoch": 31.0,
      "learning_rate": 3.839535087609434e-05,
      "loss": 0.4482,
      "step": 50000
    },
    {
      "epoch": 31.31,
      "learning_rate": 3.7770374359680875e-05,
      "loss": 0.4454,
      "step": 50500
    },
    {
      "epoch": 31.62,
      "learning_rate": 3.714414538531869e-05,
      "loss": 0.445,
      "step": 51000
    },
    {
      "epoch": 31.93,
      "learning_rate": 3.65179164109565e-05,
      "loss": 0.4443,
      "step": 51500
    },
    {
      "epoch": 32.24,
      "learning_rate": 3.5891687436594315e-05,
      "loss": 0.443,
      "step": 52000
    },
    {
      "epoch": 32.55,
      "learning_rate": 3.5266710920180856e-05,
      "loss": 0.4416,
      "step": 52500
    },
    {
      "epoch": 32.86,
      "learning_rate": 3.464048194581867e-05,
      "loss": 0.4414,
      "step": 53000
    },
    {
      "epoch": 33.17,
      "learning_rate": 3.401425297145648e-05,
      "loss": 0.4391,
      "step": 53500
    },
    {
      "epoch": 33.48,
      "learning_rate": 3.3388023997094296e-05,
      "loss": 0.4369,
      "step": 54000
    },
    {
      "epoch": 33.79,
      "learning_rate": 3.2763047480680836e-05,
      "loss": 0.4368,
      "step": 54500
    },
    {
      "epoch": 34.1,
      "learning_rate": 3.213681850631865e-05,
      "loss": 0.4363,
      "step": 55000
    },
    {
      "epoch": 34.41,
      "learning_rate": 3.1510589531956463e-05,
      "loss": 0.434,
      "step": 55500
    },
    {
      "epoch": 34.72,
      "learning_rate": 3.0884360557594284e-05,
      "loss": 0.4337,
      "step": 56000
    },
    {
      "epoch": 35.03,
      "learning_rate": 3.0259384041180817e-05,
      "loss": 0.4324,
      "step": 56500
    },
    {
      "epoch": 35.34,
      "learning_rate": 2.9633155066818634e-05,
      "loss": 0.4311,
      "step": 57000
    },
    {
      "epoch": 35.65,
      "learning_rate": 2.9006926092456448e-05,
      "loss": 0.4298,
      "step": 57500
    },
    {
      "epoch": 35.96,
      "learning_rate": 2.838069711809426e-05,
      "loss": 0.4305,
      "step": 58000
    },
    {
      "epoch": 36.27,
      "learning_rate": 2.77557206016808e-05,
      "loss": 0.4273,
      "step": 58500
    },
    {
      "epoch": 36.58,
      "learning_rate": 2.7129491627318615e-05,
      "loss": 0.4274,
      "step": 59000
    },
    {
      "epoch": 36.89,
      "learning_rate": 2.650326265295643e-05,
      "loss": 0.4267,
      "step": 59500
    },
    {
      "epoch": 37.2,
      "learning_rate": 2.5877033678594242e-05,
      "loss": 0.4245,
      "step": 60000
    },
    {
      "epoch": 37.51,
      "learning_rate": 2.5250804704232056e-05,
      "loss": 0.4234,
      "step": 60500
    },
    {
      "epoch": 37.82,
      "learning_rate": 2.462457572986987e-05,
      "loss": 0.4237,
      "step": 61000
    },
    {
      "epoch": 38.13,
      "learning_rate": 2.3998346755507686e-05,
      "loss": 0.4217,
      "step": 61500
    },
    {
      "epoch": 38.44,
      "learning_rate": 2.33721177811455e-05,
      "loss": 0.42,
      "step": 62000
    },
    {
      "epoch": 38.75,
      "learning_rate": 2.274714126473204e-05,
      "loss": 0.4207,
      "step": 62500
    },
    {
      "epoch": 39.06,
      "learning_rate": 2.2120912290369853e-05,
      "loss": 0.4206,
      "step": 63000
    },
    {
      "epoch": 39.37,
      "learning_rate": 2.1494683316007667e-05,
      "loss": 0.4177,
      "step": 63500
    },
    {
      "epoch": 39.68,
      "learning_rate": 2.086845434164548e-05,
      "loss": 0.4183,
      "step": 64000
    },
    {
      "epoch": 39.99,
      "learning_rate": 2.0243477825232017e-05,
      "loss": 0.4163,
      "step": 64500
    },
    {
      "epoch": 40.3,
      "learning_rate": 1.961724885086983e-05,
      "loss": 0.4151,
      "step": 65000
    },
    {
      "epoch": 40.61,
      "learning_rate": 1.8991019876507644e-05,
      "loss": 0.4159,
      "step": 65500
    },
    {
      "epoch": 40.92,
      "learning_rate": 1.836479090214546e-05,
      "loss": 0.4146,
      "step": 66000
    },
    {
      "epoch": 41.23,
      "learning_rate": 1.7738561927783275e-05,
      "loss": 0.4134,
      "step": 66500
    },
    {
      "epoch": 41.54,
      "learning_rate": 1.7112332953421088e-05,
      "loss": 0.4124,
      "step": 67000
    },
    {
      "epoch": 41.85,
      "learning_rate": 1.6486103979058905e-05,
      "loss": 0.4119,
      "step": 67500
    },
    {
      "epoch": 42.16,
      "learning_rate": 1.585987500469672e-05,
      "loss": 0.4106,
      "step": 68000
    },
    {
      "epoch": 42.47,
      "learning_rate": 1.5233646030334534e-05,
      "loss": 0.4104,
      "step": 68500
    },
    {
      "epoch": 42.78,
      "learning_rate": 1.4607417055972347e-05,
      "loss": 0.4094,
      "step": 69000
    },
    {
      "epoch": 43.09,
      "learning_rate": 1.398118808161016e-05,
      "loss": 0.4091,
      "step": 69500
    },
    {
      "epoch": 43.4,
      "learning_rate": 1.3354959107247974e-05,
      "loss": 0.4082,
      "step": 70000
    },
    {
      "epoch": 43.71,
      "learning_rate": 1.2728730132885787e-05,
      "loss": 0.4074,
      "step": 70500
    },
    {
      "epoch": 44.02,
      "learning_rate": 1.2102501158523603e-05,
      "loss": 0.4067,
      "step": 71000
    },
    {
      "epoch": 44.33,
      "learning_rate": 1.1476272184161418e-05,
      "loss": 0.4062,
      "step": 71500
    },
    {
      "epoch": 44.64,
      "learning_rate": 1.0850043209799233e-05,
      "loss": 0.4051,
      "step": 72000
    },
    {
      "epoch": 44.95,
      "learning_rate": 1.0223814235437046e-05,
      "loss": 0.4048,
      "step": 72500
    },
    {
      "epoch": 45.26,
      "learning_rate": 9.59758526107486e-06,
      "loss": 0.4034,
      "step": 73000
    },
    {
      "epoch": 45.57,
      "learning_rate": 8.971356286712675e-06,
      "loss": 0.4036,
      "step": 73500
    },
    {
      "epoch": 45.88,
      "learning_rate": 8.345127312350489e-06,
      "loss": 0.4038,
      "step": 74000
    },
    {
      "epoch": 46.19,
      "learning_rate": 7.718898337988302e-06,
      "loss": 0.4021,
      "step": 74500
    },
    {
      "epoch": 46.5,
      "learning_rate": 7.092669363626117e-06,
      "loss": 0.4023,
      "step": 75000
    },
    {
      "epoch": 46.81,
      "learning_rate": 6.466440389263931e-06,
      "loss": 0.4011,
      "step": 75500
    },
    {
      "epoch": 47.12,
      "learning_rate": 5.840211414901745e-06,
      "loss": 0.4004,
      "step": 76000
    },
    {
      "epoch": 47.43,
      "learning_rate": 5.2139824405395585e-06,
      "loss": 0.3997,
      "step": 76500
    },
    {
      "epoch": 47.74,
      "learning_rate": 4.587753466177374e-06,
      "loss": 0.3999,
      "step": 77000
    },
    {
      "epoch": 48.05,
      "learning_rate": 3.961524491815188e-06,
      "loss": 0.3988,
      "step": 77500
    },
    {
      "epoch": 48.36,
      "learning_rate": 3.3352955174530015e-06,
      "loss": 0.3988,
      "step": 78000
    },
    {
      "epoch": 48.67,
      "learning_rate": 2.709066543090816e-06,
      "loss": 0.3978,
      "step": 78500
    },
    {
      "epoch": 48.98,
      "learning_rate": 2.08283756872863e-06,
      "loss": 0.3983,
      "step": 79000
    },
    {
      "epoch": 49.29,
      "learning_rate": 1.4566085943664442e-06,
      "loss": 0.398,
      "step": 79500
    },
    {
      "epoch": 49.6,
      "learning_rate": 8.303796200042584e-07,
      "loss": 0.3974,
      "step": 80000
    },
    {
      "epoch": 49.91,
      "learning_rate": 2.0415064564207257e-07,
      "loss": 0.3975,
      "step": 80500
    },
    {
      "epoch": 50.0,
      "step": 80650,
      "total_flos": 2.180340696717392e+19,
      "train_loss": 0.005664803918222426,
      "train_runtime": 6206.924,
      "train_samples_per_second": 19956.785,
      "train_steps_per_second": 12.994
    }
  ],
  "max_steps": 80650,
  "num_train_epochs": 50,
  "total_flos": 2.180340696717392e+19,
  "trial_name": null,
  "trial_params": null
}