Instructions to use HYdsl/FiLM with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use HYdsl/FiLM with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("fill-mask", model="HYdsl/FiLM")# Load model directly from transformers import AutoTokenizer, AutoModelForMaskedLM tokenizer = AutoTokenizer.from_pretrained("HYdsl/FiLM") model = AutoModelForMaskedLM.from_pretrained("HYdsl/FiLM") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "global_step": 383745, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.993485257136901e-05, | |
| "loss": 1.5554, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9869705142738016e-05, | |
| "loss": 1.5383, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9804557714107027e-05, | |
| "loss": 1.5335, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.973941028547604e-05, | |
| "loss": 1.528, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.967426285684505e-05, | |
| "loss": 1.5146, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.960911542821405e-05, | |
| "loss": 1.5074, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9543967999583054e-05, | |
| "loss": 1.5076, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9478820570952065e-05, | |
| "loss": 1.5025, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9413673142321075e-05, | |
| "loss": 1.5122, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.934852571369008e-05, | |
| "loss": 1.5013, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.928337828505909e-05, | |
| "loss": 1.5068, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.92182308564281e-05, | |
| "loss": 1.4992, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.915308342779711e-05, | |
| "loss": 1.4902, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.908793599916611e-05, | |
| "loss": 1.4905, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.902278857053512e-05, | |
| "loss": 1.4883, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8957641141904134e-05, | |
| "loss": 1.4811, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8892493713273144e-05, | |
| "loss": 1.4823, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.882734628464215e-05, | |
| "loss": 1.4842, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.876219885601115e-05, | |
| "loss": 1.4744, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.869705142738016e-05, | |
| "loss": 1.471, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.863190399874917e-05, | |
| "loss": 1.4788, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8566756570118175e-05, | |
| "loss": 1.4833, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8501609141487185e-05, | |
| "loss": 1.4778, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8436461712856196e-05, | |
| "loss": 1.4651, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8371314284225206e-05, | |
| "loss": 1.4722, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.830616685559421e-05, | |
| "loss": 1.4741, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.824101942696322e-05, | |
| "loss": 1.4711, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.817587199833223e-05, | |
| "loss": 1.463, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.811072456970124e-05, | |
| "loss": 1.4641, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.8045577141070244e-05, | |
| "loss": 1.465, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.7980429712439254e-05, | |
| "loss": 1.46, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.791528228380826e-05, | |
| "loss": 1.4563, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.785013485517727e-05, | |
| "loss": 1.4545, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.778498742654627e-05, | |
| "loss": 1.4643, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.771983999791528e-05, | |
| "loss": 1.4612, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.765469256928429e-05, | |
| "loss": 1.4538, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.75895451406533e-05, | |
| "loss": 1.4508, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7524397712022306e-05, | |
| "loss": 1.4565, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.7459250283391316e-05, | |
| "loss": 1.4562, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.739410285476033e-05, | |
| "loss": 1.4538, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.732895542612934e-05, | |
| "loss": 1.4476, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.726380799749834e-05, | |
| "loss": 1.4567, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.719866056886735e-05, | |
| "loss": 1.4464, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7133513140236354e-05, | |
| "loss": 1.4514, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7068365711605365e-05, | |
| "loss": 1.4504, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.700321828297437e-05, | |
| "loss": 1.4467, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.693807085434338e-05, | |
| "loss": 1.4449, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.687292342571239e-05, | |
| "loss": 1.439, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.68077759970814e-05, | |
| "loss": 1.4369, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.67426285684504e-05, | |
| "loss": 1.4486, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.667748113981941e-05, | |
| "loss": 1.4426, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6612333711188423e-05, | |
| "loss": 1.4371, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6547186282557434e-05, | |
| "loss": 1.4382, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.648203885392644e-05, | |
| "loss": 1.4306, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.641689142529545e-05, | |
| "loss": 1.4491, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.635174399666445e-05, | |
| "loss": 1.441, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.628659656803346e-05, | |
| "loss": 1.4499, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6221449139402465e-05, | |
| "loss": 1.4347, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6156301710771475e-05, | |
| "loss": 1.4458, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6091154282140486e-05, | |
| "loss": 1.4394, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6026006853509496e-05, | |
| "loss": 1.4264, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.59608594248785e-05, | |
| "loss": 1.422, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.589571199624751e-05, | |
| "loss": 1.4297, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.583056456761652e-05, | |
| "loss": 1.4204, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.576541713898553e-05, | |
| "loss": 1.4287, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5700269710354534e-05, | |
| "loss": 1.4262, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.5635122281723544e-05, | |
| "loss": 1.4353, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.556997485309255e-05, | |
| "loss": 1.422, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.550482742446156e-05, | |
| "loss": 1.4264, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.543967999583056e-05, | |
| "loss": 1.4279, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.537453256719957e-05, | |
| "loss": 1.4255, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.530938513856858e-05, | |
| "loss": 1.4245, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.524423770993759e-05, | |
| "loss": 1.4112, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5179090281306596e-05, | |
| "loss": 1.4267, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.5113942852675606e-05, | |
| "loss": 1.4233, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.504879542404462e-05, | |
| "loss": 1.4283, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.498364799541363e-05, | |
| "loss": 1.4263, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.491850056678263e-05, | |
| "loss": 1.4239, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.485335313815164e-05, | |
| "loss": 1.4243, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.4788205709520644e-05, | |
| "loss": 1.4223, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.4723058280889655e-05, | |
| "loss": 1.4162, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.465791085225866e-05, | |
| "loss": 1.4142, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.459276342362767e-05, | |
| "loss": 1.4186, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.452761599499668e-05, | |
| "loss": 1.4115, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.446246856636569e-05, | |
| "loss": 1.4171, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.439732113773469e-05, | |
| "loss": 1.4107, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.43321737091037e-05, | |
| "loss": 1.4115, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.426702628047271e-05, | |
| "loss": 1.4064, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4201878851841724e-05, | |
| "loss": 1.4168, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.413673142321073e-05, | |
| "loss": 1.415, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.407158399457974e-05, | |
| "loss": 1.4082, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.400643656594874e-05, | |
| "loss": 1.4104, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.394128913731775e-05, | |
| "loss": 1.4077, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.3876141708686755e-05, | |
| "loss": 1.4152, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.3810994280055765e-05, | |
| "loss": 1.4087, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3745846851424775e-05, | |
| "loss": 1.4101, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3680699422793786e-05, | |
| "loss": 1.4064, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.361555199416279e-05, | |
| "loss": 1.4071, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.35504045655318e-05, | |
| "loss": 1.4124, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.348525713690081e-05, | |
| "loss": 1.4091, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.342010970826982e-05, | |
| "loss": 1.4081, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3354962279638824e-05, | |
| "loss": 1.4099, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.3289814851007834e-05, | |
| "loss": 1.4087, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.3224667422376844e-05, | |
| "loss": 1.3954, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.315951999374585e-05, | |
| "loss": 1.3962, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.309437256511485e-05, | |
| "loss": 1.4091, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.302922513648386e-05, | |
| "loss": 1.403, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.296407770785287e-05, | |
| "loss": 1.4087, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.289893027922188e-05, | |
| "loss": 1.4044, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.2833782850590886e-05, | |
| "loss": 1.3922, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.2768635421959896e-05, | |
| "loss": 1.4006, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2703487993328907e-05, | |
| "loss": 1.3969, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.263834056469792e-05, | |
| "loss": 1.3985, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.257319313606692e-05, | |
| "loss": 1.4059, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.250804570743593e-05, | |
| "loss": 1.3923, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.244289827880494e-05, | |
| "loss": 1.3966, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.2377750850173945e-05, | |
| "loss": 1.3921, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.231260342154295e-05, | |
| "loss": 1.3987, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.224745599291196e-05, | |
| "loss": 1.3984, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.218230856428097e-05, | |
| "loss": 1.3914, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.211716113564998e-05, | |
| "loss": 1.3976, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.205201370701898e-05, | |
| "loss": 1.3883, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.198686627838799e-05, | |
| "loss": 1.3898, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.1921718849757e-05, | |
| "loss": 1.3917, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.1856571421126014e-05, | |
| "loss": 1.3973, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.179142399249502e-05, | |
| "loss": 1.392, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.172627656386403e-05, | |
| "loss": 1.3966, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.166112913523304e-05, | |
| "loss": 1.3923, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.159598170660204e-05, | |
| "loss": 1.3838, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1530834277971045e-05, | |
| "loss": 1.3881, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1465686849340055e-05, | |
| "loss": 1.3865, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1400539420709065e-05, | |
| "loss": 1.3855, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1335391992078076e-05, | |
| "loss": 1.3888, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.127024456344708e-05, | |
| "loss": 1.3954, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.120509713481609e-05, | |
| "loss": 1.387, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.11399497061851e-05, | |
| "loss": 1.3765, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.107480227755411e-05, | |
| "loss": 1.387, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1009654848923114e-05, | |
| "loss": 1.3865, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.0944507420292124e-05, | |
| "loss": 1.3913, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.0879359991661134e-05, | |
| "loss": 1.3781, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.081421256303014e-05, | |
| "loss": 1.3833, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.074906513439914e-05, | |
| "loss": 1.3776, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.068391770576815e-05, | |
| "loss": 1.3837, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.061877027713716e-05, | |
| "loss": 1.3884, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.055362284850617e-05, | |
| "loss": 1.3811, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0488475419875176e-05, | |
| "loss": 1.3868, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0423327991244186e-05, | |
| "loss": 1.384, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.0358180562613196e-05, | |
| "loss": 1.3832, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.029303313398221e-05, | |
| "loss": 1.3871, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.022788570535121e-05, | |
| "loss": 1.386, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.016273827672022e-05, | |
| "loss": 1.3815, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.009759084808923e-05, | |
| "loss": 1.3755, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.0032443419458234e-05, | |
| "loss": 1.3859, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.9967295990827245e-05, | |
| "loss": 1.38, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.990214856219625e-05, | |
| "loss": 1.3834, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.983700113356526e-05, | |
| "loss": 1.3793, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.977185370493427e-05, | |
| "loss": 1.3765, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.970670627630327e-05, | |
| "loss": 1.3787, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.964155884767228e-05, | |
| "loss": 1.3818, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.957641141904129e-05, | |
| "loss": 1.3736, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9511263990410303e-05, | |
| "loss": 1.3854, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.944611656177931e-05, | |
| "loss": 1.3796, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.938096913314832e-05, | |
| "loss": 1.3775, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.931582170451733e-05, | |
| "loss": 1.3768, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.925067427588633e-05, | |
| "loss": 1.3691, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.918552684725534e-05, | |
| "loss": 1.3796, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9120379418624345e-05, | |
| "loss": 1.3701, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9055231989993355e-05, | |
| "loss": 1.3828, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.8990084561362366e-05, | |
| "loss": 1.3812, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.892493713273137e-05, | |
| "loss": 1.3767, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.885978970410038e-05, | |
| "loss": 1.3712, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.879464227546939e-05, | |
| "loss": 1.3715, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.87294948468384e-05, | |
| "loss": 1.3662, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8664347418207404e-05, | |
| "loss": 1.3782, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8599199989576414e-05, | |
| "loss": 1.3758, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8534052560945424e-05, | |
| "loss": 1.3655, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.8468905132314435e-05, | |
| "loss": 1.3802, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.840375770368344e-05, | |
| "loss": 1.3766, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.833861027505244e-05, | |
| "loss": 1.3648, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.827346284642145e-05, | |
| "loss": 1.3703, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.820831541779046e-05, | |
| "loss": 1.3643, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8143167989159466e-05, | |
| "loss": 1.368, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8078020560528476e-05, | |
| "loss": 1.367, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.8012873131897486e-05, | |
| "loss": 1.3641, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.79477257032665e-05, | |
| "loss": 1.3743, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.78825782746355e-05, | |
| "loss": 1.3724, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.781743084600451e-05, | |
| "loss": 1.3729, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.775228341737352e-05, | |
| "loss": 1.3627, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.768713598874253e-05, | |
| "loss": 1.3649, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7621988560111535e-05, | |
| "loss": 1.3617, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.755684113148054e-05, | |
| "loss": 1.3645, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.749169370284955e-05, | |
| "loss": 1.3537, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.742654627421856e-05, | |
| "loss": 1.3666, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.736139884558756e-05, | |
| "loss": 1.3629, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.729625141695657e-05, | |
| "loss": 1.367, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.723110398832558e-05, | |
| "loss": 1.3658, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.716595655969459e-05, | |
| "loss": 1.3599, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.71008091310636e-05, | |
| "loss": 1.3658, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.703566170243261e-05, | |
| "loss": 1.3595, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.697051427380162e-05, | |
| "loss": 1.3662, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.690536684517063e-05, | |
| "loss": 1.3613, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.684021941653963e-05, | |
| "loss": 1.3626, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.6775071987908635e-05, | |
| "loss": 1.3692, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6709924559277645e-05, | |
| "loss": 1.3572, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6644777130646656e-05, | |
| "loss": 1.3553, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.657962970201566e-05, | |
| "loss": 1.3561, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.651448227338467e-05, | |
| "loss": 1.3588, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.644933484475368e-05, | |
| "loss": 1.3554, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.638418741612269e-05, | |
| "loss": 1.3612, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6319039987491694e-05, | |
| "loss": 1.3579, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6253892558860704e-05, | |
| "loss": 1.3564, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6188745130229714e-05, | |
| "loss": 1.3523, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.6123597701598724e-05, | |
| "loss": 1.353, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.605845027296773e-05, | |
| "loss": 1.3539, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.599330284433673e-05, | |
| "loss": 1.3577, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.592815541570574e-05, | |
| "loss": 1.353, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.586300798707475e-05, | |
| "loss": 1.3577, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.5797860558443756e-05, | |
| "loss": 1.3528, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5732713129812766e-05, | |
| "loss": 1.364, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5667565701181776e-05, | |
| "loss": 1.3585, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.560241827255079e-05, | |
| "loss": 1.3559, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.553727084391979e-05, | |
| "loss": 1.3518, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.54721234152888e-05, | |
| "loss": 1.3575, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.540697598665781e-05, | |
| "loss": 1.3527, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.534182855802682e-05, | |
| "loss": 1.3525, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.5276681129395825e-05, | |
| "loss": 1.353, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.5211533700764835e-05, | |
| "loss": 1.3471, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.514638627213384e-05, | |
| "loss": 1.3439, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.508123884350285e-05, | |
| "loss": 1.3478, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.501609141487185e-05, | |
| "loss": 1.3502, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.495094398624086e-05, | |
| "loss": 1.3482, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.488579655760987e-05, | |
| "loss": 1.3565, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.482064912897888e-05, | |
| "loss": 1.3556, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.475550170034789e-05, | |
| "loss": 1.3455, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.46903542717169e-05, | |
| "loss": 1.3487, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.462520684308591e-05, | |
| "loss": 1.3422, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.456005941445492e-05, | |
| "loss": 1.3493, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.449491198582392e-05, | |
| "loss": 1.3547, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.442976455719293e-05, | |
| "loss": 1.3355, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4364617128561935e-05, | |
| "loss": 1.3569, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4299469699930945e-05, | |
| "loss": 1.3542, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.423432227129995e-05, | |
| "loss": 1.3408, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.416917484266896e-05, | |
| "loss": 1.3452, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.410402741403797e-05, | |
| "loss": 1.3521, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.403887998540698e-05, | |
| "loss": 1.3483, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.3973732556775983e-05, | |
| "loss": 1.3409, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.3908585128144994e-05, | |
| "loss": 1.3493, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.3843437699514004e-05, | |
| "loss": 1.3459, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.3778290270883014e-05, | |
| "loss": 1.3358, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.371314284225202e-05, | |
| "loss": 1.347, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.364799541362103e-05, | |
| "loss": 1.3453, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.358284798499003e-05, | |
| "loss": 1.3358, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.351770055635904e-05, | |
| "loss": 1.3601, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3452553127728046e-05, | |
| "loss": 1.3471, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3387405699097056e-05, | |
| "loss": 1.3505, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3322258270466066e-05, | |
| "loss": 1.3415, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.3257110841835077e-05, | |
| "loss": 1.3301, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.319196341320408e-05, | |
| "loss": 1.3444, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.312681598457309e-05, | |
| "loss": 1.34, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.30616685559421e-05, | |
| "loss": 1.3383, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.299652112731111e-05, | |
| "loss": 1.3386, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.2931373698680115e-05, | |
| "loss": 1.3397, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.2866226270049125e-05, | |
| "loss": 1.3377, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.280107884141813e-05, | |
| "loss": 1.3377, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.273593141278714e-05, | |
| "loss": 1.3327, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.267078398415614e-05, | |
| "loss": 1.3333, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.260563655552515e-05, | |
| "loss": 1.3369, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.254048912689416e-05, | |
| "loss": 1.3389, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.247534169826317e-05, | |
| "loss": 1.3419, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.241019426963218e-05, | |
| "loss": 1.3362, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.234504684100119e-05, | |
| "loss": 1.338, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.22798994123702e-05, | |
| "loss": 1.3349, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.221475198373921e-05, | |
| "loss": 1.3299, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.214960455510821e-05, | |
| "loss": 1.3305, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.208445712647722e-05, | |
| "loss": 1.3395, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2019309697846225e-05, | |
| "loss": 1.3348, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.1954162269215235e-05, | |
| "loss": 1.3358, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.188901484058424e-05, | |
| "loss": 1.3295, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.182386741195325e-05, | |
| "loss": 1.335, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.175871998332226e-05, | |
| "loss": 1.3326, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.169357255469127e-05, | |
| "loss": 1.3367, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.162842512606027e-05, | |
| "loss": 1.3372, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1563277697429284e-05, | |
| "loss": 1.3337, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1498130268798294e-05, | |
| "loss": 1.3295, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.1432982840167304e-05, | |
| "loss": 1.3445, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.136783541153631e-05, | |
| "loss": 1.33, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.130268798290532e-05, | |
| "loss": 1.3319, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.123754055427432e-05, | |
| "loss": 1.3295, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.117239312564333e-05, | |
| "loss": 1.3294, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1107245697012335e-05, | |
| "loss": 1.3217, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.1042098268381346e-05, | |
| "loss": 1.3308, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.0976950839750356e-05, | |
| "loss": 1.3278, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.0911803411119366e-05, | |
| "loss": 1.3286, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.084665598248837e-05, | |
| "loss": 1.3352, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.078150855385738e-05, | |
| "loss": 1.3125, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.071636112522639e-05, | |
| "loss": 1.3303, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.06512136965954e-05, | |
| "loss": 1.3185, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0586066267964404e-05, | |
| "loss": 1.3295, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0520918839333415e-05, | |
| "loss": 1.3218, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.045577141070242e-05, | |
| "loss": 1.3252, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0390623982071432e-05, | |
| "loss": 1.3241, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0325476553440436e-05, | |
| "loss": 1.3232, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0260329124809446e-05, | |
| "loss": 1.329, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0195181696178453e-05, | |
| "loss": 1.33, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0130034267547463e-05, | |
| "loss": 1.3331, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.0064886838916467e-05, | |
| "loss": 1.32, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.9999739410285477e-05, | |
| "loss": 1.316, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.9934591981654487e-05, | |
| "loss": 1.3224, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.9869444553023494e-05, | |
| "loss": 1.3297, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.98042971243925e-05, | |
| "loss": 1.3222, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9739149695761508e-05, | |
| "loss": 1.3346, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9674002267130518e-05, | |
| "loss": 1.3314, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.960885483849953e-05, | |
| "loss": 1.3266, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9543707409868532e-05, | |
| "loss": 1.3189, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9478559981237542e-05, | |
| "loss": 1.3265, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.941341255260655e-05, | |
| "loss": 1.3136, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.934826512397556e-05, | |
| "loss": 1.3229, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.9283117695344563e-05, | |
| "loss": 1.3244, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9217970266713574e-05, | |
| "loss": 1.3237, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9152822838082584e-05, | |
| "loss": 1.3136, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.908767540945159e-05, | |
| "loss": 1.3158, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9022527980820598e-05, | |
| "loss": 1.3164, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.8957380552189605e-05, | |
| "loss": 1.3238, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.8892233123558615e-05, | |
| "loss": 1.3206, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.8827085694927625e-05, | |
| "loss": 1.3232, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.876193826629663e-05, | |
| "loss": 1.316, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.869679083766564e-05, | |
| "loss": 1.3094, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8631643409034646e-05, | |
| "loss": 1.313, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8566495980403656e-05, | |
| "loss": 1.3158, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.850134855177266e-05, | |
| "loss": 1.3211, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.843620112314167e-05, | |
| "loss": 1.3155, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.837105369451068e-05, | |
| "loss": 1.314, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.830590626587969e-05, | |
| "loss": 1.3217, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8240758837248694e-05, | |
| "loss": 1.3236, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.81756114086177e-05, | |
| "loss": 1.3118, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.811046397998671e-05, | |
| "loss": 1.3163, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.8045316551355722e-05, | |
| "loss": 1.3117, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7980169122724725e-05, | |
| "loss": 1.3104, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7915021694093736e-05, | |
| "loss": 1.3209, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7849874265462743e-05, | |
| "loss": 1.3155, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.7784726836831753e-05, | |
| "loss": 1.3046, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7719579408200757e-05, | |
| "loss": 1.3179, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7654431979569767e-05, | |
| "loss": 1.3151, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7589284550938777e-05, | |
| "loss": 1.3135, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7524137122307787e-05, | |
| "loss": 1.3182, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.745898969367679e-05, | |
| "loss": 1.3114, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7393842265045798e-05, | |
| "loss": 1.3103, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7328694836414808e-05, | |
| "loss": 1.3097, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.726354740778382e-05, | |
| "loss": 1.3122, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7198399979152822e-05, | |
| "loss": 1.3144, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7133252550521832e-05, | |
| "loss": 1.3087, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.7068105121890843e-05, | |
| "loss": 1.3114, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.700295769325985e-05, | |
| "loss": 1.3098, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6937810264628853e-05, | |
| "loss": 1.3131, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6872662835997863e-05, | |
| "loss": 1.3064, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6807515407366874e-05, | |
| "loss": 1.3102, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6742367978735884e-05, | |
| "loss": 1.31, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6677220550104888e-05, | |
| "loss": 1.3068, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6612073121473895e-05, | |
| "loss": 1.3148, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6546925692842905e-05, | |
| "loss": 1.317, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.6481778264211915e-05, | |
| "loss": 1.3105, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.641663083558092e-05, | |
| "loss": 1.3166, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.635148340694993e-05, | |
| "loss": 1.3029, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.628633597831894e-05, | |
| "loss": 1.3018, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.6221188549687946e-05, | |
| "loss": 1.311, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.615604112105695e-05, | |
| "loss": 1.3071, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.609089369242596e-05, | |
| "loss": 1.3201, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.602574626379497e-05, | |
| "loss": 1.2955, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.596059883516398e-05, | |
| "loss": 1.3063, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.5895451406532984e-05, | |
| "loss": 1.3042, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.583030397790199e-05, | |
| "loss": 1.3157, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.5765156549271e-05, | |
| "loss": 1.3007, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5700009120640012e-05, | |
| "loss": 1.3051, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5634861692009015e-05, | |
| "loss": 1.3082, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5569714263378026e-05, | |
| "loss": 1.3, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5504566834747036e-05, | |
| "loss": 1.3076, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5439419406116043e-05, | |
| "loss": 1.3042, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5374271977485046e-05, | |
| "loss": 1.3045, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5309124548854057e-05, | |
| "loss": 1.306, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5243977120223067e-05, | |
| "loss": 1.3041, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.5178829691592077e-05, | |
| "loss": 1.3057, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.511368226296108e-05, | |
| "loss": 1.3108, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.504853483433009e-05, | |
| "loss": 1.3084, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.4983387405699098e-05, | |
| "loss": 1.3049, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.4918239977068105e-05, | |
| "loss": 1.3015, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.4853092548437115e-05, | |
| "loss": 1.3037, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.4787945119806122e-05, | |
| "loss": 1.3066, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4722797691175133e-05, | |
| "loss": 1.3041, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.465765026254414e-05, | |
| "loss": 1.3004, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4592502833913146e-05, | |
| "loss": 1.3052, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4527355405282153e-05, | |
| "loss": 1.3044, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4462207976651164e-05, | |
| "loss": 1.2971, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.439706054802017e-05, | |
| "loss": 1.3006, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.433191311938918e-05, | |
| "loss": 1.3022, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4266765690758188e-05, | |
| "loss": 1.3031, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4201618262127195e-05, | |
| "loss": 1.3019, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.41364708334962e-05, | |
| "loss": 1.2947, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4071323404865212e-05, | |
| "loss": 1.2943, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.400617597623422e-05, | |
| "loss": 1.3004, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.394102854760323e-05, | |
| "loss": 1.2991, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.3875881118972236e-05, | |
| "loss": 1.2941, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.3810733690341243e-05, | |
| "loss": 1.3016, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.374558626171025e-05, | |
| "loss": 1.3067, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.368043883307926e-05, | |
| "loss": 1.2968, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3615291404448267e-05, | |
| "loss": 1.2987, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3550143975817278e-05, | |
| "loss": 1.292, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3484996547186284e-05, | |
| "loss": 1.3013, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.341984911855529e-05, | |
| "loss": 1.2993, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.33547016899243e-05, | |
| "loss": 1.289, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.328955426129331e-05, | |
| "loss": 1.2991, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3224406832662316e-05, | |
| "loss": 1.3004, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3159259404031326e-05, | |
| "loss": 1.3011, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.3094111975400333e-05, | |
| "loss": 1.3007, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.302896454676934e-05, | |
| "loss": 1.292, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2963817118138347e-05, | |
| "loss": 1.2954, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2898669689507357e-05, | |
| "loss": 1.2993, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2833522260876364e-05, | |
| "loss": 1.2923, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2768374832245374e-05, | |
| "loss": 1.2959, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.270322740361438e-05, | |
| "loss": 1.2818, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2638079974983388e-05, | |
| "loss": 1.2905, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2572932546352395e-05, | |
| "loss": 1.295, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2507785117721405e-05, | |
| "loss": 1.2953, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2442637689090412e-05, | |
| "loss": 1.2963, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2377490260459422e-05, | |
| "loss": 1.2898, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.231234283182843e-05, | |
| "loss": 1.2868, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2247195403197436e-05, | |
| "loss": 1.2888, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2182047974566443e-05, | |
| "loss": 1.2844, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.2116900545935454e-05, | |
| "loss": 1.2876, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.205175311730446e-05, | |
| "loss": 1.2836, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.198660568867347e-05, | |
| "loss": 1.291, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1921458260042478e-05, | |
| "loss": 1.2887, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1856310831411485e-05, | |
| "loss": 1.2891, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.179116340278049e-05, | |
| "loss": 1.2859, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1726015974149502e-05, | |
| "loss": 1.2833, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.166086854551851e-05, | |
| "loss": 1.2901, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.159572111688752e-05, | |
| "loss": 1.2928, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1530573688256526e-05, | |
| "loss": 1.2991, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1465426259625533e-05, | |
| "loss": 1.2895, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.140027883099454e-05, | |
| "loss": 1.2908, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.133513140236355e-05, | |
| "loss": 1.2973, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1269983973732557e-05, | |
| "loss": 1.2887, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1204836545101567e-05, | |
| "loss": 1.2807, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1139689116470574e-05, | |
| "loss": 1.2805, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.107454168783958e-05, | |
| "loss": 1.2887, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1009394259208588e-05, | |
| "loss": 1.2902, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.09442468305776e-05, | |
| "loss": 1.2915, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0879099401946605e-05, | |
| "loss": 1.2829, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.0813951973315616e-05, | |
| "loss": 1.2915, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0748804544684623e-05, | |
| "loss": 1.286, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0683657116053633e-05, | |
| "loss": 1.2893, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0618509687422637e-05, | |
| "loss": 1.2825, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0553362258791647e-05, | |
| "loss": 1.2829, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0488214830160654e-05, | |
| "loss": 1.2924, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.0423067401529664e-05, | |
| "loss": 1.2794, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.035791997289867e-05, | |
| "loss": 1.2912, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.029277254426768e-05, | |
| "loss": 1.2656, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0227625115636685e-05, | |
| "loss": 1.2763, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0162477687005695e-05, | |
| "loss": 1.2925, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0097330258374702e-05, | |
| "loss": 1.28, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0032182829743712e-05, | |
| "loss": 1.2827, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.996703540111272e-05, | |
| "loss": 1.2835, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.990188797248173e-05, | |
| "loss": 1.2814, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9836740543850733e-05, | |
| "loss": 1.2853, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9771593115219743e-05, | |
| "loss": 1.2748, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.970644568658875e-05, | |
| "loss": 1.2812, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.964129825795776e-05, | |
| "loss": 1.284, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9576150829326768e-05, | |
| "loss": 1.2796, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9511003400695778e-05, | |
| "loss": 1.2759, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.944585597206478e-05, | |
| "loss": 1.285, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9380708543433792e-05, | |
| "loss": 1.2847, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.93155611148028e-05, | |
| "loss": 1.2795, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.925041368617181e-05, | |
| "loss": 1.2723, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9185266257540816e-05, | |
| "loss": 1.2786, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9120118828909826e-05, | |
| "loss": 1.272, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9054971400278833e-05, | |
| "loss": 1.2739, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.898982397164784e-05, | |
| "loss": 1.2694, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8924676543016847e-05, | |
| "loss": 1.2819, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8859529114385857e-05, | |
| "loss": 1.2792, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8794381685754864e-05, | |
| "loss": 1.2849, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8729234257123875e-05, | |
| "loss": 1.2889, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.866408682849288e-05, | |
| "loss": 1.274, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.859893939986189e-05, | |
| "loss": 1.2778, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8533791971230895e-05, | |
| "loss": 1.2751, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8468644542599906e-05, | |
| "loss": 1.2791, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8403497113968913e-05, | |
| "loss": 1.2803, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8338349685337923e-05, | |
| "loss": 1.2785, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.827320225670693e-05, | |
| "loss": 1.272, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8208054828075937e-05, | |
| "loss": 1.267, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8142907399444944e-05, | |
| "loss": 1.2723, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8077759970813954e-05, | |
| "loss": 1.2766, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.801261254218296e-05, | |
| "loss": 1.2756, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.794746511355197e-05, | |
| "loss": 1.275, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7882317684920978e-05, | |
| "loss": 1.2775, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7817170256289985e-05, | |
| "loss": 1.2707, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7752022827658992e-05, | |
| "loss": 1.273, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7686875399028002e-05, | |
| "loss": 1.2714, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.762172797039701e-05, | |
| "loss": 1.2758, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.755658054176602e-05, | |
| "loss": 1.2756, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7491433113135026e-05, | |
| "loss": 1.2762, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7426285684504033e-05, | |
| "loss": 1.2763, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.736113825587304e-05, | |
| "loss": 1.2766, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.729599082724205e-05, | |
| "loss": 1.2707, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7230843398611058e-05, | |
| "loss": 1.2719, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7165695969980068e-05, | |
| "loss": 1.2686, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7100548541349075e-05, | |
| "loss": 1.2693, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7035401112718082e-05, | |
| "loss": 1.2699, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.697025368408709e-05, | |
| "loss": 1.2696, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.69051062554561e-05, | |
| "loss": 1.2693, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6839958826825106e-05, | |
| "loss": 1.2666, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.6774811398194116e-05, | |
| "loss": 1.2697, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6709663969563123e-05, | |
| "loss": 1.2691, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.664451654093213e-05, | |
| "loss": 1.2669, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6579369112301137e-05, | |
| "loss": 1.2663, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6514221683670147e-05, | |
| "loss": 1.2694, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6449074255039154e-05, | |
| "loss": 1.2706, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6383926826408164e-05, | |
| "loss": 1.2704, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.631877939777717e-05, | |
| "loss": 1.258, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.625363196914618e-05, | |
| "loss": 1.2664, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6188484540515185e-05, | |
| "loss": 1.2659, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6123337111884196e-05, | |
| "loss": 1.2677, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.6058189683253202e-05, | |
| "loss": 1.2664, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5993042254622213e-05, | |
| "loss": 1.2661, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.592789482599122e-05, | |
| "loss": 1.267, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5862747397360227e-05, | |
| "loss": 1.2642, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5797599968729234e-05, | |
| "loss": 1.2681, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5732452540098244e-05, | |
| "loss": 1.2618, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.566730511146725e-05, | |
| "loss": 1.2658, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.560215768283626e-05, | |
| "loss": 1.264, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5537010254205268e-05, | |
| "loss": 1.2676, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5471862825574275e-05, | |
| "loss": 1.2665, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5406715396943282e-05, | |
| "loss": 1.2559, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5341567968312292e-05, | |
| "loss": 1.266, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.52764205396813e-05, | |
| "loss": 1.2616, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.521127311105031e-05, | |
| "loss": 1.2641, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5146125682419315e-05, | |
| "loss": 1.2645, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5080978253788325e-05, | |
| "loss": 1.2654, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5015830825157332e-05, | |
| "loss": 1.2604, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.495068339652634e-05, | |
| "loss": 1.2601, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4885535967895347e-05, | |
| "loss": 1.2642, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4820388539264358e-05, | |
| "loss": 1.2579, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4755241110633363e-05, | |
| "loss": 1.2642, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4690093682002373e-05, | |
| "loss": 1.2728, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.462494625337138e-05, | |
| "loss": 1.265, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4559798824740389e-05, | |
| "loss": 1.2657, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4494651396109396e-05, | |
| "loss": 1.2748, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4429503967478406e-05, | |
| "loss": 1.2682, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4364356538847413e-05, | |
| "loss": 1.2628, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4299209110216422e-05, | |
| "loss": 1.2729, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4234061681585429e-05, | |
| "loss": 1.2698, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4168914252954437e-05, | |
| "loss": 1.263, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4103766824323444e-05, | |
| "loss": 1.2563, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4038619395692454e-05, | |
| "loss": 1.2606, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.3973471967061461e-05, | |
| "loss": 1.2605, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.390832453843047e-05, | |
| "loss": 1.2604, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.3843177109799477e-05, | |
| "loss": 1.2627, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.3778029681168485e-05, | |
| "loss": 1.2586, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3712882252537492e-05, | |
| "loss": 1.2617, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3647734823906503e-05, | |
| "loss": 1.2648, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.358258739527551e-05, | |
| "loss": 1.2552, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3517439966644518e-05, | |
| "loss": 1.2602, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3452292538013525e-05, | |
| "loss": 1.2603, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3387145109382534e-05, | |
| "loss": 1.2644, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.332199768075154e-05, | |
| "loss": 1.2553, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3256850252120551e-05, | |
| "loss": 1.2513, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3191702823489558e-05, | |
| "loss": 1.2441, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3126555394858567e-05, | |
| "loss": 1.2602, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3061407966227573e-05, | |
| "loss": 1.2647, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.2996260537596582e-05, | |
| "loss": 1.258, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.2931113108965589e-05, | |
| "loss": 1.2548, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.28659656803346e-05, | |
| "loss": 1.2598, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.2800818251703606e-05, | |
| "loss": 1.2573, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2735670823072615e-05, | |
| "loss": 1.2622, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2670523394441622e-05, | |
| "loss": 1.2579, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2605375965810632e-05, | |
| "loss": 1.2516, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2540228537179637e-05, | |
| "loss": 1.2547, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2475081108548646e-05, | |
| "loss": 1.2576, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2409933679917655e-05, | |
| "loss": 1.2495, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2344786251286662e-05, | |
| "loss": 1.2578, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.227963882265567e-05, | |
| "loss": 1.2646, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2214491394024679e-05, | |
| "loss": 1.2638, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2149343965393686e-05, | |
| "loss": 1.2554, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2084196536762694e-05, | |
| "loss": 1.2501, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2019049108131703e-05, | |
| "loss": 1.2508, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.195390167950071e-05, | |
| "loss": 1.2557, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.1888754250869718e-05, | |
| "loss": 1.2434, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.1823606822238727e-05, | |
| "loss": 1.2519, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.1758459393607734e-05, | |
| "loss": 1.2475, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1693311964976743e-05, | |
| "loss": 1.2479, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1628164536345751e-05, | |
| "loss": 1.2572, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1563017107714758e-05, | |
| "loss": 1.2527, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1497869679083767e-05, | |
| "loss": 1.2505, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1432722250452775e-05, | |
| "loss": 1.2472, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1367574821821782e-05, | |
| "loss": 1.2524, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1302427393190791e-05, | |
| "loss": 1.2548, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.12372799645598e-05, | |
| "loss": 1.2437, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1172132535928808e-05, | |
| "loss": 1.2503, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1106985107297815e-05, | |
| "loss": 1.259, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.1041837678666824e-05, | |
| "loss": 1.2518, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.0976690250035832e-05, | |
| "loss": 1.2511, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.091154282140484e-05, | |
| "loss": 1.2536, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.0846395392773848e-05, | |
| "loss": 1.2522, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.0781247964142856e-05, | |
| "loss": 1.2527, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0716100535511863e-05, | |
| "loss": 1.2461, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0650953106880872e-05, | |
| "loss": 1.2507, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.058580567824988e-05, | |
| "loss": 1.2536, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0520658249618888e-05, | |
| "loss": 1.2427, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0455510820987896e-05, | |
| "loss": 1.2488, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0390363392356905e-05, | |
| "loss": 1.2536, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0325215963725912e-05, | |
| "loss": 1.2464, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.026006853509492e-05, | |
| "loss": 1.2432, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0194921106463929e-05, | |
| "loss": 1.2573, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0129773677832936e-05, | |
| "loss": 1.2486, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0064626249201944e-05, | |
| "loss": 1.2575, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.999478820570953e-06, | |
| "loss": 1.2395, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.93433139193996e-06, | |
| "loss": 1.2447, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.869183963308969e-06, | |
| "loss": 1.252, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.804036534677977e-06, | |
| "loss": 1.2417, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.738889106046984e-06, | |
| "loss": 1.2573, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.673741677415993e-06, | |
| "loss": 1.2414, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.608594248785001e-06, | |
| "loss": 1.248, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.543446820154008e-06, | |
| "loss": 1.2386, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.478299391523017e-06, | |
| "loss": 1.258, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.413151962892026e-06, | |
| "loss": 1.2442, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.348004534261033e-06, | |
| "loss": 1.2456, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.282857105630041e-06, | |
| "loss": 1.2505, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.21770967699905e-06, | |
| "loss": 1.2391, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.152562248368057e-06, | |
| "loss": 1.2483, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.087414819737065e-06, | |
| "loss": 1.2432, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.022267391106074e-06, | |
| "loss": 1.2428, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.95711996247508e-06, | |
| "loss": 1.2468, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.89197253384409e-06, | |
| "loss": 1.2469, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.826825105213098e-06, | |
| "loss": 1.2465, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.761677676582105e-06, | |
| "loss": 1.2313, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.696530247951114e-06, | |
| "loss": 1.2507, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.631382819320122e-06, | |
| "loss": 1.2452, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.566235390689129e-06, | |
| "loss": 1.2403, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.501087962058138e-06, | |
| "loss": 1.2561, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.435940533427146e-06, | |
| "loss": 1.2514, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.370793104796153e-06, | |
| "loss": 1.2375, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.305645676165162e-06, | |
| "loss": 1.245, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.24049824753417e-06, | |
| "loss": 1.2454, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.175350818903177e-06, | |
| "loss": 1.2389, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.110203390272186e-06, | |
| "loss": 1.241, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.045055961641195e-06, | |
| "loss": 1.2465, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.979908533010202e-06, | |
| "loss": 1.2451, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.91476110437921e-06, | |
| "loss": 1.2396, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.849613675748219e-06, | |
| "loss": 1.2392, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.784466247117227e-06, | |
| "loss": 1.2431, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.719318818486234e-06, | |
| "loss": 1.2502, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.654171389855243e-06, | |
| "loss": 1.2453, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.589023961224251e-06, | |
| "loss": 1.237, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.5238765325932586e-06, | |
| "loss": 1.2242, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.458729103962267e-06, | |
| "loss": 1.2353, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.393581675331275e-06, | |
| "loss": 1.239, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.328434246700283e-06, | |
| "loss": 1.2448, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.263286818069291e-06, | |
| "loss": 1.2389, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.198139389438299e-06, | |
| "loss": 1.2387, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.132991960807307e-06, | |
| "loss": 1.242, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.0678445321763155e-06, | |
| "loss": 1.2347, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.002697103545323e-06, | |
| "loss": 1.2357, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.937549674914332e-06, | |
| "loss": 1.2378, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.87240224628334e-06, | |
| "loss": 1.2436, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.8072548176523474e-06, | |
| "loss": 1.2335, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.742107389021356e-06, | |
| "loss": 1.2444, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.676959960390364e-06, | |
| "loss": 1.2385, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.611812531759372e-06, | |
| "loss": 1.2399, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.54666510312838e-06, | |
| "loss": 1.2405, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.481517674497388e-06, | |
| "loss": 1.2354, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.416370245866396e-06, | |
| "loss": 1.2357, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.351222817235404e-06, | |
| "loss": 1.2321, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.286075388604412e-06, | |
| "loss": 1.241, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.22092795997342e-06, | |
| "loss": 1.2333, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.1557805313424285e-06, | |
| "loss": 1.2356, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.090633102711436e-06, | |
| "loss": 1.2372, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.025485674080444e-06, | |
| "loss": 1.238, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.960338245449453e-06, | |
| "loss": 1.2322, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.8951908168184605e-06, | |
| "loss": 1.2388, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.830043388187468e-06, | |
| "loss": 1.2371, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.764895959556477e-06, | |
| "loss": 1.23, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.699748530925485e-06, | |
| "loss": 1.2387, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.634601102294492e-06, | |
| "loss": 1.2379, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.569453673663501e-06, | |
| "loss": 1.2343, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.504306245032509e-06, | |
| "loss": 1.2251, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.439158816401517e-06, | |
| "loss": 1.2316, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.374011387770525e-06, | |
| "loss": 1.2261, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.308863959139533e-06, | |
| "loss": 1.2313, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.2437165305085415e-06, | |
| "loss": 1.2262, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.178569101877549e-06, | |
| "loss": 1.2244, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.113421673246557e-06, | |
| "loss": 1.2219, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.048274244615566e-06, | |
| "loss": 1.2371, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.9831268159845735e-06, | |
| "loss": 1.2361, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.917979387353581e-06, | |
| "loss": 1.2376, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.85283195872259e-06, | |
| "loss": 1.2303, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.787684530091598e-06, | |
| "loss": 1.2284, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.722537101460605e-06, | |
| "loss": 1.2293, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.657389672829614e-06, | |
| "loss": 1.2322, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.592242244198622e-06, | |
| "loss": 1.2377, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.5270948155676296e-06, | |
| "loss": 1.2291, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.461947386936638e-06, | |
| "loss": 1.2351, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.396799958305646e-06, | |
| "loss": 1.2346, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.331652529674654e-06, | |
| "loss": 1.2344, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.266505101043662e-06, | |
| "loss": 1.2364, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.20135767241267e-06, | |
| "loss": 1.2236, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.136210243781678e-06, | |
| "loss": 1.2356, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.0710628151506865e-06, | |
| "loss": 1.2279, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.005915386519694e-06, | |
| "loss": 1.2347, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.940767957888702e-06, | |
| "loss": 1.2281, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.875620529257711e-06, | |
| "loss": 1.2307, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.8104731006267184e-06, | |
| "loss": 1.2296, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.7453256719957266e-06, | |
| "loss": 1.2299, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.680178243364735e-06, | |
| "loss": 1.2335, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.6150308147337426e-06, | |
| "loss": 1.2345, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5498833861027508e-06, | |
| "loss": 1.2255, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.484735957471759e-06, | |
| "loss": 1.2279, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.419588528840767e-06, | |
| "loss": 1.2318, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.354441100209775e-06, | |
| "loss": 1.2328, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.289293671578783e-06, | |
| "loss": 1.2237, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.2241462429477913e-06, | |
| "loss": 1.2246, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.158998814316799e-06, | |
| "loss": 1.2295, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.0938513856858073e-06, | |
| "loss": 1.2272, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.0287039570548155e-06, | |
| "loss": 1.2275, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.9635565284238233e-06, | |
| "loss": 1.2233, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.8984090997928315e-06, | |
| "loss": 1.2321, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.8332616711618396e-06, | |
| "loss": 1.2314, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.7681142425308474e-06, | |
| "loss": 1.2295, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.702966813899855e-06, | |
| "loss": 1.2241, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.6378193852688634e-06, | |
| "loss": 1.2239, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.5726719566378716e-06, | |
| "loss": 1.224, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.5075245280068793e-06, | |
| "loss": 1.2289, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.4423770993758875e-06, | |
| "loss": 1.2216, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.3772296707448957e-06, | |
| "loss": 1.2295, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.3120822421139035e-06, | |
| "loss": 1.2328, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.2469348134829117e-06, | |
| "loss": 1.2245, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.18178738485192e-06, | |
| "loss": 1.2259, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.116639956220928e-06, | |
| "loss": 1.2159, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.051492527589936e-06, | |
| "loss": 1.2287, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.986345098958944e-06, | |
| "loss": 1.2279, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.9211976703279522e-06, | |
| "loss": 1.23, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.8560502416969602e-06, | |
| "loss": 1.2333, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.7909028130659682e-06, | |
| "loss": 1.2307, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.7257553844349762e-06, | |
| "loss": 1.2207, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.6606079558039844e-06, | |
| "loss": 1.2358, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5954605271729924e-06, | |
| "loss": 1.2185, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5303130985420006e-06, | |
| "loss": 1.2283, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.4651656699110088e-06, | |
| "loss": 1.2243, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.4000182412800167e-06, | |
| "loss": 1.2249, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.334870812649025e-06, | |
| "loss": 1.2199, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.269723384018033e-06, | |
| "loss": 1.2299, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.204575955387041e-06, | |
| "loss": 1.2126, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.139428526756049e-06, | |
| "loss": 1.2272, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.074281098125057e-06, | |
| "loss": 1.2325, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.0091336694940653e-06, | |
| "loss": 1.2247, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.439862408630733e-07, | |
| "loss": 1.2243, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.788388122320813e-07, | |
| "loss": 1.2236, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.136913836010893e-07, | |
| "loss": 1.2297, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 7.485439549700973e-07, | |
| "loss": 1.2215, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.833965263391054e-07, | |
| "loss": 1.2209, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.182490977081135e-07, | |
| "loss": 1.2283, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.531016690771216e-07, | |
| "loss": 1.2304, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.879542404461296e-07, | |
| "loss": 1.2144, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.2280681181513764e-07, | |
| "loss": 1.2267, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.5765938318414573e-07, | |
| "loss": 1.2266, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.925119545531538e-07, | |
| "loss": 1.2236, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.2736452592216185e-07, | |
| "loss": 1.2162, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.6221709729116992e-07, | |
| "loss": 1.2267, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 9.706966866017799e-08, | |
| "loss": 1.234, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.192224002918605e-08, | |
| "loss": 1.2369, | |
| "step": 383500 | |
| } | |
| ], | |
| "max_steps": 383745, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.61642598748028e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |