Instructions to use akozlo/lib_gpt_med with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use akozlo/lib_gpt_med with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="akozlo/lib_gpt_med")
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("akozlo/lib_gpt_med")
model = AutoModelForCausalLM.from_pretrained("akozlo/lib_gpt_med")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use akozlo/lib_gpt_med with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "akozlo/lib_gpt_med"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "akozlo/lib_gpt_med",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/akozlo/lib_gpt_med
- SGLang
How to use akozlo/lib_gpt_med with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "akozlo/lib_gpt_med" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "akozlo/lib_gpt_med",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "akozlo/lib_gpt_med" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "akozlo/lib_gpt_med",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use akozlo/lib_gpt_med with Docker Model Runner:
docker model run hf.co/akozlo/lib_gpt_med
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9999977087395546, | |
| "global_step": 654660, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.996196498946018e-05, | |
| "loss": 3.0555, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9923777227874016e-05, | |
| "loss": 3.0074, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.988558946628785e-05, | |
| "loss": 2.9648, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.984740170470168e-05, | |
| "loss": 2.9454, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.980921394311551e-05, | |
| "loss": 2.9381, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9771026181529345e-05, | |
| "loss": 2.9264, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.973283841994318e-05, | |
| "loss": 2.9214, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.969465065835701e-05, | |
| "loss": 2.9099, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.965646289677084e-05, | |
| "loss": 2.8927, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.961827513518468e-05, | |
| "loss": 2.8839, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.958008737359851e-05, | |
| "loss": 2.8857, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9541899612012344e-05, | |
| "loss": 2.898, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.950378822594935e-05, | |
| "loss": 2.8848, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9465600464363184e-05, | |
| "loss": 2.879, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9427412702777016e-05, | |
| "loss": 2.8717, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.938922494119085e-05, | |
| "loss": 2.8649, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.935103717960468e-05, | |
| "loss": 2.8712, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.931284941801851e-05, | |
| "loss": 2.8493, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9274661656432344e-05, | |
| "loss": 2.8726, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.923647389484618e-05, | |
| "loss": 2.8612, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9198286133260015e-05, | |
| "loss": 2.8523, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.916025112272019e-05, | |
| "loss": 2.8501, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.912206336113402e-05, | |
| "loss": 2.8377, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9083875599547855e-05, | |
| "loss": 2.8491, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9045687837961693e-05, | |
| "loss": 2.844, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9007500076375526e-05, | |
| "loss": 2.8197, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.896931231478936e-05, | |
| "loss": 2.8503, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.893112455320319e-05, | |
| "loss": 2.825, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.889293679161702e-05, | |
| "loss": 2.8277, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.885474903003086e-05, | |
| "loss": 2.8359, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.881663764396786e-05, | |
| "loss": 2.8288, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.87784498823817e-05, | |
| "loss": 2.812, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.874033849631871e-05, | |
| "loss": 2.8276, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.870215073473254e-05, | |
| "loss": 2.823, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.866396297314637e-05, | |
| "loss": 2.8204, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.86257752115602e-05, | |
| "loss": 2.8187, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8587587449974036e-05, | |
| "loss": 2.8106, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.854939968838787e-05, | |
| "loss": 2.811, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.85112119268017e-05, | |
| "loss": 2.8186, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.847302416521554e-05, | |
| "loss": 2.7965, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.843483640362937e-05, | |
| "loss": 2.8085, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.83966486420432e-05, | |
| "loss": 2.8108, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.835853725598021e-05, | |
| "loss": 2.8031, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.832034949439404e-05, | |
| "loss": 2.8085, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8282161732807874e-05, | |
| "loss": 2.8044, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.8243973971221706e-05, | |
| "loss": 2.802, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.820578620963554e-05, | |
| "loss": 2.7915, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.8167674823572545e-05, | |
| "loss": 2.7981, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.812948706198638e-05, | |
| "loss": 2.8002, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.809129930040021e-05, | |
| "loss": 2.7929, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.805311153881404e-05, | |
| "loss": 2.7923, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.8014923777227874e-05, | |
| "loss": 2.7845, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.797681239116488e-05, | |
| "loss": 2.792, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.793862462957871e-05, | |
| "loss": 2.7944, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7900436867992545e-05, | |
| "loss": 2.7897, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7862249106406384e-05, | |
| "loss": 2.7927, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7824061344820216e-05, | |
| "loss": 2.7925, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.778594995875722e-05, | |
| "loss": 2.7703, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.7747762197171055e-05, | |
| "loss": 2.7849, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.770957443558489e-05, | |
| "loss": 2.786, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.767138667399872e-05, | |
| "loss": 2.7615, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.763319891241255e-05, | |
| "loss": 2.7704, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.7595011150826384e-05, | |
| "loss": 2.7749, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.755689976476339e-05, | |
| "loss": 2.7844, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.751871200317722e-05, | |
| "loss": 2.7758, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7480524241591055e-05, | |
| "loss": 2.7723, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.744233648000489e-05, | |
| "loss": 2.7674, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.740414871841872e-05, | |
| "loss": 2.7649, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.7366037332355726e-05, | |
| "loss": 2.7876, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.732784957076956e-05, | |
| "loss": 2.7723, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.72896618091834e-05, | |
| "loss": 2.7732, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.725147404759723e-05, | |
| "loss": 2.7617, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.721328628601106e-05, | |
| "loss": 2.7687, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.7175098524424894e-05, | |
| "loss": 2.7682, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.71369871383619e-05, | |
| "loss": 2.7533, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.709879937677573e-05, | |
| "loss": 2.7571, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.7060611615189565e-05, | |
| "loss": 2.763, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.70224238536034e-05, | |
| "loss": 2.7686, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.698423609201723e-05, | |
| "loss": 2.7591, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.6946124705954236e-05, | |
| "loss": 2.761, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.690793694436807e-05, | |
| "loss": 2.7617, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6869825558305075e-05, | |
| "loss": 2.7551, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.683163779671891e-05, | |
| "loss": 2.7651, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.679345003513274e-05, | |
| "loss": 2.7581, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.675526227354657e-05, | |
| "loss": 2.7431, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.6717074511960404e-05, | |
| "loss": 2.7553, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.667888675037424e-05, | |
| "loss": 2.7451, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.6640698988788075e-05, | |
| "loss": 2.763, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.660251122720191e-05, | |
| "loss": 2.7454, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.656432346561574e-05, | |
| "loss": 2.7472, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6526212079552746e-05, | |
| "loss": 2.7321, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.648802431796658e-05, | |
| "loss": 2.7418, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.644983655638042e-05, | |
| "loss": 2.7441, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.641164879479425e-05, | |
| "loss": 2.7342, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6373537408731256e-05, | |
| "loss": 2.7211, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.633534964714509e-05, | |
| "loss": 2.7377, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.629716188555892e-05, | |
| "loss": 2.7481, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.625897412397275e-05, | |
| "loss": 2.7373, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6220786362386585e-05, | |
| "loss": 2.7236, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.618259860080042e-05, | |
| "loss": 2.7393, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6144410839214256e-05, | |
| "loss": 2.7406, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.610622307762809e-05, | |
| "loss": 2.7201, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.606803531604192e-05, | |
| "loss": 2.7447, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.602984755445575e-05, | |
| "loss": 2.7393, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.5991659792869585e-05, | |
| "loss": 2.7194, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.595347203128342e-05, | |
| "loss": 2.7429, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.5915360645220424e-05, | |
| "loss": 2.7358, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.587724925915743e-05, | |
| "loss": 2.7373, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.583906149757126e-05, | |
| "loss": 2.7368, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.5800873735985095e-05, | |
| "loss": 2.7161, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.576268597439893e-05, | |
| "loss": 2.7314, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.572449821281276e-05, | |
| "loss": 2.7221, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.568631045122659e-05, | |
| "loss": 2.7051, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.5648122689640424e-05, | |
| "loss": 2.7213, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.560993492805426e-05, | |
| "loss": 2.7178, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.557182354199126e-05, | |
| "loss": 2.7164, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.553371215592827e-05, | |
| "loss": 2.7223, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.54955243943421e-05, | |
| "loss": 2.7162, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5457336632755934e-05, | |
| "loss": 2.7074, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.541914887116977e-05, | |
| "loss": 2.7116, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5380961109583605e-05, | |
| "loss": 2.709, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.534277334799744e-05, | |
| "loss": 2.7131, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5304661961934444e-05, | |
| "loss": 2.725, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5266474200348276e-05, | |
| "loss": 2.7129, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.522828643876211e-05, | |
| "loss": 2.7157, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.519009867717594e-05, | |
| "loss": 2.7114, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.515191091558977e-05, | |
| "loss": 2.7101, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.5113723154003605e-05, | |
| "loss": 2.7129, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.507553539241744e-05, | |
| "loss": 2.715, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.5037347630831276e-05, | |
| "loss": 2.7164, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.499915986924511e-05, | |
| "loss": 2.7004, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.496097210765894e-05, | |
| "loss": 2.7114, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.492286072159595e-05, | |
| "loss": 2.7219, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.488467296000978e-05, | |
| "loss": 2.6966, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.484648519842361e-05, | |
| "loss": 2.7097, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.480829743683744e-05, | |
| "loss": 2.72, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.477018605077445e-05, | |
| "loss": 2.7164, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.473199828918828e-05, | |
| "loss": 2.7056, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.4693810527602115e-05, | |
| "loss": 2.7102, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.465562276601595e-05, | |
| "loss": 2.7023, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.461743500442978e-05, | |
| "loss": 2.7024, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4579323618366786e-05, | |
| "loss": 2.7, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.454121223230379e-05, | |
| "loss": 2.7006, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4503024470717625e-05, | |
| "loss": 2.6956, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.446491308465463e-05, | |
| "loss": 2.7053, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4426725323068464e-05, | |
| "loss": 2.7132, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.4388537561482296e-05, | |
| "loss": 2.692, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.435034979989613e-05, | |
| "loss": 2.6934, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.431216203830996e-05, | |
| "loss": 2.6871, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.427397427672379e-05, | |
| "loss": 2.6979, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.423578651513763e-05, | |
| "loss": 2.696, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.4197598753551464e-05, | |
| "loss": 2.6928, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.41594109919653e-05, | |
| "loss": 2.6864, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.4121223230379135e-05, | |
| "loss": 2.6974, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.408303546879297e-05, | |
| "loss": 2.6817, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.4044924082729974e-05, | |
| "loss": 2.6817, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.4006736321143806e-05, | |
| "loss": 2.685, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.396854855955764e-05, | |
| "loss": 2.6829, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.393036079797147e-05, | |
| "loss": 2.6871, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.38921730363853e-05, | |
| "loss": 2.6841, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.385406165032231e-05, | |
| "loss": 2.671, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.381587388873614e-05, | |
| "loss": 2.6906, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.3777686127149973e-05, | |
| "loss": 2.6874, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.3739498365563806e-05, | |
| "loss": 2.6838, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.370131060397764e-05, | |
| "loss": 2.6918, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.366312284239148e-05, | |
| "loss": 2.7004, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.362501145632848e-05, | |
| "loss": 2.6895, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.3586823694742316e-05, | |
| "loss": 2.6846, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.354863593315615e-05, | |
| "loss": 2.6808, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.351044817156998e-05, | |
| "loss": 2.6849, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.347226040998381e-05, | |
| "loss": 2.6967, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.343414902392082e-05, | |
| "loss": 2.6933, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.339596126233465e-05, | |
| "loss": 2.6779, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.335777350074848e-05, | |
| "loss": 2.6937, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.3319585739162316e-05, | |
| "loss": 2.691, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.328147435309932e-05, | |
| "loss": 2.6864, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.3243286591513155e-05, | |
| "loss": 2.6732, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.320509882992699e-05, | |
| "loss": 2.6637, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.3166987443863994e-05, | |
| "loss": 2.6791, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.3128799682277826e-05, | |
| "loss": 2.673, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.309061192069166e-05, | |
| "loss": 2.6774, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.305242415910549e-05, | |
| "loss": 2.671, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.301423639751932e-05, | |
| "loss": 2.6695, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.297604863593316e-05, | |
| "loss": 2.6704, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.293786087434699e-05, | |
| "loss": 2.6632, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.2899673112760825e-05, | |
| "loss": 2.6861, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.286148535117466e-05, | |
| "loss": 2.6649, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.282329758958849e-05, | |
| "loss": 2.6774, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.278510982800232e-05, | |
| "loss": 2.6544, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.2746922066416154e-05, | |
| "loss": 2.6761, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.270881068035316e-05, | |
| "loss": 2.6762, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.267062291876699e-05, | |
| "loss": 2.6762, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.2632511532704e-05, | |
| "loss": 2.6695, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.259432377111783e-05, | |
| "loss": 2.6717, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2556136009531664e-05, | |
| "loss": 2.6925, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2517948247945496e-05, | |
| "loss": 2.6566, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2479760486359335e-05, | |
| "loss": 2.669, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.244157272477317e-05, | |
| "loss": 2.6793, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.240346133871017e-05, | |
| "loss": 2.6654, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.2365273577124007e-05, | |
| "loss": 2.665, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.232708581553784e-05, | |
| "loss": 2.6532, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.228889805395167e-05, | |
| "loss": 2.6686, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.22507102923655e-05, | |
| "loss": 2.6629, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2212522530779335e-05, | |
| "loss": 2.6792, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.217433476919317e-05, | |
| "loss": 2.6797, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2136147007607e-05, | |
| "loss": 2.6618, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2098035621544006e-05, | |
| "loss": 2.6528, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.205984785995784e-05, | |
| "loss": 2.6497, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.202166009837167e-05, | |
| "loss": 2.6629, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.198347233678551e-05, | |
| "loss": 2.656, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.194528457519934e-05, | |
| "loss": 2.6646, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.1907096813613174e-05, | |
| "loss": 2.6467, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.186890905202701e-05, | |
| "loss": 2.6529, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.183087404148718e-05, | |
| "loss": 2.6593, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.179268627990102e-05, | |
| "loss": 2.6556, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.175449851831485e-05, | |
| "loss": 2.661, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.171631075672869e-05, | |
| "loss": 2.6502, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.167812299514252e-05, | |
| "loss": 2.66, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.164001160907953e-05, | |
| "loss": 2.6497, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.160182384749336e-05, | |
| "loss": 2.6573, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.1563636085907194e-05, | |
| "loss": 2.6646, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.1525448324321026e-05, | |
| "loss": 2.6692, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.148726056273486e-05, | |
| "loss": 2.6464, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.144907280114869e-05, | |
| "loss": 2.6656, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.141088503956252e-05, | |
| "loss": 2.6579, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.137277365349953e-05, | |
| "loss": 2.6474, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.133458589191336e-05, | |
| "loss": 2.6526, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.1296398130327194e-05, | |
| "loss": 2.6537, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.1258210368741026e-05, | |
| "loss": 2.6416, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.1220022607154865e-05, | |
| "loss": 2.6602, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.11818348455687e-05, | |
| "loss": 2.6473, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.114364708398253e-05, | |
| "loss": 2.666, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.110545932239636e-05, | |
| "loss": 2.6553, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.106734793633337e-05, | |
| "loss": 2.6489, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.1029236550270375e-05, | |
| "loss": 2.646, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.0991125164207376e-05, | |
| "loss": 2.6448, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.095293740262121e-05, | |
| "loss": 2.6547, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.0914749641035047e-05, | |
| "loss": 2.6467, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.087656187944888e-05, | |
| "loss": 2.6508, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.083837411786271e-05, | |
| "loss": 2.6422, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.080018635627654e-05, | |
| "loss": 2.6576, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.0761998594690375e-05, | |
| "loss": 2.662, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.072381083310421e-05, | |
| "loss": 2.6479, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.068562307151804e-05, | |
| "loss": 2.6385, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.064743530993187e-05, | |
| "loss": 2.6539, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.060924754834571e-05, | |
| "loss": 2.6507, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.057113616228271e-05, | |
| "loss": 2.6231, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.053294840069655e-05, | |
| "loss": 2.6401, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.049476063911038e-05, | |
| "loss": 2.6362, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.0456572877524214e-05, | |
| "loss": 2.6484, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0418385115938046e-05, | |
| "loss": 2.6523, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.038019735435188e-05, | |
| "loss": 2.6453, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0342085968288885e-05, | |
| "loss": 2.6572, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.030389820670272e-05, | |
| "loss": 2.6525, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.026571044511655e-05, | |
| "loss": 2.6412, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.022752268353038e-05, | |
| "loss": 2.6408, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.018941129746739e-05, | |
| "loss": 2.6422, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.015122353588122e-05, | |
| "loss": 2.6263, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.011303577429505e-05, | |
| "loss": 2.6372, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.0074848012708885e-05, | |
| "loss": 2.6506, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.0036660251122724e-05, | |
| "loss": 2.6432, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.9998472489536556e-05, | |
| "loss": 2.6233, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.9960361103473556e-05, | |
| "loss": 2.6392, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.9922173341887395e-05, | |
| "loss": 2.6375, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.988398558030123e-05, | |
| "loss": 2.6172, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.984579781871506e-05, | |
| "loss": 2.6359, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.980761005712889e-05, | |
| "loss": 2.6329, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9769422295542724e-05, | |
| "loss": 2.6304, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.9731234533956556e-05, | |
| "loss": 2.6381, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.969304677237039e-05, | |
| "loss": 2.6382, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.965485901078422e-05, | |
| "loss": 2.6273, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.961674762472123e-05, | |
| "loss": 2.6309, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.9578636238658234e-05, | |
| "loss": 2.6375, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.954052485259524e-05, | |
| "loss": 2.6384, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.950233709100907e-05, | |
| "loss": 2.6379, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.9464149329422905e-05, | |
| "loss": 2.6143, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.942596156783674e-05, | |
| "loss": 2.6299, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.9387773806250576e-05, | |
| "loss": 2.6331, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.934958604466441e-05, | |
| "loss": 2.6242, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.931139828307824e-05, | |
| "loss": 2.6214, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.927321052149208e-05, | |
| "loss": 2.6365, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.923502275990591e-05, | |
| "loss": 2.6411, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.9196834998319744e-05, | |
| "loss": 2.6103, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.9158647236733576e-05, | |
| "loss": 2.6253, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.912053585067058e-05, | |
| "loss": 2.634, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.9082348089084415e-05, | |
| "loss": 2.6405, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.904416032749825e-05, | |
| "loss": 2.6187, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.900597256591208e-05, | |
| "loss": 2.6322, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.896778480432591e-05, | |
| "loss": 2.6313, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.8929597042739744e-05, | |
| "loss": 2.6342, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.889148565667675e-05, | |
| "loss": 2.6221, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.885329789509058e-05, | |
| "loss": 2.6278, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.8815110133504415e-05, | |
| "loss": 2.6224, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.8776922371918254e-05, | |
| "loss": 2.6191, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.8738734610332086e-05, | |
| "loss": 2.6043, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.8700623224269086e-05, | |
| "loss": 2.6286, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.8662435462682925e-05, | |
| "loss": 2.6093, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.862424770109676e-05, | |
| "loss": 2.6345, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.858605993951059e-05, | |
| "loss": 2.6133, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.854787217792442e-05, | |
| "loss": 2.6221, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.8509684416338253e-05, | |
| "loss": 2.6208, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.847157303027526e-05, | |
| "loss": 2.6144, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.843338526868909e-05, | |
| "loss": 2.6165, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.8395197507102925e-05, | |
| "loss": 2.6218, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.835700974551676e-05, | |
| "loss": 2.6376, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.831882198393059e-05, | |
| "loss": 2.637, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.828063422234443e-05, | |
| "loss": 2.6056, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.824244646075826e-05, | |
| "loss": 2.6012, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.820425869917209e-05, | |
| "loss": 2.6156, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.8166070937585924e-05, | |
| "loss": 2.6286, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.8127883175999756e-05, | |
| "loss": 2.6332, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.808977178993676e-05, | |
| "loss": 2.6177, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.8051584028350596e-05, | |
| "loss": 2.613, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.801339626676443e-05, | |
| "loss": 2.6077, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.797520850517826e-05, | |
| "loss": 2.6197, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.793709711911527e-05, | |
| "loss": 2.6205, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.78989093575291e-05, | |
| "loss": 2.6046, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.786072159594293e-05, | |
| "loss": 2.6028, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.782253383435676e-05, | |
| "loss": 2.6369, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.778442244829377e-05, | |
| "loss": 2.6246, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.774631106223078e-05, | |
| "loss": 2.6113, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.770812330064461e-05, | |
| "loss": 2.613, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.766993553905844e-05, | |
| "loss": 2.6249, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.7631747777472273e-05, | |
| "loss": 2.6026, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.759356001588611e-05, | |
| "loss": 2.6113, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.7555372254299944e-05, | |
| "loss": 2.6041, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.751718449271378e-05, | |
| "loss": 2.6257, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.747899673112761e-05, | |
| "loss": 2.6065, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.744080896954144e-05, | |
| "loss": 2.6112, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.740269758347845e-05, | |
| "loss": 2.6151, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.736450982189228e-05, | |
| "loss": 2.6017, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.732632206030611e-05, | |
| "loss": 2.6126, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7288134298719944e-05, | |
| "loss": 2.6254, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.725002291265695e-05, | |
| "loss": 2.6014, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.721183515107078e-05, | |
| "loss": 2.6046, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7173647389484615e-05, | |
| "loss": 2.6185, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.713545962789845e-05, | |
| "loss": 2.601, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7097271866312287e-05, | |
| "loss": 2.6168, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.7059160480249293e-05, | |
| "loss": 2.6054, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.7020972718663126e-05, | |
| "loss": 2.6124, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.698278495707696e-05, | |
| "loss": 2.6031, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.694459719549079e-05, | |
| "loss": 2.6041, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.690640943390463e-05, | |
| "loss": 2.6057, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.686822167231846e-05, | |
| "loss": 2.5905, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.683003391073229e-05, | |
| "loss": 2.601, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.6791846149146125e-05, | |
| "loss": 2.6032, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.675365838755996e-05, | |
| "loss": 2.6097, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.671547062597379e-05, | |
| "loss": 2.6002, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.6677359239910797e-05, | |
| "loss": 2.6165, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.663917147832463e-05, | |
| "loss": 2.6062, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.660098371673846e-05, | |
| "loss": 2.5957, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.656279595515229e-05, | |
| "loss": 2.6121, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.65246845690893e-05, | |
| "loss": 2.5918, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.648649680750313e-05, | |
| "loss": 2.6204, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.6448309045916964e-05, | |
| "loss": 2.6062, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.64101212843308e-05, | |
| "loss": 2.5986, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.63720098982678e-05, | |
| "loss": 2.605, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.633382213668164e-05, | |
| "loss": 2.5993, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.6295634375095474e-05, | |
| "loss": 2.606, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.6257446613509306e-05, | |
| "loss": 2.5927, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.621933522744631e-05, | |
| "loss": 2.6058, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.6181147465860145e-05, | |
| "loss": 2.6094, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.614295970427398e-05, | |
| "loss": 2.5979, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.610477194268781e-05, | |
| "loss": 2.5967, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.606666055662482e-05, | |
| "loss": 2.6083, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.602847279503865e-05, | |
| "loss": 2.6111, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.599028503345248e-05, | |
| "loss": 2.6049, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.595209727186631e-05, | |
| "loss": 2.6157, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5913909510280145e-05, | |
| "loss": 2.6048, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.587579812421715e-05, | |
| "loss": 2.6068, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5837610362630984e-05, | |
| "loss": 2.5958, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5799422601044816e-05, | |
| "loss": 2.5922, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.576123483945865e-05, | |
| "loss": 2.5969, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.572319982891883e-05, | |
| "loss": 2.6064, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.568501206733266e-05, | |
| "loss": 2.5968, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5646824305746495e-05, | |
| "loss": 2.5958, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.560863654416033e-05, | |
| "loss": 2.5861, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.557044878257416e-05, | |
| "loss": 2.5702, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.5532261020988e-05, | |
| "loss": 2.5778, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.549407325940183e-05, | |
| "loss": 2.5904, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.545596187333883e-05, | |
| "loss": 2.5935, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.541777411175267e-05, | |
| "loss": 2.6034, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.53795863501665e-05, | |
| "loss": 2.5932, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.534139858858033e-05, | |
| "loss": 2.5814, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5303210826994165e-05, | |
| "loss": 2.5764, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5265023065408e-05, | |
| "loss": 2.5804, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.522683530382183e-05, | |
| "loss": 2.6004, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.518864754223566e-05, | |
| "loss": 2.5761, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.5150459780649494e-05, | |
| "loss": 2.6058, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.51123483945865e-05, | |
| "loss": 2.6052, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.507423700852351e-05, | |
| "loss": 2.5899, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.503604924693734e-05, | |
| "loss": 2.5873, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.499786148535117e-05, | |
| "loss": 2.5917, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.4959673723765004e-05, | |
| "loss": 2.5979, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.492148596217884e-05, | |
| "loss": 2.5871, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.4883298200592675e-05, | |
| "loss": 2.5734, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.484511043900651e-05, | |
| "loss": 2.5835, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.480692267742034e-05, | |
| "loss": 2.5906, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.4768811291357347e-05, | |
| "loss": 2.5929, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.473062352977118e-05, | |
| "loss": 2.5853, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.469243576818501e-05, | |
| "loss": 2.5786, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.465424800659885e-05, | |
| "loss": 2.5977, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.461606024501268e-05, | |
| "loss": 2.5808, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.4577872483426514e-05, | |
| "loss": 2.5888, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.453976109736352e-05, | |
| "loss": 2.5883, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.450157333577735e-05, | |
| "loss": 2.5927, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.4463385574191185e-05, | |
| "loss": 2.5882, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.442519781260502e-05, | |
| "loss": 2.5923, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.4387010051018856e-05, | |
| "loss": 2.5892, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.434882228943269e-05, | |
| "loss": 2.5886, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.431063452784652e-05, | |
| "loss": 2.5904, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.427244676626035e-05, | |
| "loss": 2.5851, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.423433538019736e-05, | |
| "loss": 2.5894, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.419614761861119e-05, | |
| "loss": 2.5733, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4157959857025024e-05, | |
| "loss": 2.594, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4119772095438856e-05, | |
| "loss": 2.5767, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.408166070937586e-05, | |
| "loss": 2.5857, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4043472947789695e-05, | |
| "loss": 2.5687, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.400528518620353e-05, | |
| "loss": 2.5883, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.396709742461736e-05, | |
| "loss": 2.584, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.392890966303119e-05, | |
| "loss": 2.5787, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.38907982769682e-05, | |
| "loss": 2.5826, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.385261051538203e-05, | |
| "loss": 2.5828, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.381442275379586e-05, | |
| "loss": 2.5865, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.37762349922097e-05, | |
| "loss": 2.5867, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3738047230623534e-05, | |
| "loss": 2.5799, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3699935844560534e-05, | |
| "loss": 2.5761, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.366174808297437e-05, | |
| "loss": 2.5771, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.362363669691137e-05, | |
| "loss": 2.5876, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.358544893532521e-05, | |
| "loss": 2.574, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.3547261173739044e-05, | |
| "loss": 2.5878, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.3509073412152876e-05, | |
| "loss": 2.5874, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.347088565056671e-05, | |
| "loss": 2.5752, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.343269788898054e-05, | |
| "loss": 2.5783, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.339451012739437e-05, | |
| "loss": 2.5884, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.3356322365808205e-05, | |
| "loss": 2.5687, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.331821097974521e-05, | |
| "loss": 2.5582, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.3280023218159044e-05, | |
| "loss": 2.5573, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.3241835456572876e-05, | |
| "loss": 2.551, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.320364769498671e-05, | |
| "loss": 2.5366, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.316545993340055e-05, | |
| "loss": 2.5569, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.312742492286072e-05, | |
| "loss": 2.5285, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.3089237161274554e-05, | |
| "loss": 2.5448, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.3051049399688386e-05, | |
| "loss": 2.5567, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.301286163810222e-05, | |
| "loss": 2.5396, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.297467387651606e-05, | |
| "loss": 2.5534, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.293648611492989e-05, | |
| "loss": 2.5584, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.289829835334372e-05, | |
| "loss": 2.5447, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.286018696728073e-05, | |
| "loss": 2.5504, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.282199920569456e-05, | |
| "loss": 2.562, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.278381144410839e-05, | |
| "loss": 2.5367, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.2745623682522225e-05, | |
| "loss": 2.548, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.270743592093606e-05, | |
| "loss": 2.5425, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.266924815934989e-05, | |
| "loss": 2.5467, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.263106039776372e-05, | |
| "loss": 2.5352, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.259287263617756e-05, | |
| "loss": 2.5381, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.255476125011456e-05, | |
| "loss": 2.5468, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.251657348852839e-05, | |
| "loss": 2.523, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.247838572694223e-05, | |
| "loss": 2.5617, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.244027434087923e-05, | |
| "loss": 2.5385, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2402086579293064e-05, | |
| "loss": 2.5488, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.23638988177069e-05, | |
| "loss": 2.5556, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2325711056120735e-05, | |
| "loss": 2.545, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2287523294534574e-05, | |
| "loss": 2.5458, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.2249335532948406e-05, | |
| "loss": 2.5588, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.221114777136224e-05, | |
| "loss": 2.5626, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.217296000977607e-05, | |
| "loss": 2.5346, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.21347722481899e-05, | |
| "loss": 2.5441, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.209666086212691e-05, | |
| "loss": 2.5406, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.205847310054074e-05, | |
| "loss": 2.5426, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.202036171447775e-05, | |
| "loss": 2.5486, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.198217395289158e-05, | |
| "loss": 2.5389, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.194398619130541e-05, | |
| "loss": 2.5307, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.1905798429719245e-05, | |
| "loss": 2.5546, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.186761066813308e-05, | |
| "loss": 2.5433, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.1829422906546916e-05, | |
| "loss": 2.5518, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.179123514496075e-05, | |
| "loss": 2.5476, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.175304738337458e-05, | |
| "loss": 2.5427, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.171485962178841e-05, | |
| "loss": 2.5359, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.167674823572542e-05, | |
| "loss": 2.5404, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.163856047413925e-05, | |
| "loss": 2.537, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.1600372712553084e-05, | |
| "loss": 2.5354, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.1562184950966916e-05, | |
| "loss": 2.5492, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.152399718938075e-05, | |
| "loss": 2.547, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.148580942779458e-05, | |
| "loss": 2.5364, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.144769804173159e-05, | |
| "loss": 2.566, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.140951028014542e-05, | |
| "loss": 2.5474, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.137132251855925e-05, | |
| "loss": 2.5425, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.133313475697309e-05, | |
| "loss": 2.5494, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.129494699538692e-05, | |
| "loss": 2.5373, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.125683560932392e-05, | |
| "loss": 2.5279, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.121864784773776e-05, | |
| "loss": 2.5559, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.118053646167476e-05, | |
| "loss": 2.5413, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.11423487000886e-05, | |
| "loss": 2.5333, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.110416093850243e-05, | |
| "loss": 2.541, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.1065973176916265e-05, | |
| "loss": 2.5568, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.10277854153301e-05, | |
| "loss": 2.5394, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.098959765374393e-05, | |
| "loss": 2.5354, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.095140989215776e-05, | |
| "loss": 2.5219, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.091329850609477e-05, | |
| "loss": 2.5332, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.08751107445086e-05, | |
| "loss": 2.5265, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.083692298292243e-05, | |
| "loss": 2.5466, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.0798735221336265e-05, | |
| "loss": 2.5518, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.07605474597501e-05, | |
| "loss": 2.5508, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.0722359698163936e-05, | |
| "loss": 2.5337, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.068417193657777e-05, | |
| "loss": 2.539, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.06459841749916e-05, | |
| "loss": 2.543, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.060779641340543e-05, | |
| "loss": 2.5212, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.056968502734244e-05, | |
| "loss": 2.5283, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.053149726575627e-05, | |
| "loss": 2.5406, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.0493385879693275e-05, | |
| "loss": 2.5387, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.045519811810711e-05, | |
| "loss": 2.5409, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0417010356520942e-05, | |
| "loss": 2.5463, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0378822594934775e-05, | |
| "loss": 2.5373, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0340634833348607e-05, | |
| "loss": 2.5277, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0302447071762442e-05, | |
| "loss": 2.5286, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0264259310176274e-05, | |
| "loss": 2.5371, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0226071548590107e-05, | |
| "loss": 2.5496, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.018788378700394e-05, | |
| "loss": 2.5281, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0149696025417774e-05, | |
| "loss": 2.5319, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0111584639354778e-05, | |
| "loss": 2.5421, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.007339687776861e-05, | |
| "loss": 2.5277, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.003520911618245e-05, | |
| "loss": 2.5327, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.9997021354596284e-05, | |
| "loss": 2.5222, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.9958909968533288e-05, | |
| "loss": 2.5413, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.992072220694712e-05, | |
| "loss": 2.5395, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9882610820884127e-05, | |
| "loss": 2.5356, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.984442305929796e-05, | |
| "loss": 2.5391, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9806235297711795e-05, | |
| "loss": 2.5429, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9768047536125627e-05, | |
| "loss": 2.5278, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.972985977453946e-05, | |
| "loss": 2.5308, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.969167201295329e-05, | |
| "loss": 2.544, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9653484251367127e-05, | |
| "loss": 2.5335, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.961529648978096e-05, | |
| "loss": 2.5356, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9577185103717962e-05, | |
| "loss": 2.5371, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9538997342131798e-05, | |
| "loss": 2.5302, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.950080958054563e-05, | |
| "loss": 2.5275, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9462621818959462e-05, | |
| "loss": 2.5434, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.942451043289647e-05, | |
| "loss": 2.5332, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.93863226713103e-05, | |
| "loss": 2.5238, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9348134909724133e-05, | |
| "loss": 2.5383, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.930994714813797e-05, | |
| "loss": 2.5484, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.92717593865518e-05, | |
| "loss": 2.532, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9233571624965633e-05, | |
| "loss": 2.5318, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9195383863379465e-05, | |
| "loss": 2.5163, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.91571961017933e-05, | |
| "loss": 2.5415, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9119161091253476e-05, | |
| "loss": 2.5271, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.908097332966731e-05, | |
| "loss": 2.5285, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.9042785568081143e-05, | |
| "loss": 2.5256, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.9004597806494976e-05, | |
| "loss": 2.5373, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.896641004490881e-05, | |
| "loss": 2.5252, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.8928222283322643e-05, | |
| "loss": 2.5303, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8890110897259647e-05, | |
| "loss": 2.5321, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8851923135673482e-05, | |
| "loss": 2.5266, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8813735374087314e-05, | |
| "loss": 2.5352, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8775547612501147e-05, | |
| "loss": 2.5234, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8737436226438153e-05, | |
| "loss": 2.5307, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8699248464851986e-05, | |
| "loss": 2.5142, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8661060703265818e-05, | |
| "loss": 2.5325, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.862287294167965e-05, | |
| "loss": 2.5186, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8584761555616657e-05, | |
| "loss": 2.5356, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.854657379403049e-05, | |
| "loss": 2.5329, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8508386032444324e-05, | |
| "loss": 2.5316, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8470274646381328e-05, | |
| "loss": 2.5192, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.843208688479516e-05, | |
| "loss": 2.5469, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8393899123208996e-05, | |
| "loss": 2.532, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8355711361622828e-05, | |
| "loss": 2.525, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.831752360003666e-05, | |
| "loss": 2.5439, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8279412213973667e-05, | |
| "loss": 2.5347, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.82412244523875e-05, | |
| "loss": 2.5168, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.820303669080133e-05, | |
| "loss": 2.5167, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8164848929215167e-05, | |
| "loss": 2.5167, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8126661167629e-05, | |
| "loss": 2.5207, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.808847340604283e-05, | |
| "loss": 2.5361, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.8050285644456663e-05, | |
| "loss": 2.529, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.80120978828705e-05, | |
| "loss": 2.5263, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.7974062872330674e-05, | |
| "loss": 2.528, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.793587511074451e-05, | |
| "loss": 2.5292, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.789768734915834e-05, | |
| "loss": 2.5142, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7859499587572173e-05, | |
| "loss": 2.5312, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.782131182598601e-05, | |
| "loss": 2.5294, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.778312406439984e-05, | |
| "loss": 2.5228, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7744936302813673e-05, | |
| "loss": 2.5368, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7706748541227505e-05, | |
| "loss": 2.5152, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7668637155164512e-05, | |
| "loss": 2.5217, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7630449393578344e-05, | |
| "loss": 2.532, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7592261631992177e-05, | |
| "loss": 2.5123, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7554073870406012e-05, | |
| "loss": 2.5352, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7515962484343016e-05, | |
| "loss": 2.5273, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7477774722756855e-05, | |
| "loss": 2.531, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7439586961170687e-05, | |
| "loss": 2.5236, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7401399199584522e-05, | |
| "loss": 2.5073, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7363287813521526e-05, | |
| "loss": 2.5285, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7325100051935358e-05, | |
| "loss": 2.5331, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7286912290349193e-05, | |
| "loss": 2.5148, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7248724528763026e-05, | |
| "loss": 2.511, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7210536767176858e-05, | |
| "loss": 2.5316, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7172425381113865e-05, | |
| "loss": 2.5126, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7134237619527697e-05, | |
| "loss": 2.5141, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.709604985794153e-05, | |
| "loss": 2.5339, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.7057862096355365e-05, | |
| "loss": 2.5261, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.7019674334769197e-05, | |
| "loss": 2.5023, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.69815629487062e-05, | |
| "loss": 2.5203, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.6943375187120036e-05, | |
| "loss": 2.5198, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6905187425533868e-05, | |
| "loss": 2.5013, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.68669996639477e-05, | |
| "loss": 2.5304, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6828811902361532e-05, | |
| "loss": 2.5089, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6790624140775368e-05, | |
| "loss": 2.5109, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.67524363791892e-05, | |
| "loss": 2.512, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6714248617603032e-05, | |
| "loss": 2.4975, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.667613723154004e-05, | |
| "loss": 2.497, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.663794946995387e-05, | |
| "loss": 2.504, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6599761708367703e-05, | |
| "loss": 2.5093, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.656157394678154e-05, | |
| "loss": 2.5111, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.652338618519537e-05, | |
| "loss": 2.5143, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.6485198423609203e-05, | |
| "loss": 2.5183, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.644708703754621e-05, | |
| "loss": 2.5143, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6408899275960042e-05, | |
| "loss": 2.4955, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6370711514373874e-05, | |
| "loss": 2.5183, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.633260012831088e-05, | |
| "loss": 2.5143, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6294412366724713e-05, | |
| "loss": 2.5165, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6256224605138545e-05, | |
| "loss": 2.5055, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.621803684355238e-05, | |
| "loss": 2.5095, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6179849081966213e-05, | |
| "loss": 2.5243, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6141661320380045e-05, | |
| "loss": 2.5074, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6103549934317052e-05, | |
| "loss": 2.5096, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6065362172730884e-05, | |
| "loss": 2.508, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6027174411144716e-05, | |
| "loss": 2.5096, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.598898664955855e-05, | |
| "loss": 2.4983, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.5950798887972384e-05, | |
| "loss": 2.5052, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5912611126386216e-05, | |
| "loss": 2.5174, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.587442336480005e-05, | |
| "loss": 2.5116, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.583623560321388e-05, | |
| "loss": 2.5065, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5798047841627716e-05, | |
| "loss": 2.52, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.575993645556472e-05, | |
| "loss": 2.5261, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5721748693978552e-05, | |
| "loss": 2.5244, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5683560932392387e-05, | |
| "loss": 2.5157, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.564537317080622e-05, | |
| "loss": 2.5092, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5607261784743226e-05, | |
| "loss": 2.5104, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.556907402315706e-05, | |
| "loss": 2.5138, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.553088626157089e-05, | |
| "loss": 2.5047, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5492698499984723e-05, | |
| "loss": 2.5195, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.545458711392173e-05, | |
| "loss": 2.5151, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5416399352335562e-05, | |
| "loss": 2.5174, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5378211590749394e-05, | |
| "loss": 2.5153, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.534002382916323e-05, | |
| "loss": 2.5133, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5301836067577062e-05, | |
| "loss": 2.5207, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5263648305990894e-05, | |
| "loss": 2.521, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5225460544404726e-05, | |
| "loss": 2.5116, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5187272782818565e-05, | |
| "loss": 2.5153, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.51490850212324e-05, | |
| "loss": 2.5185, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5110973635169404e-05, | |
| "loss": 2.5144, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.5072785873583236e-05, | |
| "loss": 2.4997, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.5034598111997072e-05, | |
| "loss": 2.5065, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.4996486725934072e-05, | |
| "loss": 2.4958, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.4958298964347907e-05, | |
| "loss": 2.5255, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.492011120276174e-05, | |
| "loss": 2.5061, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.488192344117557e-05, | |
| "loss": 2.5157, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.4843735679589404e-05, | |
| "loss": 2.5118, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.480554791800324e-05, | |
| "loss": 2.5054, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.476736015641707e-05, | |
| "loss": 2.5111, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4729172394830904e-05, | |
| "loss": 2.5024, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.469098463324474e-05, | |
| "loss": 2.5045, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4652796871658575e-05, | |
| "loss": 2.4979, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4614685485595578e-05, | |
| "loss": 2.5169, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.457649772400941e-05, | |
| "loss": 2.5107, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4538309962423246e-05, | |
| "loss": 2.507, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.450019857636025e-05, | |
| "loss": 2.5057, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4462010814774085e-05, | |
| "loss": 2.5156, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4423823053187917e-05, | |
| "loss": 2.4845, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.438563529160175e-05, | |
| "loss": 2.5032, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.434744753001558e-05, | |
| "loss": 2.4944, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4309259768429417e-05, | |
| "loss": 2.5036, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.427107200684325e-05, | |
| "loss": 2.5061, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.423288424525708e-05, | |
| "loss": 2.509, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4194772859194088e-05, | |
| "loss": 2.4925, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.415658509760792e-05, | |
| "loss": 2.5023, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4118473711544924e-05, | |
| "loss": 2.5172, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.408028594995876e-05, | |
| "loss": 2.5129, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.404209818837259e-05, | |
| "loss": 2.5011, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4003910426786424e-05, | |
| "loss": 2.4977, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.396572266520026e-05, | |
| "loss": 2.4979, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.392753490361409e-05, | |
| "loss": 2.5248, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3889347142027924e-05, | |
| "loss": 2.5024, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.385123575596493e-05, | |
| "loss": 2.5032, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3813047994378763e-05, | |
| "loss": 2.5078, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3774860232792595e-05, | |
| "loss": 2.514, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3736672471206427e-05, | |
| "loss": 2.5069, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3698484709620262e-05, | |
| "loss": 2.5132, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3660296948034095e-05, | |
| "loss": 2.5165, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3622109186447927e-05, | |
| "loss": 2.5133, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.358392142486176e-05, | |
| "loss": 2.5142, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3545810038798766e-05, | |
| "loss": 2.5116, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3507698652735773e-05, | |
| "loss": 2.506, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3469587266672776e-05, | |
| "loss": 2.481, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3431399505086608e-05, | |
| "loss": 2.496, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3393211743500444e-05, | |
| "loss": 2.5162, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.335502398191428e-05, | |
| "loss": 2.4868, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.331683622032811e-05, | |
| "loss": 2.4927, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3278648458741944e-05, | |
| "loss": 2.4977, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.324046069715578e-05, | |
| "loss": 2.5081, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.320227293556961e-05, | |
| "loss": 2.5005, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3164085173983444e-05, | |
| "loss": 2.5109, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3125897412397276e-05, | |
| "loss": 2.5022, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.308770965081111e-05, | |
| "loss": 2.4936, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3049521889224943e-05, | |
| "loss": 2.4932, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3011410503161947e-05, | |
| "loss": 2.4868, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2973222741575782e-05, | |
| "loss": 2.5014, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2935034979989615e-05, | |
| "loss": 2.5233, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2896847218403447e-05, | |
| "loss": 2.4916, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.285865945681728e-05, | |
| "loss": 2.4821, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2820548070754286e-05, | |
| "loss": 2.51, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2782360309168118e-05, | |
| "loss": 2.5103, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.274417254758195e-05, | |
| "loss": 2.4884, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2705984785995786e-05, | |
| "loss": 2.4913, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2667797024409618e-05, | |
| "loss": 2.4929, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.262960926282345e-05, | |
| "loss": 2.5079, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2591421501237282e-05, | |
| "loss": 2.5, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.255331011517429e-05, | |
| "loss": 2.4958, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.251512235358812e-05, | |
| "loss": 2.5076, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2476934592001957e-05, | |
| "loss": 2.4991, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.243874683041579e-05, | |
| "loss": 2.4979, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.240055906882962e-05, | |
| "loss": 2.4877, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2362447682766628e-05, | |
| "loss": 2.5059, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.232425992118046e-05, | |
| "loss": 2.4843, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2286072159594296e-05, | |
| "loss": 2.4881, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2247884398008128e-05, | |
| "loss": 2.5017, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2209773011945135e-05, | |
| "loss": 2.4981, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2171585250358967e-05, | |
| "loss": 2.489, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.21333974887728e-05, | |
| "loss": 2.4955, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2095209727186634e-05, | |
| "loss": 2.4974, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2057021965600467e-05, | |
| "loss": 2.4976, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.20188342040143e-05, | |
| "loss": 2.4934, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.198064644242813e-05, | |
| "loss": 2.5053, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.1942535056365138e-05, | |
| "loss": 2.4881, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.190434729477897e-05, | |
| "loss": 2.501, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.1866159533192802e-05, | |
| "loss": 2.4958, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.182804814712981e-05, | |
| "loss": 2.4916, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.178986038554364e-05, | |
| "loss": 2.4897, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1751672623957477e-05, | |
| "loss": 2.4845, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.171348486237131e-05, | |
| "loss": 2.5041, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.167529710078514e-05, | |
| "loss": 2.4962, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1637109339198973e-05, | |
| "loss": 2.4884, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.159892157761281e-05, | |
| "loss": 2.4941, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.156073381602664e-05, | |
| "loss": 2.5096, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1522546054440473e-05, | |
| "loss": 2.4854, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.148443466837748e-05, | |
| "loss": 2.4825, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1446246906791312e-05, | |
| "loss": 2.5111, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1408059145205144e-05, | |
| "loss": 2.5055, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1369871383618976e-05, | |
| "loss": 2.5146, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1331759997555983e-05, | |
| "loss": 2.4959, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1293572235969815e-05, | |
| "loss": 2.4997, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.125538447438365e-05, | |
| "loss": 2.4885, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1217196712797483e-05, | |
| "loss": 2.526, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1179008951211315e-05, | |
| "loss": 2.5, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1140897565148322e-05, | |
| "loss": 2.4943, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1102709803562158e-05, | |
| "loss": 2.4872, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.106452204197599e-05, | |
| "loss": 2.4927, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1026334280389822e-05, | |
| "loss": 2.5075, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.0988146518803657e-05, | |
| "loss": 2.5004, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.095003513274066e-05, | |
| "loss": 2.5027, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0911923746677668e-05, | |
| "loss": 2.4956, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.08737359850915e-05, | |
| "loss": 2.487, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0835548223505332e-05, | |
| "loss": 2.4883, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0797360461919164e-05, | |
| "loss": 2.4898, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0759172700333e-05, | |
| "loss": 2.4909, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0720984938746832e-05, | |
| "loss": 2.4797, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0682797177160664e-05, | |
| "loss": 2.4808, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0644609415574496e-05, | |
| "loss": 2.4991, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0606498029511503e-05, | |
| "loss": 2.4897, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0568310267925335e-05, | |
| "loss": 2.496, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0530198881862342e-05, | |
| "loss": 2.4874, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0492011120276174e-05, | |
| "loss": 2.4978, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0453823358690007e-05, | |
| "loss": 2.4873, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0415635597103842e-05, | |
| "loss": 2.501, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0377447835517674e-05, | |
| "loss": 2.4895, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0339260073931506e-05, | |
| "loss": 2.4798, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.030107231234534e-05, | |
| "loss": 2.4804, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0262884550759174e-05, | |
| "loss": 2.4919, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0224696789173006e-05, | |
| "loss": 2.4777, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0186585403110013e-05, | |
| "loss": 2.4773, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0148474017047017e-05, | |
| "loss": 2.4739, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.011028625546085e-05, | |
| "loss": 2.5024, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0072098493874684e-05, | |
| "loss": 2.4834, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0033910732288516e-05, | |
| "loss": 2.4924, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.999572297070235e-05, | |
| "loss": 2.4797, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.995753520911618e-05, | |
| "loss": 2.484, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9919347447530016e-05, | |
| "loss": 2.4977, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9881159685943852e-05, | |
| "loss": 2.4783, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9843048299880855e-05, | |
| "loss": 2.4962, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9804936913817862e-05, | |
| "loss": 2.4938, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9766749152231694e-05, | |
| "loss": 2.4844, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9728561390645527e-05, | |
| "loss": 2.4872, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9690373629059362e-05, | |
| "loss": 2.4955, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9652185867473194e-05, | |
| "loss": 2.4852, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9613998105887026e-05, | |
| "loss": 2.4799, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.957581034430086e-05, | |
| "loss": 2.4944, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9537698958237865e-05, | |
| "loss": 2.4988, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9499511196651698e-05, | |
| "loss": 2.4875, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9461323435065533e-05, | |
| "loss": 2.4724, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9423212049002537e-05, | |
| "loss": 2.4987, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.938502428741637e-05, | |
| "loss": 2.4957, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9346836525830204e-05, | |
| "loss": 2.4821, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9308648764244036e-05, | |
| "loss": 2.4774, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.927046100265787e-05, | |
| "loss": 2.4921, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.92322732410717e-05, | |
| "loss": 2.4885, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9194085479485536e-05, | |
| "loss": 2.4955, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.915589771789937e-05, | |
| "loss": 2.4887, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.91177099563132e-05, | |
| "loss": 2.4868, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9079598570250208e-05, | |
| "loss": 2.4786, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.904141080866404e-05, | |
| "loss": 2.4996, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.9003223047077872e-05, | |
| "loss": 2.4757, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8965035285491707e-05, | |
| "loss": 2.4956, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.892684752390554e-05, | |
| "loss": 2.4914, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8888736137842543e-05, | |
| "loss": 2.4867, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.885054837625638e-05, | |
| "loss": 2.4797, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.881236061467021e-05, | |
| "loss": 2.4955, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8774172853084043e-05, | |
| "loss": 2.4922, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8735985091497875e-05, | |
| "loss": 2.4875, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.869779732991171e-05, | |
| "loss": 2.4836, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8659609568325546e-05, | |
| "loss": 2.4803, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8621421806739378e-05, | |
| "loss": 2.4964, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8583310420676385e-05, | |
| "loss": 2.4875, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8545122659090217e-05, | |
| "loss": 2.4915, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.850693489750405e-05, | |
| "loss": 2.4995, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.846874713591788e-05, | |
| "loss": 2.4809, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8430559374331717e-05, | |
| "loss": 2.4906, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.839244798826872e-05, | |
| "loss": 2.4659, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8354336602205728e-05, | |
| "loss": 2.4745, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.831614884061956e-05, | |
| "loss": 2.4956, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8277961079033392e-05, | |
| "loss": 2.4996, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8239773317447227e-05, | |
| "loss": 2.4669, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.820158555586106e-05, | |
| "loss": 2.4857, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8163397794274892e-05, | |
| "loss": 2.4912, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8125210032688724e-05, | |
| "loss": 2.4874, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.808702227110256e-05, | |
| "loss": 2.4717, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8048910885039563e-05, | |
| "loss": 2.5005, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.801079949897657e-05, | |
| "loss": 2.485, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7972611737390402e-05, | |
| "loss": 2.4802, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7934423975804234e-05, | |
| "loss": 2.5059, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7896236214218066e-05, | |
| "loss": 2.481, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7858048452631902e-05, | |
| "loss": 2.4955, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7819860691045734e-05, | |
| "loss": 2.4833, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7781672929459566e-05, | |
| "loss": 2.4695, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7743485167873398e-05, | |
| "loss": 2.4649, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7705373781810405e-05, | |
| "loss": 2.4864, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7667186020224237e-05, | |
| "loss": 2.4922, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7629074634161244e-05, | |
| "loss": 2.4674, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7590886872575076e-05, | |
| "loss": 2.4778, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.755269911098891e-05, | |
| "loss": 2.4762, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7514511349402744e-05, | |
| "loss": 2.4852, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.747632358781658e-05, | |
| "loss": 2.4944, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7438135826230412e-05, | |
| "loss": 2.4754, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.740002444016742e-05, | |
| "loss": 2.4842, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.736183667858125e-05, | |
| "loss": 2.4783, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7323648916995083e-05, | |
| "loss": 2.4826, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.728553753093209e-05, | |
| "loss": 2.4984, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7247349769345922e-05, | |
| "loss": 2.4978, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7209162007759754e-05, | |
| "loss": 2.4852, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7170974246173586e-05, | |
| "loss": 2.4654, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7132786484587422e-05, | |
| "loss": 2.4961, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7094675098524425e-05, | |
| "loss": 2.4828, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.705648733693826e-05, | |
| "loss": 2.4824, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7018299575352093e-05, | |
| "loss": 2.4922, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6980111813765925e-05, | |
| "loss": 2.4749, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6941924052179757e-05, | |
| "loss": 2.4765, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6903736290593593e-05, | |
| "loss": 2.4909, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6865548529007425e-05, | |
| "loss": 2.4865, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6827360767421257e-05, | |
| "loss": 2.4848, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.678917300583509e-05, | |
| "loss": 2.4798, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6750985244248925e-05, | |
| "loss": 2.4661, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6712797482662757e-05, | |
| "loss": 2.456, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.667468609659976e-05, | |
| "loss": 2.4836, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6636498335013596e-05, | |
| "loss": 2.4701, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6598310573427428e-05, | |
| "loss": 2.4511, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.656012281184126e-05, | |
| "loss": 2.4736, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.6521935050255092e-05, | |
| "loss": 2.442, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.64838236641921e-05, | |
| "loss": 2.4498, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.644563590260593e-05, | |
| "loss": 2.4634, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6407448141019767e-05, | |
| "loss": 2.4437, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.63692603794336e-05, | |
| "loss": 2.4635, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6331072617847435e-05, | |
| "loss": 2.4361, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6292884856261267e-05, | |
| "loss": 2.4482, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.62546970946751e-05, | |
| "loss": 2.4501, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6216509333088935e-05, | |
| "loss": 2.448, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6178321571502767e-05, | |
| "loss": 2.4459, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.61401338099166e-05, | |
| "loss": 2.4403, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.610194604833043e-05, | |
| "loss": 2.4537, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6063758286744267e-05, | |
| "loss": 2.4677, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.602564690068127e-05, | |
| "loss": 2.4495, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5987459139095106e-05, | |
| "loss": 2.4662, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.594934775303211e-05, | |
| "loss": 2.4608, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.591115999144594e-05, | |
| "loss": 2.462, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5873048605382948e-05, | |
| "loss": 2.4542, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.583486084379678e-05, | |
| "loss": 2.4337, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5796673082210612e-05, | |
| "loss": 2.4451, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5758485320624448e-05, | |
| "loss": 2.4314, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.572029755903828e-05, | |
| "loss": 2.4542, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5682109797452112e-05, | |
| "loss": 2.4507, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5643922035865944e-05, | |
| "loss": 2.439, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.560573427427978e-05, | |
| "loss": 2.449, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5567546512693612e-05, | |
| "loss": 2.4579, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.552943512663062e-05, | |
| "loss": 2.4466, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.549124736504445e-05, | |
| "loss": 2.4515, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5453135978981455e-05, | |
| "loss": 2.4527, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.541494821739529e-05, | |
| "loss": 2.4501, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5376760455809122e-05, | |
| "loss": 2.4574, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5338572694222955e-05, | |
| "loss": 2.4663, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.530046130815996e-05, | |
| "loss": 2.4687, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5262273546573794e-05, | |
| "loss": 2.4509, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5224085784987627e-05, | |
| "loss": 2.4578, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.518589802340146e-05, | |
| "loss": 2.4494, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5147710261815295e-05, | |
| "loss": 2.4576, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5109598875752299e-05, | |
| "loss": 2.4476, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5071411114166134e-05, | |
| "loss": 2.4516, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5033223352579966e-05, | |
| "loss": 2.4582, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.49950355909938e-05, | |
| "loss": 2.4447, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4956847829407632e-05, | |
| "loss": 2.4487, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.491873644334464e-05, | |
| "loss": 2.4449, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4880548681758471e-05, | |
| "loss": 2.4465, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4842360920172305e-05, | |
| "loss": 2.4439, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4804173158586137e-05, | |
| "loss": 2.4527, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4766061772523143e-05, | |
| "loss": 2.4518, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4727874010936976e-05, | |
| "loss": 2.4407, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4689686249350809e-05, | |
| "loss": 2.4485, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4651498487764642e-05, | |
| "loss": 2.4401, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4613310726178475e-05, | |
| "loss": 2.4549, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4575122964592308e-05, | |
| "loss": 2.4468, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.453693520300614e-05, | |
| "loss": 2.4626, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4498747441419974e-05, | |
| "loss": 2.4491, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.446063605535698e-05, | |
| "loss": 2.4539, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4422524669293985e-05, | |
| "loss": 2.4641, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4384336907707819e-05, | |
| "loss": 2.4342, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.434614914612165e-05, | |
| "loss": 2.4475, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4307961384535485e-05, | |
| "loss": 2.4415, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4269773622949317e-05, | |
| "loss": 2.4543, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.423158586136315e-05, | |
| "loss": 2.4339, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4193398099776983e-05, | |
| "loss": 2.4473, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4155210338190817e-05, | |
| "loss": 2.4478, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4117022576604649e-05, | |
| "loss": 2.4604, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4078834815018483e-05, | |
| "loss": 2.4417, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4040647053432315e-05, | |
| "loss": 2.4471, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.400245929184615e-05, | |
| "loss": 2.4461, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3964271530259984e-05, | |
| "loss": 2.4557, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.392616014419699e-05, | |
| "loss": 2.4534, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3888048758133995e-05, | |
| "loss": 2.4506, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3849860996547828e-05, | |
| "loss": 2.4578, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.381167323496166e-05, | |
| "loss": 2.4611, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3773485473375494e-05, | |
| "loss": 2.426, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3735297711789327e-05, | |
| "loss": 2.4545, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.369710995020316e-05, | |
| "loss": 2.4478, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3658922188616993e-05, | |
| "loss": 2.4582, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3620810802554e-05, | |
| "loss": 2.4719, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3582623040967832e-05, | |
| "loss": 2.4513, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3544435279381665e-05, | |
| "loss": 2.4509, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3506247517795498e-05, | |
| "loss": 2.4464, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3468059756209331e-05, | |
| "loss": 2.4385, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3429871994623164e-05, | |
| "loss": 2.4486, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3391684233036997e-05, | |
| "loss": 2.4516, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.335349647145083e-05, | |
| "loss": 2.4542, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3315385085387835e-05, | |
| "loss": 2.4439, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3277197323801669e-05, | |
| "loss": 2.4697, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.32390095622155e-05, | |
| "loss": 2.4428, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3200821800629335e-05, | |
| "loss": 2.4372, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.316271041456634e-05, | |
| "loss": 2.4531, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3124522652980174e-05, | |
| "loss": 2.4374, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.3086334891394006e-05, | |
| "loss": 2.4511, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.304814712980784e-05, | |
| "loss": 2.4434, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.3009959368221672e-05, | |
| "loss": 2.4376, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2971771606635506e-05, | |
| "loss": 2.4383, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2933660220572511e-05, | |
| "loss": 2.4472, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2895472458986343e-05, | |
| "loss": 2.4618, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2857284697400177e-05, | |
| "loss": 2.4335, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2819096935814012e-05, | |
| "loss": 2.4536, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2781061925274187e-05, | |
| "loss": 2.4489, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2742874163688021e-05, | |
| "loss": 2.4469, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2704686402101857e-05, | |
| "loss": 2.4408, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2666498640515689e-05, | |
| "loss": 2.4476, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2628310878929523e-05, | |
| "loss": 2.4503, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2590123117343355e-05, | |
| "loss": 2.4336, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2551935355757189e-05, | |
| "loss": 2.4564, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2513823969694194e-05, | |
| "loss": 2.431, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2475636208108026e-05, | |
| "loss": 2.4405, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.243744844652186e-05, | |
| "loss": 2.443, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2399260684935694e-05, | |
| "loss": 2.4474, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2361072923349526e-05, | |
| "loss": 2.443, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.232288516176336e-05, | |
| "loss": 2.4471, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2284697400177192e-05, | |
| "loss": 2.4584, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2246509638591026e-05, | |
| "loss": 2.4465, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2208321877004858e-05, | |
| "loss": 2.4397, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2170134115418692e-05, | |
| "loss": 2.4618, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2131946353832524e-05, | |
| "loss": 2.4752, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.2093758592246358e-05, | |
| "loss": 2.4567, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.205557083066019e-05, | |
| "loss": 2.4568, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.2017612195643541e-05, | |
| "loss": 2.4374, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1979424434057373e-05, | |
| "loss": 2.443, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1941236672471207e-05, | |
| "loss": 2.4478, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.190304891088504e-05, | |
| "loss": 2.4496, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1864861149298873e-05, | |
| "loss": 2.4571, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1826673387712707e-05, | |
| "loss": 2.4355, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.178848562612654e-05, | |
| "loss": 2.4499, | |
| "step": 500500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1750297864540373e-05, | |
| "loss": 2.4439, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1712110102954207e-05, | |
| "loss": 2.4372, | |
| "step": 501500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1673998716891212e-05, | |
| "loss": 2.4406, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1635810955305044e-05, | |
| "loss": 2.4465, | |
| "step": 502500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1597623193718878e-05, | |
| "loss": 2.4579, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.155943543213271e-05, | |
| "loss": 2.456, | |
| "step": 503500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1521324046069715e-05, | |
| "loss": 2.4521, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.148313628448355e-05, | |
| "loss": 2.4564, | |
| "step": 504500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1445024898420554e-05, | |
| "loss": 2.4622, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1406837136834388e-05, | |
| "loss": 2.4393, | |
| "step": 505500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.136864937524822e-05, | |
| "loss": 2.452, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1330461613662054e-05, | |
| "loss": 2.4385, | |
| "step": 506500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1292273852075886e-05, | |
| "loss": 2.4319, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.125408609048972e-05, | |
| "loss": 2.4473, | |
| "step": 507500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1215898328903554e-05, | |
| "loss": 2.4442, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1177710567317386e-05, | |
| "loss": 2.4359, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.113952280573122e-05, | |
| "loss": 2.4457, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1101335044145052e-05, | |
| "loss": 2.4606, | |
| "step": 509500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1063223658082059e-05, | |
| "loss": 2.4343, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1025035896495891e-05, | |
| "loss": 2.4368, | |
| "step": 510500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0986848134909725e-05, | |
| "loss": 2.4222, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0948660373323557e-05, | |
| "loss": 2.442, | |
| "step": 511500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0910472611737391e-05, | |
| "loss": 2.451, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0872284850151223e-05, | |
| "loss": 2.4269, | |
| "step": 512500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0834173464088228e-05, | |
| "loss": 2.455, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0795985702502062e-05, | |
| "loss": 2.4436, | |
| "step": 513500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0757874316439068e-05, | |
| "loss": 2.4479, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0719686554852901e-05, | |
| "loss": 2.448, | |
| "step": 514500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0681498793266734e-05, | |
| "loss": 2.4465, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0643311031680567e-05, | |
| "loss": 2.4565, | |
| "step": 515500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0605123270094401e-05, | |
| "loss": 2.4629, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0566935508508233e-05, | |
| "loss": 2.4651, | |
| "step": 516500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0528747746922067e-05, | |
| "loss": 2.4393, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.04905599853359e-05, | |
| "loss": 2.4447, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0452372223749733e-05, | |
| "loss": 2.4384, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0414260837686738e-05, | |
| "loss": 2.4547, | |
| "step": 518500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0376073076100572e-05, | |
| "loss": 2.4342, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0337885314514404e-05, | |
| "loss": 2.4307, | |
| "step": 519500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0299697552928238e-05, | |
| "loss": 2.4439, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.026150979134207e-05, | |
| "loss": 2.4509, | |
| "step": 520500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0223322029755904e-05, | |
| "loss": 2.4485, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0185134268169736e-05, | |
| "loss": 2.4511, | |
| "step": 521500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0146946506583572e-05, | |
| "loss": 2.4464, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0108835120520577e-05, | |
| "loss": 2.4437, | |
| "step": 522500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.007064735893441e-05, | |
| "loss": 2.4495, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0032535972871414e-05, | |
| "loss": 2.4382, | |
| "step": 523500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.994348211285248e-06, | |
| "loss": 2.443, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.956236825222254e-06, | |
| "loss": 2.4442, | |
| "step": 524500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.918049063636087e-06, | |
| "loss": 2.4554, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.87986130204992e-06, | |
| "loss": 2.4317, | |
| "step": 525500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.841673540463753e-06, | |
| "loss": 2.4502, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.803485778877586e-06, | |
| "loss": 2.4515, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.76529801729142e-06, | |
| "loss": 2.4367, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.727110255705252e-06, | |
| "loss": 2.4285, | |
| "step": 527500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.688922494119085e-06, | |
| "loss": 2.4296, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.650734732532918e-06, | |
| "loss": 2.4533, | |
| "step": 528500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.612623346469923e-06, | |
| "loss": 2.427, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.574435584883757e-06, | |
| "loss": 2.4342, | |
| "step": 529500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.536247823297589e-06, | |
| "loss": 2.4367, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.498060061711424e-06, | |
| "loss": 2.4302, | |
| "step": 530500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.45994867564843e-06, | |
| "loss": 2.4319, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.421760914062262e-06, | |
| "loss": 2.4366, | |
| "step": 531500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.383573152476095e-06, | |
| "loss": 2.457, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.345385390889928e-06, | |
| "loss": 2.4379, | |
| "step": 532500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.307197629303761e-06, | |
| "loss": 2.4566, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.269086243240767e-06, | |
| "loss": 2.4534, | |
| "step": 533500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.2308984816546e-06, | |
| "loss": 2.4507, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.192710720068433e-06, | |
| "loss": 2.4391, | |
| "step": 534500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.154599334005438e-06, | |
| "loss": 2.4498, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.116411572419272e-06, | |
| "loss": 2.4481, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.078223810833104e-06, | |
| "loss": 2.4372, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.040036049246938e-06, | |
| "loss": 2.4461, | |
| "step": 536500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.00184828766077e-06, | |
| "loss": 2.4341, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 8.963660526074604e-06, | |
| "loss": 2.4365, | |
| "step": 537500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.925472764488438e-06, | |
| "loss": 2.4351, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.887285002902271e-06, | |
| "loss": 2.4408, | |
| "step": 538500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.849173616839277e-06, | |
| "loss": 2.4623, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.810985855253109e-06, | |
| "loss": 2.4498, | |
| "step": 539500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.772798093666943e-06, | |
| "loss": 2.4255, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.734610332080775e-06, | |
| "loss": 2.4218, | |
| "step": 540500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.696422570494609e-06, | |
| "loss": 2.4306, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.658311184431614e-06, | |
| "loss": 2.441, | |
| "step": 541500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.620123422845448e-06, | |
| "loss": 2.4526, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.58193566125928e-06, | |
| "loss": 2.4509, | |
| "step": 542500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.543824275196285e-06, | |
| "loss": 2.4365, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.505636513610119e-06, | |
| "loss": 2.4411, | |
| "step": 543500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.467448752023951e-06, | |
| "loss": 2.4501, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.429260990437785e-06, | |
| "loss": 2.4357, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.391073228851617e-06, | |
| "loss": 2.4511, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.35288546726545e-06, | |
| "loss": 2.4388, | |
| "step": 545500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.314697705679285e-06, | |
| "loss": 2.4441, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.276509944093119e-06, | |
| "loss": 2.4441, | |
| "step": 546500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.23832218250695e-06, | |
| "loss": 2.4389, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.200134420920785e-06, | |
| "loss": 2.4493, | |
| "step": 547500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.161946659334617e-06, | |
| "loss": 2.46, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.12375889774845e-06, | |
| "loss": 2.4401, | |
| "step": 548500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.085571136162283e-06, | |
| "loss": 2.4234, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.047383374576117e-06, | |
| "loss": 2.432, | |
| "step": 549500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.009195612989949e-06, | |
| "loss": 2.4419, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.971007851403783e-06, | |
| "loss": 2.4433, | |
| "step": 550500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.932896465340788e-06, | |
| "loss": 2.4406, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.89470870375462e-06, | |
| "loss": 2.4506, | |
| "step": 551500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.856520942168455e-06, | |
| "loss": 2.4455, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.818409556105459e-06, | |
| "loss": 2.426, | |
| "step": 552500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.780221794519293e-06, | |
| "loss": 2.4455, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.742034032933127e-06, | |
| "loss": 2.4256, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.703846271346959e-06, | |
| "loss": 2.4406, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.665734885283966e-06, | |
| "loss": 2.4354, | |
| "step": 554500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.627547123697798e-06, | |
| "loss": 2.4411, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.589359362111631e-06, | |
| "loss": 2.4288, | |
| "step": 555500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.551171600525464e-06, | |
| "loss": 2.4325, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.512983838939297e-06, | |
| "loss": 2.4231, | |
| "step": 556500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.47479607735313e-06, | |
| "loss": 2.4492, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.436608315766963e-06, | |
| "loss": 2.4552, | |
| "step": 557500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.398420554180796e-06, | |
| "loss": 2.4487, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.360385543640974e-06, | |
| "loss": 2.4361, | |
| "step": 558500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.322197782054807e-06, | |
| "loss": 2.4493, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.28401002046864e-06, | |
| "loss": 2.4353, | |
| "step": 559500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.245822258882473e-06, | |
| "loss": 2.4285, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.207634497296306e-06, | |
| "loss": 2.4285, | |
| "step": 560500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.169446735710141e-06, | |
| "loss": 2.4331, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.131258974123974e-06, | |
| "loss": 2.4412, | |
| "step": 561500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.093071212537807e-06, | |
| "loss": 2.4206, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.05488345095164e-06, | |
| "loss": 2.4278, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.016695689365473e-06, | |
| "loss": 2.4425, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.978507927779306e-06, | |
| "loss": 2.4253, | |
| "step": 563500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.940320166193139e-06, | |
| "loss": 2.4526, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.902208780130144e-06, | |
| "loss": 2.4563, | |
| "step": 564500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.864021018543977e-06, | |
| "loss": 2.4494, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.82583325695781e-06, | |
| "loss": 2.4169, | |
| "step": 565500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.787645495371643e-06, | |
| "loss": 2.4331, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.749457733785476e-06, | |
| "loss": 2.4448, | |
| "step": 566500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.711346347722482e-06, | |
| "loss": 2.4283, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.673158586136315e-06, | |
| "loss": 2.4315, | |
| "step": 567500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.634970824550149e-06, | |
| "loss": 2.4446, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.596783062963982e-06, | |
| "loss": 2.4255, | |
| "step": 568500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.558671676900988e-06, | |
| "loss": 2.4402, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.520560290837993e-06, | |
| "loss": 2.4345, | |
| "step": 569500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.482372529251826e-06, | |
| "loss": 2.4266, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.444184767665659e-06, | |
| "loss": 2.4297, | |
| "step": 570500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.405997006079492e-06, | |
| "loss": 2.4315, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.367885620016498e-06, | |
| "loss": 2.4302, | |
| "step": 571500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.329697858430331e-06, | |
| "loss": 2.428, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.291510096844164e-06, | |
| "loss": 2.4299, | |
| "step": 572500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.253322335257997e-06, | |
| "loss": 2.4304, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.21513457367183e-06, | |
| "loss": 2.452, | |
| "step": 573500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.176946812085663e-06, | |
| "loss": 2.4342, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.138835426022668e-06, | |
| "loss": 2.4262, | |
| "step": 574500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.100647664436502e-06, | |
| "loss": 2.446, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.062536278373507e-06, | |
| "loss": 2.4221, | |
| "step": 575500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.02434851678734e-06, | |
| "loss": 2.4428, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.986160755201173e-06, | |
| "loss": 2.4236, | |
| "step": 576500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.947972993615006e-06, | |
| "loss": 2.4389, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.909785232028839e-06, | |
| "loss": 2.4393, | |
| "step": 577500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.871597470442672e-06, | |
| "loss": 2.4377, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.833409708856506e-06, | |
| "loss": 2.4413, | |
| "step": 578500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.795221947270339e-06, | |
| "loss": 2.4346, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.757034185684172e-06, | |
| "loss": 2.4069, | |
| "step": 579500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.718846424098005e-06, | |
| "loss": 2.4508, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.680658662511838e-06, | |
| "loss": 2.435, | |
| "step": 580500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.642470900925671e-06, | |
| "loss": 2.4577, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.604283139339505e-06, | |
| "loss": 2.4099, | |
| "step": 581500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.566095377753338e-06, | |
| "loss": 2.436, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.527983991690343e-06, | |
| "loss": 2.4231, | |
| "step": 582500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.489796230104176e-06, | |
| "loss": 2.4254, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.451608468518009e-06, | |
| "loss": 2.448, | |
| "step": 583500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.413497082455015e-06, | |
| "loss": 2.4402, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.375309320868848e-06, | |
| "loss": 2.4291, | |
| "step": 584500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.337121559282681e-06, | |
| "loss": 2.4349, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.298933797696515e-06, | |
| "loss": 2.4311, | |
| "step": 585500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.260746036110348e-06, | |
| "loss": 2.4328, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.222558274524181e-06, | |
| "loss": 2.4363, | |
| "step": 586500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.184370512938014e-06, | |
| "loss": 2.4224, | |
| "step": 587000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.146182751351847e-06, | |
| "loss": 2.437, | |
| "step": 587500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.10799498976568e-06, | |
| "loss": 2.4392, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.069883603702685e-06, | |
| "loss": 2.4361, | |
| "step": 588500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.031695842116519e-06, | |
| "loss": 2.4345, | |
| "step": 589000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.993508080530352e-06, | |
| "loss": 2.4301, | |
| "step": 589500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.955320318944185e-06, | |
| "loss": 2.4188, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.917132557358018e-06, | |
| "loss": 2.4286, | |
| "step": 590500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.879021171295023e-06, | |
| "loss": 2.4451, | |
| "step": 591000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.840833409708856e-06, | |
| "loss": 2.4475, | |
| "step": 591500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.802645648122689e-06, | |
| "loss": 2.4494, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.764457886536523e-06, | |
| "loss": 2.4347, | |
| "step": 592500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.726270124950356e-06, | |
| "loss": 2.4342, | |
| "step": 593000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.688158738887362e-06, | |
| "loss": 2.4301, | |
| "step": 593500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.649970977301195e-06, | |
| "loss": 2.435, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.611783215715028e-06, | |
| "loss": 2.4211, | |
| "step": 594500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.573595454128861e-06, | |
| "loss": 2.4424, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.535407692542694e-06, | |
| "loss": 2.4131, | |
| "step": 595500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.497219930956527e-06, | |
| "loss": 2.4284, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.459108544893532e-06, | |
| "loss": 2.4395, | |
| "step": 596500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.420997158830538e-06, | |
| "loss": 2.4276, | |
| "step": 597000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.3828093972443714e-06, | |
| "loss": 2.4365, | |
| "step": 597500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.3446216356582044e-06, | |
| "loss": 2.4223, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.3064338740720374e-06, | |
| "loss": 2.4116, | |
| "step": 598500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.2682461124858704e-06, | |
| "loss": 2.4536, | |
| "step": 599000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.2300583508997034e-06, | |
| "loss": 2.4433, | |
| "step": 599500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.1918705893135364e-06, | |
| "loss": 2.4403, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.15368282772737e-06, | |
| "loss": 2.4414, | |
| "step": 600500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.115495066141203e-06, | |
| "loss": 2.4472, | |
| "step": 601000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.077383680078209e-06, | |
| "loss": 2.4324, | |
| "step": 601500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.039195918492042e-06, | |
| "loss": 2.4367, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.001008156905875e-06, | |
| "loss": 2.4215, | |
| "step": 602500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.962820395319708e-06, | |
| "loss": 2.4327, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.924632633733541e-06, | |
| "loss": 2.4284, | |
| "step": 603500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.886444872147374e-06, | |
| "loss": 2.4257, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.84833348608438e-06, | |
| "loss": 2.4366, | |
| "step": 604500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.8101457244982134e-06, | |
| "loss": 2.4546, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.7719579629120464e-06, | |
| "loss": 2.4376, | |
| "step": 605500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.7337702013258794e-06, | |
| "loss": 2.4267, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6956588152628845e-06, | |
| "loss": 2.4348, | |
| "step": 606500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6574710536767175e-06, | |
| "loss": 2.4243, | |
| "step": 607000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6192832920905505e-06, | |
| "loss": 2.4352, | |
| "step": 607500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.5810955305043844e-06, | |
| "loss": 2.424, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.542984144441389e-06, | |
| "loss": 2.4384, | |
| "step": 608500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.504796382855223e-06, | |
| "loss": 2.4168, | |
| "step": 609000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.466608621269056e-06, | |
| "loss": 2.4274, | |
| "step": 609500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.4284972352060616e-06, | |
| "loss": 2.4322, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3903094736198946e-06, | |
| "loss": 2.4362, | |
| "step": 610500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3521217120337276e-06, | |
| "loss": 2.4174, | |
| "step": 611000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3139339504475606e-06, | |
| "loss": 2.4281, | |
| "step": 611500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.2757461888613936e-06, | |
| "loss": 2.4349, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.2375584272752275e-06, | |
| "loss": 2.4416, | |
| "step": 612500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1993706656890605e-06, | |
| "loss": 2.4361, | |
| "step": 613000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1611829041028935e-06, | |
| "loss": 2.4214, | |
| "step": 613500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1229951425167265e-06, | |
| "loss": 2.4186, | |
| "step": 614000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0848073809305595e-06, | |
| "loss": 2.4362, | |
| "step": 614500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0466196193443925e-06, | |
| "loss": 2.436, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0084318577582255e-06, | |
| "loss": 2.4341, | |
| "step": 615500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.970244096172059e-06, | |
| "loss": 2.4275, | |
| "step": 616000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.9321327101090645e-06, | |
| "loss": 2.4447, | |
| "step": 616500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.8939449485228975e-06, | |
| "loss": 2.4238, | |
| "step": 617000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.855833562459903e-06, | |
| "loss": 2.4418, | |
| "step": 617500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.8177221763969087e-06, | |
| "loss": 2.4255, | |
| "step": 618000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7795344148107417e-06, | |
| "loss": 2.4169, | |
| "step": 618500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.7413466532245747e-06, | |
| "loss": 2.4201, | |
| "step": 619000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.703158891638408e-06, | |
| "loss": 2.4145, | |
| "step": 619500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.664971130052241e-06, | |
| "loss": 2.4464, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.626783368466074e-06, | |
| "loss": 2.4336, | |
| "step": 620500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.588595606879907e-06, | |
| "loss": 2.4021, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.55040784529374e-06, | |
| "loss": 2.4143, | |
| "step": 621500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.512220083707573e-06, | |
| "loss": 2.4113, | |
| "step": 622000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.4740323221214066e-06, | |
| "loss": 2.4401, | |
| "step": 622500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.435920936058412e-06, | |
| "loss": 2.4253, | |
| "step": 623000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.397733174472245e-06, | |
| "loss": 2.4336, | |
| "step": 623500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.359545412886078e-06, | |
| "loss": 2.4242, | |
| "step": 624000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.3213576512999116e-06, | |
| "loss": 2.4268, | |
| "step": 624500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.2831698897137446e-06, | |
| "loss": 2.4487, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.2449821281275776e-06, | |
| "loss": 2.4442, | |
| "step": 625500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.2068707420645833e-06, | |
| "loss": 2.4219, | |
| "step": 626000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.1686829804784167e-06, | |
| "loss": 2.438, | |
| "step": 626500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.1304952188922497e-06, | |
| "loss": 2.4211, | |
| "step": 627000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0923074573060827e-06, | |
| "loss": 2.4224, | |
| "step": 627500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.054196071243088e-06, | |
| "loss": 2.4236, | |
| "step": 628000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0160083096569213e-06, | |
| "loss": 2.4088, | |
| "step": 628500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.9778205480707543e-06, | |
| "loss": 2.4216, | |
| "step": 629000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.93970916200776e-06, | |
| "loss": 2.434, | |
| "step": 629500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.901521400421593e-06, | |
| "loss": 2.4511, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.863333638835426e-06, | |
| "loss": 2.4358, | |
| "step": 630500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.8251458772492593e-06, | |
| "loss": 2.4467, | |
| "step": 631000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.7869581156630923e-06, | |
| "loss": 2.425, | |
| "step": 631500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7487703540769253e-06, | |
| "loss": 2.4136, | |
| "step": 632000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7105825924907588e-06, | |
| "loss": 2.4209, | |
| "step": 632500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6723948309045918e-06, | |
| "loss": 2.4297, | |
| "step": 633000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.634207069318425e-06, | |
| "loss": 2.4426, | |
| "step": 633500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5960956832554302e-06, | |
| "loss": 2.4172, | |
| "step": 634000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5579079216692636e-06, | |
| "loss": 2.4378, | |
| "step": 634500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.519796535606269e-06, | |
| "loss": 2.4301, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4816087740201022e-06, | |
| "loss": 2.4116, | |
| "step": 635500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4434210124339354e-06, | |
| "loss": 2.4258, | |
| "step": 636000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.4052332508477684e-06, | |
| "loss": 2.4371, | |
| "step": 636500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.3670454892616014e-06, | |
| "loss": 2.4201, | |
| "step": 637000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.3288577276754346e-06, | |
| "loss": 2.4293, | |
| "step": 637500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.2906699660892676e-06, | |
| "loss": 2.4312, | |
| "step": 638000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2524822045031008e-06, | |
| "loss": 2.4226, | |
| "step": 638500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2143708184401065e-06, | |
| "loss": 2.433, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1762594323771119e-06, | |
| "loss": 2.4163, | |
| "step": 639500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1380716707909449e-06, | |
| "loss": 2.4187, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.099883909204778e-06, | |
| "loss": 2.4133, | |
| "step": 640500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0616961476186113e-06, | |
| "loss": 2.4094, | |
| "step": 641000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0235083860324443e-06, | |
| "loss": 2.4239, | |
| "step": 641500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.853206244462775e-07, | |
| "loss": 2.4384, | |
| "step": 642000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.471328628601107e-07, | |
| "loss": 2.4324, | |
| "step": 642500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.09021476797116e-07, | |
| "loss": 2.4286, | |
| "step": 643000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.708337152109492e-07, | |
| "loss": 2.4441, | |
| "step": 643500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.326459536247824e-07, | |
| "loss": 2.4167, | |
| "step": 644000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 7.944581920386155e-07, | |
| "loss": 2.4394, | |
| "step": 644500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.562704304524486e-07, | |
| "loss": 2.4224, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.180826688662817e-07, | |
| "loss": 2.4448, | |
| "step": 645500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.79894907280115e-07, | |
| "loss": 2.4181, | |
| "step": 646000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.417071456939481e-07, | |
| "loss": 2.4207, | |
| "step": 646500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.035193841077812e-07, | |
| "loss": 2.4257, | |
| "step": 647000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.653316225216143e-07, | |
| "loss": 2.4369, | |
| "step": 647500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.272202364586198e-07, | |
| "loss": 2.4371, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.890324748724529e-07, | |
| "loss": 2.4236, | |
| "step": 648500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.5084471328628605e-07, | |
| "loss": 2.43, | |
| "step": 649000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.126569517001191e-07, | |
| "loss": 2.4313, | |
| "step": 649500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.744691901139523e-07, | |
| "loss": 2.4409, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.3635780405095777e-07, | |
| "loss": 2.4284, | |
| "step": 650500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.981700424647909e-07, | |
| "loss": 2.4234, | |
| "step": 651000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.5998228087862404e-07, | |
| "loss": 2.4511, | |
| "step": 651500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.2179451929245717e-07, | |
| "loss": 2.4348, | |
| "step": 652000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.8368313322946263e-07, | |
| "loss": 2.4319, | |
| "step": 652500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.4549537164329576e-07, | |
| "loss": 2.4315, | |
| "step": 653000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.073076100571289e-07, | |
| "loss": 2.4327, | |
| "step": 653500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 6.911984847096204e-08, | |
| "loss": 2.4033, | |
| "step": 654000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 3.093208688479516e-08, | |
| "loss": 2.4401, | |
| "step": 654500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 654660, | |
| "total_flos": 9.727742291283542e+18, | |
| "train_loss": 2.543785205228415, | |
| "train_runtime": 2023217.6227, | |
| "train_samples_per_second": 2.589, | |
| "train_steps_per_second": 0.324 | |
| } | |
| ], | |
| "max_steps": 654660, | |
| "num_train_epochs": 3, | |
| "total_flos": 9.727742291283542e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |