Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent/a1-nemo_prism_math with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent/a1-nemo_prism_math with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DCAgent/a1-nemo_prism_math")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DCAgent/a1-nemo_prism_math")
model = AutoModelForCausalLM.from_pretrained("DCAgent/a1-nemo_prism_math")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DCAgent/a1-nemo_prism_math with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DCAgent/a1-nemo_prism_math"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-nemo_prism_math",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/DCAgent/a1-nemo_prism_math
- SGLang
How to use DCAgent/a1-nemo_prism_math with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "DCAgent/a1-nemo_prism_math" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-nemo_prism_math",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DCAgent/a1-nemo_prism_math" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-nemo_prism_math",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use DCAgent/a1-nemo_prism_math with Docker Model Runner:
docker model run hf.co/DCAgent/a1-nemo_prism_math
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 3850, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00909090909090909, | |
| "grad_norm": 6.339214137826888, | |
| "learning_rate": 4.155844155844156e-07, | |
| "loss": 0.6263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5477997660636902, | |
| "step": 5, | |
| "valid_targets_mean": 2903.2, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 0.01818181818181818, | |
| "grad_norm": 6.691577065976262, | |
| "learning_rate": 9.350649350649352e-07, | |
| "loss": 0.658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5785444378852844, | |
| "step": 10, | |
| "valid_targets_mean": 2720.8, | |
| "valid_targets_min": 1308 | |
| }, | |
| { | |
| "epoch": 0.02727272727272727, | |
| "grad_norm": 6.888217378747393, | |
| "learning_rate": 1.4545454545454546e-06, | |
| "loss": 0.649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6698688268661499, | |
| "step": 15, | |
| "valid_targets_mean": 2414.9, | |
| "valid_targets_min": 1087 | |
| }, | |
| { | |
| "epoch": 0.03636363636363636, | |
| "grad_norm": 6.872383497711192, | |
| "learning_rate": 1.9740259740259743e-06, | |
| "loss": 0.6331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7004647850990295, | |
| "step": 20, | |
| "valid_targets_mean": 2721.0, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 4.103200743218897, | |
| "learning_rate": 2.4935064935064936e-06, | |
| "loss": 0.5927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6205931901931763, | |
| "step": 25, | |
| "valid_targets_mean": 2484.2, | |
| "valid_targets_min": 1271 | |
| }, | |
| { | |
| "epoch": 0.05454545454545454, | |
| "grad_norm": 2.397410279781038, | |
| "learning_rate": 3.0129870129870133e-06, | |
| "loss": 0.5956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5478155612945557, | |
| "step": 30, | |
| "valid_targets_mean": 3119.8, | |
| "valid_targets_min": 1385 | |
| }, | |
| { | |
| "epoch": 0.06363636363636363, | |
| "grad_norm": 1.9320989591881759, | |
| "learning_rate": 3.532467532467533e-06, | |
| "loss": 0.5349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6627036333084106, | |
| "step": 35, | |
| "valid_targets_mean": 3209.0, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 0.07272727272727272, | |
| "grad_norm": 1.2577860380350014, | |
| "learning_rate": 4.051948051948053e-06, | |
| "loss": 0.5456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4294567108154297, | |
| "step": 40, | |
| "valid_targets_mean": 2889.9, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 0.08181818181818182, | |
| "grad_norm": 1.3213819490142387, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": 0.512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5345403552055359, | |
| "step": 45, | |
| "valid_targets_mean": 3041.4, | |
| "valid_targets_min": 1188 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 0.9381336982813961, | |
| "learning_rate": 5.090909090909091e-06, | |
| "loss": 0.4759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46422749757766724, | |
| "step": 50, | |
| "valid_targets_mean": 2768.2, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.1483543606640796, | |
| "learning_rate": 5.6103896103896105e-06, | |
| "loss": 0.4981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4253108501434326, | |
| "step": 55, | |
| "valid_targets_mean": 3121.2, | |
| "valid_targets_min": 1242 | |
| }, | |
| { | |
| "epoch": 0.10909090909090909, | |
| "grad_norm": 0.8457378397953323, | |
| "learning_rate": 6.129870129870131e-06, | |
| "loss": 0.466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46757400035858154, | |
| "step": 60, | |
| "valid_targets_mean": 2539.6, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 0.11818181818181818, | |
| "grad_norm": 0.7504641099947844, | |
| "learning_rate": 6.64935064935065e-06, | |
| "loss": 0.4836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48012202978134155, | |
| "step": 65, | |
| "valid_targets_mean": 3173.1, | |
| "valid_targets_min": 1911 | |
| }, | |
| { | |
| "epoch": 0.12727272727272726, | |
| "grad_norm": 0.7849813069658419, | |
| "learning_rate": 7.16883116883117e-06, | |
| "loss": 0.4367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44680148363113403, | |
| "step": 70, | |
| "valid_targets_mean": 1945.4, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 0.8079545894514485, | |
| "learning_rate": 7.68831168831169e-06, | |
| "loss": 0.4871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47567713260650635, | |
| "step": 75, | |
| "valid_targets_mean": 2781.6, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 0.14545454545454545, | |
| "grad_norm": 0.6825461454759197, | |
| "learning_rate": 8.20779220779221e-06, | |
| "loss": 0.4073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39913469552993774, | |
| "step": 80, | |
| "valid_targets_mean": 2483.6, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 0.15454545454545454, | |
| "grad_norm": 0.6259764551440307, | |
| "learning_rate": 8.727272727272728e-06, | |
| "loss": 0.4403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3804532289505005, | |
| "step": 85, | |
| "valid_targets_mean": 3047.2, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 0.16363636363636364, | |
| "grad_norm": 0.643861228084392, | |
| "learning_rate": 9.246753246753248e-06, | |
| "loss": 0.47, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47174426913261414, | |
| "step": 90, | |
| "valid_targets_mean": 3787.8, | |
| "valid_targets_min": 1296 | |
| }, | |
| { | |
| "epoch": 0.17272727272727273, | |
| "grad_norm": 0.5906721673556469, | |
| "learning_rate": 9.766233766233766e-06, | |
| "loss": 0.3948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3424094021320343, | |
| "step": 95, | |
| "valid_targets_mean": 2835.1, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 0.6753239700311199, | |
| "learning_rate": 1.0285714285714285e-05, | |
| "loss": 0.3798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39557331800460815, | |
| "step": 100, | |
| "valid_targets_mean": 2038.4, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 0.19090909090909092, | |
| "grad_norm": 0.6149179303076256, | |
| "learning_rate": 1.0805194805194805e-05, | |
| "loss": 0.3984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37537679076194763, | |
| "step": 105, | |
| "valid_targets_mean": 2919.0, | |
| "valid_targets_min": 1386 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.6013440558412507, | |
| "learning_rate": 1.1324675324675325e-05, | |
| "loss": 0.3929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35833466053009033, | |
| "step": 110, | |
| "valid_targets_mean": 2751.6, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 0.20909090909090908, | |
| "grad_norm": 0.6789497483457126, | |
| "learning_rate": 1.1844155844155845e-05, | |
| "loss": 0.392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38917845487594604, | |
| "step": 115, | |
| "valid_targets_mean": 2357.2, | |
| "valid_targets_min": 953 | |
| }, | |
| { | |
| "epoch": 0.21818181818181817, | |
| "grad_norm": 0.7856489772298153, | |
| "learning_rate": 1.2363636363636364e-05, | |
| "loss": 0.4254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4267646074295044, | |
| "step": 120, | |
| "valid_targets_mean": 2231.1, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 0.5962614603101419, | |
| "learning_rate": 1.2883116883116884e-05, | |
| "loss": 0.3905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3281465172767639, | |
| "step": 125, | |
| "valid_targets_mean": 2614.0, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 0.23636363636363636, | |
| "grad_norm": 0.6331353154984325, | |
| "learning_rate": 1.3402597402597404e-05, | |
| "loss": 0.4166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3900526762008667, | |
| "step": 130, | |
| "valid_targets_mean": 2835.6, | |
| "valid_targets_min": 1360 | |
| }, | |
| { | |
| "epoch": 0.24545454545454545, | |
| "grad_norm": 0.6153943492748803, | |
| "learning_rate": 1.3922077922077924e-05, | |
| "loss": 0.3767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30019235610961914, | |
| "step": 135, | |
| "valid_targets_mean": 2453.8, | |
| "valid_targets_min": 1125 | |
| }, | |
| { | |
| "epoch": 0.2545454545454545, | |
| "grad_norm": 0.6197478318878169, | |
| "learning_rate": 1.4441558441558442e-05, | |
| "loss": 0.4251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3900189995765686, | |
| "step": 140, | |
| "valid_targets_mean": 2988.1, | |
| "valid_targets_min": 1448 | |
| }, | |
| { | |
| "epoch": 0.2636363636363636, | |
| "grad_norm": 0.6848995712566689, | |
| "learning_rate": 1.4961038961038962e-05, | |
| "loss": 0.4072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4560791552066803, | |
| "step": 145, | |
| "valid_targets_mean": 2818.6, | |
| "valid_targets_min": 1274 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 0.6611848899773384, | |
| "learning_rate": 1.548051948051948e-05, | |
| "loss": 0.3951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43154481053352356, | |
| "step": 150, | |
| "valid_targets_mean": 3036.3, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 0.2818181818181818, | |
| "grad_norm": 0.6630744668701863, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.39, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44840043783187866, | |
| "step": 155, | |
| "valid_targets_mean": 3600.2, | |
| "valid_targets_min": 1235 | |
| }, | |
| { | |
| "epoch": 0.2909090909090909, | |
| "grad_norm": 0.6582951442716148, | |
| "learning_rate": 1.651948051948052e-05, | |
| "loss": 0.4335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3294903635978699, | |
| "step": 160, | |
| "valid_targets_mean": 2506.0, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.6563223575769623, | |
| "learning_rate": 1.703896103896104e-05, | |
| "loss": 0.3676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41344767808914185, | |
| "step": 165, | |
| "valid_targets_mean": 3668.6, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 0.3090909090909091, | |
| "grad_norm": 0.6459971009182044, | |
| "learning_rate": 1.7558441558441558e-05, | |
| "loss": 0.349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3544159531593323, | |
| "step": 170, | |
| "valid_targets_mean": 2733.3, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 0.5674634529192868, | |
| "learning_rate": 1.807792207792208e-05, | |
| "loss": 0.3579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2982725501060486, | |
| "step": 175, | |
| "valid_targets_mean": 2959.8, | |
| "valid_targets_min": 964 | |
| }, | |
| { | |
| "epoch": 0.32727272727272727, | |
| "grad_norm": 0.6303381761627781, | |
| "learning_rate": 1.8597402597402598e-05, | |
| "loss": 0.3505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3669891953468323, | |
| "step": 180, | |
| "valid_targets_mean": 2843.2, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 0.33636363636363636, | |
| "grad_norm": 0.7564327052685856, | |
| "learning_rate": 1.9116883116883117e-05, | |
| "loss": 0.3954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4271281957626343, | |
| "step": 185, | |
| "valid_targets_mean": 2196.0, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 0.34545454545454546, | |
| "grad_norm": 0.5774191959868498, | |
| "learning_rate": 1.963636363636364e-05, | |
| "loss": 0.3696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2974555492401123, | |
| "step": 190, | |
| "valid_targets_mean": 2704.4, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 0.35454545454545455, | |
| "grad_norm": 0.5845657238298496, | |
| "learning_rate": 2.0155844155844157e-05, | |
| "loss": 0.3301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2901238799095154, | |
| "step": 195, | |
| "valid_targets_mean": 2632.3, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.6635921725960787, | |
| "learning_rate": 2.0675324675324675e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37927788496017456, | |
| "step": 200, | |
| "valid_targets_mean": 2613.2, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 0.37272727272727274, | |
| "grad_norm": 1.0884508967232478, | |
| "learning_rate": 2.1194805194805194e-05, | |
| "loss": 0.3386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3851262032985687, | |
| "step": 205, | |
| "valid_targets_mean": 2818.7, | |
| "valid_targets_min": 1403 | |
| }, | |
| { | |
| "epoch": 0.38181818181818183, | |
| "grad_norm": 0.668205633210714, | |
| "learning_rate": 2.1714285714285715e-05, | |
| "loss": 0.3898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38827866315841675, | |
| "step": 210, | |
| "valid_targets_mean": 3074.4, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 0.39090909090909093, | |
| "grad_norm": 0.6948506532864128, | |
| "learning_rate": 2.2233766233766234e-05, | |
| "loss": 0.3548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3508937358856201, | |
| "step": 215, | |
| "valid_targets_mean": 2631.0, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.6483556182899709, | |
| "learning_rate": 2.2753246753246752e-05, | |
| "loss": 0.3389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36243948340415955, | |
| "step": 220, | |
| "valid_targets_mean": 2725.5, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 0.6208051012663126, | |
| "learning_rate": 2.3272727272727274e-05, | |
| "loss": 0.3561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3889102637767792, | |
| "step": 225, | |
| "valid_targets_mean": 3122.2, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 0.41818181818181815, | |
| "grad_norm": 0.6336603399119323, | |
| "learning_rate": 2.3792207792207793e-05, | |
| "loss": 0.3457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31206339597702026, | |
| "step": 230, | |
| "valid_targets_mean": 2648.8, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 0.42727272727272725, | |
| "grad_norm": 0.9001451266995485, | |
| "learning_rate": 2.4311688311688314e-05, | |
| "loss": 0.3625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4581303298473358, | |
| "step": 235, | |
| "valid_targets_mean": 2362.3, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 0.43636363636363634, | |
| "grad_norm": 0.6370411644388215, | |
| "learning_rate": 2.4831168831168833e-05, | |
| "loss": 0.3745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46398934721946716, | |
| "step": 240, | |
| "valid_targets_mean": 3442.1, | |
| "valid_targets_min": 1364 | |
| }, | |
| { | |
| "epoch": 0.44545454545454544, | |
| "grad_norm": 0.6449951274638801, | |
| "learning_rate": 2.535064935064935e-05, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3919605612754822, | |
| "step": 245, | |
| "valid_targets_mean": 2908.2, | |
| "valid_targets_min": 1500 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 0.6521200383438808, | |
| "learning_rate": 2.5870129870129873e-05, | |
| "loss": 0.3661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4429694414138794, | |
| "step": 250, | |
| "valid_targets_mean": 3033.2, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 0.4636363636363636, | |
| "grad_norm": 0.6773464252143542, | |
| "learning_rate": 2.638961038961039e-05, | |
| "loss": 0.3656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.367068886756897, | |
| "step": 255, | |
| "valid_targets_mean": 3075.7, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 0.4727272727272727, | |
| "grad_norm": 1.4569160111773742, | |
| "learning_rate": 2.690909090909091e-05, | |
| "loss": 0.3427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40190428495407104, | |
| "step": 260, | |
| "valid_targets_mean": 3337.8, | |
| "valid_targets_min": 1620 | |
| }, | |
| { | |
| "epoch": 0.4818181818181818, | |
| "grad_norm": 0.6924018302373937, | |
| "learning_rate": 2.742857142857143e-05, | |
| "loss": 0.3483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31538522243499756, | |
| "step": 265, | |
| "valid_targets_mean": 2467.6, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 0.4909090909090909, | |
| "grad_norm": 0.6869692427452243, | |
| "learning_rate": 2.794805194805195e-05, | |
| "loss": 0.3196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3204636573791504, | |
| "step": 270, | |
| "valid_targets_mean": 2205.6, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.6370154405500698, | |
| "learning_rate": 2.8467532467532472e-05, | |
| "loss": 0.3277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3479101359844208, | |
| "step": 275, | |
| "valid_targets_mean": 3041.8, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 0.509090909090909, | |
| "grad_norm": 0.6425903782342028, | |
| "learning_rate": 2.898701298701299e-05, | |
| "loss": 0.3708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37169209122657776, | |
| "step": 280, | |
| "valid_targets_mean": 2951.7, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 0.5181818181818182, | |
| "grad_norm": 0.6611801390841658, | |
| "learning_rate": 2.950649350649351e-05, | |
| "loss": 0.3314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3530304431915283, | |
| "step": 285, | |
| "valid_targets_mean": 2644.7, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 0.5272727272727272, | |
| "grad_norm": 0.6357035859012629, | |
| "learning_rate": 3.002597402597403e-05, | |
| "loss": 0.3537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3512324094772339, | |
| "step": 290, | |
| "valid_targets_mean": 2653.9, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 0.5363636363636364, | |
| "grad_norm": 0.6939851083287091, | |
| "learning_rate": 3.054545454545455e-05, | |
| "loss": 0.3769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42176488041877747, | |
| "step": 295, | |
| "valid_targets_mean": 2781.9, | |
| "valid_targets_min": 922 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.9499594024592395, | |
| "learning_rate": 3.106493506493507e-05, | |
| "loss": 0.355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4803914427757263, | |
| "step": 300, | |
| "valid_targets_mean": 2959.4, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 0.5545454545454546, | |
| "grad_norm": 0.6488791936318495, | |
| "learning_rate": 3.158441558441559e-05, | |
| "loss": 0.3629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35879287123680115, | |
| "step": 305, | |
| "valid_targets_mean": 2696.9, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 0.5636363636363636, | |
| "grad_norm": 0.6231805329329607, | |
| "learning_rate": 3.210389610389611e-05, | |
| "loss": 0.3662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3448548913002014, | |
| "step": 310, | |
| "valid_targets_mean": 2770.7, | |
| "valid_targets_min": 1559 | |
| }, | |
| { | |
| "epoch": 0.5727272727272728, | |
| "grad_norm": 0.5744702058158829, | |
| "learning_rate": 3.2623376623376626e-05, | |
| "loss": 0.3682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34380805492401123, | |
| "step": 315, | |
| "valid_targets_mean": 2960.0, | |
| "valid_targets_min": 1214 | |
| }, | |
| { | |
| "epoch": 0.5818181818181818, | |
| "grad_norm": 0.6620931219456028, | |
| "learning_rate": 3.314285714285715e-05, | |
| "loss": 0.3326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34156909584999084, | |
| "step": 320, | |
| "valid_targets_mean": 2785.3, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 0.6403063875961951, | |
| "learning_rate": 3.366233766233766e-05, | |
| "loss": 0.3039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3152548670768738, | |
| "step": 325, | |
| "valid_targets_mean": 3012.6, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.6672907050177364, | |
| "learning_rate": 3.4181818181818185e-05, | |
| "loss": 0.3384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3686109781265259, | |
| "step": 330, | |
| "valid_targets_mean": 2802.0, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 0.6090909090909091, | |
| "grad_norm": 0.7499566408657522, | |
| "learning_rate": 3.47012987012987e-05, | |
| "loss": 0.3784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.412411630153656, | |
| "step": 335, | |
| "valid_targets_mean": 2592.4, | |
| "valid_targets_min": 1169 | |
| }, | |
| { | |
| "epoch": 0.6181818181818182, | |
| "grad_norm": 0.6208601795738106, | |
| "learning_rate": 3.522077922077922e-05, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2600933313369751, | |
| "step": 340, | |
| "valid_targets_mean": 2492.6, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 0.6272727272727273, | |
| "grad_norm": 0.5997046648171032, | |
| "learning_rate": 3.5740259740259743e-05, | |
| "loss": 0.3524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3274972438812256, | |
| "step": 345, | |
| "valid_targets_mean": 2641.0, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 0.5773178226077512, | |
| "learning_rate": 3.625974025974026e-05, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35591813921928406, | |
| "step": 350, | |
| "valid_targets_mean": 3244.9, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 0.6454545454545455, | |
| "grad_norm": 0.6612206892362726, | |
| "learning_rate": 3.677922077922078e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3410378098487854, | |
| "step": 355, | |
| "valid_targets_mean": 2283.9, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 0.6545454545454545, | |
| "grad_norm": 0.634520882439268, | |
| "learning_rate": 3.72987012987013e-05, | |
| "loss": 0.315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3190503716468811, | |
| "step": 360, | |
| "valid_targets_mean": 2368.0, | |
| "valid_targets_min": 1201 | |
| }, | |
| { | |
| "epoch": 0.6636363636363637, | |
| "grad_norm": 0.7186007261011443, | |
| "learning_rate": 3.7818181818181824e-05, | |
| "loss": 0.3663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43607550859451294, | |
| "step": 365, | |
| "valid_targets_mean": 2984.2, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 0.6727272727272727, | |
| "grad_norm": 0.6626084696102383, | |
| "learning_rate": 3.833766233766234e-05, | |
| "loss": 0.3417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28545016050338745, | |
| "step": 370, | |
| "valid_targets_mean": 2105.5, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 0.7422089667942458, | |
| "learning_rate": 3.885714285714286e-05, | |
| "loss": 0.3501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.337421715259552, | |
| "step": 375, | |
| "valid_targets_mean": 2343.6, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 0.6909090909090909, | |
| "grad_norm": 0.6377484020727952, | |
| "learning_rate": 3.937662337662338e-05, | |
| "loss": 0.4028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39789557456970215, | |
| "step": 380, | |
| "valid_targets_mean": 2773.6, | |
| "valid_targets_min": 1463 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.2550270765338776, | |
| "learning_rate": 3.98961038961039e-05, | |
| "loss": 0.3573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3744387626647949, | |
| "step": 385, | |
| "valid_targets_mean": 2553.1, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 0.7090909090909091, | |
| "grad_norm": 0.6439863150267273, | |
| "learning_rate": 3.999986847364818e-05, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32510316371917725, | |
| "step": 390, | |
| "valid_targets_mean": 2936.0, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 0.7181818181818181, | |
| "grad_norm": 0.5549572539070133, | |
| "learning_rate": 3.999933415080877e-05, | |
| "loss": 0.3555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34088924527168274, | |
| "step": 395, | |
| "valid_targets_mean": 2975.9, | |
| "valid_targets_min": 1628 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.5728271061572586, | |
| "learning_rate": 3.999838882205719e-05, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3326980471611023, | |
| "step": 400, | |
| "valid_targets_mean": 3024.2, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 0.7363636363636363, | |
| "grad_norm": 0.5933107195266674, | |
| "learning_rate": 3.999703250682087e-05, | |
| "loss": 0.3487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3289388120174408, | |
| "step": 405, | |
| "valid_targets_mean": 3081.2, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 0.7454545454545455, | |
| "grad_norm": 0.5704829683945404, | |
| "learning_rate": 3.9995265232973414e-05, | |
| "loss": 0.3492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33433687686920166, | |
| "step": 410, | |
| "valid_targets_mean": 2857.6, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 0.7545454545454545, | |
| "grad_norm": 0.6306739160212078, | |
| "learning_rate": 3.9993087036834034e-05, | |
| "loss": 0.3117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3217725455760956, | |
| "step": 415, | |
| "valid_targets_mean": 2499.9, | |
| "valid_targets_min": 1433 | |
| }, | |
| { | |
| "epoch": 0.7636363636363637, | |
| "grad_norm": 1.5946938923112275, | |
| "learning_rate": 3.9990497963166797e-05, | |
| "loss": 0.3382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2770576477050781, | |
| "step": 420, | |
| "valid_targets_mean": 2605.4, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 0.600595195060008, | |
| "learning_rate": 3.99874980651797e-05, | |
| "loss": 0.3466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37298667430877686, | |
| "step": 425, | |
| "valid_targets_mean": 2681.2, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 0.7818181818181819, | |
| "grad_norm": 1.27833536268593, | |
| "learning_rate": 3.998408740452359e-05, | |
| "loss": 0.3358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2970236539840698, | |
| "step": 430, | |
| "valid_targets_mean": 2670.9, | |
| "valid_targets_min": 1294 | |
| }, | |
| { | |
| "epoch": 0.7909090909090909, | |
| "grad_norm": 0.556446397533044, | |
| "learning_rate": 3.998026605129088e-05, | |
| "loss": 0.3138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32350656390190125, | |
| "step": 435, | |
| "valid_targets_mean": 2978.1, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.5867403646744739, | |
| "learning_rate": 3.997603408401413e-05, | |
| "loss": 0.3452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3522062301635742, | |
| "step": 440, | |
| "valid_targets_mean": 3308.9, | |
| "valid_targets_min": 703 | |
| }, | |
| { | |
| "epoch": 0.8090909090909091, | |
| "grad_norm": 0.5864500142871352, | |
| "learning_rate": 3.997139158966441e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30179262161254883, | |
| "step": 445, | |
| "valid_targets_mean": 2586.1, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 0.7051801097423364, | |
| "learning_rate": 3.996633866364953e-05, | |
| "loss": 0.3519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41929012537002563, | |
| "step": 450, | |
| "valid_targets_mean": 2292.3, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 0.8272727272727273, | |
| "grad_norm": 0.6665108592747994, | |
| "learning_rate": 3.996087540981206e-05, | |
| "loss": 0.3488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42378395795822144, | |
| "step": 455, | |
| "valid_targets_mean": 2839.5, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 0.8363636363636363, | |
| "grad_norm": 0.5610962836076394, | |
| "learning_rate": 3.9955001940427236e-05, | |
| "loss": 0.3157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3496928811073303, | |
| "step": 460, | |
| "valid_targets_mean": 3111.1, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 0.8454545454545455, | |
| "grad_norm": 0.6683085302834227, | |
| "learning_rate": 3.99487183762006e-05, | |
| "loss": 0.3339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3613266050815582, | |
| "step": 465, | |
| "valid_targets_mean": 2604.9, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 0.8545454545454545, | |
| "grad_norm": 0.6083634878868931, | |
| "learning_rate": 3.994202484626555e-05, | |
| "loss": 0.3391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32465070486068726, | |
| "step": 470, | |
| "valid_targets_mean": 2452.5, | |
| "valid_targets_min": 1520 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 0.612777955562894, | |
| "learning_rate": 3.993492148818069e-05, | |
| "loss": 0.3187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2962433695793152, | |
| "step": 475, | |
| "valid_targets_mean": 2897.3, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 0.8727272727272727, | |
| "grad_norm": 0.5845598358859424, | |
| "learning_rate": 3.992740844792699e-05, | |
| "loss": 0.316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2899693250656128, | |
| "step": 480, | |
| "valid_targets_mean": 2515.9, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 0.8818181818181818, | |
| "grad_norm": 0.5412938052268608, | |
| "learning_rate": 3.991948587990479e-05, | |
| "loss": 0.3055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2653069496154785, | |
| "step": 485, | |
| "valid_targets_mean": 3022.9, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 0.8909090909090909, | |
| "grad_norm": 0.6492200255248272, | |
| "learning_rate": 3.991115394693061e-05, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2680763602256775, | |
| "step": 490, | |
| "valid_targets_mean": 2127.9, | |
| "valid_targets_min": 1310 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.5141368409068433, | |
| "learning_rate": 3.990241282023385e-05, | |
| "loss": 0.3307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.286845326423645, | |
| "step": 495, | |
| "valid_targets_mean": 3001.3, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.574684130591376, | |
| "learning_rate": 3.989326267945323e-05, | |
| "loss": 0.3325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29765868186950684, | |
| "step": 500, | |
| "valid_targets_mean": 2777.2, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 0.9181818181818182, | |
| "grad_norm": 0.5692282689065302, | |
| "learning_rate": 3.98837037126331e-05, | |
| "loss": 0.344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3814631700515747, | |
| "step": 505, | |
| "valid_targets_mean": 3176.1, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 0.9272727272727272, | |
| "grad_norm": 0.5977294617991848, | |
| "learning_rate": 3.98737361162196e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3976469933986664, | |
| "step": 510, | |
| "valid_targets_mean": 3157.1, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 0.9363636363636364, | |
| "grad_norm": 0.6220654599149978, | |
| "learning_rate": 3.986336009505659e-05, | |
| "loss": 0.337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3083745837211609, | |
| "step": 515, | |
| "valid_targets_mean": 2279.6, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 0.9454545454545454, | |
| "grad_norm": 0.546118021121679, | |
| "learning_rate": 3.985257586238149e-05, | |
| "loss": 0.3207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28898388147354126, | |
| "step": 520, | |
| "valid_targets_mean": 2713.4, | |
| "valid_targets_min": 1345 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 0.5482830720326405, | |
| "learning_rate": 3.984138363982084e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27064448595046997, | |
| "step": 525, | |
| "valid_targets_mean": 2486.1, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 0.9636363636363636, | |
| "grad_norm": 0.6226203497672143, | |
| "learning_rate": 3.982978365738578e-05, | |
| "loss": 0.3361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3750750422477722, | |
| "step": 530, | |
| "valid_targets_mean": 2812.8, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 0.9727272727272728, | |
| "grad_norm": 0.5612366252493312, | |
| "learning_rate": 3.981777615346731e-05, | |
| "loss": 0.3232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3804115653038025, | |
| "step": 535, | |
| "valid_targets_mean": 2720.2, | |
| "valid_targets_min": 927 | |
| }, | |
| { | |
| "epoch": 0.9818181818181818, | |
| "grad_norm": 0.6294458190395656, | |
| "learning_rate": 3.980536137483141e-05, | |
| "loss": 0.3434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4118061363697052, | |
| "step": 540, | |
| "valid_targets_mean": 2462.1, | |
| "valid_targets_min": 1454 | |
| }, | |
| { | |
| "epoch": 0.990909090909091, | |
| "grad_norm": 0.6220192316895428, | |
| "learning_rate": 3.9792539576613934e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32386863231658936, | |
| "step": 545, | |
| "valid_targets_mean": 2925.1, | |
| "valid_targets_min": 531 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5842866957763833, | |
| "learning_rate": 3.9779311022315405e-05, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2880892753601074, | |
| "step": 550, | |
| "valid_targets_mean": 2579.1, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 1.009090909090909, | |
| "grad_norm": 0.6447275138647391, | |
| "learning_rate": 3.976567598379558e-05, | |
| "loss": 0.3405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3465537428855896, | |
| "step": 555, | |
| "valid_targets_mean": 2912.1, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 1.018181818181818, | |
| "grad_norm": 0.5563184580746852, | |
| "learning_rate": 3.975163474126785e-05, | |
| "loss": 0.3241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34276115894317627, | |
| "step": 560, | |
| "valid_targets_mean": 3331.6, | |
| "valid_targets_min": 1656 | |
| }, | |
| { | |
| "epoch": 1.0272727272727273, | |
| "grad_norm": 0.6091974583109037, | |
| "learning_rate": 3.9737187583293505e-05, | |
| "loss": 0.3216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28951388597488403, | |
| "step": 565, | |
| "valid_targets_mean": 2659.5, | |
| "valid_targets_min": 1432 | |
| }, | |
| { | |
| "epoch": 1.0363636363636364, | |
| "grad_norm": 0.5131660550916598, | |
| "learning_rate": 3.9722334806775806e-05, | |
| "loss": 0.3127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32671698927879333, | |
| "step": 570, | |
| "valid_targets_mean": 3052.7, | |
| "valid_targets_min": 1228 | |
| }, | |
| { | |
| "epoch": 1.0454545454545454, | |
| "grad_norm": 0.5675543992731116, | |
| "learning_rate": 3.9707076716953866e-05, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3123570680618286, | |
| "step": 575, | |
| "valid_targets_mean": 2878.4, | |
| "valid_targets_min": 1736 | |
| }, | |
| { | |
| "epoch": 1.0545454545454545, | |
| "grad_norm": 0.6335186826998919, | |
| "learning_rate": 3.969141362739636e-05, | |
| "loss": 0.2983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32032886147499084, | |
| "step": 580, | |
| "valid_targets_mean": 2756.3, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 1.0636363636363637, | |
| "grad_norm": 0.5740150543951968, | |
| "learning_rate": 3.967534585999515e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25155892968177795, | |
| "step": 585, | |
| "valid_targets_mean": 2694.0, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 1.0727272727272728, | |
| "grad_norm": 0.588406146061277, | |
| "learning_rate": 3.965887374495859e-05, | |
| "loss": 0.3043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2900567352771759, | |
| "step": 590, | |
| "valid_targets_mean": 2990.4, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 1.0818181818181818, | |
| "grad_norm": 0.5601881961473839, | |
| "learning_rate": 3.964199762080478e-05, | |
| "loss": 0.3065, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3262474238872528, | |
| "step": 595, | |
| "valid_targets_mean": 3018.2, | |
| "valid_targets_min": 1111 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 0.5520777767766365, | |
| "learning_rate": 3.9624717834354606e-05, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2716611921787262, | |
| "step": 600, | |
| "valid_targets_mean": 2441.5, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.5612527579381237, | |
| "learning_rate": 3.9607034740724615e-05, | |
| "loss": 0.2902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31123465299606323, | |
| "step": 605, | |
| "valid_targets_mean": 2829.6, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 1.1090909090909091, | |
| "grad_norm": 0.5797580457154696, | |
| "learning_rate": 3.958894870331971e-05, | |
| "loss": 0.2809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26564833521842957, | |
| "step": 610, | |
| "valid_targets_mean": 2660.3, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 1.1181818181818182, | |
| "grad_norm": 2.3781597689886103, | |
| "learning_rate": 3.9570460093825664e-05, | |
| "loss": 0.347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3429916501045227, | |
| "step": 615, | |
| "valid_targets_mean": 3107.9, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 1.1272727272727272, | |
| "grad_norm": 0.552110516149831, | |
| "learning_rate": 3.9551569292201536e-05, | |
| "loss": 0.2715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30601733922958374, | |
| "step": 620, | |
| "valid_targets_mean": 2663.9, | |
| "valid_targets_min": 1271 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 0.6417975384157059, | |
| "learning_rate": 3.9532276686671804e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33833664655685425, | |
| "step": 625, | |
| "valid_targets_mean": 3195.1, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 1.1454545454545455, | |
| "grad_norm": 0.6137360698285799, | |
| "learning_rate": 3.951258267371841e-05, | |
| "loss": 0.2971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3272460401058197, | |
| "step": 630, | |
| "valid_targets_mean": 2482.5, | |
| "valid_targets_min": 1562 | |
| }, | |
| { | |
| "epoch": 1.1545454545454545, | |
| "grad_norm": 0.5820527391442863, | |
| "learning_rate": 3.9492487658072615e-05, | |
| "loss": 0.2888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29666125774383545, | |
| "step": 635, | |
| "valid_targets_mean": 2572.6, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 1.1636363636363636, | |
| "grad_norm": 0.5678713238695126, | |
| "learning_rate": 3.947199205270668e-05, | |
| "loss": 0.3227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34240779280662537, | |
| "step": 640, | |
| "valid_targets_mean": 3045.4, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 1.1727272727272728, | |
| "grad_norm": 0.5266377557583839, | |
| "learning_rate": 3.9451096278825386e-05, | |
| "loss": 0.3286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36640381813049316, | |
| "step": 645, | |
| "valid_targets_mean": 3615.8, | |
| "valid_targets_min": 1346 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 0.5322441323754087, | |
| "learning_rate": 3.942980076585735e-05, | |
| "loss": 0.2964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25494956970214844, | |
| "step": 650, | |
| "valid_targets_mean": 2572.3, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 1.190909090909091, | |
| "grad_norm": 0.5414703426964603, | |
| "learning_rate": 3.940810595144624e-05, | |
| "loss": 0.2882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28678566217422485, | |
| "step": 655, | |
| "valid_targets_mean": 2838.4, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.9276630436508214, | |
| "learning_rate": 3.938601228144173e-05, | |
| "loss": 0.284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2708185613155365, | |
| "step": 660, | |
| "valid_targets_mean": 3114.6, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 1.209090909090909, | |
| "grad_norm": 0.5900825119721383, | |
| "learning_rate": 3.9363520209890405e-05, | |
| "loss": 0.3043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3425714671611786, | |
| "step": 665, | |
| "valid_targets_mean": 3101.9, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 1.2181818181818183, | |
| "grad_norm": 0.5479378833478837, | |
| "learning_rate": 3.9340630199026365e-05, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31353360414505005, | |
| "step": 670, | |
| "valid_targets_mean": 3066.2, | |
| "valid_targets_min": 1100 | |
| }, | |
| { | |
| "epoch": 1.2272727272727273, | |
| "grad_norm": 0.6726858996451737, | |
| "learning_rate": 3.931734271926176e-05, | |
| "loss": 0.301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36476853489875793, | |
| "step": 675, | |
| "valid_targets_mean": 2387.9, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 1.2363636363636363, | |
| "grad_norm": 0.5964455414003474, | |
| "learning_rate": 3.929365824917712e-05, | |
| "loss": 0.3236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32583141326904297, | |
| "step": 680, | |
| "valid_targets_mean": 2670.8, | |
| "valid_targets_min": 1297 | |
| }, | |
| { | |
| "epoch": 1.2454545454545454, | |
| "grad_norm": 0.5781040411209731, | |
| "learning_rate": 3.9269577275511504e-05, | |
| "loss": 0.3417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30773332715034485, | |
| "step": 685, | |
| "valid_targets_mean": 2672.5, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.2545454545454544, | |
| "grad_norm": 0.5639647192049679, | |
| "learning_rate": 3.924510029315253e-05, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2964206635951996, | |
| "step": 690, | |
| "valid_targets_mean": 2741.2, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 1.2636363636363637, | |
| "grad_norm": 0.5627064673073386, | |
| "learning_rate": 3.922022780512614e-05, | |
| "loss": 0.3456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.355957567691803, | |
| "step": 695, | |
| "valid_targets_mean": 3049.9, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 0.49715452818653605, | |
| "learning_rate": 3.919496032258637e-05, | |
| "loss": 0.2891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23877036571502686, | |
| "step": 700, | |
| "valid_targets_mean": 3023.6, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 1.2818181818181817, | |
| "grad_norm": 0.49529413287800456, | |
| "learning_rate": 3.9169298364804716e-05, | |
| "loss": 0.3041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22517327964305878, | |
| "step": 705, | |
| "valid_targets_mean": 2738.3, | |
| "valid_targets_min": 538 | |
| }, | |
| { | |
| "epoch": 1.290909090909091, | |
| "grad_norm": 0.6026202338409374, | |
| "learning_rate": 3.914324245915956e-05, | |
| "loss": 0.311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36687037348747253, | |
| "step": 710, | |
| "valid_targets_mean": 3185.6, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.580349480036476, | |
| "learning_rate": 3.91167931411253e-05, | |
| "loss": 0.3153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25920552015304565, | |
| "step": 715, | |
| "valid_targets_mean": 3105.7, | |
| "valid_targets_min": 1270 | |
| }, | |
| { | |
| "epoch": 1.309090909090909, | |
| "grad_norm": 0.5139387937516867, | |
| "learning_rate": 3.908995095426134e-05, | |
| "loss": 0.3288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3409667909145355, | |
| "step": 720, | |
| "valid_targets_mean": 3377.1, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 1.3181818181818181, | |
| "grad_norm": 0.6127282175817513, | |
| "learning_rate": 3.90627164502009e-05, | |
| "loss": 0.273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26966482400894165, | |
| "step": 725, | |
| "valid_targets_mean": 2139.5, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 1.3272727272727272, | |
| "grad_norm": 0.5506882869535827, | |
| "learning_rate": 3.903509018863974e-05, | |
| "loss": 0.2881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2546706199645996, | |
| "step": 730, | |
| "valid_targets_mean": 2567.1, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 1.3363636363636364, | |
| "grad_norm": 0.555751244337636, | |
| "learning_rate": 3.90070727373246e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3293226957321167, | |
| "step": 735, | |
| "valid_targets_mean": 2879.5, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 1.3454545454545455, | |
| "grad_norm": 1.4072423287991722, | |
| "learning_rate": 3.897866467204155e-05, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2524782419204712, | |
| "step": 740, | |
| "valid_targets_mean": 2753.0, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 1.3545454545454545, | |
| "grad_norm": 0.5528867464762154, | |
| "learning_rate": 3.894986657660418e-05, | |
| "loss": 0.301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32227402925491333, | |
| "step": 745, | |
| "valid_targets_mean": 2946.1, | |
| "valid_targets_min": 1568 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 0.577032285353784, | |
| "learning_rate": 3.892067904284154e-05, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2747570872306824, | |
| "step": 750, | |
| "valid_targets_mean": 2776.9, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 1.3727272727272728, | |
| "grad_norm": 0.6292241710341524, | |
| "learning_rate": 3.889110267058608e-05, | |
| "loss": 0.2927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33666500449180603, | |
| "step": 755, | |
| "valid_targets_mean": 2910.2, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 1.3818181818181818, | |
| "grad_norm": 0.563288737269173, | |
| "learning_rate": 3.886113806766121e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2766912877559662, | |
| "step": 760, | |
| "valid_targets_mean": 2442.6, | |
| "valid_targets_min": 603 | |
| }, | |
| { | |
| "epoch": 1.3909090909090909, | |
| "grad_norm": 0.6656004786051898, | |
| "learning_rate": 3.883078584986888e-05, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29978060722351074, | |
| "step": 765, | |
| "valid_targets_mean": 2543.6, | |
| "valid_targets_min": 1096 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.5236885976409229, | |
| "learning_rate": 3.8800046640976916e-05, | |
| "loss": 0.3098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33730077743530273, | |
| "step": 770, | |
| "valid_targets_mean": 3164.1, | |
| "valid_targets_min": 1218 | |
| }, | |
| { | |
| "epoch": 1.4090909090909092, | |
| "grad_norm": 0.5593924531226494, | |
| "learning_rate": 3.876892107270616e-05, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2947944700717926, | |
| "step": 775, | |
| "valid_targets_mean": 2482.2, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 1.4181818181818182, | |
| "grad_norm": 0.5906084708968861, | |
| "learning_rate": 3.873740978471755e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23599432408809662, | |
| "step": 780, | |
| "valid_targets_mean": 2478.7, | |
| "valid_targets_min": 1493 | |
| }, | |
| { | |
| "epoch": 1.4272727272727272, | |
| "grad_norm": 0.5850744993629736, | |
| "learning_rate": 3.8705513424598934e-05, | |
| "loss": 0.3283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30988335609436035, | |
| "step": 785, | |
| "valid_targets_mean": 2411.6, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 1.4363636363636363, | |
| "grad_norm": 0.5894689684496299, | |
| "learning_rate": 3.8673232647851756e-05, | |
| "loss": 0.2962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31927740573883057, | |
| "step": 790, | |
| "valid_targets_mean": 2514.2, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 1.4454545454545453, | |
| "grad_norm": 0.5495995497086523, | |
| "learning_rate": 3.8640568117877594e-05, | |
| "loss": 0.3044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33518069982528687, | |
| "step": 795, | |
| "valid_targets_mean": 3436.6, | |
| "valid_targets_min": 1066 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 0.6941187533631331, | |
| "learning_rate": 3.8607520505964574e-05, | |
| "loss": 0.2927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2904342710971832, | |
| "step": 800, | |
| "valid_targets_mean": 2603.8, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 1.4636363636363636, | |
| "grad_norm": 0.5460794833502489, | |
| "learning_rate": 3.857409049127348e-05, | |
| "loss": 0.2973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.279479444026947, | |
| "step": 805, | |
| "valid_targets_mean": 2939.2, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 1.4727272727272727, | |
| "grad_norm": 0.5927685903990932, | |
| "learning_rate": 3.8540278760823866e-05, | |
| "loss": 0.3024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3844236135482788, | |
| "step": 810, | |
| "valid_targets_mean": 2929.4, | |
| "valid_targets_min": 1126 | |
| }, | |
| { | |
| "epoch": 1.481818181818182, | |
| "grad_norm": 0.6004710936297869, | |
| "learning_rate": 3.8506086009479934e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35591375827789307, | |
| "step": 815, | |
| "valid_targets_mean": 2592.2, | |
| "valid_targets_min": 890 | |
| }, | |
| { | |
| "epoch": 1.490909090909091, | |
| "grad_norm": 0.5137662741104242, | |
| "learning_rate": 3.8471512939936224e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28532475233078003, | |
| "step": 820, | |
| "valid_targets_mean": 3796.9, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.580257790045676, | |
| "learning_rate": 3.843656026270319e-05, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32185226678848267, | |
| "step": 825, | |
| "valid_targets_mean": 2719.7, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 1.509090909090909, | |
| "grad_norm": 0.5166030598464046, | |
| "learning_rate": 3.840122869609258e-05, | |
| "loss": 0.3075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25254085659980774, | |
| "step": 830, | |
| "valid_targets_mean": 3248.6, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 1.518181818181818, | |
| "grad_norm": 0.5921245945686834, | |
| "learning_rate": 3.8365518966202724e-05, | |
| "loss": 0.3173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3269743323326111, | |
| "step": 835, | |
| "valid_targets_mean": 2656.5, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 1.5272727272727273, | |
| "grad_norm": 0.6134729011732185, | |
| "learning_rate": 3.832943180690356e-05, | |
| "loss": 0.3075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3301253020763397, | |
| "step": 840, | |
| "valid_targets_mean": 2366.7, | |
| "valid_targets_min": 1095 | |
| }, | |
| { | |
| "epoch": 1.5363636363636364, | |
| "grad_norm": 0.535738543010804, | |
| "learning_rate": 3.829296795982156e-05, | |
| "loss": 0.2853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28925201296806335, | |
| "step": 845, | |
| "valid_targets_mean": 2624.3, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 0.600523233527797, | |
| "learning_rate": 3.8256128174324515e-05, | |
| "loss": 0.3094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3454379141330719, | |
| "step": 850, | |
| "valid_targets_mean": 2845.1, | |
| "valid_targets_min": 1278 | |
| }, | |
| { | |
| "epoch": 1.5545454545454547, | |
| "grad_norm": 0.6308938968736859, | |
| "learning_rate": 3.82189132075061e-05, | |
| "loss": 0.3011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30569013953208923, | |
| "step": 855, | |
| "valid_targets_mean": 2541.4, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 1.5636363636363635, | |
| "grad_norm": 0.5347500694529886, | |
| "learning_rate": 3.818132382417037e-05, | |
| "loss": 0.2995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3351518213748932, | |
| "step": 860, | |
| "valid_targets_mean": 3025.1, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 1.5727272727272728, | |
| "grad_norm": 0.5708969532984176, | |
| "learning_rate": 3.8143360796815964e-05, | |
| "loss": 0.3366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305366635322571, | |
| "step": 865, | |
| "valid_targets_mean": 3047.1, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 1.5818181818181818, | |
| "grad_norm": 0.5387436367009513, | |
| "learning_rate": 3.81050249056203e-05, | |
| "loss": 0.3013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3135007619857788, | |
| "step": 870, | |
| "valid_targets_mean": 3312.9, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 0.5237363567692674, | |
| "learning_rate": 3.8066316938423495e-05, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3161287009716034, | |
| "step": 875, | |
| "valid_targets_mean": 3033.4, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.5633442537930327, | |
| "learning_rate": 3.8027237690712206e-05, | |
| "loss": 0.3001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29282844066619873, | |
| "step": 880, | |
| "valid_targets_mean": 2575.6, | |
| "valid_targets_min": 1008 | |
| }, | |
| { | |
| "epoch": 1.6090909090909091, | |
| "grad_norm": 0.6071962643151043, | |
| "learning_rate": 3.798778796560326e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3855435848236084, | |
| "step": 885, | |
| "valid_targets_mean": 3637.1, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 1.6181818181818182, | |
| "grad_norm": 0.5144501183205095, | |
| "learning_rate": 3.794796857382717e-05, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26045137643814087, | |
| "step": 890, | |
| "valid_targets_mean": 2535.8, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 1.6272727272727274, | |
| "grad_norm": 0.6391108758936941, | |
| "learning_rate": 3.790778033371145e-05, | |
| "loss": 0.3247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3640952706336975, | |
| "step": 895, | |
| "valid_targets_mean": 2988.9, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.46946518875838034, | |
| "learning_rate": 3.786722407116379e-05, | |
| "loss": 0.3034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2868632674217224, | |
| "step": 900, | |
| "valid_targets_mean": 3317.6, | |
| "valid_targets_min": 1161 | |
| }, | |
| { | |
| "epoch": 1.6454545454545455, | |
| "grad_norm": 0.5766601735552375, | |
| "learning_rate": 3.782630061965515e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30782195925712585, | |
| "step": 905, | |
| "valid_targets_mean": 2723.4, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 1.6545454545454545, | |
| "grad_norm": 0.6297295273887248, | |
| "learning_rate": 3.778501082020255e-05, | |
| "loss": 0.3104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3077327609062195, | |
| "step": 910, | |
| "valid_targets_mean": 2310.8, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 1.6636363636363636, | |
| "grad_norm": 0.5724281159599157, | |
| "learning_rate": 3.7743355521351814e-05, | |
| "loss": 0.2642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2423037588596344, | |
| "step": 915, | |
| "valid_targets_mean": 2519.7, | |
| "valid_targets_min": 1096 | |
| }, | |
| { | |
| "epoch": 1.6727272727272728, | |
| "grad_norm": 0.5784482599606942, | |
| "learning_rate": 3.7701335579160147e-05, | |
| "loss": 0.2959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26155000925064087, | |
| "step": 920, | |
| "valid_targets_mean": 2259.2, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 1.6818181818181817, | |
| "grad_norm": 0.6257032915898507, | |
| "learning_rate": 3.7658951857178544e-05, | |
| "loss": 0.2945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27045929431915283, | |
| "step": 925, | |
| "valid_targets_mean": 2321.8, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 1.690909090909091, | |
| "grad_norm": 0.47330627816498005, | |
| "learning_rate": 3.7616205226434005e-05, | |
| "loss": 0.3099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3237019181251526, | |
| "step": 930, | |
| "valid_targets_mean": 3809.6, | |
| "valid_targets_min": 1843 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.5566942533616496, | |
| "learning_rate": 3.7573096565411694e-05, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3009275496006012, | |
| "step": 935, | |
| "valid_targets_mean": 2932.1, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 1.709090909090909, | |
| "grad_norm": 0.5553866246810933, | |
| "learning_rate": 3.7529626760036814e-05, | |
| "loss": 0.2959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31550776958465576, | |
| "step": 940, | |
| "valid_targets_mean": 3059.3, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 1.7181818181818183, | |
| "grad_norm": 0.5698641256792278, | |
| "learning_rate": 3.7485796703656475e-05, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2999531626701355, | |
| "step": 945, | |
| "valid_targets_mean": 2500.3, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 0.5049658769969413, | |
| "learning_rate": 3.7441607297021254e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25607892870903015, | |
| "step": 950, | |
| "valid_targets_mean": 2733.9, | |
| "valid_targets_min": 1585 | |
| }, | |
| { | |
| "epoch": 1.7363636363636363, | |
| "grad_norm": 0.5106591813226968, | |
| "learning_rate": 3.7397059448266786e-05, | |
| "loss": 0.2709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26858973503112793, | |
| "step": 955, | |
| "valid_targets_mean": 2915.4, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 1.7454545454545456, | |
| "grad_norm": 0.5583983545217152, | |
| "learning_rate": 3.735215407289498e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28842100501060486, | |
| "step": 960, | |
| "valid_targets_mean": 2971.6, | |
| "valid_targets_min": 1471 | |
| }, | |
| { | |
| "epoch": 1.7545454545454544, | |
| "grad_norm": 0.7008542358361727, | |
| "learning_rate": 3.730689209375533e-05, | |
| "loss": 0.3217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31536877155303955, | |
| "step": 965, | |
| "valid_targets_mean": 3088.8, | |
| "valid_targets_min": 606 | |
| }, | |
| { | |
| "epoch": 1.7636363636363637, | |
| "grad_norm": 0.600059036494435, | |
| "learning_rate": 3.726127444102583e-05, | |
| "loss": 0.3194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25902581214904785, | |
| "step": 970, | |
| "valid_targets_mean": 2461.3, | |
| "valid_targets_min": 1266 | |
| }, | |
| { | |
| "epoch": 1.7727272727272727, | |
| "grad_norm": 0.5146678795415901, | |
| "learning_rate": 3.721530205219395e-05, | |
| "loss": 0.3125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2943532168865204, | |
| "step": 975, | |
| "valid_targets_mean": 2999.1, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 1.7818181818181817, | |
| "grad_norm": 0.6264974405624288, | |
| "learning_rate": 3.716897587203733e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3075316846370697, | |
| "step": 980, | |
| "valid_targets_mean": 2197.6, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 1.790909090909091, | |
| "grad_norm": 0.8108650837597063, | |
| "learning_rate": 3.712229685260434e-05, | |
| "loss": 0.309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32539889216423035, | |
| "step": 985, | |
| "valid_targets_mean": 2118.2, | |
| "valid_targets_min": 935 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.5784815878349024, | |
| "learning_rate": 3.707526595319459e-05, | |
| "loss": 0.2854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3252108693122864, | |
| "step": 990, | |
| "valid_targets_mean": 3014.9, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 1.809090909090909, | |
| "grad_norm": 0.5138580399975403, | |
| "learning_rate": 3.7027884140339144e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27519625425338745, | |
| "step": 995, | |
| "valid_targets_mean": 2874.1, | |
| "valid_targets_min": 922 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.5033049189948867, | |
| "learning_rate": 3.698015238778066e-05, | |
| "loss": 0.2913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38201528787612915, | |
| "step": 1000, | |
| "valid_targets_mean": 3500.8, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 1.8272727272727272, | |
| "grad_norm": 0.5165562131281177, | |
| "learning_rate": 3.693207167645344e-05, | |
| "loss": 0.3093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2646842896938324, | |
| "step": 1005, | |
| "valid_targets_mean": 2819.9, | |
| "valid_targets_min": 529 | |
| }, | |
| { | |
| "epoch": 1.8363636363636364, | |
| "grad_norm": 0.5687615871942717, | |
| "learning_rate": 3.6883642994463194e-05, | |
| "loss": 0.3035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2546718716621399, | |
| "step": 1010, | |
| "valid_targets_mean": 2292.6, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 1.8454545454545455, | |
| "grad_norm": 0.6635357183417174, | |
| "learning_rate": 3.6834867337066805e-05, | |
| "loss": 0.3032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3448749780654907, | |
| "step": 1015, | |
| "valid_targets_mean": 2516.5, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 1.8545454545454545, | |
| "grad_norm": 0.5378740657548305, | |
| "learning_rate": 3.678574570665181e-05, | |
| "loss": 0.3104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31566622853279114, | |
| "step": 1020, | |
| "valid_targets_mean": 2945.9, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 1.8636363636363638, | |
| "grad_norm": 0.5035442837344294, | |
| "learning_rate": 3.673627911271586e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2709481716156006, | |
| "step": 1025, | |
| "valid_targets_mean": 2638.1, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 1.8727272727272726, | |
| "grad_norm": 0.5317640331664419, | |
| "learning_rate": 3.668646857184591e-05, | |
| "loss": 0.3059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3069203495979309, | |
| "step": 1030, | |
| "valid_targets_mean": 2766.3, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 1.8818181818181818, | |
| "grad_norm": 0.5551139811484433, | |
| "learning_rate": 3.663631510769739e-05, | |
| "loss": 0.314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2864927053451538, | |
| "step": 1035, | |
| "valid_targets_mean": 3255.9, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 1.8909090909090909, | |
| "grad_norm": 0.5244636101727915, | |
| "learning_rate": 3.658581975097311e-05, | |
| "loss": 0.3085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27532529830932617, | |
| "step": 1040, | |
| "valid_targets_mean": 2916.7, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.5328499288436037, | |
| "learning_rate": 3.653498353940215e-05, | |
| "loss": 0.3027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35427671670913696, | |
| "step": 1045, | |
| "valid_targets_mean": 3069.6, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 0.4910307505819529, | |
| "learning_rate": 3.648380751771846e-05, | |
| "loss": 0.2975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2594786286354065, | |
| "step": 1050, | |
| "valid_targets_mean": 2967.8, | |
| "valid_targets_min": 1302 | |
| }, | |
| { | |
| "epoch": 1.9181818181818182, | |
| "grad_norm": 0.5535789990195293, | |
| "learning_rate": 3.6432292737639426e-05, | |
| "loss": 0.3227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2898174226284027, | |
| "step": 1055, | |
| "valid_targets_mean": 2596.1, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 1.9272727272727272, | |
| "grad_norm": 0.4824675689759683, | |
| "learning_rate": 3.638044025784425e-05, | |
| "loss": 0.3212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2852433919906616, | |
| "step": 1060, | |
| "valid_targets_mean": 3208.1, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 1.9363636363636365, | |
| "grad_norm": 0.578163740136828, | |
| "learning_rate": 3.63282511439522e-05, | |
| "loss": 0.3112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305288851261139, | |
| "step": 1065, | |
| "valid_targets_mean": 2377.6, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 1.9454545454545453, | |
| "grad_norm": 0.5864842908917559, | |
| "learning_rate": 3.627572646850069e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2787714898586273, | |
| "step": 1070, | |
| "valid_targets_mean": 2351.8, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 1.9545454545454546, | |
| "grad_norm": 0.6081783924734938, | |
| "learning_rate": 3.6222867310923296e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3721981644630432, | |
| "step": 1075, | |
| "valid_targets_mean": 3016.4, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 1.9636363636363636, | |
| "grad_norm": 0.5178622230386561, | |
| "learning_rate": 3.6169674757527466e-05, | |
| "loss": 0.2887, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2899319529533386, | |
| "step": 1080, | |
| "valid_targets_mean": 2910.1, | |
| "valid_targets_min": 1107 | |
| }, | |
| { | |
| "epoch": 1.9727272727272727, | |
| "grad_norm": 0.5345917181059462, | |
| "learning_rate": 3.61161499014723e-05, | |
| "loss": 0.2973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25718826055526733, | |
| "step": 1085, | |
| "valid_targets_mean": 2929.2, | |
| "valid_targets_min": 1142 | |
| }, | |
| { | |
| "epoch": 1.981818181818182, | |
| "grad_norm": 0.5176401124515433, | |
| "learning_rate": 3.606229384274604e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28826001286506653, | |
| "step": 1090, | |
| "valid_targets_mean": 2809.6, | |
| "valid_targets_min": 1480 | |
| }, | |
| { | |
| "epoch": 1.990909090909091, | |
| "grad_norm": 0.5539961041603162, | |
| "learning_rate": 3.600810768814345e-05, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30427122116088867, | |
| "step": 1095, | |
| "valid_targets_mean": 2813.6, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5224215311779324, | |
| "learning_rate": 3.595359255124311e-05, | |
| "loss": 0.2711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28612616658210754, | |
| "step": 1100, | |
| "valid_targets_mean": 2870.6, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 2.0090909090909093, | |
| "grad_norm": 0.489439619755712, | |
| "learning_rate": 3.589874955238449e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2773962616920471, | |
| "step": 1105, | |
| "valid_targets_mean": 3120.6, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 2.018181818181818, | |
| "grad_norm": 0.6640014872803903, | |
| "learning_rate": 3.5843579818644956e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2267443984746933, | |
| "step": 1110, | |
| "valid_targets_mean": 2743.2, | |
| "valid_targets_min": 935 | |
| }, | |
| { | |
| "epoch": 2.0272727272727273, | |
| "grad_norm": 0.5835718798395699, | |
| "learning_rate": 3.5788084483816587e-05, | |
| "loss": 0.2713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31667560338974, | |
| "step": 1115, | |
| "valid_targets_mean": 2659.2, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 2.036363636363636, | |
| "grad_norm": 0.5465980940064771, | |
| "learning_rate": 3.573226468838289e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2518404722213745, | |
| "step": 1120, | |
| "valid_targets_mean": 2707.6, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 2.0454545454545454, | |
| "grad_norm": 0.5733257551739418, | |
| "learning_rate": 3.567612157949536e-05, | |
| "loss": 0.2895, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27450916171073914, | |
| "step": 1125, | |
| "valid_targets_mean": 2856.4, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 2.0545454545454547, | |
| "grad_norm": 0.5681140687556558, | |
| "learning_rate": 3.561965631094988e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23563730716705322, | |
| "step": 1130, | |
| "valid_targets_mean": 2212.8, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 2.0636363636363635, | |
| "grad_norm": 0.5556954904745599, | |
| "learning_rate": 3.556287004316305e-05, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2981184124946594, | |
| "step": 1135, | |
| "valid_targets_mean": 2950.0, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 2.0727272727272728, | |
| "grad_norm": 0.5605759813195871, | |
| "learning_rate": 3.5505763943148324e-05, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26870912313461304, | |
| "step": 1140, | |
| "valid_targets_mean": 2711.8, | |
| "valid_targets_min": 1144 | |
| }, | |
| { | |
| "epoch": 2.081818181818182, | |
| "grad_norm": 0.5216825087835475, | |
| "learning_rate": 3.544833918449199e-05, | |
| "loss": 0.2937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3655780255794525, | |
| "step": 1145, | |
| "valid_targets_mean": 3950.1, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 2.090909090909091, | |
| "grad_norm": 0.5613165558947877, | |
| "learning_rate": 3.5390596947329124e-05, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2934229373931885, | |
| "step": 1150, | |
| "valid_targets_mean": 3625.8, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.6361732770074459, | |
| "learning_rate": 3.5332538418319254e-05, | |
| "loss": 0.2788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31974083185195923, | |
| "step": 1155, | |
| "valid_targets_mean": 2393.6, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 2.109090909090909, | |
| "grad_norm": 0.6027622062417266, | |
| "learning_rate": 3.527416479062205e-05, | |
| "loss": 0.2807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24740280210971832, | |
| "step": 1160, | |
| "valid_targets_mean": 2359.5, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 2.118181818181818, | |
| "grad_norm": 0.5130832544460253, | |
| "learning_rate": 3.521547726387275e-05, | |
| "loss": 0.2873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22652898728847504, | |
| "step": 1165, | |
| "valid_targets_mean": 2752.1, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 2.1272727272727274, | |
| "grad_norm": 0.5486053889891421, | |
| "learning_rate": 3.515647704415754e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2480347454547882, | |
| "step": 1170, | |
| "valid_targets_mean": 2579.8, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 2.1363636363636362, | |
| "grad_norm": 0.5188330941180774, | |
| "learning_rate": 3.509716534398873e-05, | |
| "loss": 0.2801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2117992341518402, | |
| "step": 1175, | |
| "valid_targets_mean": 2542.3, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 2.1454545454545455, | |
| "grad_norm": 0.49929458454016185, | |
| "learning_rate": 3.503754338227989e-05, | |
| "loss": 0.2878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27674928307533264, | |
| "step": 1180, | |
| "valid_targets_mean": 3255.7, | |
| "valid_targets_min": 1464 | |
| }, | |
| { | |
| "epoch": 2.1545454545454543, | |
| "grad_norm": 0.5699600398030361, | |
| "learning_rate": 3.497761238432073e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24738357961177826, | |
| "step": 1185, | |
| "valid_targets_mean": 2535.2, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 2.1636363636363636, | |
| "grad_norm": 0.5402480392886817, | |
| "learning_rate": 3.4917373581752e-05, | |
| "loss": 0.2901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3008500933647156, | |
| "step": 1190, | |
| "valid_targets_mean": 3093.9, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 2.172727272727273, | |
| "grad_norm": 0.6485393078998691, | |
| "learning_rate": 3.4856828212540094e-05, | |
| "loss": 0.2892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3143201768398285, | |
| "step": 1195, | |
| "valid_targets_mean": 2453.3, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.6195348387664918, | |
| "learning_rate": 3.4795977520951684e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3513163626194, | |
| "step": 1200, | |
| "valid_targets_mean": 2938.9, | |
| "valid_targets_min": 1226 | |
| }, | |
| { | |
| "epoch": 2.190909090909091, | |
| "grad_norm": 0.5549330178340143, | |
| "learning_rate": 3.47348227575281e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27062830328941345, | |
| "step": 1205, | |
| "valid_targets_mean": 3571.6, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.7062538572938993, | |
| "learning_rate": 3.467336517905966e-05, | |
| "loss": 0.266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28915590047836304, | |
| "step": 1210, | |
| "valid_targets_mean": 2486.9, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 2.209090909090909, | |
| "grad_norm": 0.5619405166344583, | |
| "learning_rate": 3.46116060485598e-05, | |
| "loss": 0.2642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3145231604576111, | |
| "step": 1215, | |
| "valid_targets_mean": 2945.8, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 2.2181818181818183, | |
| "grad_norm": 0.5371710217063721, | |
| "learning_rate": 3.4549546635239167e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2760411202907562, | |
| "step": 1220, | |
| "valid_targets_mean": 3207.3, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 2.227272727272727, | |
| "grad_norm": 0.5888575885460174, | |
| "learning_rate": 3.448718821447953e-05, | |
| "loss": 0.2994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2726590931415558, | |
| "step": 1225, | |
| "valid_targets_mean": 2388.1, | |
| "valid_targets_min": 529 | |
| }, | |
| { | |
| "epoch": 2.2363636363636363, | |
| "grad_norm": 0.5202633657149802, | |
| "learning_rate": 3.442453206780751e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2371698021888733, | |
| "step": 1230, | |
| "valid_targets_mean": 2776.1, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 2.2454545454545456, | |
| "grad_norm": 0.5190998588011124, | |
| "learning_rate": 3.4361579482868325e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19271951913833618, | |
| "step": 1235, | |
| "valid_targets_mean": 2445.1, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 2.2545454545454544, | |
| "grad_norm": 0.6415806356522294, | |
| "learning_rate": 3.429833175339927e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3027918338775635, | |
| "step": 1240, | |
| "valid_targets_mean": 2446.1, | |
| "valid_targets_min": 562 | |
| }, | |
| { | |
| "epoch": 2.2636363636363637, | |
| "grad_norm": 0.5859047596126794, | |
| "learning_rate": 3.423479017920317e-05, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3389890789985657, | |
| "step": 1245, | |
| "valid_targets_mean": 2799.6, | |
| "valid_targets_min": 1142 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 0.8035925762795723, | |
| "learning_rate": 3.4170956066121616e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2345964014530182, | |
| "step": 1250, | |
| "valid_targets_mean": 2630.6, | |
| "valid_targets_min": 1152 | |
| }, | |
| { | |
| "epoch": 2.2818181818181817, | |
| "grad_norm": 0.578674528171854, | |
| "learning_rate": 3.410683072600818e-05, | |
| "loss": 0.2727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2537236511707306, | |
| "step": 1255, | |
| "valid_targets_mean": 2365.8, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 2.290909090909091, | |
| "grad_norm": 0.4934272429740994, | |
| "learning_rate": 3.4042415476701434e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2870427966117859, | |
| "step": 1260, | |
| "valid_targets_mean": 3357.2, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.5576814977535322, | |
| "learning_rate": 3.397771164199787e-05, | |
| "loss": 0.2747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2425975650548935, | |
| "step": 1265, | |
| "valid_targets_mean": 2477.3, | |
| "valid_targets_min": 1004 | |
| }, | |
| { | |
| "epoch": 2.309090909090909, | |
| "grad_norm": 0.6455970996280729, | |
| "learning_rate": 3.3912720551624684e-05, | |
| "loss": 0.2811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24581097066402435, | |
| "step": 1270, | |
| "valid_targets_mean": 2579.8, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 2.3181818181818183, | |
| "grad_norm": 0.5317314295046272, | |
| "learning_rate": 3.384744354121246e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22386619448661804, | |
| "step": 1275, | |
| "valid_targets_mean": 2630.9, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 2.327272727272727, | |
| "grad_norm": 0.6193275788990439, | |
| "learning_rate": 3.3781881952267715e-05, | |
| "loss": 0.2725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22538137435913086, | |
| "step": 1280, | |
| "valid_targets_mean": 2205.0, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 2.3363636363636364, | |
| "grad_norm": 0.5519637684834255, | |
| "learning_rate": 3.3716037132145354e-05, | |
| "loss": 0.292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32305043935775757, | |
| "step": 1285, | |
| "valid_targets_mean": 2939.0, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 2.3454545454545457, | |
| "grad_norm": 0.5178998925686475, | |
| "learning_rate": 3.3649910434020934e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2358517199754715, | |
| "step": 1290, | |
| "valid_targets_mean": 2688.2, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 2.3545454545454545, | |
| "grad_norm": 0.8149568552846596, | |
| "learning_rate": 3.35835032168629e-05, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35959750413894653, | |
| "step": 1295, | |
| "valid_targets_mean": 3071.2, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 1.0108335562213615, | |
| "learning_rate": 3.351681684540462e-05, | |
| "loss": 0.2922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3282524347305298, | |
| "step": 1300, | |
| "valid_targets_mean": 2302.9, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 2.3727272727272726, | |
| "grad_norm": 0.5275055282359982, | |
| "learning_rate": 3.3449852690116375e-05, | |
| "loss": 0.2849, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2222471833229065, | |
| "step": 1305, | |
| "valid_targets_mean": 2474.3, | |
| "valid_targets_min": 992 | |
| }, | |
| { | |
| "epoch": 2.381818181818182, | |
| "grad_norm": 0.5108776081340932, | |
| "learning_rate": 3.3382612127177166e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22455942630767822, | |
| "step": 1310, | |
| "valid_targets_mean": 2848.2, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 2.390909090909091, | |
| "grad_norm": 0.540702773277452, | |
| "learning_rate": 3.331509653844644e-05, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2385614961385727, | |
| "step": 1315, | |
| "valid_targets_mean": 2398.2, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.5580702423301868, | |
| "learning_rate": 3.324730731143571e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28066539764404297, | |
| "step": 1320, | |
| "valid_targets_mean": 2742.6, | |
| "valid_targets_min": 892 | |
| }, | |
| { | |
| "epoch": 2.409090909090909, | |
| "grad_norm": 0.532467694697259, | |
| "learning_rate": 3.317924583927999e-05, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26546695828437805, | |
| "step": 1325, | |
| "valid_targets_mean": 2783.5, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 2.418181818181818, | |
| "grad_norm": 0.5949400757150429, | |
| "learning_rate": 3.311091352070924e-05, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30344507098197937, | |
| "step": 1330, | |
| "valid_targets_mean": 2494.1, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 2.4272727272727272, | |
| "grad_norm": 0.6525136381200096, | |
| "learning_rate": 3.3042311760019554e-05, | |
| "loss": 0.2814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2595057487487793, | |
| "step": 1335, | |
| "valid_targets_mean": 3365.3, | |
| "valid_targets_min": 1156 | |
| }, | |
| { | |
| "epoch": 2.4363636363636365, | |
| "grad_norm": 0.5306637930795548, | |
| "learning_rate": 3.297344196704431e-05, | |
| "loss": 0.2815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29292032122612, | |
| "step": 1340, | |
| "valid_targets_mean": 3093.2, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 2.4454545454545453, | |
| "grad_norm": 0.5331840161384932, | |
| "learning_rate": 3.2904305557125265e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2697639465332031, | |
| "step": 1345, | |
| "valid_targets_mean": 3192.3, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 2.4545454545454546, | |
| "grad_norm": 0.5465573713315527, | |
| "learning_rate": 3.2834903951083363e-05, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28745096921920776, | |
| "step": 1350, | |
| "valid_targets_mean": 2783.4, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 2.463636363636364, | |
| "grad_norm": 0.6761778819959714, | |
| "learning_rate": 3.27652385751896e-05, | |
| "loss": 0.2941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2797173261642456, | |
| "step": 1355, | |
| "valid_targets_mean": 2376.6, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 2.4727272727272727, | |
| "grad_norm": 0.5172807517065136, | |
| "learning_rate": 3.269531086113573e-05, | |
| "loss": 0.2869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25817084312438965, | |
| "step": 1360, | |
| "valid_targets_mean": 3498.8, | |
| "valid_targets_min": 1199 | |
| }, | |
| { | |
| "epoch": 2.481818181818182, | |
| "grad_norm": 0.4945389497424476, | |
| "learning_rate": 3.262512224600478e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23338064551353455, | |
| "step": 1365, | |
| "valid_targets_mean": 2960.8, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 2.4909090909090907, | |
| "grad_norm": 0.46663492782118515, | |
| "learning_rate": 3.2554674172241565e-05, | |
| "loss": 0.2643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2113073766231537, | |
| "step": 1370, | |
| "valid_targets_mean": 2834.1, | |
| "valid_targets_min": 1502 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.5271452557335616, | |
| "learning_rate": 3.2483968087623026e-05, | |
| "loss": 0.289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2747431993484497, | |
| "step": 1375, | |
| "valid_targets_mean": 3373.9, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 2.509090909090909, | |
| "grad_norm": 0.5903572935792529, | |
| "learning_rate": 3.241300544522848e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29771339893341064, | |
| "step": 1380, | |
| "valid_targets_mean": 2850.2, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 2.518181818181818, | |
| "grad_norm": 0.48296367582709726, | |
| "learning_rate": 3.234178770340975e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2282666265964508, | |
| "step": 1385, | |
| "valid_targets_mean": 3135.6, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 2.5272727272727273, | |
| "grad_norm": 0.575970639650917, | |
| "learning_rate": 3.227031632576122e-05, | |
| "loss": 0.2749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2689974904060364, | |
| "step": 1390, | |
| "valid_targets_mean": 2390.9, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 2.536363636363636, | |
| "grad_norm": 0.5132386498577188, | |
| "learning_rate": 3.219859278108972e-05, | |
| "loss": 0.2619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2247665524482727, | |
| "step": 1395, | |
| "valid_targets_mean": 2708.2, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.5374983895142559, | |
| "learning_rate": 3.212661854338438e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2777082920074463, | |
| "step": 1400, | |
| "valid_targets_mean": 3208.6, | |
| "valid_targets_min": 1493 | |
| }, | |
| { | |
| "epoch": 2.5545454545454547, | |
| "grad_norm": 0.4986505149332797, | |
| "learning_rate": 3.20543950917863e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17187553644180298, | |
| "step": 1405, | |
| "valid_targets_mean": 2338.7, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 2.5636363636363635, | |
| "grad_norm": 0.5088261424244129, | |
| "learning_rate": 3.1981923910558164e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2423439472913742, | |
| "step": 1410, | |
| "valid_targets_mean": 2774.1, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 2.5727272727272728, | |
| "grad_norm": 0.5190336610600647, | |
| "learning_rate": 3.190920648905376e-05, | |
| "loss": 0.2954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3125911056995392, | |
| "step": 1415, | |
| "valid_targets_mean": 3351.1, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 2.581818181818182, | |
| "grad_norm": 0.5811156813435989, | |
| "learning_rate": 3.183624432168736e-05, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2840062081813812, | |
| "step": 1420, | |
| "valid_targets_mean": 2648.9, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 2.590909090909091, | |
| "grad_norm": 0.44510133678522557, | |
| "learning_rate": 3.1763038907902976e-05, | |
| "loss": 0.269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22076770663261414, | |
| "step": 1425, | |
| "valid_targets_mean": 3170.4, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.5423101344043437, | |
| "learning_rate": 3.16895917521436e-05, | |
| "loss": 0.2592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2548125386238098, | |
| "step": 1430, | |
| "valid_targets_mean": 2706.8, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 2.6090909090909093, | |
| "grad_norm": 0.5415408039241957, | |
| "learning_rate": 3.161590436382023e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23154941201210022, | |
| "step": 1435, | |
| "valid_targets_mean": 2627.1, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 2.618181818181818, | |
| "grad_norm": 0.5442786567017693, | |
| "learning_rate": 3.1541978257280915e-05, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2088831067085266, | |
| "step": 1440, | |
| "valid_targets_mean": 2507.2, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 2.6272727272727274, | |
| "grad_norm": 0.507250652617475, | |
| "learning_rate": 3.1467814951779564e-05, | |
| "loss": 0.2837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3032193183898926, | |
| "step": 1445, | |
| "valid_targets_mean": 2987.0, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 2.6363636363636362, | |
| "grad_norm": 0.5572879239252921, | |
| "learning_rate": 3.139341597144478e-05, | |
| "loss": 0.2806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2529670000076294, | |
| "step": 1450, | |
| "valid_targets_mean": 2529.0, | |
| "valid_targets_min": 1000 | |
| }, | |
| { | |
| "epoch": 2.6454545454545455, | |
| "grad_norm": 0.5153350157095853, | |
| "learning_rate": 3.13187828452485e-05, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21580073237419128, | |
| "step": 1455, | |
| "valid_targets_mean": 2435.6, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 2.6545454545454543, | |
| "grad_norm": 0.5170627803352125, | |
| "learning_rate": 3.1243917106974583e-05, | |
| "loss": 0.2974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26863837242126465, | |
| "step": 1460, | |
| "valid_targets_mean": 2920.1, | |
| "valid_targets_min": 1100 | |
| }, | |
| { | |
| "epoch": 2.6636363636363636, | |
| "grad_norm": 0.5155469818430718, | |
| "learning_rate": 3.116882029518732e-05, | |
| "loss": 0.2693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22775819897651672, | |
| "step": 1465, | |
| "valid_targets_mean": 2753.8, | |
| "valid_targets_min": 1362 | |
| }, | |
| { | |
| "epoch": 2.672727272727273, | |
| "grad_norm": 0.6111239303899411, | |
| "learning_rate": 3.109349395319976e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28220629692077637, | |
| "step": 1470, | |
| "valid_targets_mean": 2454.9, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 2.6818181818181817, | |
| "grad_norm": 0.584045217206336, | |
| "learning_rate": 3.101793962904205e-05, | |
| "loss": 0.2742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27724209427833557, | |
| "step": 1475, | |
| "valid_targets_mean": 2150.0, | |
| "valid_targets_min": 733 | |
| }, | |
| { | |
| "epoch": 2.690909090909091, | |
| "grad_norm": 0.5593938542064445, | |
| "learning_rate": 3.094215887542957e-05, | |
| "loss": 0.2819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26411521434783936, | |
| "step": 1480, | |
| "valid_targets_mean": 2727.2, | |
| "valid_targets_min": 1089 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 0.6659207240617939, | |
| "learning_rate": 3.086615324973107e-05, | |
| "loss": 0.3038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2924853563308716, | |
| "step": 1485, | |
| "valid_targets_mean": 2503.4, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 2.709090909090909, | |
| "grad_norm": 0.5463125878243501, | |
| "learning_rate": 3.07899243139366e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.229422464966774, | |
| "step": 1490, | |
| "valid_targets_mean": 2213.2, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 2.7181818181818183, | |
| "grad_norm": 0.5292840040904653, | |
| "learning_rate": 3.0713473634625507e-05, | |
| "loss": 0.2704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25266051292419434, | |
| "step": 1495, | |
| "valid_targets_mean": 2959.2, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.6300182495568534, | |
| "learning_rate": 3.0636802782934146e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2884886860847473, | |
| "step": 1500, | |
| "valid_targets_mean": 2169.2, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 2.7363636363636363, | |
| "grad_norm": 0.5273137367601014, | |
| "learning_rate": 3.055991333452364e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2575647234916687, | |
| "step": 1505, | |
| "valid_targets_mean": 2934.0, | |
| "valid_targets_min": 1469 | |
| }, | |
| { | |
| "epoch": 2.7454545454545456, | |
| "grad_norm": 0.48641356369382016, | |
| "learning_rate": 3.0482806869547495e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2414964735507965, | |
| "step": 1510, | |
| "valid_targets_mean": 3465.9, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 2.7545454545454544, | |
| "grad_norm": 0.5613437872799717, | |
| "learning_rate": 3.0405484972619116e-05, | |
| "loss": 0.2689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.329323410987854, | |
| "step": 1515, | |
| "valid_targets_mean": 3002.0, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 2.7636363636363637, | |
| "grad_norm": 0.5178590049360541, | |
| "learning_rate": 3.0327949232779242e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24617408215999603, | |
| "step": 1520, | |
| "valid_targets_mean": 2753.1, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 2.7727272727272725, | |
| "grad_norm": 0.5209426517091212, | |
| "learning_rate": 3.0250201243463297e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2629636824131012, | |
| "step": 1525, | |
| "valid_targets_mean": 2723.1, | |
| "valid_targets_min": 983 | |
| }, | |
| { | |
| "epoch": 2.7818181818181817, | |
| "grad_norm": 0.48623271439168053, | |
| "learning_rate": 3.0172242602468637e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25595349073410034, | |
| "step": 1530, | |
| "valid_targets_mean": 3133.2, | |
| "valid_targets_min": 2159 | |
| }, | |
| { | |
| "epoch": 2.790909090909091, | |
| "grad_norm": 0.524320685222588, | |
| "learning_rate": 3.009407491192172e-05, | |
| "loss": 0.3067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3319476246833801, | |
| "step": 1535, | |
| "valid_targets_mean": 3539.6, | |
| "valid_targets_min": 1160 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.5295558188317099, | |
| "learning_rate": 3.0015699778245177e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2332993447780609, | |
| "step": 1540, | |
| "valid_targets_mean": 2543.6, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 2.809090909090909, | |
| "grad_norm": 0.5827710149024911, | |
| "learning_rate": 2.9937118812124796e-05, | |
| "loss": 0.2836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3239750266075134, | |
| "step": 1545, | |
| "valid_targets_mean": 3131.9, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 2.8181818181818183, | |
| "grad_norm": 0.5971029640504223, | |
| "learning_rate": 2.9858333628476423e-05, | |
| "loss": 0.2944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.312186062335968, | |
| "step": 1550, | |
| "valid_targets_mean": 2550.8, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 2.827272727272727, | |
| "grad_norm": 0.6142176132505575, | |
| "learning_rate": 2.977934584641278e-05, | |
| "loss": 0.3037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34188252687454224, | |
| "step": 1555, | |
| "valid_targets_mean": 3110.6, | |
| "valid_targets_min": 1112 | |
| }, | |
| { | |
| "epoch": 2.8363636363636364, | |
| "grad_norm": 0.6646128032944204, | |
| "learning_rate": 2.9700157089210174e-05, | |
| "loss": 0.2956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31395840644836426, | |
| "step": 1560, | |
| "valid_targets_mean": 2368.2, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 2.8454545454545457, | |
| "grad_norm": 0.8465812739684272, | |
| "learning_rate": 2.9620768984275163e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2895926237106323, | |
| "step": 1565, | |
| "valid_targets_mean": 3289.6, | |
| "valid_targets_min": 1496 | |
| }, | |
| { | |
| "epoch": 2.8545454545454545, | |
| "grad_norm": 0.5724604891224522, | |
| "learning_rate": 2.9541183163111076e-05, | |
| "loss": 0.2794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2987874746322632, | |
| "step": 1570, | |
| "valid_targets_mean": 3033.1, | |
| "valid_targets_min": 878 | |
| }, | |
| { | |
| "epoch": 2.8636363636363638, | |
| "grad_norm": 0.5220424169586242, | |
| "learning_rate": 2.9461401261284536e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28746628761291504, | |
| "step": 1575, | |
| "valid_targets_mean": 3495.4, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 2.8727272727272726, | |
| "grad_norm": 0.540257485034313, | |
| "learning_rate": 2.9381424918391775e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2522783875465393, | |
| "step": 1580, | |
| "valid_targets_mean": 2805.1, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 2.881818181818182, | |
| "grad_norm": 0.6350255065226401, | |
| "learning_rate": 2.9301255778025014e-05, | |
| "loss": 0.2602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24785231053829193, | |
| "step": 1585, | |
| "valid_targets_mean": 3572.1, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 2.8909090909090907, | |
| "grad_norm": 0.6254327695227894, | |
| "learning_rate": 2.9220895487738627e-05, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3446647524833679, | |
| "step": 1590, | |
| "valid_targets_mean": 2484.1, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.5107669205847477, | |
| "learning_rate": 2.9140345699015328e-05, | |
| "loss": 0.2796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2758895754814148, | |
| "step": 1595, | |
| "valid_targets_mean": 2978.6, | |
| "valid_targets_min": 1163 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 0.5591042375626212, | |
| "learning_rate": 2.905960806723219e-05, | |
| "loss": 0.2731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37159761786460876, | |
| "step": 1600, | |
| "valid_targets_mean": 3184.2, | |
| "valid_targets_min": 509 | |
| }, | |
| { | |
| "epoch": 2.918181818181818, | |
| "grad_norm": 0.49905659297103566, | |
| "learning_rate": 2.8978684251626652e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26265135407447815, | |
| "step": 1605, | |
| "valid_targets_mean": 3049.9, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 2.9272727272727272, | |
| "grad_norm": 0.507734740657102, | |
| "learning_rate": 2.8897575915262418e-05, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19236379861831665, | |
| "step": 1610, | |
| "valid_targets_mean": 2568.4, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 2.9363636363636365, | |
| "grad_norm": 0.6471719002391192, | |
| "learning_rate": 2.8816284724995273e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29607704281806946, | |
| "step": 1615, | |
| "valid_targets_mean": 2066.0, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 2.9454545454545453, | |
| "grad_norm": 0.5250874195229503, | |
| "learning_rate": 2.8734812351438823e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2234722077846527, | |
| "step": 1620, | |
| "valid_targets_mean": 2377.8, | |
| "valid_targets_min": 1175 | |
| }, | |
| { | |
| "epoch": 2.9545454545454546, | |
| "grad_norm": 0.5143467837243642, | |
| "learning_rate": 2.8653160468930168e-05, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28245672583580017, | |
| "step": 1625, | |
| "valid_targets_mean": 3324.6, | |
| "valid_targets_min": 890 | |
| }, | |
| { | |
| "epoch": 2.963636363636364, | |
| "grad_norm": 0.5154121964083332, | |
| "learning_rate": 2.85713307554955e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2525605261325836, | |
| "step": 1630, | |
| "valid_targets_mean": 2891.5, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 2.9727272727272727, | |
| "grad_norm": 0.6206896766766579, | |
| "learning_rate": 2.8489324892815604e-05, | |
| "loss": 0.2919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28195661306381226, | |
| "step": 1635, | |
| "valid_targets_mean": 2282.6, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 2.981818181818182, | |
| "grad_norm": 0.5700730166353059, | |
| "learning_rate": 2.8407144566191315e-05, | |
| "loss": 0.2677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27205920219421387, | |
| "step": 1640, | |
| "valid_targets_mean": 2825.0, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 2.990909090909091, | |
| "grad_norm": 0.6002367822037568, | |
| "learning_rate": 2.8324791464508856e-05, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3318069577217102, | |
| "step": 1645, | |
| "valid_targets_mean": 2618.8, | |
| "valid_targets_min": 1390 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4837522767492499, | |
| "learning_rate": 2.824226728020516e-05, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20618417859077454, | |
| "step": 1650, | |
| "valid_targets_mean": 2752.9, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 3.0090909090909093, | |
| "grad_norm": 0.5814855327909163, | |
| "learning_rate": 2.8159573709233074e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2657179832458496, | |
| "step": 1655, | |
| "valid_targets_mean": 2486.8, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 3.018181818181818, | |
| "grad_norm": 0.5673715077261331, | |
| "learning_rate": 2.80767124510265e-05, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2121908962726593, | |
| "step": 1660, | |
| "valid_targets_mean": 2472.4, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 3.0272727272727273, | |
| "grad_norm": 0.5580763685369373, | |
| "learning_rate": 2.7993685208465483e-05, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2324039787054062, | |
| "step": 1665, | |
| "valid_targets_mean": 2844.7, | |
| "valid_targets_min": 1310 | |
| }, | |
| { | |
| "epoch": 3.036363636363636, | |
| "grad_norm": 0.6610751547581967, | |
| "learning_rate": 2.7910493687841213e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27891236543655396, | |
| "step": 1670, | |
| "valid_targets_mean": 2515.8, | |
| "valid_targets_min": 1426 | |
| }, | |
| { | |
| "epoch": 3.0454545454545454, | |
| "grad_norm": 0.592826560091231, | |
| "learning_rate": 2.7827139598820947e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23966637253761292, | |
| "step": 1675, | |
| "valid_targets_mean": 2399.9, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 3.0545454545454547, | |
| "grad_norm": 0.5627543784994, | |
| "learning_rate": 2.774362465441288e-05, | |
| "loss": 0.2551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2496371567249298, | |
| "step": 1680, | |
| "valid_targets_mean": 2805.5, | |
| "valid_targets_min": 953 | |
| }, | |
| { | |
| "epoch": 3.0636363636363635, | |
| "grad_norm": 0.5536164197921297, | |
| "learning_rate": 2.7659950570930956e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27527642250061035, | |
| "step": 1685, | |
| "valid_targets_mean": 2894.5, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 3.0727272727272728, | |
| "grad_norm": 0.7095322622012897, | |
| "learning_rate": 2.7576119067959565e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30722740292549133, | |
| "step": 1690, | |
| "valid_targets_mean": 3261.9, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 3.081818181818182, | |
| "grad_norm": 0.4984100254988549, | |
| "learning_rate": 2.7492131868318247e-05, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20504172146320343, | |
| "step": 1695, | |
| "valid_targets_mean": 3124.6, | |
| "valid_targets_min": 1300 | |
| }, | |
| { | |
| "epoch": 3.090909090909091, | |
| "grad_norm": 0.5730280754927857, | |
| "learning_rate": 2.7407990698026227e-05, | |
| "loss": 0.2431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2182716727256775, | |
| "step": 1700, | |
| "valid_targets_mean": 2661.8, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.5750248943907733, | |
| "learning_rate": 2.7323697286266998e-05, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24978840351104736, | |
| "step": 1705, | |
| "valid_targets_mean": 3135.4, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 3.109090909090909, | |
| "grad_norm": 0.5049945154865161, | |
| "learning_rate": 2.7239253365352774e-05, | |
| "loss": 0.2345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19322291016578674, | |
| "step": 1710, | |
| "valid_targets_mean": 2814.9, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 3.118181818181818, | |
| "grad_norm": 0.6782663119949206, | |
| "learning_rate": 2.7154660670688867e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19753289222717285, | |
| "step": 1715, | |
| "valid_targets_mean": 2069.5, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 3.1272727272727274, | |
| "grad_norm": 0.5456342685836792, | |
| "learning_rate": 2.706992094073803e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2218984067440033, | |
| "step": 1720, | |
| "valid_targets_mean": 2546.0, | |
| "valid_targets_min": 452 | |
| }, | |
| { | |
| "epoch": 3.1363636363636362, | |
| "grad_norm": 0.5152857788301313, | |
| "learning_rate": 2.6985035916984746e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30218735337257385, | |
| "step": 1725, | |
| "valid_targets_mean": 3570.6, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 3.1454545454545455, | |
| "grad_norm": 0.6288155708565155, | |
| "learning_rate": 2.6900007343899414e-05, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20587828755378723, | |
| "step": 1730, | |
| "valid_targets_mean": 2014.5, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 3.1545454545454543, | |
| "grad_norm": 0.5998489889629469, | |
| "learning_rate": 2.6814836968902535e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24592693150043488, | |
| "step": 1735, | |
| "valid_targets_mean": 2606.9, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 3.1636363636363636, | |
| "grad_norm": 0.6757102777347078, | |
| "learning_rate": 2.6729526542328755e-05, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29409754276275635, | |
| "step": 1740, | |
| "valid_targets_mean": 2323.0, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 3.172727272727273, | |
| "grad_norm": 0.5086126031637096, | |
| "learning_rate": 2.6644077817390933e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2759111523628235, | |
| "step": 1745, | |
| "valid_targets_mean": 3899.1, | |
| "valid_targets_min": 1370 | |
| }, | |
| { | |
| "epoch": 3.1818181818181817, | |
| "grad_norm": 0.9238879483582045, | |
| "learning_rate": 2.6558492550144092e-05, | |
| "loss": 0.2688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706644535064697, | |
| "step": 1750, | |
| "valid_targets_mean": 2931.5, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 3.190909090909091, | |
| "grad_norm": 0.6240061533266604, | |
| "learning_rate": 2.6472772499449323e-05, | |
| "loss": 0.2734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29832810163497925, | |
| "step": 1755, | |
| "valid_targets_mean": 2611.0, | |
| "valid_targets_min": 1297 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.6170619826973041, | |
| "learning_rate": 2.6386919426937655e-05, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23452681303024292, | |
| "step": 1760, | |
| "valid_targets_mean": 2523.8, | |
| "valid_targets_min": 694 | |
| }, | |
| { | |
| "epoch": 3.209090909090909, | |
| "grad_norm": 0.5500235152257905, | |
| "learning_rate": 2.6300935096973858e-05, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2703178822994232, | |
| "step": 1765, | |
| "valid_targets_mean": 3243.7, | |
| "valid_targets_min": 1548 | |
| }, | |
| { | |
| "epoch": 3.2181818181818183, | |
| "grad_norm": 0.5775975150590328, | |
| "learning_rate": 2.6214821276620157e-05, | |
| "loss": 0.2187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20802685618400574, | |
| "step": 1770, | |
| "valid_targets_mean": 2234.9, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 3.227272727272727, | |
| "grad_norm": 0.513453903766835, | |
| "learning_rate": 2.6128579735599924e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23952774703502655, | |
| "step": 1775, | |
| "valid_targets_mean": 2953.9, | |
| "valid_targets_min": 1459 | |
| }, | |
| { | |
| "epoch": 3.2363636363636363, | |
| "grad_norm": 0.6551665130118128, | |
| "learning_rate": 2.6042212246261337e-05, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26932787895202637, | |
| "step": 1780, | |
| "valid_targets_mean": 2470.1, | |
| "valid_targets_min": 637 | |
| }, | |
| { | |
| "epoch": 3.2454545454545456, | |
| "grad_norm": 0.5737639500767836, | |
| "learning_rate": 2.595572058354092e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23401257395744324, | |
| "step": 1785, | |
| "valid_targets_mean": 2675.4, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 3.2545454545454544, | |
| "grad_norm": 0.6311746143624856, | |
| "learning_rate": 2.5869106524927096e-05, | |
| "loss": 0.2365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26091697812080383, | |
| "step": 1790, | |
| "valid_targets_mean": 2271.6, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 3.2636363636363637, | |
| "grad_norm": 0.6612760745498111, | |
| "learning_rate": 2.5782371850423627e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18565328419208527, | |
| "step": 1795, | |
| "valid_targets_mean": 2437.2, | |
| "valid_targets_min": 1132 | |
| }, | |
| { | |
| "epoch": 3.2727272727272725, | |
| "grad_norm": 0.5384296217759248, | |
| "learning_rate": 2.5695518342513047e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22534595429897308, | |
| "step": 1800, | |
| "valid_targets_mean": 2859.2, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 3.2818181818181817, | |
| "grad_norm": 0.6361683766746276, | |
| "learning_rate": 2.5608547786120056e-05, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32746827602386475, | |
| "step": 1805, | |
| "valid_targets_mean": 2984.2, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 3.290909090909091, | |
| "grad_norm": 0.617580783601667, | |
| "learning_rate": 2.55214619685748e-05, | |
| "loss": 0.2612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22853900492191315, | |
| "step": 1810, | |
| "valid_targets_mean": 2373.4, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 0.534570867677942, | |
| "learning_rate": 2.5434262679576157e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18518221378326416, | |
| "step": 1815, | |
| "valid_targets_mean": 2618.7, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 3.309090909090909, | |
| "grad_norm": 0.6257866478906633, | |
| "learning_rate": 2.5346951711154946e-05, | |
| "loss": 0.2145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2663920521736145, | |
| "step": 1820, | |
| "valid_targets_mean": 2585.3, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 3.3181818181818183, | |
| "grad_norm": 0.6910737869035166, | |
| "learning_rate": 2.5259530857637125e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2623439431190491, | |
| "step": 1825, | |
| "valid_targets_mean": 3113.5, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 3.327272727272727, | |
| "grad_norm": 0.6127138768541329, | |
| "learning_rate": 2.5172001915606883e-05, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32933688163757324, | |
| "step": 1830, | |
| "valid_targets_mean": 3152.8, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 3.3363636363636364, | |
| "grad_norm": 0.6045062207311135, | |
| "learning_rate": 2.5084366683869746e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22119587659835815, | |
| "step": 1835, | |
| "valid_targets_mean": 2336.5, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 3.3454545454545457, | |
| "grad_norm": 0.559110510072633, | |
| "learning_rate": 2.4996626963415577e-05, | |
| "loss": 0.2568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2486972063779831, | |
| "step": 1840, | |
| "valid_targets_mean": 3022.8, | |
| "valid_targets_min": 1146 | |
| }, | |
| { | |
| "epoch": 3.3545454545454545, | |
| "grad_norm": 0.6611782439671358, | |
| "learning_rate": 2.4908784557381616e-05, | |
| "loss": 0.2635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23325376212596893, | |
| "step": 1845, | |
| "valid_targets_mean": 2603.2, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 3.3636363636363638, | |
| "grad_norm": 0.592192383324759, | |
| "learning_rate": 2.4820841271015364e-05, | |
| "loss": 0.2374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2461732029914856, | |
| "step": 1850, | |
| "valid_targets_mean": 2866.9, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 3.3727272727272726, | |
| "grad_norm": 0.6205836521900234, | |
| "learning_rate": 2.4732798911637525e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2674775719642639, | |
| "step": 1855, | |
| "valid_targets_mean": 2762.9, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 3.381818181818182, | |
| "grad_norm": 0.5875967584148674, | |
| "learning_rate": 2.4644659288604853e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20846202969551086, | |
| "step": 1860, | |
| "valid_targets_mean": 2467.1, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 3.390909090909091, | |
| "grad_norm": 0.5459079349622694, | |
| "learning_rate": 2.4556424213272955e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18211738765239716, | |
| "step": 1865, | |
| "valid_targets_mean": 2467.2, | |
| "valid_targets_min": 1189 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.6271200037020991, | |
| "learning_rate": 2.4468095498959086e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26200708746910095, | |
| "step": 1870, | |
| "valid_targets_mean": 2224.4, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 3.409090909090909, | |
| "grad_norm": 0.6109998547395447, | |
| "learning_rate": 2.4379674960904867e-05, | |
| "loss": 0.2733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2885187268257141, | |
| "step": 1875, | |
| "valid_targets_mean": 2711.4, | |
| "valid_targets_min": 1059 | |
| }, | |
| { | |
| "epoch": 3.418181818181818, | |
| "grad_norm": 0.6505678700735392, | |
| "learning_rate": 2.4291164416238994e-05, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2580758333206177, | |
| "step": 1880, | |
| "valid_targets_mean": 2137.9, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 3.4272727272727272, | |
| "grad_norm": 0.5212654882972313, | |
| "learning_rate": 2.4202565683939872e-05, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22886228561401367, | |
| "step": 1885, | |
| "valid_targets_mean": 3005.4, | |
| "valid_targets_min": 1378 | |
| }, | |
| { | |
| "epoch": 3.4363636363636365, | |
| "grad_norm": 0.5353927980487402, | |
| "learning_rate": 2.411388058479827e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27620387077331543, | |
| "step": 1890, | |
| "valid_targets_mean": 3240.9, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 3.4454545454545453, | |
| "grad_norm": 0.6708922860894471, | |
| "learning_rate": 2.402511094137987e-05, | |
| "loss": 0.2378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2250135838985443, | |
| "step": 1895, | |
| "valid_targets_mean": 2237.9, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 3.4545454545454546, | |
| "grad_norm": 0.574944483109974, | |
| "learning_rate": 2.3936258577987807e-05, | |
| "loss": 0.25, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23620785772800446, | |
| "step": 1900, | |
| "valid_targets_mean": 2808.0, | |
| "valid_targets_min": 1004 | |
| }, | |
| { | |
| "epoch": 3.463636363636364, | |
| "grad_norm": 0.6494130584639681, | |
| "learning_rate": 2.3847325320625223e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23063689470291138, | |
| "step": 1905, | |
| "valid_targets_mean": 2713.1, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 3.4727272727272727, | |
| "grad_norm": 0.5567043968943757, | |
| "learning_rate": 2.3758312996957676e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19316011667251587, | |
| "step": 1910, | |
| "valid_targets_mean": 2498.1, | |
| "valid_targets_min": 1214 | |
| }, | |
| { | |
| "epoch": 3.481818181818182, | |
| "grad_norm": 0.6610141845828834, | |
| "learning_rate": 2.366922343627565e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27701085805892944, | |
| "step": 1915, | |
| "valid_targets_mean": 2537.4, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 3.4909090909090907, | |
| "grad_norm": 0.6783407015279428, | |
| "learning_rate": 2.358005846945689e-05, | |
| "loss": 0.2613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30464065074920654, | |
| "step": 1920, | |
| "valid_targets_mean": 3082.9, | |
| "valid_targets_min": 1161 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.6172433110892577, | |
| "learning_rate": 2.349081992892885e-05, | |
| "loss": 0.2377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22736988961696625, | |
| "step": 1925, | |
| "valid_targets_mean": 2372.8, | |
| "valid_targets_min": 1144 | |
| }, | |
| { | |
| "epoch": 3.509090909090909, | |
| "grad_norm": 0.6146777979649554, | |
| "learning_rate": 2.3401509648630954e-05, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2371920943260193, | |
| "step": 1930, | |
| "valid_targets_mean": 2275.1, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 3.518181818181818, | |
| "grad_norm": 0.6145076071221921, | |
| "learning_rate": 2.331212946397698e-05, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27125149965286255, | |
| "step": 1935, | |
| "valid_targets_mean": 2734.1, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 3.5272727272727273, | |
| "grad_norm": 0.6210041121322781, | |
| "learning_rate": 2.3222681211817287e-05, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2806406319141388, | |
| "step": 1940, | |
| "valid_targets_mean": 2847.0, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 3.536363636363636, | |
| "grad_norm": 0.6803398191009827, | |
| "learning_rate": 2.31331667304011e-05, | |
| "loss": 0.2648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2533523440361023, | |
| "step": 1945, | |
| "valid_targets_mean": 2696.0, | |
| "valid_targets_min": 1044 | |
| }, | |
| { | |
| "epoch": 3.5454545454545454, | |
| "grad_norm": 0.5679755520761927, | |
| "learning_rate": 2.3043587859338735e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19747194647789001, | |
| "step": 1950, | |
| "valid_targets_mean": 2299.3, | |
| "valid_targets_min": 1356 | |
| }, | |
| { | |
| "epoch": 3.5545454545454547, | |
| "grad_norm": 0.5612673714925396, | |
| "learning_rate": 2.2953946439563736e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24574324488639832, | |
| "step": 1955, | |
| "valid_targets_mean": 3264.1, | |
| "valid_targets_min": 854 | |
| }, | |
| { | |
| "epoch": 3.5636363636363635, | |
| "grad_norm": 0.41355474011693366, | |
| "learning_rate": 2.286424431329513e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20943892002105713, | |
| "step": 1960, | |
| "valid_targets_mean": 4555.5, | |
| "valid_targets_min": 1176 | |
| }, | |
| { | |
| "epoch": 3.5727272727272728, | |
| "grad_norm": 0.6210477048431411, | |
| "learning_rate": 2.277448332399949e-05, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31793659925460815, | |
| "step": 1965, | |
| "valid_targets_mean": 3210.2, | |
| "valid_targets_min": 1783 | |
| }, | |
| { | |
| "epoch": 3.581818181818182, | |
| "grad_norm": 0.6810529070723943, | |
| "learning_rate": 2.2684665316353112e-05, | |
| "loss": 0.2648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29948270320892334, | |
| "step": 1970, | |
| "valid_targets_mean": 2187.7, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 3.590909090909091, | |
| "grad_norm": 0.6515936749324243, | |
| "learning_rate": 2.2594792136204037e-05, | |
| "loss": 0.241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.222039595246315, | |
| "step": 1975, | |
| "valid_targets_mean": 1998.1, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.5732736942837675, | |
| "learning_rate": 2.250486563053419e-05, | |
| "loss": 0.2346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20268699526786804, | |
| "step": 1980, | |
| "valid_targets_mean": 2295.4, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 3.6090909090909093, | |
| "grad_norm": 0.5688575835690776, | |
| "learning_rate": 2.241488764742135e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25517329573631287, | |
| "step": 1985, | |
| "valid_targets_mean": 3159.9, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 3.618181818181818, | |
| "grad_norm": 0.5306274568561311, | |
| "learning_rate": 2.232486003600126e-05, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22559969127178192, | |
| "step": 1990, | |
| "valid_targets_mean": 2713.6, | |
| "valid_targets_min": 1392 | |
| }, | |
| { | |
| "epoch": 3.6272727272727274, | |
| "grad_norm": 0.5048301936353792, | |
| "learning_rate": 2.223478464642952e-05, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18586412072181702, | |
| "step": 1995, | |
| "valid_targets_mean": 2830.0, | |
| "valid_targets_min": 1063 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.6212436631613655, | |
| "learning_rate": 2.2144663329843653e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27123504877090454, | |
| "step": 2000, | |
| "valid_targets_mean": 2453.6, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 3.6454545454545455, | |
| "grad_norm": 0.590331837924596, | |
| "learning_rate": 2.205449793832502e-05, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2864503264427185, | |
| "step": 2005, | |
| "valid_targets_mean": 2854.1, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 3.6545454545454543, | |
| "grad_norm": 0.5585937656084462, | |
| "learning_rate": 2.1964290324860746e-05, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23903991281986237, | |
| "step": 2010, | |
| "valid_targets_mean": 2668.5, | |
| "valid_targets_min": 907 | |
| }, | |
| { | |
| "epoch": 3.6636363636363636, | |
| "grad_norm": 0.5514489995806369, | |
| "learning_rate": 2.1874042343305685e-05, | |
| "loss": 0.2517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22939041256904602, | |
| "step": 2015, | |
| "valid_targets_mean": 2583.8, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 3.672727272727273, | |
| "grad_norm": 0.5978802005083812, | |
| "learning_rate": 2.1783755848344276e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24126064777374268, | |
| "step": 2020, | |
| "valid_targets_mean": 2730.9, | |
| "valid_targets_min": 1473 | |
| }, | |
| { | |
| "epoch": 3.6818181818181817, | |
| "grad_norm": 0.53696951487926, | |
| "learning_rate": 2.1693432695452467e-05, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22672604024410248, | |
| "step": 2025, | |
| "valid_targets_mean": 2962.2, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 3.690909090909091, | |
| "grad_norm": 0.7675292875811535, | |
| "learning_rate": 2.1603074740859534e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21047256886959076, | |
| "step": 2030, | |
| "valid_targets_mean": 2160.3, | |
| "valid_targets_min": 1081 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.5006566376336993, | |
| "learning_rate": 2.1512683841509982e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21263140439987183, | |
| "step": 2035, | |
| "valid_targets_mean": 3089.0, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 3.709090909090909, | |
| "grad_norm": 0.56714645986811, | |
| "learning_rate": 2.1422261855025357e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21199214458465576, | |
| "step": 2040, | |
| "valid_targets_mean": 2500.3, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 3.7181818181818183, | |
| "grad_norm": 0.6494410349611806, | |
| "learning_rate": 2.133181063966608e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.281173974275589, | |
| "step": 2045, | |
| "valid_targets_mean": 3436.6, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 3.7272727272727275, | |
| "grad_norm": 0.612579658009119, | |
| "learning_rate": 2.1241332054293243e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26280784606933594, | |
| "step": 2050, | |
| "valid_targets_mean": 2701.9, | |
| "valid_targets_min": 1491 | |
| }, | |
| { | |
| "epoch": 3.7363636363636363, | |
| "grad_norm": 0.6118786743650745, | |
| "learning_rate": 2.115082795833044e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23389646410942078, | |
| "step": 2055, | |
| "valid_targets_mean": 2522.9, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 3.7454545454545456, | |
| "grad_norm": 0.6062010706814313, | |
| "learning_rate": 2.1060300211725496e-05, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2481895238161087, | |
| "step": 2060, | |
| "valid_targets_mean": 3095.2, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 3.7545454545454544, | |
| "grad_norm": 0.5047024357662666, | |
| "learning_rate": 2.096975067491233e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.257171630859375, | |
| "step": 2065, | |
| "valid_targets_mean": 3419.8, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 3.7636363636363637, | |
| "grad_norm": 0.7895766683015748, | |
| "learning_rate": 2.087918120877263e-05, | |
| "loss": 0.2674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3457374572753906, | |
| "step": 2070, | |
| "valid_targets_mean": 3181.0, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 3.7727272727272725, | |
| "grad_norm": 0.5562767734285217, | |
| "learning_rate": 2.0788593674597663e-05, | |
| "loss": 0.2401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25369274616241455, | |
| "step": 2075, | |
| "valid_targets_mean": 3111.6, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 3.7818181818181817, | |
| "grad_norm": 0.6009442711262772, | |
| "learning_rate": 2.0697989934050025e-05, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23003512620925903, | |
| "step": 2080, | |
| "valid_targets_mean": 2796.6, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 3.790909090909091, | |
| "grad_norm": 0.5951511305351987, | |
| "learning_rate": 2.0607371849125345e-05, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3040267825126648, | |
| "step": 2085, | |
| "valid_targets_mean": 2972.1, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.5979813146299011, | |
| "learning_rate": 2.0516741282114062e-05, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23099355399608612, | |
| "step": 2090, | |
| "valid_targets_mean": 2213.4, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 3.809090909090909, | |
| "grad_norm": 0.6242364839677382, | |
| "learning_rate": 2.0426100095563132e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22831767797470093, | |
| "step": 2095, | |
| "valid_targets_mean": 2692.9, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 3.8181818181818183, | |
| "grad_norm": 0.7299492478429815, | |
| "learning_rate": 2.0335450152237742e-05, | |
| "loss": 0.2376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2542460858821869, | |
| "step": 2100, | |
| "valid_targets_mean": 2546.2, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 3.827272727272727, | |
| "grad_norm": 0.6774426739953928, | |
| "learning_rate": 2.0244793315083043e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23378236591815948, | |
| "step": 2105, | |
| "valid_targets_mean": 3035.1, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 3.8363636363636364, | |
| "grad_norm": 0.6463334964796907, | |
| "learning_rate": 2.0154131447185876e-05, | |
| "loss": 0.25, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21351692080497742, | |
| "step": 2110, | |
| "valid_targets_mean": 2355.0, | |
| "valid_targets_min": 1037 | |
| }, | |
| { | |
| "epoch": 3.8454545454545457, | |
| "grad_norm": 1.0600862632862642, | |
| "learning_rate": 2.0063466411736447e-05, | |
| "loss": 0.2363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21131891012191772, | |
| "step": 2115, | |
| "valid_targets_mean": 2814.6, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 3.8545454545454545, | |
| "grad_norm": 0.5765173759608054, | |
| "learning_rate": 1.997280007199008e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2817190885543823, | |
| "step": 2120, | |
| "valid_targets_mean": 2889.8, | |
| "valid_targets_min": 1483 | |
| }, | |
| { | |
| "epoch": 3.8636363636363638, | |
| "grad_norm": 0.5816941751168061, | |
| "learning_rate": 1.9882134291228877e-05, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18958780169487, | |
| "step": 2125, | |
| "valid_targets_mean": 2203.9, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 3.8727272727272726, | |
| "grad_norm": 0.594692421630156, | |
| "learning_rate": 1.9791470932723486e-05, | |
| "loss": 0.2777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2956348657608032, | |
| "step": 2130, | |
| "valid_targets_mean": 2760.8, | |
| "valid_targets_min": 890 | |
| }, | |
| { | |
| "epoch": 3.881818181818182, | |
| "grad_norm": 0.5323715954369375, | |
| "learning_rate": 1.9700811859694734e-05, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22484076023101807, | |
| "step": 2135, | |
| "valid_targets_mean": 3079.9, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 3.8909090909090907, | |
| "grad_norm": 0.6003638416089673, | |
| "learning_rate": 1.961015893527541e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22621342539787292, | |
| "step": 2140, | |
| "valid_targets_mean": 2506.4, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 0.6244575549178024, | |
| "learning_rate": 1.9519514022471933e-05, | |
| "loss": 0.2311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24318262934684753, | |
| "step": 2145, | |
| "valid_targets_mean": 2432.9, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 3.909090909090909, | |
| "grad_norm": 0.591385714850753, | |
| "learning_rate": 1.942887898412608e-05, | |
| "loss": 0.2367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2611646056175232, | |
| "step": 2150, | |
| "valid_targets_mean": 2616.4, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 3.918181818181818, | |
| "grad_norm": 0.5568234541673653, | |
| "learning_rate": 1.9338255682876682e-05, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2828320264816284, | |
| "step": 2155, | |
| "valid_targets_mean": 3327.7, | |
| "valid_targets_min": 1790 | |
| }, | |
| { | |
| "epoch": 3.9272727272727272, | |
| "grad_norm": 0.6157245144679648, | |
| "learning_rate": 1.924764598112138e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3046063780784607, | |
| "step": 2160, | |
| "valid_targets_mean": 2769.7, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 3.9363636363636365, | |
| "grad_norm": 0.5875934472439608, | |
| "learning_rate": 1.9157051740978326e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32576432824134827, | |
| "step": 2165, | |
| "valid_targets_mean": 2968.4, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 3.9454545454545453, | |
| "grad_norm": 0.5789081321587037, | |
| "learning_rate": 1.9066474824247913e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2773364186286926, | |
| "step": 2170, | |
| "valid_targets_mean": 2954.2, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 3.9545454545454546, | |
| "grad_norm": 0.5559791711562166, | |
| "learning_rate": 1.8975917092374542e-05, | |
| "loss": 0.235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26809555292129517, | |
| "step": 2175, | |
| "valid_targets_mean": 3171.6, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 3.963636363636364, | |
| "grad_norm": 0.5597868437982705, | |
| "learning_rate": 1.888538040640831e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27784445881843567, | |
| "step": 2180, | |
| "valid_targets_mean": 2788.2, | |
| "valid_targets_min": 1314 | |
| }, | |
| { | |
| "epoch": 3.9727272727272727, | |
| "grad_norm": 0.5351246359697984, | |
| "learning_rate": 1.8794866626966834e-05, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21700698137283325, | |
| "step": 2185, | |
| "valid_targets_mean": 3254.6, | |
| "valid_targets_min": 1469 | |
| }, | |
| { | |
| "epoch": 3.981818181818182, | |
| "grad_norm": 0.5965100509204639, | |
| "learning_rate": 1.8704377614196963e-05, | |
| "loss": 0.2584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27556589245796204, | |
| "step": 2190, | |
| "valid_targets_mean": 3114.7, | |
| "valid_targets_min": 1228 | |
| }, | |
| { | |
| "epoch": 3.990909090909091, | |
| "grad_norm": 0.6058379024886905, | |
| "learning_rate": 1.8613915227736584e-05, | |
| "loss": 0.2358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28704237937927246, | |
| "step": 2195, | |
| "valid_targets_mean": 2558.3, | |
| "valid_targets_min": 1185 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5354214291936074, | |
| "learning_rate": 1.852348132667635e-05, | |
| "loss": 0.2383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21756961941719055, | |
| "step": 2200, | |
| "valid_targets_mean": 2745.4, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 4.009090909090909, | |
| "grad_norm": 0.63811517358803, | |
| "learning_rate": 1.843307776952155e-05, | |
| "loss": 0.2401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24117009341716766, | |
| "step": 2205, | |
| "valid_targets_mean": 2653.8, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 4.0181818181818185, | |
| "grad_norm": 0.5976703740808316, | |
| "learning_rate": 1.834270641415386e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2388264387845993, | |
| "step": 2210, | |
| "valid_targets_mean": 3296.8, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 4.027272727272727, | |
| "grad_norm": 0.6301202500239508, | |
| "learning_rate": 1.8252369117793172e-05, | |
| "loss": 0.2316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25437045097351074, | |
| "step": 2215, | |
| "valid_targets_mean": 2990.8, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 4.036363636363636, | |
| "grad_norm": 0.6135121353685487, | |
| "learning_rate": 1.8162067736959454e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2367469221353531, | |
| "step": 2220, | |
| "valid_targets_mean": 2717.5, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 4.045454545454546, | |
| "grad_norm": 0.5929711801863091, | |
| "learning_rate": 1.8071804127434545e-05, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22404639422893524, | |
| "step": 2225, | |
| "valid_targets_mean": 2649.3, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 4.054545454545455, | |
| "grad_norm": 0.5604357256623951, | |
| "learning_rate": 1.7981580144224066e-05, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21794027090072632, | |
| "step": 2230, | |
| "valid_targets_mean": 3121.7, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 4.0636363636363635, | |
| "grad_norm": 0.602733331600872, | |
| "learning_rate": 1.7891397641519272e-05, | |
| "loss": 0.211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22328242659568787, | |
| "step": 2235, | |
| "valid_targets_mean": 2848.9, | |
| "valid_targets_min": 1513 | |
| }, | |
| { | |
| "epoch": 4.072727272727272, | |
| "grad_norm": 0.6114398983536747, | |
| "learning_rate": 1.7801258472658964e-05, | |
| "loss": 0.2193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22472426295280457, | |
| "step": 2240, | |
| "valid_targets_mean": 2859.0, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 4.081818181818182, | |
| "grad_norm": 0.7949075687643492, | |
| "learning_rate": 1.7711164490091365e-05, | |
| "loss": 0.21, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22244544327259064, | |
| "step": 2245, | |
| "valid_targets_mean": 2770.4, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 4.090909090909091, | |
| "grad_norm": 0.5050344354024722, | |
| "learning_rate": 1.7621117545336098e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22612264752388, | |
| "step": 2250, | |
| "valid_targets_mean": 3871.1, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.5867166500193305, | |
| "learning_rate": 1.7531119488946107e-05, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21166576445102692, | |
| "step": 2255, | |
| "valid_targets_mean": 3124.7, | |
| "valid_targets_min": 1342 | |
| }, | |
| { | |
| "epoch": 4.109090909090909, | |
| "grad_norm": 0.7449351531483703, | |
| "learning_rate": 1.7441172170469634e-05, | |
| "loss": 0.2316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22294163703918457, | |
| "step": 2260, | |
| "valid_targets_mean": 2283.2, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 4.118181818181818, | |
| "grad_norm": 0.5806733663439438, | |
| "learning_rate": 1.7351277438412197e-05, | |
| "loss": 0.2336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16949409246444702, | |
| "step": 2265, | |
| "valid_targets_mean": 2851.5, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 4.127272727272727, | |
| "grad_norm": 0.6484255092516409, | |
| "learning_rate": 1.726143714019862e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22443990409374237, | |
| "step": 2270, | |
| "valid_targets_mean": 2461.2, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 4.136363636363637, | |
| "grad_norm": 0.7029797028928565, | |
| "learning_rate": 1.7171653122135065e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23379763960838318, | |
| "step": 2275, | |
| "valid_targets_mean": 2964.6, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 4.1454545454545455, | |
| "grad_norm": 0.6487919761685919, | |
| "learning_rate": 1.708192722937106e-05, | |
| "loss": 0.2327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24210688471794128, | |
| "step": 2280, | |
| "valid_targets_mean": 2856.1, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 4.154545454545454, | |
| "grad_norm": 0.5408810770304529, | |
| "learning_rate": 1.6992261305861635e-05, | |
| "loss": 0.22, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19399434328079224, | |
| "step": 2285, | |
| "valid_targets_mean": 3258.6, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 4.163636363636364, | |
| "grad_norm": 0.542802352358838, | |
| "learning_rate": 1.6902657194329357e-05, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261577308177948, | |
| "step": 2290, | |
| "valid_targets_mean": 3440.7, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 4.172727272727273, | |
| "grad_norm": 0.5891703591156177, | |
| "learning_rate": 1.681311673622651e-05, | |
| "loss": 0.2367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18795019388198853, | |
| "step": 2295, | |
| "valid_targets_mean": 2520.1, | |
| "valid_targets_min": 1166 | |
| }, | |
| { | |
| "epoch": 4.181818181818182, | |
| "grad_norm": 0.6252714768421596, | |
| "learning_rate": 1.6723641771697246e-05, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23848536610603333, | |
| "step": 2300, | |
| "valid_targets_mean": 2629.7, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 4.190909090909091, | |
| "grad_norm": 0.6719808729681732, | |
| "learning_rate": 1.663423413953976e-05, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22473207116127014, | |
| "step": 2305, | |
| "valid_targets_mean": 2721.4, | |
| "valid_targets_min": 1066 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.6968029217292044, | |
| "learning_rate": 1.6544895677168483e-05, | |
| "loss": 0.2203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23242871463298798, | |
| "step": 2310, | |
| "valid_targets_mean": 2373.2, | |
| "valid_targets_min": 1193 | |
| }, | |
| { | |
| "epoch": 4.209090909090909, | |
| "grad_norm": 0.5804420649431439, | |
| "learning_rate": 1.6455628220576357e-05, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23284761607646942, | |
| "step": 2315, | |
| "valid_targets_mean": 3136.9, | |
| "valid_targets_min": 1284 | |
| }, | |
| { | |
| "epoch": 4.218181818181818, | |
| "grad_norm": 0.6666888014451425, | |
| "learning_rate": 1.6366433604297072e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26939576864242554, | |
| "step": 2320, | |
| "valid_targets_mean": 2871.4, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 4.2272727272727275, | |
| "grad_norm": 0.5785967965305059, | |
| "learning_rate": 1.62773136613674e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24148644506931305, | |
| "step": 2325, | |
| "valid_targets_mean": 2955.4, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 4.236363636363636, | |
| "grad_norm": 0.5265728273630983, | |
| "learning_rate": 1.6188270223289483e-05, | |
| "loss": 0.204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19100099802017212, | |
| "step": 2330, | |
| "valid_targets_mean": 3084.8, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 4.245454545454545, | |
| "grad_norm": 0.644345992130894, | |
| "learning_rate": 1.609930511999321e-05, | |
| "loss": 0.2379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2727709412574768, | |
| "step": 2335, | |
| "valid_targets_mean": 3700.1, | |
| "valid_targets_min": 1440 | |
| }, | |
| { | |
| "epoch": 4.254545454545455, | |
| "grad_norm": 0.5736669297714321, | |
| "learning_rate": 1.6010420179798623e-05, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2274719476699829, | |
| "step": 2340, | |
| "valid_targets_mean": 2947.7, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 4.263636363636364, | |
| "grad_norm": 0.4854164691385361, | |
| "learning_rate": 1.5921617229378338e-05, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25527408719062805, | |
| "step": 2345, | |
| "valid_targets_mean": 4699.1, | |
| "valid_targets_min": 927 | |
| }, | |
| { | |
| "epoch": 4.2727272727272725, | |
| "grad_norm": 0.6550718663752602, | |
| "learning_rate": 1.583289809372e-05, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19673293828964233, | |
| "step": 2350, | |
| "valid_targets_mean": 2131.5, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 4.281818181818182, | |
| "grad_norm": 0.6070306508847103, | |
| "learning_rate": 1.5744264596088763e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2816806435585022, | |
| "step": 2355, | |
| "valid_targets_mean": 3366.7, | |
| "valid_targets_min": 818 | |
| }, | |
| { | |
| "epoch": 4.290909090909091, | |
| "grad_norm": 0.6690799082664834, | |
| "learning_rate": 1.5655718557989848e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2496490776538849, | |
| "step": 2360, | |
| "valid_targets_mean": 2725.5, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 0.5695334533687919, | |
| "learning_rate": 1.5567261799131102e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25420039892196655, | |
| "step": 2365, | |
| "valid_targets_mean": 3258.9, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 4.309090909090909, | |
| "grad_norm": 0.6093414327334828, | |
| "learning_rate": 1.5478896137385584e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2125495821237564, | |
| "step": 2370, | |
| "valid_targets_mean": 2488.6, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 4.318181818181818, | |
| "grad_norm": 0.6250240012975116, | |
| "learning_rate": 1.5390623388754232e-05, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2510995864868164, | |
| "step": 2375, | |
| "valid_targets_mean": 3127.1, | |
| "valid_targets_min": 1117 | |
| }, | |
| { | |
| "epoch": 4.327272727272727, | |
| "grad_norm": 0.5186773991836232, | |
| "learning_rate": 1.5302445367328507e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19779804348945618, | |
| "step": 2380, | |
| "valid_targets_mean": 3112.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 4.336363636363636, | |
| "grad_norm": 0.6104190805746833, | |
| "learning_rate": 1.5214363885253156e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2571101784706116, | |
| "step": 2385, | |
| "valid_targets_mean": 3122.3, | |
| "valid_targets_min": 1132 | |
| }, | |
| { | |
| "epoch": 4.345454545454546, | |
| "grad_norm": 0.642267748594081, | |
| "learning_rate": 1.5126380752688934e-05, | |
| "loss": 0.2371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21655862033367157, | |
| "step": 2390, | |
| "valid_targets_mean": 2603.0, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 4.3545454545454545, | |
| "grad_norm": 0.9587976914904419, | |
| "learning_rate": 1.503849777777543e-05, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2582545876502991, | |
| "step": 2395, | |
| "valid_targets_mean": 1959.4, | |
| "valid_targets_min": 660 | |
| }, | |
| { | |
| "epoch": 4.363636363636363, | |
| "grad_norm": 0.6358892336631516, | |
| "learning_rate": 1.4950716766593872e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34545302391052246, | |
| "step": 2400, | |
| "valid_targets_mean": 3421.1, | |
| "valid_targets_min": 1122 | |
| }, | |
| { | |
| "epoch": 4.372727272727273, | |
| "grad_norm": 0.5952265307192156, | |
| "learning_rate": 1.4863039523130054e-05, | |
| "loss": 0.2322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2792406678199768, | |
| "step": 2405, | |
| "valid_targets_mean": 3371.1, | |
| "valid_targets_min": 1372 | |
| }, | |
| { | |
| "epoch": 4.381818181818182, | |
| "grad_norm": 0.5570515523505959, | |
| "learning_rate": 1.4775467849237234e-05, | |
| "loss": 0.2185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1831461489200592, | |
| "step": 2410, | |
| "valid_targets_mean": 2825.3, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 4.390909090909091, | |
| "grad_norm": 0.5578379745764596, | |
| "learning_rate": 1.468800354459912e-05, | |
| "loss": 0.2505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2448475956916809, | |
| "step": 2415, | |
| "valid_targets_mean": 3258.2, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.6261795588390912, | |
| "learning_rate": 1.4600648406692863e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2323095053434372, | |
| "step": 2420, | |
| "valid_targets_mean": 2830.1, | |
| "valid_targets_min": 986 | |
| }, | |
| { | |
| "epoch": 4.409090909090909, | |
| "grad_norm": 0.6684524920292939, | |
| "learning_rate": 1.451340423075214e-05, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24161775410175323, | |
| "step": 2425, | |
| "valid_targets_mean": 2482.1, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 4.418181818181818, | |
| "grad_norm": 0.7897405656539278, | |
| "learning_rate": 1.4426272809730248e-05, | |
| "loss": 0.2275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24546557664871216, | |
| "step": 2430, | |
| "valid_targets_mean": 3149.2, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 4.427272727272728, | |
| "grad_norm": 0.6417621177432642, | |
| "learning_rate": 1.433925593426326e-05, | |
| "loss": 0.1884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18926918506622314, | |
| "step": 2435, | |
| "valid_targets_mean": 2471.9, | |
| "valid_targets_min": 1215 | |
| }, | |
| { | |
| "epoch": 4.4363636363636365, | |
| "grad_norm": 0.5720794355531269, | |
| "learning_rate": 1.4252355392633237e-05, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2298586070537567, | |
| "step": 2440, | |
| "valid_targets_mean": 2885.1, | |
| "valid_targets_min": 1001 | |
| }, | |
| { | |
| "epoch": 4.445454545454545, | |
| "grad_norm": 0.6127734036606598, | |
| "learning_rate": 1.4165572970731435e-05, | |
| "loss": 0.232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25842714309692383, | |
| "step": 2445, | |
| "valid_targets_mean": 3069.7, | |
| "valid_targets_min": 1390 | |
| }, | |
| { | |
| "epoch": 4.454545454545454, | |
| "grad_norm": 0.6598183505433317, | |
| "learning_rate": 1.4078910452021664e-05, | |
| "loss": 0.2309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26327598094940186, | |
| "step": 2450, | |
| "valid_targets_mean": 3190.2, | |
| "valid_targets_min": 1526 | |
| }, | |
| { | |
| "epoch": 4.463636363636364, | |
| "grad_norm": 0.5855757963676725, | |
| "learning_rate": 1.3992369617503594e-05, | |
| "loss": 0.2145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2034580111503601, | |
| "step": 2455, | |
| "valid_targets_mean": 2694.4, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 4.472727272727273, | |
| "grad_norm": 0.7452063256908972, | |
| "learning_rate": 1.3905952245676173e-05, | |
| "loss": 0.2053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20218415558338165, | |
| "step": 2460, | |
| "valid_targets_mean": 2034.1, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 4.4818181818181815, | |
| "grad_norm": 0.595742074848808, | |
| "learning_rate": 1.3819660112501054e-05, | |
| "loss": 0.2341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20664814114570618, | |
| "step": 2465, | |
| "valid_targets_mean": 2481.1, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 4.490909090909091, | |
| "grad_norm": 0.6514179010516953, | |
| "learning_rate": 1.3733494991366128e-05, | |
| "loss": 0.2263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2232770323753357, | |
| "step": 2470, | |
| "valid_targets_mean": 2275.2, | |
| "valid_targets_min": 1087 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.6180164291435978, | |
| "learning_rate": 1.364745865304906e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2033901810646057, | |
| "step": 2475, | |
| "valid_targets_mean": 2587.8, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 4.509090909090909, | |
| "grad_norm": 0.5871826019069449, | |
| "learning_rate": 1.3561552865680899e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2353324145078659, | |
| "step": 2480, | |
| "valid_targets_mean": 2887.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 4.5181818181818185, | |
| "grad_norm": 0.6644157928804563, | |
| "learning_rate": 1.3475779394709754e-05, | |
| "loss": 0.2183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20248183608055115, | |
| "step": 2485, | |
| "valid_targets_mean": 2442.0, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 4.527272727272727, | |
| "grad_norm": 0.6995359046838049, | |
| "learning_rate": 1.3390140002864481e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2637696862220764, | |
| "step": 2490, | |
| "valid_targets_mean": 2592.6, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 4.536363636363636, | |
| "grad_norm": 0.6402215815007072, | |
| "learning_rate": 1.3304636450118495e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21194347739219666, | |
| "step": 2495, | |
| "valid_targets_mean": 2424.1, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 0.527751097417042, | |
| "learning_rate": 1.3219270493653587e-05, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2287774384021759, | |
| "step": 2500, | |
| "valid_targets_mean": 3634.9, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 4.554545454545455, | |
| "grad_norm": 0.6224833314845171, | |
| "learning_rate": 1.3134043887823807e-05, | |
| "loss": 0.2242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17964226007461548, | |
| "step": 2505, | |
| "valid_targets_mean": 2677.7, | |
| "valid_targets_min": 1474 | |
| }, | |
| { | |
| "epoch": 4.5636363636363635, | |
| "grad_norm": 0.4945668374502826, | |
| "learning_rate": 1.3048958384119397e-05, | |
| "loss": 0.224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19316023588180542, | |
| "step": 2510, | |
| "valid_targets_mean": 3429.4, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 4.572727272727272, | |
| "grad_norm": 0.6840328864843762, | |
| "learning_rate": 1.2964015731130836e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27236825227737427, | |
| "step": 2515, | |
| "valid_targets_mean": 2695.2, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 4.581818181818182, | |
| "grad_norm": 0.7745052573395034, | |
| "learning_rate": 1.2879217674512865e-05, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22089771926403046, | |
| "step": 2520, | |
| "valid_targets_mean": 2492.4, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 4.590909090909091, | |
| "grad_norm": 0.5821037352524397, | |
| "learning_rate": 1.279456595694864e-05, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2236514389514923, | |
| "step": 2525, | |
| "valid_targets_mean": 2995.3, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.6382341766426172, | |
| "learning_rate": 1.2710062318113887e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3152768611907959, | |
| "step": 2530, | |
| "valid_targets_mean": 2937.4, | |
| "valid_targets_min": 1238 | |
| }, | |
| { | |
| "epoch": 4.609090909090909, | |
| "grad_norm": 0.8458728089172884, | |
| "learning_rate": 1.2625708494641188e-05, | |
| "loss": 0.206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21248075366020203, | |
| "step": 2535, | |
| "valid_targets_mean": 3374.9, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 4.618181818181818, | |
| "grad_norm": 0.6472939984619872, | |
| "learning_rate": 1.2541506220084262e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2410563975572586, | |
| "step": 2540, | |
| "valid_targets_mean": 2703.0, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 4.627272727272727, | |
| "grad_norm": 0.615416117105437, | |
| "learning_rate": 1.2457457224882356e-05, | |
| "loss": 0.2324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22178882360458374, | |
| "step": 2545, | |
| "valid_targets_mean": 2794.3, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 4.636363636363637, | |
| "grad_norm": 0.5951692823267178, | |
| "learning_rate": 1.237356323632468e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2083178460597992, | |
| "step": 2550, | |
| "valid_targets_mean": 2768.1, | |
| "valid_targets_min": 1038 | |
| }, | |
| { | |
| "epoch": 4.6454545454545455, | |
| "grad_norm": 0.6261777189553865, | |
| "learning_rate": 1.2289825978514882e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2828991711139679, | |
| "step": 2555, | |
| "valid_targets_mean": 2898.4, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 4.654545454545454, | |
| "grad_norm": 0.6988710752962709, | |
| "learning_rate": 1.2206247172335662e-05, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2878910005092621, | |
| "step": 2560, | |
| "valid_targets_mean": 2671.6, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 4.663636363636364, | |
| "grad_norm": 0.6660297859275669, | |
| "learning_rate": 1.2122828535413378e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2508106231689453, | |
| "step": 2565, | |
| "valid_targets_mean": 2528.4, | |
| "valid_targets_min": 892 | |
| }, | |
| { | |
| "epoch": 4.672727272727273, | |
| "grad_norm": 0.6306980294197534, | |
| "learning_rate": 1.2039571782082762e-05, | |
| "loss": 0.1944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19929194450378418, | |
| "step": 2570, | |
| "valid_targets_mean": 2654.8, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 4.681818181818182, | |
| "grad_norm": 0.6979301876844083, | |
| "learning_rate": 1.1956478623351652e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20347945392131805, | |
| "step": 2575, | |
| "valid_targets_mean": 2358.7, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 4.690909090909091, | |
| "grad_norm": 0.6279429718021498, | |
| "learning_rate": 1.187355076686589e-05, | |
| "loss": 0.2212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21126504242420197, | |
| "step": 2580, | |
| "valid_targets_mean": 2410.6, | |
| "valid_targets_min": 1279 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.5972306952510262, | |
| "learning_rate": 1.1790789916874172e-05, | |
| "loss": 0.2292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2583068013191223, | |
| "step": 2585, | |
| "valid_targets_mean": 2919.1, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 4.709090909090909, | |
| "grad_norm": 0.5300072603352336, | |
| "learning_rate": 1.1708197774193055e-05, | |
| "loss": 0.2237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21120886504650116, | |
| "step": 2590, | |
| "valid_targets_mean": 3532.8, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 4.718181818181818, | |
| "grad_norm": 0.6386463342420782, | |
| "learning_rate": 1.1625776036172006e-05, | |
| "loss": 0.2303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2677278518676758, | |
| "step": 2595, | |
| "valid_targets_mean": 2936.8, | |
| "valid_targets_min": 1543 | |
| }, | |
| { | |
| "epoch": 4.7272727272727275, | |
| "grad_norm": 0.5972696678353682, | |
| "learning_rate": 1.1543526396658475e-05, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22586411237716675, | |
| "step": 2600, | |
| "valid_targets_mean": 2978.4, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 4.736363636363636, | |
| "grad_norm": 0.615542151623342, | |
| "learning_rate": 1.1461450545963167e-05, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534103989601135, | |
| "step": 2605, | |
| "valid_targets_mean": 2970.2, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 4.745454545454545, | |
| "grad_norm": 0.6578937951778845, | |
| "learning_rate": 1.137955017082521e-05, | |
| "loss": 0.2202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22950537502765656, | |
| "step": 2610, | |
| "valid_targets_mean": 3471.8, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 4.754545454545455, | |
| "grad_norm": 0.6408798429955752, | |
| "learning_rate": 1.1297826954377587e-05, | |
| "loss": 0.224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2171386182308197, | |
| "step": 2615, | |
| "valid_targets_mean": 2560.1, | |
| "valid_targets_min": 1227 | |
| }, | |
| { | |
| "epoch": 4.763636363636364, | |
| "grad_norm": 0.5415984276640066, | |
| "learning_rate": 1.1216282576112436e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20664772391319275, | |
| "step": 2620, | |
| "valid_targets_mean": 3412.6, | |
| "valid_targets_min": 1017 | |
| }, | |
| { | |
| "epoch": 4.7727272727272725, | |
| "grad_norm": 0.5251749491334398, | |
| "learning_rate": 1.1134918711846651e-05, | |
| "loss": 0.2224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19899095594882965, | |
| "step": 2625, | |
| "valid_targets_mean": 2995.6, | |
| "valid_targets_min": 1573 | |
| }, | |
| { | |
| "epoch": 4.781818181818182, | |
| "grad_norm": 0.6290363031722594, | |
| "learning_rate": 1.1053737033687346e-05, | |
| "loss": 0.2061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21700023114681244, | |
| "step": 2630, | |
| "valid_targets_mean": 2623.8, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 4.790909090909091, | |
| "grad_norm": 0.6849123593454419, | |
| "learning_rate": 1.097273920999757e-05, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23019665479660034, | |
| "step": 2635, | |
| "valid_targets_mean": 3421.1, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.5771402980273382, | |
| "learning_rate": 1.0891926905361948e-05, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18821823596954346, | |
| "step": 2640, | |
| "valid_targets_mean": 2784.2, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 4.809090909090909, | |
| "grad_norm": 0.6207164760514525, | |
| "learning_rate": 1.081130178055251e-05, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23473981022834778, | |
| "step": 2645, | |
| "valid_targets_mean": 2909.9, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 4.818181818181818, | |
| "grad_norm": 0.5918362524255482, | |
| "learning_rate": 1.0730865492494593e-05, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24564619362354279, | |
| "step": 2650, | |
| "valid_targets_mean": 3247.2, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 4.827272727272727, | |
| "grad_norm": 0.5917360630872475, | |
| "learning_rate": 1.0650619694232704e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2355796992778778, | |
| "step": 2655, | |
| "valid_targets_mean": 2881.9, | |
| "valid_targets_min": 1472 | |
| }, | |
| { | |
| "epoch": 4.836363636363636, | |
| "grad_norm": 0.6108234295069259, | |
| "learning_rate": 1.057056603489665e-05, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25410670042037964, | |
| "step": 2660, | |
| "valid_targets_mean": 2952.7, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 4.845454545454546, | |
| "grad_norm": 0.6401240752548848, | |
| "learning_rate": 1.0490706159667534e-05, | |
| "loss": 0.2231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23201113939285278, | |
| "step": 2665, | |
| "valid_targets_mean": 2641.9, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 4.8545454545454545, | |
| "grad_norm": 0.6225393374592852, | |
| "learning_rate": 1.0411041709744063e-05, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24733442068099976, | |
| "step": 2670, | |
| "valid_targets_mean": 2914.0, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 4.863636363636363, | |
| "grad_norm": 0.6863647029544806, | |
| "learning_rate": 1.0331574322308722e-05, | |
| "loss": 0.2409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22286084294319153, | |
| "step": 2675, | |
| "valid_targets_mean": 2257.1, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 4.872727272727273, | |
| "grad_norm": 0.7067563529480779, | |
| "learning_rate": 1.0252305630494201e-05, | |
| "loss": 0.2072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17897702753543854, | |
| "step": 2680, | |
| "valid_targets_mean": 2485.5, | |
| "valid_targets_min": 1366 | |
| }, | |
| { | |
| "epoch": 4.881818181818182, | |
| "grad_norm": 0.6697464783481224, | |
| "learning_rate": 1.0173237263349776e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2502247393131256, | |
| "step": 2685, | |
| "valid_targets_mean": 2746.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 4.890909090909091, | |
| "grad_norm": 0.6173196084339941, | |
| "learning_rate": 1.0094370845807857e-05, | |
| "loss": 0.2335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19461512565612793, | |
| "step": 2690, | |
| "valid_targets_mean": 2824.4, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.6532101802559712, | |
| "learning_rate": 1.001570799865061e-05, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2121875137090683, | |
| "step": 2695, | |
| "valid_targets_mean": 2877.6, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 4.909090909090909, | |
| "grad_norm": 0.5488728499968432, | |
| "learning_rate": 9.937250338476607e-06, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29637664556503296, | |
| "step": 2700, | |
| "valid_targets_mean": 4819.6, | |
| "valid_targets_min": 1300 | |
| }, | |
| { | |
| "epoch": 4.918181818181818, | |
| "grad_norm": 0.6606645400766106, | |
| "learning_rate": 9.858999477667656e-06, | |
| "loss": 0.2025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23928359150886536, | |
| "step": 2705, | |
| "valid_targets_mean": 2634.3, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 4.927272727272728, | |
| "grad_norm": 0.544686799589651, | |
| "learning_rate": 9.780957024355591e-06, | |
| "loss": 0.2217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2297343909740448, | |
| "step": 2710, | |
| "valid_targets_mean": 3213.6, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 4.9363636363636365, | |
| "grad_norm": 0.5949988577753448, | |
| "learning_rate": 9.703124582389312e-06, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2422581911087036, | |
| "step": 2715, | |
| "valid_targets_mean": 3500.9, | |
| "valid_targets_min": 1391 | |
| }, | |
| { | |
| "epoch": 4.945454545454545, | |
| "grad_norm": 0.6392651426837851, | |
| "learning_rate": 9.62550375130175e-06, | |
| "loss": 0.241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23042556643486023, | |
| "step": 2720, | |
| "valid_targets_mean": 3213.1, | |
| "valid_targets_min": 1433 | |
| }, | |
| { | |
| "epoch": 4.954545454545455, | |
| "grad_norm": 0.6966050262955494, | |
| "learning_rate": 9.548096126277058e-06, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25255775451660156, | |
| "step": 2725, | |
| "valid_targets_mean": 3336.6, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 4.963636363636364, | |
| "grad_norm": 0.5928575852962964, | |
| "learning_rate": 9.470903298117744e-06, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2821640074253082, | |
| "step": 2730, | |
| "valid_targets_mean": 3400.3, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 4.972727272727273, | |
| "grad_norm": 0.6486168657443957, | |
| "learning_rate": 9.393926853212083e-06, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19834952056407928, | |
| "step": 2735, | |
| "valid_targets_mean": 2398.2, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 4.9818181818181815, | |
| "grad_norm": 0.48416111164674525, | |
| "learning_rate": 9.317168373501426e-06, | |
| "loss": 0.2, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21909256279468536, | |
| "step": 2740, | |
| "valid_targets_mean": 3984.8, | |
| "valid_targets_min": 633 | |
| }, | |
| { | |
| "epoch": 4.990909090909091, | |
| "grad_norm": 0.5590944867565344, | |
| "learning_rate": 9.240629436447752e-06, | |
| "loss": 0.2292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2566272020339966, | |
| "step": 2745, | |
| "valid_targets_mean": 3417.1, | |
| "valid_targets_min": 1627 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.6105837422351534, | |
| "learning_rate": 9.164311615001202e-06, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22403398156166077, | |
| "step": 2750, | |
| "valid_targets_mean": 2896.4, | |
| "valid_targets_min": 1010 | |
| }, | |
| { | |
| "epoch": 5.009090909090909, | |
| "grad_norm": 0.6206679943666114, | |
| "learning_rate": 9.08821647756778e-06, | |
| "loss": 0.2022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2819976210594177, | |
| "step": 2755, | |
| "valid_targets_mean": 3098.4, | |
| "valid_targets_min": 531 | |
| }, | |
| { | |
| "epoch": 5.0181818181818185, | |
| "grad_norm": 0.5869668965068552, | |
| "learning_rate": 9.012345587977129e-06, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18100440502166748, | |
| "step": 2760, | |
| "valid_targets_mean": 2610.1, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 5.027272727272727, | |
| "grad_norm": 0.6800086014403005, | |
| "learning_rate": 8.936700505450356e-06, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2488502562046051, | |
| "step": 2765, | |
| "valid_targets_mean": 2924.6, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 5.036363636363636, | |
| "grad_norm": 0.5733287013285376, | |
| "learning_rate": 8.861282784568045e-06, | |
| "loss": 0.2009, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20497402548789978, | |
| "step": 2770, | |
| "valid_targets_mean": 3032.6, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 5.045454545454546, | |
| "grad_norm": 0.5579965277111765, | |
| "learning_rate": 8.786093975238226e-06, | |
| "loss": 0.2287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19170859456062317, | |
| "step": 2775, | |
| "valid_targets_mean": 3130.4, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 5.054545454545455, | |
| "grad_norm": 0.5424604659652602, | |
| "learning_rate": 8.711135622664622e-06, | |
| "loss": 0.2048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1387612372636795, | |
| "step": 2780, | |
| "valid_targets_mean": 3084.8, | |
| "valid_targets_min": 1151 | |
| }, | |
| { | |
| "epoch": 5.0636363636363635, | |
| "grad_norm": 0.6300869155028218, | |
| "learning_rate": 8.636409267314806e-06, | |
| "loss": 0.1891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1826111376285553, | |
| "step": 2785, | |
| "valid_targets_mean": 2739.9, | |
| "valid_targets_min": 1336 | |
| }, | |
| { | |
| "epoch": 5.072727272727272, | |
| "grad_norm": 0.6418965447840044, | |
| "learning_rate": 8.561916444888618e-06, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18538132309913635, | |
| "step": 2790, | |
| "valid_targets_mean": 3084.9, | |
| "valid_targets_min": 1446 | |
| }, | |
| { | |
| "epoch": 5.081818181818182, | |
| "grad_norm": 1.0767392353239975, | |
| "learning_rate": 8.487658686286533e-06, | |
| "loss": 0.235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1957949846982956, | |
| "step": 2795, | |
| "valid_targets_mean": 2874.3, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 5.090909090909091, | |
| "grad_norm": 0.8212955769888806, | |
| "learning_rate": 8.413637517578246e-06, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21617895364761353, | |
| "step": 2800, | |
| "valid_targets_mean": 2496.1, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 1.2956418315388283, | |
| "learning_rate": 8.339854459971313e-06, | |
| "loss": 0.2267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.249013289809227, | |
| "step": 2805, | |
| "valid_targets_mean": 3000.8, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 5.109090909090909, | |
| "grad_norm": 0.6165988995663725, | |
| "learning_rate": 8.266311029779843e-06, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.224916011095047, | |
| "step": 2810, | |
| "valid_targets_mean": 3321.6, | |
| "valid_targets_min": 904 | |
| }, | |
| { | |
| "epoch": 5.118181818181818, | |
| "grad_norm": 0.7398245215368483, | |
| "learning_rate": 8.193008738393409e-06, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24344930052757263, | |
| "step": 2815, | |
| "valid_targets_mean": 2388.0, | |
| "valid_targets_min": 811 | |
| }, | |
| { | |
| "epoch": 5.127272727272727, | |
| "grad_norm": 0.6021280092554041, | |
| "learning_rate": 8.119949092245893e-06, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.170393168926239, | |
| "step": 2820, | |
| "valid_targets_mean": 2751.1, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 5.136363636363637, | |
| "grad_norm": 0.5909261979045841, | |
| "learning_rate": 8.047133592784626e-06, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1882145255804062, | |
| "step": 2825, | |
| "valid_targets_mean": 3168.0, | |
| "valid_targets_min": 1053 | |
| }, | |
| { | |
| "epoch": 5.1454545454545455, | |
| "grad_norm": 2.4155979064615596, | |
| "learning_rate": 7.974563736439454e-06, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15529166162014008, | |
| "step": 2830, | |
| "valid_targets_mean": 2921.1, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 5.154545454545454, | |
| "grad_norm": 0.6086422217400963, | |
| "learning_rate": 7.902241014592042e-06, | |
| "loss": 0.2193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20463624596595764, | |
| "step": 2835, | |
| "valid_targets_mean": 2895.8, | |
| "valid_targets_min": 1560 | |
| }, | |
| { | |
| "epoch": 5.163636363636364, | |
| "grad_norm": 0.6025680214907746, | |
| "learning_rate": 7.830166913545181e-06, | |
| "loss": 0.2184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20843620598316193, | |
| "step": 2840, | |
| "valid_targets_mean": 3195.0, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 5.172727272727273, | |
| "grad_norm": 0.5849462224788665, | |
| "learning_rate": 7.758342914492257e-06, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1787065714597702, | |
| "step": 2845, | |
| "valid_targets_mean": 3042.2, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 5.181818181818182, | |
| "grad_norm": 0.7462903199250011, | |
| "learning_rate": 7.686770493486835e-06, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18932729959487915, | |
| "step": 2850, | |
| "valid_targets_mean": 2514.8, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 5.190909090909091, | |
| "grad_norm": 0.6088941331030795, | |
| "learning_rate": 7.615451121412285e-06, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23179104924201965, | |
| "step": 2855, | |
| "valid_targets_mean": 2957.9, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.6362339563375317, | |
| "learning_rate": 7.5443862639516e-06, | |
| "loss": 0.2127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23404110968112946, | |
| "step": 2860, | |
| "valid_targets_mean": 2948.4, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 5.209090909090909, | |
| "grad_norm": 0.7425390338195141, | |
| "learning_rate": 7.4735773815572044e-06, | |
| "loss": 0.2001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2154429703950882, | |
| "step": 2865, | |
| "valid_targets_mean": 3096.9, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 5.218181818181818, | |
| "grad_norm": 0.6732004978716041, | |
| "learning_rate": 7.403025929421026e-06, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19037508964538574, | |
| "step": 2870, | |
| "valid_targets_mean": 2717.5, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 5.2272727272727275, | |
| "grad_norm": 0.5864918250329665, | |
| "learning_rate": 7.332733357444524e-06, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22506625950336456, | |
| "step": 2875, | |
| "valid_targets_mean": 3214.4, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 5.236363636363636, | |
| "grad_norm": 0.6619039179552094, | |
| "learning_rate": 7.262701110208936e-06, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2768268883228302, | |
| "step": 2880, | |
| "valid_targets_mean": 3115.4, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 5.245454545454545, | |
| "grad_norm": 0.8268864892205662, | |
| "learning_rate": 7.192930626945556e-06, | |
| "loss": 0.197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22147375345230103, | |
| "step": 2885, | |
| "valid_targets_mean": 2782.2, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 5.254545454545455, | |
| "grad_norm": 0.6740820931397506, | |
| "learning_rate": 7.123423341506168e-06, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20615090429782867, | |
| "step": 2890, | |
| "valid_targets_mean": 2661.6, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 5.263636363636364, | |
| "grad_norm": 0.6254507861499846, | |
| "learning_rate": 7.054180682333602e-06, | |
| "loss": 0.1946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18174424767494202, | |
| "step": 2895, | |
| "valid_targets_mean": 2556.6, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 5.2727272727272725, | |
| "grad_norm": 0.6507303440906587, | |
| "learning_rate": 6.985204072432348e-06, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17763866484165192, | |
| "step": 2900, | |
| "valid_targets_mean": 2767.8, | |
| "valid_targets_min": 1254 | |
| }, | |
| { | |
| "epoch": 5.281818181818182, | |
| "grad_norm": 0.615476352490313, | |
| "learning_rate": 6.916494929339315e-06, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19773416221141815, | |
| "step": 2905, | |
| "valid_targets_mean": 3307.8, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 5.290909090909091, | |
| "grad_norm": 0.6078720402135201, | |
| "learning_rate": 6.848054665094714e-06, | |
| "loss": 0.2094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18933966755867004, | |
| "step": 2910, | |
| "valid_targets_mean": 2963.9, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "grad_norm": 0.6578005853230192, | |
| "learning_rate": 6.779884686213043e-06, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23294946551322937, | |
| "step": 2915, | |
| "valid_targets_mean": 3041.9, | |
| "valid_targets_min": 1223 | |
| }, | |
| { | |
| "epoch": 5.309090909090909, | |
| "grad_norm": 0.6035763320821946, | |
| "learning_rate": 6.71198639365415e-06, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18072904646396637, | |
| "step": 2920, | |
| "valid_targets_mean": 3273.4, | |
| "valid_targets_min": 1488 | |
| }, | |
| { | |
| "epoch": 5.318181818181818, | |
| "grad_norm": 0.6502916000400304, | |
| "learning_rate": 6.644361182794494e-06, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22111941874027252, | |
| "step": 2925, | |
| "valid_targets_mean": 2933.1, | |
| "valid_targets_min": 1206 | |
| }, | |
| { | |
| "epoch": 5.327272727272727, | |
| "grad_norm": 0.4745748691092994, | |
| "learning_rate": 6.577010443398388e-06, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15541119873523712, | |
| "step": 2930, | |
| "valid_targets_mean": 3669.4, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 5.336363636363636, | |
| "grad_norm": 0.6421309510997331, | |
| "learning_rate": 6.50993555958954e-06, | |
| "loss": 0.204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20444971323013306, | |
| "step": 2935, | |
| "valid_targets_mean": 2923.8, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 5.345454545454546, | |
| "grad_norm": 0.6744054487187182, | |
| "learning_rate": 6.4431379098225185e-06, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18364882469177246, | |
| "step": 2940, | |
| "valid_targets_mean": 2425.6, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 5.3545454545454545, | |
| "grad_norm": 0.5763112027918369, | |
| "learning_rate": 6.376618866854485e-06, | |
| "loss": 0.2181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2389511615037918, | |
| "step": 2945, | |
| "valid_targets_mean": 3468.6, | |
| "valid_targets_min": 1635 | |
| }, | |
| { | |
| "epoch": 5.363636363636363, | |
| "grad_norm": 0.6042546105394436, | |
| "learning_rate": 6.310379797716946e-06, | |
| "loss": 0.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18797212839126587, | |
| "step": 2950, | |
| "valid_targets_mean": 2697.3, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 5.372727272727273, | |
| "grad_norm": 0.6206834860599623, | |
| "learning_rate": 6.24442206368766e-06, | |
| "loss": 0.1938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17718634009361267, | |
| "step": 2955, | |
| "valid_targets_mean": 2515.1, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 5.381818181818182, | |
| "grad_norm": 0.7073609169477288, | |
| "learning_rate": 6.178747020262708e-06, | |
| "loss": 0.2249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21706639230251312, | |
| "step": 2960, | |
| "valid_targets_mean": 2701.6, | |
| "valid_targets_min": 1219 | |
| }, | |
| { | |
| "epoch": 5.390909090909091, | |
| "grad_norm": 0.6983928267652495, | |
| "learning_rate": 6.1133560171285625e-06, | |
| "loss": 0.2115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1814163625240326, | |
| "step": 2965, | |
| "valid_targets_mean": 2054.0, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.9371617766106026, | |
| "learning_rate": 6.04825039813443e-06, | |
| "loss": 0.206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23152729868888855, | |
| "step": 2970, | |
| "valid_targets_mean": 3087.6, | |
| "valid_targets_min": 1067 | |
| }, | |
| { | |
| "epoch": 5.409090909090909, | |
| "grad_norm": 0.6439823648754108, | |
| "learning_rate": 5.983431501264545e-06, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20572486519813538, | |
| "step": 2975, | |
| "valid_targets_mean": 2651.6, | |
| "valid_targets_min": 1131 | |
| }, | |
| { | |
| "epoch": 5.418181818181818, | |
| "grad_norm": 0.6292024202102485, | |
| "learning_rate": 5.918900658610765e-06, | |
| "loss": 0.1932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22478453814983368, | |
| "step": 2980, | |
| "valid_targets_mean": 3044.0, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 5.427272727272728, | |
| "grad_norm": 0.7004939743631798, | |
| "learning_rate": 5.8546591963451226e-06, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33600202202796936, | |
| "step": 2985, | |
| "valid_targets_mean": 2969.0, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 5.4363636363636365, | |
| "grad_norm": 0.7212119858500032, | |
| "learning_rate": 5.790708434692627e-06, | |
| "loss": 0.2041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24133487045764923, | |
| "step": 2990, | |
| "valid_targets_mean": 2493.2, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 5.445454545454545, | |
| "grad_norm": 0.5878479725042247, | |
| "learning_rate": 5.727049687904076e-06, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27190083265304565, | |
| "step": 2995, | |
| "valid_targets_mean": 3558.1, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 5.454545454545454, | |
| "grad_norm": 0.7210660143522136, | |
| "learning_rate": 5.66368426422909e-06, | |
| "loss": 0.2017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24498572945594788, | |
| "step": 3000, | |
| "valid_targets_mean": 2708.6, | |
| "valid_targets_min": 893 | |
| }, | |
| { | |
| "epoch": 5.463636363636364, | |
| "grad_norm": 1.0935952651981136, | |
| "learning_rate": 5.60061346588922e-06, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006685733795166, | |
| "step": 3005, | |
| "valid_targets_mean": 2522.6, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 5.472727272727273, | |
| "grad_norm": 0.7427392414896691, | |
| "learning_rate": 5.537838589051155e-06, | |
| "loss": 0.2138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21275198459625244, | |
| "step": 3010, | |
| "valid_targets_mean": 2713.2, | |
| "valid_targets_min": 1302 | |
| }, | |
| { | |
| "epoch": 5.4818181818181815, | |
| "grad_norm": 0.6797980800187736, | |
| "learning_rate": 5.475360923800141e-06, | |
| "loss": 0.2157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28571492433547974, | |
| "step": 3015, | |
| "valid_targets_mean": 3031.4, | |
| "valid_targets_min": 1583 | |
| }, | |
| { | |
| "epoch": 5.490909090909091, | |
| "grad_norm": 0.5982952442155729, | |
| "learning_rate": 5.413181754113392e-06, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15068712830543518, | |
| "step": 3020, | |
| "valid_targets_mean": 2895.8, | |
| "valid_targets_min": 1189 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 0.8202460368123184, | |
| "learning_rate": 5.351302357833785e-06, | |
| "loss": 0.218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18848514556884766, | |
| "step": 3025, | |
| "valid_targets_mean": 2772.6, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 5.509090909090909, | |
| "grad_norm": 0.7535260717612179, | |
| "learning_rate": 5.289724006643529e-06, | |
| "loss": 0.2057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.233030304312706, | |
| "step": 3030, | |
| "valid_targets_mean": 2721.9, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 5.5181818181818185, | |
| "grad_norm": 0.6261585733081678, | |
| "learning_rate": 5.2284479660380906e-06, | |
| "loss": 0.2098, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2688322365283966, | |
| "step": 3035, | |
| "valid_targets_mean": 3717.1, | |
| "valid_targets_min": 1169 | |
| }, | |
| { | |
| "epoch": 5.527272727272727, | |
| "grad_norm": 0.9190797360368729, | |
| "learning_rate": 5.167475495300134e-06, | |
| "loss": 0.2059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1784450113773346, | |
| "step": 3040, | |
| "valid_targets_mean": 2067.2, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 5.536363636363636, | |
| "grad_norm": 0.8291481178474116, | |
| "learning_rate": 5.1068078474736695e-06, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1946008801460266, | |
| "step": 3045, | |
| "valid_targets_mean": 2897.1, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 5.545454545454545, | |
| "grad_norm": 0.7189873688072802, | |
| "learning_rate": 5.046446269338314e-06, | |
| "loss": 0.2177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24384161829948425, | |
| "step": 3050, | |
| "valid_targets_mean": 2461.3, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 5.554545454545455, | |
| "grad_norm": 0.730651619011706, | |
| "learning_rate": 4.986392001383633e-06, | |
| "loss": 0.195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19608566164970398, | |
| "step": 3055, | |
| "valid_targets_mean": 2166.1, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 5.5636363636363635, | |
| "grad_norm": 0.5985262442762385, | |
| "learning_rate": 4.926646277783675e-06, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28461572527885437, | |
| "step": 3060, | |
| "valid_targets_mean": 3714.2, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 5.572727272727272, | |
| "grad_norm": 0.6643144693677729, | |
| "learning_rate": 4.867210326371596e-06, | |
| "loss": 0.2388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25610047578811646, | |
| "step": 3065, | |
| "valid_targets_mean": 3391.6, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 5.581818181818182, | |
| "grad_norm": 0.6203488079735966, | |
| "learning_rate": 4.808085368614441e-06, | |
| "loss": 0.2193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19354897737503052, | |
| "step": 3070, | |
| "valid_targets_mean": 2905.4, | |
| "valid_targets_min": 1005 | |
| }, | |
| { | |
| "epoch": 5.590909090909091, | |
| "grad_norm": 0.6419618861722692, | |
| "learning_rate": 4.74927261958801e-06, | |
| "loss": 0.2114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23560906946659088, | |
| "step": 3075, | |
| "valid_targets_mean": 3003.8, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.6991571121030598, | |
| "learning_rate": 4.690773287951942e-06, | |
| "loss": 0.2316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2553230822086334, | |
| "step": 3080, | |
| "valid_targets_mean": 3061.7, | |
| "valid_targets_min": 949 | |
| }, | |
| { | |
| "epoch": 5.609090909090909, | |
| "grad_norm": 0.5994102701086738, | |
| "learning_rate": 4.632588575924795e-06, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.205175518989563, | |
| "step": 3085, | |
| "valid_targets_mean": 3021.4, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 5.618181818181818, | |
| "grad_norm": 0.7723586734348693, | |
| "learning_rate": 4.574719679259425e-06, | |
| "loss": 0.2213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2598193287849426, | |
| "step": 3090, | |
| "valid_targets_mean": 2370.8, | |
| "valid_targets_min": 876 | |
| }, | |
| { | |
| "epoch": 5.627272727272727, | |
| "grad_norm": 0.6452905331420319, | |
| "learning_rate": 4.5171677872183506e-06, | |
| "loss": 0.2359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2051079422235489, | |
| "step": 3095, | |
| "valid_targets_mean": 2581.9, | |
| "valid_targets_min": 1404 | |
| }, | |
| { | |
| "epoch": 5.636363636363637, | |
| "grad_norm": 0.8950147725188888, | |
| "learning_rate": 4.459934082549353e-06, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2329583317041397, | |
| "step": 3100, | |
| "valid_targets_mean": 2573.9, | |
| "valid_targets_min": 384 | |
| }, | |
| { | |
| "epoch": 5.6454545454545455, | |
| "grad_norm": 0.7301637246838704, | |
| "learning_rate": 4.4030197414611344e-06, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20358063280582428, | |
| "step": 3105, | |
| "valid_targets_mean": 2648.5, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 5.654545454545454, | |
| "grad_norm": 0.6920666316105244, | |
| "learning_rate": 4.346425933599165e-06, | |
| "loss": 0.2329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23128581047058105, | |
| "step": 3110, | |
| "valid_targets_mean": 2874.4, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 5.663636363636364, | |
| "grad_norm": 0.6614747524469519, | |
| "learning_rate": 4.2901538220216565e-06, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23644432425498962, | |
| "step": 3115, | |
| "valid_targets_mean": 2487.2, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 5.672727272727273, | |
| "grad_norm": 0.5958281168548293, | |
| "learning_rate": 4.234204563175625e-06, | |
| "loss": 0.2067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18708546459674835, | |
| "step": 3120, | |
| "valid_targets_mean": 2758.8, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 5.681818181818182, | |
| "grad_norm": 0.6424913341469746, | |
| "learning_rate": 4.17857930687318e-06, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19697518646717072, | |
| "step": 3125, | |
| "valid_targets_mean": 2944.0, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 5.690909090909091, | |
| "grad_norm": 0.7458618870765347, | |
| "learning_rate": 4.123279196267815e-06, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1647173911333084, | |
| "step": 3130, | |
| "valid_targets_mean": 2270.9, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "grad_norm": 0.599433937279404, | |
| "learning_rate": 4.068305367831002e-06, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18223410844802856, | |
| "step": 3135, | |
| "valid_targets_mean": 2552.9, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 5.709090909090909, | |
| "grad_norm": 0.730601336937522, | |
| "learning_rate": 4.013658951328769e-06, | |
| "loss": 0.2042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2571181654930115, | |
| "step": 3140, | |
| "valid_targets_mean": 2819.1, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 5.718181818181818, | |
| "grad_norm": 0.7330713546666404, | |
| "learning_rate": 3.95934106979853e-06, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22695882618427277, | |
| "step": 3145, | |
| "valid_targets_mean": 3382.2, | |
| "valid_targets_min": 1015 | |
| }, | |
| { | |
| "epoch": 5.7272727272727275, | |
| "grad_norm": 0.6755758105804183, | |
| "learning_rate": 3.905352839525962e-06, | |
| "loss": 0.1898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1930406093597412, | |
| "step": 3150, | |
| "valid_targets_mean": 2554.9, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 5.736363636363636, | |
| "grad_norm": 0.6885232091114, | |
| "learning_rate": 3.851695370022093e-06, | |
| "loss": 0.206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2078118473291397, | |
| "step": 3155, | |
| "valid_targets_mean": 2820.8, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 5.745454545454545, | |
| "grad_norm": 0.6170351845839065, | |
| "learning_rate": 3.7983697640005048e-06, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19898808002471924, | |
| "step": 3160, | |
| "valid_targets_mean": 2825.7, | |
| "valid_targets_min": 1201 | |
| }, | |
| { | |
| "epoch": 5.754545454545455, | |
| "grad_norm": 0.6069878106397814, | |
| "learning_rate": 3.7453771173546426e-06, | |
| "loss": 0.2057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20460426807403564, | |
| "step": 3165, | |
| "valid_targets_mean": 3422.9, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 5.763636363636364, | |
| "grad_norm": 0.9297530833632488, | |
| "learning_rate": 3.6927185191353188e-06, | |
| "loss": 0.1754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1826719045639038, | |
| "step": 3170, | |
| "valid_targets_mean": 2704.4, | |
| "valid_targets_min": 1095 | |
| }, | |
| { | |
| "epoch": 5.7727272727272725, | |
| "grad_norm": 0.7803037852084785, | |
| "learning_rate": 3.640395051528316e-06, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20684343576431274, | |
| "step": 3175, | |
| "valid_targets_mean": 2536.1, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 5.781818181818182, | |
| "grad_norm": 0.6068693039253785, | |
| "learning_rate": 3.5884077898321713e-06, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16533026099205017, | |
| "step": 3180, | |
| "valid_targets_mean": 2962.0, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 5.790909090909091, | |
| "grad_norm": 0.5920662855128672, | |
| "learning_rate": 3.536757802436039e-06, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21043473482131958, | |
| "step": 3185, | |
| "valid_targets_mean": 3233.5, | |
| "valid_targets_min": 1678 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.6175569755361424, | |
| "learning_rate": 3.4854461507977776e-06, | |
| "loss": 0.2042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1744406521320343, | |
| "step": 3190, | |
| "valid_targets_mean": 2969.8, | |
| "valid_targets_min": 1746 | |
| }, | |
| { | |
| "epoch": 5.809090909090909, | |
| "grad_norm": 0.6499549912985492, | |
| "learning_rate": 3.4344738894220964e-06, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18466490507125854, | |
| "step": 3195, | |
| "valid_targets_mean": 2702.5, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 5.818181818181818, | |
| "grad_norm": 0.6417333006893297, | |
| "learning_rate": 3.383842065838907e-06, | |
| "loss": 0.191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1893327832221985, | |
| "step": 3200, | |
| "valid_targets_mean": 2411.5, | |
| "valid_targets_min": 1231 | |
| }, | |
| { | |
| "epoch": 5.827272727272727, | |
| "grad_norm": 0.573769960160142, | |
| "learning_rate": 3.3335517205818e-06, | |
| "loss": 0.188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21150127053260803, | |
| "step": 3205, | |
| "valid_targets_mean": 3080.4, | |
| "valid_targets_min": 1333 | |
| }, | |
| { | |
| "epoch": 5.836363636363636, | |
| "grad_norm": 0.6773897146071632, | |
| "learning_rate": 3.2836038871666444e-06, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20320796966552734, | |
| "step": 3210, | |
| "valid_targets_mean": 2815.0, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 5.845454545454546, | |
| "grad_norm": 0.6795811187617684, | |
| "learning_rate": 3.2339995920703517e-06, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2739708423614502, | |
| "step": 3215, | |
| "valid_targets_mean": 3073.7, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 5.8545454545454545, | |
| "grad_norm": 0.633708555899195, | |
| "learning_rate": 3.184739854709784e-06, | |
| "loss": 0.2047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25379064679145813, | |
| "step": 3220, | |
| "valid_targets_mean": 3036.8, | |
| "valid_targets_min": 1249 | |
| }, | |
| { | |
| "epoch": 5.863636363636363, | |
| "grad_norm": 0.742362960713796, | |
| "learning_rate": 3.1358256874208214e-06, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1899518221616745, | |
| "step": 3225, | |
| "valid_targets_mean": 2378.8, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 5.872727272727273, | |
| "grad_norm": 0.6523582818519996, | |
| "learning_rate": 3.0872580954375177e-06, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21299681067466736, | |
| "step": 3230, | |
| "valid_targets_mean": 3180.2, | |
| "valid_targets_min": 1310 | |
| }, | |
| { | |
| "epoch": 5.881818181818182, | |
| "grad_norm": 0.6231922205998752, | |
| "learning_rate": 3.039038076871481e-06, | |
| "loss": 0.2108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21740776300430298, | |
| "step": 3235, | |
| "valid_targets_mean": 2775.8, | |
| "valid_targets_min": 1065 | |
| }, | |
| { | |
| "epoch": 5.890909090909091, | |
| "grad_norm": 0.7527402910704271, | |
| "learning_rate": 2.9911666226913374e-06, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2521063983440399, | |
| "step": 3240, | |
| "valid_targets_mean": 3143.5, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "grad_norm": 0.7085552104018334, | |
| "learning_rate": 2.9436447167023674e-06, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27146613597869873, | |
| "step": 3245, | |
| "valid_targets_mean": 2884.2, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 5.909090909090909, | |
| "grad_norm": 0.6308634682071596, | |
| "learning_rate": 2.896473335526313e-06, | |
| "loss": 0.2239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23091664910316467, | |
| "step": 3250, | |
| "valid_targets_mean": 3103.6, | |
| "valid_targets_min": 1442 | |
| }, | |
| { | |
| "epoch": 5.918181818181818, | |
| "grad_norm": 0.6557102602080491, | |
| "learning_rate": 2.849653448581271e-06, | |
| "loss": 0.1925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.193063884973526, | |
| "step": 3255, | |
| "valid_targets_mean": 2485.2, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 5.927272727272728, | |
| "grad_norm": 0.6376956968805698, | |
| "learning_rate": 2.8031860180617898e-06, | |
| "loss": 0.2473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25527384877204895, | |
| "step": 3260, | |
| "valid_targets_mean": 3161.6, | |
| "valid_targets_min": 1225 | |
| }, | |
| { | |
| "epoch": 5.9363636363636365, | |
| "grad_norm": 0.6524169940045439, | |
| "learning_rate": 2.757071998919094e-06, | |
| "loss": 0.2084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22266694903373718, | |
| "step": 3265, | |
| "valid_targets_mean": 3114.8, | |
| "valid_targets_min": 1017 | |
| }, | |
| { | |
| "epoch": 5.945454545454545, | |
| "grad_norm": 0.6834100735609628, | |
| "learning_rate": 2.7113123388414674e-06, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22942128777503967, | |
| "step": 3270, | |
| "valid_targets_mean": 2606.1, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 5.954545454545455, | |
| "grad_norm": 0.6232577902181976, | |
| "learning_rate": 2.665907978234754e-06, | |
| "loss": 0.2212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2783876061439514, | |
| "step": 3275, | |
| "valid_targets_mean": 3247.4, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 5.963636363636364, | |
| "grad_norm": 0.6630897162275445, | |
| "learning_rate": 2.6208598502030546e-06, | |
| "loss": 0.2265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2094637155532837, | |
| "step": 3280, | |
| "valid_targets_mean": 2547.4, | |
| "valid_targets_min": 1126 | |
| }, | |
| { | |
| "epoch": 5.972727272727273, | |
| "grad_norm": 0.7077469988970768, | |
| "learning_rate": 2.5761688805295305e-06, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22009465098381042, | |
| "step": 3285, | |
| "valid_targets_mean": 2548.6, | |
| "valid_targets_min": 1294 | |
| }, | |
| { | |
| "epoch": 5.9818181818181815, | |
| "grad_norm": 0.6715778542941565, | |
| "learning_rate": 2.531835987657407e-06, | |
| "loss": 0.2145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19744186103343964, | |
| "step": 3290, | |
| "valid_targets_mean": 2664.1, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 5.990909090909091, | |
| "grad_norm": 0.7233321904636786, | |
| "learning_rate": 2.487862082671064e-06, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24416381120681763, | |
| "step": 3295, | |
| "valid_targets_mean": 2344.6, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.6660871787495303, | |
| "learning_rate": 2.4442480692773398e-06, | |
| "loss": 0.2042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18832653760910034, | |
| "step": 3300, | |
| "valid_targets_mean": 2622.1, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 6.009090909090909, | |
| "grad_norm": 0.7375102372730574, | |
| "learning_rate": 2.400994843786939e-06, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507600784301758, | |
| "step": 3305, | |
| "valid_targets_mean": 2900.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 6.0181818181818185, | |
| "grad_norm": 0.6661010728917941, | |
| "learning_rate": 2.3581032950960215e-06, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2171628773212433, | |
| "step": 3310, | |
| "valid_targets_mean": 2410.8, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 6.027272727272727, | |
| "grad_norm": 0.6587778839047856, | |
| "learning_rate": 2.3155743046679468e-06, | |
| "loss": 0.1866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18798887729644775, | |
| "step": 3315, | |
| "valid_targets_mean": 2753.6, | |
| "valid_targets_min": 923 | |
| }, | |
| { | |
| "epoch": 6.036363636363636, | |
| "grad_norm": 0.6565679632927076, | |
| "learning_rate": 2.273408746515133e-06, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18916954100131989, | |
| "step": 3320, | |
| "valid_targets_mean": 2660.1, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 6.045454545454546, | |
| "grad_norm": 0.668772335037644, | |
| "learning_rate": 2.2316074871811157e-06, | |
| "loss": 0.1858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17284397780895233, | |
| "step": 3325, | |
| "valid_targets_mean": 2571.2, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 6.054545454545455, | |
| "grad_norm": 0.7117814748250814, | |
| "learning_rate": 2.190171385722726e-06, | |
| "loss": 0.2067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22605367004871368, | |
| "step": 3330, | |
| "valid_targets_mean": 2528.2, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 6.0636363636363635, | |
| "grad_norm": 0.6894262085614624, | |
| "learning_rate": 2.1491012936924548e-06, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20072168111801147, | |
| "step": 3335, | |
| "valid_targets_mean": 2462.0, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 6.072727272727272, | |
| "grad_norm": 0.6221932962788573, | |
| "learning_rate": 2.108398055120926e-06, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18132944405078888, | |
| "step": 3340, | |
| "valid_targets_mean": 2793.1, | |
| "valid_targets_min": 1665 | |
| }, | |
| { | |
| "epoch": 6.081818181818182, | |
| "grad_norm": 0.5952761142844478, | |
| "learning_rate": 2.068062506499584e-06, | |
| "loss": 0.1902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15214930474758148, | |
| "step": 3345, | |
| "valid_targets_mean": 2650.2, | |
| "valid_targets_min": 1429 | |
| }, | |
| { | |
| "epoch": 6.090909090909091, | |
| "grad_norm": 0.6486703925050182, | |
| "learning_rate": 2.0280954767634674e-06, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19928964972496033, | |
| "step": 3350, | |
| "valid_targets_mean": 2931.4, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "grad_norm": 0.6431480925390909, | |
| "learning_rate": 1.988497787274195e-06, | |
| "loss": 0.1851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2235788255929947, | |
| "step": 3355, | |
| "valid_targets_mean": 3253.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 6.109090909090909, | |
| "grad_norm": 0.669316954228609, | |
| "learning_rate": 1.9492702518030905e-06, | |
| "loss": 0.2015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20945610105991364, | |
| "step": 3360, | |
| "valid_targets_mean": 3001.4, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 6.118181818181818, | |
| "grad_norm": 0.6446973463301628, | |
| "learning_rate": 1.910413676514438e-06, | |
| "loss": 0.186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1692398488521576, | |
| "step": 3365, | |
| "valid_targets_mean": 2664.7, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 6.127272727272727, | |
| "grad_norm": 0.7129875180586859, | |
| "learning_rate": 1.8719288599489304e-06, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24184544384479523, | |
| "step": 3370, | |
| "valid_targets_mean": 2537.6, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 6.136363636363637, | |
| "grad_norm": 0.6831168369791281, | |
| "learning_rate": 1.833816593007256e-06, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1838299185037613, | |
| "step": 3375, | |
| "valid_targets_mean": 2457.5, | |
| "valid_targets_min": 1541 | |
| }, | |
| { | |
| "epoch": 6.1454545454545455, | |
| "grad_norm": 0.6179852974797505, | |
| "learning_rate": 1.796077658933848e-06, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19142737984657288, | |
| "step": 3380, | |
| "valid_targets_mean": 3180.6, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 6.154545454545454, | |
| "grad_norm": 0.7174728400905155, | |
| "learning_rate": 1.7587128333007709e-06, | |
| "loss": 0.1876, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1669439673423767, | |
| "step": 3385, | |
| "valid_targets_mean": 2359.4, | |
| "valid_targets_min": 1171 | |
| }, | |
| { | |
| "epoch": 6.163636363636364, | |
| "grad_norm": 0.7075411705418458, | |
| "learning_rate": 1.7217228839918098e-06, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20773687958717346, | |
| "step": 3390, | |
| "valid_targets_mean": 2524.9, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 6.172727272727273, | |
| "grad_norm": 0.6514407325688949, | |
| "learning_rate": 1.6851085711866598e-06, | |
| "loss": 0.1921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18543276190757751, | |
| "step": 3395, | |
| "valid_targets_mean": 2648.4, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 6.181818181818182, | |
| "grad_norm": 0.6901703887890732, | |
| "learning_rate": 1.648870647345322e-06, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1513475477695465, | |
| "step": 3400, | |
| "valid_targets_mean": 2702.0, | |
| "valid_targets_min": 1100 | |
| }, | |
| { | |
| "epoch": 6.190909090909091, | |
| "grad_norm": 0.6936967662817098, | |
| "learning_rate": 1.6130098571926468e-06, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23872001469135284, | |
| "step": 3405, | |
| "valid_targets_mean": 2802.6, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 0.954901470393649, | |
| "learning_rate": 1.577526937703e-06, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23395466804504395, | |
| "step": 3410, | |
| "valid_targets_mean": 2999.4, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 6.209090909090909, | |
| "grad_norm": 0.7301417694762667, | |
| "learning_rate": 1.5424226180851443e-06, | |
| "loss": 0.1896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22834432125091553, | |
| "step": 3415, | |
| "valid_targets_mean": 2777.3, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 6.218181818181818, | |
| "grad_norm": 0.722443584662105, | |
| "learning_rate": 1.5076976197672432e-06, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21865253150463104, | |
| "step": 3420, | |
| "valid_targets_mean": 2611.9, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 6.2272727272727275, | |
| "grad_norm": 0.6718376412603478, | |
| "learning_rate": 1.473352656382039e-06, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1492437869310379, | |
| "step": 3425, | |
| "valid_targets_mean": 2268.1, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 6.236363636363636, | |
| "grad_norm": 0.6385879578869108, | |
| "learning_rate": 1.439388433752178e-06, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21833261847496033, | |
| "step": 3430, | |
| "valid_targets_mean": 3357.3, | |
| "valid_targets_min": 1110 | |
| }, | |
| { | |
| "epoch": 6.245454545454545, | |
| "grad_norm": 0.6898363347432659, | |
| "learning_rate": 1.4058056498757112e-06, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21196958422660828, | |
| "step": 3435, | |
| "valid_targets_mean": 2529.1, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 6.254545454545455, | |
| "grad_norm": 0.6717448075871523, | |
| "learning_rate": 1.372604994911757e-06, | |
| "loss": 0.2076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18704624474048615, | |
| "step": 3440, | |
| "valid_targets_mean": 2528.5, | |
| "valid_targets_min": 811 | |
| }, | |
| { | |
| "epoch": 6.263636363636364, | |
| "grad_norm": 0.6170083596984325, | |
| "learning_rate": 1.3397871511662986e-06, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17854154109954834, | |
| "step": 3445, | |
| "valid_targets_mean": 2765.4, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 6.2727272727272725, | |
| "grad_norm": 0.5945835881991018, | |
| "learning_rate": 1.307352793078187e-06, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20941226184368134, | |
| "step": 3450, | |
| "valid_targets_mean": 3193.4, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 6.281818181818182, | |
| "grad_norm": 0.7318339507251856, | |
| "learning_rate": 1.275302587205256e-06, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22563597559928894, | |
| "step": 3455, | |
| "valid_targets_mean": 2949.8, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 6.290909090909091, | |
| "grad_norm": 0.5851474607507773, | |
| "learning_rate": 1.2436371922106404e-06, | |
| "loss": 0.209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22322428226470947, | |
| "step": 3460, | |
| "valid_targets_mean": 3700.2, | |
| "valid_targets_min": 1437 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "grad_norm": 0.5554704922918074, | |
| "learning_rate": 1.2123572588492306e-06, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18534304201602936, | |
| "step": 3465, | |
| "valid_targets_mean": 3815.0, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 6.309090909090909, | |
| "grad_norm": 0.6342467907091855, | |
| "learning_rate": 1.1814634299543103e-06, | |
| "loss": 0.1845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20048777759075165, | |
| "step": 3470, | |
| "valid_targets_mean": 2843.0, | |
| "valid_targets_min": 1148 | |
| }, | |
| { | |
| "epoch": 6.318181818181818, | |
| "grad_norm": 0.6654750037522876, | |
| "learning_rate": 1.1509563404243274e-06, | |
| "loss": 0.2293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21820786595344543, | |
| "step": 3475, | |
| "valid_targets_mean": 2823.8, | |
| "valid_targets_min": 1081 | |
| }, | |
| { | |
| "epoch": 6.327272727272727, | |
| "grad_norm": 0.6803497267833459, | |
| "learning_rate": 1.1208366172098684e-06, | |
| "loss": 0.1835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21640288829803467, | |
| "step": 3480, | |
| "valid_targets_mean": 2524.6, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 6.336363636363636, | |
| "grad_norm": 0.68657482835238, | |
| "learning_rate": 1.0911048793007484e-06, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23357044160366058, | |
| "step": 3485, | |
| "valid_targets_mean": 3060.0, | |
| "valid_targets_min": 1442 | |
| }, | |
| { | |
| "epoch": 6.345454545454546, | |
| "grad_norm": 0.6951836522091235, | |
| "learning_rate": 1.0617617377133205e-06, | |
| "loss": 0.2264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21933671832084656, | |
| "step": 3490, | |
| "valid_targets_mean": 3150.7, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 6.3545454545454545, | |
| "grad_norm": 0.9540180459343338, | |
| "learning_rate": 1.0328077954778904e-06, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21296435594558716, | |
| "step": 3495, | |
| "valid_targets_mean": 2526.2, | |
| "valid_targets_min": 1252 | |
| }, | |
| { | |
| "epoch": 6.363636363636363, | |
| "grad_norm": 0.7215486794128292, | |
| "learning_rate": 1.004243647626344e-06, | |
| "loss": 0.2293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2184891402721405, | |
| "step": 3500, | |
| "valid_targets_mean": 2418.1, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 6.372727272727273, | |
| "grad_norm": 0.7230020636540598, | |
| "learning_rate": 9.760698811799064e-07, | |
| "loss": 0.204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2301444411277771, | |
| "step": 3505, | |
| "valid_targets_mean": 2908.6, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 6.381818181818182, | |
| "grad_norm": 0.6685846433200956, | |
| "learning_rate": 9.482870751370755e-07, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17493730783462524, | |
| "step": 3510, | |
| "valid_targets_mean": 2570.5, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 6.390909090909091, | |
| "grad_norm": 0.6689691547111833, | |
| "learning_rate": 9.208958004617475e-07, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24776533246040344, | |
| "step": 3515, | |
| "valid_targets_mean": 2873.2, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.7556459012137372, | |
| "learning_rate": 8.938966200714482e-07, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2316136360168457, | |
| "step": 3520, | |
| "valid_targets_mean": 2475.9, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 6.409090909090909, | |
| "grad_norm": 0.6585887436395376, | |
| "learning_rate": 8.672900888257918e-07, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.247239351272583, | |
| "step": 3525, | |
| "valid_targets_mean": 3246.8, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 6.418181818181818, | |
| "grad_norm": 0.6424132876151116, | |
| "learning_rate": 8.410767535150599e-07, | |
| "loss": 0.2079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1996202915906906, | |
| "step": 3530, | |
| "valid_targets_mean": 2780.2, | |
| "valid_targets_min": 513 | |
| }, | |
| { | |
| "epoch": 6.427272727272728, | |
| "grad_norm": 0.6848931844740308, | |
| "learning_rate": 8.152571528489828e-07, | |
| "loss": 0.1997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19339044392108917, | |
| "step": 3535, | |
| "valid_targets_mean": 2968.0, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 6.4363636363636365, | |
| "grad_norm": 0.7963468724693825, | |
| "learning_rate": 7.898318174456498e-07, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25005707144737244, | |
| "step": 3540, | |
| "valid_targets_mean": 2918.1, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 6.445454545454545, | |
| "grad_norm": 0.6564266424153401, | |
| "learning_rate": 7.64801269820612e-07, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17174042761325836, | |
| "step": 3545, | |
| "valid_targets_mean": 2493.9, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 6.454545454545454, | |
| "grad_norm": 0.7142428582801825, | |
| "learning_rate": 7.401660243761543e-07, | |
| "loss": 0.1944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23733559250831604, | |
| "step": 3550, | |
| "valid_targets_mean": 2868.8, | |
| "valid_targets_min": 1479 | |
| }, | |
| { | |
| "epoch": 6.463636363636364, | |
| "grad_norm": 0.7267944382985336, | |
| "learning_rate": 7.159265873907006e-07, | |
| "loss": 0.187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23389649391174316, | |
| "step": 3555, | |
| "valid_targets_mean": 2494.9, | |
| "valid_targets_min": 1107 | |
| }, | |
| { | |
| "epoch": 6.472727272727273, | |
| "grad_norm": 0.6732130872138381, | |
| "learning_rate": 6.920834570084389e-07, | |
| "loss": 0.2185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21910206973552704, | |
| "step": 3560, | |
| "valid_targets_mean": 2901.8, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 6.4818181818181815, | |
| "grad_norm": 0.6204104076430839, | |
| "learning_rate": 6.686371232290567e-07, | |
| "loss": 0.1889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19802677631378174, | |
| "step": 3565, | |
| "valid_targets_mean": 3183.4, | |
| "valid_targets_min": 1297 | |
| }, | |
| { | |
| "epoch": 6.490909090909091, | |
| "grad_norm": 1.0729417915230997, | |
| "learning_rate": 6.455880678976845e-07, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2234289050102234, | |
| "step": 3570, | |
| "valid_targets_mean": 3047.2, | |
| "valid_targets_min": 1492 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 0.6170422752355285, | |
| "learning_rate": 6.229367646949924e-07, | |
| "loss": 0.1958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1874658465385437, | |
| "step": 3575, | |
| "valid_targets_mean": 2822.6, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 6.509090909090909, | |
| "grad_norm": 0.665717657809791, | |
| "learning_rate": 6.006836791274606e-07, | |
| "loss": 0.1916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22099323570728302, | |
| "step": 3580, | |
| "valid_targets_mean": 2710.2, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 6.5181818181818185, | |
| "grad_norm": 0.6951502959945852, | |
| "learning_rate": 5.788292685177954e-07, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18100181221961975, | |
| "step": 3585, | |
| "valid_targets_mean": 2566.7, | |
| "valid_targets_min": 1336 | |
| }, | |
| { | |
| "epoch": 6.527272727272727, | |
| "grad_norm": 0.6090897306366719, | |
| "learning_rate": 5.573739819955459e-07, | |
| "loss": 0.2107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18592379987239838, | |
| "step": 3590, | |
| "valid_targets_mean": 2851.6, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 6.536363636363636, | |
| "grad_norm": 0.6839505277779282, | |
| "learning_rate": 5.363182604878803e-07, | |
| "loss": 0.2159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22848966717720032, | |
| "step": 3595, | |
| "valid_targets_mean": 2868.8, | |
| "valid_targets_min": 509 | |
| }, | |
| { | |
| "epoch": 6.545454545454545, | |
| "grad_norm": 0.5902724277197199, | |
| "learning_rate": 5.156625367104973e-07, | |
| "loss": 0.1879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17370611429214478, | |
| "step": 3600, | |
| "valid_targets_mean": 3350.9, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 6.554545454545455, | |
| "grad_norm": 0.5824926455184319, | |
| "learning_rate": 4.954072351587646e-07, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2094963937997818, | |
| "step": 3605, | |
| "valid_targets_mean": 3443.2, | |
| "valid_targets_min": 1756 | |
| }, | |
| { | |
| "epoch": 6.5636363636363635, | |
| "grad_norm": 0.7223293649275785, | |
| "learning_rate": 4.75552772098975e-07, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22199690341949463, | |
| "step": 3610, | |
| "valid_targets_mean": 2564.2, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 6.572727272727272, | |
| "grad_norm": 0.6704296286028131, | |
| "learning_rate": 4.560995555597969e-07, | |
| "loss": 0.2145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20293614268302917, | |
| "step": 3615, | |
| "valid_targets_mean": 2849.2, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 6.581818181818182, | |
| "grad_norm": 0.7454734458543368, | |
| "learning_rate": 4.3704798532388624e-07, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3295115828514099, | |
| "step": 3620, | |
| "valid_targets_mean": 2756.9, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 6.590909090909091, | |
| "grad_norm": 0.7071629110400431, | |
| "learning_rate": 4.1839845291968607e-07, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23328623175621033, | |
| "step": 3625, | |
| "valid_targets_mean": 3020.6, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 0.6733228752450922, | |
| "learning_rate": 4.001513416133551e-07, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846185326576233, | |
| "step": 3630, | |
| "valid_targets_mean": 2510.1, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 6.609090909090909, | |
| "grad_norm": 0.7503433569098521, | |
| "learning_rate": 3.823070264009099e-07, | |
| "loss": 0.1863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16027329862117767, | |
| "step": 3635, | |
| "valid_targets_mean": 2422.6, | |
| "valid_targets_min": 1004 | |
| }, | |
| { | |
| "epoch": 6.618181818181818, | |
| "grad_norm": 0.7548043438842913, | |
| "learning_rate": 3.648658740005107e-07, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1915736049413681, | |
| "step": 3640, | |
| "valid_targets_mean": 2116.1, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 6.627272727272727, | |
| "grad_norm": 0.6091929259291805, | |
| "learning_rate": 3.4782824284492975e-07, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2477787882089615, | |
| "step": 3645, | |
| "valid_targets_mean": 3713.2, | |
| "valid_targets_min": 1872 | |
| }, | |
| { | |
| "epoch": 6.636363636363637, | |
| "grad_norm": 0.6229880345830455, | |
| "learning_rate": 3.31194483074182e-07, | |
| "loss": 0.1963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1972864866256714, | |
| "step": 3650, | |
| "valid_targets_mean": 2992.7, | |
| "valid_targets_min": 1513 | |
| }, | |
| { | |
| "epoch": 6.6454545454545455, | |
| "grad_norm": 0.628801173843993, | |
| "learning_rate": 3.149649365283258e-07, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2724452018737793, | |
| "step": 3655, | |
| "valid_targets_mean": 3391.6, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 6.654545454545454, | |
| "grad_norm": 0.6261436041334418, | |
| "learning_rate": 2.9913993674044904e-07, | |
| "loss": 0.1983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18547657132148743, | |
| "step": 3660, | |
| "valid_targets_mean": 2915.8, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 6.663636363636364, | |
| "grad_norm": 0.7463194929612665, | |
| "learning_rate": 2.8371980892979436e-07, | |
| "loss": 0.2333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22626563906669617, | |
| "step": 3665, | |
| "valid_targets_mean": 2315.8, | |
| "valid_targets_min": 600 | |
| }, | |
| { | |
| "epoch": 6.672727272727273, | |
| "grad_norm": 0.6492852106251501, | |
| "learning_rate": 2.687048699951067e-07, | |
| "loss": 0.1793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19556567072868347, | |
| "step": 3670, | |
| "valid_targets_mean": 3156.1, | |
| "valid_targets_min": 1442 | |
| }, | |
| { | |
| "epoch": 6.681818181818182, | |
| "grad_norm": 0.7197581196148153, | |
| "learning_rate": 2.5409542850808765e-07, | |
| "loss": 0.195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2314002513885498, | |
| "step": 3675, | |
| "valid_targets_mean": 2628.3, | |
| "valid_targets_min": 1269 | |
| }, | |
| { | |
| "epoch": 6.690909090909091, | |
| "grad_norm": 0.6782253107747308, | |
| "learning_rate": 2.3989178470707364e-07, | |
| "loss": 0.204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21835069358348846, | |
| "step": 3680, | |
| "valid_targets_mean": 2813.1, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "grad_norm": 0.738040860037262, | |
| "learning_rate": 2.260942304908609e-07, | |
| "loss": 0.1898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2219957709312439, | |
| "step": 3685, | |
| "valid_targets_mean": 2700.1, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 6.709090909090909, | |
| "grad_norm": 0.6134241635817874, | |
| "learning_rate": 2.1270304941271025e-07, | |
| "loss": 0.1914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14740461111068726, | |
| "step": 3690, | |
| "valid_targets_mean": 2586.2, | |
| "valid_targets_min": 1251 | |
| }, | |
| { | |
| "epoch": 6.718181818181818, | |
| "grad_norm": 0.693132393478107, | |
| "learning_rate": 1.9971851667451413e-07, | |
| "loss": 0.2047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23066049814224243, | |
| "step": 3695, | |
| "valid_targets_mean": 2906.5, | |
| "valid_targets_min": 700 | |
| }, | |
| { | |
| "epoch": 6.7272727272727275, | |
| "grad_norm": 0.6619328999399736, | |
| "learning_rate": 1.8714089912113876e-07, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1960105299949646, | |
| "step": 3700, | |
| "valid_targets_mean": 3064.5, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 6.736363636363636, | |
| "grad_norm": 0.6829964172452905, | |
| "learning_rate": 1.749704552349507e-07, | |
| "loss": 0.1859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1751609444618225, | |
| "step": 3705, | |
| "valid_targets_mean": 2536.2, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 6.745454545454545, | |
| "grad_norm": 0.5770159628606195, | |
| "learning_rate": 1.6320743513049686e-07, | |
| "loss": 0.2, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15475375950336456, | |
| "step": 3710, | |
| "valid_targets_mean": 2827.8, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 6.754545454545455, | |
| "grad_norm": 0.6445532521114699, | |
| "learning_rate": 1.5185208054936394e-07, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16979286074638367, | |
| "step": 3715, | |
| "valid_targets_mean": 2648.4, | |
| "valid_targets_min": 1291 | |
| }, | |
| { | |
| "epoch": 6.763636363636364, | |
| "grad_norm": 0.5943719803037179, | |
| "learning_rate": 1.4090462485521816e-07, | |
| "loss": 0.2099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18774190545082092, | |
| "step": 3720, | |
| "valid_targets_mean": 2772.4, | |
| "valid_targets_min": 1199 | |
| }, | |
| { | |
| "epoch": 6.7727272727272725, | |
| "grad_norm": 0.6756921790231029, | |
| "learning_rate": 1.303652930289956e-07, | |
| "loss": 0.1938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21918311715126038, | |
| "step": 3725, | |
| "valid_targets_mean": 2937.1, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 6.781818181818182, | |
| "grad_norm": 0.6616175262354211, | |
| "learning_rate": 1.2023430166429485e-07, | |
| "loss": 0.1735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16985666751861572, | |
| "step": 3730, | |
| "valid_targets_mean": 2460.4, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 6.790909090909091, | |
| "grad_norm": 0.7910618205562641, | |
| "learning_rate": 1.1051185896291616e-07, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21274155378341675, | |
| "step": 3735, | |
| "valid_targets_mean": 2476.2, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.7154269068956903, | |
| "learning_rate": 1.011981647305782e-07, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21139511466026306, | |
| "step": 3740, | |
| "valid_targets_mean": 2405.8, | |
| "valid_targets_min": 1248 | |
| }, | |
| { | |
| "epoch": 6.809090909090909, | |
| "grad_norm": 0.7284484849008579, | |
| "learning_rate": 9.22934103728279e-08, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24537286162376404, | |
| "step": 3745, | |
| "valid_targets_mean": 2740.6, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 6.818181818181818, | |
| "grad_norm": 0.6847980903768206, | |
| "learning_rate": 8.37977788910882e-08, | |
| "loss": 0.1934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2240789234638214, | |
| "step": 3750, | |
| "valid_targets_mean": 2620.1, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 6.827272727272727, | |
| "grad_norm": 0.6915252822860436, | |
| "learning_rate": 7.571144487891202e-08, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2609437108039856, | |
| "step": 3755, | |
| "valid_targets_mean": 3013.9, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 6.836363636363636, | |
| "grad_norm": 0.7415434992825277, | |
| "learning_rate": 6.803457451838746e-08, | |
| "loss": 0.2094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19083890318870544, | |
| "step": 3760, | |
| "valid_targets_mean": 2380.1, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 6.845454545454546, | |
| "grad_norm": 0.6739299134659402, | |
| "learning_rate": 6.076732557672272e-08, | |
| "loss": 0.1937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1840040683746338, | |
| "step": 3765, | |
| "valid_targets_mean": 2367.7, | |
| "valid_targets_min": 1454 | |
| }, | |
| { | |
| "epoch": 6.8545454545454545, | |
| "grad_norm": 0.7509615032546884, | |
| "learning_rate": 5.390984740299976e-08, | |
| "loss": 0.2091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23984074592590332, | |
| "step": 3770, | |
| "valid_targets_mean": 2647.2, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 6.863636363636363, | |
| "grad_norm": 0.6790819350475567, | |
| "learning_rate": 4.7462280925116847e-08, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16014784574508667, | |
| "step": 3775, | |
| "valid_targets_mean": 2598.4, | |
| "valid_targets_min": 1736 | |
| }, | |
| { | |
| "epoch": 6.872727272727273, | |
| "grad_norm": 0.650157897278632, | |
| "learning_rate": 4.142475864688411e-08, | |
| "loss": 0.1967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17384964227676392, | |
| "step": 3780, | |
| "valid_targets_mean": 2974.6, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 6.881818181818182, | |
| "grad_norm": 0.6659622954454439, | |
| "learning_rate": 3.5797404645296906e-08, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26057201623916626, | |
| "step": 3785, | |
| "valid_targets_mean": 2915.7, | |
| "valid_targets_min": 986 | |
| }, | |
| { | |
| "epoch": 6.890909090909091, | |
| "grad_norm": 0.6656737432362372, | |
| "learning_rate": 3.0580334567995585e-08, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1925896406173706, | |
| "step": 3790, | |
| "valid_targets_mean": 2642.1, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "grad_norm": 0.6694609275277352, | |
| "learning_rate": 2.5773655630880746e-08, | |
| "loss": 0.1896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16200029850006104, | |
| "step": 3795, | |
| "valid_targets_mean": 2400.2, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 6.909090909090909, | |
| "grad_norm": 0.6807347889354627, | |
| "learning_rate": 2.1377466615912778e-08, | |
| "loss": 0.2413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26320990920066833, | |
| "step": 3800, | |
| "valid_targets_mean": 2810.3, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 6.918181818181818, | |
| "grad_norm": 0.7059977237079785, | |
| "learning_rate": 1.7391857869086815e-08, | |
| "loss": 0.2071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2371351718902588, | |
| "step": 3805, | |
| "valid_targets_mean": 2632.0, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 6.927272727272728, | |
| "grad_norm": 0.740272454756386, | |
| "learning_rate": 1.3816911298565327e-08, | |
| "loss": 0.2239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19789424538612366, | |
| "step": 3810, | |
| "valid_targets_mean": 2359.9, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 6.9363636363636365, | |
| "grad_norm": 0.5823718609644692, | |
| "learning_rate": 1.0652700373006142e-08, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17436788976192474, | |
| "step": 3815, | |
| "valid_targets_mean": 2827.7, | |
| "valid_targets_min": 1378 | |
| }, | |
| { | |
| "epoch": 6.945454545454545, | |
| "grad_norm": 0.6873846620440276, | |
| "learning_rate": 7.899290120039205e-09, | |
| "loss": 0.1954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2165110558271408, | |
| "step": 3820, | |
| "valid_targets_mean": 2476.6, | |
| "valid_targets_min": 1096 | |
| }, | |
| { | |
| "epoch": 6.954545454545455, | |
| "grad_norm": 0.7150954695545816, | |
| "learning_rate": 5.556737124945422e-09, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20282520353794098, | |
| "step": 3825, | |
| "valid_targets_mean": 2931.9, | |
| "valid_targets_min": 1063 | |
| }, | |
| { | |
| "epoch": 6.963636363636364, | |
| "grad_norm": 0.6049679393727518, | |
| "learning_rate": 3.6250895294842605e-09, | |
| "loss": 0.2149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19065088033676147, | |
| "step": 3830, | |
| "valid_targets_mean": 2879.9, | |
| "valid_targets_min": 878 | |
| }, | |
| { | |
| "epoch": 6.972727272727273, | |
| "grad_norm": 0.6292861196115748, | |
| "learning_rate": 2.1043870309078727e-09, | |
| "loss": 0.1856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22311565279960632, | |
| "step": 3835, | |
| "valid_targets_mean": 2907.8, | |
| "valid_targets_min": 1273 | |
| }, | |
| { | |
| "epoch": 6.9818181818181815, | |
| "grad_norm": 0.6456495609565179, | |
| "learning_rate": 9.946608811395308e-10, | |
| "loss": 0.1887, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17371340095996857, | |
| "step": 3840, | |
| "valid_targets_mean": 2698.8, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 6.990909090909091, | |
| "grad_norm": 0.6481073142467838, | |
| "learning_rate": 2.959338861407979e-10, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18638712167739868, | |
| "step": 3845, | |
| "valid_targets_mean": 2776.0, | |
| "valid_targets_min": 1351 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.6608342332932761, | |
| "learning_rate": 8.220405436354384e-12, | |
| "loss": 0.2038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16949467360973358, | |
| "step": 3850, | |
| "valid_targets_mean": 2481.9, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16949467360973358, | |
| "step": 3850, | |
| "total_flos": 488803397074944.0, | |
| "train_loss": 0.26370995024582006, | |
| "train_runtime": 7874.4592, | |
| "train_samples_per_second": 7.812, | |
| "train_steps_per_second": 0.489, | |
| "valid_targets_mean": 2481.9, | |
| "valid_targets_min": 1022 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3850, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 488803397074944.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |