Text Generation
Transformers
Safetensors
qwen3
agents
terminal
code
software-engineering
conversational
text-generation-inference
Instructions to use open-thoughts/OpenThinkerAgent-32B-SFT-100K with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use open-thoughts/OpenThinkerAgent-32B-SFT-100K with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="open-thoughts/OpenThinkerAgent-32B-SFT-100K")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("open-thoughts/OpenThinkerAgent-32B-SFT-100K")
model = AutoModelForCausalLM.from_pretrained("open-thoughts/OpenThinkerAgent-32B-SFT-100K")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use open-thoughts/OpenThinkerAgent-32B-SFT-100K with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "open-thoughts/OpenThinkerAgent-32B-SFT-100K"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-32B-SFT-100K",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/open-thoughts/OpenThinkerAgent-32B-SFT-100K
- SGLang
How to use open-thoughts/OpenThinkerAgent-32B-SFT-100K with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "open-thoughts/OpenThinkerAgent-32B-SFT-100K" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-32B-SFT-100K",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "open-thoughts/OpenThinkerAgent-32B-SFT-100K" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-32B-SFT-100K",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use open-thoughts/OpenThinkerAgent-32B-SFT-100K with Docker Model Runner:
docker model run hf.co/open-thoughts/OpenThinkerAgent-32B-SFT-100K
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.982300884955752, | |
| "eval_steps": 500, | |
| "global_step": 3600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0055309734513274336, | |
| "grad_norm": 1.9755608155553037, | |
| "learning_rate": 3.5398230088495575e-07, | |
| "loss": 0.5241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5197785496711731, | |
| "step": 5, | |
| "valid_targets_mean": 6978.2, | |
| "valid_targets_min": 1651 | |
| }, | |
| { | |
| "epoch": 0.011061946902654867, | |
| "grad_norm": 1.9361195326595568, | |
| "learning_rate": 7.964601769911505e-07, | |
| "loss": 0.5296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5223421454429626, | |
| "step": 10, | |
| "valid_targets_mean": 6845.3, | |
| "valid_targets_min": 2319 | |
| }, | |
| { | |
| "epoch": 0.016592920353982302, | |
| "grad_norm": 1.8219995444940191, | |
| "learning_rate": 1.2389380530973452e-06, | |
| "loss": 0.5071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5062374472618103, | |
| "step": 15, | |
| "valid_targets_mean": 6758.8, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 0.022123893805309734, | |
| "grad_norm": 1.3101915029906583, | |
| "learning_rate": 1.68141592920354e-06, | |
| "loss": 0.5048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5095342993736267, | |
| "step": 20, | |
| "valid_targets_mean": 7089.6, | |
| "valid_targets_min": 2058 | |
| }, | |
| { | |
| "epoch": 0.02765486725663717, | |
| "grad_norm": 0.8373096781893925, | |
| "learning_rate": 2.1238938053097345e-06, | |
| "loss": 0.486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48004767298698425, | |
| "step": 25, | |
| "valid_targets_mean": 7069.2, | |
| "valid_targets_min": 2076 | |
| }, | |
| { | |
| "epoch": 0.033185840707964605, | |
| "grad_norm": 0.6159530219957101, | |
| "learning_rate": 2.5663716814159294e-06, | |
| "loss": 0.4518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47218844294548035, | |
| "step": 30, | |
| "valid_targets_mean": 6770.4, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 0.03871681415929203, | |
| "grad_norm": 0.5732710049142548, | |
| "learning_rate": 3.0088495575221242e-06, | |
| "loss": 0.4558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45937982201576233, | |
| "step": 35, | |
| "valid_targets_mean": 6775.0, | |
| "valid_targets_min": 1820 | |
| }, | |
| { | |
| "epoch": 0.04424778761061947, | |
| "grad_norm": 0.48793017678857753, | |
| "learning_rate": 3.4513274336283186e-06, | |
| "loss": 0.4331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.414675235748291, | |
| "step": 40, | |
| "valid_targets_mean": 6978.7, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 0.049778761061946904, | |
| "grad_norm": 0.42929958244615535, | |
| "learning_rate": 3.8938053097345135e-06, | |
| "loss": 0.4155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4104505479335785, | |
| "step": 45, | |
| "valid_targets_mean": 6632.7, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 0.05530973451327434, | |
| "grad_norm": 0.4144762895687238, | |
| "learning_rate": 4.336283185840709e-06, | |
| "loss": 0.4032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4172597825527191, | |
| "step": 50, | |
| "valid_targets_mean": 7002.3, | |
| "valid_targets_min": 2099 | |
| }, | |
| { | |
| "epoch": 0.06084070796460177, | |
| "grad_norm": 0.3326500933783783, | |
| "learning_rate": 4.778761061946903e-06, | |
| "loss": 0.3915, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39051878452301025, | |
| "step": 55, | |
| "valid_targets_mean": 6649.6, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 0.06637168141592921, | |
| "grad_norm": 0.27073039939289695, | |
| "learning_rate": 5.2212389380530985e-06, | |
| "loss": 0.3788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3567701280117035, | |
| "step": 60, | |
| "valid_targets_mean": 7036.4, | |
| "valid_targets_min": 2910 | |
| }, | |
| { | |
| "epoch": 0.07190265486725664, | |
| "grad_norm": 0.265638893455664, | |
| "learning_rate": 5.663716814159292e-06, | |
| "loss": 0.368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3485967218875885, | |
| "step": 65, | |
| "valid_targets_mean": 6511.1, | |
| "valid_targets_min": 1941 | |
| }, | |
| { | |
| "epoch": 0.07743362831858407, | |
| "grad_norm": 0.3677651994412772, | |
| "learning_rate": 6.1061946902654865e-06, | |
| "loss": 0.3604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3568234145641327, | |
| "step": 70, | |
| "valid_targets_mean": 6669.8, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 0.08296460176991151, | |
| "grad_norm": 0.24685559245875377, | |
| "learning_rate": 6.548672566371682e-06, | |
| "loss": 0.3452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305952548980713, | |
| "step": 75, | |
| "valid_targets_mean": 6857.9, | |
| "valid_targets_min": 2313 | |
| }, | |
| { | |
| "epoch": 0.08849557522123894, | |
| "grad_norm": 0.2593588614146304, | |
| "learning_rate": 6.991150442477876e-06, | |
| "loss": 0.3473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34555795788764954, | |
| "step": 80, | |
| "valid_targets_mean": 6905.2, | |
| "valid_targets_min": 1638 | |
| }, | |
| { | |
| "epoch": 0.09402654867256637, | |
| "grad_norm": 0.2328018999714726, | |
| "learning_rate": 7.4336283185840714e-06, | |
| "loss": 0.345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3288921117782593, | |
| "step": 85, | |
| "valid_targets_mean": 6667.2, | |
| "valid_targets_min": 2456 | |
| }, | |
| { | |
| "epoch": 0.09955752212389381, | |
| "grad_norm": 0.2480817557443312, | |
| "learning_rate": 7.876106194690266e-06, | |
| "loss": 0.3405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34147533774375916, | |
| "step": 90, | |
| "valid_targets_mean": 7349.0, | |
| "valid_targets_min": 2040 | |
| }, | |
| { | |
| "epoch": 0.10508849557522124, | |
| "grad_norm": 0.22584793135419537, | |
| "learning_rate": 8.31858407079646e-06, | |
| "loss": 0.3265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33613431453704834, | |
| "step": 95, | |
| "valid_targets_mean": 6368.9, | |
| "valid_targets_min": 1632 | |
| }, | |
| { | |
| "epoch": 0.11061946902654868, | |
| "grad_norm": 0.20396364259957264, | |
| "learning_rate": 8.761061946902656e-06, | |
| "loss": 0.3113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3130727708339691, | |
| "step": 100, | |
| "valid_targets_mean": 6859.1, | |
| "valid_targets_min": 1938 | |
| }, | |
| { | |
| "epoch": 0.1161504424778761, | |
| "grad_norm": 0.20691206791919872, | |
| "learning_rate": 9.203539823008851e-06, | |
| "loss": 0.3223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3112635314464569, | |
| "step": 105, | |
| "valid_targets_mean": 6781.7, | |
| "valid_targets_min": 2362 | |
| }, | |
| { | |
| "epoch": 0.12168141592920353, | |
| "grad_norm": 0.2179264137996772, | |
| "learning_rate": 9.646017699115045e-06, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3180069029331207, | |
| "step": 110, | |
| "valid_targets_mean": 6524.9, | |
| "valid_targets_min": 2014 | |
| }, | |
| { | |
| "epoch": 0.12721238938053098, | |
| "grad_norm": 0.2274404375212976, | |
| "learning_rate": 1.008849557522124e-05, | |
| "loss": 0.3113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3127485513687134, | |
| "step": 115, | |
| "valid_targets_mean": 6918.6, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 0.13274336283185842, | |
| "grad_norm": 0.26546055390156487, | |
| "learning_rate": 1.0530973451327436e-05, | |
| "loss": 0.3036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2955832779407501, | |
| "step": 120, | |
| "valid_targets_mean": 6611.0, | |
| "valid_targets_min": 2664 | |
| }, | |
| { | |
| "epoch": 0.13827433628318583, | |
| "grad_norm": 0.20593314023481457, | |
| "learning_rate": 1.0973451327433629e-05, | |
| "loss": 0.3059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3184691071510315, | |
| "step": 125, | |
| "valid_targets_mean": 7148.6, | |
| "valid_targets_min": 2156 | |
| }, | |
| { | |
| "epoch": 0.14380530973451328, | |
| "grad_norm": 0.21293595531060708, | |
| "learning_rate": 1.1415929203539825e-05, | |
| "loss": 0.3028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2839641571044922, | |
| "step": 130, | |
| "valid_targets_mean": 6754.8, | |
| "valid_targets_min": 1802 | |
| }, | |
| { | |
| "epoch": 0.14933628318584072, | |
| "grad_norm": 0.6136420101227134, | |
| "learning_rate": 1.1858407079646019e-05, | |
| "loss": 0.2975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3218395411968231, | |
| "step": 135, | |
| "valid_targets_mean": 7185.6, | |
| "valid_targets_min": 2155 | |
| }, | |
| { | |
| "epoch": 0.15486725663716813, | |
| "grad_norm": 0.22598077267711075, | |
| "learning_rate": 1.2300884955752212e-05, | |
| "loss": 0.2862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28336840867996216, | |
| "step": 140, | |
| "valid_targets_mean": 6451.2, | |
| "valid_targets_min": 2295 | |
| }, | |
| { | |
| "epoch": 0.16039823008849557, | |
| "grad_norm": 0.22619995869843001, | |
| "learning_rate": 1.2743362831858408e-05, | |
| "loss": 0.3016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3056568503379822, | |
| "step": 145, | |
| "valid_targets_mean": 7203.3, | |
| "valid_targets_min": 1863 | |
| }, | |
| { | |
| "epoch": 0.16592920353982302, | |
| "grad_norm": 0.23370157871575484, | |
| "learning_rate": 1.3185840707964604e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2864972651004791, | |
| "step": 150, | |
| "valid_targets_mean": 6920.9, | |
| "valid_targets_min": 1842 | |
| }, | |
| { | |
| "epoch": 0.17146017699115043, | |
| "grad_norm": 0.22788405974621587, | |
| "learning_rate": 1.3628318584070797e-05, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3031662404537201, | |
| "step": 155, | |
| "valid_targets_mean": 6976.0, | |
| "valid_targets_min": 2119 | |
| }, | |
| { | |
| "epoch": 0.17699115044247787, | |
| "grad_norm": 0.23699592971584418, | |
| "learning_rate": 1.4070796460176991e-05, | |
| "loss": 0.2946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30119097232818604, | |
| "step": 160, | |
| "valid_targets_mean": 6861.6, | |
| "valid_targets_min": 2882 | |
| }, | |
| { | |
| "epoch": 0.18252212389380532, | |
| "grad_norm": 0.24193653071694077, | |
| "learning_rate": 1.4513274336283187e-05, | |
| "loss": 0.2905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2951483726501465, | |
| "step": 165, | |
| "valid_targets_mean": 6909.0, | |
| "valid_targets_min": 1950 | |
| }, | |
| { | |
| "epoch": 0.18805309734513273, | |
| "grad_norm": 0.22688784751258725, | |
| "learning_rate": 1.4955752212389383e-05, | |
| "loss": 0.2984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29256922006607056, | |
| "step": 170, | |
| "valid_targets_mean": 6768.3, | |
| "valid_targets_min": 2064 | |
| }, | |
| { | |
| "epoch": 0.19358407079646017, | |
| "grad_norm": 0.21853017493472301, | |
| "learning_rate": 1.5398230088495576e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2883281409740448, | |
| "step": 175, | |
| "valid_targets_mean": 6756.0, | |
| "valid_targets_min": 2256 | |
| }, | |
| { | |
| "epoch": 0.19911504424778761, | |
| "grad_norm": 0.22017184477874496, | |
| "learning_rate": 1.5840707964601772e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2763049602508545, | |
| "step": 180, | |
| "valid_targets_mean": 6876.8, | |
| "valid_targets_min": 1897 | |
| }, | |
| { | |
| "epoch": 0.20464601769911506, | |
| "grad_norm": 0.23887724313868086, | |
| "learning_rate": 1.628318584070797e-05, | |
| "loss": 0.2903, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28853580355644226, | |
| "step": 185, | |
| "valid_targets_mean": 6651.2, | |
| "valid_targets_min": 2299 | |
| }, | |
| { | |
| "epoch": 0.21017699115044247, | |
| "grad_norm": 0.1999600952958813, | |
| "learning_rate": 1.672566371681416e-05, | |
| "loss": 0.2885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3044419586658478, | |
| "step": 190, | |
| "valid_targets_mean": 6723.0, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 0.2157079646017699, | |
| "grad_norm": 0.2226226893689938, | |
| "learning_rate": 1.7168141592920354e-05, | |
| "loss": 0.2877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27313104271888733, | |
| "step": 195, | |
| "valid_targets_mean": 6312.5, | |
| "valid_targets_min": 1632 | |
| }, | |
| { | |
| "epoch": 0.22123893805309736, | |
| "grad_norm": 0.21084439392054424, | |
| "learning_rate": 1.761061946902655e-05, | |
| "loss": 0.2774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2734854519367218, | |
| "step": 200, | |
| "valid_targets_mean": 6577.9, | |
| "valid_targets_min": 1661 | |
| }, | |
| { | |
| "epoch": 0.22676991150442477, | |
| "grad_norm": 0.3968380293718493, | |
| "learning_rate": 1.8053097345132743e-05, | |
| "loss": 0.3726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4262051284313202, | |
| "step": 205, | |
| "valid_targets_mean": 3888.9, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 0.2323008849557522, | |
| "grad_norm": 0.37610971546811917, | |
| "learning_rate": 1.849557522123894e-05, | |
| "loss": 0.4115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4180339276790619, | |
| "step": 210, | |
| "valid_targets_mean": 4061.8, | |
| "valid_targets_min": 1565 | |
| }, | |
| { | |
| "epoch": 0.23783185840707965, | |
| "grad_norm": 0.2980786445383489, | |
| "learning_rate": 1.8938053097345135e-05, | |
| "loss": 0.3927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3945499360561371, | |
| "step": 215, | |
| "valid_targets_mean": 5207.1, | |
| "valid_targets_min": 2190 | |
| }, | |
| { | |
| "epoch": 0.24336283185840707, | |
| "grad_norm": 0.27980575401419183, | |
| "learning_rate": 1.9380530973451328e-05, | |
| "loss": 0.3863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37614330649375916, | |
| "step": 220, | |
| "valid_targets_mean": 4230.9, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 0.2488938053097345, | |
| "grad_norm": 0.2951669102882478, | |
| "learning_rate": 1.9823008849557524e-05, | |
| "loss": 0.3784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37663379311561584, | |
| "step": 225, | |
| "valid_targets_mean": 4269.3, | |
| "valid_targets_min": 1788 | |
| }, | |
| { | |
| "epoch": 0.25442477876106195, | |
| "grad_norm": 0.29952134684632564, | |
| "learning_rate": 2.0265486725663717e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3719452917575836, | |
| "step": 230, | |
| "valid_targets_mean": 4732.4, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 0.25995575221238937, | |
| "grad_norm": 0.29819430952839904, | |
| "learning_rate": 2.0707964601769913e-05, | |
| "loss": 0.3916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37744632363319397, | |
| "step": 235, | |
| "valid_targets_mean": 4465.3, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 0.26548672566371684, | |
| "grad_norm": 0.23379306791364543, | |
| "learning_rate": 2.115044247787611e-05, | |
| "loss": 0.3872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3877159059047699, | |
| "step": 240, | |
| "valid_targets_mean": 5104.8, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 0.27101769911504425, | |
| "grad_norm": 0.2662772048328646, | |
| "learning_rate": 2.15929203539823e-05, | |
| "loss": 0.3871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3581514358520508, | |
| "step": 245, | |
| "valid_targets_mean": 4357.9, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 0.27654867256637167, | |
| "grad_norm": 0.2683584031007601, | |
| "learning_rate": 2.2035398230088498e-05, | |
| "loss": 0.3629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3489588797092438, | |
| "step": 250, | |
| "valid_targets_mean": 4695.0, | |
| "valid_targets_min": 1700 | |
| }, | |
| { | |
| "epoch": 0.28207964601769914, | |
| "grad_norm": 0.2911683550298012, | |
| "learning_rate": 2.247787610619469e-05, | |
| "loss": 0.3698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3698735237121582, | |
| "step": 255, | |
| "valid_targets_mean": 4179.3, | |
| "valid_targets_min": 1113 | |
| }, | |
| { | |
| "epoch": 0.28761061946902655, | |
| "grad_norm": 0.2629013296131057, | |
| "learning_rate": 2.2920353982300883e-05, | |
| "loss": 0.3663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37205931544303894, | |
| "step": 260, | |
| "valid_targets_mean": 4739.6, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 0.29314159292035397, | |
| "grad_norm": 0.25886844353948074, | |
| "learning_rate": 2.3362831858407083e-05, | |
| "loss": 0.3539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3396625518798828, | |
| "step": 265, | |
| "valid_targets_mean": 4349.6, | |
| "valid_targets_min": 1681 | |
| }, | |
| { | |
| "epoch": 0.29867256637168144, | |
| "grad_norm": 0.2894512562995122, | |
| "learning_rate": 2.3805309734513275e-05, | |
| "loss": 0.3561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3411494493484497, | |
| "step": 270, | |
| "valid_targets_mean": 4303.8, | |
| "valid_targets_min": 1510 | |
| }, | |
| { | |
| "epoch": 0.30420353982300885, | |
| "grad_norm": 0.2600633567394933, | |
| "learning_rate": 2.424778761061947e-05, | |
| "loss": 0.3572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34964480996131897, | |
| "step": 275, | |
| "valid_targets_mean": 4297.8, | |
| "valid_targets_min": 1323 | |
| }, | |
| { | |
| "epoch": 0.30973451327433627, | |
| "grad_norm": 0.26827105483949776, | |
| "learning_rate": 2.4690265486725668e-05, | |
| "loss": 0.3577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37490615248680115, | |
| "step": 280, | |
| "valid_targets_mean": 4640.1, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 0.31526548672566373, | |
| "grad_norm": 0.2702497132075915, | |
| "learning_rate": 2.513274336283186e-05, | |
| "loss": 0.365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36126241087913513, | |
| "step": 285, | |
| "valid_targets_mean": 4367.4, | |
| "valid_targets_min": 1574 | |
| }, | |
| { | |
| "epoch": 0.32079646017699115, | |
| "grad_norm": 0.2587631044981072, | |
| "learning_rate": 2.5575221238938056e-05, | |
| "loss": 0.3509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3508976995944977, | |
| "step": 290, | |
| "valid_targets_mean": 4589.0, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 0.32632743362831856, | |
| "grad_norm": 0.26575828431699583, | |
| "learning_rate": 2.601769911504425e-05, | |
| "loss": 0.3536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3422420024871826, | |
| "step": 295, | |
| "valid_targets_mean": 4307.6, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 0.33185840707964603, | |
| "grad_norm": 0.2395861850770492, | |
| "learning_rate": 2.6460176991150442e-05, | |
| "loss": 0.358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3833498954772949, | |
| "step": 300, | |
| "valid_targets_mean": 4793.9, | |
| "valid_targets_min": 1518 | |
| }, | |
| { | |
| "epoch": 0.33738938053097345, | |
| "grad_norm": 0.26883107867699846, | |
| "learning_rate": 2.690265486725664e-05, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3612666130065918, | |
| "step": 305, | |
| "valid_targets_mean": 4308.3, | |
| "valid_targets_min": 1728 | |
| }, | |
| { | |
| "epoch": 0.34292035398230086, | |
| "grad_norm": 0.25488798644808486, | |
| "learning_rate": 2.7345132743362834e-05, | |
| "loss": 0.3557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35433515906333923, | |
| "step": 310, | |
| "valid_targets_mean": 4154.4, | |
| "valid_targets_min": 1365 | |
| }, | |
| { | |
| "epoch": 0.34845132743362833, | |
| "grad_norm": 0.24655130411981097, | |
| "learning_rate": 2.7787610619469027e-05, | |
| "loss": 0.3557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36025676131248474, | |
| "step": 315, | |
| "valid_targets_mean": 4616.9, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 0.35398230088495575, | |
| "grad_norm": 0.26025181064310127, | |
| "learning_rate": 2.8230088495575226e-05, | |
| "loss": 0.3539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3606431186199188, | |
| "step": 320, | |
| "valid_targets_mean": 4498.0, | |
| "valid_targets_min": 1767 | |
| }, | |
| { | |
| "epoch": 0.35951327433628316, | |
| "grad_norm": 0.2464924494476943, | |
| "learning_rate": 2.867256637168142e-05, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35099339485168457, | |
| "step": 325, | |
| "valid_targets_mean": 4461.3, | |
| "valid_targets_min": 1652 | |
| }, | |
| { | |
| "epoch": 0.36504424778761063, | |
| "grad_norm": 0.3399607929752881, | |
| "learning_rate": 2.9115044247787612e-05, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3355567753314972, | |
| "step": 330, | |
| "valid_targets_mean": 4369.2, | |
| "valid_targets_min": 1563 | |
| }, | |
| { | |
| "epoch": 0.37057522123893805, | |
| "grad_norm": 0.35556102161403125, | |
| "learning_rate": 2.9557522123893808e-05, | |
| "loss": 0.3506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3818329870700836, | |
| "step": 335, | |
| "valid_targets_mean": 4633.1, | |
| "valid_targets_min": 1720 | |
| }, | |
| { | |
| "epoch": 0.37610619469026546, | |
| "grad_norm": 0.2676254533630978, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.3443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35258927941322327, | |
| "step": 340, | |
| "valid_targets_mean": 4250.8, | |
| "valid_targets_min": 1522 | |
| }, | |
| { | |
| "epoch": 0.38163716814159293, | |
| "grad_norm": 0.27075731398801184, | |
| "learning_rate": 3.0442477876106197e-05, | |
| "loss": 0.3462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34246930480003357, | |
| "step": 345, | |
| "valid_targets_mean": 3938.7, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 0.38716814159292035, | |
| "grad_norm": 0.25373177364374, | |
| "learning_rate": 3.0884955752212396e-05, | |
| "loss": 0.3483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33848801255226135, | |
| "step": 350, | |
| "valid_targets_mean": 4336.6, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 0.3926991150442478, | |
| "grad_norm": 0.2644437039067955, | |
| "learning_rate": 3.132743362831859e-05, | |
| "loss": 0.356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35528698563575745, | |
| "step": 355, | |
| "valid_targets_mean": 4522.3, | |
| "valid_targets_min": 1649 | |
| }, | |
| { | |
| "epoch": 0.39823008849557523, | |
| "grad_norm": 0.2698672832556747, | |
| "learning_rate": 3.176991150442478e-05, | |
| "loss": 0.3484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35782673954963684, | |
| "step": 360, | |
| "valid_targets_mean": 4473.8, | |
| "valid_targets_min": 1295 | |
| }, | |
| { | |
| "epoch": 0.40376106194690264, | |
| "grad_norm": 0.25532940349502287, | |
| "learning_rate": 3.2212389380530975e-05, | |
| "loss": 0.3409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.334296315908432, | |
| "step": 365, | |
| "valid_targets_mean": 4149.1, | |
| "valid_targets_min": 1350 | |
| }, | |
| { | |
| "epoch": 0.4092920353982301, | |
| "grad_norm": 0.25112115636952376, | |
| "learning_rate": 3.265486725663717e-05, | |
| "loss": 0.3417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36098864674568176, | |
| "step": 370, | |
| "valid_targets_mean": 4380.3, | |
| "valid_targets_min": 1375 | |
| }, | |
| { | |
| "epoch": 0.41482300884955753, | |
| "grad_norm": 0.23614322381689468, | |
| "learning_rate": 3.309734513274337e-05, | |
| "loss": 0.3448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.357121080160141, | |
| "step": 375, | |
| "valid_targets_mean": 4584.4, | |
| "valid_targets_min": 1484 | |
| }, | |
| { | |
| "epoch": 0.42035398230088494, | |
| "grad_norm": 0.2514696258906926, | |
| "learning_rate": 3.353982300884956e-05, | |
| "loss": 0.339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33986568450927734, | |
| "step": 380, | |
| "valid_targets_mean": 4410.8, | |
| "valid_targets_min": 1654 | |
| }, | |
| { | |
| "epoch": 0.4258849557522124, | |
| "grad_norm": 0.3156544054199801, | |
| "learning_rate": 3.398230088495575e-05, | |
| "loss": 0.3299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32627376914024353, | |
| "step": 385, | |
| "valid_targets_mean": 4151.5, | |
| "valid_targets_min": 1815 | |
| }, | |
| { | |
| "epoch": 0.4314159292035398, | |
| "grad_norm": 0.2690741885669291, | |
| "learning_rate": 3.4424778761061945e-05, | |
| "loss": 0.3456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3648460805416107, | |
| "step": 390, | |
| "valid_targets_mean": 4476.6, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 0.43694690265486724, | |
| "grad_norm": 0.2760983946144871, | |
| "learning_rate": 3.4867256637168145e-05, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34434404969215393, | |
| "step": 395, | |
| "valid_targets_mean": 4175.1, | |
| "valid_targets_min": 1460 | |
| }, | |
| { | |
| "epoch": 0.4424778761061947, | |
| "grad_norm": 0.23720967173187366, | |
| "learning_rate": 3.530973451327434e-05, | |
| "loss": 0.3353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32391396164894104, | |
| "step": 400, | |
| "valid_targets_mean": 4482.7, | |
| "valid_targets_min": 1558 | |
| }, | |
| { | |
| "epoch": 0.4480088495575221, | |
| "grad_norm": 0.246515523587321, | |
| "learning_rate": 3.575221238938053e-05, | |
| "loss": 0.3408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3464871942996979, | |
| "step": 405, | |
| "valid_targets_mean": 4573.7, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 0.45353982300884954, | |
| "grad_norm": 0.24629787870088246, | |
| "learning_rate": 3.619469026548673e-05, | |
| "loss": 0.3428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3288961946964264, | |
| "step": 410, | |
| "valid_targets_mean": 4464.9, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 0.459070796460177, | |
| "grad_norm": 0.23479791987588386, | |
| "learning_rate": 3.663716814159292e-05, | |
| "loss": 0.3463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3389821946620941, | |
| "step": 415, | |
| "valid_targets_mean": 4674.4, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 0.4646017699115044, | |
| "grad_norm": 0.257519305648294, | |
| "learning_rate": 3.707964601769912e-05, | |
| "loss": 0.343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33774492144584656, | |
| "step": 420, | |
| "valid_targets_mean": 4428.2, | |
| "valid_targets_min": 1812 | |
| }, | |
| { | |
| "epoch": 0.47013274336283184, | |
| "grad_norm": 0.2031384397042068, | |
| "learning_rate": 3.7522123893805314e-05, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22223834693431854, | |
| "step": 425, | |
| "valid_targets_mean": 5564.1, | |
| "valid_targets_min": 2296 | |
| }, | |
| { | |
| "epoch": 0.4756637168141593, | |
| "grad_norm": 0.23681019057957262, | |
| "learning_rate": 3.796460176991151e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23336224257946014, | |
| "step": 430, | |
| "valid_targets_mean": 5563.6, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 0.4811946902654867, | |
| "grad_norm": 0.1928728151155162, | |
| "learning_rate": 3.840707964601771e-05, | |
| "loss": 0.2353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24125637114048004, | |
| "step": 435, | |
| "valid_targets_mean": 5803.6, | |
| "valid_targets_min": 1845 | |
| }, | |
| { | |
| "epoch": 0.48672566371681414, | |
| "grad_norm": 0.267234792133035, | |
| "learning_rate": 3.88495575221239e-05, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24348674714565277, | |
| "step": 440, | |
| "valid_targets_mean": 5367.3, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 0.4922566371681416, | |
| "grad_norm": 0.18851986437755344, | |
| "learning_rate": 3.929203539823009e-05, | |
| "loss": 0.2326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24583430588245392, | |
| "step": 445, | |
| "valid_targets_mean": 5482.7, | |
| "valid_targets_min": 2023 | |
| }, | |
| { | |
| "epoch": 0.497787610619469, | |
| "grad_norm": 0.19393303894420147, | |
| "learning_rate": 3.9734513274336285e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23719541728496552, | |
| "step": 450, | |
| "valid_targets_mean": 5424.7, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 0.5033185840707964, | |
| "grad_norm": 0.18992550607845296, | |
| "learning_rate": 3.999997614399254e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22034423053264618, | |
| "step": 455, | |
| "valid_targets_mean": 5279.3, | |
| "valid_targets_min": 1875 | |
| }, | |
| { | |
| "epoch": 0.5088495575221239, | |
| "grad_norm": 0.2202243775864502, | |
| "learning_rate": 3.999970776456223e-05, | |
| "loss": 0.2201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21722038090229034, | |
| "step": 460, | |
| "valid_targets_mean": 5437.0, | |
| "valid_targets_min": 2033 | |
| }, | |
| { | |
| "epoch": 0.5143805309734514, | |
| "grad_norm": 0.1978115395724297, | |
| "learning_rate": 3.999914118970715e-05, | |
| "loss": 0.2129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2351229041814804, | |
| "step": 465, | |
| "valid_targets_mean": 5660.2, | |
| "valid_targets_min": 2497 | |
| }, | |
| { | |
| "epoch": 0.5199115044247787, | |
| "grad_norm": 0.16789914555153201, | |
| "learning_rate": 3.999827642787493e-05, | |
| "loss": 0.2167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1969185620546341, | |
| "step": 470, | |
| "valid_targets_mean": 5888.6, | |
| "valid_targets_min": 2096 | |
| }, | |
| { | |
| "epoch": 0.5254424778761062, | |
| "grad_norm": 0.20277595910548082, | |
| "learning_rate": 3.999711349195917e-05, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2288517951965332, | |
| "step": 475, | |
| "valid_targets_mean": 5902.5, | |
| "valid_targets_min": 2258 | |
| }, | |
| { | |
| "epoch": 0.5309734513274337, | |
| "grad_norm": 0.24248388368269208, | |
| "learning_rate": 3.9995652399299215e-05, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21998776495456696, | |
| "step": 480, | |
| "valid_targets_mean": 5731.6, | |
| "valid_targets_min": 2264 | |
| }, | |
| { | |
| "epoch": 0.536504424778761, | |
| "grad_norm": 0.19176187468329456, | |
| "learning_rate": 3.999389317167995e-05, | |
| "loss": 0.2119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20777660608291626, | |
| "step": 485, | |
| "valid_targets_mean": 5580.7, | |
| "valid_targets_min": 2031 | |
| }, | |
| { | |
| "epoch": 0.5420353982300885, | |
| "grad_norm": 0.18870598351316026, | |
| "learning_rate": 3.999183583533143e-05, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21205902099609375, | |
| "step": 490, | |
| "valid_targets_mean": 5583.7, | |
| "valid_targets_min": 2375 | |
| }, | |
| { | |
| "epoch": 0.547566371681416, | |
| "grad_norm": 0.1846899005549566, | |
| "learning_rate": 3.9989480420928536e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21297240257263184, | |
| "step": 495, | |
| "valid_targets_mean": 5816.1, | |
| "valid_targets_min": 2615 | |
| }, | |
| { | |
| "epoch": 0.5530973451327433, | |
| "grad_norm": 0.17548168338108097, | |
| "learning_rate": 3.998682696359045e-05, | |
| "loss": 0.2087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20402729511260986, | |
| "step": 500, | |
| "valid_targets_mean": 5295.1, | |
| "valid_targets_min": 2153 | |
| }, | |
| { | |
| "epoch": 0.5586283185840708, | |
| "grad_norm": 0.18341471170643436, | |
| "learning_rate": 3.998387550288021e-05, | |
| "loss": 0.2168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21296370029449463, | |
| "step": 505, | |
| "valid_targets_mean": 5446.5, | |
| "valid_targets_min": 1932 | |
| }, | |
| { | |
| "epoch": 0.5641592920353983, | |
| "grad_norm": 0.17500502351665906, | |
| "learning_rate": 3.998062608280405e-05, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20280246436595917, | |
| "step": 510, | |
| "valid_targets_mean": 5612.1, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 0.5696902654867256, | |
| "grad_norm": 0.16437455482965171, | |
| "learning_rate": 3.99770787518108e-05, | |
| "loss": 0.2066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20230944454669952, | |
| "step": 515, | |
| "valid_targets_mean": 5663.1, | |
| "valid_targets_min": 2529 | |
| }, | |
| { | |
| "epoch": 0.5752212389380531, | |
| "grad_norm": 0.1821632599080371, | |
| "learning_rate": 3.997323356279111e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21061837673187256, | |
| "step": 520, | |
| "valid_targets_mean": 5665.3, | |
| "valid_targets_min": 1546 | |
| }, | |
| { | |
| "epoch": 0.5807522123893806, | |
| "grad_norm": 0.19894906356808972, | |
| "learning_rate": 3.9969090573076725e-05, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2156447172164917, | |
| "step": 525, | |
| "valid_targets_mean": 5509.4, | |
| "valid_targets_min": 2188 | |
| }, | |
| { | |
| "epoch": 0.5862831858407079, | |
| "grad_norm": 0.2447680609432372, | |
| "learning_rate": 3.9964649844439564e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21354971826076508, | |
| "step": 530, | |
| "valid_targets_mean": 5673.9, | |
| "valid_targets_min": 2144 | |
| }, | |
| { | |
| "epoch": 0.5918141592920354, | |
| "grad_norm": 0.2317575743316455, | |
| "learning_rate": 3.995991144309084e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21478359401226044, | |
| "step": 535, | |
| "valid_targets_mean": 5578.7, | |
| "valid_targets_min": 1731 | |
| }, | |
| { | |
| "epoch": 0.5973451327433629, | |
| "grad_norm": 0.1702618619587731, | |
| "learning_rate": 3.995487543968008e-05, | |
| "loss": 0.2039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21456564962863922, | |
| "step": 540, | |
| "valid_targets_mean": 5848.1, | |
| "valid_targets_min": 2357 | |
| }, | |
| { | |
| "epoch": 0.6028761061946902, | |
| "grad_norm": 0.17930392281303356, | |
| "learning_rate": 3.994954190929403e-05, | |
| "loss": 0.21, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20728962123394012, | |
| "step": 545, | |
| "valid_targets_mean": 5329.2, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 0.6084070796460177, | |
| "grad_norm": 0.16248650977958623, | |
| "learning_rate": 3.9943910931455565e-05, | |
| "loss": 0.2078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2184375524520874, | |
| "step": 550, | |
| "valid_targets_mean": 5926.5, | |
| "valid_targets_min": 2729 | |
| }, | |
| { | |
| "epoch": 0.6139380530973452, | |
| "grad_norm": 0.17265449916957687, | |
| "learning_rate": 3.993798259012252e-05, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.212861105799675, | |
| "step": 555, | |
| "valid_targets_mean": 5915.4, | |
| "valid_targets_min": 2392 | |
| }, | |
| { | |
| "epoch": 0.6194690265486725, | |
| "grad_norm": 0.1787149204675534, | |
| "learning_rate": 3.993175697368638e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18465320765972137, | |
| "step": 560, | |
| "valid_targets_mean": 5626.6, | |
| "valid_targets_min": 1927 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.20000428470235557, | |
| "learning_rate": 3.9925234174971035e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21930384635925293, | |
| "step": 565, | |
| "valid_targets_mean": 5510.4, | |
| "valid_targets_min": 1659 | |
| }, | |
| { | |
| "epoch": 0.6305309734513275, | |
| "grad_norm": 0.16862149801222848, | |
| "learning_rate": 3.9918414291231326e-05, | |
| "loss": 0.2055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2045677900314331, | |
| "step": 570, | |
| "valid_targets_mean": 5771.7, | |
| "valid_targets_min": 2757 | |
| }, | |
| { | |
| "epoch": 0.6360619469026548, | |
| "grad_norm": 0.1762897111866651, | |
| "learning_rate": 3.991129742415166e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21408332884311676, | |
| "step": 575, | |
| "valid_targets_mean": 5794.3, | |
| "valid_targets_min": 2085 | |
| }, | |
| { | |
| "epoch": 0.6415929203539823, | |
| "grad_norm": 0.1662555375214533, | |
| "learning_rate": 3.990388367984444e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19020187854766846, | |
| "step": 580, | |
| "valid_targets_mean": 5551.5, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 0.6471238938053098, | |
| "grad_norm": 0.18236887482346079, | |
| "learning_rate": 3.9896173168848515e-05, | |
| "loss": 0.2074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20354793965816498, | |
| "step": 585, | |
| "valid_targets_mean": 5350.5, | |
| "valid_targets_min": 1462 | |
| }, | |
| { | |
| "epoch": 0.6526548672566371, | |
| "grad_norm": 0.18287059658632582, | |
| "learning_rate": 3.988816600612752e-05, | |
| "loss": 0.2016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20378446578979492, | |
| "step": 590, | |
| "valid_targets_mean": 5704.2, | |
| "valid_targets_min": 2055 | |
| }, | |
| { | |
| "epoch": 0.6581858407079646, | |
| "grad_norm": 0.1682085080376873, | |
| "learning_rate": 3.987986231106817e-05, | |
| "loss": 0.2053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.206926628947258, | |
| "step": 595, | |
| "valid_targets_mean": 5837.3, | |
| "valid_targets_min": 2300 | |
| }, | |
| { | |
| "epoch": 0.6637168141592921, | |
| "grad_norm": 0.1717889924804115, | |
| "learning_rate": 3.987126220747845e-05, | |
| "loss": 0.2024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21440322697162628, | |
| "step": 600, | |
| "valid_targets_mean": 5825.3, | |
| "valid_targets_min": 2271 | |
| }, | |
| { | |
| "epoch": 0.6692477876106194, | |
| "grad_norm": 0.1811374809666542, | |
| "learning_rate": 3.9862365823585826e-05, | |
| "loss": 0.1974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19254083931446075, | |
| "step": 605, | |
| "valid_targets_mean": 5229.4, | |
| "valid_targets_min": 1874 | |
| }, | |
| { | |
| "epoch": 0.6747787610619469, | |
| "grad_norm": 0.16222538585339927, | |
| "learning_rate": 3.985317329203528e-05, | |
| "loss": 0.2018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20036669075489044, | |
| "step": 610, | |
| "valid_targets_mean": 5866.6, | |
| "valid_targets_min": 1785 | |
| }, | |
| { | |
| "epoch": 0.6803097345132744, | |
| "grad_norm": 0.16665909856898078, | |
| "learning_rate": 3.9843684749887364e-05, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2179078310728073, | |
| "step": 615, | |
| "valid_targets_mean": 5576.2, | |
| "valid_targets_min": 1880 | |
| }, | |
| { | |
| "epoch": 0.6858407079646017, | |
| "grad_norm": 0.15718367417641263, | |
| "learning_rate": 3.983390033861612e-05, | |
| "loss": 0.2039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18891756236553192, | |
| "step": 620, | |
| "valid_targets_mean": 5715.7, | |
| "valid_targets_min": 2689 | |
| }, | |
| { | |
| "epoch": 0.6913716814159292, | |
| "grad_norm": 0.15167102754992687, | |
| "learning_rate": 3.982382020410704e-05, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19976215064525604, | |
| "step": 625, | |
| "valid_targets_mean": 5612.2, | |
| "valid_targets_min": 2528 | |
| }, | |
| { | |
| "epoch": 0.6969026548672567, | |
| "grad_norm": 0.6803268497140413, | |
| "learning_rate": 3.98134444966548e-05, | |
| "loss": 0.2013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1946277767419815, | |
| "step": 630, | |
| "valid_targets_mean": 5361.4, | |
| "valid_targets_min": 1619 | |
| }, | |
| { | |
| "epoch": 0.702433628318584, | |
| "grad_norm": 0.27678324434013174, | |
| "learning_rate": 3.9802773370961085e-05, | |
| "loss": 0.1923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19183482229709625, | |
| "step": 635, | |
| "valid_targets_mean": 5468.0, | |
| "valid_targets_min": 1809 | |
| }, | |
| { | |
| "epoch": 0.7079646017699115, | |
| "grad_norm": 0.16480376441706648, | |
| "learning_rate": 3.9791806986132275e-05, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20313429832458496, | |
| "step": 640, | |
| "valid_targets_mean": 5593.9, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 0.713495575221239, | |
| "grad_norm": 0.23769764403682692, | |
| "learning_rate": 3.978054550567704e-05, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18976962566375732, | |
| "step": 645, | |
| "valid_targets_mean": 5520.8, | |
| "valid_targets_min": 2227 | |
| }, | |
| { | |
| "epoch": 0.7190265486725663, | |
| "grad_norm": 0.1803517322985234, | |
| "learning_rate": 3.976898909750393e-05, | |
| "loss": 0.2043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20457589626312256, | |
| "step": 650, | |
| "valid_targets_mean": 5589.7, | |
| "valid_targets_min": 2576 | |
| }, | |
| { | |
| "epoch": 0.7245575221238938, | |
| "grad_norm": 0.18688338247699685, | |
| "learning_rate": 3.975713793391886e-05, | |
| "loss": 0.1969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19263188540935516, | |
| "step": 655, | |
| "valid_targets_mean": 5756.1, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 0.7300884955752213, | |
| "grad_norm": 0.18350771549139364, | |
| "learning_rate": 3.9744992191622574e-05, | |
| "loss": 0.1952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20049209892749786, | |
| "step": 660, | |
| "valid_targets_mean": 5355.7, | |
| "valid_targets_min": 1725 | |
| }, | |
| { | |
| "epoch": 0.7356194690265486, | |
| "grad_norm": 0.160182224883967, | |
| "learning_rate": 3.973255205170793e-05, | |
| "loss": 0.1994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21431738138198853, | |
| "step": 665, | |
| "valid_targets_mean": 5235.0, | |
| "valid_targets_min": 1306 | |
| }, | |
| { | |
| "epoch": 0.7411504424778761, | |
| "grad_norm": 0.15741699454826696, | |
| "learning_rate": 3.971981769965729e-05, | |
| "loss": 0.1937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18756063282489777, | |
| "step": 670, | |
| "valid_targets_mean": 5616.5, | |
| "valid_targets_min": 2093 | |
| }, | |
| { | |
| "epoch": 0.7466814159292036, | |
| "grad_norm": 0.15716528086145712, | |
| "learning_rate": 3.97067893253397e-05, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1861332654953003, | |
| "step": 675, | |
| "valid_targets_mean": 5920.9, | |
| "valid_targets_min": 2414 | |
| }, | |
| { | |
| "epoch": 0.7522123893805309, | |
| "grad_norm": 0.16938049514069994, | |
| "learning_rate": 3.969346712300808e-05, | |
| "loss": 0.2067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21274550259113312, | |
| "step": 680, | |
| "valid_targets_mean": 5469.1, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 0.7577433628318584, | |
| "grad_norm": 0.25687705773661557, | |
| "learning_rate": 3.967985129129633e-05, | |
| "loss": 0.3204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36207130551338196, | |
| "step": 685, | |
| "valid_targets_mean": 4090.8, | |
| "valid_targets_min": 1901 | |
| }, | |
| { | |
| "epoch": 0.7632743362831859, | |
| "grad_norm": 0.23668359052326055, | |
| "learning_rate": 3.966594203321634e-05, | |
| "loss": 0.3421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33289945125579834, | |
| "step": 690, | |
| "valid_targets_mean": 4178.5, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 0.7688053097345132, | |
| "grad_norm": 0.20950876708136784, | |
| "learning_rate": 3.965173955615501e-05, | |
| "loss": 0.3332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3364197313785553, | |
| "step": 695, | |
| "valid_targets_mean": 4353.9, | |
| "valid_targets_min": 1773 | |
| }, | |
| { | |
| "epoch": 0.7743362831858407, | |
| "grad_norm": 0.19212252299918386, | |
| "learning_rate": 3.9637244071871106e-05, | |
| "loss": 0.3203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30680403113365173, | |
| "step": 700, | |
| "valid_targets_mean": 4688.5, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 0.7798672566371682, | |
| "grad_norm": 0.21204792793978303, | |
| "learning_rate": 3.9622455796492144e-05, | |
| "loss": 0.3127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3101229667663574, | |
| "step": 705, | |
| "valid_targets_mean": 4554.5, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 0.7853982300884956, | |
| "grad_norm": 0.20014901679388436, | |
| "learning_rate": 3.960737495051115e-05, | |
| "loss": 0.308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31147459149360657, | |
| "step": 710, | |
| "valid_targets_mean": 4264.2, | |
| "valid_targets_min": 1416 | |
| }, | |
| { | |
| "epoch": 0.790929203539823, | |
| "grad_norm": 0.20929576869102048, | |
| "learning_rate": 3.9592001758783375e-05, | |
| "loss": 0.2857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2720358073711395, | |
| "step": 715, | |
| "valid_targets_mean": 7332.3, | |
| "valid_targets_min": 1254 | |
| }, | |
| { | |
| "epoch": 0.7964601769911505, | |
| "grad_norm": 0.2043193927150728, | |
| "learning_rate": 3.957633645052294e-05, | |
| "loss": 0.2743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26549115777015686, | |
| "step": 720, | |
| "valid_targets_mean": 6887.4, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 0.8019911504424779, | |
| "grad_norm": 0.210310374363524, | |
| "learning_rate": 3.9560379259299415e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2523461580276489, | |
| "step": 725, | |
| "valid_targets_mean": 6928.9, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 0.8075221238938053, | |
| "grad_norm": 0.17718070175125805, | |
| "learning_rate": 3.954413042303435e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2585158348083496, | |
| "step": 730, | |
| "valid_targets_mean": 8176.9, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 0.8130530973451328, | |
| "grad_norm": 0.18213077504042655, | |
| "learning_rate": 3.952759018399772e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24766899645328522, | |
| "step": 735, | |
| "valid_targets_mean": 7889.0, | |
| "valid_targets_min": 1113 | |
| }, | |
| { | |
| "epoch": 0.8185840707964602, | |
| "grad_norm": 0.16718549482590578, | |
| "learning_rate": 3.9510758788804304e-05, | |
| "loss": 0.248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2448243647813797, | |
| "step": 740, | |
| "valid_targets_mean": 8397.6, | |
| "valid_targets_min": 2267 | |
| }, | |
| { | |
| "epoch": 0.8241150442477876, | |
| "grad_norm": 0.14578348697626117, | |
| "learning_rate": 3.949363648841002e-05, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21307845413684845, | |
| "step": 745, | |
| "valid_targets_mean": 14460.7, | |
| "valid_targets_min": 1121 | |
| }, | |
| { | |
| "epoch": 0.8296460176991151, | |
| "grad_norm": 0.13609596779145847, | |
| "learning_rate": 3.947622353810819e-05, | |
| "loss": 0.2076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21203313767910004, | |
| "step": 750, | |
| "valid_targets_mean": 13473.2, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 0.8351769911504425, | |
| "grad_norm": 0.15835332574909403, | |
| "learning_rate": 3.94585201975257e-05, | |
| "loss": 0.2046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2094709277153015, | |
| "step": 755, | |
| "valid_targets_mean": 13628.6, | |
| "valid_targets_min": 2420 | |
| }, | |
| { | |
| "epoch": 0.8407079646017699, | |
| "grad_norm": 0.14894092671422526, | |
| "learning_rate": 3.944052673061918e-05, | |
| "loss": 0.2066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2118341326713562, | |
| "step": 760, | |
| "valid_targets_mean": 10881.1, | |
| "valid_targets_min": 3549 | |
| }, | |
| { | |
| "epoch": 0.8462389380530974, | |
| "grad_norm": 0.23379254712205116, | |
| "learning_rate": 3.942224340567101e-05, | |
| "loss": 0.2471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.253144234418869, | |
| "step": 765, | |
| "valid_targets_mean": 7384.0, | |
| "valid_targets_min": 1894 | |
| }, | |
| { | |
| "epoch": 0.8517699115044248, | |
| "grad_norm": 0.17658744756781206, | |
| "learning_rate": 3.940367049528537e-05, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23839257657527924, | |
| "step": 770, | |
| "valid_targets_mean": 7675.6, | |
| "valid_targets_min": 1279 | |
| }, | |
| { | |
| "epoch": 0.8573008849557522, | |
| "grad_norm": 0.194252086194866, | |
| "learning_rate": 3.938480827638416e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2887425124645233, | |
| "step": 775, | |
| "valid_targets_mean": 4836.4, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 0.8628318584070797, | |
| "grad_norm": 0.23085775589635357, | |
| "learning_rate": 3.936565703020285e-05, | |
| "loss": 0.2806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28100839257240295, | |
| "step": 780, | |
| "valid_targets_mean": 4853.2, | |
| "valid_targets_min": 1767 | |
| }, | |
| { | |
| "epoch": 0.8683628318584071, | |
| "grad_norm": 0.21109291422936674, | |
| "learning_rate": 3.934621704228631e-05, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26462164521217346, | |
| "step": 785, | |
| "valid_targets_mean": 4680.1, | |
| "valid_targets_min": 1543 | |
| }, | |
| { | |
| "epoch": 0.8738938053097345, | |
| "grad_norm": 0.32929642208244925, | |
| "learning_rate": 3.932648860248455e-05, | |
| "loss": 0.2837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28274524211883545, | |
| "step": 790, | |
| "valid_targets_mean": 4703.0, | |
| "valid_targets_min": 1634 | |
| }, | |
| { | |
| "epoch": 0.879424778761062, | |
| "grad_norm": 0.22024510044666637, | |
| "learning_rate": 3.9306472004948404e-05, | |
| "loss": 0.288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3274838626384735, | |
| "step": 795, | |
| "valid_targets_mean": 4091.5, | |
| "valid_targets_min": 1273 | |
| }, | |
| { | |
| "epoch": 0.8849557522123894, | |
| "grad_norm": 0.1804105282273918, | |
| "learning_rate": 3.928616754812511e-05, | |
| "loss": 0.3179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3182201087474823, | |
| "step": 800, | |
| "valid_targets_mean": 4962.9, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 0.8904867256637168, | |
| "grad_norm": 0.18769953945284557, | |
| "learning_rate": 3.92655755347539e-05, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31607094407081604, | |
| "step": 805, | |
| "valid_targets_mean": 5159.5, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 0.8960176991150443, | |
| "grad_norm": 0.18491593075240448, | |
| "learning_rate": 3.924469627186147e-05, | |
| "loss": 0.3074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30399420857429504, | |
| "step": 810, | |
| "valid_targets_mean": 5514.9, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 0.9015486725663717, | |
| "grad_norm": 0.2895409100786274, | |
| "learning_rate": 3.92235300707574e-05, | |
| "loss": 0.3151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3202892243862152, | |
| "step": 815, | |
| "valid_targets_mean": 4748.6, | |
| "valid_targets_min": 1382 | |
| }, | |
| { | |
| "epoch": 0.9070796460176991, | |
| "grad_norm": 0.18891271819811623, | |
| "learning_rate": 3.920207724702953e-05, | |
| "loss": 0.3108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2969510853290558, | |
| "step": 820, | |
| "valid_targets_mean": 5192.8, | |
| "valid_targets_min": 2036 | |
| }, | |
| { | |
| "epoch": 0.9126106194690266, | |
| "grad_norm": 0.1960227937214196, | |
| "learning_rate": 3.9180338120539204e-05, | |
| "loss": 0.3131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3346484899520874, | |
| "step": 825, | |
| "valid_targets_mean": 5093.5, | |
| "valid_targets_min": 1461 | |
| }, | |
| { | |
| "epoch": 0.918141592920354, | |
| "grad_norm": 0.20633533993390926, | |
| "learning_rate": 3.9158313015416585e-05, | |
| "loss": 0.3061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3141270875930786, | |
| "step": 830, | |
| "valid_targets_mean": 4668.5, | |
| "valid_targets_min": 1633 | |
| }, | |
| { | |
| "epoch": 0.9236725663716814, | |
| "grad_norm": 0.1838047142459626, | |
| "learning_rate": 3.9136002260055735e-05, | |
| "loss": 0.308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28946492075920105, | |
| "step": 835, | |
| "valid_targets_mean": 5427.1, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 0.9292035398230089, | |
| "grad_norm": 0.21286776415038075, | |
| "learning_rate": 3.911340618710978e-05, | |
| "loss": 0.3135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3171522319316864, | |
| "step": 840, | |
| "valid_targets_mean": 4934.3, | |
| "valid_targets_min": 1674 | |
| }, | |
| { | |
| "epoch": 0.9347345132743363, | |
| "grad_norm": 0.19664301080487778, | |
| "learning_rate": 3.9090525133485924e-05, | |
| "loss": 0.3001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3026646673679352, | |
| "step": 845, | |
| "valid_targets_mean": 5481.1, | |
| "valid_targets_min": 1151 | |
| }, | |
| { | |
| "epoch": 0.9402654867256637, | |
| "grad_norm": 0.22272994193968923, | |
| "learning_rate": 3.906735944034042e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31106624007225037, | |
| "step": 850, | |
| "valid_targets_mean": 4665.0, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 0.9457964601769911, | |
| "grad_norm": 0.20412095039492995, | |
| "learning_rate": 3.90439094530735e-05, | |
| "loss": 0.3085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29317423701286316, | |
| "step": 855, | |
| "valid_targets_mean": 5448.6, | |
| "valid_targets_min": 2062 | |
| }, | |
| { | |
| "epoch": 0.9513274336283186, | |
| "grad_norm": 0.20987274279114138, | |
| "learning_rate": 3.902017552132422e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29278409481048584, | |
| "step": 860, | |
| "valid_targets_mean": 4281.3, | |
| "valid_targets_min": 1891 | |
| }, | |
| { | |
| "epoch": 0.956858407079646, | |
| "grad_norm": 0.212536328843497, | |
| "learning_rate": 3.8996157998965254e-05, | |
| "loss": 0.2984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29718199372291565, | |
| "step": 865, | |
| "valid_targets_mean": 4369.3, | |
| "valid_targets_min": 1879 | |
| }, | |
| { | |
| "epoch": 0.9623893805309734, | |
| "grad_norm": 0.2037185111564353, | |
| "learning_rate": 3.897185724409758e-05, | |
| "loss": 0.2968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29531028866767883, | |
| "step": 870, | |
| "valid_targets_mean": 3977.8, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 0.9679203539823009, | |
| "grad_norm": 0.20610487152697554, | |
| "learning_rate": 3.894727361904521e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3018393814563751, | |
| "step": 875, | |
| "valid_targets_mean": 4370.8, | |
| "valid_targets_min": 1421 | |
| }, | |
| { | |
| "epoch": 0.9734513274336283, | |
| "grad_norm": 0.19292030412412908, | |
| "learning_rate": 3.89224074903497e-05, | |
| "loss": 0.29, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2899584174156189, | |
| "step": 880, | |
| "valid_targets_mean": 4221.8, | |
| "valid_targets_min": 1660 | |
| }, | |
| { | |
| "epoch": 0.9789823008849557, | |
| "grad_norm": 0.2024922326763911, | |
| "learning_rate": 3.889725922876479e-05, | |
| "loss": 0.2965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.307547390460968, | |
| "step": 885, | |
| "valid_targets_mean": 4160.7, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 0.9845132743362832, | |
| "grad_norm": 0.20034820380849994, | |
| "learning_rate": 3.887182920925075e-05, | |
| "loss": 0.2964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30820146203041077, | |
| "step": 890, | |
| "valid_targets_mean": 4385.5, | |
| "valid_targets_min": 1324 | |
| }, | |
| { | |
| "epoch": 0.9900442477876106, | |
| "grad_norm": 0.1907986306888555, | |
| "learning_rate": 3.884611781096892e-05, | |
| "loss": 0.2924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28202396631240845, | |
| "step": 895, | |
| "valid_targets_mean": 4515.7, | |
| "valid_targets_min": 1885 | |
| }, | |
| { | |
| "epoch": 0.995575221238938, | |
| "grad_norm": 0.2033067160052834, | |
| "learning_rate": 3.882012541727596e-05, | |
| "loss": 0.2934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29650285840034485, | |
| "step": 900, | |
| "valid_targets_mean": 4247.0, | |
| "valid_targets_min": 1943 | |
| }, | |
| { | |
| "epoch": 1.0011061946902655, | |
| "grad_norm": 0.18764527803866998, | |
| "learning_rate": 3.879385241571817e-05, | |
| "loss": 0.2854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2728869616985321, | |
| "step": 905, | |
| "valid_targets_mean": 7358.9, | |
| "valid_targets_min": 2475 | |
| }, | |
| { | |
| "epoch": 1.0066371681415929, | |
| "grad_norm": 0.1753216909870255, | |
| "learning_rate": 3.8767299198025727e-05, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28461799025535583, | |
| "step": 910, | |
| "valid_targets_mean": 7245.9, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 1.0121681415929205, | |
| "grad_norm": 0.15906670630165892, | |
| "learning_rate": 3.874046616010681e-05, | |
| "loss": 0.272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.262459397315979, | |
| "step": 915, | |
| "valid_targets_mean": 6711.8, | |
| "valid_targets_min": 2242 | |
| }, | |
| { | |
| "epoch": 1.0176991150442478, | |
| "grad_norm": 0.17912735743749905, | |
| "learning_rate": 3.871335370204173e-05, | |
| "loss": 0.2637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27788102626800537, | |
| "step": 920, | |
| "valid_targets_mean": 6841.6, | |
| "valid_targets_min": 1918 | |
| }, | |
| { | |
| "epoch": 1.0232300884955752, | |
| "grad_norm": 0.15985370175928384, | |
| "learning_rate": 3.8685962228076934e-05, | |
| "loss": 0.2682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26721683144569397, | |
| "step": 925, | |
| "valid_targets_mean": 6978.7, | |
| "valid_targets_min": 1859 | |
| }, | |
| { | |
| "epoch": 1.0287610619469028, | |
| "grad_norm": 0.1676197970513165, | |
| "learning_rate": 3.8658292146619005e-05, | |
| "loss": 0.2633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25416240096092224, | |
| "step": 930, | |
| "valid_targets_mean": 6627.3, | |
| "valid_targets_min": 1734 | |
| }, | |
| { | |
| "epoch": 1.0342920353982301, | |
| "grad_norm": 0.16679083160481198, | |
| "learning_rate": 3.863034387022855e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26866433024406433, | |
| "step": 935, | |
| "valid_targets_mean": 7084.4, | |
| "valid_targets_min": 1589 | |
| }, | |
| { | |
| "epoch": 1.0398230088495575, | |
| "grad_norm": 0.15542402381751244, | |
| "learning_rate": 3.860211781561408e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24978803098201752, | |
| "step": 940, | |
| "valid_targets_mean": 6610.8, | |
| "valid_targets_min": 2108 | |
| }, | |
| { | |
| "epoch": 1.045353982300885, | |
| "grad_norm": 0.1630104077461306, | |
| "learning_rate": 3.857361440362573e-05, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507806718349457, | |
| "step": 945, | |
| "valid_targets_mean": 6572.6, | |
| "valid_targets_min": 2423 | |
| }, | |
| { | |
| "epoch": 1.0508849557522124, | |
| "grad_norm": 0.17103174788469921, | |
| "learning_rate": 3.85448340592491e-05, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24514667689800262, | |
| "step": 950, | |
| "valid_targets_mean": 6600.4, | |
| "valid_targets_min": 2520 | |
| }, | |
| { | |
| "epoch": 1.0564159292035398, | |
| "grad_norm": 0.17431363063053332, | |
| "learning_rate": 3.851577721159878e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2476859837770462, | |
| "step": 955, | |
| "valid_targets_mean": 6349.8, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 1.0619469026548674, | |
| "grad_norm": 0.1694303498008927, | |
| "learning_rate": 3.848644429391208e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2569247782230377, | |
| "step": 960, | |
| "valid_targets_mean": 6743.1, | |
| "valid_targets_min": 1540 | |
| }, | |
| { | |
| "epoch": 1.0674778761061947, | |
| "grad_norm": 0.1646179974368683, | |
| "learning_rate": 3.845683574354246e-05, | |
| "loss": 0.2588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2671889364719391, | |
| "step": 965, | |
| "valid_targets_mean": 6893.5, | |
| "valid_targets_min": 1554 | |
| }, | |
| { | |
| "epoch": 1.073008849557522, | |
| "grad_norm": 0.17582742510155383, | |
| "learning_rate": 3.8426952001953094e-05, | |
| "loss": 0.2559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26603785157203674, | |
| "step": 970, | |
| "valid_targets_mean": 7179.4, | |
| "valid_targets_min": 1805 | |
| }, | |
| { | |
| "epoch": 1.0785398230088497, | |
| "grad_norm": 0.1560282832000148, | |
| "learning_rate": 3.8396793514710235e-05, | |
| "loss": 0.2546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25165605545043945, | |
| "step": 975, | |
| "valid_targets_mean": 6652.2, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 1.084070796460177, | |
| "grad_norm": 0.1553143692632455, | |
| "learning_rate": 3.836636073147661e-05, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26703354716300964, | |
| "step": 980, | |
| "valid_targets_mean": 7013.3, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 1.0896017699115044, | |
| "grad_norm": 0.14650503050403416, | |
| "learning_rate": 3.833565410600468e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26487669348716736, | |
| "step": 985, | |
| "valid_targets_mean": 7367.7, | |
| "valid_targets_min": 2464 | |
| }, | |
| { | |
| "epoch": 1.095132743362832, | |
| "grad_norm": 0.14962514940163674, | |
| "learning_rate": 3.83046740961299e-05, | |
| "loss": 0.257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24338607490062714, | |
| "step": 990, | |
| "valid_targets_mean": 6690.5, | |
| "valid_targets_min": 1698 | |
| }, | |
| { | |
| "epoch": 1.1006637168141593, | |
| "grad_norm": 0.15561446222027916, | |
| "learning_rate": 3.827342116376387e-05, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2614849805831909, | |
| "step": 995, | |
| "valid_targets_mean": 7013.9, | |
| "valid_targets_min": 1466 | |
| }, | |
| { | |
| "epoch": 1.1061946902654867, | |
| "grad_norm": 0.16510163159694718, | |
| "learning_rate": 3.8241895774887475e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24882449209690094, | |
| "step": 1000, | |
| "valid_targets_mean": 6939.3, | |
| "valid_targets_min": 2543 | |
| }, | |
| { | |
| "epoch": 1.1117256637168142, | |
| "grad_norm": 0.1722580891507891, | |
| "learning_rate": 3.8210098399543927e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26360180974006653, | |
| "step": 1005, | |
| "valid_targets_mean": 6906.1, | |
| "valid_targets_min": 2306 | |
| }, | |
| { | |
| "epoch": 1.1172566371681416, | |
| "grad_norm": 0.16028828366166148, | |
| "learning_rate": 3.817802951183174e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2679308354854584, | |
| "step": 1010, | |
| "valid_targets_mean": 6972.2, | |
| "valid_targets_min": 1992 | |
| }, | |
| { | |
| "epoch": 1.122787610619469, | |
| "grad_norm": 0.17039603990659744, | |
| "learning_rate": 3.814568958989767e-05, | |
| "loss": 0.2572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26134833693504333, | |
| "step": 1015, | |
| "valid_targets_mean": 6717.2, | |
| "valid_targets_min": 2149 | |
| }, | |
| { | |
| "epoch": 1.1283185840707965, | |
| "grad_norm": 0.15340075173097997, | |
| "learning_rate": 3.811307911592963e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2556039094924927, | |
| "step": 1020, | |
| "valid_targets_mean": 6894.4, | |
| "valid_targets_min": 1806 | |
| }, | |
| { | |
| "epoch": 1.133849557522124, | |
| "grad_norm": 0.27385970755732725, | |
| "learning_rate": 3.8080198576149395e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2588860094547272, | |
| "step": 1025, | |
| "valid_targets_mean": 7202.1, | |
| "valid_targets_min": 1899 | |
| }, | |
| { | |
| "epoch": 1.1393805309734513, | |
| "grad_norm": 0.16168376471469795, | |
| "learning_rate": 3.8047048460805474e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23941798508167267, | |
| "step": 1030, | |
| "valid_targets_mean": 6827.7, | |
| "valid_targets_min": 2089 | |
| }, | |
| { | |
| "epoch": 1.1449115044247788, | |
| "grad_norm": 0.14780538097558296, | |
| "learning_rate": 3.801362926416573e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25472015142440796, | |
| "step": 1035, | |
| "valid_targets_mean": 6651.7, | |
| "valid_targets_min": 2048 | |
| }, | |
| { | |
| "epoch": 1.1504424778761062, | |
| "grad_norm": 0.19906356931861902, | |
| "learning_rate": 3.7979941484510006e-05, | |
| "loss": 0.2449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24431133270263672, | |
| "step": 1040, | |
| "valid_targets_mean": 6973.3, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 1.1559734513274336, | |
| "grad_norm": 0.2222663244718716, | |
| "learning_rate": 3.794598562412275e-05, | |
| "loss": 0.2402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2547883689403534, | |
| "step": 1045, | |
| "valid_targets_mean": 6724.4, | |
| "valid_targets_min": 2135 | |
| }, | |
| { | |
| "epoch": 1.1615044247787611, | |
| "grad_norm": 0.14763321189036382, | |
| "learning_rate": 3.7911762189285444e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2548646628856659, | |
| "step": 1050, | |
| "valid_targets_mean": 6858.4, | |
| "valid_targets_min": 1871 | |
| }, | |
| { | |
| "epoch": 1.1670353982300885, | |
| "grad_norm": 0.16494199124542216, | |
| "learning_rate": 3.787727169026915e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23027609288692474, | |
| "step": 1055, | |
| "valid_targets_mean": 6819.2, | |
| "valid_targets_min": 2150 | |
| }, | |
| { | |
| "epoch": 1.1725663716814159, | |
| "grad_norm": 0.1762501326647317, | |
| "learning_rate": 3.7842514641326816e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24256758391857147, | |
| "step": 1060, | |
| "valid_targets_mean": 6917.9, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 1.1780973451327434, | |
| "grad_norm": 0.16318242647763034, | |
| "learning_rate": 3.7807491560685676e-05, | |
| "loss": 0.2523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25302407145500183, | |
| "step": 1065, | |
| "valid_targets_mean": 7014.2, | |
| "valid_targets_min": 2089 | |
| }, | |
| { | |
| "epoch": 1.1836283185840708, | |
| "grad_norm": 0.16615720841601828, | |
| "learning_rate": 3.7772202970539475e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24882571399211884, | |
| "step": 1070, | |
| "valid_targets_mean": 7009.1, | |
| "valid_targets_min": 2279 | |
| }, | |
| { | |
| "epoch": 1.1891592920353982, | |
| "grad_norm": 0.15917335775201197, | |
| "learning_rate": 3.773664939704071e-05, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24912218749523163, | |
| "step": 1075, | |
| "valid_targets_mean": 6939.1, | |
| "valid_targets_min": 1746 | |
| }, | |
| { | |
| "epoch": 1.1946902654867257, | |
| "grad_norm": 0.17268466244186648, | |
| "learning_rate": 3.7700831370292774e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2412043958902359, | |
| "step": 1080, | |
| "valid_targets_mean": 6708.7, | |
| "valid_targets_min": 2106 | |
| }, | |
| { | |
| "epoch": 1.200221238938053, | |
| "grad_norm": 0.15275074613008433, | |
| "learning_rate": 3.766474942434205e-05, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24718432128429413, | |
| "step": 1085, | |
| "valid_targets_mean": 7130.4, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 1.2057522123893805, | |
| "grad_norm": 0.17296881684491172, | |
| "learning_rate": 3.7628404097169964e-05, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24724991619586945, | |
| "step": 1090, | |
| "valid_targets_mean": 6805.5, | |
| "valid_targets_min": 1852 | |
| }, | |
| { | |
| "epoch": 1.211283185840708, | |
| "grad_norm": 0.16644116308089743, | |
| "learning_rate": 3.7591795930684946e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23142194747924805, | |
| "step": 1095, | |
| "valid_targets_mean": 6666.5, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 1.2168141592920354, | |
| "grad_norm": 0.1677663942040491, | |
| "learning_rate": 3.7554925470714366e-05, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24631698429584503, | |
| "step": 1100, | |
| "valid_targets_mean": 6736.1, | |
| "valid_targets_min": 1961 | |
| }, | |
| { | |
| "epoch": 1.2223451327433628, | |
| "grad_norm": 0.14941920725052024, | |
| "learning_rate": 3.751779326699637e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.244475319981575, | |
| "step": 1105, | |
| "valid_targets_mean": 6782.5, | |
| "valid_targets_min": 2011 | |
| }, | |
| { | |
| "epoch": 1.2278761061946903, | |
| "grad_norm": 0.2510439677474576, | |
| "learning_rate": 3.748039987317171e-05, | |
| "loss": 0.3212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3324793577194214, | |
| "step": 1110, | |
| "valid_targets_mean": 4749.0, | |
| "valid_targets_min": 1664 | |
| }, | |
| { | |
| "epoch": 1.2334070796460177, | |
| "grad_norm": 0.8629612469129403, | |
| "learning_rate": 3.744274584677549e-05, | |
| "loss": 0.3238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30500295758247375, | |
| "step": 1115, | |
| "valid_targets_mean": 4262.5, | |
| "valid_targets_min": 1807 | |
| }, | |
| { | |
| "epoch": 1.238938053097345, | |
| "grad_norm": 0.22500654668676817, | |
| "learning_rate": 3.740483174922883e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32562780380249023, | |
| "step": 1120, | |
| "valid_targets_mean": 4351.2, | |
| "valid_targets_min": 1276 | |
| }, | |
| { | |
| "epoch": 1.2444690265486726, | |
| "grad_norm": 0.2117243223890732, | |
| "learning_rate": 3.7366658145830506e-05, | |
| "loss": 0.3106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3054439127445221, | |
| "step": 1125, | |
| "valid_targets_mean": 4534.4, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.25698010738972227, | |
| "learning_rate": 3.732822560574853e-05, | |
| "loss": 0.3139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31926587224006653, | |
| "step": 1130, | |
| "valid_targets_mean": 4532.7, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 1.2555309734513274, | |
| "grad_norm": 0.2381109188765564, | |
| "learning_rate": 3.728953470201166e-05, | |
| "loss": 0.3203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3142293393611908, | |
| "step": 1135, | |
| "valid_targets_mean": 4263.1, | |
| "valid_targets_min": 1763 | |
| }, | |
| { | |
| "epoch": 1.261061946902655, | |
| "grad_norm": 0.22283779190208894, | |
| "learning_rate": 3.725058601150085e-05, | |
| "loss": 0.3342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3407679796218872, | |
| "step": 1140, | |
| "valid_targets_mean": 4287.6, | |
| "valid_targets_min": 1591 | |
| }, | |
| { | |
| "epoch": 1.2665929203539823, | |
| "grad_norm": 0.2246950856318042, | |
| "learning_rate": 3.721138011494064e-05, | |
| "loss": 0.3233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3271946609020233, | |
| "step": 1145, | |
| "valid_targets_mean": 4005.3, | |
| "valid_targets_min": 1143 | |
| }, | |
| { | |
| "epoch": 1.2721238938053097, | |
| "grad_norm": 0.22037905057936322, | |
| "learning_rate": 3.717191759689054e-05, | |
| "loss": 0.3257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31600961089134216, | |
| "step": 1150, | |
| "valid_targets_mean": 4379.5, | |
| "valid_targets_min": 1948 | |
| }, | |
| { | |
| "epoch": 1.2776548672566372, | |
| "grad_norm": 0.20981834514007755, | |
| "learning_rate": 3.7132199045736236e-05, | |
| "loss": 0.3059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3120702803134918, | |
| "step": 1155, | |
| "valid_targets_mean": 4271.3, | |
| "valid_targets_min": 1489 | |
| }, | |
| { | |
| "epoch": 1.2831858407079646, | |
| "grad_norm": 0.21278795307759427, | |
| "learning_rate": 3.709222505368092e-05, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3151739239692688, | |
| "step": 1160, | |
| "valid_targets_mean": 4511.6, | |
| "valid_targets_min": 1431 | |
| }, | |
| { | |
| "epoch": 1.288716814159292, | |
| "grad_norm": 0.1958106465884248, | |
| "learning_rate": 3.705199621673637e-05, | |
| "loss": 0.3095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2997433841228485, | |
| "step": 1165, | |
| "valid_targets_mean": 4534.4, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 1.2942477876106195, | |
| "grad_norm": 0.192697173782494, | |
| "learning_rate": 3.70115131347141e-05, | |
| "loss": 0.3017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30174851417541504, | |
| "step": 1170, | |
| "valid_targets_mean": 4414.5, | |
| "valid_targets_min": 1500 | |
| }, | |
| { | |
| "epoch": 1.299778761061947, | |
| "grad_norm": 0.23725455992203628, | |
| "learning_rate": 3.697077641121641e-05, | |
| "loss": 0.3057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30690473318099976, | |
| "step": 1175, | |
| "valid_targets_mean": 4303.6, | |
| "valid_targets_min": 1370 | |
| }, | |
| { | |
| "epoch": 1.3053097345132743, | |
| "grad_norm": 0.28851987080456404, | |
| "learning_rate": 3.692978665362743e-05, | |
| "loss": 0.3023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29298314452171326, | |
| "step": 1180, | |
| "valid_targets_mean": 4320.9, | |
| "valid_targets_min": 1762 | |
| }, | |
| { | |
| "epoch": 1.3108407079646018, | |
| "grad_norm": 0.22192813824775098, | |
| "learning_rate": 3.688854447310398e-05, | |
| "loss": 0.3114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3098334074020386, | |
| "step": 1185, | |
| "valid_targets_mean": 4050.6, | |
| "valid_targets_min": 1631 | |
| }, | |
| { | |
| "epoch": 1.3163716814159292, | |
| "grad_norm": 0.22289694960461987, | |
| "learning_rate": 3.684705048456654e-05, | |
| "loss": 0.3123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3003637492656708, | |
| "step": 1190, | |
| "valid_targets_mean": 4401.1, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 1.3219026548672566, | |
| "grad_norm": 0.22298702559314276, | |
| "learning_rate": 3.680530530669001e-05, | |
| "loss": 0.3027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30823826789855957, | |
| "step": 1195, | |
| "valid_targets_mean": 4159.3, | |
| "valid_targets_min": 1526 | |
| }, | |
| { | |
| "epoch": 1.3274336283185841, | |
| "grad_norm": 0.21355328133896545, | |
| "learning_rate": 3.6763309561894544e-05, | |
| "loss": 0.3025, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29716622829437256, | |
| "step": 1200, | |
| "valid_targets_mean": 4061.5, | |
| "valid_targets_min": 1513 | |
| }, | |
| { | |
| "epoch": 1.3329646017699115, | |
| "grad_norm": 0.21084971538132743, | |
| "learning_rate": 3.672106387633623e-05, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32338306307792664, | |
| "step": 1205, | |
| "valid_targets_mean": 4433.0, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 1.338495575221239, | |
| "grad_norm": 0.2103628755919011, | |
| "learning_rate": 3.6678568879897796e-05, | |
| "loss": 0.304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2911628782749176, | |
| "step": 1210, | |
| "valid_targets_mean": 4327.0, | |
| "valid_targets_min": 1624 | |
| }, | |
| { | |
| "epoch": 1.3440265486725664, | |
| "grad_norm": 0.21083388389025826, | |
| "learning_rate": 3.6635825206179164e-05, | |
| "loss": 0.3089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30589810013771057, | |
| "step": 1215, | |
| "valid_targets_mean": 4153.4, | |
| "valid_targets_min": 1660 | |
| }, | |
| { | |
| "epoch": 1.3495575221238938, | |
| "grad_norm": 0.22009671105785922, | |
| "learning_rate": 3.6592833492488046e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3072519302368164, | |
| "step": 1220, | |
| "valid_targets_mean": 4662.7, | |
| "valid_targets_min": 1446 | |
| }, | |
| { | |
| "epoch": 1.3550884955752212, | |
| "grad_norm": 0.2160908561960551, | |
| "learning_rate": 3.654959437983042e-05, | |
| "loss": 0.3045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29844701290130615, | |
| "step": 1225, | |
| "valid_targets_mean": 4457.4, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 1.3606194690265487, | |
| "grad_norm": 0.2176913839045533, | |
| "learning_rate": 3.650610851290099e-05, | |
| "loss": 0.3014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2885574996471405, | |
| "step": 1230, | |
| "valid_targets_mean": 4246.7, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 1.366150442477876, | |
| "grad_norm": 0.21202452566161856, | |
| "learning_rate": 3.646237654007356e-05, | |
| "loss": 0.3021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2936577796936035, | |
| "step": 1235, | |
| "valid_targets_mean": 4501.5, | |
| "valid_targets_min": 1583 | |
| }, | |
| { | |
| "epoch": 1.3716814159292037, | |
| "grad_norm": 0.19907767126945838, | |
| "learning_rate": 3.641839911339136e-05, | |
| "loss": 0.3032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29075562953948975, | |
| "step": 1240, | |
| "valid_targets_mean": 4472.6, | |
| "valid_targets_min": 1735 | |
| }, | |
| { | |
| "epoch": 1.377212389380531, | |
| "grad_norm": 0.21745160459694474, | |
| "learning_rate": 3.637417688855735e-05, | |
| "loss": 0.2983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2995056211948395, | |
| "step": 1245, | |
| "valid_targets_mean": 4517.8, | |
| "valid_targets_min": 1742 | |
| }, | |
| { | |
| "epoch": 1.3827433628318584, | |
| "grad_norm": 0.23888244114258825, | |
| "learning_rate": 3.6329710524924416e-05, | |
| "loss": 0.2988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30721163749694824, | |
| "step": 1250, | |
| "valid_targets_mean": 4580.1, | |
| "valid_targets_min": 1850 | |
| }, | |
| { | |
| "epoch": 1.3882743362831858, | |
| "grad_norm": 0.24545567357006257, | |
| "learning_rate": 3.6285000685485566e-05, | |
| "loss": 0.3018, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31131064891815186, | |
| "step": 1255, | |
| "valid_targets_mean": 4356.0, | |
| "valid_targets_min": 1488 | |
| }, | |
| { | |
| "epoch": 1.3938053097345133, | |
| "grad_norm": 0.2081885924218347, | |
| "learning_rate": 3.6240048036863995e-05, | |
| "loss": 0.3015, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2838684618473053, | |
| "step": 1260, | |
| "valid_targets_mean": 4387.4, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 1.3993362831858407, | |
| "grad_norm": 0.1998209218526537, | |
| "learning_rate": 3.619485324930322e-05, | |
| "loss": 0.3028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29139646887779236, | |
| "step": 1265, | |
| "valid_targets_mean": 4471.3, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 1.4048672566371683, | |
| "grad_norm": 0.20334717863975316, | |
| "learning_rate": 3.614941699665704e-05, | |
| "loss": 0.2938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2968079745769501, | |
| "step": 1270, | |
| "valid_targets_mean": 4296.1, | |
| "valid_targets_min": 1599 | |
| }, | |
| { | |
| "epoch": 1.4103982300884956, | |
| "grad_norm": 0.2004049036101942, | |
| "learning_rate": 3.6103739956379464e-05, | |
| "loss": 0.2949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3019116222858429, | |
| "step": 1275, | |
| "valid_targets_mean": 4675.6, | |
| "valid_targets_min": 1525 | |
| }, | |
| { | |
| "epoch": 1.415929203539823, | |
| "grad_norm": 0.1925639145679938, | |
| "learning_rate": 3.605782280951468e-05, | |
| "loss": 0.2934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28479936718940735, | |
| "step": 1280, | |
| "valid_targets_mean": 4668.6, | |
| "valid_targets_min": 1355 | |
| }, | |
| { | |
| "epoch": 1.4214601769911503, | |
| "grad_norm": 0.20620334251083203, | |
| "learning_rate": 3.601166624068685e-05, | |
| "loss": 0.2929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2955062687397003, | |
| "step": 1285, | |
| "valid_targets_mean": 4470.0, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 1.426991150442478, | |
| "grad_norm": 0.1865026901957478, | |
| "learning_rate": 3.59652709380899e-05, | |
| "loss": 0.2824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.290693998336792, | |
| "step": 1290, | |
| "valid_targets_mean": 4488.4, | |
| "valid_targets_min": 1651 | |
| }, | |
| { | |
| "epoch": 1.4325221238938053, | |
| "grad_norm": 0.21441652070660294, | |
| "learning_rate": 3.5918637593477294e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.278681218624115, | |
| "step": 1295, | |
| "valid_targets_mean": 4119.4, | |
| "valid_targets_min": 1404 | |
| }, | |
| { | |
| "epoch": 1.4380530973451329, | |
| "grad_norm": 0.20810099959887773, | |
| "learning_rate": 3.587176690215168e-05, | |
| "loss": 0.2885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2907056510448456, | |
| "step": 1300, | |
| "valid_targets_mean": 4295.8, | |
| "valid_targets_min": 1745 | |
| }, | |
| { | |
| "epoch": 1.4435840707964602, | |
| "grad_norm": 0.20474603969203878, | |
| "learning_rate": 3.5824659562954574e-05, | |
| "loss": 0.2855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2769152820110321, | |
| "step": 1305, | |
| "valid_targets_mean": 4099.1, | |
| "valid_targets_min": 1508 | |
| }, | |
| { | |
| "epoch": 1.4491150442477876, | |
| "grad_norm": 0.21549549783923755, | |
| "learning_rate": 3.577731627825588e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30157342553138733, | |
| "step": 1310, | |
| "valid_targets_mean": 4821.3, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 1.454646017699115, | |
| "grad_norm": 0.20323829392121792, | |
| "learning_rate": 3.5729737753943456e-05, | |
| "loss": 0.2957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3023638427257538, | |
| "step": 1315, | |
| "valid_targets_mean": 4410.6, | |
| "valid_targets_min": 1645 | |
| }, | |
| { | |
| "epoch": 1.4601769911504425, | |
| "grad_norm": 0.2172462600793157, | |
| "learning_rate": 3.5681924699412574e-05, | |
| "loss": 0.2986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29906630516052246, | |
| "step": 1320, | |
| "valid_targets_mean": 4509.3, | |
| "valid_targets_min": 1418 | |
| }, | |
| { | |
| "epoch": 1.4657079646017699, | |
| "grad_norm": 0.19835215739171308, | |
| "learning_rate": 3.563387782755538e-05, | |
| "loss": 0.2959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2968510389328003, | |
| "step": 1325, | |
| "valid_targets_mean": 4422.8, | |
| "valid_targets_min": 1371 | |
| }, | |
| { | |
| "epoch": 1.4712389380530975, | |
| "grad_norm": 0.156679783996042, | |
| "learning_rate": 3.5585597854750175e-05, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18675076961517334, | |
| "step": 1330, | |
| "valid_targets_mean": 5445.8, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 1.4767699115044248, | |
| "grad_norm": 0.17425522282791814, | |
| "learning_rate": 3.5537085500850854e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18980705738067627, | |
| "step": 1335, | |
| "valid_targets_mean": 5761.9, | |
| "valid_targets_min": 2306 | |
| }, | |
| { | |
| "epoch": 1.4823008849557522, | |
| "grad_norm": 0.1652508737175241, | |
| "learning_rate": 3.548834148917608e-05, | |
| "loss": 0.184, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1776851862668991, | |
| "step": 1340, | |
| "valid_targets_mean": 5405.3, | |
| "valid_targets_min": 2272 | |
| }, | |
| { | |
| "epoch": 1.4878318584070795, | |
| "grad_norm": 0.21452174467915933, | |
| "learning_rate": 3.543936654649853e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16618479788303375, | |
| "step": 1345, | |
| "valid_targets_mean": 5712.8, | |
| "valid_targets_min": 1924 | |
| }, | |
| { | |
| "epoch": 1.4933628318584071, | |
| "grad_norm": 0.14601405787945837, | |
| "learning_rate": 3.5390161403034064e-05, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1803085058927536, | |
| "step": 1350, | |
| "valid_targets_mean": 5638.2, | |
| "valid_targets_min": 2081 | |
| }, | |
| { | |
| "epoch": 1.4988938053097345, | |
| "grad_norm": 0.15617686373078113, | |
| "learning_rate": 3.534072679243084e-05, | |
| "loss": 0.185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18177306652069092, | |
| "step": 1355, | |
| "valid_targets_mean": 5645.9, | |
| "valid_targets_min": 1663 | |
| }, | |
| { | |
| "epoch": 1.504424778761062, | |
| "grad_norm": 0.13563189976461543, | |
| "learning_rate": 3.5291063451758365e-05, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17406773567199707, | |
| "step": 1360, | |
| "valid_targets_mean": 5683.0, | |
| "valid_targets_min": 2140 | |
| }, | |
| { | |
| "epoch": 1.5099557522123894, | |
| "grad_norm": 0.14550940946515672, | |
| "learning_rate": 3.52411721214965e-05, | |
| "loss": 0.1797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17645658552646637, | |
| "step": 1365, | |
| "valid_targets_mean": 5557.4, | |
| "valid_targets_min": 2542 | |
| }, | |
| { | |
| "epoch": 1.5154867256637168, | |
| "grad_norm": 0.14778210988794122, | |
| "learning_rate": 3.519105354552444e-05, | |
| "loss": 0.1776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19671936333179474, | |
| "step": 1370, | |
| "valid_targets_mean": 5826.9, | |
| "valid_targets_min": 2386 | |
| }, | |
| { | |
| "epoch": 1.5210176991150441, | |
| "grad_norm": 0.14166003273816574, | |
| "learning_rate": 3.5140708471109604e-05, | |
| "loss": 0.1725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.169227734208107, | |
| "step": 1375, | |
| "valid_targets_mean": 5873.3, | |
| "valid_targets_min": 1791 | |
| }, | |
| { | |
| "epoch": 1.5265486725663717, | |
| "grad_norm": 0.16544349775571546, | |
| "learning_rate": 3.509013764889651e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17696590721607208, | |
| "step": 1380, | |
| "valid_targets_mean": 5638.4, | |
| "valid_targets_min": 1962 | |
| }, | |
| { | |
| "epoch": 1.532079646017699, | |
| "grad_norm": 0.15110302822686114, | |
| "learning_rate": 3.5039341832895555e-05, | |
| "loss": 0.1803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1778634786605835, | |
| "step": 1385, | |
| "valid_targets_mean": 5520.5, | |
| "valid_targets_min": 1739 | |
| }, | |
| { | |
| "epoch": 1.5376106194690267, | |
| "grad_norm": 0.15540778611469572, | |
| "learning_rate": 3.498832178047181e-05, | |
| "loss": 0.1749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17161482572555542, | |
| "step": 1390, | |
| "valid_targets_mean": 5709.5, | |
| "valid_targets_min": 1450 | |
| }, | |
| { | |
| "epoch": 1.543141592920354, | |
| "grad_norm": 0.1398431663352184, | |
| "learning_rate": 3.49370782523337e-05, | |
| "loss": 0.1756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16130518913269043, | |
| "step": 1395, | |
| "valid_targets_mean": 5862.9, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 1.5486725663716814, | |
| "grad_norm": 0.13882267245513444, | |
| "learning_rate": 3.4885612012521664e-05, | |
| "loss": 0.1731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17139075696468353, | |
| "step": 1400, | |
| "valid_targets_mean": 5801.3, | |
| "valid_targets_min": 2292 | |
| }, | |
| { | |
| "epoch": 1.5542035398230087, | |
| "grad_norm": 0.14983451592886082, | |
| "learning_rate": 3.483392382839678e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17243027687072754, | |
| "step": 1405, | |
| "valid_targets_mean": 5838.8, | |
| "valid_targets_min": 2722 | |
| }, | |
| { | |
| "epoch": 1.5597345132743363, | |
| "grad_norm": 0.16213535332576523, | |
| "learning_rate": 3.478201447062931e-05, | |
| "loss": 0.1836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18661034107208252, | |
| "step": 1410, | |
| "valid_targets_mean": 5664.7, | |
| "valid_targets_min": 1832 | |
| }, | |
| { | |
| "epoch": 1.5652654867256637, | |
| "grad_norm": 0.1532551272820804, | |
| "learning_rate": 3.472988471318721e-05, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17288486659526825, | |
| "step": 1415, | |
| "valid_targets_mean": 5415.3, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 1.5707964601769913, | |
| "grad_norm": 0.16760562964211623, | |
| "learning_rate": 3.4677535333324606e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17057883739471436, | |
| "step": 1420, | |
| "valid_targets_mean": 5728.5, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 1.5763274336283186, | |
| "grad_norm": 0.15598351047871833, | |
| "learning_rate": 3.4624967111570175e-05, | |
| "loss": 0.1764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18163637816905975, | |
| "step": 1425, | |
| "valid_targets_mean": 5853.4, | |
| "valid_targets_min": 2486 | |
| }, | |
| { | |
| "epoch": 1.581858407079646, | |
| "grad_norm": 0.14326152985812782, | |
| "learning_rate": 3.457218083171553e-05, | |
| "loss": 0.1808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1812077760696411, | |
| "step": 1430, | |
| "valid_targets_mean": 5732.7, | |
| "valid_targets_min": 1760 | |
| }, | |
| { | |
| "epoch": 1.5873893805309733, | |
| "grad_norm": 0.13936867725749782, | |
| "learning_rate": 3.4519177280803514e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1733517050743103, | |
| "step": 1435, | |
| "valid_targets_mean": 5712.4, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 1.592920353982301, | |
| "grad_norm": 0.13744325405716368, | |
| "learning_rate": 3.4465957249116524e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17935653030872345, | |
| "step": 1440, | |
| "valid_targets_mean": 5532.2, | |
| "valid_targets_min": 1402 | |
| }, | |
| { | |
| "epoch": 1.5984513274336283, | |
| "grad_norm": 0.13802775270219927, | |
| "learning_rate": 3.4412521530164624e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18662457168102264, | |
| "step": 1445, | |
| "valid_targets_mean": 5571.9, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 1.6039823008849559, | |
| "grad_norm": 0.14210042094986816, | |
| "learning_rate": 3.4358870920673814e-05, | |
| "loss": 0.1757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1729515790939331, | |
| "step": 1450, | |
| "valid_targets_mean": 6065.2, | |
| "valid_targets_min": 2249 | |
| }, | |
| { | |
| "epoch": 1.6095132743362832, | |
| "grad_norm": 0.13797566737991618, | |
| "learning_rate": 3.4305006220574096e-05, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18842734396457672, | |
| "step": 1455, | |
| "valid_targets_mean": 5659.9, | |
| "valid_targets_min": 2419 | |
| }, | |
| { | |
| "epoch": 1.6150442477876106, | |
| "grad_norm": 0.14986035089997057, | |
| "learning_rate": 3.4250928232987563e-05, | |
| "loss": 0.1824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18246431648731232, | |
| "step": 1460, | |
| "valid_targets_mean": 5974.9, | |
| "valid_targets_min": 2366 | |
| }, | |
| { | |
| "epoch": 1.620575221238938, | |
| "grad_norm": 0.16013574558676197, | |
| "learning_rate": 3.419663776421642e-05, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15535743534564972, | |
| "step": 1465, | |
| "valid_targets_mean": 5648.7, | |
| "valid_targets_min": 2561 | |
| }, | |
| { | |
| "epoch": 1.6261061946902655, | |
| "grad_norm": 0.14152332934421416, | |
| "learning_rate": 3.4142135623730954e-05, | |
| "loss": 0.1707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1698196530342102, | |
| "step": 1470, | |
| "valid_targets_mean": 5608.1, | |
| "valid_targets_min": 2153 | |
| }, | |
| { | |
| "epoch": 1.6316371681415929, | |
| "grad_norm": 0.15070705739656365, | |
| "learning_rate": 3.408742262415749e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18310345709323883, | |
| "step": 1475, | |
| "valid_targets_mean": 5652.2, | |
| "valid_targets_min": 2167 | |
| }, | |
| { | |
| "epoch": 1.6371681415929205, | |
| "grad_norm": 0.15012850496773006, | |
| "learning_rate": 3.403249958126627e-05, | |
| "loss": 0.1711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18646545708179474, | |
| "step": 1480, | |
| "valid_targets_mean": 5662.6, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 1.6426991150442478, | |
| "grad_norm": 0.14082483554863978, | |
| "learning_rate": 3.397736731395925e-05, | |
| "loss": 0.1672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16715407371520996, | |
| "step": 1485, | |
| "valid_targets_mean": 5612.2, | |
| "valid_targets_min": 1864 | |
| }, | |
| { | |
| "epoch": 1.6482300884955752, | |
| "grad_norm": 0.13474342156948407, | |
| "learning_rate": 3.392202664425794e-05, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15851199626922607, | |
| "step": 1490, | |
| "valid_targets_mean": 5524.3, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 1.6537610619469025, | |
| "grad_norm": 0.14174062919205113, | |
| "learning_rate": 3.386647839729112e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15384052693843842, | |
| "step": 1495, | |
| "valid_targets_mean": 5660.5, | |
| "valid_targets_min": 2150 | |
| }, | |
| { | |
| "epoch": 1.6592920353982301, | |
| "grad_norm": 0.1476240243718378, | |
| "learning_rate": 3.3810723401282544e-05, | |
| "loss": 0.1783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17098252475261688, | |
| "step": 1500, | |
| "valid_targets_mean": 5485.2, | |
| "valid_targets_min": 1925 | |
| }, | |
| { | |
| "epoch": 1.6648230088495575, | |
| "grad_norm": 0.15176282649564898, | |
| "learning_rate": 3.37547624875386e-05, | |
| "loss": 0.1713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17017503082752228, | |
| "step": 1505, | |
| "valid_targets_mean": 5699.4, | |
| "valid_targets_min": 2288 | |
| }, | |
| { | |
| "epoch": 1.670353982300885, | |
| "grad_norm": 0.1543251398322749, | |
| "learning_rate": 3.36985964904359e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1869245022535324, | |
| "step": 1510, | |
| "valid_targets_mean": 5582.2, | |
| "valid_targets_min": 1992 | |
| }, | |
| { | |
| "epoch": 1.6758849557522124, | |
| "grad_norm": 0.14640169839857778, | |
| "learning_rate": 3.364222624740885e-05, | |
| "loss": 0.1678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16812634468078613, | |
| "step": 1515, | |
| "valid_targets_mean": 5590.5, | |
| "valid_targets_min": 1981 | |
| }, | |
| { | |
| "epoch": 1.6814159292035398, | |
| "grad_norm": 0.13813506619762483, | |
| "learning_rate": 3.3585652598937154e-05, | |
| "loss": 0.1743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16573424637317657, | |
| "step": 1520, | |
| "valid_targets_mean": 5700.6, | |
| "valid_targets_min": 2289 | |
| }, | |
| { | |
| "epoch": 1.6869469026548671, | |
| "grad_norm": 0.17686692089856157, | |
| "learning_rate": 3.352887638853329e-05, | |
| "loss": 0.1709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15930159389972687, | |
| "step": 1525, | |
| "valid_targets_mean": 5442.0, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 1.6924778761061947, | |
| "grad_norm": 0.136586542484903, | |
| "learning_rate": 3.347189846272996e-05, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16397438943386078, | |
| "step": 1530, | |
| "valid_targets_mean": 5615.5, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 1.698008849557522, | |
| "grad_norm": 0.21995891539465323, | |
| "learning_rate": 3.341471967106739e-05, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1589098572731018, | |
| "step": 1535, | |
| "valid_targets_mean": 5801.5, | |
| "valid_targets_min": 1822 | |
| }, | |
| { | |
| "epoch": 1.7035398230088497, | |
| "grad_norm": 0.1360758921396922, | |
| "learning_rate": 3.335734086608076e-05, | |
| "loss": 0.1677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16596756875514984, | |
| "step": 1540, | |
| "valid_targets_mean": 5636.9, | |
| "valid_targets_min": 2047 | |
| }, | |
| { | |
| "epoch": 1.709070796460177, | |
| "grad_norm": 0.15707458614984834, | |
| "learning_rate": 3.329976290328741e-05, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1776486039161682, | |
| "step": 1545, | |
| "valid_targets_mean": 5702.3, | |
| "valid_targets_min": 2359 | |
| }, | |
| { | |
| "epoch": 1.7146017699115044, | |
| "grad_norm": 0.140789465344039, | |
| "learning_rate": 3.3241986641174145e-05, | |
| "loss": 0.174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18430674076080322, | |
| "step": 1550, | |
| "valid_targets_mean": 5649.6, | |
| "valid_targets_min": 1620 | |
| }, | |
| { | |
| "epoch": 1.7201327433628317, | |
| "grad_norm": 0.14177664164264792, | |
| "learning_rate": 3.3184012941184406e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16918246448040009, | |
| "step": 1555, | |
| "valid_targets_mean": 5435.4, | |
| "valid_targets_min": 1923 | |
| }, | |
| { | |
| "epoch": 1.7256637168141593, | |
| "grad_norm": 0.14531464200407504, | |
| "learning_rate": 3.312584266770543e-05, | |
| "loss": 0.1703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1760571449995041, | |
| "step": 1560, | |
| "valid_targets_mean": 5783.9, | |
| "valid_targets_min": 1300 | |
| }, | |
| { | |
| "epoch": 1.731194690265487, | |
| "grad_norm": 0.13665532547049286, | |
| "learning_rate": 3.306747668805534e-05, | |
| "loss": 0.1617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1514696329832077, | |
| "step": 1565, | |
| "valid_targets_mean": 5728.1, | |
| "valid_targets_min": 1178 | |
| }, | |
| { | |
| "epoch": 1.7367256637168142, | |
| "grad_norm": 0.14056357436727612, | |
| "learning_rate": 3.300891587247029e-05, | |
| "loss": 0.1759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17050297558307648, | |
| "step": 1570, | |
| "valid_targets_mean": 5754.2, | |
| "valid_targets_min": 2336 | |
| }, | |
| { | |
| "epoch": 1.7422566371681416, | |
| "grad_norm": 0.14997951715171132, | |
| "learning_rate": 3.2950161094091376e-05, | |
| "loss": 0.1663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17910881340503693, | |
| "step": 1575, | |
| "valid_targets_mean": 5566.8, | |
| "valid_targets_min": 1918 | |
| }, | |
| { | |
| "epoch": 1.747787610619469, | |
| "grad_norm": 0.15530111213234574, | |
| "learning_rate": 3.289121322895172e-05, | |
| "loss": 0.164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16749615967273712, | |
| "step": 1580, | |
| "valid_targets_mean": 5561.9, | |
| "valid_targets_min": 1143 | |
| }, | |
| { | |
| "epoch": 1.7533185840707963, | |
| "grad_norm": 0.14997934610848973, | |
| "learning_rate": 3.283207315596333e-05, | |
| "loss": 0.1793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17700177431106567, | |
| "step": 1585, | |
| "valid_targets_mean": 5475.3, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 1.758849557522124, | |
| "grad_norm": 0.17948604343839383, | |
| "learning_rate": 3.277274175690406e-05, | |
| "loss": 0.2806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27077558636665344, | |
| "step": 1590, | |
| "valid_targets_mean": 4454.1, | |
| "valid_targets_min": 1152 | |
| }, | |
| { | |
| "epoch": 1.7643805309734515, | |
| "grad_norm": 0.1975830739646507, | |
| "learning_rate": 3.271321991640443e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2602733075618744, | |
| "step": 1595, | |
| "valid_targets_mean": 4515.4, | |
| "valid_targets_min": 1363 | |
| }, | |
| { | |
| "epoch": 1.7699115044247788, | |
| "grad_norm": 0.1913110240116289, | |
| "learning_rate": 3.265350852193442e-05, | |
| "loss": 0.2741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26858416199684143, | |
| "step": 1600, | |
| "valid_targets_mean": 4388.7, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 1.7754424778761062, | |
| "grad_norm": 0.1958773570340404, | |
| "learning_rate": 3.259360846379028e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2681581676006317, | |
| "step": 1605, | |
| "valid_targets_mean": 4259.3, | |
| "valid_targets_min": 1798 | |
| }, | |
| { | |
| "epoch": 1.7809734513274336, | |
| "grad_norm": 0.19416765962707797, | |
| "learning_rate": 3.253352063508122e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2619265913963318, | |
| "step": 1610, | |
| "valid_targets_mean": 4395.8, | |
| "valid_targets_min": 1959 | |
| }, | |
| { | |
| "epoch": 1.786504424778761, | |
| "grad_norm": 0.2058129714958956, | |
| "learning_rate": 3.247324593171611e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24811017513275146, | |
| "step": 1615, | |
| "valid_targets_mean": 4366.5, | |
| "valid_targets_min": 2120 | |
| }, | |
| { | |
| "epoch": 1.7920353982300885, | |
| "grad_norm": 0.19943735935360057, | |
| "learning_rate": 3.241278525239013e-05, | |
| "loss": 0.2286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22686314582824707, | |
| "step": 1620, | |
| "valid_targets_mean": 6348.7, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 1.797566371681416, | |
| "grad_norm": 0.18733220517258312, | |
| "learning_rate": 3.2352139498571336e-05, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2159055918455124, | |
| "step": 1625, | |
| "valid_targets_mean": 7297.0, | |
| "valid_targets_min": 1398 | |
| }, | |
| { | |
| "epoch": 1.8030973451327434, | |
| "grad_norm": 0.16508113690793336, | |
| "learning_rate": 3.229130957448727e-05, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22364212572574615, | |
| "step": 1630, | |
| "valid_targets_mean": 7454.9, | |
| "valid_targets_min": 2002 | |
| }, | |
| { | |
| "epoch": 1.8086283185840708, | |
| "grad_norm": 0.20197151162041505, | |
| "learning_rate": 3.2230296387111415e-05, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21720099449157715, | |
| "step": 1635, | |
| "valid_targets_mean": 8315.2, | |
| "valid_targets_min": 2323 | |
| }, | |
| { | |
| "epoch": 1.8141592920353982, | |
| "grad_norm": 0.17626862730906886, | |
| "learning_rate": 3.2169100846149746e-05, | |
| "loss": 0.2205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2247864007949829, | |
| "step": 1640, | |
| "valid_targets_mean": 8304.0, | |
| "valid_targets_min": 2386 | |
| }, | |
| { | |
| "epoch": 1.8196902654867255, | |
| "grad_norm": 0.17751538434443434, | |
| "learning_rate": 3.210772386402711e-05, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21817022562026978, | |
| "step": 1645, | |
| "valid_targets_mean": 8337.6, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 1.825221238938053, | |
| "grad_norm": 0.11538613153520993, | |
| "learning_rate": 3.204616635587365e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17687736451625824, | |
| "step": 1650, | |
| "valid_targets_mean": 13967.0, | |
| "valid_targets_min": 1304 | |
| }, | |
| { | |
| "epoch": 1.8307522123893807, | |
| "grad_norm": 0.12481500397895288, | |
| "learning_rate": 3.198442923951113e-05, | |
| "loss": 0.1851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18553711473941803, | |
| "step": 1655, | |
| "valid_targets_mean": 13636.2, | |
| "valid_targets_min": 889 | |
| }, | |
| { | |
| "epoch": 1.836283185840708, | |
| "grad_norm": 0.19001132410036775, | |
| "learning_rate": 3.192251343543928e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18340332806110382, | |
| "step": 1660, | |
| "valid_targets_mean": 13545.2, | |
| "valid_targets_min": 1264 | |
| }, | |
| { | |
| "epoch": 1.8418141592920354, | |
| "grad_norm": 0.16220288842077774, | |
| "learning_rate": 3.186041986682207e-05, | |
| "loss": 0.1865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20574147999286652, | |
| "step": 1665, | |
| "valid_targets_mean": 7997.9, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 1.8473451327433628, | |
| "grad_norm": 0.19739872689048849, | |
| "learning_rate": 3.179814945947393e-05, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21276624500751495, | |
| "step": 1670, | |
| "valid_targets_mean": 7693.6, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 1.8528761061946901, | |
| "grad_norm": 0.269472881811778, | |
| "learning_rate": 3.173570314184595e-05, | |
| "loss": 0.2101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1973486691713333, | |
| "step": 1675, | |
| "valid_targets_mean": 7421.3, | |
| "valid_targets_min": 935 | |
| }, | |
| { | |
| "epoch": 1.8584070796460177, | |
| "grad_norm": 0.18838604414089802, | |
| "learning_rate": 3.167308184501206e-05, | |
| "loss": 0.2304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24780695140361786, | |
| "step": 1680, | |
| "valid_targets_mean": 4919.4, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 1.8639380530973453, | |
| "grad_norm": 0.18447548982782389, | |
| "learning_rate": 3.1610286502655094e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2503330707550049, | |
| "step": 1685, | |
| "valid_targets_mean": 5071.2, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 1.8694690265486726, | |
| "grad_norm": 0.2029485093975304, | |
| "learning_rate": 3.154731805105293e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2595711648464203, | |
| "step": 1690, | |
| "valid_targets_mean": 4763.4, | |
| "valid_targets_min": 2177 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 0.1842155159263355, | |
| "learning_rate": 3.1484177429064495e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24180889129638672, | |
| "step": 1695, | |
| "valid_targets_mean": 4650.6, | |
| "valid_targets_min": 2033 | |
| }, | |
| { | |
| "epoch": 1.8805309734513274, | |
| "grad_norm": 0.17824131124036674, | |
| "learning_rate": 3.142086557811578e-05, | |
| "loss": 0.2559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706529200077057, | |
| "step": 1700, | |
| "valid_targets_mean": 4337.8, | |
| "valid_targets_min": 1571 | |
| }, | |
| { | |
| "epoch": 1.8860619469026547, | |
| "grad_norm": 0.18805189416830917, | |
| "learning_rate": 3.135738344218579e-05, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.294202595949173, | |
| "step": 1705, | |
| "valid_targets_mean": 4592.9, | |
| "valid_targets_min": 1350 | |
| }, | |
| { | |
| "epoch": 1.8915929203539823, | |
| "grad_norm": 0.1648357699322212, | |
| "learning_rate": 3.1293731967792476e-05, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2605911195278168, | |
| "step": 1710, | |
| "valid_targets_mean": 5433.5, | |
| "valid_targets_min": 2344 | |
| }, | |
| { | |
| "epoch": 1.8971238938053099, | |
| "grad_norm": 0.18430731060690567, | |
| "learning_rate": 3.1229912103978624e-05, | |
| "loss": 0.2719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2930378019809723, | |
| "step": 1715, | |
| "valid_targets_mean": 4830.6, | |
| "valid_targets_min": 1670 | |
| }, | |
| { | |
| "epoch": 1.9026548672566372, | |
| "grad_norm": 0.1902382654728539, | |
| "learning_rate": 3.1165924802297713e-05, | |
| "loss": 0.2741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29358822107315063, | |
| "step": 1720, | |
| "valid_targets_mean": 4921.9, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 1.9081858407079646, | |
| "grad_norm": 0.16534461977342105, | |
| "learning_rate": 3.1101771016799714e-05, | |
| "loss": 0.2671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2554149925708771, | |
| "step": 1725, | |
| "valid_targets_mean": 5355.0, | |
| "valid_targets_min": 1798 | |
| }, | |
| { | |
| "epoch": 1.913716814159292, | |
| "grad_norm": 0.21145696839926176, | |
| "learning_rate": 3.103745170401688e-05, | |
| "loss": 0.275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2637263834476471, | |
| "step": 1730, | |
| "valid_targets_mean": 5142.9, | |
| "valid_targets_min": 1145 | |
| }, | |
| { | |
| "epoch": 1.9192477876106193, | |
| "grad_norm": 0.17717277319609437, | |
| "learning_rate": 3.0972967822949435e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27898454666137695, | |
| "step": 1735, | |
| "valid_targets_mean": 5157.9, | |
| "valid_targets_min": 1799 | |
| }, | |
| { | |
| "epoch": 1.924778761061947, | |
| "grad_norm": 0.5100937977850676, | |
| "learning_rate": 3.0908320335051375e-05, | |
| "loss": 0.2687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27008822560310364, | |
| "step": 1740, | |
| "valid_targets_mean": 4821.9, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 1.9303097345132745, | |
| "grad_norm": 0.1597730064372446, | |
| "learning_rate": 3.0843510204216016e-05, | |
| "loss": 0.2729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25724127888679504, | |
| "step": 1745, | |
| "valid_targets_mean": 5277.2, | |
| "valid_targets_min": 2148 | |
| }, | |
| { | |
| "epoch": 1.9358407079646018, | |
| "grad_norm": 0.17673276956903347, | |
| "learning_rate": 3.077853839676171e-05, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27270737290382385, | |
| "step": 1750, | |
| "valid_targets_mean": 5036.6, | |
| "valid_targets_min": 1849 | |
| }, | |
| { | |
| "epoch": 1.9413716814159292, | |
| "grad_norm": 0.18153490724050117, | |
| "learning_rate": 3.07134058814174e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2732236087322235, | |
| "step": 1755, | |
| "valid_targets_mean": 4712.0, | |
| "valid_targets_min": 1745 | |
| }, | |
| { | |
| "epoch": 1.9469026548672566, | |
| "grad_norm": 0.17523358489476837, | |
| "learning_rate": 3.064811362930819e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24557732045650482, | |
| "step": 1760, | |
| "valid_targets_mean": 5319.5, | |
| "valid_targets_min": 1785 | |
| }, | |
| { | |
| "epoch": 1.952433628318584, | |
| "grad_norm": 0.19039548459254668, | |
| "learning_rate": 3.058266261394082e-05, | |
| "loss": 0.2617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25748392939567566, | |
| "step": 1765, | |
| "valid_targets_mean": 4185.1, | |
| "valid_targets_min": 1345 | |
| }, | |
| { | |
| "epoch": 1.9579646017699115, | |
| "grad_norm": 0.19278342577813265, | |
| "learning_rate": 3.0517053811189245e-05, | |
| "loss": 0.2609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25761401653289795, | |
| "step": 1770, | |
| "valid_targets_mean": 4272.2, | |
| "valid_targets_min": 1453 | |
| }, | |
| { | |
| "epoch": 1.963495575221239, | |
| "grad_norm": 0.18943655085040242, | |
| "learning_rate": 3.045128819927998e-05, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2516404390335083, | |
| "step": 1775, | |
| "valid_targets_mean": 4076.4, | |
| "valid_targets_min": 1386 | |
| }, | |
| { | |
| "epoch": 1.9690265486725664, | |
| "grad_norm": 0.17621300772093043, | |
| "learning_rate": 3.0385366758777582e-05, | |
| "loss": 0.2613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25460875034332275, | |
| "step": 1780, | |
| "valid_targets_mean": 4306.1, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 1.9745575221238938, | |
| "grad_norm": 0.18061334878815619, | |
| "learning_rate": 3.031929047257002e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26228848099708557, | |
| "step": 1785, | |
| "valid_targets_mean": 4387.8, | |
| "valid_targets_min": 1720 | |
| }, | |
| { | |
| "epoch": 1.9800884955752212, | |
| "grad_norm": 0.1933468553922685, | |
| "learning_rate": 3.0253060325854e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2725996971130371, | |
| "step": 1790, | |
| "valid_targets_mean": 4309.4, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 1.9856194690265485, | |
| "grad_norm": 0.17773254756994591, | |
| "learning_rate": 3.018667730612028e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25291135907173157, | |
| "step": 1795, | |
| "valid_targets_mean": 4320.4, | |
| "valid_targets_min": 2020 | |
| }, | |
| { | |
| "epoch": 1.991150442477876, | |
| "grad_norm": 0.1765194721320569, | |
| "learning_rate": 3.0120142403138973e-05, | |
| "loss": 0.2551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24717839062213898, | |
| "step": 1800, | |
| "valid_targets_mean": 4314.6, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 1.9966814159292037, | |
| "grad_norm": 0.18094678937234193, | |
| "learning_rate": 3.005345660894474e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2637452781200409, | |
| "step": 1805, | |
| "valid_targets_mean": 4465.5, | |
| "valid_targets_min": 2029 | |
| }, | |
| { | |
| "epoch": 2.002212389380531, | |
| "grad_norm": 0.18549788943596834, | |
| "learning_rate": 2.998662091782206e-05, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23781315982341766, | |
| "step": 1810, | |
| "valid_targets_mean": 6904.5, | |
| "valid_targets_min": 1732 | |
| }, | |
| { | |
| "epoch": 2.0077433628318584, | |
| "grad_norm": 0.16184211077035715, | |
| "learning_rate": 2.9919636326290348e-05, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24616175889968872, | |
| "step": 1815, | |
| "valid_targets_mean": 6751.8, | |
| "valid_targets_min": 2377 | |
| }, | |
| { | |
| "epoch": 2.0132743362831858, | |
| "grad_norm": 0.15707642506276331, | |
| "learning_rate": 2.985250383308915e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21841520071029663, | |
| "step": 1820, | |
| "valid_targets_mean": 6346.4, | |
| "valid_targets_min": 1804 | |
| }, | |
| { | |
| "epoch": 2.018805309734513, | |
| "grad_norm": 0.13458493257125612, | |
| "learning_rate": 2.978522443916319e-05, | |
| "loss": 0.2333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2235536128282547, | |
| "step": 1825, | |
| "valid_targets_mean": 6772.5, | |
| "valid_targets_min": 1897 | |
| }, | |
| { | |
| "epoch": 2.024336283185841, | |
| "grad_norm": 0.19819310203829493, | |
| "learning_rate": 2.9717799147647505e-05, | |
| "loss": 0.2388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2333071380853653, | |
| "step": 1830, | |
| "valid_targets_mean": 7163.8, | |
| "valid_targets_min": 1758 | |
| }, | |
| { | |
| "epoch": 2.0298672566371683, | |
| "grad_norm": 0.150015853810323, | |
| "learning_rate": 2.9650228963852458e-05, | |
| "loss": 0.2303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.217753604054451, | |
| "step": 1835, | |
| "valid_targets_mean": 6318.1, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 2.0353982300884956, | |
| "grad_norm": 0.1445468443483016, | |
| "learning_rate": 2.9582514895248755e-05, | |
| "loss": 0.2252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2314431220293045, | |
| "step": 1840, | |
| "valid_targets_mean": 6905.8, | |
| "valid_targets_min": 2029 | |
| }, | |
| { | |
| "epoch": 2.040929203539823, | |
| "grad_norm": 0.1475536479508498, | |
| "learning_rate": 2.9514657951452414e-05, | |
| "loss": 0.2317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24088068306446075, | |
| "step": 1845, | |
| "valid_targets_mean": 7085.1, | |
| "valid_targets_min": 1462 | |
| }, | |
| { | |
| "epoch": 2.0464601769911503, | |
| "grad_norm": 0.14076591855690843, | |
| "learning_rate": 2.9446659144209726e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22339268028736115, | |
| "step": 1850, | |
| "valid_targets_mean": 6297.3, | |
| "valid_targets_min": 2261 | |
| }, | |
| { | |
| "epoch": 2.0519911504424777, | |
| "grad_norm": 0.13779322803826408, | |
| "learning_rate": 2.937851948738218e-05, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21813249588012695, | |
| "step": 1855, | |
| "valid_targets_mean": 6465.3, | |
| "valid_targets_min": 1584 | |
| }, | |
| { | |
| "epoch": 2.0575221238938055, | |
| "grad_norm": 0.15456612296579678, | |
| "learning_rate": 2.9310239996931303e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2367580682039261, | |
| "step": 1860, | |
| "valid_targets_mean": 6822.2, | |
| "valid_targets_min": 2198 | |
| }, | |
| { | |
| "epoch": 2.063053097345133, | |
| "grad_norm": 0.16044162060177014, | |
| "learning_rate": 2.9241821690903558e-05, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22911524772644043, | |
| "step": 1865, | |
| "valid_targets_mean": 6615.1, | |
| "valid_targets_min": 1911 | |
| }, | |
| { | |
| "epoch": 2.0685840707964602, | |
| "grad_norm": 0.15669113232894358, | |
| "learning_rate": 2.917326558941514e-05, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23581738770008087, | |
| "step": 1870, | |
| "valid_targets_mean": 7104.1, | |
| "valid_targets_min": 2018 | |
| }, | |
| { | |
| "epoch": 2.0741150442477876, | |
| "grad_norm": 0.14539250038799476, | |
| "learning_rate": 2.910457271463678e-05, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2319653034210205, | |
| "step": 1875, | |
| "valid_targets_mean": 6725.9, | |
| "valid_targets_min": 1738 | |
| }, | |
| { | |
| "epoch": 2.079646017699115, | |
| "grad_norm": 0.13712341771336523, | |
| "learning_rate": 2.9035744090778487e-05, | |
| "loss": 0.2293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23349328339099884, | |
| "step": 1880, | |
| "valid_targets_mean": 6817.6, | |
| "valid_targets_min": 1971 | |
| }, | |
| { | |
| "epoch": 2.0851769911504423, | |
| "grad_norm": 0.1439869789138518, | |
| "learning_rate": 2.8966780744074294e-05, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2370002269744873, | |
| "step": 1885, | |
| "valid_targets_mean": 6992.1, | |
| "valid_targets_min": 2459 | |
| }, | |
| { | |
| "epoch": 2.09070796460177, | |
| "grad_norm": 0.15843799460025984, | |
| "learning_rate": 2.8897683702766946e-05, | |
| "loss": 0.2334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24956180155277252, | |
| "step": 1890, | |
| "valid_targets_mean": 7042.8, | |
| "valid_targets_min": 2497 | |
| }, | |
| { | |
| "epoch": 2.0962389380530975, | |
| "grad_norm": 0.14510171656339604, | |
| "learning_rate": 2.8828453997092584e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2558788061141968, | |
| "step": 1895, | |
| "valid_targets_mean": 7236.5, | |
| "valid_targets_min": 1737 | |
| }, | |
| { | |
| "epoch": 2.101769911504425, | |
| "grad_norm": 0.13912006069046348, | |
| "learning_rate": 2.8759092659265352e-05, | |
| "loss": 0.2334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22092382609844208, | |
| "step": 1900, | |
| "valid_targets_mean": 6782.9, | |
| "valid_targets_min": 2320 | |
| }, | |
| { | |
| "epoch": 2.107300884955752, | |
| "grad_norm": 0.1378283753124215, | |
| "learning_rate": 2.8689600723462056e-05, | |
| "loss": 0.2236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2082674503326416, | |
| "step": 1905, | |
| "valid_targets_mean": 6771.7, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 2.1128318584070795, | |
| "grad_norm": 0.14614022589679596, | |
| "learning_rate": 2.8619979225806688e-05, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21980290114879608, | |
| "step": 1910, | |
| "valid_targets_mean": 6612.8, | |
| "valid_targets_min": 2586 | |
| }, | |
| { | |
| "epoch": 2.118362831858407, | |
| "grad_norm": 0.13558593616085562, | |
| "learning_rate": 2.8550229204355026e-05, | |
| "loss": 0.231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22110795974731445, | |
| "step": 1915, | |
| "valid_targets_mean": 6633.8, | |
| "valid_targets_min": 2410 | |
| }, | |
| { | |
| "epoch": 2.1238938053097347, | |
| "grad_norm": 0.13843284289342314, | |
| "learning_rate": 2.8480351699079133e-05, | |
| "loss": 0.2303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21232974529266357, | |
| "step": 1920, | |
| "valid_targets_mean": 6833.0, | |
| "valid_targets_min": 2209 | |
| }, | |
| { | |
| "epoch": 2.129424778761062, | |
| "grad_norm": 0.13646337472652179, | |
| "learning_rate": 2.8410347751851845e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21451246738433838, | |
| "step": 1925, | |
| "valid_targets_mean": 7072.6, | |
| "valid_targets_min": 2198 | |
| }, | |
| { | |
| "epoch": 2.1349557522123894, | |
| "grad_norm": 0.14417025141397377, | |
| "learning_rate": 2.834021840643125e-05, | |
| "loss": 0.2282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2326899617910385, | |
| "step": 1930, | |
| "valid_targets_mean": 7083.0, | |
| "valid_targets_min": 2450 | |
| }, | |
| { | |
| "epoch": 2.140486725663717, | |
| "grad_norm": 0.16558054287232848, | |
| "learning_rate": 2.8269964708445127e-05, | |
| "loss": 0.2253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24268478155136108, | |
| "step": 1935, | |
| "valid_targets_mean": 7244.3, | |
| "valid_targets_min": 2254 | |
| }, | |
| { | |
| "epoch": 2.146017699115044, | |
| "grad_norm": 0.1422531034571641, | |
| "learning_rate": 2.8199587705375345e-05, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20881204307079315, | |
| "step": 1940, | |
| "valid_targets_mean": 6601.0, | |
| "valid_targets_min": 2512 | |
| }, | |
| { | |
| "epoch": 2.1515486725663715, | |
| "grad_norm": 0.15156193347412625, | |
| "learning_rate": 2.812908844654226e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21357543766498566, | |
| "step": 1945, | |
| "valid_targets_mean": 6363.4, | |
| "valid_targets_min": 1974 | |
| }, | |
| { | |
| "epoch": 2.1570796460176993, | |
| "grad_norm": 0.14814257679756476, | |
| "learning_rate": 2.8058467983089053e-05, | |
| "loss": 0.2217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22998099029064178, | |
| "step": 1950, | |
| "valid_targets_mean": 6868.2, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 2.1626106194690267, | |
| "grad_norm": 0.15387611405053048, | |
| "learning_rate": 2.7987727367966044e-05, | |
| "loss": 0.2295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2313184291124344, | |
| "step": 1955, | |
| "valid_targets_mean": 6691.9, | |
| "valid_targets_min": 1516 | |
| }, | |
| { | |
| "epoch": 2.168141592920354, | |
| "grad_norm": 0.1728137187519612, | |
| "learning_rate": 2.7916867655915064e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22180426120758057, | |
| "step": 1960, | |
| "valid_targets_mean": 7036.5, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 2.1736725663716814, | |
| "grad_norm": 0.1570230487847243, | |
| "learning_rate": 2.7845889903453623e-05, | |
| "loss": 0.2255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22529596090316772, | |
| "step": 1965, | |
| "valid_targets_mean": 6712.8, | |
| "valid_targets_min": 1824 | |
| }, | |
| { | |
| "epoch": 2.1792035398230087, | |
| "grad_norm": 0.14926425284681696, | |
| "learning_rate": 2.7774795168859247e-05, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21775199472904205, | |
| "step": 1970, | |
| "valid_targets_mean": 6537.2, | |
| "valid_targets_min": 2365 | |
| }, | |
| { | |
| "epoch": 2.184734513274336, | |
| "grad_norm": 0.15794997861660232, | |
| "learning_rate": 2.7703584512153647e-05, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2207685261964798, | |
| "step": 1975, | |
| "valid_targets_mean": 6756.8, | |
| "valid_targets_min": 1887 | |
| }, | |
| { | |
| "epoch": 2.190265486725664, | |
| "grad_norm": 0.13871966084120946, | |
| "learning_rate": 2.7632258995086952e-05, | |
| "loss": 0.2342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23331673443317413, | |
| "step": 1980, | |
| "valid_targets_mean": 7283.9, | |
| "valid_targets_min": 2399 | |
| }, | |
| { | |
| "epoch": 2.1957964601769913, | |
| "grad_norm": 0.14028608846510035, | |
| "learning_rate": 2.756081968112183e-05, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22192974388599396, | |
| "step": 1985, | |
| "valid_targets_mean": 7015.1, | |
| "valid_targets_min": 2377 | |
| }, | |
| { | |
| "epoch": 2.2013274336283186, | |
| "grad_norm": 0.18508714873340856, | |
| "learning_rate": 2.7489267635417684e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2470727115869522, | |
| "step": 1990, | |
| "valid_targets_mean": 7364.7, | |
| "valid_targets_min": 2044 | |
| }, | |
| { | |
| "epoch": 2.206858407079646, | |
| "grad_norm": 0.14143675517181956, | |
| "learning_rate": 2.741760392481472e-05, | |
| "loss": 0.226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2372276335954666, | |
| "step": 1995, | |
| "valid_targets_mean": 7528.4, | |
| "valid_targets_min": 1537 | |
| }, | |
| { | |
| "epoch": 2.2123893805309733, | |
| "grad_norm": 0.15211797282697304, | |
| "learning_rate": 2.7345829617818104e-05, | |
| "loss": 0.2256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24850386381149292, | |
| "step": 2000, | |
| "valid_targets_mean": 6714.6, | |
| "valid_targets_min": 2068 | |
| }, | |
| { | |
| "epoch": 2.2179203539823007, | |
| "grad_norm": 0.14689135167811782, | |
| "learning_rate": 2.7273945784581946e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21092891693115234, | |
| "step": 2005, | |
| "valid_targets_mean": 7020.5, | |
| "valid_targets_min": 1758 | |
| }, | |
| { | |
| "epoch": 2.2234513274336285, | |
| "grad_norm": 0.15093763440740807, | |
| "learning_rate": 2.7201953496893443e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.257379412651062, | |
| "step": 2010, | |
| "valid_targets_mean": 7169.6, | |
| "valid_targets_min": 2250 | |
| }, | |
| { | |
| "epoch": 2.228982300884956, | |
| "grad_norm": 0.2377265688537521, | |
| "learning_rate": 2.7129853828156802e-05, | |
| "loss": 0.2901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2945703864097595, | |
| "step": 2015, | |
| "valid_targets_mean": 4581.1, | |
| "valid_targets_min": 1482 | |
| }, | |
| { | |
| "epoch": 2.234513274336283, | |
| "grad_norm": 0.18841327406002073, | |
| "learning_rate": 2.7057647853377297e-05, | |
| "loss": 0.2843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2817246615886688, | |
| "step": 2020, | |
| "valid_targets_mean": 5139.2, | |
| "valid_targets_min": 1851 | |
| }, | |
| { | |
| "epoch": 2.2400442477876106, | |
| "grad_norm": 0.1898195236514264, | |
| "learning_rate": 2.698533664914523e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2691561281681061, | |
| "step": 2025, | |
| "valid_targets_mean": 4219.1, | |
| "valid_targets_min": 1708 | |
| }, | |
| { | |
| "epoch": 2.245575221238938, | |
| "grad_norm": 0.22056453635172965, | |
| "learning_rate": 2.6912921293619873e-05, | |
| "loss": 0.2707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28428271412849426, | |
| "step": 2030, | |
| "valid_targets_mean": 4269.6, | |
| "valid_targets_min": 1345 | |
| }, | |
| { | |
| "epoch": 2.2511061946902653, | |
| "grad_norm": 0.20874847338614333, | |
| "learning_rate": 2.684040286651338e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.285367488861084, | |
| "step": 2035, | |
| "valid_targets_mean": 4392.4, | |
| "valid_targets_min": 1561 | |
| }, | |
| { | |
| "epoch": 2.256637168141593, | |
| "grad_norm": 0.19085196268214305, | |
| "learning_rate": 2.6767782449074706e-05, | |
| "loss": 0.2807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30459174513816833, | |
| "step": 2040, | |
| "valid_targets_mean": 4840.8, | |
| "valid_targets_min": 1782 | |
| }, | |
| { | |
| "epoch": 2.2621681415929205, | |
| "grad_norm": 0.1964410505949187, | |
| "learning_rate": 2.6695061124073492e-05, | |
| "loss": 0.2846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27611684799194336, | |
| "step": 2045, | |
| "valid_targets_mean": 4368.2, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 2.267699115044248, | |
| "grad_norm": 0.20946008626665896, | |
| "learning_rate": 2.6622239975783897e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2839878499507904, | |
| "step": 2050, | |
| "valid_targets_mean": 4123.9, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 2.273230088495575, | |
| "grad_norm": 0.1945450505328953, | |
| "learning_rate": 2.654932008996845e-05, | |
| "loss": 0.2722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24635012447834015, | |
| "step": 2055, | |
| "valid_targets_mean": 4269.1, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 2.2787610619469025, | |
| "grad_norm": 0.2074624073013297, | |
| "learning_rate": 2.647630255386185e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25834646821022034, | |
| "step": 2060, | |
| "valid_targets_mean": 3877.0, | |
| "valid_targets_min": 1640 | |
| }, | |
| { | |
| "epoch": 2.28429203539823, | |
| "grad_norm": 0.19963110140282136, | |
| "learning_rate": 2.6403188456154766e-05, | |
| "loss": 0.2844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2909555435180664, | |
| "step": 2065, | |
| "valid_targets_mean": 4567.2, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 2.2898230088495577, | |
| "grad_norm": 0.2009199592701345, | |
| "learning_rate": 2.6329978886977595e-05, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2652171552181244, | |
| "step": 2070, | |
| "valid_targets_mean": 4290.6, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 2.295353982300885, | |
| "grad_norm": 0.2040036363674313, | |
| "learning_rate": 2.625667493788423e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2696845829486847, | |
| "step": 2075, | |
| "valid_targets_mean": 4643.4, | |
| "valid_targets_min": 1671 | |
| }, | |
| { | |
| "epoch": 2.3008849557522124, | |
| "grad_norm": 0.21798250367711594, | |
| "learning_rate": 2.618327770183573e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2716253995895386, | |
| "step": 2080, | |
| "valid_targets_mean": 4362.1, | |
| "valid_targets_min": 1337 | |
| }, | |
| { | |
| "epoch": 2.3064159292035398, | |
| "grad_norm": 0.18643630763340444, | |
| "learning_rate": 2.6109788273184103e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2537388801574707, | |
| "step": 2085, | |
| "valid_targets_mean": 4518.2, | |
| "valid_targets_min": 1400 | |
| }, | |
| { | |
| "epoch": 2.311946902654867, | |
| "grad_norm": 0.21018155423985221, | |
| "learning_rate": 2.6036207747655917e-05, | |
| "loss": 0.2754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28010761737823486, | |
| "step": 2090, | |
| "valid_targets_mean": 4291.8, | |
| "valid_targets_min": 1471 | |
| }, | |
| { | |
| "epoch": 2.317477876106195, | |
| "grad_norm": 0.2083179979286552, | |
| "learning_rate": 2.5962537222336013e-05, | |
| "loss": 0.2695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26800477504730225, | |
| "step": 2095, | |
| "valid_targets_mean": 4306.0, | |
| "valid_targets_min": 1330 | |
| }, | |
| { | |
| "epoch": 2.3230088495575223, | |
| "grad_norm": 0.18863297642656535, | |
| "learning_rate": 2.5888777795651118e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25767698884010315, | |
| "step": 2100, | |
| "valid_targets_mean": 4531.2, | |
| "valid_targets_min": 1637 | |
| }, | |
| { | |
| "epoch": 2.3285398230088497, | |
| "grad_norm": 0.18825757317929676, | |
| "learning_rate": 2.5814930567353488e-05, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2594314515590668, | |
| "step": 2105, | |
| "valid_targets_mean": 3975.5, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 2.334070796460177, | |
| "grad_norm": 0.19268568359236704, | |
| "learning_rate": 2.5740996638504473e-05, | |
| "loss": 0.2727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26197704672813416, | |
| "step": 2110, | |
| "valid_targets_mean": 4244.6, | |
| "valid_targets_min": 1487 | |
| }, | |
| { | |
| "epoch": 2.3396017699115044, | |
| "grad_norm": 0.19143421890297113, | |
| "learning_rate": 2.5666977111458162e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2626666724681854, | |
| "step": 2115, | |
| "valid_targets_mean": 4455.0, | |
| "valid_targets_min": 1708 | |
| }, | |
| { | |
| "epoch": 2.3451327433628317, | |
| "grad_norm": 0.20188785937312803, | |
| "learning_rate": 2.5592873089844893e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2516041100025177, | |
| "step": 2120, | |
| "valid_targets_mean": 4131.4, | |
| "valid_targets_min": 1449 | |
| }, | |
| { | |
| "epoch": 2.350663716814159, | |
| "grad_norm": 0.21592545724112366, | |
| "learning_rate": 2.5518685678554813e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28549471497535706, | |
| "step": 2125, | |
| "valid_targets_mean": 4204.7, | |
| "valid_targets_min": 1515 | |
| }, | |
| { | |
| "epoch": 2.356194690265487, | |
| "grad_norm": 0.18568172275969208, | |
| "learning_rate": 2.5444415983721426e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2657645046710968, | |
| "step": 2130, | |
| "valid_targets_mean": 4525.8, | |
| "valid_targets_min": 1955 | |
| }, | |
| { | |
| "epoch": 2.3617256637168142, | |
| "grad_norm": 0.2114074340271294, | |
| "learning_rate": 2.5370065112705073e-05, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267343133687973, | |
| "step": 2135, | |
| "valid_targets_mean": 4609.5, | |
| "valid_targets_min": 1607 | |
| }, | |
| { | |
| "epoch": 2.3672566371681416, | |
| "grad_norm": 0.19988139320134624, | |
| "learning_rate": 2.529563417407642e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2536604702472687, | |
| "step": 2140, | |
| "valid_targets_mean": 4457.6, | |
| "valid_targets_min": 1730 | |
| }, | |
| { | |
| "epoch": 2.372787610619469, | |
| "grad_norm": 0.20550942724084198, | |
| "learning_rate": 2.5221124277599955e-05, | |
| "loss": 0.2666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26966604590415955, | |
| "step": 2145, | |
| "valid_targets_mean": 4404.3, | |
| "valid_targets_min": 1571 | |
| }, | |
| { | |
| "epoch": 2.3783185840707963, | |
| "grad_norm": 0.21546964665770493, | |
| "learning_rate": 2.5146536534217428e-05, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24814708530902863, | |
| "step": 2150, | |
| "valid_targets_mean": 4645.5, | |
| "valid_targets_min": 1357 | |
| }, | |
| { | |
| "epoch": 2.383849557522124, | |
| "grad_norm": 0.1999343454139305, | |
| "learning_rate": 2.5071872056031277e-05, | |
| "loss": 0.2617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24727587401866913, | |
| "step": 2155, | |
| "valid_targets_mean": 4112.4, | |
| "valid_targets_min": 1374 | |
| }, | |
| { | |
| "epoch": 2.3893805309734515, | |
| "grad_norm": 0.19644154969242167, | |
| "learning_rate": 2.499713195628805e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27450093626976013, | |
| "step": 2160, | |
| "valid_targets_mean": 4107.3, | |
| "valid_targets_min": 1391 | |
| }, | |
| { | |
| "epoch": 2.394911504424779, | |
| "grad_norm": 0.19175932783462493, | |
| "learning_rate": 2.4922317349361826e-05, | |
| "loss": 0.2601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26379743218421936, | |
| "step": 2165, | |
| "valid_targets_mean": 4354.8, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 2.400442477876106, | |
| "grad_norm": 0.20340256466825102, | |
| "learning_rate": 2.4847429350737585e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2678232491016388, | |
| "step": 2170, | |
| "valid_targets_mean": 4297.8, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 2.4059734513274336, | |
| "grad_norm": 0.19115807004344504, | |
| "learning_rate": 2.4772469076994562e-05, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24564404785633087, | |
| "step": 2175, | |
| "valid_targets_mean": 4512.5, | |
| "valid_targets_min": 1787 | |
| }, | |
| { | |
| "epoch": 2.411504424778761, | |
| "grad_norm": 0.20801029832792492, | |
| "learning_rate": 2.4697437645789614e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25381579995155334, | |
| "step": 2180, | |
| "valid_targets_mean": 4544.5, | |
| "valid_targets_min": 1643 | |
| }, | |
| { | |
| "epoch": 2.4170353982300883, | |
| "grad_norm": 0.21235466975864736, | |
| "learning_rate": 2.462233617584058e-05, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24663154780864716, | |
| "step": 2185, | |
| "valid_targets_mean": 3768.1, | |
| "valid_targets_min": 1447 | |
| }, | |
| { | |
| "epoch": 2.422566371681416, | |
| "grad_norm": 0.21234134003960786, | |
| "learning_rate": 2.4547165786909548e-05, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22784800827503204, | |
| "step": 2190, | |
| "valid_targets_mean": 3827.2, | |
| "valid_targets_min": 1534 | |
| }, | |
| { | |
| "epoch": 2.4280973451327434, | |
| "grad_norm": 0.2123240480392009, | |
| "learning_rate": 2.44719275997862e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23649664223194122, | |
| "step": 2195, | |
| "valid_targets_mean": 4097.9, | |
| "valid_targets_min": 1649 | |
| }, | |
| { | |
| "epoch": 2.433628318584071, | |
| "grad_norm": 0.19482977551995695, | |
| "learning_rate": 2.439662273627108e-05, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27336061000823975, | |
| "step": 2200, | |
| "valid_targets_mean": 4598.4, | |
| "valid_targets_min": 1278 | |
| }, | |
| { | |
| "epoch": 2.439159292035398, | |
| "grad_norm": 0.2527690116267459, | |
| "learning_rate": 2.4321252319158893e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2549128830432892, | |
| "step": 2205, | |
| "valid_targets_mean": 4081.0, | |
| "valid_targets_min": 1435 | |
| }, | |
| { | |
| "epoch": 2.4446902654867255, | |
| "grad_norm": 0.20619057211687322, | |
| "learning_rate": 2.424581747222173e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2713813781738281, | |
| "step": 2210, | |
| "valid_targets_mean": 4335.2, | |
| "valid_targets_min": 1659 | |
| }, | |
| { | |
| "epoch": 2.4502212389380533, | |
| "grad_norm": 0.2119346607370242, | |
| "learning_rate": 2.4170319320192332e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545493543148041, | |
| "step": 2215, | |
| "valid_targets_mean": 4521.1, | |
| "valid_targets_min": 1825 | |
| }, | |
| { | |
| "epoch": 2.4557522123893807, | |
| "grad_norm": 0.23519837096145998, | |
| "learning_rate": 2.409475898874734e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29995253682136536, | |
| "step": 2220, | |
| "valid_targets_mean": 4302.4, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 2.461283185840708, | |
| "grad_norm": 0.1929408159807869, | |
| "learning_rate": 2.4019137604490453e-05, | |
| "loss": 0.2583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2576839029788971, | |
| "step": 2225, | |
| "valid_targets_mean": 4594.5, | |
| "valid_targets_min": 1470 | |
| }, | |
| { | |
| "epoch": 2.4668141592920354, | |
| "grad_norm": 0.19842446943126885, | |
| "learning_rate": 2.3943456294935688e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25846683979034424, | |
| "step": 2230, | |
| "valid_targets_mean": 4955.7, | |
| "valid_targets_min": 1164 | |
| }, | |
| { | |
| "epoch": 2.4723451327433628, | |
| "grad_norm": 0.1725992785634467, | |
| "learning_rate": 2.3867716188490563e-05, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1588878482580185, | |
| "step": 2235, | |
| "valid_targets_mean": 5468.9, | |
| "valid_targets_min": 1736 | |
| }, | |
| { | |
| "epoch": 2.47787610619469, | |
| "grad_norm": 0.15788668471348707, | |
| "learning_rate": 2.3791918414439222e-05, | |
| "loss": 0.1617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16373421251773834, | |
| "step": 2240, | |
| "valid_targets_mean": 5644.7, | |
| "valid_targets_min": 2418 | |
| }, | |
| { | |
| "epoch": 2.4834070796460175, | |
| "grad_norm": 0.1457102640828593, | |
| "learning_rate": 2.3716064102925653e-05, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14661376178264618, | |
| "step": 2245, | |
| "valid_targets_mean": 5106.6, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 2.4889380530973453, | |
| "grad_norm": 0.15985578960581615, | |
| "learning_rate": 2.364015438493682e-05, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17278128862380981, | |
| "step": 2250, | |
| "valid_targets_mean": 5663.0, | |
| "valid_targets_min": 2134 | |
| }, | |
| { | |
| "epoch": 2.4944690265486726, | |
| "grad_norm": 0.14373444746354777, | |
| "learning_rate": 2.3564190392285782e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15657931566238403, | |
| "step": 2255, | |
| "valid_targets_mean": 5694.7, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.21090804690495596, | |
| "learning_rate": 2.348817325759485e-05, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14999999105930328, | |
| "step": 2260, | |
| "valid_targets_mean": 5656.6, | |
| "valid_targets_min": 1602 | |
| }, | |
| { | |
| "epoch": 2.5055309734513274, | |
| "grad_norm": 0.14519616234852964, | |
| "learning_rate": 2.341210411427867e-05, | |
| "loss": 0.1501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1581110805273056, | |
| "step": 2265, | |
| "valid_targets_mean": 5572.3, | |
| "valid_targets_min": 1722 | |
| }, | |
| { | |
| "epoch": 2.5110619469026547, | |
| "grad_norm": 0.1431268972937035, | |
| "learning_rate": 2.333598409652734e-05, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13712096214294434, | |
| "step": 2270, | |
| "valid_targets_mean": 5583.8, | |
| "valid_targets_min": 2253 | |
| }, | |
| { | |
| "epoch": 2.5165929203539825, | |
| "grad_norm": 0.13934989002998358, | |
| "learning_rate": 2.325981433928949e-05, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1538744568824768, | |
| "step": 2275, | |
| "valid_targets_mean": 5509.1, | |
| "valid_targets_min": 2372 | |
| }, | |
| { | |
| "epoch": 2.52212389380531, | |
| "grad_norm": 0.1348357731401913, | |
| "learning_rate": 2.3183595978255382e-05, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14874015748500824, | |
| "step": 2280, | |
| "valid_targets_mean": 5591.0, | |
| "valid_targets_min": 1470 | |
| }, | |
| { | |
| "epoch": 2.5276548672566372, | |
| "grad_norm": 0.136313717291871, | |
| "learning_rate": 2.3107330149839932e-05, | |
| "loss": 0.1628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15306438505649567, | |
| "step": 2285, | |
| "valid_targets_mean": 5520.7, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 2.5331858407079646, | |
| "grad_norm": 0.13672849216445582, | |
| "learning_rate": 2.303101799116583e-05, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14441382884979248, | |
| "step": 2290, | |
| "valid_targets_mean": 5539.8, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.538716814159292, | |
| "grad_norm": 0.13349877923919157, | |
| "learning_rate": 2.2954660640046507e-05, | |
| "loss": 0.1513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14603160321712494, | |
| "step": 2295, | |
| "valid_targets_mean": 5621.3, | |
| "valid_targets_min": 2512 | |
| }, | |
| { | |
| "epoch": 2.5442477876106193, | |
| "grad_norm": 0.1390708116248873, | |
| "learning_rate": 2.2878259234969256e-05, | |
| "loss": 0.1522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14958292245864868, | |
| "step": 2300, | |
| "valid_targets_mean": 5260.4, | |
| "valid_targets_min": 1685 | |
| }, | |
| { | |
| "epoch": 2.5497787610619467, | |
| "grad_norm": 0.13761546767299984, | |
| "learning_rate": 2.280181491507818e-05, | |
| "loss": 0.1503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15663492679595947, | |
| "step": 2305, | |
| "valid_targets_mean": 5810.2, | |
| "valid_targets_min": 2140 | |
| }, | |
| { | |
| "epoch": 2.5553097345132745, | |
| "grad_norm": 0.14967721889927416, | |
| "learning_rate": 2.2725328820157263e-05, | |
| "loss": 0.151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16585075855255127, | |
| "step": 2310, | |
| "valid_targets_mean": 5636.8, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 2.560840707964602, | |
| "grad_norm": 0.1300041083691616, | |
| "learning_rate": 2.2648802090613353e-05, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14434877038002014, | |
| "step": 2315, | |
| "valid_targets_mean": 5656.0, | |
| "valid_targets_min": 2255 | |
| }, | |
| { | |
| "epoch": 2.566371681415929, | |
| "grad_norm": 0.1391401164916481, | |
| "learning_rate": 2.2572235867459133e-05, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15009278059005737, | |
| "step": 2320, | |
| "valid_targets_mean": 5801.3, | |
| "valid_targets_min": 1753 | |
| }, | |
| { | |
| "epoch": 2.5719026548672566, | |
| "grad_norm": 0.13080152207284676, | |
| "learning_rate": 2.2495631292296176e-05, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15306101739406586, | |
| "step": 2325, | |
| "valid_targets_mean": 5735.4, | |
| "valid_targets_min": 1668 | |
| }, | |
| { | |
| "epoch": 2.577433628318584, | |
| "grad_norm": 0.17539804942580384, | |
| "learning_rate": 2.241898950729785e-05, | |
| "loss": 0.1558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16614307463169098, | |
| "step": 2330, | |
| "valid_targets_mean": 5734.6, | |
| "valid_targets_min": 2207 | |
| }, | |
| { | |
| "epoch": 2.5829646017699117, | |
| "grad_norm": 0.13020668787559408, | |
| "learning_rate": 2.234231165519234e-05, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1469842940568924, | |
| "step": 2335, | |
| "valid_targets_mean": 5935.3, | |
| "valid_targets_min": 2004 | |
| }, | |
| { | |
| "epoch": 2.588495575221239, | |
| "grad_norm": 0.14104465735214022, | |
| "learning_rate": 2.2265598879245583e-05, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.158384308218956, | |
| "step": 2340, | |
| "valid_targets_mean": 5789.6, | |
| "valid_targets_min": 2162 | |
| }, | |
| { | |
| "epoch": 2.5940265486725664, | |
| "grad_norm": 0.15316130768694036, | |
| "learning_rate": 2.2188852323244238e-05, | |
| "loss": 0.15, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13153453171253204, | |
| "step": 2345, | |
| "valid_targets_mean": 5581.1, | |
| "valid_targets_min": 2067 | |
| }, | |
| { | |
| "epoch": 2.599557522123894, | |
| "grad_norm": 0.12994809077299693, | |
| "learning_rate": 2.2112073131478606e-05, | |
| "loss": 0.1542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1528850644826889, | |
| "step": 2350, | |
| "valid_targets_mean": 5589.7, | |
| "valid_targets_min": 1275 | |
| }, | |
| { | |
| "epoch": 2.605088495575221, | |
| "grad_norm": 0.13439298157369908, | |
| "learning_rate": 2.2035262448725606e-05, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14992761611938477, | |
| "step": 2355, | |
| "valid_targets_mean": 5691.5, | |
| "valid_targets_min": 2260 | |
| }, | |
| { | |
| "epoch": 2.6106194690265485, | |
| "grad_norm": 0.14096400729096306, | |
| "learning_rate": 2.1958421420231684e-05, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16226308047771454, | |
| "step": 2360, | |
| "valid_targets_mean": 5586.6, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 2.616150442477876, | |
| "grad_norm": 0.1397052222943089, | |
| "learning_rate": 2.1881551191695736e-05, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15690858662128448, | |
| "step": 2365, | |
| "valid_targets_mean": 5680.4, | |
| "valid_targets_min": 2043 | |
| }, | |
| { | |
| "epoch": 2.6216814159292037, | |
| "grad_norm": 0.13971055929511444, | |
| "learning_rate": 2.1804652909252024e-05, | |
| "loss": 0.141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.132961705327034, | |
| "step": 2370, | |
| "valid_targets_mean": 5273.0, | |
| "valid_targets_min": 1542 | |
| }, | |
| { | |
| "epoch": 2.627212389380531, | |
| "grad_norm": 0.13497111036160686, | |
| "learning_rate": 2.1727727719453108e-05, | |
| "loss": 0.1513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1543663591146469, | |
| "step": 2375, | |
| "valid_targets_mean": 5483.7, | |
| "valid_targets_min": 2456 | |
| }, | |
| { | |
| "epoch": 2.6327433628318584, | |
| "grad_norm": 0.14079127274161568, | |
| "learning_rate": 2.1650776769252724e-05, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14466913044452667, | |
| "step": 2380, | |
| "valid_targets_mean": 5671.9, | |
| "valid_targets_min": 2020 | |
| }, | |
| { | |
| "epoch": 2.6382743362831858, | |
| "grad_norm": 0.13590303265339887, | |
| "learning_rate": 2.157380120598872e-05, | |
| "loss": 0.1469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14048151671886444, | |
| "step": 2385, | |
| "valid_targets_mean": 5815.6, | |
| "valid_targets_min": 1507 | |
| }, | |
| { | |
| "epoch": 2.643805309734513, | |
| "grad_norm": 0.17933704568350453, | |
| "learning_rate": 2.1496802177365873e-05, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16505680978298187, | |
| "step": 2390, | |
| "valid_targets_mean": 5608.7, | |
| "valid_targets_min": 2551 | |
| }, | |
| { | |
| "epoch": 2.649336283185841, | |
| "grad_norm": 0.13278556741329334, | |
| "learning_rate": 2.1419780831438884e-05, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15489672124385834, | |
| "step": 2395, | |
| "valid_targets_mean": 5705.6, | |
| "valid_targets_min": 1328 | |
| }, | |
| { | |
| "epoch": 2.6548672566371683, | |
| "grad_norm": 0.14456065050556666, | |
| "learning_rate": 2.134273831659517e-05, | |
| "loss": 0.1442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14727798104286194, | |
| "step": 2400, | |
| "valid_targets_mean": 5798.1, | |
| "valid_targets_min": 1202 | |
| }, | |
| { | |
| "epoch": 2.6603982300884956, | |
| "grad_norm": 0.14006756152409477, | |
| "learning_rate": 2.12656757815378e-05, | |
| "loss": 0.1535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14258350431919098, | |
| "step": 2405, | |
| "valid_targets_mean": 5360.5, | |
| "valid_targets_min": 1928 | |
| }, | |
| { | |
| "epoch": 2.665929203539823, | |
| "grad_norm": 0.12268474418324121, | |
| "learning_rate": 2.1188594375268317e-05, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1365000456571579, | |
| "step": 2410, | |
| "valid_targets_mean": 5662.5, | |
| "valid_targets_min": 2321 | |
| }, | |
| { | |
| "epoch": 2.6714601769911503, | |
| "grad_norm": 0.13212751036542084, | |
| "learning_rate": 2.111149524706966e-05, | |
| "loss": 0.1475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1402900665998459, | |
| "step": 2415, | |
| "valid_targets_mean": 5470.0, | |
| "valid_targets_min": 2393 | |
| }, | |
| { | |
| "epoch": 2.676991150442478, | |
| "grad_norm": 0.16937800477577541, | |
| "learning_rate": 2.1034379546488984e-05, | |
| "loss": 0.1486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16343998908996582, | |
| "step": 2420, | |
| "valid_targets_mean": 5664.2, | |
| "valid_targets_min": 1799 | |
| }, | |
| { | |
| "epoch": 2.682522123893805, | |
| "grad_norm": 0.1323818375790757, | |
| "learning_rate": 2.095724842332054e-05, | |
| "loss": 0.1505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15606127679347992, | |
| "step": 2425, | |
| "valid_targets_mean": 5649.4, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 2.688053097345133, | |
| "grad_norm": 0.1377625638153764, | |
| "learning_rate": 2.0880103027588542e-05, | |
| "loss": 0.1462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14493906497955322, | |
| "step": 2430, | |
| "valid_targets_mean": 5697.3, | |
| "valid_targets_min": 2397 | |
| }, | |
| { | |
| "epoch": 2.6935840707964602, | |
| "grad_norm": 0.13202879028137457, | |
| "learning_rate": 2.080294450953e-05, | |
| "loss": 0.1447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1546434909105301, | |
| "step": 2435, | |
| "valid_targets_mean": 5809.5, | |
| "valid_targets_min": 1465 | |
| }, | |
| { | |
| "epoch": 2.6991150442477876, | |
| "grad_norm": 0.13491295260780567, | |
| "learning_rate": 2.072577401957757e-05, | |
| "loss": 0.1508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14874127507209778, | |
| "step": 2440, | |
| "valid_targets_mean": 5493.7, | |
| "valid_targets_min": 1965 | |
| }, | |
| { | |
| "epoch": 2.704646017699115, | |
| "grad_norm": 0.1283243236724422, | |
| "learning_rate": 2.0648592708342418e-05, | |
| "loss": 0.1422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13708215951919556, | |
| "step": 2445, | |
| "valid_targets_mean": 5704.7, | |
| "valid_targets_min": 2441 | |
| }, | |
| { | |
| "epoch": 2.7101769911504423, | |
| "grad_norm": 0.1338818955137724, | |
| "learning_rate": 2.0571401726597056e-05, | |
| "loss": 0.1469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13535962998867035, | |
| "step": 2450, | |
| "valid_targets_mean": 5347.8, | |
| "valid_targets_min": 1854 | |
| }, | |
| { | |
| "epoch": 2.71570796460177, | |
| "grad_norm": 0.1363403607989519, | |
| "learning_rate": 2.049420222525818e-05, | |
| "loss": 0.1527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1491251140832901, | |
| "step": 2455, | |
| "valid_targets_mean": 5607.5, | |
| "valid_targets_min": 1872 | |
| }, | |
| { | |
| "epoch": 2.7212389380530975, | |
| "grad_norm": 0.15285057601151913, | |
| "learning_rate": 2.041699535536951e-05, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14376698434352875, | |
| "step": 2460, | |
| "valid_targets_mean": 5596.0, | |
| "valid_targets_min": 2060 | |
| }, | |
| { | |
| "epoch": 2.726769911504425, | |
| "grad_norm": 0.1370003479846577, | |
| "learning_rate": 2.0339782268084636e-05, | |
| "loss": 0.1467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1404213309288025, | |
| "step": 2465, | |
| "valid_targets_mean": 5738.8, | |
| "valid_targets_min": 2624 | |
| }, | |
| { | |
| "epoch": 2.732300884955752, | |
| "grad_norm": 0.12715944031784213, | |
| "learning_rate": 2.0262564114649853e-05, | |
| "loss": 0.1401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14377352595329285, | |
| "step": 2470, | |
| "valid_targets_mean": 5369.5, | |
| "valid_targets_min": 1918 | |
| }, | |
| { | |
| "epoch": 2.7378318584070795, | |
| "grad_norm": 0.14346111883217125, | |
| "learning_rate": 2.0185342046386983e-05, | |
| "loss": 0.153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14263181388378143, | |
| "step": 2475, | |
| "valid_targets_mean": 5758.8, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 2.7433628318584073, | |
| "grad_norm": 0.1369636418115467, | |
| "learning_rate": 2.0108117214676216e-05, | |
| "loss": 0.1444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1492491364479065, | |
| "step": 2480, | |
| "valid_targets_mean": 5576.9, | |
| "valid_targets_min": 2057 | |
| }, | |
| { | |
| "epoch": 2.7488938053097343, | |
| "grad_norm": 0.13976455883556152, | |
| "learning_rate": 2.003089077093896e-05, | |
| "loss": 0.1438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16066919267177582, | |
| "step": 2485, | |
| "valid_targets_mean": 5625.5, | |
| "valid_targets_min": 2814 | |
| }, | |
| { | |
| "epoch": 2.754424778761062, | |
| "grad_norm": 0.2405967867565277, | |
| "learning_rate": 1.995366386662065e-05, | |
| "loss": 0.1731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24664121866226196, | |
| "step": 2490, | |
| "valid_targets_mean": 4540.7, | |
| "valid_targets_min": 1999 | |
| }, | |
| { | |
| "epoch": 2.7599557522123894, | |
| "grad_norm": 0.19316445033752302, | |
| "learning_rate": 1.987643765317358e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25726640224456787, | |
| "step": 2495, | |
| "valid_targets_mean": 4549.2, | |
| "valid_targets_min": 1395 | |
| }, | |
| { | |
| "epoch": 2.765486725663717, | |
| "grad_norm": 0.19117808725115198, | |
| "learning_rate": 1.9799213282039746e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24328172206878662, | |
| "step": 2500, | |
| "valid_targets_mean": 4098.9, | |
| "valid_targets_min": 1321 | |
| }, | |
| { | |
| "epoch": 2.771017699115044, | |
| "grad_norm": 0.18658198483846983, | |
| "learning_rate": 1.9721991904633702e-05, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23196060955524445, | |
| "step": 2505, | |
| "valid_targets_mean": 4328.3, | |
| "valid_targets_min": 1563 | |
| }, | |
| { | |
| "epoch": 2.7765486725663715, | |
| "grad_norm": 0.18815403212904494, | |
| "learning_rate": 1.964477467232532e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22732329368591309, | |
| "step": 2510, | |
| "valid_targets_mean": 4167.3, | |
| "valid_targets_min": 1757 | |
| }, | |
| { | |
| "epoch": 2.7820796460176993, | |
| "grad_norm": 0.1822903550362368, | |
| "learning_rate": 1.956756273642271e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23120920360088348, | |
| "step": 2515, | |
| "valid_targets_mean": 4363.1, | |
| "valid_targets_min": 2024 | |
| }, | |
| { | |
| "epoch": 2.7876106194690267, | |
| "grad_norm": 0.30440940752792567, | |
| "learning_rate": 1.9490357248154976e-05, | |
| "loss": 0.2185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2084188461303711, | |
| "step": 2520, | |
| "valid_targets_mean": 7772.9, | |
| "valid_targets_min": 1241 | |
| }, | |
| { | |
| "epoch": 2.793141592920354, | |
| "grad_norm": 0.17226596617339787, | |
| "learning_rate": 1.9413159358655126e-05, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19915485382080078, | |
| "step": 2525, | |
| "valid_targets_mean": 6781.0, | |
| "valid_targets_min": 1234 | |
| }, | |
| { | |
| "epoch": 2.7986725663716814, | |
| "grad_norm": 0.1750010387835947, | |
| "learning_rate": 1.9335970218942835e-05, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18839533627033234, | |
| "step": 2530, | |
| "valid_targets_mean": 6552.0, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 2.8042035398230087, | |
| "grad_norm": 0.1515594347240757, | |
| "learning_rate": 1.925879097990734e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18806235492229462, | |
| "step": 2535, | |
| "valid_targets_mean": 8060.1, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 2.8097345132743365, | |
| "grad_norm": 0.1468786489951608, | |
| "learning_rate": 1.918162279229026e-05, | |
| "loss": 0.1977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20665208995342255, | |
| "step": 2540, | |
| "valid_targets_mean": 8211.2, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 2.8152654867256635, | |
| "grad_norm": 0.15107652925477147, | |
| "learning_rate": 1.9104466806668426e-05, | |
| "loss": 0.1877, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1813529133796692, | |
| "step": 2545, | |
| "valid_targets_mean": 8196.4, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 2.8207964601769913, | |
| "grad_norm": 0.13734282987078872, | |
| "learning_rate": 1.9027324173436744e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1763150840997696, | |
| "step": 2550, | |
| "valid_targets_mean": 13461.4, | |
| "valid_targets_min": 1948 | |
| }, | |
| { | |
| "epoch": 2.8263274336283186, | |
| "grad_norm": 0.10599477252846069, | |
| "learning_rate": 1.8950196042791026e-05, | |
| "loss": 0.1661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1688251942396164, | |
| "step": 2555, | |
| "valid_targets_mean": 13770.1, | |
| "valid_targets_min": 1105 | |
| }, | |
| { | |
| "epoch": 2.831858407079646, | |
| "grad_norm": 0.10821396444531388, | |
| "learning_rate": 1.8873083564710877e-05, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1601705700159073, | |
| "step": 2560, | |
| "valid_targets_mean": 13243.3, | |
| "valid_targets_min": 1038 | |
| }, | |
| { | |
| "epoch": 2.8373893805309733, | |
| "grad_norm": 0.1282146188917175, | |
| "learning_rate": 1.879598788894249e-05, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17964714765548706, | |
| "step": 2565, | |
| "valid_targets_mean": 13645.2, | |
| "valid_targets_min": 3165 | |
| }, | |
| { | |
| "epoch": 2.8429203539823007, | |
| "grad_norm": 0.25889123259080515, | |
| "learning_rate": 1.8718910164981556e-05, | |
| "loss": 0.1736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19528110325336456, | |
| "step": 2570, | |
| "valid_targets_mean": 7102.2, | |
| "valid_targets_min": 1326 | |
| }, | |
| { | |
| "epoch": 2.8484513274336285, | |
| "grad_norm": 0.18760088182511236, | |
| "learning_rate": 1.864185154205609e-05, | |
| "loss": 0.1903, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18288938701152802, | |
| "step": 2575, | |
| "valid_targets_mean": 7134.3, | |
| "valid_targets_min": 802 | |
| }, | |
| { | |
| "epoch": 2.853982300884956, | |
| "grad_norm": 0.1693688407271248, | |
| "learning_rate": 1.8564813169109336e-05, | |
| "loss": 0.186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18678860366344452, | |
| "step": 2580, | |
| "valid_targets_mean": 6840.5, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 2.859513274336283, | |
| "grad_norm": 0.17356488529295702, | |
| "learning_rate": 1.8487796194782584e-05, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2206316739320755, | |
| "step": 2585, | |
| "valid_targets_mean": 4911.3, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 2.8650442477876106, | |
| "grad_norm": 0.24168266392761542, | |
| "learning_rate": 1.8410801767398087e-05, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2202351838350296, | |
| "step": 2590, | |
| "valid_targets_mean": 4557.8, | |
| "valid_targets_min": 1640 | |
| }, | |
| { | |
| "epoch": 2.870575221238938, | |
| "grad_norm": 0.16987345714735308, | |
| "learning_rate": 1.8333831034941907e-05, | |
| "loss": 0.2159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21073831617832184, | |
| "step": 2595, | |
| "valid_targets_mean": 5024.9, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 2.8761061946902657, | |
| "grad_norm": 0.18034206075565468, | |
| "learning_rate": 1.8256885145046837e-05, | |
| "loss": 0.2234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21980033814907074, | |
| "step": 2600, | |
| "valid_targets_mean": 4903.7, | |
| "valid_targets_min": 1921 | |
| }, | |
| { | |
| "epoch": 2.8816371681415927, | |
| "grad_norm": 0.17975988975391416, | |
| "learning_rate": 1.817996524497526e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23293693363666534, | |
| "step": 2605, | |
| "valid_targets_mean": 4387.2, | |
| "valid_targets_min": 1566 | |
| }, | |
| { | |
| "epoch": 2.8871681415929205, | |
| "grad_norm": 0.17882192643784955, | |
| "learning_rate": 1.810307248160204e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2408549040555954, | |
| "step": 2610, | |
| "valid_targets_mean": 4558.5, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 2.892699115044248, | |
| "grad_norm": 0.16616159366297636, | |
| "learning_rate": 1.802620800139743e-05, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2282954454421997, | |
| "step": 2615, | |
| "valid_targets_mean": 5290.2, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 2.898230088495575, | |
| "grad_norm": 0.18028133121390458, | |
| "learning_rate": 1.7949372950410013e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24078035354614258, | |
| "step": 2620, | |
| "valid_targets_mean": 4675.7, | |
| "valid_targets_min": 1828 | |
| }, | |
| { | |
| "epoch": 2.9037610619469025, | |
| "grad_norm": 0.16063209513198065, | |
| "learning_rate": 1.7872568474249557e-05, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24520045518875122, | |
| "step": 2625, | |
| "valid_targets_mean": 5302.2, | |
| "valid_targets_min": 1799 | |
| }, | |
| { | |
| "epoch": 2.90929203539823, | |
| "grad_norm": 0.17350869831278223, | |
| "learning_rate": 1.7795795718069955e-05, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2590453624725342, | |
| "step": 2630, | |
| "valid_targets_mean": 4664.0, | |
| "valid_targets_min": 1379 | |
| }, | |
| { | |
| "epoch": 2.9148230088495577, | |
| "grad_norm": 0.17394443410051422, | |
| "learning_rate": 1.7719055826552166e-05, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23139728605747223, | |
| "step": 2635, | |
| "valid_targets_mean": 4844.1, | |
| "valid_targets_min": 1742 | |
| }, | |
| { | |
| "epoch": 2.920353982300885, | |
| "grad_norm": 0.18226451794457624, | |
| "learning_rate": 1.7642349943887146e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534455955028534, | |
| "step": 2640, | |
| "valid_targets_mean": 4829.4, | |
| "valid_targets_min": 1336 | |
| }, | |
| { | |
| "epoch": 2.9258849557522124, | |
| "grad_norm": 0.17041922132264678, | |
| "learning_rate": 1.7565679213758765e-05, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2441035509109497, | |
| "step": 2645, | |
| "valid_targets_mean": 4886.9, | |
| "valid_targets_min": 1577 | |
| }, | |
| { | |
| "epoch": 2.9314159292035398, | |
| "grad_norm": 0.17310642199486445, | |
| "learning_rate": 1.7489044779326766e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21932916343212128, | |
| "step": 2650, | |
| "valid_targets_mean": 5259.2, | |
| "valid_targets_min": 1907 | |
| }, | |
| { | |
| "epoch": 2.936946902654867, | |
| "grad_norm": 0.1835964038253878, | |
| "learning_rate": 1.741244778320973e-05, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23366856575012207, | |
| "step": 2655, | |
| "valid_targets_mean": 4903.1, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 2.942477876106195, | |
| "grad_norm": 0.1780220482845798, | |
| "learning_rate": 1.7335889367468054e-05, | |
| "loss": 0.2382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24425287544727325, | |
| "step": 2660, | |
| "valid_targets_mean": 5308.7, | |
| "valid_targets_min": 2259 | |
| }, | |
| { | |
| "epoch": 2.948008849557522, | |
| "grad_norm": 0.19782089081005108, | |
| "learning_rate": 1.7259370673586865e-05, | |
| "loss": 0.2379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22854922711849213, | |
| "step": 2665, | |
| "valid_targets_mean": 4211.5, | |
| "valid_targets_min": 1191 | |
| }, | |
| { | |
| "epoch": 2.9535398230088497, | |
| "grad_norm": 0.19281920348673445, | |
| "learning_rate": 1.7182892842459057e-05, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.230901837348938, | |
| "step": 2670, | |
| "valid_targets_mean": 4160.5, | |
| "valid_targets_min": 1672 | |
| }, | |
| { | |
| "epoch": 2.959070796460177, | |
| "grad_norm": 0.18198058197616457, | |
| "learning_rate": 1.710645701436826e-05, | |
| "loss": 0.2295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22771096229553223, | |
| "step": 2675, | |
| "valid_targets_mean": 4389.4, | |
| "valid_targets_min": 1633 | |
| }, | |
| { | |
| "epoch": 2.9646017699115044, | |
| "grad_norm": 0.17890313355271556, | |
| "learning_rate": 1.703006432897184e-05, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22655832767486572, | |
| "step": 2680, | |
| "valid_targets_mean": 3883.4, | |
| "valid_targets_min": 1242 | |
| }, | |
| { | |
| "epoch": 2.9701327433628317, | |
| "grad_norm": 0.1689612601059163, | |
| "learning_rate": 1.6953715925283902e-05, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20916379988193512, | |
| "step": 2685, | |
| "valid_targets_mean": 4515.2, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 2.975663716814159, | |
| "grad_norm": 0.1753227662755531, | |
| "learning_rate": 1.6877412941658306e-05, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21277201175689697, | |
| "step": 2690, | |
| "valid_targets_mean": 4229.7, | |
| "valid_targets_min": 1335 | |
| }, | |
| { | |
| "epoch": 2.981194690265487, | |
| "grad_norm": 0.16654432991299564, | |
| "learning_rate": 1.680115651577172e-05, | |
| "loss": 0.2331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22387611865997314, | |
| "step": 2695, | |
| "valid_targets_mean": 4196.7, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 2.9867256637168142, | |
| "grad_norm": 0.1768741233741524, | |
| "learning_rate": 1.6724947784606617e-05, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.233941912651062, | |
| "step": 2700, | |
| "valid_targets_mean": 4194.4, | |
| "valid_targets_min": 1730 | |
| }, | |
| { | |
| "epoch": 2.9922566371681416, | |
| "grad_norm": 0.17111595039596983, | |
| "learning_rate": 1.6648787884434338e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23042543232440948, | |
| "step": 2705, | |
| "valid_targets_mean": 4278.5, | |
| "valid_targets_min": 2187 | |
| }, | |
| { | |
| "epoch": 2.997787610619469, | |
| "grad_norm": 0.17536242095915167, | |
| "learning_rate": 1.657267795079817e-05, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2177818864583969, | |
| "step": 2710, | |
| "valid_targets_mean": 4459.2, | |
| "valid_targets_min": 1768 | |
| }, | |
| { | |
| "epoch": 3.0033185840707963, | |
| "grad_norm": 0.18111048263967075, | |
| "learning_rate": 1.6496619118496405e-05, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2212601900100708, | |
| "step": 2715, | |
| "valid_targets_mean": 7252.6, | |
| "valid_targets_min": 2114 | |
| }, | |
| { | |
| "epoch": 3.0088495575221237, | |
| "grad_norm": 0.15378440373900218, | |
| "learning_rate": 1.6420612521565392e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24102772772312164, | |
| "step": 2720, | |
| "valid_targets_mean": 6718.6, | |
| "valid_targets_min": 1582 | |
| }, | |
| { | |
| "epoch": 3.0143805309734515, | |
| "grad_norm": 0.13600262147807765, | |
| "learning_rate": 1.634465929326267e-05, | |
| "loss": 0.2115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21981662511825562, | |
| "step": 2725, | |
| "valid_targets_mean": 6896.6, | |
| "valid_targets_min": 2379 | |
| }, | |
| { | |
| "epoch": 3.019911504424779, | |
| "grad_norm": 0.13851940484623462, | |
| "learning_rate": 1.6268760566050028e-05, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21665363013744354, | |
| "step": 2730, | |
| "valid_targets_mean": 6736.1, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 3.025442477876106, | |
| "grad_norm": 0.1417676826042863, | |
| "learning_rate": 1.6192917471576688e-05, | |
| "loss": 0.2172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20872211456298828, | |
| "step": 2735, | |
| "valid_targets_mean": 6684.1, | |
| "valid_targets_min": 2445 | |
| }, | |
| { | |
| "epoch": 3.0309734513274336, | |
| "grad_norm": 0.12839041703730172, | |
| "learning_rate": 1.611713114066234e-05, | |
| "loss": 0.2033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1809227466583252, | |
| "step": 2740, | |
| "valid_targets_mean": 6563.8, | |
| "valid_targets_min": 2047 | |
| }, | |
| { | |
| "epoch": 3.036504424778761, | |
| "grad_norm": 0.14406482359387165, | |
| "learning_rate": 1.6041402703280364e-05, | |
| "loss": 0.2096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20661181211471558, | |
| "step": 2745, | |
| "valid_targets_mean": 6684.9, | |
| "valid_targets_min": 2301 | |
| }, | |
| { | |
| "epoch": 3.0420353982300883, | |
| "grad_norm": 0.15038350256910554, | |
| "learning_rate": 1.5965733288540922e-05, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20603258907794952, | |
| "step": 2750, | |
| "valid_targets_mean": 6691.5, | |
| "valid_targets_min": 1789 | |
| }, | |
| { | |
| "epoch": 3.047566371681416, | |
| "grad_norm": 0.13064956486553506, | |
| "learning_rate": 1.589012402467418e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21354766190052032, | |
| "step": 2755, | |
| "valid_targets_mean": 7080.9, | |
| "valid_targets_min": 2291 | |
| }, | |
| { | |
| "epoch": 3.0530973451327434, | |
| "grad_norm": 0.1264279994386061, | |
| "learning_rate": 1.5814576039013433e-05, | |
| "loss": 0.2053, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21360908448696136, | |
| "step": 2760, | |
| "valid_targets_mean": 6833.5, | |
| "valid_targets_min": 1723 | |
| }, | |
| { | |
| "epoch": 3.058628318584071, | |
| "grad_norm": 0.1322344867599257, | |
| "learning_rate": 1.5739090457978323e-05, | |
| "loss": 0.2149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21593303978443146, | |
| "step": 2765, | |
| "valid_targets_mean": 7268.0, | |
| "valid_targets_min": 2510 | |
| }, | |
| { | |
| "epoch": 3.064159292035398, | |
| "grad_norm": 0.12657996342567318, | |
| "learning_rate": 1.5663668407058027e-05, | |
| "loss": 0.211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20862601697444916, | |
| "step": 2770, | |
| "valid_targets_mean": 6906.8, | |
| "valid_targets_min": 2089 | |
| }, | |
| { | |
| "epoch": 3.0696902654867255, | |
| "grad_norm": 0.16349798692224282, | |
| "learning_rate": 1.558831101079452e-05, | |
| "loss": 0.2153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21547730267047882, | |
| "step": 2775, | |
| "valid_targets_mean": 6782.8, | |
| "valid_targets_min": 1875 | |
| }, | |
| { | |
| "epoch": 3.0752212389380533, | |
| "grad_norm": 0.13286243407272705, | |
| "learning_rate": 1.5513019392765756e-05, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2039157599210739, | |
| "step": 2780, | |
| "valid_targets_mean": 7116.1, | |
| "valid_targets_min": 1642 | |
| }, | |
| { | |
| "epoch": 3.0807522123893807, | |
| "grad_norm": 0.13867563147290238, | |
| "learning_rate": 1.5437794675568932e-05, | |
| "loss": 0.2082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20254142582416534, | |
| "step": 2785, | |
| "valid_targets_mean": 6544.6, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 3.086283185840708, | |
| "grad_norm": 0.13332598046210764, | |
| "learning_rate": 1.536263798080377e-05, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20397226512432098, | |
| "step": 2790, | |
| "valid_targets_mean": 6431.0, | |
| "valid_targets_min": 2099 | |
| }, | |
| { | |
| "epoch": 3.0918141592920354, | |
| "grad_norm": 0.1571101148157204, | |
| "learning_rate": 1.528755042905576e-05, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2066522091627121, | |
| "step": 2795, | |
| "valid_targets_mean": 7083.3, | |
| "valid_targets_min": 1760 | |
| }, | |
| { | |
| "epoch": 3.0973451327433628, | |
| "grad_norm": 0.13905555615187867, | |
| "learning_rate": 1.5212533139879491e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21510808169841766, | |
| "step": 2800, | |
| "valid_targets_mean": 6729.2, | |
| "valid_targets_min": 2272 | |
| }, | |
| { | |
| "epoch": 3.10287610619469, | |
| "grad_norm": 0.1290076525786908, | |
| "learning_rate": 1.513758723178191e-05, | |
| "loss": 0.2096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20120497047901154, | |
| "step": 2805, | |
| "valid_targets_mean": 7121.3, | |
| "valid_targets_min": 2617 | |
| }, | |
| { | |
| "epoch": 3.1084070796460175, | |
| "grad_norm": 0.13169979853620847, | |
| "learning_rate": 1.5062713822205708e-05, | |
| "loss": 0.203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006940394639969, | |
| "step": 2810, | |
| "valid_targets_mean": 6826.8, | |
| "valid_targets_min": 1681 | |
| }, | |
| { | |
| "epoch": 3.1139380530973453, | |
| "grad_norm": 0.1343698732700322, | |
| "learning_rate": 1.49879140275126e-05, | |
| "loss": 0.2077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21670521795749664, | |
| "step": 2815, | |
| "valid_targets_mean": 6561.2, | |
| "valid_targets_min": 2087 | |
| }, | |
| { | |
| "epoch": 3.1194690265486726, | |
| "grad_norm": 0.1341534939022549, | |
| "learning_rate": 1.491318896296671e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2204877883195877, | |
| "step": 2820, | |
| "valid_targets_mean": 7094.4, | |
| "valid_targets_min": 1617 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 0.13386376899002622, | |
| "learning_rate": 1.4838539742717921e-05, | |
| "loss": 0.2043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1958327740430832, | |
| "step": 2825, | |
| "valid_targets_mean": 6953.4, | |
| "valid_targets_min": 1644 | |
| }, | |
| { | |
| "epoch": 3.1305309734513274, | |
| "grad_norm": 0.13150207602363484, | |
| "learning_rate": 1.4763967479785315e-05, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20156078040599823, | |
| "step": 2830, | |
| "valid_targets_mean": 6537.1, | |
| "valid_targets_min": 1940 | |
| }, | |
| { | |
| "epoch": 3.1360619469026547, | |
| "grad_norm": 0.13804661144368804, | |
| "learning_rate": 1.4689473286040512e-05, | |
| "loss": 0.2089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20230381190776825, | |
| "step": 2835, | |
| "valid_targets_mean": 6715.7, | |
| "valid_targets_min": 2763 | |
| }, | |
| { | |
| "epoch": 3.1415929203539825, | |
| "grad_norm": 0.13459935162025458, | |
| "learning_rate": 1.4615058272191122e-05, | |
| "loss": 0.2076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2111784666776657, | |
| "step": 2840, | |
| "valid_targets_mean": 6966.8, | |
| "valid_targets_min": 1883 | |
| }, | |
| { | |
| "epoch": 3.14712389380531, | |
| "grad_norm": 0.13351175872055387, | |
| "learning_rate": 1.4540723547764193e-05, | |
| "loss": 0.1975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1869075447320938, | |
| "step": 2845, | |
| "valid_targets_mean": 6941.3, | |
| "valid_targets_min": 2142 | |
| }, | |
| { | |
| "epoch": 3.1526548672566372, | |
| "grad_norm": 0.12841589745705162, | |
| "learning_rate": 1.4466470221089666e-05, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19431865215301514, | |
| "step": 2850, | |
| "valid_targets_mean": 6630.5, | |
| "valid_targets_min": 1710 | |
| }, | |
| { | |
| "epoch": 3.1581858407079646, | |
| "grad_norm": 0.13897877198466146, | |
| "learning_rate": 1.4392299399283825e-05, | |
| "loss": 0.2039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20424123108386993, | |
| "step": 2855, | |
| "valid_targets_mean": 6807.5, | |
| "valid_targets_min": 1845 | |
| }, | |
| { | |
| "epoch": 3.163716814159292, | |
| "grad_norm": 0.14349757114102196, | |
| "learning_rate": 1.4318212188232809e-05, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21823330223560333, | |
| "step": 2860, | |
| "valid_targets_mean": 6971.9, | |
| "valid_targets_min": 2334 | |
| }, | |
| { | |
| "epoch": 3.1692477876106193, | |
| "grad_norm": 0.1381560954856274, | |
| "learning_rate": 1.424420969257613e-05, | |
| "loss": 0.2, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20149873197078705, | |
| "step": 2865, | |
| "valid_targets_mean": 7064.6, | |
| "valid_targets_min": 1733 | |
| }, | |
| { | |
| "epoch": 3.1747787610619467, | |
| "grad_norm": 0.13496636886850824, | |
| "learning_rate": 1.4170293015690199e-05, | |
| "loss": 0.2092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21110467612743378, | |
| "step": 2870, | |
| "valid_targets_mean": 7312.6, | |
| "valid_targets_min": 2366 | |
| }, | |
| { | |
| "epoch": 3.1803097345132745, | |
| "grad_norm": 0.1248500699007006, | |
| "learning_rate": 1.4096463259671862e-05, | |
| "loss": 0.205, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19368374347686768, | |
| "step": 2875, | |
| "valid_targets_mean": 6874.0, | |
| "valid_targets_min": 1672 | |
| }, | |
| { | |
| "epoch": 3.185840707964602, | |
| "grad_norm": 0.1449326532715722, | |
| "learning_rate": 1.4022721525321968e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21856482326984406, | |
| "step": 2880, | |
| "valid_targets_mean": 6831.5, | |
| "valid_targets_min": 1840 | |
| }, | |
| { | |
| "epoch": 3.191371681415929, | |
| "grad_norm": 0.14007848492333932, | |
| "learning_rate": 1.3949068912128968e-05, | |
| "loss": 0.2149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2216811627149582, | |
| "step": 2885, | |
| "valid_targets_mean": 7025.9, | |
| "valid_targets_min": 2262 | |
| }, | |
| { | |
| "epoch": 3.1969026548672566, | |
| "grad_norm": 0.1393773543175712, | |
| "learning_rate": 1.3875506518252541e-05, | |
| "loss": 0.2011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19882957637310028, | |
| "step": 2890, | |
| "valid_targets_mean": 6473.6, | |
| "valid_targets_min": 2075 | |
| }, | |
| { | |
| "epoch": 3.202433628318584, | |
| "grad_norm": 0.13541167581271382, | |
| "learning_rate": 1.3802035440507165e-05, | |
| "loss": 0.2074, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19364877045154572, | |
| "step": 2895, | |
| "valid_targets_mean": 7005.2, | |
| "valid_targets_min": 1775 | |
| }, | |
| { | |
| "epoch": 3.2079646017699117, | |
| "grad_norm": 0.13130058937866115, | |
| "learning_rate": 1.3728656774345803e-05, | |
| "loss": 0.2066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.192404106259346, | |
| "step": 2900, | |
| "valid_targets_mean": 6840.6, | |
| "valid_targets_min": 1881 | |
| }, | |
| { | |
| "epoch": 3.213495575221239, | |
| "grad_norm": 0.16191978910049992, | |
| "learning_rate": 1.3655371613843585e-05, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22498773038387299, | |
| "step": 2905, | |
| "valid_targets_mean": 7338.1, | |
| "valid_targets_min": 1944 | |
| }, | |
| { | |
| "epoch": 3.2190265486725664, | |
| "grad_norm": 0.14374954172435303, | |
| "learning_rate": 1.3582181051681444e-05, | |
| "loss": 0.1987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20642215013504028, | |
| "step": 2910, | |
| "valid_targets_mean": 7105.5, | |
| "valid_targets_min": 2450 | |
| }, | |
| { | |
| "epoch": 3.224557522123894, | |
| "grad_norm": 0.2103431885232863, | |
| "learning_rate": 1.3509086179129863e-05, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24314801394939423, | |
| "step": 2915, | |
| "valid_targets_mean": 4285.9, | |
| "valid_targets_min": 1207 | |
| }, | |
| { | |
| "epoch": 3.230088495575221, | |
| "grad_norm": 0.20116991357887262, | |
| "learning_rate": 1.3436088086032593e-05, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2645244300365448, | |
| "step": 2920, | |
| "valid_targets_mean": 4977.9, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 3.2356194690265485, | |
| "grad_norm": 0.19035477958682215, | |
| "learning_rate": 1.3363187860790418e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2536122500896454, | |
| "step": 2925, | |
| "valid_targets_mean": 4501.8, | |
| "valid_targets_min": 1496 | |
| }, | |
| { | |
| "epoch": 3.241150442477876, | |
| "grad_norm": 0.1882294245304289, | |
| "learning_rate": 1.329038659034489e-05, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24458806216716766, | |
| "step": 2930, | |
| "valid_targets_mean": 4668.5, | |
| "valid_targets_min": 1432 | |
| }, | |
| { | |
| "epoch": 3.2466814159292037, | |
| "grad_norm": 0.18879526237379882, | |
| "learning_rate": 1.3217685360162166e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23632140457630157, | |
| "step": 2935, | |
| "valid_targets_mean": 4192.5, | |
| "valid_targets_min": 1433 | |
| }, | |
| { | |
| "epoch": 3.252212389380531, | |
| "grad_norm": 0.18752921446229623, | |
| "learning_rate": 1.3145085254216784e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24883420765399933, | |
| "step": 2940, | |
| "valid_targets_mean": 4705.0, | |
| "valid_targets_min": 1849 | |
| }, | |
| { | |
| "epoch": 3.2577433628318584, | |
| "grad_norm": 0.19623605667725005, | |
| "learning_rate": 1.307258735497555e-05, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2437725067138672, | |
| "step": 2945, | |
| "valid_targets_mean": 4095.9, | |
| "valid_targets_min": 1636 | |
| }, | |
| { | |
| "epoch": 3.2632743362831858, | |
| "grad_norm": 0.1927124265722644, | |
| "learning_rate": 1.3000192743381335e-05, | |
| "loss": 0.2475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24333982169628143, | |
| "step": 2950, | |
| "valid_targets_mean": 4175.1, | |
| "valid_targets_min": 1385 | |
| }, | |
| { | |
| "epoch": 3.268805309734513, | |
| "grad_norm": 0.20458277020212548, | |
| "learning_rate": 1.2927902498837021e-05, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26310428977012634, | |
| "step": 2955, | |
| "valid_targets_mean": 4406.1, | |
| "valid_targets_min": 1876 | |
| }, | |
| { | |
| "epoch": 3.274336283185841, | |
| "grad_norm": 0.6763619952401517, | |
| "learning_rate": 1.2855717699189351e-05, | |
| "loss": 0.2317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24060583114624023, | |
| "step": 2960, | |
| "valid_targets_mean": 4190.6, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 3.2798672566371683, | |
| "grad_norm": 0.19042566564964014, | |
| "learning_rate": 1.2783639420712914e-05, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24776381254196167, | |
| "step": 2965, | |
| "valid_targets_mean": 4808.5, | |
| "valid_targets_min": 1563 | |
| }, | |
| { | |
| "epoch": 3.2853982300884956, | |
| "grad_norm": 0.1828520512706826, | |
| "learning_rate": 1.2711668738094058e-05, | |
| "loss": 0.2455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23173843324184418, | |
| "step": 2970, | |
| "valid_targets_mean": 4496.5, | |
| "valid_targets_min": 1255 | |
| }, | |
| { | |
| "epoch": 3.290929203539823, | |
| "grad_norm": 0.18056250594207857, | |
| "learning_rate": 1.263980672441487e-05, | |
| "loss": 0.2279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.229353666305542, | |
| "step": 2975, | |
| "valid_targets_mean": 4533.0, | |
| "valid_targets_min": 1273 | |
| }, | |
| { | |
| "epoch": 3.2964601769911503, | |
| "grad_norm": 0.19364661631351388, | |
| "learning_rate": 1.2568054451137186e-05, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2304421216249466, | |
| "step": 2980, | |
| "valid_targets_mean": 4621.3, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 3.3019911504424777, | |
| "grad_norm": 0.19400631450003225, | |
| "learning_rate": 1.2496412988086626e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23197375237941742, | |
| "step": 2985, | |
| "valid_targets_mean": 4243.1, | |
| "valid_targets_min": 1227 | |
| }, | |
| { | |
| "epoch": 3.307522123893805, | |
| "grad_norm": 0.1984988960484853, | |
| "learning_rate": 1.2424883403436625e-05, | |
| "loss": 0.22, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22853876650333405, | |
| "step": 2990, | |
| "valid_targets_mean": 4320.6, | |
| "valid_targets_min": 1207 | |
| }, | |
| { | |
| "epoch": 3.313053097345133, | |
| "grad_norm": 0.18771275527544765, | |
| "learning_rate": 1.2353466763692508e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23267777264118195, | |
| "step": 2995, | |
| "valid_targets_mean": 4236.7, | |
| "valid_targets_min": 1758 | |
| }, | |
| { | |
| "epoch": 3.3185840707964602, | |
| "grad_norm": 0.1896955804141215, | |
| "learning_rate": 1.2282164133675583e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22234050929546356, | |
| "step": 3000, | |
| "valid_targets_mean": 4249.2, | |
| "valid_targets_min": 1438 | |
| }, | |
| { | |
| "epoch": 3.3241150442477876, | |
| "grad_norm": 0.19571405576606912, | |
| "learning_rate": 1.2210976576507303e-05, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21612459421157837, | |
| "step": 3005, | |
| "valid_targets_mean": 4096.8, | |
| "valid_targets_min": 1263 | |
| }, | |
| { | |
| "epoch": 3.329646017699115, | |
| "grad_norm": 0.2027745588299885, | |
| "learning_rate": 1.2139905153593366e-05, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2425936609506607, | |
| "step": 3010, | |
| "valid_targets_mean": 4844.7, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 3.3351769911504423, | |
| "grad_norm": 0.20934905847919993, | |
| "learning_rate": 1.2068950924607905e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23137496411800385, | |
| "step": 3015, | |
| "valid_targets_mean": 4345.3, | |
| "valid_targets_min": 1478 | |
| }, | |
| { | |
| "epoch": 3.34070796460177, | |
| "grad_norm": 0.21487158774215187, | |
| "learning_rate": 1.1998114947477708e-05, | |
| "loss": 0.2324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26779279112815857, | |
| "step": 3020, | |
| "valid_targets_mean": 4616.3, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 3.3462389380530975, | |
| "grad_norm": 0.19786781030422343, | |
| "learning_rate": 1.1927398278366424e-05, | |
| "loss": 0.2194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22676421701908112, | |
| "step": 3025, | |
| "valid_targets_mean": 4376.1, | |
| "valid_targets_min": 1496 | |
| }, | |
| { | |
| "epoch": 3.351769911504425, | |
| "grad_norm": 0.19247392165559496, | |
| "learning_rate": 1.1856801971658815e-05, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2190667986869812, | |
| "step": 3030, | |
| "valid_targets_mean": 4303.0, | |
| "valid_targets_min": 1384 | |
| }, | |
| { | |
| "epoch": 3.357300884955752, | |
| "grad_norm": 0.19725784534691013, | |
| "learning_rate": 1.1786327079945036e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22572875022888184, | |
| "step": 3035, | |
| "valid_targets_mean": 4143.6, | |
| "valid_targets_min": 1212 | |
| }, | |
| { | |
| "epoch": 3.3628318584070795, | |
| "grad_norm": 0.1770458909120907, | |
| "learning_rate": 1.1715974654004967e-05, | |
| "loss": 0.2293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23059137165546417, | |
| "step": 3040, | |
| "valid_targets_mean": 4723.2, | |
| "valid_targets_min": 1462 | |
| }, | |
| { | |
| "epoch": 3.368362831858407, | |
| "grad_norm": 0.19598367282099285, | |
| "learning_rate": 1.164574574279251e-05, | |
| "loss": 0.2232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2207968384027481, | |
| "step": 3045, | |
| "valid_targets_mean": 4106.2, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 3.3738938053097347, | |
| "grad_norm": 0.17794904999186312, | |
| "learning_rate": 1.1575641393419965e-05, | |
| "loss": 0.2286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20861780643463135, | |
| "step": 3050, | |
| "valid_targets_mean": 4614.8, | |
| "valid_targets_min": 1237 | |
| }, | |
| { | |
| "epoch": 3.379424778761062, | |
| "grad_norm": 0.2196494727753848, | |
| "learning_rate": 1.1505662651142402e-05, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23516340553760529, | |
| "step": 3055, | |
| "valid_targets_mean": 4075.3, | |
| "valid_targets_min": 1444 | |
| }, | |
| { | |
| "epoch": 3.3849557522123894, | |
| "grad_norm": 0.18475544190212106, | |
| "learning_rate": 1.1435810559342136e-05, | |
| "loss": 0.2241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22633516788482666, | |
| "step": 3060, | |
| "valid_targets_mean": 4390.5, | |
| "valid_targets_min": 1429 | |
| }, | |
| { | |
| "epoch": 3.390486725663717, | |
| "grad_norm": 0.2074801557482992, | |
| "learning_rate": 1.1366086159513063e-05, | |
| "loss": 0.2325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23653817176818848, | |
| "step": 3065, | |
| "valid_targets_mean": 4534.3, | |
| "valid_targets_min": 1194 | |
| }, | |
| { | |
| "epoch": 3.396017699115044, | |
| "grad_norm": 0.17388033319670293, | |
| "learning_rate": 1.1296490491245242e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22861051559448242, | |
| "step": 3070, | |
| "valid_targets_mean": 4847.7, | |
| "valid_targets_min": 1586 | |
| }, | |
| { | |
| "epoch": 3.4015486725663715, | |
| "grad_norm": 0.19229470829993017, | |
| "learning_rate": 1.1227024592209326e-05, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20679305493831635, | |
| "step": 3075, | |
| "valid_targets_mean": 4184.1, | |
| "valid_targets_min": 1609 | |
| }, | |
| { | |
| "epoch": 3.4070796460176993, | |
| "grad_norm": 0.2162797749593719, | |
| "learning_rate": 1.115768949814111e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.212013840675354, | |
| "step": 3080, | |
| "valid_targets_mean": 3989.7, | |
| "valid_targets_min": 1675 | |
| }, | |
| { | |
| "epoch": 3.4126106194690267, | |
| "grad_norm": 0.19739218833829453, | |
| "learning_rate": 1.1088486242826088e-05, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21500758826732635, | |
| "step": 3085, | |
| "valid_targets_mean": 4299.5, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 3.418141592920354, | |
| "grad_norm": 0.20596507770495046, | |
| "learning_rate": 1.101941585808406e-05, | |
| "loss": 0.218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21605031192302704, | |
| "step": 3090, | |
| "valid_targets_mean": 4011.4, | |
| "valid_targets_min": 1385 | |
| }, | |
| { | |
| "epoch": 3.4236725663716814, | |
| "grad_norm": 0.18201711308927845, | |
| "learning_rate": 1.0950479373753675e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006060630083084, | |
| "step": 3095, | |
| "valid_targets_mean": 4537.2, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 3.4292035398230087, | |
| "grad_norm": 0.21239025509751497, | |
| "learning_rate": 1.0881677817677194e-05, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2181689739227295, | |
| "step": 3100, | |
| "valid_targets_mean": 4228.4, | |
| "valid_targets_min": 1397 | |
| }, | |
| { | |
| "epoch": 3.434734513274336, | |
| "grad_norm": 0.17658782500917433, | |
| "learning_rate": 1.0813012215685064e-05, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19946689903736115, | |
| "step": 3105, | |
| "valid_targets_mean": 4689.5, | |
| "valid_targets_min": 1664 | |
| }, | |
| { | |
| "epoch": 3.440265486725664, | |
| "grad_norm": 0.18220228702095048, | |
| "learning_rate": 1.0744483591580648e-05, | |
| "loss": 0.2146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21331290900707245, | |
| "step": 3110, | |
| "valid_targets_mean": 4503.2, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 3.4457964601769913, | |
| "grad_norm": 0.19292914456559715, | |
| "learning_rate": 1.0676092967124985e-05, | |
| "loss": 0.2177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2079990655183792, | |
| "step": 3115, | |
| "valid_targets_mean": 4184.0, | |
| "valid_targets_min": 1596 | |
| }, | |
| { | |
| "epoch": 3.4513274336283186, | |
| "grad_norm": 0.19168197489667196, | |
| "learning_rate": 1.0607841362021562e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2185291051864624, | |
| "step": 3120, | |
| "valid_targets_mean": 4431.8, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 3.456858407079646, | |
| "grad_norm": 0.19281674664451617, | |
| "learning_rate": 1.0539729793901045e-05, | |
| "loss": 0.2297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22180843353271484, | |
| "step": 3125, | |
| "valid_targets_mean": 5078.7, | |
| "valid_targets_min": 1560 | |
| }, | |
| { | |
| "epoch": 3.4623893805309733, | |
| "grad_norm": 0.19147343583020165, | |
| "learning_rate": 1.0471759278306181e-05, | |
| "loss": 0.225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2231491059064865, | |
| "step": 3130, | |
| "valid_targets_mean": 4391.8, | |
| "valid_targets_min": 1796 | |
| }, | |
| { | |
| "epoch": 3.4679203539823007, | |
| "grad_norm": 0.1943783834063135, | |
| "learning_rate": 1.0403930828676616e-05, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17563997209072113, | |
| "step": 3135, | |
| "valid_targets_mean": 5109.3, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 3.4734513274336285, | |
| "grad_norm": 0.14509244891590012, | |
| "learning_rate": 1.0336245456333794e-05, | |
| "loss": 0.1386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15025149285793304, | |
| "step": 3140, | |
| "valid_targets_mean": 5938.4, | |
| "valid_targets_min": 2410 | |
| }, | |
| { | |
| "epoch": 3.478982300884956, | |
| "grad_norm": 0.1412188191159081, | |
| "learning_rate": 1.0268704170465884e-05, | |
| "loss": 0.1401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13931624591350555, | |
| "step": 3145, | |
| "valid_targets_mean": 5701.8, | |
| "valid_targets_min": 1738 | |
| }, | |
| { | |
| "epoch": 3.484513274336283, | |
| "grad_norm": 0.13660913665051003, | |
| "learning_rate": 1.0201307978112704e-05, | |
| "loss": 0.1371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13620178401470184, | |
| "step": 3150, | |
| "valid_targets_mean": 5636.8, | |
| "valid_targets_min": 2166 | |
| }, | |
| { | |
| "epoch": 3.4900442477876106, | |
| "grad_norm": 0.136457864708975, | |
| "learning_rate": 1.0134057884150765e-05, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13991427421569824, | |
| "step": 3155, | |
| "valid_targets_mean": 5739.6, | |
| "valid_targets_min": 2380 | |
| }, | |
| { | |
| "epoch": 3.495575221238938, | |
| "grad_norm": 0.15076926498189058, | |
| "learning_rate": 1.0066954891278243e-05, | |
| "loss": 0.1425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13968341052532196, | |
| "step": 3160, | |
| "valid_targets_mean": 5487.5, | |
| "valid_targets_min": 2383 | |
| }, | |
| { | |
| "epoch": 3.5011061946902657, | |
| "grad_norm": 0.1282211901884834, | |
| "learning_rate": 1.0000000000000006e-05, | |
| "loss": 0.1376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12800611555576324, | |
| "step": 3165, | |
| "valid_targets_mean": 5864.6, | |
| "valid_targets_min": 2740 | |
| }, | |
| { | |
| "epoch": 3.5066371681415927, | |
| "grad_norm": 0.13944639772438178, | |
| "learning_rate": 9.933194208612755e-06, | |
| "loss": 0.1322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13544191420078278, | |
| "step": 3170, | |
| "valid_targets_mean": 5321.1, | |
| "valid_targets_min": 1643 | |
| }, | |
| { | |
| "epoch": 3.5121681415929205, | |
| "grad_norm": 0.1398583430493293, | |
| "learning_rate": 9.86653851319014e-06, | |
| "loss": 0.1295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12386005371809006, | |
| "step": 3175, | |
| "valid_targets_mean": 5644.3, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 3.517699115044248, | |
| "grad_norm": 0.1284867047941179, | |
| "learning_rate": 9.800033907567825e-06, | |
| "loss": 0.1388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13599428534507751, | |
| "step": 3180, | |
| "valid_targets_mean": 6172.9, | |
| "valid_targets_min": 2008 | |
| }, | |
| { | |
| "epoch": 3.523230088495575, | |
| "grad_norm": 0.13395128770049955, | |
| "learning_rate": 9.733681383328764e-06, | |
| "loss": 0.1272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1400829553604126, | |
| "step": 3185, | |
| "valid_targets_mean": 6013.9, | |
| "valid_targets_min": 1750 | |
| }, | |
| { | |
| "epoch": 3.5287610619469025, | |
| "grad_norm": 0.13014001927995014, | |
| "learning_rate": 9.667481929788371e-06, | |
| "loss": 0.1441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14366762340068817, | |
| "step": 3190, | |
| "valid_targets_mean": 5896.5, | |
| "valid_targets_min": 1777 | |
| }, | |
| { | |
| "epoch": 3.53429203539823, | |
| "grad_norm": 0.19543002445876656, | |
| "learning_rate": 9.601436533979776e-06, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13715559244155884, | |
| "step": 3195, | |
| "valid_targets_mean": 5619.7, | |
| "valid_targets_min": 1866 | |
| }, | |
| { | |
| "epoch": 3.5398230088495577, | |
| "grad_norm": 0.139015086635832, | |
| "learning_rate": 9.535546180639108e-06, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13973872363567352, | |
| "step": 3200, | |
| "valid_targets_mean": 5370.2, | |
| "valid_targets_min": 1730 | |
| }, | |
| { | |
| "epoch": 3.545353982300885, | |
| "grad_norm": 0.1379748436829809, | |
| "learning_rate": 9.46981185219083e-06, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13197638094425201, | |
| "step": 3205, | |
| "valid_targets_mean": 5825.6, | |
| "valid_targets_min": 1898 | |
| }, | |
| { | |
| "epoch": 3.5508849557522124, | |
| "grad_norm": 0.14617031557294963, | |
| "learning_rate": 9.404234528733035e-06, | |
| "loss": 0.1305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13495153188705444, | |
| "step": 3210, | |
| "valid_targets_mean": 5541.8, | |
| "valid_targets_min": 1377 | |
| }, | |
| { | |
| "epoch": 3.5564159292035398, | |
| "grad_norm": 0.1349327550131193, | |
| "learning_rate": 9.338815188022932e-06, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1403791308403015, | |
| "step": 3215, | |
| "valid_targets_mean": 5566.9, | |
| "valid_targets_min": 1522 | |
| }, | |
| { | |
| "epoch": 3.561946902654867, | |
| "grad_norm": 0.13403029475590722, | |
| "learning_rate": 9.273554805462159e-06, | |
| "loss": 0.1346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13012872636318207, | |
| "step": 3220, | |
| "valid_targets_mean": 5581.0, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 3.567477876106195, | |
| "grad_norm": 0.13446672936096488, | |
| "learning_rate": 9.208454354082312e-06, | |
| "loss": 0.1309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1381857991218567, | |
| "step": 3225, | |
| "valid_targets_mean": 5720.4, | |
| "valid_targets_min": 1917 | |
| }, | |
| { | |
| "epoch": 3.573008849557522, | |
| "grad_norm": 0.13363949844800652, | |
| "learning_rate": 9.143514804530413e-06, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13137178122997284, | |
| "step": 3230, | |
| "valid_targets_mean": 5464.2, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 3.5785398230088497, | |
| "grad_norm": 0.14720066978217278, | |
| "learning_rate": 9.07873712505444e-06, | |
| "loss": 0.1361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1314253807067871, | |
| "step": 3235, | |
| "valid_targets_mean": 5706.1, | |
| "valid_targets_min": 2341 | |
| }, | |
| { | |
| "epoch": 3.584070796460177, | |
| "grad_norm": 0.13108570956009358, | |
| "learning_rate": 9.014122281488883e-06, | |
| "loss": 0.1309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12424379587173462, | |
| "step": 3240, | |
| "valid_targets_mean": 5452.3, | |
| "valid_targets_min": 2118 | |
| }, | |
| { | |
| "epoch": 3.5896017699115044, | |
| "grad_norm": 0.14664169610702835, | |
| "learning_rate": 8.94967123724036e-06, | |
| "loss": 0.1364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12991900742053986, | |
| "step": 3245, | |
| "valid_targets_mean": 5547.9, | |
| "valid_targets_min": 1980 | |
| }, | |
| { | |
| "epoch": 3.5951327433628317, | |
| "grad_norm": 0.13334119576547146, | |
| "learning_rate": 8.885384953273235e-06, | |
| "loss": 0.1292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12634767591953278, | |
| "step": 3250, | |
| "valid_targets_mean": 5751.0, | |
| "valid_targets_min": 2882 | |
| }, | |
| { | |
| "epoch": 3.600663716814159, | |
| "grad_norm": 0.13096633827751358, | |
| "learning_rate": 8.821264388095304e-06, | |
| "loss": 0.1334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1260157823562622, | |
| "step": 3255, | |
| "valid_targets_mean": 5552.3, | |
| "valid_targets_min": 2035 | |
| }, | |
| { | |
| "epoch": 3.606194690265487, | |
| "grad_norm": 0.13799559397751943, | |
| "learning_rate": 8.757310497743498e-06, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12790890038013458, | |
| "step": 3260, | |
| "valid_targets_mean": 5325.0, | |
| "valid_targets_min": 1972 | |
| }, | |
| { | |
| "epoch": 3.6117256637168142, | |
| "grad_norm": 0.13953855145433391, | |
| "learning_rate": 8.693524235769608e-06, | |
| "loss": 0.1363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12778446078300476, | |
| "step": 3265, | |
| "valid_targets_mean": 5419.1, | |
| "valid_targets_min": 2222 | |
| }, | |
| { | |
| "epoch": 3.6172566371681416, | |
| "grad_norm": 0.13364765261219527, | |
| "learning_rate": 8.629906553226132e-06, | |
| "loss": 0.1354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12530052661895752, | |
| "step": 3270, | |
| "valid_targets_mean": 5859.9, | |
| "valid_targets_min": 1545 | |
| }, | |
| { | |
| "epoch": 3.622787610619469, | |
| "grad_norm": 0.13589586673817328, | |
| "learning_rate": 8.566458398652e-06, | |
| "loss": 0.1225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12432973831892014, | |
| "step": 3275, | |
| "valid_targets_mean": 5435.5, | |
| "valid_targets_min": 1569 | |
| }, | |
| { | |
| "epoch": 3.6283185840707963, | |
| "grad_norm": 0.13574522183072446, | |
| "learning_rate": 8.503180718058512e-06, | |
| "loss": 0.1325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1325133889913559, | |
| "step": 3280, | |
| "valid_targets_mean": 5686.7, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 3.633849557522124, | |
| "grad_norm": 0.13668367006194093, | |
| "learning_rate": 8.440074454915202e-06, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11800462007522583, | |
| "step": 3285, | |
| "valid_targets_mean": 5668.3, | |
| "valid_targets_min": 1754 | |
| }, | |
| { | |
| "epoch": 3.6393805309734515, | |
| "grad_norm": 0.16562493516039267, | |
| "learning_rate": 8.377140550135755e-06, | |
| "loss": 0.1321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14407625794410706, | |
| "step": 3290, | |
| "valid_targets_mean": 5627.9, | |
| "valid_targets_min": 2227 | |
| }, | |
| { | |
| "epoch": 3.644911504424779, | |
| "grad_norm": 0.1351462635946838, | |
| "learning_rate": 8.314379942064015e-06, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1450377106666565, | |
| "step": 3295, | |
| "valid_targets_mean": 5550.3, | |
| "valid_targets_min": 1563 | |
| }, | |
| { | |
| "epoch": 3.650442477876106, | |
| "grad_norm": 0.1399832160910217, | |
| "learning_rate": 8.251793566459967e-06, | |
| "loss": 0.1259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12993066012859344, | |
| "step": 3300, | |
| "valid_targets_mean": 5259.5, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 3.6559734513274336, | |
| "grad_norm": 0.13902579575633053, | |
| "learning_rate": 8.189382356485766e-06, | |
| "loss": 0.1267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13709381222724915, | |
| "step": 3305, | |
| "valid_targets_mean": 5714.9, | |
| "valid_targets_min": 2074 | |
| }, | |
| { | |
| "epoch": 3.661504424778761, | |
| "grad_norm": 0.19561655533517033, | |
| "learning_rate": 8.127147242691898e-06, | |
| "loss": 0.1316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12059325724840164, | |
| "step": 3310, | |
| "valid_targets_mean": 5623.4, | |
| "valid_targets_min": 1560 | |
| }, | |
| { | |
| "epoch": 3.6670353982300883, | |
| "grad_norm": 0.13242050773492808, | |
| "learning_rate": 8.065089153003229e-06, | |
| "loss": 0.1283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12200797349214554, | |
| "step": 3315, | |
| "valid_targets_mean": 5901.2, | |
| "valid_targets_min": 1804 | |
| }, | |
| { | |
| "epoch": 3.672566371681416, | |
| "grad_norm": 0.14369641063953326, | |
| "learning_rate": 8.003209012705187e-06, | |
| "loss": 0.1274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12380051612854004, | |
| "step": 3320, | |
| "valid_targets_mean": 5515.3, | |
| "valid_targets_min": 2151 | |
| }, | |
| { | |
| "epoch": 3.6780973451327434, | |
| "grad_norm": 0.15872176564115237, | |
| "learning_rate": 7.941507744429986e-06, | |
| "loss": 0.1299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12813197076320648, | |
| "step": 3325, | |
| "valid_targets_mean": 5702.0, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 3.683628318584071, | |
| "grad_norm": 0.14713388933945726, | |
| "learning_rate": 7.879986268142888e-06, | |
| "loss": 0.1311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13191306591033936, | |
| "step": 3330, | |
| "valid_targets_mean": 5687.7, | |
| "valid_targets_min": 2524 | |
| }, | |
| { | |
| "epoch": 3.689159292035398, | |
| "grad_norm": 0.1313688201624093, | |
| "learning_rate": 7.81864550112841e-06, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13270674645900726, | |
| "step": 3335, | |
| "valid_targets_mean": 5539.7, | |
| "valid_targets_min": 2082 | |
| }, | |
| { | |
| "epoch": 3.6946902654867255, | |
| "grad_norm": 0.14455986485893416, | |
| "learning_rate": 7.757486357976728e-06, | |
| "loss": 0.1243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12998269498348236, | |
| "step": 3340, | |
| "valid_targets_mean": 5891.3, | |
| "valid_targets_min": 2331 | |
| }, | |
| { | |
| "epoch": 3.7002212389380533, | |
| "grad_norm": 0.16040425677471426, | |
| "learning_rate": 7.69650975057e-06, | |
| "loss": 0.1291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12375136464834213, | |
| "step": 3345, | |
| "valid_targets_mean": 5382.4, | |
| "valid_targets_min": 1673 | |
| }, | |
| { | |
| "epoch": 3.7057522123893807, | |
| "grad_norm": 0.13768864141470227, | |
| "learning_rate": 7.635716588068777e-06, | |
| "loss": 0.1243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1354590207338333, | |
| "step": 3350, | |
| "valid_targets_mean": 5494.8, | |
| "valid_targets_min": 1656 | |
| }, | |
| { | |
| "epoch": 3.711283185840708, | |
| "grad_norm": 0.14160633124383176, | |
| "learning_rate": 7.575107776898458e-06, | |
| "loss": 0.1271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12877967953681946, | |
| "step": 3355, | |
| "valid_targets_mean": 5347.7, | |
| "valid_targets_min": 2217 | |
| }, | |
| { | |
| "epoch": 3.7168141592920354, | |
| "grad_norm": 0.13577089536439438, | |
| "learning_rate": 7.5146842207357415e-06, | |
| "loss": 0.1354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13344962894916534, | |
| "step": 3360, | |
| "valid_targets_mean": 5603.7, | |
| "valid_targets_min": 1864 | |
| }, | |
| { | |
| "epoch": 3.7223451327433628, | |
| "grad_norm": 0.14819276868047096, | |
| "learning_rate": 7.454446820495192e-06, | |
| "loss": 0.1287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13140900433063507, | |
| "step": 3365, | |
| "valid_targets_mean": 5717.6, | |
| "valid_targets_min": 1864 | |
| }, | |
| { | |
| "epoch": 3.72787610619469, | |
| "grad_norm": 0.13411399201931576, | |
| "learning_rate": 7.394396474315811e-06, | |
| "loss": 0.1245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11731833964586258, | |
| "step": 3370, | |
| "valid_targets_mean": 5494.1, | |
| "valid_targets_min": 2724 | |
| }, | |
| { | |
| "epoch": 3.7334070796460175, | |
| "grad_norm": 0.1533157270721631, | |
| "learning_rate": 7.334534077547582e-06, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14933831989765167, | |
| "step": 3375, | |
| "valid_targets_mean": 5536.6, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 3.7389380530973453, | |
| "grad_norm": 0.12827707358332582, | |
| "learning_rate": 7.274860522738192e-06, | |
| "loss": 0.1267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11872033029794693, | |
| "step": 3380, | |
| "valid_targets_mean": 5980.4, | |
| "valid_targets_min": 2234 | |
| }, | |
| { | |
| "epoch": 3.7444690265486726, | |
| "grad_norm": 0.13138416490990343, | |
| "learning_rate": 7.215376699619712e-06, | |
| "loss": 0.1227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11342080682516098, | |
| "step": 3385, | |
| "valid_targets_mean": 5468.4, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.14298036740256476, | |
| "learning_rate": 7.156083495095276e-06, | |
| "loss": 0.1265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12975452840328217, | |
| "step": 3390, | |
| "valid_targets_mean": 5566.3, | |
| "valid_targets_min": 2194 | |
| }, | |
| { | |
| "epoch": 3.7555309734513274, | |
| "grad_norm": 0.2327779881832984, | |
| "learning_rate": 7.0969817932259235e-06, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22393546998500824, | |
| "step": 3395, | |
| "valid_targets_mean": 4333.5, | |
| "valid_targets_min": 1988 | |
| }, | |
| { | |
| "epoch": 3.7610619469026547, | |
| "grad_norm": 0.18220686207182513, | |
| "learning_rate": 7.038072475217386e-06, | |
| "loss": 0.2286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23058883845806122, | |
| "step": 3400, | |
| "valid_targets_mean": 4373.8, | |
| "valid_targets_min": 1908 | |
| }, | |
| { | |
| "epoch": 3.7665929203539825, | |
| "grad_norm": 0.18600860661425175, | |
| "learning_rate": 6.979356419406951e-06, | |
| "loss": 0.2189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2241000384092331, | |
| "step": 3405, | |
| "valid_targets_mean": 4231.8, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 3.77212389380531, | |
| "grad_norm": 0.17458467044038659, | |
| "learning_rate": 6.920834501250371e-06, | |
| "loss": 0.2158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22292876243591309, | |
| "step": 3410, | |
| "valid_targets_mean": 4543.0, | |
| "valid_targets_min": 1917 | |
| }, | |
| { | |
| "epoch": 3.7776548672566372, | |
| "grad_norm": 0.16650615194716312, | |
| "learning_rate": 6.862507593308818e-06, | |
| "loss": 0.2039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20114605128765106, | |
| "step": 3415, | |
| "valid_targets_mean": 4658.9, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 3.7831858407079646, | |
| "grad_norm": 0.16518920442896343, | |
| "learning_rate": 6.804376565235828e-06, | |
| "loss": 0.2007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19056151807308197, | |
| "step": 3420, | |
| "valid_targets_mean": 4342.9, | |
| "valid_targets_min": 1752 | |
| }, | |
| { | |
| "epoch": 3.788716814159292, | |
| "grad_norm": 0.3116729661205451, | |
| "learning_rate": 6.746442283764427e-06, | |
| "loss": 0.1966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19273680448532104, | |
| "step": 3425, | |
| "valid_targets_mean": 6714.9, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 3.7942477876106193, | |
| "grad_norm": 0.18871294774897107, | |
| "learning_rate": 6.6887056126941e-06, | |
| "loss": 0.1775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18155521154403687, | |
| "step": 3430, | |
| "valid_targets_mean": 7032.9, | |
| "valid_targets_min": 1120 | |
| }, | |
| { | |
| "epoch": 3.7997787610619467, | |
| "grad_norm": 0.15753869920119246, | |
| "learning_rate": 6.631167412877988e-06, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1789991855621338, | |
| "step": 3435, | |
| "valid_targets_mean": 7732.4, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 3.8053097345132745, | |
| "grad_norm": 0.14484824938144322, | |
| "learning_rate": 6.5738285422100236e-06, | |
| "loss": 0.1721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18605315685272217, | |
| "step": 3440, | |
| "valid_targets_mean": 8253.9, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 3.810840707964602, | |
| "grad_norm": 0.1617179718085198, | |
| "learning_rate": 6.516689855612141e-06, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15909545123577118, | |
| "step": 3445, | |
| "valid_targets_mean": 7443.7, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 3.816371681415929, | |
| "grad_norm": 0.14343506661775315, | |
| "learning_rate": 6.4597522050215414e-06, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16380876302719116, | |
| "step": 3450, | |
| "valid_targets_mean": 8026.6, | |
| "valid_targets_min": 2121 | |
| }, | |
| { | |
| "epoch": 3.8219026548672566, | |
| "grad_norm": 0.11508093264250494, | |
| "learning_rate": 6.403016439377974e-06, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.149913027882576, | |
| "step": 3455, | |
| "valid_targets_mean": 14189.0, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 3.827433628318584, | |
| "grad_norm": 0.10390762725735425, | |
| "learning_rate": 6.34648340461107e-06, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15544737875461578, | |
| "step": 3460, | |
| "valid_targets_mean": 13749.1, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 3.8329646017699117, | |
| "grad_norm": 0.10424634799165415, | |
| "learning_rate": 6.290153943627782e-06, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14662685990333557, | |
| "step": 3465, | |
| "valid_targets_mean": 12942.2, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 3.838495575221239, | |
| "grad_norm": 0.12713741660056166, | |
| "learning_rate": 6.234028896299755e-06, | |
| "loss": 0.1551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14507780969142914, | |
| "step": 3470, | |
| "valid_targets_mean": 13289.5, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 3.8440265486725664, | |
| "grad_norm": 0.15959526735262128, | |
| "learning_rate": 6.178109099450824e-06, | |
| "loss": 0.1639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1661694496870041, | |
| "step": 3475, | |
| "valid_targets_mean": 7969.1, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 3.849557522123894, | |
| "grad_norm": 0.15426105396455458, | |
| "learning_rate": 6.122395386844555e-06, | |
| "loss": 0.1719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1708608865737915, | |
| "step": 3480, | |
| "valid_targets_mean": 7390.0, | |
| "valid_targets_min": 1478 | |
| }, | |
| { | |
| "epoch": 3.855088495575221, | |
| "grad_norm": 0.170464641867238, | |
| "learning_rate": 6.0668885891717955e-06, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15790241956710815, | |
| "step": 3485, | |
| "valid_targets_mean": 6870.5, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 3.8606194690265485, | |
| "grad_norm": 0.16542620304959912, | |
| "learning_rate": 6.011589534038296e-06, | |
| "loss": 0.2012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20817852020263672, | |
| "step": 3490, | |
| "valid_targets_mean": 5153.0, | |
| "valid_targets_min": 1157 | |
| }, | |
| { | |
| "epoch": 3.866150442477876, | |
| "grad_norm": 0.15939913414600185, | |
| "learning_rate": 5.956499045952362e-06, | |
| "loss": 0.1974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19118501245975494, | |
| "step": 3495, | |
| "valid_targets_mean": 4977.4, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 3.8716814159292037, | |
| "grad_norm": 0.16827995439109927, | |
| "learning_rate": 5.901617946312572e-06, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2002989798784256, | |
| "step": 3500, | |
| "valid_targets_mean": 5111.0, | |
| "valid_targets_min": 1249 | |
| }, | |
| { | |
| "epoch": 3.877212389380531, | |
| "grad_norm": 0.15617152685761637, | |
| "learning_rate": 5.8469470533955265e-06, | |
| "loss": 0.201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18942542374134064, | |
| "step": 3505, | |
| "valid_targets_mean": 5018.4, | |
| "valid_targets_min": 1840 | |
| }, | |
| { | |
| "epoch": 3.8827433628318584, | |
| "grad_norm": 0.16955023751955867, | |
| "learning_rate": 5.792487182343645e-06, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21465431153774261, | |
| "step": 3510, | |
| "valid_targets_mean": 4398.6, | |
| "valid_targets_min": 1801 | |
| }, | |
| { | |
| "epoch": 3.8882743362831858, | |
| "grad_norm": 0.16876411880887932, | |
| "learning_rate": 5.738239145152995e-06, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21885959804058075, | |
| "step": 3515, | |
| "valid_targets_mean": 5274.5, | |
| "valid_targets_min": 2012 | |
| }, | |
| { | |
| "epoch": 3.893805309734513, | |
| "grad_norm": 0.17395468256106653, | |
| "learning_rate": 5.684203750661237e-06, | |
| "loss": 0.2194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22211961448192596, | |
| "step": 3520, | |
| "valid_targets_mean": 4296.5, | |
| "valid_targets_min": 1305 | |
| }, | |
| { | |
| "epoch": 3.899336283185841, | |
| "grad_norm": 0.149960548255276, | |
| "learning_rate": 5.630381804535514e-06, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2037537693977356, | |
| "step": 3525, | |
| "valid_targets_mean": 5300.4, | |
| "valid_targets_min": 2161 | |
| }, | |
| { | |
| "epoch": 3.9048672566371683, | |
| "grad_norm": 0.15653008173877558, | |
| "learning_rate": 5.576774109260444e-06, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23138241469860077, | |
| "step": 3530, | |
| "valid_targets_mean": 5083.0, | |
| "valid_targets_min": 1617 | |
| }, | |
| { | |
| "epoch": 3.9103982300884956, | |
| "grad_norm": 0.15570286949689432, | |
| "learning_rate": 5.523381464126178e-06, | |
| "loss": 0.2152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20910441875457764, | |
| "step": 3535, | |
| "valid_targets_mean": 4943.5, | |
| "valid_targets_min": 1345 | |
| }, | |
| { | |
| "epoch": 3.915929203539823, | |
| "grad_norm": 0.15739748417307597, | |
| "learning_rate": 5.470204665216494e-06, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20767073333263397, | |
| "step": 3540, | |
| "valid_targets_mean": 5184.1, | |
| "valid_targets_min": 1650 | |
| }, | |
| { | |
| "epoch": 3.9214601769911503, | |
| "grad_norm": 0.15618920896855482, | |
| "learning_rate": 5.417244505396866e-06, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21924906969070435, | |
| "step": 3545, | |
| "valid_targets_mean": 4767.4, | |
| "valid_targets_min": 1975 | |
| }, | |
| { | |
| "epoch": 3.926991150442478, | |
| "grad_norm": 0.15432845709938042, | |
| "learning_rate": 5.3645017743027015e-06, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23112289607524872, | |
| "step": 3550, | |
| "valid_targets_mean": 5238.3, | |
| "valid_targets_min": 1931 | |
| }, | |
| { | |
| "epoch": 3.932522123893805, | |
| "grad_norm": 0.15549776115808153, | |
| "learning_rate": 5.31197725832755e-06, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22551454603672028, | |
| "step": 3555, | |
| "valid_targets_mean": 5372.1, | |
| "valid_targets_min": 1506 | |
| }, | |
| { | |
| "epoch": 3.938053097345133, | |
| "grad_norm": 0.15834672075409317, | |
| "learning_rate": 5.25967174061137e-06, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21381090581417084, | |
| "step": 3560, | |
| "valid_targets_mean": 5147.3, | |
| "valid_targets_min": 2070 | |
| }, | |
| { | |
| "epoch": 3.9435840707964602, | |
| "grad_norm": 0.1659594670698281, | |
| "learning_rate": 5.207586001028868e-06, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23525546491146088, | |
| "step": 3565, | |
| "valid_targets_mean": 4892.4, | |
| "valid_targets_min": 1760 | |
| }, | |
| { | |
| "epoch": 3.9491150442477876, | |
| "grad_norm": 0.17015805777706655, | |
| "learning_rate": 5.15572081617784e-06, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2038782835006714, | |
| "step": 3570, | |
| "valid_targets_mean": 4105.6, | |
| "valid_targets_min": 1891 | |
| }, | |
| { | |
| "epoch": 3.954646017699115, | |
| "grad_norm": 0.20794488432468325, | |
| "learning_rate": 5.1040769593676275e-06, | |
| "loss": 0.2084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20502783358097076, | |
| "step": 3575, | |
| "valid_targets_mean": 4236.6, | |
| "valid_targets_min": 1856 | |
| }, | |
| { | |
| "epoch": 3.9601769911504423, | |
| "grad_norm": 0.18227521910625374, | |
| "learning_rate": 5.052655200607592e-06, | |
| "loss": 0.2105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21955735981464386, | |
| "step": 3580, | |
| "valid_targets_mean": 4193.2, | |
| "valid_targets_min": 1634 | |
| }, | |
| { | |
| "epoch": 3.96570796460177, | |
| "grad_norm": 0.17529830158533577, | |
| "learning_rate": 5.001456306595576e-06, | |
| "loss": 0.2004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20946133136749268, | |
| "step": 3585, | |
| "valid_targets_mean": 4184.1, | |
| "valid_targets_min": 1674 | |
| }, | |
| { | |
| "epoch": 3.9712389380530975, | |
| "grad_norm": 0.16667845564146336, | |
| "learning_rate": 4.9504810407065366e-06, | |
| "loss": 0.2008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19890736043453217, | |
| "step": 3590, | |
| "valid_targets_mean": 4282.2, | |
| "valid_targets_min": 1835 | |
| }, | |
| { | |
| "epoch": 3.976769911504425, | |
| "grad_norm": 0.16515295498701313, | |
| "learning_rate": 4.899730162981136e-06, | |
| "loss": 0.1982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20032893121242523, | |
| "step": 3595, | |
| "valid_targets_mean": 4419.7, | |
| "valid_targets_min": 1753 | |
| }, | |
| { | |
| "epoch": 3.982300884955752, | |
| "grad_norm": 0.17458563181482756, | |
| "learning_rate": 4.849204430114405e-06, | |
| "loss": 0.2066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19613082706928253, | |
| "step": 3600, | |
| "valid_targets_mean": 4369.3, | |
| "valid_targets_min": 1279 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4520, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1088722615271424e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |