Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent/a1-curriculum_medium with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent/a1-curriculum_medium with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DCAgent/a1-curriculum_medium")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DCAgent/a1-curriculum_medium")
model = AutoModelForCausalLM.from_pretrained("DCAgent/a1-curriculum_medium")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use DCAgent/a1-curriculum_medium with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DCAgent/a1-curriculum_medium"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DCAgent/a1-curriculum_medium",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/DCAgent/a1-curriculum_medium
- SGLang
How to use DCAgent/a1-curriculum_medium with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "DCAgent/a1-curriculum_medium" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DCAgent/a1-curriculum_medium",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "DCAgent/a1-curriculum_medium" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "DCAgent/a1-curriculum_medium",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use DCAgent/a1-curriculum_medium with Docker Model Runner:
docker model run hf.co/DCAgent/a1-curriculum_medium
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 3976, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008802816901408451, | |
| "grad_norm": 13.827657683108512, | |
| "learning_rate": 4.0201005025125634e-07, | |
| "loss": 0.6346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6291933059692383, | |
| "step": 5, | |
| "valid_targets_mean": 5211.6, | |
| "valid_targets_min": 2060 | |
| }, | |
| { | |
| "epoch": 0.017605633802816902, | |
| "grad_norm": 15.002198579857685, | |
| "learning_rate": 9.045226130653267e-07, | |
| "loss": 0.6336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.655707836151123, | |
| "step": 10, | |
| "valid_targets_mean": 3598.8, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 0.02640845070422535, | |
| "grad_norm": 11.241508644931885, | |
| "learning_rate": 1.407035175879397e-06, | |
| "loss": 0.6311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6641210317611694, | |
| "step": 15, | |
| "valid_targets_mean": 5315.2, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 0.035211267605633804, | |
| "grad_norm": 10.230624936396694, | |
| "learning_rate": 1.9095477386934674e-06, | |
| "loss": 0.5807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6135643124580383, | |
| "step": 20, | |
| "valid_targets_mean": 5072.2, | |
| "valid_targets_min": 2018 | |
| }, | |
| { | |
| "epoch": 0.04401408450704225, | |
| "grad_norm": 6.9128014439569005, | |
| "learning_rate": 2.412060301507538e-06, | |
| "loss": 0.5455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5414673686027527, | |
| "step": 25, | |
| "valid_targets_mean": 4854.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 0.0528169014084507, | |
| "grad_norm": 3.9683415994818874, | |
| "learning_rate": 2.914572864321608e-06, | |
| "loss": 0.5127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5432224273681641, | |
| "step": 30, | |
| "valid_targets_mean": 4240.9, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 0.061619718309859156, | |
| "grad_norm": 2.3559136739344093, | |
| "learning_rate": 3.4170854271356786e-06, | |
| "loss": 0.4755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4783904552459717, | |
| "step": 35, | |
| "valid_targets_mean": 4634.6, | |
| "valid_targets_min": 2307 | |
| }, | |
| { | |
| "epoch": 0.07042253521126761, | |
| "grad_norm": 1.2197609580429927, | |
| "learning_rate": 3.919597989949749e-06, | |
| "loss": 0.4209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37804752588272095, | |
| "step": 40, | |
| "valid_targets_mean": 4775.7, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 0.07922535211267606, | |
| "grad_norm": 1.106367434490191, | |
| "learning_rate": 4.42211055276382e-06, | |
| "loss": 0.4442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43481674790382385, | |
| "step": 45, | |
| "valid_targets_mean": 4443.8, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 0.0880281690140845, | |
| "grad_norm": 0.9698083612483738, | |
| "learning_rate": 4.92462311557789e-06, | |
| "loss": 0.4508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4421999752521515, | |
| "step": 50, | |
| "valid_targets_mean": 5016.5, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 0.09683098591549295, | |
| "grad_norm": 0.7238068152957272, | |
| "learning_rate": 5.42713567839196e-06, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39949890971183777, | |
| "step": 55, | |
| "valid_targets_mean": 5115.6, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 0.1056338028169014, | |
| "grad_norm": 0.6212048313856454, | |
| "learning_rate": 5.9296482412060305e-06, | |
| "loss": 0.4014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3492279350757599, | |
| "step": 60, | |
| "valid_targets_mean": 6253.6, | |
| "valid_targets_min": 3489 | |
| }, | |
| { | |
| "epoch": 0.11443661971830986, | |
| "grad_norm": 0.5918113808756934, | |
| "learning_rate": 6.4321608040201015e-06, | |
| "loss": 0.405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.447465717792511, | |
| "step": 65, | |
| "valid_targets_mean": 7145.5, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 0.12323943661971831, | |
| "grad_norm": 0.5958533552543912, | |
| "learning_rate": 6.934673366834172e-06, | |
| "loss": 0.3717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35591843724250793, | |
| "step": 70, | |
| "valid_targets_mean": 4609.5, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 0.13204225352112675, | |
| "grad_norm": 0.48959920956378644, | |
| "learning_rate": 7.437185929648242e-06, | |
| "loss": 0.39, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3241525888442993, | |
| "step": 75, | |
| "valid_targets_mean": 5249.6, | |
| "valid_targets_min": 2027 | |
| }, | |
| { | |
| "epoch": 0.14084507042253522, | |
| "grad_norm": 0.5098486470457083, | |
| "learning_rate": 7.939698492462312e-06, | |
| "loss": 0.3668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34373387694358826, | |
| "step": 80, | |
| "valid_targets_mean": 5319.1, | |
| "valid_targets_min": 2110 | |
| }, | |
| { | |
| "epoch": 0.14964788732394366, | |
| "grad_norm": 0.5472478185107896, | |
| "learning_rate": 8.442211055276383e-06, | |
| "loss": 0.3597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34631359577178955, | |
| "step": 85, | |
| "valid_targets_mean": 4302.9, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 0.15845070422535212, | |
| "grad_norm": 0.5403760456172538, | |
| "learning_rate": 8.944723618090452e-06, | |
| "loss": 0.3577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40663671493530273, | |
| "step": 90, | |
| "valid_targets_mean": 5238.4, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 0.16725352112676056, | |
| "grad_norm": 0.6045790073336218, | |
| "learning_rate": 9.447236180904523e-06, | |
| "loss": 0.3687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3409307599067688, | |
| "step": 95, | |
| "valid_targets_mean": 4000.7, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 0.176056338028169, | |
| "grad_norm": 0.52698948489387, | |
| "learning_rate": 9.949748743718594e-06, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3399948477745056, | |
| "step": 100, | |
| "valid_targets_mean": 4394.0, | |
| "valid_targets_min": 244 | |
| }, | |
| { | |
| "epoch": 0.18485915492957747, | |
| "grad_norm": 0.5471398541435097, | |
| "learning_rate": 1.0452261306532665e-05, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3295767307281494, | |
| "step": 105, | |
| "valid_targets_mean": 4112.8, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 0.1936619718309859, | |
| "grad_norm": 0.5579577055394369, | |
| "learning_rate": 1.0954773869346736e-05, | |
| "loss": 0.3261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35230472683906555, | |
| "step": 110, | |
| "valid_targets_mean": 4216.4, | |
| "valid_targets_min": 585 | |
| }, | |
| { | |
| "epoch": 0.20246478873239437, | |
| "grad_norm": 0.51118334276774, | |
| "learning_rate": 1.1457286432160805e-05, | |
| "loss": 0.3036, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33843207359313965, | |
| "step": 115, | |
| "valid_targets_mean": 4561.2, | |
| "valid_targets_min": 1263 | |
| }, | |
| { | |
| "epoch": 0.2112676056338028, | |
| "grad_norm": 0.4996793116846815, | |
| "learning_rate": 1.1959798994974876e-05, | |
| "loss": 0.3068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34131181240081787, | |
| "step": 120, | |
| "valid_targets_mean": 5550.4, | |
| "valid_targets_min": 659 | |
| }, | |
| { | |
| "epoch": 0.22007042253521128, | |
| "grad_norm": 0.5706858209151635, | |
| "learning_rate": 1.2462311557788947e-05, | |
| "loss": 0.3086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30767911672592163, | |
| "step": 125, | |
| "valid_targets_mean": 3704.9, | |
| "valid_targets_min": 433 | |
| }, | |
| { | |
| "epoch": 0.22887323943661972, | |
| "grad_norm": 0.4623490905699537, | |
| "learning_rate": 1.2964824120603017e-05, | |
| "loss": 0.313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3191690444946289, | |
| "step": 130, | |
| "valid_targets_mean": 6061.6, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 0.23767605633802816, | |
| "grad_norm": 0.4835203190001623, | |
| "learning_rate": 1.3467336683417087e-05, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3369474709033966, | |
| "step": 135, | |
| "valid_targets_mean": 6046.4, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 0.24647887323943662, | |
| "grad_norm": 0.5507070791685443, | |
| "learning_rate": 1.3969849246231157e-05, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27029144763946533, | |
| "step": 140, | |
| "valid_targets_mean": 3615.1, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 0.25528169014084506, | |
| "grad_norm": 0.5668675656789365, | |
| "learning_rate": 1.4472361809045228e-05, | |
| "loss": 0.3246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3400095999240875, | |
| "step": 145, | |
| "valid_targets_mean": 5495.3, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 0.2640845070422535, | |
| "grad_norm": 0.6127079156137091, | |
| "learning_rate": 1.4974874371859299e-05, | |
| "loss": 0.2966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28765174746513367, | |
| "step": 150, | |
| "valid_targets_mean": 3895.4, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 0.272887323943662, | |
| "grad_norm": 0.4860501748678822, | |
| "learning_rate": 1.547738693467337e-05, | |
| "loss": 0.3044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2345464527606964, | |
| "step": 155, | |
| "valid_targets_mean": 4808.2, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 0.28169014084507044, | |
| "grad_norm": 1.5528426414872272, | |
| "learning_rate": 1.5979899497487437e-05, | |
| "loss": 0.3073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30732953548431396, | |
| "step": 160, | |
| "valid_targets_mean": 4137.6, | |
| "valid_targets_min": 2045 | |
| }, | |
| { | |
| "epoch": 0.2904929577464789, | |
| "grad_norm": 0.5814494425469521, | |
| "learning_rate": 1.6482412060301508e-05, | |
| "loss": 0.2912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.299929141998291, | |
| "step": 165, | |
| "valid_targets_mean": 3847.6, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 0.2992957746478873, | |
| "grad_norm": 0.5581924010115226, | |
| "learning_rate": 1.698492462311558e-05, | |
| "loss": 0.2879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2811720371246338, | |
| "step": 170, | |
| "valid_targets_mean": 4817.3, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 0.30809859154929575, | |
| "grad_norm": 0.507653886741546, | |
| "learning_rate": 1.748743718592965e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2968273162841797, | |
| "step": 175, | |
| "valid_targets_mean": 5214.4, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 0.31690140845070425, | |
| "grad_norm": 0.5513424359567471, | |
| "learning_rate": 1.798994974874372e-05, | |
| "loss": 0.2916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31301823258399963, | |
| "step": 180, | |
| "valid_targets_mean": 4700.6, | |
| "valid_targets_min": 2080 | |
| }, | |
| { | |
| "epoch": 0.3257042253521127, | |
| "grad_norm": 0.5909380954222354, | |
| "learning_rate": 1.8492462311557792e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2663402557373047, | |
| "step": 185, | |
| "valid_targets_mean": 3326.8, | |
| "valid_targets_min": 640 | |
| }, | |
| { | |
| "epoch": 0.3345070422535211, | |
| "grad_norm": 0.587063206779181, | |
| "learning_rate": 1.899497487437186e-05, | |
| "loss": 0.2872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3252738118171692, | |
| "step": 190, | |
| "valid_targets_mean": 4535.1, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 0.34330985915492956, | |
| "grad_norm": 0.583654689601876, | |
| "learning_rate": 1.949748743718593e-05, | |
| "loss": 0.2792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3378089368343353, | |
| "step": 195, | |
| "valid_targets_mean": 4562.3, | |
| "valid_targets_min": 1960 | |
| }, | |
| { | |
| "epoch": 0.352112676056338, | |
| "grad_norm": 0.5705929116699273, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3940221071243286, | |
| "step": 200, | |
| "valid_targets_mean": 4847.8, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 0.3609154929577465, | |
| "grad_norm": 0.7617525274561537, | |
| "learning_rate": 2.0502512562814073e-05, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32728853821754456, | |
| "step": 205, | |
| "valid_targets_mean": 4190.1, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 0.36971830985915494, | |
| "grad_norm": 0.5899256364911575, | |
| "learning_rate": 2.1005025125628144e-05, | |
| "loss": 0.2856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2600407600402832, | |
| "step": 210, | |
| "valid_targets_mean": 4277.6, | |
| "valid_targets_min": 1996 | |
| }, | |
| { | |
| "epoch": 0.3785211267605634, | |
| "grad_norm": 0.5344559764470844, | |
| "learning_rate": 2.150753768844221e-05, | |
| "loss": 0.2602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26998916268348694, | |
| "step": 215, | |
| "valid_targets_mean": 5398.9, | |
| "valid_targets_min": 1750 | |
| }, | |
| { | |
| "epoch": 0.3873239436619718, | |
| "grad_norm": 0.5275596893119097, | |
| "learning_rate": 2.2010050251256282e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27641168236732483, | |
| "step": 220, | |
| "valid_targets_mean": 4865.1, | |
| "valid_targets_min": 2576 | |
| }, | |
| { | |
| "epoch": 0.3961267605633803, | |
| "grad_norm": 0.5380817473803088, | |
| "learning_rate": 2.2512562814070353e-05, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29618871212005615, | |
| "step": 225, | |
| "valid_targets_mean": 4623.6, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 0.40492957746478875, | |
| "grad_norm": 0.6198540130720901, | |
| "learning_rate": 2.3015075376884424e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33167192339897156, | |
| "step": 230, | |
| "valid_targets_mean": 5299.0, | |
| "valid_targets_min": 379 | |
| }, | |
| { | |
| "epoch": 0.4137323943661972, | |
| "grad_norm": 0.5415300304921695, | |
| "learning_rate": 2.3517587939698495e-05, | |
| "loss": 0.2821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26257216930389404, | |
| "step": 235, | |
| "valid_targets_mean": 3679.8, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 0.4225352112676056, | |
| "grad_norm": 0.524302812838288, | |
| "learning_rate": 2.4020100502512566e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2730346620082855, | |
| "step": 240, | |
| "valid_targets_mean": 5045.1, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 0.43133802816901406, | |
| "grad_norm": 0.7846450130079605, | |
| "learning_rate": 2.4522613065326634e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28657352924346924, | |
| "step": 245, | |
| "valid_targets_mean": 4687.1, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 0.44014084507042256, | |
| "grad_norm": 0.5431571545353111, | |
| "learning_rate": 2.5025125628140705e-05, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.284923791885376, | |
| "step": 250, | |
| "valid_targets_mean": 5064.8, | |
| "valid_targets_min": 1809 | |
| }, | |
| { | |
| "epoch": 0.448943661971831, | |
| "grad_norm": 0.537008517658493, | |
| "learning_rate": 2.5527638190954776e-05, | |
| "loss": 0.2813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30335095524787903, | |
| "step": 255, | |
| "valid_targets_mean": 4442.1, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 0.45774647887323944, | |
| "grad_norm": 1.196897661508035, | |
| "learning_rate": 2.6030150753768847e-05, | |
| "loss": 0.2843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31562530994415283, | |
| "step": 260, | |
| "valid_targets_mean": 4382.1, | |
| "valid_targets_min": 451 | |
| }, | |
| { | |
| "epoch": 0.4665492957746479, | |
| "grad_norm": 0.5082875513740519, | |
| "learning_rate": 2.6532663316582917e-05, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2692258954048157, | |
| "step": 265, | |
| "valid_targets_mean": 4830.2, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 0.4753521126760563, | |
| "grad_norm": 0.5059502507346698, | |
| "learning_rate": 2.7035175879396985e-05, | |
| "loss": 0.3004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27087312936782837, | |
| "step": 270, | |
| "valid_targets_mean": 4921.0, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 0.4841549295774648, | |
| "grad_norm": 0.5711706786115346, | |
| "learning_rate": 2.7537688442211056e-05, | |
| "loss": 0.2801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3497253358364105, | |
| "step": 275, | |
| "valid_targets_mean": 4463.8, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 0.49295774647887325, | |
| "grad_norm": 0.5684640575958502, | |
| "learning_rate": 2.8040201005025127e-05, | |
| "loss": 0.2815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26423439383506775, | |
| "step": 280, | |
| "valid_targets_mean": 5263.6, | |
| "valid_targets_min": 2632 | |
| }, | |
| { | |
| "epoch": 0.5017605633802817, | |
| "grad_norm": 0.6825291711037772, | |
| "learning_rate": 2.8542713567839198e-05, | |
| "loss": 0.258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3055792450904846, | |
| "step": 285, | |
| "valid_targets_mean": 5140.0, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 0.5105633802816901, | |
| "grad_norm": 0.5095026224798578, | |
| "learning_rate": 2.904522613065327e-05, | |
| "loss": 0.2617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28127652406692505, | |
| "step": 290, | |
| "valid_targets_mean": 4741.3, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 0.5193661971830986, | |
| "grad_norm": 0.52227659529817, | |
| "learning_rate": 2.954773869346734e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29181957244873047, | |
| "step": 295, | |
| "valid_targets_mean": 5472.4, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 0.528169014084507, | |
| "grad_norm": 0.6305170958999824, | |
| "learning_rate": 3.0050251256281408e-05, | |
| "loss": 0.2717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27128323912620544, | |
| "step": 300, | |
| "valid_targets_mean": 3259.1, | |
| "valid_targets_min": 492 | |
| }, | |
| { | |
| "epoch": 0.5369718309859155, | |
| "grad_norm": 0.5411347226150495, | |
| "learning_rate": 3.055276381909548e-05, | |
| "loss": 0.2723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28141260147094727, | |
| "step": 305, | |
| "valid_targets_mean": 5473.5, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 0.545774647887324, | |
| "grad_norm": 0.5585384126215591, | |
| "learning_rate": 3.1055276381909546e-05, | |
| "loss": 0.2471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25099295377731323, | |
| "step": 310, | |
| "valid_targets_mean": 5043.1, | |
| "valid_targets_min": 2036 | |
| }, | |
| { | |
| "epoch": 0.5545774647887324, | |
| "grad_norm": 0.5520768626804684, | |
| "learning_rate": 3.155778894472362e-05, | |
| "loss": 0.2761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25178515911102295, | |
| "step": 315, | |
| "valid_targets_mean": 3727.9, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 0.5633802816901409, | |
| "grad_norm": 0.5126128227570901, | |
| "learning_rate": 3.206030150753769e-05, | |
| "loss": 0.2777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35302281379699707, | |
| "step": 320, | |
| "valid_targets_mean": 6414.8, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 0.5721830985915493, | |
| "grad_norm": 0.5534748906956227, | |
| "learning_rate": 3.256281407035176e-05, | |
| "loss": 0.2567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26682478189468384, | |
| "step": 325, | |
| "valid_targets_mean": 5330.0, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 0.5809859154929577, | |
| "grad_norm": 0.4674378490529338, | |
| "learning_rate": 3.306532663316583e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25621142983436584, | |
| "step": 330, | |
| "valid_targets_mean": 5545.2, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 0.5897887323943662, | |
| "grad_norm": 0.4455274032323051, | |
| "learning_rate": 3.3567839195979904e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20596367120742798, | |
| "step": 335, | |
| "valid_targets_mean": 5238.6, | |
| "valid_targets_min": 1883 | |
| }, | |
| { | |
| "epoch": 0.5985915492957746, | |
| "grad_norm": 0.5586543702506338, | |
| "learning_rate": 3.407035175879397e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26900315284729004, | |
| "step": 340, | |
| "valid_targets_mean": 4538.9, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 0.6073943661971831, | |
| "grad_norm": 0.5472520378158361, | |
| "learning_rate": 3.457286432160804e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2581064999103546, | |
| "step": 345, | |
| "valid_targets_mean": 5341.1, | |
| "valid_targets_min": 583 | |
| }, | |
| { | |
| "epoch": 0.6161971830985915, | |
| "grad_norm": 0.5878362015624378, | |
| "learning_rate": 3.5075376884422114e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2711326479911804, | |
| "step": 350, | |
| "valid_targets_mean": 3861.2, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.4492362797332961, | |
| "learning_rate": 3.557788944723618e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24050337076187134, | |
| "step": 355, | |
| "valid_targets_mean": 5894.1, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 0.6338028169014085, | |
| "grad_norm": 0.6197982000033839, | |
| "learning_rate": 3.6080402010050256e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27513647079467773, | |
| "step": 360, | |
| "valid_targets_mean": 4145.1, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 0.6426056338028169, | |
| "grad_norm": 0.47531652803416163, | |
| "learning_rate": 3.658291457286432e-05, | |
| "loss": 0.2367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22800657153129578, | |
| "step": 365, | |
| "valid_targets_mean": 5027.9, | |
| "valid_targets_min": 736 | |
| }, | |
| { | |
| "epoch": 0.6514084507042254, | |
| "grad_norm": 0.5014597674586907, | |
| "learning_rate": 3.708542713567839e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2688424587249756, | |
| "step": 370, | |
| "valid_targets_mean": 5159.4, | |
| "valid_targets_min": 1327 | |
| }, | |
| { | |
| "epoch": 0.6602112676056338, | |
| "grad_norm": 0.524210165596564, | |
| "learning_rate": 3.7587939698492465e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2499430626630783, | |
| "step": 375, | |
| "valid_targets_mean": 4569.6, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 0.6690140845070423, | |
| "grad_norm": 0.5181535233367678, | |
| "learning_rate": 3.809045226130653e-05, | |
| "loss": 0.2683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22815370559692383, | |
| "step": 380, | |
| "valid_targets_mean": 4789.3, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 0.6778169014084507, | |
| "grad_norm": 0.49447196223209644, | |
| "learning_rate": 3.859296482412061e-05, | |
| "loss": 0.2524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25147581100463867, | |
| "step": 385, | |
| "valid_targets_mean": 5408.9, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 0.6866197183098591, | |
| "grad_norm": 0.5087914785002985, | |
| "learning_rate": 3.9095477386934675e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23765499889850616, | |
| "step": 390, | |
| "valid_targets_mean": 4823.6, | |
| "valid_targets_min": 2241 | |
| }, | |
| { | |
| "epoch": 0.6954225352112676, | |
| "grad_norm": 0.4900531372917739, | |
| "learning_rate": 3.959798994974874e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23152068257331848, | |
| "step": 395, | |
| "valid_targets_mean": 5502.6, | |
| "valid_targets_min": 427 | |
| }, | |
| { | |
| "epoch": 0.704225352112676, | |
| "grad_norm": 0.582395815955091, | |
| "learning_rate": 3.9999992290627244e-05, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24347423017024994, | |
| "step": 400, | |
| "valid_targets_mean": 4496.8, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 0.7130281690140845, | |
| "grad_norm": 0.5382831143316139, | |
| "learning_rate": 3.999972246320468e-05, | |
| "loss": 0.2695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29593929648399353, | |
| "step": 405, | |
| "valid_targets_mean": 4739.4, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 0.721830985915493, | |
| "grad_norm": 0.7359123152928873, | |
| "learning_rate": 3.99990671730875e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20520956814289093, | |
| "step": 410, | |
| "valid_targets_mean": 4415.9, | |
| "valid_targets_min": 486 | |
| }, | |
| { | |
| "epoch": 0.7306338028169014, | |
| "grad_norm": 0.5046858739217256, | |
| "learning_rate": 3.9998026432905376e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2093554437160492, | |
| "step": 415, | |
| "valid_targets_mean": 4331.2, | |
| "valid_targets_min": 2462 | |
| }, | |
| { | |
| "epoch": 0.7394366197183099, | |
| "grad_norm": 0.5286650897836068, | |
| "learning_rate": 3.9996600262716914e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23804761469364166, | |
| "step": 420, | |
| "valid_targets_mean": 5363.2, | |
| "valid_targets_min": 692 | |
| }, | |
| { | |
| "epoch": 0.7482394366197183, | |
| "grad_norm": 0.47244207192656257, | |
| "learning_rate": 3.999478869000926e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24052928388118744, | |
| "step": 425, | |
| "valid_targets_mean": 5206.3, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 0.7570422535211268, | |
| "grad_norm": 0.5355124139212174, | |
| "learning_rate": 3.999259174969759e-05, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2906588912010193, | |
| "step": 430, | |
| "valid_targets_mean": 5409.2, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 0.7658450704225352, | |
| "grad_norm": 0.7127626956644051, | |
| "learning_rate": 3.999000948412441e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22409944236278534, | |
| "step": 435, | |
| "valid_targets_mean": 4503.0, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 0.7746478873239436, | |
| "grad_norm": 0.5072122329423827, | |
| "learning_rate": 3.9987041943058776e-05, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2065044492483139, | |
| "step": 440, | |
| "valid_targets_mean": 4779.9, | |
| "valid_targets_min": 268 | |
| }, | |
| { | |
| "epoch": 0.7834507042253521, | |
| "grad_norm": 0.515955145826151, | |
| "learning_rate": 3.998368918369529e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2384430468082428, | |
| "step": 445, | |
| "valid_targets_mean": 4364.9, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 0.7922535211267606, | |
| "grad_norm": 0.4435578084444248, | |
| "learning_rate": 3.997995127065303e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2372359335422516, | |
| "step": 450, | |
| "valid_targets_mean": 5156.6, | |
| "valid_targets_min": 2336 | |
| }, | |
| { | |
| "epoch": 0.801056338028169, | |
| "grad_norm": 0.43835390515377953, | |
| "learning_rate": 3.9975828275974306e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22686849534511566, | |
| "step": 455, | |
| "valid_targets_mean": 5477.2, | |
| "valid_targets_min": 1780 | |
| }, | |
| { | |
| "epoch": 0.8098591549295775, | |
| "grad_norm": 0.5036641539726763, | |
| "learning_rate": 3.997132027912324e-05, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2819773554801941, | |
| "step": 460, | |
| "valid_targets_mean": 5394.2, | |
| "valid_targets_min": 2665 | |
| }, | |
| { | |
| "epoch": 0.8186619718309859, | |
| "grad_norm": 0.4496099276092807, | |
| "learning_rate": 3.9966427366984286e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20520561933517456, | |
| "step": 465, | |
| "valid_targets_mean": 4914.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 0.8274647887323944, | |
| "grad_norm": 0.44093143015988556, | |
| "learning_rate": 3.996114963386049e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2057202160358429, | |
| "step": 470, | |
| "valid_targets_mean": 5903.8, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 0.8362676056338029, | |
| "grad_norm": 0.5051359402153403, | |
| "learning_rate": 3.995548718147173e-05, | |
| "loss": 0.2389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2304094135761261, | |
| "step": 475, | |
| "valid_targets_mean": 4047.7, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 0.8450704225352113, | |
| "grad_norm": 0.5237330063023291, | |
| "learning_rate": 3.994944011895275e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21810948848724365, | |
| "step": 480, | |
| "valid_targets_mean": 4656.4, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 0.8538732394366197, | |
| "grad_norm": 0.490230042513615, | |
| "learning_rate": 3.994300856285098e-05, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2417060285806656, | |
| "step": 485, | |
| "valid_targets_mean": 4650.2, | |
| "valid_targets_min": 2506 | |
| }, | |
| { | |
| "epoch": 0.8626760563380281, | |
| "grad_norm": 0.567809849324794, | |
| "learning_rate": 3.993619263712442e-05, | |
| "loss": 0.2382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25893551111221313, | |
| "step": 490, | |
| "valid_targets_mean": 4158.1, | |
| "valid_targets_min": 755 | |
| }, | |
| { | |
| "epoch": 0.8714788732394366, | |
| "grad_norm": 0.547288986993124, | |
| "learning_rate": 3.992899247313912e-05, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2932701110839844, | |
| "step": 495, | |
| "valid_targets_mean": 4670.7, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 0.8802816901408451, | |
| "grad_norm": 0.5445160842638482, | |
| "learning_rate": 3.9921408209666766e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2491871565580368, | |
| "step": 500, | |
| "valid_targets_mean": 4544.2, | |
| "valid_targets_min": 2346 | |
| }, | |
| { | |
| "epoch": 0.8890845070422535, | |
| "grad_norm": 0.4539332692148032, | |
| "learning_rate": 3.9913439992881893e-05, | |
| "loss": 0.283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25512731075286865, | |
| "step": 505, | |
| "valid_targets_mean": 5475.4, | |
| "valid_targets_min": 531 | |
| }, | |
| { | |
| "epoch": 0.897887323943662, | |
| "grad_norm": 0.46236271387736466, | |
| "learning_rate": 3.990508797635917e-05, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20666904747486115, | |
| "step": 510, | |
| "valid_targets_mean": 5475.8, | |
| "valid_targets_min": 1947 | |
| }, | |
| { | |
| "epoch": 0.9066901408450704, | |
| "grad_norm": 0.566290779990144, | |
| "learning_rate": 3.989635232107034e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23342043161392212, | |
| "step": 515, | |
| "valid_targets_mean": 4323.0, | |
| "valid_targets_min": 907 | |
| }, | |
| { | |
| "epoch": 0.9154929577464789, | |
| "grad_norm": 0.515445626152835, | |
| "learning_rate": 3.988723319538123e-05, | |
| "loss": 0.2357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2399309128522873, | |
| "step": 520, | |
| "valid_targets_mean": 4221.9, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 0.9242957746478874, | |
| "grad_norm": 0.6225382588242809, | |
| "learning_rate": 3.98777307750484e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19136911630630493, | |
| "step": 525, | |
| "valid_targets_mean": 4048.9, | |
| "valid_targets_min": 570 | |
| }, | |
| { | |
| "epoch": 0.9330985915492958, | |
| "grad_norm": 0.5246925951143135, | |
| "learning_rate": 3.9867845243215835e-05, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1907949447631836, | |
| "step": 530, | |
| "valid_targets_mean": 5354.9, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 0.9419014084507042, | |
| "grad_norm": 0.47323047082010583, | |
| "learning_rate": 3.9857576790411346e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24602621793746948, | |
| "step": 535, | |
| "valid_targets_mean": 5749.2, | |
| "valid_targets_min": 2883 | |
| }, | |
| { | |
| "epoch": 0.9507042253521126, | |
| "grad_norm": 0.44994859348655813, | |
| "learning_rate": 3.9846925614542964e-05, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23070791363716125, | |
| "step": 540, | |
| "valid_targets_mean": 5560.1, | |
| "valid_targets_min": 544 | |
| }, | |
| { | |
| "epoch": 0.9595070422535211, | |
| "grad_norm": 0.5740591782280486, | |
| "learning_rate": 3.983589192089509e-05, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22747305035591125, | |
| "step": 545, | |
| "valid_targets_mean": 4489.4, | |
| "valid_targets_min": 1882 | |
| }, | |
| { | |
| "epoch": 0.9683098591549296, | |
| "grad_norm": 0.4837497218572469, | |
| "learning_rate": 3.9824475922124535e-05, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22617879509925842, | |
| "step": 550, | |
| "valid_targets_mean": 5082.2, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 0.977112676056338, | |
| "grad_norm": 0.49076782302428185, | |
| "learning_rate": 3.981267783825643e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27261850237846375, | |
| "step": 555, | |
| "valid_targets_mean": 5169.0, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 0.9859154929577465, | |
| "grad_norm": 0.4301727643509022, | |
| "learning_rate": 3.980049789667999e-05, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22846448421478271, | |
| "step": 560, | |
| "valid_targets_mean": 5574.0, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 0.9947183098591549, | |
| "grad_norm": 0.4788086337558212, | |
| "learning_rate": 3.9787936332144134e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2420114129781723, | |
| "step": 565, | |
| "valid_targets_mean": 5091.4, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 1.0035211267605635, | |
| "grad_norm": 0.5113255982872339, | |
| "learning_rate": 3.9774993386752945e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2099926769733429, | |
| "step": 570, | |
| "valid_targets_mean": 4975.6, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 1.0123239436619718, | |
| "grad_norm": 0.5455821171225294, | |
| "learning_rate": 3.976166930996102e-05, | |
| "loss": 0.2077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22108103334903717, | |
| "step": 575, | |
| "valid_targets_mean": 4464.4, | |
| "valid_targets_min": 2642 | |
| }, | |
| { | |
| "epoch": 1.0211267605633803, | |
| "grad_norm": 0.5206548185744108, | |
| "learning_rate": 3.974796435856863e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23671972751617432, | |
| "step": 580, | |
| "valid_targets_mean": 4264.8, | |
| "valid_targets_min": 683 | |
| }, | |
| { | |
| "epoch": 1.0299295774647887, | |
| "grad_norm": 0.6079214126020569, | |
| "learning_rate": 3.973387879671684e-05, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25681987404823303, | |
| "step": 585, | |
| "valid_targets_mean": 4879.8, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 1.0387323943661972, | |
| "grad_norm": 0.5616968936192965, | |
| "learning_rate": 3.971941289588234e-05, | |
| "loss": 0.2037, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20700107514858246, | |
| "step": 590, | |
| "valid_targets_mean": 4490.8, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 1.0475352112676057, | |
| "grad_norm": 0.5489518745001745, | |
| "learning_rate": 3.970456693487225e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2513611614704132, | |
| "step": 595, | |
| "valid_targets_mean": 4621.5, | |
| "valid_targets_min": 665 | |
| }, | |
| { | |
| "epoch": 1.056338028169014, | |
| "grad_norm": 0.5670215595331147, | |
| "learning_rate": 3.968934119981875e-05, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23917743563652039, | |
| "step": 600, | |
| "valid_targets_mean": 4233.6, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 1.0651408450704225, | |
| "grad_norm": 1.1483050984798748, | |
| "learning_rate": 3.967373598417355e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23130115866661072, | |
| "step": 605, | |
| "valid_targets_mean": 3843.0, | |
| "valid_targets_min": 681 | |
| }, | |
| { | |
| "epoch": 1.073943661971831, | |
| "grad_norm": 0.5145561222605187, | |
| "learning_rate": 3.965775158870226e-05, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2029074728488922, | |
| "step": 610, | |
| "valid_targets_mean": 4708.7, | |
| "valid_targets_min": 1181 | |
| }, | |
| { | |
| "epoch": 1.0827464788732395, | |
| "grad_norm": 0.4647326120650161, | |
| "learning_rate": 3.964138832147856e-05, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22910724580287933, | |
| "step": 615, | |
| "valid_targets_mean": 5499.4, | |
| "valid_targets_min": 1683 | |
| }, | |
| { | |
| "epoch": 1.091549295774648, | |
| "grad_norm": 0.5525909901030697, | |
| "learning_rate": 3.962464649787827e-05, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22029630839824677, | |
| "step": 620, | |
| "valid_targets_mean": 4462.5, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 1.1003521126760563, | |
| "grad_norm": 0.5094271862356494, | |
| "learning_rate": 3.960752644057329e-05, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21012914180755615, | |
| "step": 625, | |
| "valid_targets_mean": 4739.3, | |
| "valid_targets_min": 2127 | |
| }, | |
| { | |
| "epoch": 1.1091549295774648, | |
| "grad_norm": 0.4516003323282682, | |
| "learning_rate": 3.9590028479525384e-05, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19071654975414276, | |
| "step": 630, | |
| "valid_targets_mean": 5580.1, | |
| "valid_targets_min": 1963 | |
| }, | |
| { | |
| "epoch": 1.1179577464788732, | |
| "grad_norm": 0.5011224155209435, | |
| "learning_rate": 3.957215295197978e-05, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25898879766464233, | |
| "step": 635, | |
| "valid_targets_mean": 5126.0, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 1.1267605633802817, | |
| "grad_norm": 0.5039907393850435, | |
| "learning_rate": 3.955390020245872e-05, | |
| "loss": 0.1944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18999603390693665, | |
| "step": 640, | |
| "valid_targets_mean": 4612.4, | |
| "valid_targets_min": 2116 | |
| }, | |
| { | |
| "epoch": 1.1355633802816902, | |
| "grad_norm": 0.5805999238443137, | |
| "learning_rate": 3.953527058275476e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21587485074996948, | |
| "step": 645, | |
| "valid_targets_mean": 4771.2, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 1.1443661971830985, | |
| "grad_norm": 0.47186749218482454, | |
| "learning_rate": 3.951626445192409e-05, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2083638608455658, | |
| "step": 650, | |
| "valid_targets_mean": 4932.9, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 1.153169014084507, | |
| "grad_norm": 0.5693004794120954, | |
| "learning_rate": 3.949688217627949e-05, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24248987436294556, | |
| "step": 655, | |
| "valid_targets_mean": 5795.4, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 1.1619718309859155, | |
| "grad_norm": 0.5512002280757388, | |
| "learning_rate": 3.947712412938336e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21828481554985046, | |
| "step": 660, | |
| "valid_targets_mean": 4010.2, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 1.170774647887324, | |
| "grad_norm": 0.4909486894085092, | |
| "learning_rate": 3.945699069204049e-05, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22219449281692505, | |
| "step": 665, | |
| "valid_targets_mean": 6123.8, | |
| "valid_targets_min": 2410 | |
| }, | |
| { | |
| "epoch": 1.1795774647887325, | |
| "grad_norm": 0.5266150758742527, | |
| "learning_rate": 3.9436482252290706e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23306189477443695, | |
| "step": 670, | |
| "valid_targets_mean": 4625.8, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 1.1883802816901408, | |
| "grad_norm": 0.6406249976182479, | |
| "learning_rate": 3.9415599205401424e-05, | |
| "loss": 0.2107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2163911908864975, | |
| "step": 675, | |
| "valid_targets_mean": 3297.5, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 1.1971830985915493, | |
| "grad_norm": 0.48236609850738654, | |
| "learning_rate": 3.939434195385999e-05, | |
| "loss": 0.2154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20020179450511932, | |
| "step": 680, | |
| "valid_targets_mean": 4420.4, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 1.2059859154929577, | |
| "grad_norm": 0.44578037118443625, | |
| "learning_rate": 3.937271090736599e-05, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18908943235874176, | |
| "step": 685, | |
| "valid_targets_mean": 5700.2, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 1.2147887323943662, | |
| "grad_norm": 0.5545143359344826, | |
| "learning_rate": 3.935070648282325e-05, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2028113603591919, | |
| "step": 690, | |
| "valid_targets_mean": 3600.1, | |
| "valid_targets_min": 312 | |
| }, | |
| { | |
| "epoch": 1.2235915492957747, | |
| "grad_norm": 0.8682748136776883, | |
| "learning_rate": 3.9328329104331915e-05, | |
| "loss": 0.2119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22751037776470184, | |
| "step": 695, | |
| "valid_targets_mean": 4403.5, | |
| "valid_targets_min": 735 | |
| }, | |
| { | |
| "epoch": 1.232394366197183, | |
| "grad_norm": 0.5059549350889899, | |
| "learning_rate": 3.93055792031802e-05, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20328596234321594, | |
| "step": 700, | |
| "valid_targets_mean": 5448.2, | |
| "valid_targets_min": 2375 | |
| }, | |
| { | |
| "epoch": 1.2411971830985915, | |
| "grad_norm": 0.467098649475229, | |
| "learning_rate": 3.928245721783609e-05, | |
| "loss": 0.2107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1908690333366394, | |
| "step": 705, | |
| "valid_targets_mean": 4448.2, | |
| "valid_targets_min": 423 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.6868329232637422, | |
| "learning_rate": 3.925896359393891e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2155512571334839, | |
| "step": 710, | |
| "valid_targets_mean": 5583.6, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 1.2588028169014085, | |
| "grad_norm": 0.5324370061406835, | |
| "learning_rate": 3.923509878429073e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20743754506111145, | |
| "step": 715, | |
| "valid_targets_mean": 4750.2, | |
| "valid_targets_min": 374 | |
| }, | |
| { | |
| "epoch": 1.267605633802817, | |
| "grad_norm": 0.4542496645790411, | |
| "learning_rate": 3.921086324884762e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19384685158729553, | |
| "step": 720, | |
| "valid_targets_mean": 5024.4, | |
| "valid_targets_min": 2097 | |
| }, | |
| { | |
| "epoch": 1.2764084507042255, | |
| "grad_norm": 0.5104144615485547, | |
| "learning_rate": 3.9186257454710797e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2204480767250061, | |
| "step": 725, | |
| "valid_targets_mean": 4245.7, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 1.2852112676056338, | |
| "grad_norm": 0.4385414614137968, | |
| "learning_rate": 3.9161281876117635e-05, | |
| "loss": 0.2267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21599560976028442, | |
| "step": 730, | |
| "valid_targets_mean": 6200.0, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 1.2940140845070423, | |
| "grad_norm": 0.5281254448142318, | |
| "learning_rate": 3.91359369944325e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22602570056915283, | |
| "step": 735, | |
| "valid_targets_mean": 5312.4, | |
| "valid_targets_min": 1772 | |
| }, | |
| { | |
| "epoch": 1.3028169014084507, | |
| "grad_norm": 0.5223692928143993, | |
| "learning_rate": 3.911022329813749e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20628994703292847, | |
| "step": 740, | |
| "valid_targets_mean": 3740.8, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 1.311619718309859, | |
| "grad_norm": 0.4560693256739805, | |
| "learning_rate": 3.908414128282302e-05, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2120925784111023, | |
| "step": 745, | |
| "valid_targets_mean": 5150.4, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 1.3204225352112675, | |
| "grad_norm": 0.4808727941832543, | |
| "learning_rate": 3.905769145117825e-05, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18834513425827026, | |
| "step": 750, | |
| "valid_targets_mean": 4245.9, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 1.329225352112676, | |
| "grad_norm": 0.43979832329670726, | |
| "learning_rate": 3.903087431298145e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20950552821159363, | |
| "step": 755, | |
| "valid_targets_mean": 5464.3, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 1.3380281690140845, | |
| "grad_norm": 0.4716317683605266, | |
| "learning_rate": 3.900369038509007e-05, | |
| "loss": 0.1914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19585487246513367, | |
| "step": 760, | |
| "valid_targets_mean": 4635.2, | |
| "valid_targets_min": 325 | |
| }, | |
| { | |
| "epoch": 1.346830985915493, | |
| "grad_norm": 0.5673848569696425, | |
| "learning_rate": 3.8976140191430914e-05, | |
| "loss": 0.2252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23051196336746216, | |
| "step": 765, | |
| "valid_targets_mean": 4124.6, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 1.3556338028169015, | |
| "grad_norm": 0.6316292492021036, | |
| "learning_rate": 3.894822426298994e-05, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.291012167930603, | |
| "step": 770, | |
| "valid_targets_mean": 2859.3, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 1.36443661971831, | |
| "grad_norm": 0.49572660175000344, | |
| "learning_rate": 3.891994313780205e-05, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21090422570705414, | |
| "step": 775, | |
| "valid_targets_mean": 4825.3, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 1.3732394366197183, | |
| "grad_norm": 0.4781555034756386, | |
| "learning_rate": 3.8891297360940766e-05, | |
| "loss": 0.2056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22165143489837646, | |
| "step": 780, | |
| "valid_targets_mean": 5081.8, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 1.3820422535211268, | |
| "grad_norm": 0.4693572852520603, | |
| "learning_rate": 3.886228748450765e-05, | |
| "loss": 0.2111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2092868685722351, | |
| "step": 785, | |
| "valid_targets_mean": 6787.9, | |
| "valid_targets_min": 2227 | |
| }, | |
| { | |
| "epoch": 1.3908450704225352, | |
| "grad_norm": 0.5223692473907915, | |
| "learning_rate": 3.883291406762173e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23164348304271698, | |
| "step": 790, | |
| "valid_targets_mean": 5997.0, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 1.3996478873239437, | |
| "grad_norm": 0.49730629326213827, | |
| "learning_rate": 3.880317767640867e-05, | |
| "loss": 0.2261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21987201273441315, | |
| "step": 795, | |
| "valid_targets_mean": 4377.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 1.408450704225352, | |
| "grad_norm": 0.4816494433225647, | |
| "learning_rate": 3.8773078883989906e-05, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20579151809215546, | |
| "step": 800, | |
| "valid_targets_mean": 5329.0, | |
| "valid_targets_min": 2524 | |
| }, | |
| { | |
| "epoch": 1.4172535211267605, | |
| "grad_norm": 0.44063145977887247, | |
| "learning_rate": 3.874261827047156e-05, | |
| "loss": 0.1957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20126184821128845, | |
| "step": 805, | |
| "valid_targets_mean": 5208.1, | |
| "valid_targets_min": 2885 | |
| }, | |
| { | |
| "epoch": 1.426056338028169, | |
| "grad_norm": 0.5617800144558075, | |
| "learning_rate": 3.8711796422933295e-05, | |
| "loss": 0.2149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2387004941701889, | |
| "step": 810, | |
| "valid_targets_mean": 4011.3, | |
| "valid_targets_min": 421 | |
| }, | |
| { | |
| "epoch": 1.4348591549295775, | |
| "grad_norm": 0.5233628431005589, | |
| "learning_rate": 3.868061393541698e-05, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22744175791740417, | |
| "step": 815, | |
| "valid_targets_mean": 4668.4, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 1.443661971830986, | |
| "grad_norm": 0.5079838213583276, | |
| "learning_rate": 3.8649071408915235e-05, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20706261694431305, | |
| "step": 820, | |
| "valid_targets_mean": 4449.1, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 1.4524647887323945, | |
| "grad_norm": 0.42542897163028004, | |
| "learning_rate": 3.861716945135985e-05, | |
| "loss": 0.1994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22351810336112976, | |
| "step": 825, | |
| "valid_targets_mean": 5907.6, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 1.4612676056338028, | |
| "grad_norm": 0.44963223239992595, | |
| "learning_rate": 3.858490867761009e-05, | |
| "loss": 0.2165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21441945433616638, | |
| "step": 830, | |
| "valid_targets_mean": 4899.6, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 1.4700704225352113, | |
| "grad_norm": 0.51978427932517, | |
| "learning_rate": 3.8552289709440824e-05, | |
| "loss": 0.2069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24671360850334167, | |
| "step": 835, | |
| "valid_targets_mean": 4579.4, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 1.4788732394366197, | |
| "grad_norm": 0.4396942381345711, | |
| "learning_rate": 3.851931317553054e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22896093130111694, | |
| "step": 840, | |
| "valid_targets_mean": 5062.1, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 1.4876760563380282, | |
| "grad_norm": 0.5270847775016803, | |
| "learning_rate": 3.848597971144924e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2162398397922516, | |
| "step": 845, | |
| "valid_targets_mean": 4345.8, | |
| "valid_targets_min": 1419 | |
| }, | |
| { | |
| "epoch": 1.4964788732394365, | |
| "grad_norm": 0.605574073436727, | |
| "learning_rate": 3.845228995964619e-05, | |
| "loss": 0.1988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21375906467437744, | |
| "step": 850, | |
| "valid_targets_mean": 4865.7, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 1.505281690140845, | |
| "grad_norm": 0.5257324694731058, | |
| "learning_rate": 3.8418244569437514e-05, | |
| "loss": 0.2058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18651431798934937, | |
| "step": 855, | |
| "valid_targets_mean": 5031.9, | |
| "valid_targets_min": 2162 | |
| }, | |
| { | |
| "epoch": 1.5140845070422535, | |
| "grad_norm": 0.45263098832805065, | |
| "learning_rate": 3.838384419699372e-05, | |
| "loss": 0.1991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1910189688205719, | |
| "step": 860, | |
| "valid_targets_mean": 4728.1, | |
| "valid_targets_min": 1828 | |
| }, | |
| { | |
| "epoch": 1.522887323943662, | |
| "grad_norm": 0.5248983339174231, | |
| "learning_rate": 3.8349089505327014e-05, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19189196825027466, | |
| "step": 865, | |
| "valid_targets_mean": 4052.7, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 1.5316901408450705, | |
| "grad_norm": 0.5370464550500719, | |
| "learning_rate": 3.831398116427855e-05, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1951190084218979, | |
| "step": 870, | |
| "valid_targets_mean": 3747.6, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 1.540492957746479, | |
| "grad_norm": 0.4720461398740345, | |
| "learning_rate": 3.827851985050551e-05, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.203897625207901, | |
| "step": 875, | |
| "valid_targets_mean": 5214.4, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 1.5492957746478875, | |
| "grad_norm": 0.4378339783337603, | |
| "learning_rate": 3.824270624746805e-05, | |
| "loss": 0.1891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17079252004623413, | |
| "step": 880, | |
| "valid_targets_mean": 6074.9, | |
| "valid_targets_min": 3028 | |
| }, | |
| { | |
| "epoch": 1.5580985915492958, | |
| "grad_norm": 0.5286999205948212, | |
| "learning_rate": 3.8206541045416144e-05, | |
| "loss": 0.2133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22686763107776642, | |
| "step": 885, | |
| "valid_targets_mean": 4069.4, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 1.5669014084507042, | |
| "grad_norm": 0.49595604839524876, | |
| "learning_rate": 3.8170024941376284e-05, | |
| "loss": 0.1947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20608092844486237, | |
| "step": 890, | |
| "valid_targets_mean": 4337.1, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 1.5757042253521125, | |
| "grad_norm": 0.5675733044813193, | |
| "learning_rate": 3.813315863913802e-05, | |
| "loss": 0.1948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2143465280532837, | |
| "step": 895, | |
| "valid_targets_mean": 5225.9, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 1.584507042253521, | |
| "grad_norm": 0.4330458529814344, | |
| "learning_rate": 3.809594284924043e-05, | |
| "loss": 0.2187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2484540343284607, | |
| "step": 900, | |
| "valid_targets_mean": 5612.8, | |
| "valid_targets_min": 2217 | |
| }, | |
| { | |
| "epoch": 1.5933098591549295, | |
| "grad_norm": 0.578992874473812, | |
| "learning_rate": 3.8058378288958386e-05, | |
| "loss": 0.2154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23618189990520477, | |
| "step": 905, | |
| "valid_targets_mean": 5113.6, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 1.602112676056338, | |
| "grad_norm": 0.45909378691471936, | |
| "learning_rate": 3.802046568228879e-05, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25820106267929077, | |
| "step": 910, | |
| "valid_targets_mean": 4841.4, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 1.6109154929577465, | |
| "grad_norm": 0.6114531911378174, | |
| "learning_rate": 3.798220575993654e-05, | |
| "loss": 0.2264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19989237189292908, | |
| "step": 915, | |
| "valid_targets_mean": 3742.5, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 1.619718309859155, | |
| "grad_norm": 0.4234850316696309, | |
| "learning_rate": 3.7943599259300506e-05, | |
| "loss": 0.2139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18914183974266052, | |
| "step": 920, | |
| "valid_targets_mean": 5731.9, | |
| "valid_targets_min": 2162 | |
| }, | |
| { | |
| "epoch": 1.6285211267605635, | |
| "grad_norm": 0.4618907993792908, | |
| "learning_rate": 3.7904646924459316e-05, | |
| "loss": 0.2302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2006666660308838, | |
| "step": 925, | |
| "valid_targets_mean": 4938.5, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 1.637323943661972, | |
| "grad_norm": 0.4538288767630076, | |
| "learning_rate": 3.786534950615697e-05, | |
| "loss": 0.2071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22163096070289612, | |
| "step": 930, | |
| "valid_targets_mean": 5614.2, | |
| "valid_targets_min": 2693 | |
| }, | |
| { | |
| "epoch": 1.6461267605633803, | |
| "grad_norm": 0.5155818371030004, | |
| "learning_rate": 3.782570776178843e-05, | |
| "loss": 0.198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21787038445472717, | |
| "step": 935, | |
| "valid_targets_mean": 4266.2, | |
| "valid_targets_min": 501 | |
| }, | |
| { | |
| "epoch": 1.6549295774647887, | |
| "grad_norm": 0.5482779852161948, | |
| "learning_rate": 3.778572245538497e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2714841365814209, | |
| "step": 940, | |
| "valid_targets_mean": 5253.0, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 1.663732394366197, | |
| "grad_norm": 0.5338983388238373, | |
| "learning_rate": 3.774539435759948e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26818227767944336, | |
| "step": 945, | |
| "valid_targets_mean": 4215.9, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 1.6725352112676055, | |
| "grad_norm": 0.5171844654639834, | |
| "learning_rate": 3.7704724245691614e-05, | |
| "loss": 0.2222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19803369045257568, | |
| "step": 950, | |
| "valid_targets_mean": 4312.8, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 1.681338028169014, | |
| "grad_norm": 0.5270077989620978, | |
| "learning_rate": 3.766371290351279e-05, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.230101078748703, | |
| "step": 955, | |
| "valid_targets_mean": 3824.4, | |
| "valid_targets_min": 492 | |
| }, | |
| { | |
| "epoch": 1.6901408450704225, | |
| "grad_norm": 0.5018219914634549, | |
| "learning_rate": 3.76223611214911e-05, | |
| "loss": 0.1858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19202342629432678, | |
| "step": 960, | |
| "valid_targets_mean": 4567.6, | |
| "valid_targets_min": 1830 | |
| }, | |
| { | |
| "epoch": 1.698943661971831, | |
| "grad_norm": 0.5873952930816221, | |
| "learning_rate": 3.758066969661608e-05, | |
| "loss": 0.1997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2054356038570404, | |
| "step": 965, | |
| "valid_targets_mean": 3330.8, | |
| "valid_targets_min": 660 | |
| }, | |
| { | |
| "epoch": 1.7077464788732395, | |
| "grad_norm": 0.4469723935468415, | |
| "learning_rate": 3.7538639432423317e-05, | |
| "loss": 0.1986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20283865928649902, | |
| "step": 970, | |
| "valid_targets_mean": 4606.4, | |
| "valid_targets_min": 427 | |
| }, | |
| { | |
| "epoch": 1.716549295774648, | |
| "grad_norm": 0.5071183757110289, | |
| "learning_rate": 3.749627113897901e-05, | |
| "loss": 0.2229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2173660397529602, | |
| "step": 975, | |
| "valid_targets_mean": 4447.0, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 1.7253521126760565, | |
| "grad_norm": 0.44271786882568065, | |
| "learning_rate": 3.74535656328643e-05, | |
| "loss": 0.1938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16701915860176086, | |
| "step": 980, | |
| "valid_targets_mean": 4955.2, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 1.7341549295774648, | |
| "grad_norm": 0.5051925040489004, | |
| "learning_rate": 3.7410523737159594e-05, | |
| "loss": 0.2181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21845248341560364, | |
| "step": 985, | |
| "valid_targets_mean": 4356.4, | |
| "valid_targets_min": 1729 | |
| }, | |
| { | |
| "epoch": 1.7429577464788732, | |
| "grad_norm": 0.4999665862967717, | |
| "learning_rate": 3.7367146281428664e-05, | |
| "loss": 0.241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20738720893859863, | |
| "step": 990, | |
| "valid_targets_mean": 4009.4, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 1.7517605633802817, | |
| "grad_norm": 0.45180622720383923, | |
| "learning_rate": 3.7323434101702645e-05, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20294570922851562, | |
| "step": 995, | |
| "valid_targets_mean": 4451.8, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 1.76056338028169, | |
| "grad_norm": 0.45523942481350615, | |
| "learning_rate": 3.7279388040463965e-05, | |
| "loss": 0.2139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19112583994865417, | |
| "step": 1000, | |
| "valid_targets_mean": 5344.2, | |
| "valid_targets_min": 2480 | |
| }, | |
| { | |
| "epoch": 1.7693661971830985, | |
| "grad_norm": 0.4689743343386692, | |
| "learning_rate": 3.723500894663008e-05, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23146376013755798, | |
| "step": 1005, | |
| "valid_targets_mean": 4585.6, | |
| "valid_targets_min": 1820 | |
| }, | |
| { | |
| "epoch": 1.778169014084507, | |
| "grad_norm": 0.49955288120629576, | |
| "learning_rate": 3.719029767553711e-05, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17736122012138367, | |
| "step": 1010, | |
| "valid_targets_mean": 4380.4, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 1.7869718309859155, | |
| "grad_norm": 0.45721433359799674, | |
| "learning_rate": 3.7145255088923364e-05, | |
| "loss": 0.1941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1835843026638031, | |
| "step": 1015, | |
| "valid_targets_mean": 5043.3, | |
| "valid_targets_min": 1683 | |
| }, | |
| { | |
| "epoch": 1.795774647887324, | |
| "grad_norm": 0.48628338442997404, | |
| "learning_rate": 3.709988205491273e-05, | |
| "loss": 0.1972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20398110151290894, | |
| "step": 1020, | |
| "valid_targets_mean": 4612.4, | |
| "valid_targets_min": 1576 | |
| }, | |
| { | |
| "epoch": 1.8045774647887325, | |
| "grad_norm": 0.48552802190863287, | |
| "learning_rate": 3.7054179447997946e-05, | |
| "loss": 0.2169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18387073278427124, | |
| "step": 1025, | |
| "valid_targets_mean": 3664.2, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 1.813380281690141, | |
| "grad_norm": 0.5274519846358211, | |
| "learning_rate": 3.700814814902373e-05, | |
| "loss": 0.1986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17659366130828857, | |
| "step": 1030, | |
| "valid_targets_mean": 3654.9, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 1.8221830985915493, | |
| "grad_norm": 0.44939944231195383, | |
| "learning_rate": 3.696178904516982e-05, | |
| "loss": 0.2275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23166455328464508, | |
| "step": 1035, | |
| "valid_targets_mean": 5954.7, | |
| "valid_targets_min": 611 | |
| }, | |
| { | |
| "epoch": 1.8309859154929577, | |
| "grad_norm": 0.38265583071644643, | |
| "learning_rate": 3.691510302993388e-05, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18971426784992218, | |
| "step": 1040, | |
| "valid_targets_mean": 5800.6, | |
| "valid_targets_min": 1018 | |
| }, | |
| { | |
| "epoch": 1.8397887323943662, | |
| "grad_norm": 1.0688439265612868, | |
| "learning_rate": 3.6868091003114244e-05, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1774442344903946, | |
| "step": 1045, | |
| "valid_targets_mean": 4211.1, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 1.8485915492957745, | |
| "grad_norm": 0.5464881250556777, | |
| "learning_rate": 3.682075387079262e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22237564623355865, | |
| "step": 1050, | |
| "valid_targets_mean": 4522.1, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 1.857394366197183, | |
| "grad_norm": 0.45431289697739147, | |
| "learning_rate": 3.677309254531659e-05, | |
| "loss": 0.2173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2292489856481552, | |
| "step": 1055, | |
| "valid_targets_mean": 5483.9, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 1.8661971830985915, | |
| "grad_norm": 0.5034630184310709, | |
| "learning_rate": 3.672510794528206e-05, | |
| "loss": 0.2035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17410944402217865, | |
| "step": 1060, | |
| "valid_targets_mean": 4404.8, | |
| "valid_targets_min": 1546 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 0.47654676663434603, | |
| "learning_rate": 3.667680099551551e-05, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2440078854560852, | |
| "step": 1065, | |
| "valid_targets_mean": 5339.8, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 1.8838028169014085, | |
| "grad_norm": 0.4845998507279822, | |
| "learning_rate": 3.6628172627056234e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1784711629152298, | |
| "step": 1070, | |
| "valid_targets_mean": 4129.5, | |
| "valid_targets_min": 2110 | |
| }, | |
| { | |
| "epoch": 1.892605633802817, | |
| "grad_norm": 0.44706910475656814, | |
| "learning_rate": 3.6579223777138316e-05, | |
| "loss": 0.2225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21338728070259094, | |
| "step": 1075, | |
| "valid_targets_mean": 4883.5, | |
| "valid_targets_min": 594 | |
| }, | |
| { | |
| "epoch": 1.9014084507042255, | |
| "grad_norm": 0.49241417773864055, | |
| "learning_rate": 3.652995538917263e-05, | |
| "loss": 0.2079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1975412666797638, | |
| "step": 1080, | |
| "valid_targets_mean": 4859.6, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 1.9102112676056338, | |
| "grad_norm": 0.45653222683404704, | |
| "learning_rate": 3.648036841272864e-05, | |
| "loss": 0.2208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2626076638698578, | |
| "step": 1085, | |
| "valid_targets_mean": 6123.1, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 1.9190140845070423, | |
| "grad_norm": 0.48437341945969714, | |
| "learning_rate": 3.643046380351607e-05, | |
| "loss": 0.1889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21245932579040527, | |
| "step": 1090, | |
| "valid_targets_mean": 5122.2, | |
| "valid_targets_min": 2537 | |
| }, | |
| { | |
| "epoch": 1.9278169014084507, | |
| "grad_norm": 0.41090225964849225, | |
| "learning_rate": 3.6380242523366536e-05, | |
| "loss": 0.2072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20476964116096497, | |
| "step": 1095, | |
| "valid_targets_mean": 5502.2, | |
| "valid_targets_min": 2544 | |
| }, | |
| { | |
| "epoch": 1.936619718309859, | |
| "grad_norm": 0.4538228094228837, | |
| "learning_rate": 3.6329705540214973e-05, | |
| "loss": 0.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.246622696518898, | |
| "step": 1100, | |
| "valid_targets_mean": 4610.8, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 1.9454225352112675, | |
| "grad_norm": 0.512542850518571, | |
| "learning_rate": 3.627885382808098e-05, | |
| "loss": 0.1969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16053694486618042, | |
| "step": 1105, | |
| "valid_targets_mean": 4350.6, | |
| "valid_targets_min": 232 | |
| }, | |
| { | |
| "epoch": 1.954225352112676, | |
| "grad_norm": 0.5599206252006377, | |
| "learning_rate": 3.622768836705005e-05, | |
| "loss": 0.1889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20071826875209808, | |
| "step": 1110, | |
| "valid_targets_mean": 4423.5, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 1.9630281690140845, | |
| "grad_norm": 0.39357281986184917, | |
| "learning_rate": 3.61762101432547e-05, | |
| "loss": 0.2106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20038862526416779, | |
| "step": 1115, | |
| "valid_targets_mean": 6065.9, | |
| "valid_targets_min": 2605 | |
| }, | |
| { | |
| "epoch": 1.971830985915493, | |
| "grad_norm": 0.45592933490647597, | |
| "learning_rate": 3.6124420148855426e-05, | |
| "loss": 0.2117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21322622895240784, | |
| "step": 1120, | |
| "valid_targets_mean": 4803.8, | |
| "valid_targets_min": 961 | |
| }, | |
| { | |
| "epoch": 1.9806338028169015, | |
| "grad_norm": 0.46062435135629615, | |
| "learning_rate": 3.607231938202163e-05, | |
| "loss": 0.2217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20594629645347595, | |
| "step": 1125, | |
| "valid_targets_mean": 4752.2, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 1.98943661971831, | |
| "grad_norm": 0.42107582354643547, | |
| "learning_rate": 3.601990884691235e-05, | |
| "loss": 0.1905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16003745794296265, | |
| "step": 1130, | |
| "valid_targets_mean": 4792.9, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 1.9982394366197183, | |
| "grad_norm": 0.46897891904095135, | |
| "learning_rate": 3.59671895536569e-05, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24454618990421295, | |
| "step": 1135, | |
| "valid_targets_mean": 4876.4, | |
| "valid_targets_min": 2517 | |
| }, | |
| { | |
| "epoch": 2.007042253521127, | |
| "grad_norm": 0.49837158055082453, | |
| "learning_rate": 3.591416251833543e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14901307225227356, | |
| "step": 1140, | |
| "valid_targets_mean": 4041.9, | |
| "valid_targets_min": 461 | |
| }, | |
| { | |
| "epoch": 2.015845070422535, | |
| "grad_norm": 0.45692015561681837, | |
| "learning_rate": 3.586082876295931e-05, | |
| "loss": 0.1905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.169135183095932, | |
| "step": 1145, | |
| "valid_targets_mean": 4242.6, | |
| "valid_targets_min": 604 | |
| }, | |
| { | |
| "epoch": 2.0246478873239435, | |
| "grad_norm": 0.499679190397603, | |
| "learning_rate": 3.5807189315451456e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19791707396507263, | |
| "step": 1150, | |
| "valid_targets_mean": 4694.9, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 2.033450704225352, | |
| "grad_norm": 0.49516781337844346, | |
| "learning_rate": 3.575324520962652e-05, | |
| "loss": 0.182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18917003273963928, | |
| "step": 1155, | |
| "valid_targets_mean": 4837.4, | |
| "valid_targets_min": 1771 | |
| }, | |
| { | |
| "epoch": 2.0422535211267605, | |
| "grad_norm": 0.47134800999709064, | |
| "learning_rate": 3.569899748517094e-05, | |
| "loss": 0.1829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15303587913513184, | |
| "step": 1160, | |
| "valid_targets_mean": 4030.2, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 2.051056338028169, | |
| "grad_norm": 0.5220164315759099, | |
| "learning_rate": 3.5644447187622937e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2291077971458435, | |
| "step": 1165, | |
| "valid_targets_mean": 4616.2, | |
| "valid_targets_min": 393 | |
| }, | |
| { | |
| "epoch": 2.0598591549295775, | |
| "grad_norm": 0.8040649563917794, | |
| "learning_rate": 3.558959536835233e-05, | |
| "loss": 0.194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25454702973365784, | |
| "step": 1170, | |
| "valid_targets_mean": 4945.3, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 2.068661971830986, | |
| "grad_norm": 0.4975814009206656, | |
| "learning_rate": 3.553444308454029e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21568721532821655, | |
| "step": 1175, | |
| "valid_targets_mean": 5387.9, | |
| "valid_targets_min": 706 | |
| }, | |
| { | |
| "epoch": 2.0774647887323945, | |
| "grad_norm": 0.5445416705397909, | |
| "learning_rate": 3.5478991399158976e-05, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1795259714126587, | |
| "step": 1180, | |
| "valid_targets_mean": 4054.5, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 2.086267605633803, | |
| "grad_norm": 0.5196763549644741, | |
| "learning_rate": 3.542324138095101e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1868497133255005, | |
| "step": 1185, | |
| "valid_targets_mean": 3955.2, | |
| "valid_targets_min": 582 | |
| }, | |
| { | |
| "epoch": 2.0950704225352115, | |
| "grad_norm": 0.4662580722698638, | |
| "learning_rate": 3.536719410440891e-05, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1876038908958435, | |
| "step": 1190, | |
| "valid_targets_mean": 4993.1, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 2.1038732394366195, | |
| "grad_norm": 0.4446598307571566, | |
| "learning_rate": 3.53108506497544e-05, | |
| "loss": 0.1932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25652265548706055, | |
| "step": 1195, | |
| "valid_targets_mean": 5786.4, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 2.112676056338028, | |
| "grad_norm": 0.4750669922082755, | |
| "learning_rate": 3.525421210291752e-05, | |
| "loss": 0.2028, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.194286048412323, | |
| "step": 1200, | |
| "valid_targets_mean": 4369.2, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 2.1214788732394365, | |
| "grad_norm": 0.41777822350404714, | |
| "learning_rate": 3.5197279555515776e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1680755615234375, | |
| "step": 1205, | |
| "valid_targets_mean": 5369.8, | |
| "valid_targets_min": 2123 | |
| }, | |
| { | |
| "epoch": 2.130281690140845, | |
| "grad_norm": 0.46390748462061954, | |
| "learning_rate": 3.514005410483304e-05, | |
| "loss": 0.2069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2672138214111328, | |
| "step": 1210, | |
| "valid_targets_mean": 5612.5, | |
| "valid_targets_min": 1610 | |
| }, | |
| { | |
| "epoch": 2.1390845070422535, | |
| "grad_norm": 0.7324786141737003, | |
| "learning_rate": 3.5082536853798443e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16874143481254578, | |
| "step": 1215, | |
| "valid_targets_mean": 3501.8, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 2.147887323943662, | |
| "grad_norm": 0.4633066570910277, | |
| "learning_rate": 3.5024728910965104e-05, | |
| "loss": 0.1854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17661717534065247, | |
| "step": 1220, | |
| "valid_targets_mean": 4799.9, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 2.1566901408450705, | |
| "grad_norm": 0.4777274724902786, | |
| "learning_rate": 3.496663139048876e-05, | |
| "loss": 0.1871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21020928025245667, | |
| "step": 1225, | |
| "valid_targets_mean": 4154.6, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 2.165492957746479, | |
| "grad_norm": 0.5081468452923454, | |
| "learning_rate": 3.490824541210626e-05, | |
| "loss": 0.1866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19894444942474365, | |
| "step": 1230, | |
| "valid_targets_mean": 4111.0, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 2.1742957746478875, | |
| "grad_norm": 0.5331214671058088, | |
| "learning_rate": 3.484957210111407e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2092374861240387, | |
| "step": 1235, | |
| "valid_targets_mean": 3737.1, | |
| "valid_targets_min": 668 | |
| }, | |
| { | |
| "epoch": 2.183098591549296, | |
| "grad_norm": 0.570381410697436, | |
| "learning_rate": 3.479061258834651e-05, | |
| "loss": 0.1699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1936834752559662, | |
| "step": 1240, | |
| "valid_targets_mean": 4621.7, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 2.191901408450704, | |
| "grad_norm": 0.45644795578580405, | |
| "learning_rate": 3.473136801015397e-05, | |
| "loss": 0.1846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18017175793647766, | |
| "step": 1245, | |
| "valid_targets_mean": 4635.8, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 2.2007042253521125, | |
| "grad_norm": 0.4933046354652453, | |
| "learning_rate": 3.4671839508381046e-05, | |
| "loss": 0.1979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23679056763648987, | |
| "step": 1250, | |
| "valid_targets_mean": 5184.8, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 2.209507042253521, | |
| "grad_norm": 0.5151403006783971, | |
| "learning_rate": 3.461202823034449e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24990913271903992, | |
| "step": 1255, | |
| "valid_targets_mean": 4420.6, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 2.2183098591549295, | |
| "grad_norm": 0.48803911755497, | |
| "learning_rate": 3.4551935328811115e-05, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17088082432746887, | |
| "step": 1260, | |
| "valid_targets_mean": 4209.3, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 2.227112676056338, | |
| "grad_norm": 0.5008185327006053, | |
| "learning_rate": 3.449156196197558e-05, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2017328441143036, | |
| "step": 1265, | |
| "valid_targets_mean": 4679.0, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 2.2359154929577465, | |
| "grad_norm": 0.49697582478770475, | |
| "learning_rate": 3.443090929343807e-05, | |
| "loss": 0.2077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21368472278118134, | |
| "step": 1270, | |
| "valid_targets_mean": 4514.1, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 2.244718309859155, | |
| "grad_norm": 0.5303917764049166, | |
| "learning_rate": 3.436997849218186e-05, | |
| "loss": 0.1883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18369421362876892, | |
| "step": 1275, | |
| "valid_targets_mean": 4716.9, | |
| "valid_targets_min": 2220 | |
| }, | |
| { | |
| "epoch": 2.2535211267605635, | |
| "grad_norm": 0.4068579182039538, | |
| "learning_rate": 3.430877073255078e-05, | |
| "loss": 0.2023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.269880086183548, | |
| "step": 1280, | |
| "valid_targets_mean": 7608.6, | |
| "valid_targets_min": 2374 | |
| }, | |
| { | |
| "epoch": 2.262323943661972, | |
| "grad_norm": 0.5175250845869045, | |
| "learning_rate": 3.424728719422662e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1778397560119629, | |
| "step": 1285, | |
| "valid_targets_mean": 4393.4, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 2.2711267605633805, | |
| "grad_norm": 0.44790660508855323, | |
| "learning_rate": 3.4185529062206316e-05, | |
| "loss": 0.1981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18896383047103882, | |
| "step": 1290, | |
| "valid_targets_mean": 4919.0, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 2.279929577464789, | |
| "grad_norm": 0.543106504679774, | |
| "learning_rate": 3.4123497526779186e-05, | |
| "loss": 0.1861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2077968567609787, | |
| "step": 1295, | |
| "valid_targets_mean": 4786.8, | |
| "valid_targets_min": 608 | |
| }, | |
| { | |
| "epoch": 2.288732394366197, | |
| "grad_norm": 0.4479843237414602, | |
| "learning_rate": 3.406119378350398e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17033112049102783, | |
| "step": 1300, | |
| "valid_targets_mean": 4374.6, | |
| "valid_targets_min": 2116 | |
| }, | |
| { | |
| "epoch": 2.2975352112676055, | |
| "grad_norm": 0.4044349890616847, | |
| "learning_rate": 3.399861903318578e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16565683484077454, | |
| "step": 1305, | |
| "valid_targets_mean": 5759.4, | |
| "valid_targets_min": 2241 | |
| }, | |
| { | |
| "epoch": 2.306338028169014, | |
| "grad_norm": 0.4835100853743559, | |
| "learning_rate": 3.393577448185293e-05, | |
| "loss": 0.1827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17125102877616882, | |
| "step": 1310, | |
| "valid_targets_mean": 4939.2, | |
| "valid_targets_min": 1683 | |
| }, | |
| { | |
| "epoch": 2.3151408450704225, | |
| "grad_norm": 0.4690692577771698, | |
| "learning_rate": 3.387266134073373e-05, | |
| "loss": 0.1909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1973458230495453, | |
| "step": 1315, | |
| "valid_targets_mean": 5296.4, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 2.323943661971831, | |
| "grad_norm": 0.5806067197675361, | |
| "learning_rate": 3.380928082623315e-05, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19876274466514587, | |
| "step": 1320, | |
| "valid_targets_mean": 4133.5, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 2.3327464788732395, | |
| "grad_norm": 0.45776559308597203, | |
| "learning_rate": 3.374563415990932e-05, | |
| "loss": 0.1917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16198670864105225, | |
| "step": 1325, | |
| "valid_targets_mean": 4859.3, | |
| "valid_targets_min": 275 | |
| }, | |
| { | |
| "epoch": 2.341549295774648, | |
| "grad_norm": 0.5063764269183072, | |
| "learning_rate": 3.3681722568450045e-05, | |
| "loss": 0.1818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17208705842494965, | |
| "step": 1330, | |
| "valid_targets_mean": 4140.5, | |
| "valid_targets_min": 616 | |
| }, | |
| { | |
| "epoch": 2.3503521126760565, | |
| "grad_norm": 0.49234829211103437, | |
| "learning_rate": 3.3617547283649125e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2028292864561081, | |
| "step": 1335, | |
| "valid_targets_mean": 5001.0, | |
| "valid_targets_min": 1751 | |
| }, | |
| { | |
| "epoch": 2.359154929577465, | |
| "grad_norm": 0.44052717677080805, | |
| "learning_rate": 3.355310954238265e-05, | |
| "loss": 0.1992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18668846786022186, | |
| "step": 1340, | |
| "valid_targets_mean": 5311.0, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 2.367957746478873, | |
| "grad_norm": 0.3859799502580637, | |
| "learning_rate": 3.34884105865851e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18557174503803253, | |
| "step": 1345, | |
| "valid_targets_mean": 5677.9, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 2.3767605633802815, | |
| "grad_norm": 0.5188534221540257, | |
| "learning_rate": 3.3423451663225485e-05, | |
| "loss": 0.1925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16947010159492493, | |
| "step": 1350, | |
| "valid_targets_mean": 4419.8, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 2.38556338028169, | |
| "grad_norm": 0.4715261171845374, | |
| "learning_rate": 3.335823402428326e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16113054752349854, | |
| "step": 1355, | |
| "valid_targets_mean": 4159.4, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 2.3943661971830985, | |
| "grad_norm": 0.4567539647576461, | |
| "learning_rate": 3.3292758926724205e-05, | |
| "loss": 0.2071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18637806177139282, | |
| "step": 1360, | |
| "valid_targets_mean": 4859.8, | |
| "valid_targets_min": 1681 | |
| }, | |
| { | |
| "epoch": 2.403169014084507, | |
| "grad_norm": 0.5341311512998459, | |
| "learning_rate": 3.322702763247622e-05, | |
| "loss": 0.1886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1638811230659485, | |
| "step": 1365, | |
| "valid_targets_mean": 5879.1, | |
| "valid_targets_min": 533 | |
| }, | |
| { | |
| "epoch": 2.4119718309859155, | |
| "grad_norm": 0.529070948638356, | |
| "learning_rate": 3.316104140840497e-05, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18338234722614288, | |
| "step": 1370, | |
| "valid_targets_mean": 4238.6, | |
| "valid_targets_min": 423 | |
| }, | |
| { | |
| "epoch": 2.420774647887324, | |
| "grad_norm": 0.4354325946849577, | |
| "learning_rate": 3.309480152628948e-05, | |
| "loss": 0.2043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19401895999908447, | |
| "step": 1375, | |
| "valid_targets_mean": 6780.1, | |
| "valid_targets_min": 2077 | |
| }, | |
| { | |
| "epoch": 2.4295774647887325, | |
| "grad_norm": 0.45980192780258955, | |
| "learning_rate": 3.3028309262797645e-05, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21572089195251465, | |
| "step": 1380, | |
| "valid_targets_mean": 5947.9, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 2.438380281690141, | |
| "grad_norm": 0.44465844182385617, | |
| "learning_rate": 3.296156589946161e-05, | |
| "loss": 0.2005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19175928831100464, | |
| "step": 1385, | |
| "valid_targets_mean": 5500.1, | |
| "valid_targets_min": 2375 | |
| }, | |
| { | |
| "epoch": 2.4471830985915495, | |
| "grad_norm": 0.5619365969165188, | |
| "learning_rate": 3.289457272265304e-05, | |
| "loss": 0.1772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1932021975517273, | |
| "step": 1390, | |
| "valid_targets_mean": 4885.1, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 2.455985915492958, | |
| "grad_norm": 0.4248889955715677, | |
| "learning_rate": 3.282733102355839e-05, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19634892046451569, | |
| "step": 1395, | |
| "valid_targets_mean": 6052.0, | |
| "valid_targets_min": 408 | |
| }, | |
| { | |
| "epoch": 2.464788732394366, | |
| "grad_norm": 0.41558861786407775, | |
| "learning_rate": 3.2759842098153974e-05, | |
| "loss": 0.1687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19464778900146484, | |
| "step": 1400, | |
| "valid_targets_mean": 5751.9, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 2.4735915492957745, | |
| "grad_norm": 0.4381419626974871, | |
| "learning_rate": 3.269210724718098e-05, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20120611786842346, | |
| "step": 1405, | |
| "valid_targets_mean": 5223.7, | |
| "valid_targets_min": 1882 | |
| }, | |
| { | |
| "epoch": 2.482394366197183, | |
| "grad_norm": 0.5168885575741314, | |
| "learning_rate": 3.262412777612045e-05, | |
| "loss": 0.1976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2174382507801056, | |
| "step": 1410, | |
| "valid_targets_mean": 4864.4, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 2.4911971830985915, | |
| "grad_norm": 0.5612396381270076, | |
| "learning_rate": 3.2555904995168055e-05, | |
| "loss": 0.1672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16132789850234985, | |
| "step": 1415, | |
| "valid_targets_mean": 2956.1, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.42337927537547365, | |
| "learning_rate": 3.2487440219208894e-05, | |
| "loss": 0.185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2525796890258789, | |
| "step": 1420, | |
| "valid_targets_mean": 6323.4, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 2.5088028169014085, | |
| "grad_norm": 0.498529524092842, | |
| "learning_rate": 3.241873476779215e-05, | |
| "loss": 0.191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21080796420574188, | |
| "step": 1425, | |
| "valid_targets_mean": 3991.1, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 2.517605633802817, | |
| "grad_norm": 0.5508160507465973, | |
| "learning_rate": 3.2349789965105576e-05, | |
| "loss": 0.2029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17234843969345093, | |
| "step": 1430, | |
| "valid_targets_mean": 3952.5, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 2.5264084507042255, | |
| "grad_norm": 0.42953691657584137, | |
| "learning_rate": 3.228060713995013e-05, | |
| "loss": 0.1942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25213393568992615, | |
| "step": 1435, | |
| "valid_targets_mean": 6228.9, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 2.535211267605634, | |
| "grad_norm": 0.4922818694112367, | |
| "learning_rate": 3.2211187625714194e-05, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15114401280879974, | |
| "step": 1440, | |
| "valid_targets_mean": 4009.9, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 2.544014084507042, | |
| "grad_norm": 0.45003919826791106, | |
| "learning_rate": 3.214153276034799e-05, | |
| "loss": 0.1795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19699226319789886, | |
| "step": 1445, | |
| "valid_targets_mean": 5592.2, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 2.552816901408451, | |
| "grad_norm": 0.4972602596195832, | |
| "learning_rate": 3.207164388633777e-05, | |
| "loss": 0.1766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16295522451400757, | |
| "step": 1450, | |
| "valid_targets_mean": 4244.9, | |
| "valid_targets_min": 564 | |
| }, | |
| { | |
| "epoch": 2.561619718309859, | |
| "grad_norm": 0.45953384433811645, | |
| "learning_rate": 3.200152235067989e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17465293407440186, | |
| "step": 1455, | |
| "valid_targets_mean": 4160.6, | |
| "valid_targets_min": 489 | |
| }, | |
| { | |
| "epoch": 2.5704225352112675, | |
| "grad_norm": 0.4278568525591571, | |
| "learning_rate": 3.1931169504854925e-05, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19868826866149902, | |
| "step": 1460, | |
| "valid_targets_mean": 5886.2, | |
| "valid_targets_min": 2247 | |
| }, | |
| { | |
| "epoch": 2.579225352112676, | |
| "grad_norm": 0.5339188716552403, | |
| "learning_rate": 3.186058670480155e-05, | |
| "loss": 0.1932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17631475627422333, | |
| "step": 1465, | |
| "valid_targets_mean": 4739.9, | |
| "valid_targets_min": 1498 | |
| }, | |
| { | |
| "epoch": 2.5880281690140845, | |
| "grad_norm": 0.4839489779291509, | |
| "learning_rate": 3.178977531089048e-05, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19226914644241333, | |
| "step": 1470, | |
| "valid_targets_mean": 4600.2, | |
| "valid_targets_min": 705 | |
| }, | |
| { | |
| "epoch": 2.596830985915493, | |
| "grad_norm": 0.5017756135637779, | |
| "learning_rate": 3.171873668789817e-05, | |
| "loss": 0.1763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15444476902484894, | |
| "step": 1475, | |
| "valid_targets_mean": 4160.5, | |
| "valid_targets_min": 497 | |
| }, | |
| { | |
| "epoch": 2.6056338028169015, | |
| "grad_norm": 0.44162296696308384, | |
| "learning_rate": 3.164747220498058e-05, | |
| "loss": 0.1763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18829277157783508, | |
| "step": 1480, | |
| "valid_targets_mean": 5353.6, | |
| "valid_targets_min": 433 | |
| }, | |
| { | |
| "epoch": 2.61443661971831, | |
| "grad_norm": 0.4358328483145143, | |
| "learning_rate": 3.157598323564674e-05, | |
| "loss": 0.1787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14957726001739502, | |
| "step": 1485, | |
| "valid_targets_mean": 4120.0, | |
| "valid_targets_min": 2109 | |
| }, | |
| { | |
| "epoch": 2.623239436619718, | |
| "grad_norm": 0.4911784109798284, | |
| "learning_rate": 3.1504271157732324e-05, | |
| "loss": 0.1921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21047227084636688, | |
| "step": 1490, | |
| "valid_targets_mean": 5051.2, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 2.632042253521127, | |
| "grad_norm": 0.4158802389294613, | |
| "learning_rate": 3.143233735337305e-05, | |
| "loss": 0.172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15601132810115814, | |
| "step": 1495, | |
| "valid_targets_mean": 5633.9, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 2.640845070422535, | |
| "grad_norm": 0.41410283227729716, | |
| "learning_rate": 3.136018320897804e-05, | |
| "loss": 0.1924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21157892048358917, | |
| "step": 1500, | |
| "valid_targets_mean": 5585.2, | |
| "valid_targets_min": 915 | |
| }, | |
| { | |
| "epoch": 2.6496478873239435, | |
| "grad_norm": 0.4418561665919076, | |
| "learning_rate": 3.1287810115203165e-05, | |
| "loss": 0.1827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17784714698791504, | |
| "step": 1505, | |
| "valid_targets_mean": 4986.8, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 2.658450704225352, | |
| "grad_norm": 0.40013359578654933, | |
| "learning_rate": 3.121521946692415e-05, | |
| "loss": 0.1835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15553095936775208, | |
| "step": 1510, | |
| "valid_targets_mean": 5228.5, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 2.6672535211267605, | |
| "grad_norm": 0.42797983932839545, | |
| "learning_rate": 3.114241266320977e-05, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15967434644699097, | |
| "step": 1515, | |
| "valid_targets_mean": 5027.4, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 2.676056338028169, | |
| "grad_norm": 0.5219569652881605, | |
| "learning_rate": 3.106939110729481e-05, | |
| "loss": 0.1859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17975085973739624, | |
| "step": 1520, | |
| "valid_targets_mean": 4070.6, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 2.6848591549295775, | |
| "grad_norm": 0.4418555422626167, | |
| "learning_rate": 3.099615620655311e-05, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16808319091796875, | |
| "step": 1525, | |
| "valid_targets_mean": 5110.2, | |
| "valid_targets_min": 2234 | |
| }, | |
| { | |
| "epoch": 2.693661971830986, | |
| "grad_norm": 0.5117548554074327, | |
| "learning_rate": 3.092270937247035e-05, | |
| "loss": 0.1911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1853608936071396, | |
| "step": 1530, | |
| "valid_targets_mean": 3795.2, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 2.7024647887323945, | |
| "grad_norm": 0.48678911427472665, | |
| "learning_rate": 3.0849052020616915e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16803531348705292, | |
| "step": 1535, | |
| "valid_targets_mean": 3794.8, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 2.711267605633803, | |
| "grad_norm": 0.4749042357001247, | |
| "learning_rate": 3.077518557062056e-05, | |
| "loss": 0.202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1853049248456955, | |
| "step": 1540, | |
| "valid_targets_mean": 3871.4, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 2.720070422535211, | |
| "grad_norm": 0.41199947329428777, | |
| "learning_rate": 3.070111144613909e-05, | |
| "loss": 0.1959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20912282168865204, | |
| "step": 1545, | |
| "valid_targets_mean": 6100.6, | |
| "valid_targets_min": 2166 | |
| }, | |
| { | |
| "epoch": 2.72887323943662, | |
| "grad_norm": 0.43635455988365274, | |
| "learning_rate": 3.0626831074832895e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18904328346252441, | |
| "step": 1550, | |
| "valid_targets_mean": 4756.8, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 2.737676056338028, | |
| "grad_norm": 0.403831756424422, | |
| "learning_rate": 3.055234588833745e-05, | |
| "loss": 0.17, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15576642751693726, | |
| "step": 1555, | |
| "valid_targets_mean": 5499.8, | |
| "valid_targets_min": 729 | |
| }, | |
| { | |
| "epoch": 2.7464788732394365, | |
| "grad_norm": 0.4564255430645552, | |
| "learning_rate": 3.047765732223571e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20087014138698578, | |
| "step": 1560, | |
| "valid_targets_mean": 4818.6, | |
| "valid_targets_min": 1743 | |
| }, | |
| { | |
| "epoch": 2.755281690140845, | |
| "grad_norm": 0.5262677823975185, | |
| "learning_rate": 3.040276681603043e-05, | |
| "loss": 0.1856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2311326265335083, | |
| "step": 1565, | |
| "valid_targets_mean": 4233.2, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 2.7640845070422535, | |
| "grad_norm": 0.5084482543121024, | |
| "learning_rate": 3.0327675813116487e-05, | |
| "loss": 0.173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.176751047372818, | |
| "step": 1570, | |
| "valid_targets_mean": 4333.8, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 2.772887323943662, | |
| "grad_norm": 0.39003538974422036, | |
| "learning_rate": 3.025238576075296e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17861370742321014, | |
| "step": 1575, | |
| "valid_targets_mean": 5983.6, | |
| "valid_targets_min": 1582 | |
| }, | |
| { | |
| "epoch": 2.7816901408450705, | |
| "grad_norm": 0.45152225340556384, | |
| "learning_rate": 3.017689811003532e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18352942168712616, | |
| "step": 1580, | |
| "valid_targets_mean": 4817.9, | |
| "valid_targets_min": 611 | |
| }, | |
| { | |
| "epoch": 2.790492957746479, | |
| "grad_norm": 0.43313339380190674, | |
| "learning_rate": 3.0101214315867406e-05, | |
| "loss": 0.208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.179938405752182, | |
| "step": 1585, | |
| "valid_targets_mean": 5040.4, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 2.7992957746478875, | |
| "grad_norm": 0.4311739012163658, | |
| "learning_rate": 3.0025335836933432e-05, | |
| "loss": 0.1967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17542585730552673, | |
| "step": 1590, | |
| "valid_targets_mean": 5439.0, | |
| "valid_targets_min": 1546 | |
| }, | |
| { | |
| "epoch": 2.808098591549296, | |
| "grad_norm": 0.6208883688987087, | |
| "learning_rate": 2.9949264135669836e-05, | |
| "loss": 0.2002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17097251117229462, | |
| "step": 1595, | |
| "valid_targets_mean": 4293.1, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 2.816901408450704, | |
| "grad_norm": 0.4184633325796343, | |
| "learning_rate": 2.9873000678237113e-05, | |
| "loss": 0.1744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1637093871831894, | |
| "step": 1600, | |
| "valid_targets_mean": 6689.8, | |
| "valid_targets_min": 1779 | |
| }, | |
| { | |
| "epoch": 2.8257042253521125, | |
| "grad_norm": 0.489009033764824, | |
| "learning_rate": 2.979654693449155e-05, | |
| "loss": 0.172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1565060317516327, | |
| "step": 1605, | |
| "valid_targets_mean": 3716.0, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 2.834507042253521, | |
| "grad_norm": 0.4718228594468646, | |
| "learning_rate": 2.9719904377956896e-05, | |
| "loss": 0.1821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.203069269657135, | |
| "step": 1610, | |
| "valid_targets_mean": 4542.8, | |
| "valid_targets_min": 2211 | |
| }, | |
| { | |
| "epoch": 2.8433098591549295, | |
| "grad_norm": 0.4069743677409105, | |
| "learning_rate": 2.964307448579597e-05, | |
| "loss": 0.1791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16798418760299683, | |
| "step": 1615, | |
| "valid_targets_mean": 5056.9, | |
| "valid_targets_min": 196 | |
| }, | |
| { | |
| "epoch": 2.852112676056338, | |
| "grad_norm": 0.4141430846057944, | |
| "learning_rate": 2.956605873878218e-05, | |
| "loss": 0.1696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18934723734855652, | |
| "step": 1620, | |
| "valid_targets_mean": 5474.9, | |
| "valid_targets_min": 1874 | |
| }, | |
| { | |
| "epoch": 2.8609154929577465, | |
| "grad_norm": 0.49688027910877136, | |
| "learning_rate": 2.9488858621271003e-05, | |
| "loss": 0.2095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2204950749874115, | |
| "step": 1625, | |
| "valid_targets_mean": 4296.1, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 2.869718309859155, | |
| "grad_norm": 0.39675500190247687, | |
| "learning_rate": 2.9411475621171334e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16935673356056213, | |
| "step": 1630, | |
| "valid_targets_mean": 5885.1, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 2.8785211267605635, | |
| "grad_norm": 0.47357656691335925, | |
| "learning_rate": 2.933391122991688e-05, | |
| "loss": 0.1875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20661430060863495, | |
| "step": 1635, | |
| "valid_targets_mean": 5042.6, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 2.887323943661972, | |
| "grad_norm": 0.4196079800508206, | |
| "learning_rate": 2.9256166942437327e-05, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15299251675605774, | |
| "step": 1640, | |
| "valid_targets_mean": 4916.2, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 2.89612676056338, | |
| "grad_norm": 0.4211387341093774, | |
| "learning_rate": 2.9178244257129612e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15700024366378784, | |
| "step": 1645, | |
| "valid_targets_mean": 4931.9, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 2.904929577464789, | |
| "grad_norm": 0.46565627059120634, | |
| "learning_rate": 2.9100144675828974e-05, | |
| "loss": 0.1953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1654834747314453, | |
| "step": 1650, | |
| "valid_targets_mean": 4718.7, | |
| "valid_targets_min": 1852 | |
| }, | |
| { | |
| "epoch": 2.913732394366197, | |
| "grad_norm": 0.4411729639424015, | |
| "learning_rate": 2.9021869703780065e-05, | |
| "loss": 0.174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18113288283348083, | |
| "step": 1655, | |
| "valid_targets_mean": 4549.8, | |
| "valid_targets_min": 2059 | |
| }, | |
| { | |
| "epoch": 2.9225352112676055, | |
| "grad_norm": 0.4558590657801769, | |
| "learning_rate": 2.8943420849607896e-05, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1644410938024521, | |
| "step": 1660, | |
| "valid_targets_mean": 4826.1, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 2.931338028169014, | |
| "grad_norm": 0.49381092820712585, | |
| "learning_rate": 2.8864799625288787e-05, | |
| "loss": 0.1951, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18676629662513733, | |
| "step": 1665, | |
| "valid_targets_mean": 3949.4, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 2.9401408450704225, | |
| "grad_norm": 0.6598167807710108, | |
| "learning_rate": 2.878600754612121e-05, | |
| "loss": 0.174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1920105665922165, | |
| "step": 1670, | |
| "valid_targets_mean": 5225.5, | |
| "valid_targets_min": 1588 | |
| }, | |
| { | |
| "epoch": 2.948943661971831, | |
| "grad_norm": 0.4762406056209046, | |
| "learning_rate": 2.87070461306966e-05, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17589432001113892, | |
| "step": 1675, | |
| "valid_targets_mean": 4465.9, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 2.9577464788732395, | |
| "grad_norm": 0.4106127882742786, | |
| "learning_rate": 2.8627916900870078e-05, | |
| "loss": 0.2092, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17294633388519287, | |
| "step": 1680, | |
| "valid_targets_mean": 5075.4, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 2.966549295774648, | |
| "grad_norm": 0.4825688986667228, | |
| "learning_rate": 2.8548621381731102e-05, | |
| "loss": 0.207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2769816815853119, | |
| "step": 1685, | |
| "valid_targets_mean": 5624.2, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 2.9753521126760565, | |
| "grad_norm": 0.4333788064918239, | |
| "learning_rate": 2.846916110157412e-05, | |
| "loss": 0.1999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21842582523822784, | |
| "step": 1690, | |
| "valid_targets_mean": 5958.6, | |
| "valid_targets_min": 713 | |
| }, | |
| { | |
| "epoch": 2.984154929577465, | |
| "grad_norm": 0.35630390550910906, | |
| "learning_rate": 2.8389537591869057e-05, | |
| "loss": 0.1939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16512930393218994, | |
| "step": 1695, | |
| "valid_targets_mean": 6502.1, | |
| "valid_targets_min": 2751 | |
| }, | |
| { | |
| "epoch": 2.992957746478873, | |
| "grad_norm": 0.5665372893958116, | |
| "learning_rate": 2.8309752387231842e-05, | |
| "loss": 0.1867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20267118513584137, | |
| "step": 1700, | |
| "valid_targets_mean": 3167.4, | |
| "valid_targets_min": 498 | |
| }, | |
| { | |
| "epoch": 3.0017605633802815, | |
| "grad_norm": 0.4193384817194654, | |
| "learning_rate": 2.8229807025394815e-05, | |
| "loss": 0.1721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15952523052692413, | |
| "step": 1705, | |
| "valid_targets_mean": 4972.8, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 3.01056338028169, | |
| "grad_norm": 0.47151322380378063, | |
| "learning_rate": 2.8149703047177083e-05, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16956490278244019, | |
| "step": 1710, | |
| "valid_targets_mean": 4909.1, | |
| "valid_targets_min": 570 | |
| }, | |
| { | |
| "epoch": 3.0193661971830985, | |
| "grad_norm": 0.5710138570102699, | |
| "learning_rate": 2.806944199645484e-05, | |
| "loss": 0.1554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14866656064987183, | |
| "step": 1715, | |
| "valid_targets_mean": 3160.8, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 3.028169014084507, | |
| "grad_norm": 0.4820974594168107, | |
| "learning_rate": 2.79890254201316e-05, | |
| "loss": 0.1626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14279060065746307, | |
| "step": 1720, | |
| "valid_targets_mean": 4081.8, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 3.0369718309859155, | |
| "grad_norm": 0.5373715954223984, | |
| "learning_rate": 2.7908454868108363e-05, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1669020652770996, | |
| "step": 1725, | |
| "valid_targets_mean": 3788.4, | |
| "valid_targets_min": 542 | |
| }, | |
| { | |
| "epoch": 3.045774647887324, | |
| "grad_norm": 0.45872025955196505, | |
| "learning_rate": 2.7827731893253796e-05, | |
| "loss": 0.1614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13917496800422668, | |
| "step": 1730, | |
| "valid_targets_mean": 5021.2, | |
| "valid_targets_min": 925 | |
| }, | |
| { | |
| "epoch": 3.0545774647887325, | |
| "grad_norm": 0.4901932655211166, | |
| "learning_rate": 2.7746858051374265e-05, | |
| "loss": 0.1716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16088977456092834, | |
| "step": 1735, | |
| "valid_targets_mean": 4873.2, | |
| "valid_targets_min": 880 | |
| }, | |
| { | |
| "epoch": 3.063380281690141, | |
| "grad_norm": 0.47084785583393357, | |
| "learning_rate": 2.7665834901183836e-05, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15842463076114655, | |
| "step": 1740, | |
| "valid_targets_mean": 4508.3, | |
| "valid_targets_min": 1911 | |
| }, | |
| { | |
| "epoch": 3.0721830985915495, | |
| "grad_norm": 0.44112330017812884, | |
| "learning_rate": 2.7584664004274276e-05, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12550698220729828, | |
| "step": 1745, | |
| "valid_targets_mean": 4622.8, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 3.080985915492958, | |
| "grad_norm": 0.509783786215351, | |
| "learning_rate": 2.750334692508493e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15914398431777954, | |
| "step": 1750, | |
| "valid_targets_mean": 4016.5, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 3.089788732394366, | |
| "grad_norm": 0.47837221596919827, | |
| "learning_rate": 2.7421885230872563e-05, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17294535040855408, | |
| "step": 1755, | |
| "valid_targets_mean": 4680.3, | |
| "valid_targets_min": 2217 | |
| }, | |
| { | |
| "epoch": 3.0985915492957745, | |
| "grad_norm": 0.507367190064088, | |
| "learning_rate": 2.7340280491681167e-05, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15483909845352173, | |
| "step": 1760, | |
| "valid_targets_mean": 3940.6, | |
| "valid_targets_min": 1484 | |
| }, | |
| { | |
| "epoch": 3.107394366197183, | |
| "grad_norm": 0.4934613209359812, | |
| "learning_rate": 2.725853428031172e-05, | |
| "loss": 0.1478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14951565861701965, | |
| "step": 1765, | |
| "valid_targets_mean": 4814.2, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 3.1161971830985915, | |
| "grad_norm": 0.43670623634082417, | |
| "learning_rate": 2.7176648172291812e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15607964992523193, | |
| "step": 1770, | |
| "valid_targets_mean": 5295.5, | |
| "valid_targets_min": 1722 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 0.47947209455374434, | |
| "learning_rate": 2.7094623745845337e-05, | |
| "loss": 0.1965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2232292741537094, | |
| "step": 1775, | |
| "valid_targets_mean": 5207.1, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 3.1338028169014085, | |
| "grad_norm": 0.48581222547007535, | |
| "learning_rate": 2.701246258186206e-05, | |
| "loss": 0.1899, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2075452208518982, | |
| "step": 1780, | |
| "valid_targets_mean": 5911.7, | |
| "valid_targets_min": 500 | |
| }, | |
| { | |
| "epoch": 3.142605633802817, | |
| "grad_norm": 0.6506578318461373, | |
| "learning_rate": 2.6930166263867147e-05, | |
| "loss": 0.1682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12996521592140198, | |
| "step": 1785, | |
| "valid_targets_mean": 4325.1, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 3.1514084507042255, | |
| "grad_norm": 0.44352634623374226, | |
| "learning_rate": 2.6847736377990617e-05, | |
| "loss": 0.1708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16012227535247803, | |
| "step": 1790, | |
| "valid_targets_mean": 4992.1, | |
| "valid_targets_min": 2004 | |
| }, | |
| { | |
| "epoch": 3.160211267605634, | |
| "grad_norm": 0.7164501858578708, | |
| "learning_rate": 2.676517451293682e-05, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17676299810409546, | |
| "step": 1795, | |
| "valid_targets_mean": 4037.1, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 3.169014084507042, | |
| "grad_norm": 0.49339519430727885, | |
| "learning_rate": 2.6682482259953793e-05, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15606287121772766, | |
| "step": 1800, | |
| "valid_targets_mean": 4850.1, | |
| "valid_targets_min": 330 | |
| }, | |
| { | |
| "epoch": 3.1778169014084505, | |
| "grad_norm": 0.501263232451406, | |
| "learning_rate": 2.659966121280257e-05, | |
| "loss": 0.1872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17897209525108337, | |
| "step": 1805, | |
| "valid_targets_mean": 4341.2, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 3.186619718309859, | |
| "grad_norm": 0.8672098338416093, | |
| "learning_rate": 2.6516712967726515e-05, | |
| "loss": 0.1781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18799063563346863, | |
| "step": 1810, | |
| "valid_targets_mean": 4184.9, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 3.1954225352112675, | |
| "grad_norm": 0.4499087609134839, | |
| "learning_rate": 2.643363912342051e-05, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13722729682922363, | |
| "step": 1815, | |
| "valid_targets_mean": 4409.8, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 3.204225352112676, | |
| "grad_norm": 0.49321800105186514, | |
| "learning_rate": 2.6350441281000168e-05, | |
| "loss": 0.1746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1932738721370697, | |
| "step": 1820, | |
| "valid_targets_mean": 4334.6, | |
| "valid_targets_min": 602 | |
| }, | |
| { | |
| "epoch": 3.2130281690140845, | |
| "grad_norm": 0.444288351134661, | |
| "learning_rate": 2.626712104397097e-05, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.162206768989563, | |
| "step": 1825, | |
| "valid_targets_mean": 5297.5, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 3.221830985915493, | |
| "grad_norm": 0.5192092819499871, | |
| "learning_rate": 2.6183680018197348e-05, | |
| "loss": 0.1663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18548494577407837, | |
| "step": 1830, | |
| "valid_targets_mean": 4056.4, | |
| "valid_targets_min": 586 | |
| }, | |
| { | |
| "epoch": 3.2306338028169015, | |
| "grad_norm": 0.46535673295946217, | |
| "learning_rate": 2.6100119811871752e-05, | |
| "loss": 0.1551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1345115602016449, | |
| "step": 1835, | |
| "valid_targets_mean": 4706.5, | |
| "valid_targets_min": 1552 | |
| }, | |
| { | |
| "epoch": 3.23943661971831, | |
| "grad_norm": 0.48030244243857056, | |
| "learning_rate": 2.6016442035483652e-05, | |
| "loss": 0.1596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14587503671646118, | |
| "step": 1840, | |
| "valid_targets_mean": 4748.1, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 3.2482394366197185, | |
| "grad_norm": 0.5262236711278114, | |
| "learning_rate": 2.5932648301788475e-05, | |
| "loss": 0.1799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20582528412342072, | |
| "step": 1845, | |
| "valid_targets_mean": 4021.9, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 3.257042253521127, | |
| "grad_norm": 0.49119624081027274, | |
| "learning_rate": 2.5848740225776566e-05, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1406187117099762, | |
| "step": 1850, | |
| "valid_targets_mean": 3435.4, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 3.265845070422535, | |
| "grad_norm": 0.4449478253675705, | |
| "learning_rate": 2.5764719424642014e-05, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1830718070268631, | |
| "step": 1855, | |
| "valid_targets_mean": 5833.8, | |
| "valid_targets_min": 915 | |
| }, | |
| { | |
| "epoch": 3.2746478873239435, | |
| "grad_norm": 0.4747113276533288, | |
| "learning_rate": 2.5680587517751502e-05, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13974440097808838, | |
| "step": 1860, | |
| "valid_targets_mean": 3974.9, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 3.283450704225352, | |
| "grad_norm": 0.4270296666312105, | |
| "learning_rate": 2.559634612661312e-05, | |
| "loss": 0.171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15358084440231323, | |
| "step": 1865, | |
| "valid_targets_mean": 5191.1, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 3.2922535211267605, | |
| "grad_norm": 0.4760280498747896, | |
| "learning_rate": 2.5511996874845072e-05, | |
| "loss": 0.1842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21204963326454163, | |
| "step": 1870, | |
| "valid_targets_mean": 5188.5, | |
| "valid_targets_min": 474 | |
| }, | |
| { | |
| "epoch": 3.301056338028169, | |
| "grad_norm": 0.4678114566671876, | |
| "learning_rate": 2.5427541388144414e-05, | |
| "loss": 0.1694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16822843253612518, | |
| "step": 1875, | |
| "valid_targets_mean": 4974.1, | |
| "valid_targets_min": 1620 | |
| }, | |
| { | |
| "epoch": 3.3098591549295775, | |
| "grad_norm": 0.5056917338421301, | |
| "learning_rate": 2.534298129425571e-05, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13902977108955383, | |
| "step": 1880, | |
| "valid_targets_mean": 3831.8, | |
| "valid_targets_min": 605 | |
| }, | |
| { | |
| "epoch": 3.318661971830986, | |
| "grad_norm": 0.4727384012930409, | |
| "learning_rate": 2.5258318222939662e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21487730741500854, | |
| "step": 1885, | |
| "valid_targets_mean": 5688.2, | |
| "valid_targets_min": 3110 | |
| }, | |
| { | |
| "epoch": 3.3274647887323945, | |
| "grad_norm": 0.4418983324308392, | |
| "learning_rate": 2.5173553805941682e-05, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18671298027038574, | |
| "step": 1890, | |
| "valid_targets_mean": 5275.9, | |
| "valid_targets_min": 624 | |
| }, | |
| { | |
| "epoch": 3.336267605633803, | |
| "grad_norm": 0.5513113319690053, | |
| "learning_rate": 2.5088689676960477e-05, | |
| "loss": 0.1814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2134743183851242, | |
| "step": 1895, | |
| "valid_targets_mean": 5996.0, | |
| "valid_targets_min": 285 | |
| }, | |
| { | |
| "epoch": 3.345070422535211, | |
| "grad_norm": 0.4905446636178898, | |
| "learning_rate": 2.5003727471616533e-05, | |
| "loss": 0.1759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19587790966033936, | |
| "step": 1900, | |
| "valid_targets_mean": 4838.6, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 3.3538732394366195, | |
| "grad_norm": 0.45541680861459366, | |
| "learning_rate": 2.4918668827420612e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23907095193862915, | |
| "step": 1905, | |
| "valid_targets_mean": 6238.4, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 3.362676056338028, | |
| "grad_norm": 0.4870371442314524, | |
| "learning_rate": 2.4833515383742164e-05, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15520170331001282, | |
| "step": 1910, | |
| "valid_targets_mean": 4301.9, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 3.3714788732394365, | |
| "grad_norm": 0.7001955832681735, | |
| "learning_rate": 2.4748268781777763e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16678684949874878, | |
| "step": 1915, | |
| "valid_targets_mean": 4693.8, | |
| "valid_targets_min": 1841 | |
| }, | |
| { | |
| "epoch": 3.380281690140845, | |
| "grad_norm": 0.4639935004235257, | |
| "learning_rate": 2.4662930664519447e-05, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13996481895446777, | |
| "step": 1920, | |
| "valid_targets_mean": 3983.6, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 3.3890845070422535, | |
| "grad_norm": 0.4763334352595301, | |
| "learning_rate": 2.457750267672307e-05, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1486479640007019, | |
| "step": 1925, | |
| "valid_targets_mean": 4398.8, | |
| "valid_targets_min": 2124 | |
| }, | |
| { | |
| "epoch": 3.397887323943662, | |
| "grad_norm": 0.41677372336411134, | |
| "learning_rate": 2.4491986464876615e-05, | |
| "loss": 0.1694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1670335978269577, | |
| "step": 1930, | |
| "valid_targets_mean": 5956.6, | |
| "valid_targets_min": 2481 | |
| }, | |
| { | |
| "epoch": 3.4066901408450705, | |
| "grad_norm": 0.5245362474513732, | |
| "learning_rate": 2.4406383677168405e-05, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18945202231407166, | |
| "step": 1935, | |
| "valid_targets_mean": 4548.2, | |
| "valid_targets_min": 1419 | |
| }, | |
| { | |
| "epoch": 3.415492957746479, | |
| "grad_norm": 0.4167868230568227, | |
| "learning_rate": 2.432069596345541e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20890513062477112, | |
| "step": 1940, | |
| "valid_targets_mean": 6372.2, | |
| "valid_targets_min": 691 | |
| }, | |
| { | |
| "epoch": 3.4242957746478875, | |
| "grad_norm": 0.48988432116369074, | |
| "learning_rate": 2.423492497523139e-05, | |
| "loss": 0.177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20081228017807007, | |
| "step": 1945, | |
| "valid_targets_mean": 5500.9, | |
| "valid_targets_min": 599 | |
| }, | |
| { | |
| "epoch": 3.433098591549296, | |
| "grad_norm": 0.4689686929907477, | |
| "learning_rate": 2.4149072365595103e-05, | |
| "loss": 0.1535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1486755907535553, | |
| "step": 1950, | |
| "valid_targets_mean": 5458.1, | |
| "valid_targets_min": 2203 | |
| }, | |
| { | |
| "epoch": 3.441901408450704, | |
| "grad_norm": 0.5197914402083211, | |
| "learning_rate": 2.406313978921842e-05, | |
| "loss": 0.1795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16072751581668854, | |
| "step": 1955, | |
| "valid_targets_mean": 5068.9, | |
| "valid_targets_min": 2516 | |
| }, | |
| { | |
| "epoch": 3.4507042253521125, | |
| "grad_norm": 0.47433435586185363, | |
| "learning_rate": 2.3977128902314445e-05, | |
| "loss": 0.1666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14499083161354065, | |
| "step": 1960, | |
| "valid_targets_mean": 4695.9, | |
| "valid_targets_min": 1551 | |
| }, | |
| { | |
| "epoch": 3.459507042253521, | |
| "grad_norm": 0.5000285614168981, | |
| "learning_rate": 2.38910413626056e-05, | |
| "loss": 0.176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1688610017299652, | |
| "step": 1965, | |
| "valid_targets_mean": 4453.2, | |
| "valid_targets_min": 1963 | |
| }, | |
| { | |
| "epoch": 3.4683098591549295, | |
| "grad_norm": 0.46809512868989983, | |
| "learning_rate": 2.3804878829291655e-05, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21207034587860107, | |
| "step": 1970, | |
| "valid_targets_mean": 5491.1, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 3.477112676056338, | |
| "grad_norm": 0.4690556856911939, | |
| "learning_rate": 2.371864296301777e-05, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18206427991390228, | |
| "step": 1975, | |
| "valid_targets_mean": 5006.7, | |
| "valid_targets_min": 645 | |
| }, | |
| { | |
| "epoch": 3.4859154929577465, | |
| "grad_norm": 0.47856952095052274, | |
| "learning_rate": 2.3632335425842473e-05, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15519288182258606, | |
| "step": 1980, | |
| "valid_targets_mean": 4337.6, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 3.494718309859155, | |
| "grad_norm": 0.43736284191054026, | |
| "learning_rate": 2.354595788120565e-05, | |
| "loss": 0.1691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1416277140378952, | |
| "step": 1985, | |
| "valid_targets_mean": 5390.2, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 3.5035211267605635, | |
| "grad_norm": 0.49031757787746666, | |
| "learning_rate": 2.3459511993896447e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16551342606544495, | |
| "step": 1990, | |
| "valid_targets_mean": 5368.8, | |
| "valid_targets_min": 2479 | |
| }, | |
| { | |
| "epoch": 3.512323943661972, | |
| "grad_norm": 0.4939602763441871, | |
| "learning_rate": 2.337299943002123e-05, | |
| "loss": 0.1732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14685872197151184, | |
| "step": 1995, | |
| "valid_targets_mean": 4141.1, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 3.52112676056338, | |
| "grad_norm": 0.5277122318060502, | |
| "learning_rate": 2.3286421856971427e-05, | |
| "loss": 0.177, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21574288606643677, | |
| "step": 2000, | |
| "valid_targets_mean": 5158.5, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 3.529929577464789, | |
| "grad_norm": 0.46460805295016744, | |
| "learning_rate": 2.3199780943391422e-05, | |
| "loss": 0.1721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18154609203338623, | |
| "step": 2005, | |
| "valid_targets_mean": 5005.8, | |
| "valid_targets_min": 697 | |
| }, | |
| { | |
| "epoch": 3.538732394366197, | |
| "grad_norm": 0.42535757645511346, | |
| "learning_rate": 2.31130783591464e-05, | |
| "loss": 0.1671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17354996502399445, | |
| "step": 2010, | |
| "valid_targets_mean": 5819.6, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 3.5475352112676055, | |
| "grad_norm": 0.4328190467243813, | |
| "learning_rate": 2.3026315775290122e-05, | |
| "loss": 0.1558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17017832398414612, | |
| "step": 2015, | |
| "valid_targets_mean": 5322.4, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 3.556338028169014, | |
| "grad_norm": 0.5381452752854767, | |
| "learning_rate": 2.2939494864032773e-05, | |
| "loss": 0.1747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15388700366020203, | |
| "step": 2020, | |
| "valid_targets_mean": 3520.9, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 3.5651408450704225, | |
| "grad_norm": 0.42748778836114515, | |
| "learning_rate": 2.285261729870869e-05, | |
| "loss": 0.1671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18142807483673096, | |
| "step": 2025, | |
| "valid_targets_mean": 6041.7, | |
| "valid_targets_min": 1353 | |
| }, | |
| { | |
| "epoch": 3.573943661971831, | |
| "grad_norm": 0.48926886671424885, | |
| "learning_rate": 2.276568475374413e-05, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17620721459388733, | |
| "step": 2030, | |
| "valid_targets_mean": 4101.4, | |
| "valid_targets_min": 1177 | |
| }, | |
| { | |
| "epoch": 3.5827464788732395, | |
| "grad_norm": 0.48230989552024833, | |
| "learning_rate": 2.2678698904624996e-05, | |
| "loss": 0.1582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1554032415151596, | |
| "step": 2035, | |
| "valid_targets_mean": 4000.1, | |
| "valid_targets_min": 335 | |
| }, | |
| { | |
| "epoch": 3.591549295774648, | |
| "grad_norm": 0.5121659264464615, | |
| "learning_rate": 2.259166142786454e-05, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2826143801212311, | |
| "step": 2040, | |
| "valid_targets_mean": 5489.6, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 3.6003521126760565, | |
| "grad_norm": 0.44187271452611343, | |
| "learning_rate": 2.250457400097106e-05, | |
| "loss": 0.1783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17597493529319763, | |
| "step": 2045, | |
| "valid_targets_mean": 5164.8, | |
| "valid_targets_min": 1833 | |
| }, | |
| { | |
| "epoch": 3.609154929577465, | |
| "grad_norm": 0.4645594339457232, | |
| "learning_rate": 2.2417438302415557e-05, | |
| "loss": 0.1734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14367757737636566, | |
| "step": 2050, | |
| "valid_targets_mean": 4563.6, | |
| "valid_targets_min": 549 | |
| }, | |
| { | |
| "epoch": 3.617957746478873, | |
| "grad_norm": 0.45618736627176953, | |
| "learning_rate": 2.2330256011599393e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22604569792747498, | |
| "step": 2055, | |
| "valid_targets_mean": 6074.7, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 3.626760563380282, | |
| "grad_norm": 0.40551965765216164, | |
| "learning_rate": 2.224302880882193e-05, | |
| "loss": 0.1704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17311638593673706, | |
| "step": 2060, | |
| "valid_targets_mean": 5632.9, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 3.63556338028169, | |
| "grad_norm": 0.47382666106306875, | |
| "learning_rate": 2.215575837524812e-05, | |
| "loss": 0.1709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1573367565870285, | |
| "step": 2065, | |
| "valid_targets_mean": 4696.1, | |
| "valid_targets_min": 525 | |
| }, | |
| { | |
| "epoch": 3.6443661971830985, | |
| "grad_norm": 0.429699937011881, | |
| "learning_rate": 2.206844639287613e-05, | |
| "loss": 0.1785, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2351783812046051, | |
| "step": 2070, | |
| "valid_targets_mean": 6101.8, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 3.653169014084507, | |
| "grad_norm": 0.49961188508621157, | |
| "learning_rate": 2.1981094544504907e-05, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15354667603969574, | |
| "step": 2075, | |
| "valid_targets_mean": 4639.8, | |
| "valid_targets_min": 2006 | |
| }, | |
| { | |
| "epoch": 3.6619718309859155, | |
| "grad_norm": 0.473540979495, | |
| "learning_rate": 2.1893704513701773e-05, | |
| "loss": 0.1803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17172960937023163, | |
| "step": 2080, | |
| "valid_targets_mean": 5550.1, | |
| "valid_targets_min": 2221 | |
| }, | |
| { | |
| "epoch": 3.670774647887324, | |
| "grad_norm": 0.4479740539200005, | |
| "learning_rate": 2.1806277984769922e-05, | |
| "loss": 0.162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13346652686595917, | |
| "step": 2085, | |
| "valid_targets_mean": 4858.2, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 3.6795774647887325, | |
| "grad_norm": 0.45214197528939604, | |
| "learning_rate": 2.171881664271601e-05, | |
| "loss": 0.1523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14903153479099274, | |
| "step": 2090, | |
| "valid_targets_mean": 4562.4, | |
| "valid_targets_min": 1578 | |
| }, | |
| { | |
| "epoch": 3.688380281690141, | |
| "grad_norm": 0.5139545558420641, | |
| "learning_rate": 2.163132217321767e-05, | |
| "loss": 0.17, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14669877290725708, | |
| "step": 2095, | |
| "valid_targets_mean": 5107.3, | |
| "valid_targets_min": 2659 | |
| }, | |
| { | |
| "epoch": 3.697183098591549, | |
| "grad_norm": 0.4650212856068497, | |
| "learning_rate": 2.1543796262590986e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1713600605726242, | |
| "step": 2100, | |
| "valid_targets_mean": 4587.1, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 3.705985915492958, | |
| "grad_norm": 0.4620240101816985, | |
| "learning_rate": 2.145624059775804e-05, | |
| "loss": 0.1958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14345994591712952, | |
| "step": 2105, | |
| "valid_targets_mean": 5070.3, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 3.714788732394366, | |
| "grad_norm": 0.4728534255758024, | |
| "learning_rate": 2.1368656866214385e-05, | |
| "loss": 0.1684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1778983622789383, | |
| "step": 2110, | |
| "valid_targets_mean": 4969.3, | |
| "valid_targets_min": 1986 | |
| }, | |
| { | |
| "epoch": 3.7235915492957745, | |
| "grad_norm": 0.41563991802211614, | |
| "learning_rate": 2.128104675599649e-05, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14127704501152039, | |
| "step": 2115, | |
| "valid_targets_mean": 5493.1, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 3.732394366197183, | |
| "grad_norm": 0.45645094498490163, | |
| "learning_rate": 2.119341195564925e-05, | |
| "loss": 0.1641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16094809770584106, | |
| "step": 2120, | |
| "valid_targets_mean": 5238.9, | |
| "valid_targets_min": 2170 | |
| }, | |
| { | |
| "epoch": 3.7411971830985915, | |
| "grad_norm": 0.40647278456868974, | |
| "learning_rate": 2.110575415419341e-05, | |
| "loss": 0.1518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13593044877052307, | |
| "step": 2125, | |
| "valid_targets_mean": 5087.8, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.5062700068283805, | |
| "learning_rate": 2.1018075041093047e-05, | |
| "loss": 0.1624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1377507746219635, | |
| "step": 2130, | |
| "valid_targets_mean": 3662.9, | |
| "valid_targets_min": 510 | |
| }, | |
| { | |
| "epoch": 3.7588028169014085, | |
| "grad_norm": 0.4290076266379455, | |
| "learning_rate": 2.0930376306222963e-05, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17858129739761353, | |
| "step": 2135, | |
| "valid_targets_mean": 6133.2, | |
| "valid_targets_min": 2615 | |
| }, | |
| { | |
| "epoch": 3.767605633802817, | |
| "grad_norm": 0.4511287648498655, | |
| "learning_rate": 2.084265963983614e-05, | |
| "loss": 0.1626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14971250295639038, | |
| "step": 2140, | |
| "valid_targets_mean": 4966.8, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 3.7764084507042255, | |
| "grad_norm": 0.4893403835139981, | |
| "learning_rate": 2.075492673253118e-05, | |
| "loss": 0.1802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1986551582813263, | |
| "step": 2145, | |
| "valid_targets_mean": 4629.8, | |
| "valid_targets_min": 496 | |
| }, | |
| { | |
| "epoch": 3.785211267605634, | |
| "grad_norm": 0.43367176948599345, | |
| "learning_rate": 2.066717927521968e-05, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.163667231798172, | |
| "step": 2150, | |
| "valid_targets_mean": 6039.9, | |
| "valid_targets_min": 2352 | |
| }, | |
| { | |
| "epoch": 3.794014084507042, | |
| "grad_norm": 0.46607277870197716, | |
| "learning_rate": 2.057941895909368e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16401851177215576, | |
| "step": 2155, | |
| "valid_targets_mean": 5712.9, | |
| "valid_targets_min": 2401 | |
| }, | |
| { | |
| "epoch": 3.802816901408451, | |
| "grad_norm": 0.4485335209091962, | |
| "learning_rate": 2.049164747559305e-05, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1440940946340561, | |
| "step": 2160, | |
| "valid_targets_mean": 4684.3, | |
| "valid_targets_min": 2090 | |
| }, | |
| { | |
| "epoch": 3.811619718309859, | |
| "grad_norm": 0.5305757342334273, | |
| "learning_rate": 2.0403866516372884e-05, | |
| "loss": 0.1645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15400753915309906, | |
| "step": 2165, | |
| "valid_targets_mean": 5203.9, | |
| "valid_targets_min": 2389 | |
| }, | |
| { | |
| "epoch": 3.8204225352112675, | |
| "grad_norm": 0.48897073791367107, | |
| "learning_rate": 2.0316077773270923e-05, | |
| "loss": 0.1798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17022517323493958, | |
| "step": 2170, | |
| "valid_targets_mean": 4280.6, | |
| "valid_targets_min": 545 | |
| }, | |
| { | |
| "epoch": 3.829225352112676, | |
| "grad_norm": 0.47851272353348717, | |
| "learning_rate": 2.0228282938274918e-05, | |
| "loss": 0.1816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17419424653053284, | |
| "step": 2175, | |
| "valid_targets_mean": 4553.2, | |
| "valid_targets_min": 589 | |
| }, | |
| { | |
| "epoch": 3.8380281690140845, | |
| "grad_norm": 0.46266521100757135, | |
| "learning_rate": 2.0140483703490036e-05, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15270522236824036, | |
| "step": 2180, | |
| "valid_targets_mean": 4137.0, | |
| "valid_targets_min": 625 | |
| }, | |
| { | |
| "epoch": 3.846830985915493, | |
| "grad_norm": 0.4955322566566694, | |
| "learning_rate": 2.005268176110623e-05, | |
| "loss": 0.1724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16131135821342468, | |
| "step": 2185, | |
| "valid_targets_mean": 4303.1, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 3.8556338028169015, | |
| "grad_norm": 0.5273810373359673, | |
| "learning_rate": 1.9964878803365653e-05, | |
| "loss": 0.1635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1715337485074997, | |
| "step": 2190, | |
| "valid_targets_mean": 4336.6, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 3.86443661971831, | |
| "grad_norm": 0.5053975243771894, | |
| "learning_rate": 1.987707652253003e-05, | |
| "loss": 0.2126, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22063566744327545, | |
| "step": 2195, | |
| "valid_targets_mean": 4326.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 3.873239436619718, | |
| "grad_norm": 0.56726943450576, | |
| "learning_rate": 1.9789276610848013e-05, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1559564471244812, | |
| "step": 2200, | |
| "valid_targets_mean": 3436.6, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 3.882042253521127, | |
| "grad_norm": 0.4329382561249813, | |
| "learning_rate": 1.9701480760522636e-05, | |
| "loss": 0.1673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19367492198944092, | |
| "step": 2205, | |
| "valid_targets_mean": 6359.5, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 3.890845070422535, | |
| "grad_norm": 0.4530571675261026, | |
| "learning_rate": 1.9613690663678623e-05, | |
| "loss": 0.1916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20116250216960907, | |
| "step": 2210, | |
| "valid_targets_mean": 5060.1, | |
| "valid_targets_min": 1681 | |
| }, | |
| { | |
| "epoch": 3.8996478873239435, | |
| "grad_norm": 0.4893131604786605, | |
| "learning_rate": 1.9525908012329816e-05, | |
| "loss": 0.1896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1856159120798111, | |
| "step": 2215, | |
| "valid_targets_mean": 3993.2, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 3.908450704225352, | |
| "grad_norm": 0.4454609430580903, | |
| "learning_rate": 1.9438134498346555e-05, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13629961013793945, | |
| "step": 2220, | |
| "valid_targets_mean": 4390.7, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 3.9172535211267605, | |
| "grad_norm": 0.4504733417824674, | |
| "learning_rate": 1.9350371813423077e-05, | |
| "loss": 0.1564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13800424337387085, | |
| "step": 2225, | |
| "valid_targets_mean": 4872.4, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 3.926056338028169, | |
| "grad_norm": 0.45083042218560576, | |
| "learning_rate": 1.926262164904492e-05, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17800462245941162, | |
| "step": 2230, | |
| "valid_targets_mean": 5156.1, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 3.9348591549295775, | |
| "grad_norm": 0.5166566097850995, | |
| "learning_rate": 1.9174885696456277e-05, | |
| "loss": 0.1885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1711212396621704, | |
| "step": 2235, | |
| "valid_targets_mean": 4064.9, | |
| "valid_targets_min": 470 | |
| }, | |
| { | |
| "epoch": 3.943661971830986, | |
| "grad_norm": 0.43487393481243397, | |
| "learning_rate": 1.908716564662746e-05, | |
| "loss": 0.161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.148806631565094, | |
| "step": 2240, | |
| "valid_targets_mean": 5520.5, | |
| "valid_targets_min": 646 | |
| }, | |
| { | |
| "epoch": 3.9524647887323945, | |
| "grad_norm": 0.38981858117480517, | |
| "learning_rate": 1.899946319022225e-05, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13189315795898438, | |
| "step": 2245, | |
| "valid_targets_mean": 5388.0, | |
| "valid_targets_min": 1711 | |
| }, | |
| { | |
| "epoch": 3.961267605633803, | |
| "grad_norm": 0.3762832869988845, | |
| "learning_rate": 1.8911780017565393e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1705811321735382, | |
| "step": 2250, | |
| "valid_targets_mean": 6868.6, | |
| "valid_targets_min": 1749 | |
| }, | |
| { | |
| "epoch": 3.970070422535211, | |
| "grad_norm": 0.5897881265453858, | |
| "learning_rate": 1.882411781860991e-05, | |
| "loss": 0.1678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1345662921667099, | |
| "step": 2255, | |
| "valid_targets_mean": 4922.1, | |
| "valid_targets_min": 590 | |
| }, | |
| { | |
| "epoch": 3.97887323943662, | |
| "grad_norm": 0.4036099797608837, | |
| "learning_rate": 1.873647828290464e-05, | |
| "loss": 0.1672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12994800508022308, | |
| "step": 2260, | |
| "valid_targets_mean": 5135.7, | |
| "valid_targets_min": 449 | |
| }, | |
| { | |
| "epoch": 3.987676056338028, | |
| "grad_norm": 0.5948929520379044, | |
| "learning_rate": 1.8648863099561583e-05, | |
| "loss": 0.178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21736794710159302, | |
| "step": 2265, | |
| "valid_targets_mean": 4488.5, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 3.9964788732394365, | |
| "grad_norm": 0.42904447198377027, | |
| "learning_rate": 1.8561273957223424e-05, | |
| "loss": 0.1793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2254689335823059, | |
| "step": 2270, | |
| "valid_targets_mean": 5716.9, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 4.005281690140845, | |
| "grad_norm": 0.41578296186358826, | |
| "learning_rate": 1.8473712544030914e-05, | |
| "loss": 0.1486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1328504979610443, | |
| "step": 2275, | |
| "valid_targets_mean": 5412.3, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 4.014084507042254, | |
| "grad_norm": 0.48864818047340725, | |
| "learning_rate": 1.8386180547590397e-05, | |
| "loss": 0.1777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15038450062274933, | |
| "step": 2280, | |
| "valid_targets_mean": 4812.4, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 4.022887323943662, | |
| "grad_norm": 0.47411862294829393, | |
| "learning_rate": 1.8298679654941237e-05, | |
| "loss": 0.1754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1898292601108551, | |
| "step": 2285, | |
| "valid_targets_mean": 5674.5, | |
| "valid_targets_min": 1110 | |
| }, | |
| { | |
| "epoch": 4.03169014084507, | |
| "grad_norm": 0.5459436523058485, | |
| "learning_rate": 1.8211211552523328e-05, | |
| "loss": 0.1415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1261502206325531, | |
| "step": 2290, | |
| "valid_targets_mean": 3948.4, | |
| "valid_targets_min": 576 | |
| }, | |
| { | |
| "epoch": 4.040492957746479, | |
| "grad_norm": 0.5228839906083369, | |
| "learning_rate": 1.8123777926144596e-05, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19840890169143677, | |
| "step": 2295, | |
| "valid_targets_mean": 4346.4, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 4.049295774647887, | |
| "grad_norm": 0.5490633591139935, | |
| "learning_rate": 1.8036380460948483e-05, | |
| "loss": 0.1652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16841408610343933, | |
| "step": 2300, | |
| "valid_targets_mean": 3859.4, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 4.058098591549296, | |
| "grad_norm": 0.48930540432688346, | |
| "learning_rate": 1.794902084138148e-05, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15049806237220764, | |
| "step": 2305, | |
| "valid_targets_mean": 5094.9, | |
| "valid_targets_min": 740 | |
| }, | |
| { | |
| "epoch": 4.066901408450704, | |
| "grad_norm": 0.5021620240690181, | |
| "learning_rate": 1.786170075116067e-05, | |
| "loss": 0.1616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15094603598117828, | |
| "step": 2310, | |
| "valid_targets_mean": 4120.2, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 4.075704225352113, | |
| "grad_norm": 0.4266741671677392, | |
| "learning_rate": 1.777442187324128e-05, | |
| "loss": 0.1426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12231635302305222, | |
| "step": 2315, | |
| "valid_targets_mean": 5445.0, | |
| "valid_targets_min": 1853 | |
| }, | |
| { | |
| "epoch": 4.084507042253521, | |
| "grad_norm": 0.5527335218490069, | |
| "learning_rate": 1.768718588978422e-05, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14894762635231018, | |
| "step": 2320, | |
| "valid_targets_mean": 4726.0, | |
| "valid_targets_min": 781 | |
| }, | |
| { | |
| "epoch": 4.09330985915493, | |
| "grad_norm": 0.5130525303159239, | |
| "learning_rate": 1.7599994482123687e-05, | |
| "loss": 0.1702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16520407795906067, | |
| "step": 2325, | |
| "valid_targets_mean": 4253.9, | |
| "valid_targets_min": 654 | |
| }, | |
| { | |
| "epoch": 4.102112676056338, | |
| "grad_norm": 0.5429303415267194, | |
| "learning_rate": 1.7512849330734734e-05, | |
| "loss": 0.1516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14985045790672302, | |
| "step": 2330, | |
| "valid_targets_mean": 3859.4, | |
| "valid_targets_min": 2288 | |
| }, | |
| { | |
| "epoch": 4.110915492957746, | |
| "grad_norm": 0.5538929822527909, | |
| "learning_rate": 1.7425752115200933e-05, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14181582629680634, | |
| "step": 2335, | |
| "valid_targets_mean": 4226.1, | |
| "valid_targets_min": 471 | |
| }, | |
| { | |
| "epoch": 4.119718309859155, | |
| "grad_norm": 0.5849617209180507, | |
| "learning_rate": 1.7338704514181937e-05, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17426012456417084, | |
| "step": 2340, | |
| "valid_targets_mean": 5178.8, | |
| "valid_targets_min": 669 | |
| }, | |
| { | |
| "epoch": 4.128521126760563, | |
| "grad_norm": 0.4547492782655953, | |
| "learning_rate": 1.7251708205381175e-05, | |
| "loss": 0.152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15097564458847046, | |
| "step": 2345, | |
| "valid_targets_mean": 4935.0, | |
| "valid_targets_min": 504 | |
| }, | |
| { | |
| "epoch": 4.137323943661972, | |
| "grad_norm": 0.49784945013373944, | |
| "learning_rate": 1.7164764865513485e-05, | |
| "loss": 0.1489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1430858075618744, | |
| "step": 2350, | |
| "valid_targets_mean": 5205.2, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 4.14612676056338, | |
| "grad_norm": 0.4775427486722068, | |
| "learning_rate": 1.7077876170272825e-05, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14093689620494843, | |
| "step": 2355, | |
| "valid_targets_mean": 4984.8, | |
| "valid_targets_min": 2409 | |
| }, | |
| { | |
| "epoch": 4.154929577464789, | |
| "grad_norm": 0.5126239520181953, | |
| "learning_rate": 1.699104379429998e-05, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13469412922859192, | |
| "step": 2360, | |
| "valid_targets_mean": 4717.0, | |
| "valid_targets_min": 1966 | |
| }, | |
| { | |
| "epoch": 4.163732394366197, | |
| "grad_norm": 0.4686865830793551, | |
| "learning_rate": 1.6904269411150242e-05, | |
| "loss": 0.1597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13589268922805786, | |
| "step": 2365, | |
| "valid_targets_mean": 4818.1, | |
| "valid_targets_min": 410 | |
| }, | |
| { | |
| "epoch": 4.172535211267606, | |
| "grad_norm": 0.5057173883276658, | |
| "learning_rate": 1.6817554693261194e-05, | |
| "loss": 0.1564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16583459079265594, | |
| "step": 2370, | |
| "valid_targets_mean": 4495.5, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 4.181338028169014, | |
| "grad_norm": 0.48945841904985693, | |
| "learning_rate": 1.673090131192047e-05, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13886778056621552, | |
| "step": 2375, | |
| "valid_targets_mean": 4211.4, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 4.190140845070423, | |
| "grad_norm": 0.4591880639976351, | |
| "learning_rate": 1.6644310937233553e-05, | |
| "loss": 0.1505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14640088379383087, | |
| "step": 2380, | |
| "valid_targets_mean": 5389.2, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 4.198943661971831, | |
| "grad_norm": 0.4515650807724913, | |
| "learning_rate": 1.655778523809154e-05, | |
| "loss": 0.1654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16159164905548096, | |
| "step": 2385, | |
| "valid_targets_mean": 5671.8, | |
| "valid_targets_min": 451 | |
| }, | |
| { | |
| "epoch": 4.207746478873239, | |
| "grad_norm": 0.4963841823081782, | |
| "learning_rate": 1.6471325882139045e-05, | |
| "loss": 0.147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1411166489124298, | |
| "step": 2390, | |
| "valid_targets_mean": 4140.2, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 4.216549295774648, | |
| "grad_norm": 0.5033403304061811, | |
| "learning_rate": 1.6384934535742006e-05, | |
| "loss": 0.1446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16997385025024414, | |
| "step": 2395, | |
| "valid_targets_mean": 4862.3, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 4.225352112676056, | |
| "grad_norm": 0.5736693135298484, | |
| "learning_rate": 1.629861286395557e-05, | |
| "loss": 0.1435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12839220464229584, | |
| "step": 2400, | |
| "valid_targets_mean": 3630.4, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 4.234154929577465, | |
| "grad_norm": 0.4934782143957263, | |
| "learning_rate": 1.6212362530492053e-05, | |
| "loss": 0.1609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19510763883590698, | |
| "step": 2405, | |
| "valid_targets_mean": 5796.1, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 4.242957746478873, | |
| "grad_norm": 0.5465490477105494, | |
| "learning_rate": 1.612618519768882e-05, | |
| "loss": 0.14, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14107325673103333, | |
| "step": 2410, | |
| "valid_targets_mean": 4313.2, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 4.251760563380282, | |
| "grad_norm": 0.4922985170564093, | |
| "learning_rate": 1.604008252647626e-05, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19075796008110046, | |
| "step": 2415, | |
| "valid_targets_mean": 4854.5, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 4.26056338028169, | |
| "grad_norm": 0.45093631261046246, | |
| "learning_rate": 1.5954056176345778e-05, | |
| "loss": 0.1495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13266444206237793, | |
| "step": 2420, | |
| "valid_targets_mean": 4844.9, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 4.269366197183099, | |
| "grad_norm": 0.4751368262228937, | |
| "learning_rate": 1.5868107805317836e-05, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12895408272743225, | |
| "step": 2425, | |
| "valid_targets_mean": 4252.9, | |
| "valid_targets_min": 2802 | |
| }, | |
| { | |
| "epoch": 4.278169014084507, | |
| "grad_norm": 0.6132431488100101, | |
| "learning_rate": 1.578223906990994e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16754725575447083, | |
| "step": 2430, | |
| "valid_targets_mean": 4203.2, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 4.286971830985916, | |
| "grad_norm": 0.5259162908585734, | |
| "learning_rate": 1.569645162510477e-05, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1327393800020218, | |
| "step": 2435, | |
| "valid_targets_mean": 3720.4, | |
| "valid_targets_min": 675 | |
| }, | |
| { | |
| "epoch": 4.295774647887324, | |
| "grad_norm": 0.4965066825641266, | |
| "learning_rate": 1.5610747124318244e-05, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15128956735134125, | |
| "step": 2440, | |
| "valid_targets_mean": 4442.8, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 4.304577464788732, | |
| "grad_norm": 0.5353607560053651, | |
| "learning_rate": 1.552512721936769e-05, | |
| "loss": 0.1506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12414208799600601, | |
| "step": 2445, | |
| "valid_targets_mean": 3796.6, | |
| "valid_targets_min": 431 | |
| }, | |
| { | |
| "epoch": 4.313380281690141, | |
| "grad_norm": 0.5268935187002102, | |
| "learning_rate": 1.5439593560439957e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2764013409614563, | |
| "step": 2450, | |
| "valid_targets_mean": 5156.4, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 4.322183098591549, | |
| "grad_norm": 0.4455677994969596, | |
| "learning_rate": 1.5354147796059664e-05, | |
| "loss": 0.1556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1880626231431961, | |
| "step": 2455, | |
| "valid_targets_mean": 5201.8, | |
| "valid_targets_min": 521 | |
| }, | |
| { | |
| "epoch": 4.330985915492958, | |
| "grad_norm": 0.5465874365692489, | |
| "learning_rate": 1.526879157305739e-05, | |
| "loss": 0.1609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17341095209121704, | |
| "step": 2460, | |
| "valid_targets_mean": 5159.8, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 4.339788732394366, | |
| "grad_norm": 0.46166538139866287, | |
| "learning_rate": 1.5183526536537935e-05, | |
| "loss": 0.1588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14023971557617188, | |
| "step": 2465, | |
| "valid_targets_mean": 4662.8, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 4.348591549295775, | |
| "grad_norm": 0.5156624034417078, | |
| "learning_rate": 1.5098354329848658e-05, | |
| "loss": 0.1529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12792891263961792, | |
| "step": 2470, | |
| "valid_targets_mean": 3901.2, | |
| "valid_targets_min": 268 | |
| }, | |
| { | |
| "epoch": 4.357394366197183, | |
| "grad_norm": 0.45085280864666116, | |
| "learning_rate": 1.5013276594547754e-05, | |
| "loss": 0.146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15307819843292236, | |
| "step": 2475, | |
| "valid_targets_mean": 5491.9, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 4.366197183098592, | |
| "grad_norm": 0.4519697723054663, | |
| "learning_rate": 1.4928294970372623e-05, | |
| "loss": 0.1505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16891774535179138, | |
| "step": 2480, | |
| "valid_targets_mean": 5682.1, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 0.4591768121920202, | |
| "learning_rate": 1.4843411095208288e-05, | |
| "loss": 0.1441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1565144956111908, | |
| "step": 2485, | |
| "valid_targets_mean": 5503.3, | |
| "valid_targets_min": 2214 | |
| }, | |
| { | |
| "epoch": 4.383802816901408, | |
| "grad_norm": 0.5483904985424385, | |
| "learning_rate": 1.4758626605055816e-05, | |
| "loss": 0.1486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13672903180122375, | |
| "step": 2490, | |
| "valid_targets_mean": 3591.8, | |
| "valid_targets_min": 539 | |
| }, | |
| { | |
| "epoch": 4.392605633802817, | |
| "grad_norm": 0.5015527466652048, | |
| "learning_rate": 1.4673943134000791e-05, | |
| "loss": 0.1737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2063775509595871, | |
| "step": 2495, | |
| "valid_targets_mean": 5219.6, | |
| "valid_targets_min": 565 | |
| }, | |
| { | |
| "epoch": 4.401408450704225, | |
| "grad_norm": 0.458556043531165, | |
| "learning_rate": 1.4589362314181799e-05, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15635624527931213, | |
| "step": 2500, | |
| "valid_targets_mean": 6004.8, | |
| "valid_targets_min": 2073 | |
| }, | |
| { | |
| "epoch": 4.410211267605634, | |
| "grad_norm": 0.5155821581899631, | |
| "learning_rate": 1.4504885775758992e-05, | |
| "loss": 0.1549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14403770864009857, | |
| "step": 2505, | |
| "valid_targets_mean": 5039.8, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 4.419014084507042, | |
| "grad_norm": 0.45302165156167606, | |
| "learning_rate": 1.4420515146882692e-05, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1421784907579422, | |
| "step": 2510, | |
| "valid_targets_mean": 5575.2, | |
| "valid_targets_min": 461 | |
| }, | |
| { | |
| "epoch": 4.427816901408451, | |
| "grad_norm": 0.48750209611651835, | |
| "learning_rate": 1.433625205366195e-05, | |
| "loss": 0.1743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1718706637620926, | |
| "step": 2515, | |
| "valid_targets_mean": 5592.9, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 4.436619718309859, | |
| "grad_norm": 0.5452515378715184, | |
| "learning_rate": 1.4252098120133243e-05, | |
| "loss": 0.175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26548853516578674, | |
| "step": 2520, | |
| "valid_targets_mean": 4830.3, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 4.445422535211268, | |
| "grad_norm": 0.5023114646186418, | |
| "learning_rate": 1.416805496822919e-05, | |
| "loss": 0.1435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15291684865951538, | |
| "step": 2525, | |
| "valid_targets_mean": 4450.2, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 4.454225352112676, | |
| "grad_norm": 0.45557589087532935, | |
| "learning_rate": 1.4084124217747244e-05, | |
| "loss": 0.1472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14493940770626068, | |
| "step": 2530, | |
| "valid_targets_mean": 4898.9, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 4.463028169014084, | |
| "grad_norm": 0.48524939802472594, | |
| "learning_rate": 1.4000307486318527e-05, | |
| "loss": 0.171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18977460265159607, | |
| "step": 2535, | |
| "valid_targets_mean": 5147.7, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 4.471830985915493, | |
| "grad_norm": 0.5284845338042901, | |
| "learning_rate": 1.3916606389376614e-05, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17195051908493042, | |
| "step": 2540, | |
| "valid_targets_mean": 4249.1, | |
| "valid_targets_min": 529 | |
| }, | |
| { | |
| "epoch": 4.480633802816901, | |
| "grad_norm": 0.4697891428297426, | |
| "learning_rate": 1.3833022540126408e-05, | |
| "loss": 0.1559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15351040661334991, | |
| "step": 2545, | |
| "valid_targets_mean": 5102.1, | |
| "valid_targets_min": 821 | |
| }, | |
| { | |
| "epoch": 4.48943661971831, | |
| "grad_norm": 0.4899548179790747, | |
| "learning_rate": 1.3749557549513042e-05, | |
| "loss": 0.1605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15383687615394592, | |
| "step": 2550, | |
| "valid_targets_mean": 4562.4, | |
| "valid_targets_min": 1387 | |
| }, | |
| { | |
| "epoch": 4.498239436619718, | |
| "grad_norm": 0.4683650865400761, | |
| "learning_rate": 1.3666213026190857e-05, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15272648632526398, | |
| "step": 2555, | |
| "valid_targets_mean": 5149.3, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 4.507042253521127, | |
| "grad_norm": 0.378443974795295, | |
| "learning_rate": 1.3582990576492377e-05, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12310489267110825, | |
| "step": 2560, | |
| "valid_targets_mean": 5920.5, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 4.515845070422535, | |
| "grad_norm": 0.53727659812995, | |
| "learning_rate": 1.3499891804397333e-05, | |
| "loss": 0.1587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19111016392707825, | |
| "step": 2565, | |
| "valid_targets_mean": 5712.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 4.524647887323944, | |
| "grad_norm": 0.49695289085676886, | |
| "learning_rate": 1.3416918311501783e-05, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13759997487068176, | |
| "step": 2570, | |
| "valid_targets_mean": 3903.4, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 4.533450704225352, | |
| "grad_norm": 0.4189875845000706, | |
| "learning_rate": 1.3334071696987238e-05, | |
| "loss": 0.152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14398400485515594, | |
| "step": 2575, | |
| "valid_targets_mean": 5743.6, | |
| "valid_targets_min": 749 | |
| }, | |
| { | |
| "epoch": 4.542253521126761, | |
| "grad_norm": 0.5535481042048098, | |
| "learning_rate": 1.325135355758981e-05, | |
| "loss": 0.1465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15481239557266235, | |
| "step": 2580, | |
| "valid_targets_mean": 3776.8, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 4.551056338028169, | |
| "grad_norm": 0.48238236790826733, | |
| "learning_rate": 1.3168765487569469e-05, | |
| "loss": 0.1336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17277759313583374, | |
| "step": 2585, | |
| "valid_targets_mean": 5082.6, | |
| "valid_targets_min": 418 | |
| }, | |
| { | |
| "epoch": 4.559859154929578, | |
| "grad_norm": 0.6223130745914908, | |
| "learning_rate": 1.3086309078679303e-05, | |
| "loss": 0.1633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21520434319972992, | |
| "step": 2590, | |
| "valid_targets_mean": 3778.6, | |
| "valid_targets_min": 620 | |
| }, | |
| { | |
| "epoch": 4.568661971830986, | |
| "grad_norm": 0.4758346117277764, | |
| "learning_rate": 1.3003985920134837e-05, | |
| "loss": 0.1555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.175389364361763, | |
| "step": 2595, | |
| "valid_targets_mean": 5497.2, | |
| "valid_targets_min": 2589 | |
| }, | |
| { | |
| "epoch": 4.577464788732394, | |
| "grad_norm": 0.48525961572251436, | |
| "learning_rate": 1.2921797598583422e-05, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17201641201972961, | |
| "step": 2600, | |
| "valid_targets_mean": 4567.9, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 4.586267605633803, | |
| "grad_norm": 0.4960186056684276, | |
| "learning_rate": 1.2839745698073642e-05, | |
| "loss": 0.1409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13019582629203796, | |
| "step": 2605, | |
| "valid_targets_mean": 4069.4, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 4.595070422535211, | |
| "grad_norm": 0.4353879561643312, | |
| "learning_rate": 1.2757831800024767e-05, | |
| "loss": 0.1411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13025638461112976, | |
| "step": 2610, | |
| "valid_targets_mean": 5177.3, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 4.60387323943662, | |
| "grad_norm": 0.5263359449099279, | |
| "learning_rate": 1.2676057483196289e-05, | |
| "loss": 0.1578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15034329891204834, | |
| "step": 2615, | |
| "valid_targets_mean": 4330.4, | |
| "valid_targets_min": 583 | |
| }, | |
| { | |
| "epoch": 4.612676056338028, | |
| "grad_norm": 0.486521404762043, | |
| "learning_rate": 1.2594424323657521e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14321503043174744, | |
| "step": 2620, | |
| "valid_targets_mean": 4689.3, | |
| "valid_targets_min": 320 | |
| }, | |
| { | |
| "epoch": 4.621478873239437, | |
| "grad_norm": 0.4657525593132399, | |
| "learning_rate": 1.2512933894757172e-05, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15237370133399963, | |
| "step": 2625, | |
| "valid_targets_mean": 5156.9, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 4.630281690140845, | |
| "grad_norm": 0.5265272970510325, | |
| "learning_rate": 1.2431587767093052e-05, | |
| "loss": 0.1444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1557048112154007, | |
| "step": 2630, | |
| "valid_targets_mean": 5164.2, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 4.639084507042254, | |
| "grad_norm": 0.43577807704272525, | |
| "learning_rate": 1.2350387508481799e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1315222978591919, | |
| "step": 2635, | |
| "valid_targets_mean": 4919.8, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 4.647887323943662, | |
| "grad_norm": 0.5467997300943339, | |
| "learning_rate": 1.2269334683928641e-05, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22024103999137878, | |
| "step": 2640, | |
| "valid_targets_mean": 4163.1, | |
| "valid_targets_min": 1935 | |
| }, | |
| { | |
| "epoch": 4.65669014084507, | |
| "grad_norm": 0.4759377192904221, | |
| "learning_rate": 1.2188430855597286e-05, | |
| "loss": 0.1576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1618720293045044, | |
| "step": 2645, | |
| "valid_targets_mean": 4898.1, | |
| "valid_targets_min": 1812 | |
| }, | |
| { | |
| "epoch": 4.665492957746479, | |
| "grad_norm": 0.49469553348768186, | |
| "learning_rate": 1.210767758277974e-05, | |
| "loss": 0.1516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14193172752857208, | |
| "step": 2650, | |
| "valid_targets_mean": 4991.8, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 4.674295774647887, | |
| "grad_norm": 0.4701094824431234, | |
| "learning_rate": 1.2027076421866313e-05, | |
| "loss": 0.1727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1618981659412384, | |
| "step": 2655, | |
| "valid_targets_mean": 5334.9, | |
| "valid_targets_min": 2322 | |
| }, | |
| { | |
| "epoch": 4.683098591549296, | |
| "grad_norm": 0.5527531116299745, | |
| "learning_rate": 1.1946628926315587e-05, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16847437620162964, | |
| "step": 2660, | |
| "valid_targets_mean": 4793.4, | |
| "valid_targets_min": 773 | |
| }, | |
| { | |
| "epoch": 4.691901408450704, | |
| "grad_norm": 0.601509618093992, | |
| "learning_rate": 1.1866336646624512e-05, | |
| "loss": 0.1669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18065690994262695, | |
| "step": 2665, | |
| "valid_targets_mean": 3137.6, | |
| "valid_targets_min": 356 | |
| }, | |
| { | |
| "epoch": 4.700704225352113, | |
| "grad_norm": 0.46001798261763166, | |
| "learning_rate": 1.1786201130298486e-05, | |
| "loss": 0.1549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14724907279014587, | |
| "step": 2670, | |
| "valid_targets_mean": 5081.4, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 4.709507042253521, | |
| "grad_norm": 0.510588818292158, | |
| "learning_rate": 1.1706223921821536e-05, | |
| "loss": 0.1536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1479375958442688, | |
| "step": 2675, | |
| "valid_targets_mean": 5640.8, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 4.71830985915493, | |
| "grad_norm": 0.4893331792343721, | |
| "learning_rate": 1.162640656262656e-05, | |
| "loss": 0.1655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15622639656066895, | |
| "step": 2680, | |
| "valid_targets_mean": 5076.8, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 4.727112676056338, | |
| "grad_norm": 0.4809452886456036, | |
| "learning_rate": 1.1546750591065643e-05, | |
| "loss": 0.1622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1484544575214386, | |
| "step": 2685, | |
| "valid_targets_mean": 4294.7, | |
| "valid_targets_min": 627 | |
| }, | |
| { | |
| "epoch": 4.735915492957746, | |
| "grad_norm": 0.45921490045295565, | |
| "learning_rate": 1.1467257542380355e-05, | |
| "loss": 0.1622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15124857425689697, | |
| "step": 2690, | |
| "valid_targets_mean": 5161.9, | |
| "valid_targets_min": 2116 | |
| }, | |
| { | |
| "epoch": 4.744718309859155, | |
| "grad_norm": 0.5230145474661075, | |
| "learning_rate": 1.1387928948672186e-05, | |
| "loss": 0.16, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16103720664978027, | |
| "step": 2695, | |
| "valid_targets_mean": 4263.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 4.753521126760563, | |
| "grad_norm": 0.4735292711978127, | |
| "learning_rate": 1.1308766338873038e-05, | |
| "loss": 0.1522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15144971013069153, | |
| "step": 2700, | |
| "valid_targets_mean": 5041.6, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 4.762323943661972, | |
| "grad_norm": 0.4785804664681169, | |
| "learning_rate": 1.122977123871571e-05, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1642574816942215, | |
| "step": 2705, | |
| "valid_targets_mean": 5188.2, | |
| "valid_targets_min": 2216 | |
| }, | |
| { | |
| "epoch": 4.77112676056338, | |
| "grad_norm": 0.47946423026738955, | |
| "learning_rate": 1.1150945170704547e-05, | |
| "loss": 0.1603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1856997013092041, | |
| "step": 2710, | |
| "valid_targets_mean": 5500.6, | |
| "valid_targets_min": 661 | |
| }, | |
| { | |
| "epoch": 4.779929577464789, | |
| "grad_norm": 0.48901879887005484, | |
| "learning_rate": 1.1072289654086074e-05, | |
| "loss": 0.1458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1390392929315567, | |
| "step": 2715, | |
| "valid_targets_mean": 4527.9, | |
| "valid_targets_min": 282 | |
| }, | |
| { | |
| "epoch": 4.788732394366197, | |
| "grad_norm": 0.4356364874903345, | |
| "learning_rate": 1.0993806204819686e-05, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13773053884506226, | |
| "step": 2720, | |
| "valid_targets_mean": 5728.1, | |
| "valid_targets_min": 1186 | |
| }, | |
| { | |
| "epoch": 4.797535211267606, | |
| "grad_norm": 0.4705610291417452, | |
| "learning_rate": 1.0915496335548456e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12111662328243256, | |
| "step": 2725, | |
| "valid_targets_mean": 4483.2, | |
| "valid_targets_min": 1252 | |
| }, | |
| { | |
| "epoch": 4.806338028169014, | |
| "grad_norm": 0.5128667138325851, | |
| "learning_rate": 1.0837361555570007e-05, | |
| "loss": 0.1482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14267593622207642, | |
| "step": 2730, | |
| "valid_targets_mean": 4290.2, | |
| "valid_targets_min": 1765 | |
| }, | |
| { | |
| "epoch": 4.815140845070422, | |
| "grad_norm": 0.4804516299028683, | |
| "learning_rate": 1.0759403370807369e-05, | |
| "loss": 0.1515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17430897057056427, | |
| "step": 2735, | |
| "valid_targets_mean": 4815.3, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 4.823943661971831, | |
| "grad_norm": 0.44118416504675695, | |
| "learning_rate": 1.0681623283779982e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14086471498012543, | |
| "step": 2740, | |
| "valid_targets_mean": 6785.6, | |
| "valid_targets_min": 2708 | |
| }, | |
| { | |
| "epoch": 4.832746478873239, | |
| "grad_norm": 0.4873675413263834, | |
| "learning_rate": 1.0604022793574757e-05, | |
| "loss": 0.1519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15508151054382324, | |
| "step": 2745, | |
| "valid_targets_mean": 4627.9, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 4.841549295774648, | |
| "grad_norm": 0.4506221436347831, | |
| "learning_rate": 1.0526603395817158e-05, | |
| "loss": 0.1421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13319256901741028, | |
| "step": 2750, | |
| "valid_targets_mean": 4319.2, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 4.850352112676056, | |
| "grad_norm": 0.46956931276757086, | |
| "learning_rate": 1.0449366582642364e-05, | |
| "loss": 0.1491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1536487638950348, | |
| "step": 2755, | |
| "valid_targets_mean": 5563.9, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 4.859154929577465, | |
| "grad_norm": 0.4987880875462112, | |
| "learning_rate": 1.0372313842666544e-05, | |
| "loss": 0.1695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1490713208913803, | |
| "step": 2760, | |
| "valid_targets_mean": 5485.8, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 4.867957746478873, | |
| "grad_norm": 0.4235178456170973, | |
| "learning_rate": 1.0295446660958137e-05, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15671254694461823, | |
| "step": 2765, | |
| "valid_targets_mean": 5981.4, | |
| "valid_targets_min": 2355 | |
| }, | |
| { | |
| "epoch": 4.876760563380282, | |
| "grad_norm": 0.5569766563487603, | |
| "learning_rate": 1.0218766519009252e-05, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15036681294441223, | |
| "step": 2770, | |
| "valid_targets_mean": 3639.5, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 4.88556338028169, | |
| "grad_norm": 0.8023079792060502, | |
| "learning_rate": 1.0142274894707102e-05, | |
| "loss": 0.168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15211105346679688, | |
| "step": 2775, | |
| "valid_targets_mean": 4922.9, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 4.894366197183099, | |
| "grad_norm": 0.5410527465773426, | |
| "learning_rate": 1.0065973262305544e-05, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15186692774295807, | |
| "step": 2780, | |
| "valid_targets_mean": 4780.6, | |
| "valid_targets_min": 2736 | |
| }, | |
| { | |
| "epoch": 4.903169014084507, | |
| "grad_norm": 0.45387115310656867, | |
| "learning_rate": 9.989863092396615e-06, | |
| "loss": 0.142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1355457603931427, | |
| "step": 2785, | |
| "valid_targets_mean": 5153.8, | |
| "valid_targets_min": 1203 | |
| }, | |
| { | |
| "epoch": 4.911971830985916, | |
| "grad_norm": 0.569430456733756, | |
| "learning_rate": 9.913945851882221e-06, | |
| "loss": 0.143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13012991845607758, | |
| "step": 2790, | |
| "valid_targets_mean": 3606.7, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 4.920774647887324, | |
| "grad_norm": 0.45430273321923625, | |
| "learning_rate": 9.838223003945886e-06, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12484899908304214, | |
| "step": 2795, | |
| "valid_targets_mean": 4605.3, | |
| "valid_targets_min": 484 | |
| }, | |
| { | |
| "epoch": 4.929577464788732, | |
| "grad_norm": 0.45694803096183795, | |
| "learning_rate": 9.762696008024505e-06, | |
| "loss": 0.1686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15040870010852814, | |
| "step": 2800, | |
| "valid_targets_mean": 4972.5, | |
| "valid_targets_min": 2180 | |
| }, | |
| { | |
| "epoch": 4.938380281690141, | |
| "grad_norm": 0.4629289015439348, | |
| "learning_rate": 9.687366319780242e-06, | |
| "loss": 0.1432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13926619291305542, | |
| "step": 2805, | |
| "valid_targets_mean": 5038.8, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 4.947183098591549, | |
| "grad_norm": 0.44325571955368415, | |
| "learning_rate": 9.612235391072483e-06, | |
| "loss": 0.1533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13713839650154114, | |
| "step": 2810, | |
| "valid_targets_mean": 4540.9, | |
| "valid_targets_min": 2027 | |
| }, | |
| { | |
| "epoch": 4.955985915492958, | |
| "grad_norm": 0.49607658211815553, | |
| "learning_rate": 9.537304669929837e-06, | |
| "loss": 0.1506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16329160332679749, | |
| "step": 2815, | |
| "valid_targets_mean": 4439.8, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 4.964788732394366, | |
| "grad_norm": 0.48699653495099654, | |
| "learning_rate": 9.46257560052222e-06, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1788143515586853, | |
| "step": 2820, | |
| "valid_targets_mean": 4410.5, | |
| "valid_targets_min": 366 | |
| }, | |
| { | |
| "epoch": 4.973591549295775, | |
| "grad_norm": 0.5036491405861744, | |
| "learning_rate": 9.388049623133047e-06, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16246581077575684, | |
| "step": 2825, | |
| "valid_targets_mean": 4873.7, | |
| "valid_targets_min": 533 | |
| }, | |
| { | |
| "epoch": 4.982394366197183, | |
| "grad_norm": 0.44024207596068343, | |
| "learning_rate": 9.313728174131451e-06, | |
| "loss": 0.1421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16089218854904175, | |
| "step": 2830, | |
| "valid_targets_mean": 6215.7, | |
| "valid_targets_min": 675 | |
| }, | |
| { | |
| "epoch": 4.991197183098592, | |
| "grad_norm": 0.4024093719018177, | |
| "learning_rate": 9.239612685944599e-06, | |
| "loss": 0.1467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1541212946176529, | |
| "step": 2835, | |
| "valid_targets_mean": 6917.1, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5460154581131677, | |
| "learning_rate": 9.165704587030115e-06, | |
| "loss": 0.1697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15552794933319092, | |
| "step": 2840, | |
| "valid_targets_mean": 4072.6, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 5.008802816901408, | |
| "grad_norm": 0.4501666070566837, | |
| "learning_rate": 9.092005301848521e-06, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12022311240434647, | |
| "step": 2845, | |
| "valid_targets_mean": 4554.1, | |
| "valid_targets_min": 1618 | |
| }, | |
| { | |
| "epoch": 5.017605633802817, | |
| "grad_norm": 0.570741992750102, | |
| "learning_rate": 9.018516250835772e-06, | |
| "loss": 0.1466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17982658743858337, | |
| "step": 2850, | |
| "valid_targets_mean": 4721.6, | |
| "valid_targets_min": 345 | |
| }, | |
| { | |
| "epoch": 5.026408450704225, | |
| "grad_norm": 0.5972558826372519, | |
| "learning_rate": 8.945238850375894e-06, | |
| "loss": 0.1362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13194233179092407, | |
| "step": 2855, | |
| "valid_targets_mean": 4429.4, | |
| "valid_targets_min": 702 | |
| }, | |
| { | |
| "epoch": 5.035211267605634, | |
| "grad_norm": 0.4658877646645019, | |
| "learning_rate": 8.872174512773717e-06, | |
| "loss": 0.1498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13239073753356934, | |
| "step": 2860, | |
| "valid_targets_mean": 5085.1, | |
| "valid_targets_min": 2352 | |
| }, | |
| { | |
| "epoch": 5.044014084507042, | |
| "grad_norm": 0.47494382493744014, | |
| "learning_rate": 8.799324646227596e-06, | |
| "loss": 0.1571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13988803327083588, | |
| "step": 2865, | |
| "valid_targets_mean": 4732.6, | |
| "valid_targets_min": 411 | |
| }, | |
| { | |
| "epoch": 5.052816901408451, | |
| "grad_norm": 0.5359325247491418, | |
| "learning_rate": 8.726690654802301e-06, | |
| "loss": 0.1418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1505393087863922, | |
| "step": 2870, | |
| "valid_targets_mean": 4418.4, | |
| "valid_targets_min": 247 | |
| }, | |
| { | |
| "epoch": 5.061619718309859, | |
| "grad_norm": 0.6200095203970003, | |
| "learning_rate": 8.654273938401973e-06, | |
| "loss": 0.1451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1291838437318802, | |
| "step": 2875, | |
| "valid_targets_mean": 2975.3, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 5.070422535211268, | |
| "grad_norm": 0.46310903305402334, | |
| "learning_rate": 8.582075892743103e-06, | |
| "loss": 0.1323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12733536958694458, | |
| "step": 2880, | |
| "valid_targets_mean": 4765.6, | |
| "valid_targets_min": 2330 | |
| }, | |
| { | |
| "epoch": 5.079225352112676, | |
| "grad_norm": 0.5122027839594212, | |
| "learning_rate": 8.51009790932767e-06, | |
| "loss": 0.1499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21192404627799988, | |
| "step": 2885, | |
| "valid_targets_mean": 5526.3, | |
| "valid_targets_min": 1535 | |
| }, | |
| { | |
| "epoch": 5.088028169014084, | |
| "grad_norm": 0.4886711017901105, | |
| "learning_rate": 8.438341375416294e-06, | |
| "loss": 0.1422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14827603101730347, | |
| "step": 2890, | |
| "valid_targets_mean": 5426.8, | |
| "valid_targets_min": 449 | |
| }, | |
| { | |
| "epoch": 5.096830985915493, | |
| "grad_norm": 0.5807579833111265, | |
| "learning_rate": 8.36680767400151e-06, | |
| "loss": 0.1429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13372167944908142, | |
| "step": 2895, | |
| "valid_targets_mean": 4932.9, | |
| "valid_targets_min": 821 | |
| }, | |
| { | |
| "epoch": 5.105633802816901, | |
| "grad_norm": 0.5916983298719057, | |
| "learning_rate": 8.29549818378111e-06, | |
| "loss": 0.1526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16693195700645447, | |
| "step": 2900, | |
| "valid_targets_mean": 3775.1, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 5.11443661971831, | |
| "grad_norm": 0.5123707947309225, | |
| "learning_rate": 8.224414279131583e-06, | |
| "loss": 0.1441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14167647063732147, | |
| "step": 2905, | |
| "valid_targets_mean": 4416.4, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 5.123239436619718, | |
| "grad_norm": 0.5613960808709247, | |
| "learning_rate": 8.153557330081623e-06, | |
| "loss": 0.1387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14374524354934692, | |
| "step": 2910, | |
| "valid_targets_mean": 4202.0, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 5.132042253521127, | |
| "grad_norm": 0.5284144311281195, | |
| "learning_rate": 8.082928702285694e-06, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14414337277412415, | |
| "step": 2915, | |
| "valid_targets_mean": 4913.4, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 5.140845070422535, | |
| "grad_norm": 0.571148886490701, | |
| "learning_rate": 8.012529756997747e-06, | |
| "loss": 0.1598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21470078825950623, | |
| "step": 2920, | |
| "valid_targets_mean": 4570.3, | |
| "valid_targets_min": 1898 | |
| }, | |
| { | |
| "epoch": 5.149647887323944, | |
| "grad_norm": 0.477740212222083, | |
| "learning_rate": 7.942361851044973e-06, | |
| "loss": 0.1435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1374620497226715, | |
| "step": 2925, | |
| "valid_targets_mean": 4720.8, | |
| "valid_targets_min": 2305 | |
| }, | |
| { | |
| "epoch": 5.158450704225352, | |
| "grad_norm": 0.5871044223217738, | |
| "learning_rate": 7.872426336801642e-06, | |
| "loss": 0.1413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13672882318496704, | |
| "step": 2930, | |
| "valid_targets_mean": 4366.6, | |
| "valid_targets_min": 1816 | |
| }, | |
| { | |
| "epoch": 5.167253521126761, | |
| "grad_norm": 0.49933721105553536, | |
| "learning_rate": 7.802724562163038e-06, | |
| "loss": 0.1579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17367324233055115, | |
| "step": 2935, | |
| "valid_targets_mean": 4584.1, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 5.176056338028169, | |
| "grad_norm": 0.5653803692577608, | |
| "learning_rate": 7.73325787051951e-06, | |
| "loss": 0.1394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1180843710899353, | |
| "step": 2940, | |
| "valid_targets_mean": 4060.2, | |
| "valid_targets_min": 2960 | |
| }, | |
| { | |
| "epoch": 5.184859154929577, | |
| "grad_norm": 0.5113728674388844, | |
| "learning_rate": 7.664027600730532e-06, | |
| "loss": 0.1551, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15417969226837158, | |
| "step": 2945, | |
| "valid_targets_mean": 5202.6, | |
| "valid_targets_min": 885 | |
| }, | |
| { | |
| "epoch": 5.193661971830986, | |
| "grad_norm": 0.481312435964273, | |
| "learning_rate": 7.595035087098952e-06, | |
| "loss": 0.144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13256672024726868, | |
| "step": 2950, | |
| "valid_targets_mean": 5851.6, | |
| "valid_targets_min": 2328 | |
| }, | |
| { | |
| "epoch": 5.202464788732394, | |
| "grad_norm": 0.6259429971195783, | |
| "learning_rate": 7.526281659345225e-06, | |
| "loss": 0.1384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16258573532104492, | |
| "step": 2955, | |
| "valid_targets_mean": 3822.6, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 5.211267605633803, | |
| "grad_norm": 0.4438278964922863, | |
| "learning_rate": 7.457768642581813e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14808012545108795, | |
| "step": 2960, | |
| "valid_targets_mean": 5703.8, | |
| "valid_targets_min": 498 | |
| }, | |
| { | |
| "epoch": 5.220070422535211, | |
| "grad_norm": 0.4716143077644737, | |
| "learning_rate": 7.389497357287639e-06, | |
| "loss": 0.1514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18324655294418335, | |
| "step": 2965, | |
| "valid_targets_mean": 5649.1, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 5.22887323943662, | |
| "grad_norm": 0.46654285608989515, | |
| "learning_rate": 7.321469119282649e-06, | |
| "loss": 0.1423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13914130628108978, | |
| "step": 2970, | |
| "valid_targets_mean": 5221.3, | |
| "valid_targets_min": 1945 | |
| }, | |
| { | |
| "epoch": 5.237676056338028, | |
| "grad_norm": 0.4309004562438755, | |
| "learning_rate": 7.253685239702439e-06, | |
| "loss": 0.1649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22985681891441345, | |
| "step": 2975, | |
| "valid_targets_mean": 8170.1, | |
| "valid_targets_min": 1889 | |
| }, | |
| { | |
| "epoch": 5.246478873239437, | |
| "grad_norm": 0.48780493181849827, | |
| "learning_rate": 7.186147024972978e-06, | |
| "loss": 0.1607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17854507267475128, | |
| "step": 2980, | |
| "valid_targets_mean": 4861.9, | |
| "valid_targets_min": 449 | |
| }, | |
| { | |
| "epoch": 5.255281690140845, | |
| "grad_norm": 0.45026711855473006, | |
| "learning_rate": 7.118855776785432e-06, | |
| "loss": 0.1303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14062251150608063, | |
| "step": 2985, | |
| "valid_targets_mean": 5243.9, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 5.264084507042254, | |
| "grad_norm": 0.4764407622299529, | |
| "learning_rate": 7.051812792071104e-06, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16819819808006287, | |
| "step": 2990, | |
| "valid_targets_mean": 5499.6, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 5.272887323943662, | |
| "grad_norm": 0.5389432401973089, | |
| "learning_rate": 6.9850193629763975e-06, | |
| "loss": 0.1693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1606113314628601, | |
| "step": 2995, | |
| "valid_targets_mean": 4333.1, | |
| "valid_targets_min": 540 | |
| }, | |
| { | |
| "epoch": 5.28169014084507, | |
| "grad_norm": 0.5631639380207211, | |
| "learning_rate": 6.918476776837926e-06, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13505461812019348, | |
| "step": 3000, | |
| "valid_targets_mean": 4219.9, | |
| "valid_targets_min": 915 | |
| }, | |
| { | |
| "epoch": 5.290492957746479, | |
| "grad_norm": 0.5205669943548236, | |
| "learning_rate": 6.852186316157727e-06, | |
| "loss": 0.1473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19009539484977722, | |
| "step": 3005, | |
| "valid_targets_mean": 4777.5, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 5.299295774647887, | |
| "grad_norm": 0.5312325469598596, | |
| "learning_rate": 6.7861492585785005e-06, | |
| "loss": 0.1375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1535080075263977, | |
| "step": 3010, | |
| "valid_targets_mean": 4613.5, | |
| "valid_targets_min": 511 | |
| }, | |
| { | |
| "epoch": 5.308098591549296, | |
| "grad_norm": 0.5030920114368365, | |
| "learning_rate": 6.720366876859028e-06, | |
| "loss": 0.138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13051721453666687, | |
| "step": 3015, | |
| "valid_targets_mean": 4454.8, | |
| "valid_targets_min": 1696 | |
| }, | |
| { | |
| "epoch": 5.316901408450704, | |
| "grad_norm": 0.486417303085051, | |
| "learning_rate": 6.654840438849601e-06, | |
| "loss": 0.149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15947286784648895, | |
| "step": 3020, | |
| "valid_targets_mean": 5209.8, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 5.325704225352113, | |
| "grad_norm": 0.5006326539173029, | |
| "learning_rate": 6.589571207467615e-06, | |
| "loss": 0.1469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18616190552711487, | |
| "step": 3025, | |
| "valid_targets_mean": 4957.1, | |
| "valid_targets_min": 1629 | |
| }, | |
| { | |
| "epoch": 5.334507042253521, | |
| "grad_norm": 0.563003658671015, | |
| "learning_rate": 6.5245604406732114e-06, | |
| "loss": 0.1436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14948928356170654, | |
| "step": 3030, | |
| "valid_targets_mean": 4346.8, | |
| "valid_targets_min": 613 | |
| }, | |
| { | |
| "epoch": 5.34330985915493, | |
| "grad_norm": 0.5251836829292532, | |
| "learning_rate": 6.459809391445047e-06, | |
| "loss": 0.1427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13397885859012604, | |
| "step": 3035, | |
| "valid_targets_mean": 5049.6, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 5.352112676056338, | |
| "grad_norm": 0.47104693335404224, | |
| "learning_rate": 6.395319307756142e-06, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14828750491142273, | |
| "step": 3040, | |
| "valid_targets_mean": 5304.3, | |
| "valid_targets_min": 1571 | |
| }, | |
| { | |
| "epoch": 5.360915492957746, | |
| "grad_norm": 0.46981675017843777, | |
| "learning_rate": 6.331091432549816e-06, | |
| "loss": 0.1397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10679211467504501, | |
| "step": 3045, | |
| "valid_targets_mean": 4715.0, | |
| "valid_targets_min": 421 | |
| }, | |
| { | |
| "epoch": 5.369718309859155, | |
| "grad_norm": 0.5938003892777397, | |
| "learning_rate": 6.267127003715727e-06, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15001550316810608, | |
| "step": 3050, | |
| "valid_targets_mean": 3652.0, | |
| "valid_targets_min": 443 | |
| }, | |
| { | |
| "epoch": 5.378521126760563, | |
| "grad_norm": 0.5199964369700208, | |
| "learning_rate": 6.203427254066052e-06, | |
| "loss": 0.1342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1213470846414566, | |
| "step": 3055, | |
| "valid_targets_mean": 5144.6, | |
| "valid_targets_min": 2415 | |
| }, | |
| { | |
| "epoch": 5.387323943661972, | |
| "grad_norm": 0.5376508159088477, | |
| "learning_rate": 6.13999341131168e-06, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1392536610364914, | |
| "step": 3060, | |
| "valid_targets_mean": 4743.1, | |
| "valid_targets_min": 1847 | |
| }, | |
| { | |
| "epoch": 5.39612676056338, | |
| "grad_norm": 0.5200595534322966, | |
| "learning_rate": 6.076826698038567e-06, | |
| "loss": 0.1413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15087604522705078, | |
| "step": 3065, | |
| "valid_targets_mean": 4628.0, | |
| "valid_targets_min": 2233 | |
| }, | |
| { | |
| "epoch": 5.404929577464789, | |
| "grad_norm": 0.47766098888636715, | |
| "learning_rate": 6.013928331684193e-06, | |
| "loss": 0.1454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12682199478149414, | |
| "step": 3070, | |
| "valid_targets_mean": 4830.3, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 5.413732394366197, | |
| "grad_norm": 0.48212456557345695, | |
| "learning_rate": 5.951299524514062e-06, | |
| "loss": 0.1495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14108426868915558, | |
| "step": 3075, | |
| "valid_targets_mean": 5274.1, | |
| "valid_targets_min": 811 | |
| }, | |
| { | |
| "epoch": 5.422535211267606, | |
| "grad_norm": 0.5435568398398792, | |
| "learning_rate": 5.8889414835983715e-06, | |
| "loss": 0.135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14664721488952637, | |
| "step": 3080, | |
| "valid_targets_mean": 4545.1, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 5.431338028169014, | |
| "grad_norm": 0.4672392958053226, | |
| "learning_rate": 5.826855410788719e-06, | |
| "loss": 0.1429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14426971971988678, | |
| "step": 3085, | |
| "valid_targets_mean": 5320.8, | |
| "valid_targets_min": 1922 | |
| }, | |
| { | |
| "epoch": 5.440140845070423, | |
| "grad_norm": 5.0705954753014035, | |
| "learning_rate": 5.765042502694955e-06, | |
| "loss": 0.1403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15294724702835083, | |
| "step": 3090, | |
| "valid_targets_mean": 5735.1, | |
| "valid_targets_min": 1725 | |
| }, | |
| { | |
| "epoch": 5.448943661971831, | |
| "grad_norm": 0.4745697555440363, | |
| "learning_rate": 5.703503950662113e-06, | |
| "loss": 0.1351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11520686745643616, | |
| "step": 3095, | |
| "valid_targets_mean": 4905.4, | |
| "valid_targets_min": 448 | |
| }, | |
| { | |
| "epoch": 5.457746478873239, | |
| "grad_norm": 0.4690270186506746, | |
| "learning_rate": 5.642240940747466e-06, | |
| "loss": 0.1238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11829526722431183, | |
| "step": 3100, | |
| "valid_targets_mean": 5145.1, | |
| "valid_targets_min": 1939 | |
| }, | |
| { | |
| "epoch": 5.466549295774648, | |
| "grad_norm": 0.6281533778926074, | |
| "learning_rate": 5.58125465369763e-06, | |
| "loss": 0.1525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17893964052200317, | |
| "step": 3105, | |
| "valid_targets_mean": 3865.2, | |
| "valid_targets_min": 328 | |
| }, | |
| { | |
| "epoch": 5.475352112676056, | |
| "grad_norm": 0.4878701466214642, | |
| "learning_rate": 5.520546264925859e-06, | |
| "loss": 0.135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14455589652061462, | |
| "step": 3110, | |
| "valid_targets_mean": 5230.8, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 5.484154929577465, | |
| "grad_norm": 0.472376418615806, | |
| "learning_rate": 5.460116944489335e-06, | |
| "loss": 0.155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12420351058244705, | |
| "step": 3115, | |
| "valid_targets_mean": 4865.8, | |
| "valid_targets_min": 401 | |
| }, | |
| { | |
| "epoch": 5.492957746478873, | |
| "grad_norm": 0.47177234156447573, | |
| "learning_rate": 5.3999678570666544e-06, | |
| "loss": 0.145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14821934700012207, | |
| "step": 3120, | |
| "valid_targets_mean": 5429.6, | |
| "valid_targets_min": 2281 | |
| }, | |
| { | |
| "epoch": 5.501760563380282, | |
| "grad_norm": 0.47126144836543726, | |
| "learning_rate": 5.340100161935378e-06, | |
| "loss": 0.1345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13389673829078674, | |
| "step": 3125, | |
| "valid_targets_mean": 5264.1, | |
| "valid_targets_min": 507 | |
| }, | |
| { | |
| "epoch": 5.51056338028169, | |
| "grad_norm": 0.4961199599876548, | |
| "learning_rate": 5.280515012949667e-06, | |
| "loss": 0.1306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12710236012935638, | |
| "step": 3130, | |
| "valid_targets_mean": 5229.4, | |
| "valid_targets_min": 1587 | |
| }, | |
| { | |
| "epoch": 5.519366197183099, | |
| "grad_norm": 0.5758963617858718, | |
| "learning_rate": 5.221213558518057e-06, | |
| "loss": 0.1591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13208642601966858, | |
| "step": 3135, | |
| "valid_targets_mean": 3484.8, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 5.528169014084507, | |
| "grad_norm": 0.5516628547881831, | |
| "learning_rate": 5.162196941581334e-06, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15435585379600525, | |
| "step": 3140, | |
| "valid_targets_mean": 3796.7, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 5.536971830985916, | |
| "grad_norm": 0.41527599107400437, | |
| "learning_rate": 5.103466299590498e-06, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17757152020931244, | |
| "step": 3145, | |
| "valid_targets_mean": 7428.8, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 5.545774647887324, | |
| "grad_norm": 0.47235227215758596, | |
| "learning_rate": 5.045022764484826e-06, | |
| "loss": 0.1511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1660035252571106, | |
| "step": 3150, | |
| "valid_targets_mean": 5651.9, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 5.554577464788732, | |
| "grad_norm": 0.46254151200092164, | |
| "learning_rate": 4.986867462670077e-06, | |
| "loss": 0.1505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16039714217185974, | |
| "step": 3155, | |
| "valid_targets_mean": 5833.9, | |
| "valid_targets_min": 2397 | |
| }, | |
| { | |
| "epoch": 5.563380281690141, | |
| "grad_norm": 0.5049874787633386, | |
| "learning_rate": 4.9290015149967654e-06, | |
| "loss": 0.1606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1314234435558319, | |
| "step": 3160, | |
| "valid_targets_mean": 4718.3, | |
| "valid_targets_min": 2509 | |
| }, | |
| { | |
| "epoch": 5.572183098591549, | |
| "grad_norm": 0.5456654697136112, | |
| "learning_rate": 4.871426036738584e-06, | |
| "loss": 0.1402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13221964240074158, | |
| "step": 3165, | |
| "valid_targets_mean": 3908.2, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 5.580985915492958, | |
| "grad_norm": 0.436237640239324, | |
| "learning_rate": 4.814142137570872e-06, | |
| "loss": 0.1363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1246955394744873, | |
| "step": 3170, | |
| "valid_targets_mean": 5052.5, | |
| "valid_targets_min": 2162 | |
| }, | |
| { | |
| "epoch": 5.589788732394366, | |
| "grad_norm": 0.5395945875910536, | |
| "learning_rate": 4.757150921549265e-06, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13676472008228302, | |
| "step": 3175, | |
| "valid_targets_mean": 3908.5, | |
| "valid_targets_min": 658 | |
| }, | |
| { | |
| "epoch": 5.598591549295775, | |
| "grad_norm": 0.49327869271293695, | |
| "learning_rate": 4.7004534870883875e-06, | |
| "loss": 0.146, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13129764795303345, | |
| "step": 3180, | |
| "valid_targets_mean": 4758.2, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 5.607394366197183, | |
| "grad_norm": 0.4912453005533349, | |
| "learning_rate": 4.6440509269406904e-06, | |
| "loss": 0.1406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12718603014945984, | |
| "step": 3185, | |
| "valid_targets_mean": 4519.6, | |
| "valid_targets_min": 2217 | |
| }, | |
| { | |
| "epoch": 5.616197183098592, | |
| "grad_norm": 0.5436722928688413, | |
| "learning_rate": 4.587944328175411e-06, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14402826130390167, | |
| "step": 3190, | |
| "valid_targets_mean": 4190.1, | |
| "valid_targets_min": 1143 | |
| }, | |
| { | |
| "epoch": 5.625, | |
| "grad_norm": 0.5491397897277362, | |
| "learning_rate": 4.5321347721575885e-06, | |
| "loss": 0.1417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15087661147117615, | |
| "step": 3195, | |
| "valid_targets_mean": 4904.7, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 5.633802816901408, | |
| "grad_norm": 0.5909037113845439, | |
| "learning_rate": 4.47662333452723e-06, | |
| "loss": 0.1455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15874144434928894, | |
| "step": 3200, | |
| "valid_targets_mean": 4601.3, | |
| "valid_targets_min": 292 | |
| }, | |
| { | |
| "epoch": 5.642605633802817, | |
| "grad_norm": 0.4728612898091008, | |
| "learning_rate": 4.4214110851786105e-06, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12207137048244476, | |
| "step": 3205, | |
| "valid_targets_mean": 4998.2, | |
| "valid_targets_min": 663 | |
| }, | |
| { | |
| "epoch": 5.651408450704225, | |
| "grad_norm": 0.5081621161868719, | |
| "learning_rate": 4.366499088239622e-06, | |
| "loss": 0.1565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1542491763830185, | |
| "step": 3210, | |
| "valid_targets_mean": 4859.1, | |
| "valid_targets_min": 2042 | |
| }, | |
| { | |
| "epoch": 5.660211267605634, | |
| "grad_norm": 0.5266810134654849, | |
| "learning_rate": 4.3118884020512584e-06, | |
| "loss": 0.1468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14367185533046722, | |
| "step": 3215, | |
| "valid_targets_mean": 4789.1, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 5.669014084507042, | |
| "grad_norm": 0.48199360621261034, | |
| "learning_rate": 4.257580079147241e-06, | |
| "loss": 0.1422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.160629004240036, | |
| "step": 3220, | |
| "valid_targets_mean": 4815.8, | |
| "valid_targets_min": 552 | |
| }, | |
| { | |
| "epoch": 5.677816901408451, | |
| "grad_norm": 0.485414781530496, | |
| "learning_rate": 4.2035751662337106e-06, | |
| "loss": 0.1471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13349416851997375, | |
| "step": 3225, | |
| "valid_targets_mean": 4971.4, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 5.686619718309859, | |
| "grad_norm": 0.47472966596972765, | |
| "learning_rate": 4.149874704169086e-06, | |
| "loss": 0.1425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12864427268505096, | |
| "step": 3230, | |
| "valid_targets_mean": 5178.8, | |
| "valid_targets_min": 460 | |
| }, | |
| { | |
| "epoch": 5.695422535211268, | |
| "grad_norm": 0.4670408002879044, | |
| "learning_rate": 4.096479727943958e-06, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19213852286338806, | |
| "step": 3235, | |
| "valid_targets_mean": 6211.7, | |
| "valid_targets_min": 322 | |
| }, | |
| { | |
| "epoch": 5.704225352112676, | |
| "grad_norm": 0.4788384677856596, | |
| "learning_rate": 4.043391266661192e-06, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14478346705436707, | |
| "step": 3240, | |
| "valid_targets_mean": 6120.4, | |
| "valid_targets_min": 2219 | |
| }, | |
| { | |
| "epoch": 5.713028169014084, | |
| "grad_norm": 0.5723602789358954, | |
| "learning_rate": 3.990610343516046e-06, | |
| "loss": 0.1408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11496154963970184, | |
| "step": 3245, | |
| "valid_targets_mean": 5244.6, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 5.721830985915493, | |
| "grad_norm": 0.5551855129635233, | |
| "learning_rate": 3.938137975776475e-06, | |
| "loss": 0.1533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16428819298744202, | |
| "step": 3250, | |
| "valid_targets_mean": 4925.7, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 5.730633802816901, | |
| "grad_norm": 0.5404426760818474, | |
| "learning_rate": 3.885975174763536e-06, | |
| "loss": 0.166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15624471008777618, | |
| "step": 3255, | |
| "valid_targets_mean": 5711.8, | |
| "valid_targets_min": 1630 | |
| }, | |
| { | |
| "epoch": 5.73943661971831, | |
| "grad_norm": 0.46059331782554475, | |
| "learning_rate": 3.834122945831866e-06, | |
| "loss": 0.1583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1860368251800537, | |
| "step": 3260, | |
| "valid_targets_mean": 6151.7, | |
| "valid_targets_min": 761 | |
| }, | |
| { | |
| "epoch": 5.748239436619718, | |
| "grad_norm": 0.5736223707317745, | |
| "learning_rate": 3.782582288350325e-06, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17216193675994873, | |
| "step": 3265, | |
| "valid_targets_mean": 4451.0, | |
| "valid_targets_min": 424 | |
| }, | |
| { | |
| "epoch": 5.757042253521127, | |
| "grad_norm": 0.4998432391333853, | |
| "learning_rate": 3.7313541956827347e-06, | |
| "loss": 0.1384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12954282760620117, | |
| "step": 3270, | |
| "valid_targets_mean": 4360.0, | |
| "valid_targets_min": 2052 | |
| }, | |
| { | |
| "epoch": 5.765845070422535, | |
| "grad_norm": 0.5809908930153733, | |
| "learning_rate": 3.6804396551687373e-06, | |
| "loss": 0.1403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1761077344417572, | |
| "step": 3275, | |
| "valid_targets_mean": 4025.2, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 5.774647887323944, | |
| "grad_norm": 1.0181561849165504, | |
| "learning_rate": 3.6298396481047405e-06, | |
| "loss": 0.1462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14065641164779663, | |
| "step": 3280, | |
| "valid_targets_mean": 4677.1, | |
| "valid_targets_min": 1911 | |
| }, | |
| { | |
| "epoch": 5.783450704225352, | |
| "grad_norm": 0.5110962776023475, | |
| "learning_rate": 3.5795551497250338e-06, | |
| "loss": 0.1401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12975157797336578, | |
| "step": 3285, | |
| "valid_targets_mean": 4627.1, | |
| "valid_targets_min": 451 | |
| }, | |
| { | |
| "epoch": 5.792253521126761, | |
| "grad_norm": 0.5350762730138342, | |
| "learning_rate": 3.5295871291829695e-06, | |
| "loss": 0.1446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12250363826751709, | |
| "step": 3290, | |
| "valid_targets_mean": 4094.4, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 5.801056338028169, | |
| "grad_norm": 0.5003438521672692, | |
| "learning_rate": 3.479936549532319e-06, | |
| "loss": 0.1431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14709848165512085, | |
| "step": 3295, | |
| "valid_targets_mean": 5150.6, | |
| "valid_targets_min": 2412 | |
| }, | |
| { | |
| "epoch": 5.809859154929578, | |
| "grad_norm": 0.5090929497126097, | |
| "learning_rate": 3.4306043677086588e-06, | |
| "loss": 0.145, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1464647501707077, | |
| "step": 3300, | |
| "valid_targets_mean": 5811.6, | |
| "valid_targets_min": 1683 | |
| }, | |
| { | |
| "epoch": 5.818661971830986, | |
| "grad_norm": 0.5487108088477599, | |
| "learning_rate": 3.381591534510982e-06, | |
| "loss": 0.1568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14587390422821045, | |
| "step": 3305, | |
| "valid_targets_mean": 3675.2, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 5.827464788732394, | |
| "grad_norm": 0.5523183358824412, | |
| "learning_rate": 3.332898994583329e-06, | |
| "loss": 0.1319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13197696208953857, | |
| "step": 3310, | |
| "valid_targets_mean": 4123.2, | |
| "valid_targets_min": 1572 | |
| }, | |
| { | |
| "epoch": 5.836267605633803, | |
| "grad_norm": 0.4624285935910217, | |
| "learning_rate": 3.284527686396599e-06, | |
| "loss": 0.1404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13002830743789673, | |
| "step": 3315, | |
| "valid_targets_mean": 5129.8, | |
| "valid_targets_min": 866 | |
| }, | |
| { | |
| "epoch": 5.845070422535211, | |
| "grad_norm": 0.47341361478340843, | |
| "learning_rate": 3.236478542230481e-06, | |
| "loss": 0.1369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14910481870174408, | |
| "step": 3320, | |
| "valid_targets_mean": 5668.1, | |
| "valid_targets_min": 2012 | |
| }, | |
| { | |
| "epoch": 5.85387323943662, | |
| "grad_norm": 0.48140117919568937, | |
| "learning_rate": 3.1887524881554486e-06, | |
| "loss": 0.1356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13689473271369934, | |
| "step": 3325, | |
| "valid_targets_mean": 4955.8, | |
| "valid_targets_min": 2265 | |
| }, | |
| { | |
| "epoch": 5.862676056338028, | |
| "grad_norm": 0.418442098134719, | |
| "learning_rate": 3.1413504440149323e-06, | |
| "loss": 0.1468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18040907382965088, | |
| "step": 3330, | |
| "valid_targets_mean": 7045.4, | |
| "valid_targets_min": 803 | |
| }, | |
| { | |
| "epoch": 5.871478873239437, | |
| "grad_norm": 0.5475156838894291, | |
| "learning_rate": 3.0942733234075995e-06, | |
| "loss": 0.1247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13491827249526978, | |
| "step": 3335, | |
| "valid_targets_mean": 5390.9, | |
| "valid_targets_min": 923 | |
| }, | |
| { | |
| "epoch": 5.880281690140845, | |
| "grad_norm": 0.483010007890447, | |
| "learning_rate": 3.047522033669732e-06, | |
| "loss": 0.1351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14320901036262512, | |
| "step": 3340, | |
| "valid_targets_mean": 4951.1, | |
| "valid_targets_min": 619 | |
| }, | |
| { | |
| "epoch": 5.889084507042254, | |
| "grad_norm": 0.5010034552712743, | |
| "learning_rate": 3.001097475857735e-06, | |
| "loss": 0.1487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1546856164932251, | |
| "step": 3345, | |
| "valid_targets_mean": 5350.6, | |
| "valid_targets_min": 631 | |
| }, | |
| { | |
| "epoch": 5.897887323943662, | |
| "grad_norm": 0.5632677711451385, | |
| "learning_rate": 2.955000544730784e-06, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16293910145759583, | |
| "step": 3350, | |
| "valid_targets_mean": 4437.8, | |
| "valid_targets_min": 355 | |
| }, | |
| { | |
| "epoch": 5.90669014084507, | |
| "grad_norm": 0.44158255097826604, | |
| "learning_rate": 2.90923212873357e-06, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18100960552692413, | |
| "step": 3355, | |
| "valid_targets_mean": 6654.2, | |
| "valid_targets_min": 1797 | |
| }, | |
| { | |
| "epoch": 5.915492957746479, | |
| "grad_norm": 0.5168645554390789, | |
| "learning_rate": 2.8637931099791806e-06, | |
| "loss": 0.1306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12227396667003632, | |
| "step": 3360, | |
| "valid_targets_mean": 4040.5, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 5.924295774647887, | |
| "grad_norm": 0.5115706685406894, | |
| "learning_rate": 2.8186843642321004e-06, | |
| "loss": 0.1543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14923377335071564, | |
| "step": 3365, | |
| "valid_targets_mean": 4982.3, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 5.933098591549296, | |
| "grad_norm": 0.4290995535233675, | |
| "learning_rate": 2.773906760891334e-06, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13125595450401306, | |
| "step": 3370, | |
| "valid_targets_mean": 6557.5, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 5.941901408450704, | |
| "grad_norm": 0.5217262452619486, | |
| "learning_rate": 2.7294611629736345e-06, | |
| "loss": 0.1289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11718772351741791, | |
| "step": 3375, | |
| "valid_targets_mean": 4255.5, | |
| "valid_targets_min": 527 | |
| }, | |
| { | |
| "epoch": 5.950704225352113, | |
| "grad_norm": 0.4714058810281695, | |
| "learning_rate": 2.685348427096881e-06, | |
| "loss": 0.1425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1319168210029602, | |
| "step": 3380, | |
| "valid_targets_mean": 5245.4, | |
| "valid_targets_min": 1522 | |
| }, | |
| { | |
| "epoch": 5.959507042253521, | |
| "grad_norm": 0.5405329145765186, | |
| "learning_rate": 2.641569403463584e-06, | |
| "loss": 0.1407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13677750527858734, | |
| "step": 3385, | |
| "valid_targets_mean": 4938.3, | |
| "valid_targets_min": 2067 | |
| }, | |
| { | |
| "epoch": 5.96830985915493, | |
| "grad_norm": 0.4839713421980333, | |
| "learning_rate": 2.5981249358444682e-06, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14570334553718567, | |
| "step": 3390, | |
| "valid_targets_mean": 4723.7, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 5.977112676056338, | |
| "grad_norm": 0.543289108439346, | |
| "learning_rate": 2.5550158615622265e-06, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1419191062450409, | |
| "step": 3395, | |
| "valid_targets_mean": 4087.4, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 5.985915492957746, | |
| "grad_norm": 0.49763400277771375, | |
| "learning_rate": 2.5122430114753906e-06, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16934366524219513, | |
| "step": 3400, | |
| "valid_targets_mean": 5536.5, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 5.994718309859155, | |
| "grad_norm": 0.49056918969522467, | |
| "learning_rate": 2.4698072099623025e-06, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15616066753864288, | |
| "step": 3405, | |
| "valid_targets_mean": 5258.5, | |
| "valid_targets_min": 1986 | |
| }, | |
| { | |
| "epoch": 6.003521126760563, | |
| "grad_norm": 0.4345837378582337, | |
| "learning_rate": 2.4277092749052343e-06, | |
| "loss": 0.1316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0999487116932869, | |
| "step": 3410, | |
| "valid_targets_mean": 4572.1, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 6.012323943661972, | |
| "grad_norm": 0.44960433035129926, | |
| "learning_rate": 2.3859500176746143e-06, | |
| "loss": 0.1333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13360948860645294, | |
| "step": 3415, | |
| "valid_targets_mean": 5016.4, | |
| "valid_targets_min": 2214 | |
| }, | |
| { | |
| "epoch": 6.02112676056338, | |
| "grad_norm": 0.6045244046282963, | |
| "learning_rate": 2.344530243113403e-06, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12261538952589035, | |
| "step": 3420, | |
| "valid_targets_mean": 4248.2, | |
| "valid_targets_min": 375 | |
| }, | |
| { | |
| "epoch": 6.029929577464789, | |
| "grad_norm": 0.4781355508388481, | |
| "learning_rate": 2.303450749521572e-06, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20978538691997528, | |
| "step": 3425, | |
| "valid_targets_mean": 5465.4, | |
| "valid_targets_min": 907 | |
| }, | |
| { | |
| "epoch": 6.038732394366197, | |
| "grad_norm": 0.5633949955870742, | |
| "learning_rate": 2.262712328640726e-06, | |
| "loss": 0.1417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12932050228118896, | |
| "step": 3430, | |
| "valid_targets_mean": 3406.6, | |
| "valid_targets_min": 360 | |
| }, | |
| { | |
| "epoch": 6.047535211267606, | |
| "grad_norm": 0.5224488774391776, | |
| "learning_rate": 2.2223157656388384e-06, | |
| "loss": 0.1478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13892272114753723, | |
| "step": 3435, | |
| "valid_targets_mean": 4630.9, | |
| "valid_targets_min": 580 | |
| }, | |
| { | |
| "epoch": 6.056338028169014, | |
| "grad_norm": 0.5423182224851776, | |
| "learning_rate": 2.18226183909511e-06, | |
| "loss": 0.1457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16125619411468506, | |
| "step": 3440, | |
| "valid_targets_mean": 4955.6, | |
| "valid_targets_min": 2104 | |
| }, | |
| { | |
| "epoch": 6.065140845070423, | |
| "grad_norm": 0.47545826566549465, | |
| "learning_rate": 2.1425513209849736e-06, | |
| "loss": 0.1342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12231684476137161, | |
| "step": 3445, | |
| "valid_targets_mean": 5146.8, | |
| "valid_targets_min": 2526 | |
| }, | |
| { | |
| "epoch": 6.073943661971831, | |
| "grad_norm": 0.5514796843483201, | |
| "learning_rate": 2.103184976665222e-06, | |
| "loss": 0.1394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15755456686019897, | |
| "step": 3450, | |
| "valid_targets_mean": 4724.2, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 6.082746478873239, | |
| "grad_norm": 0.5460448684974949, | |
| "learning_rate": 2.0641635648592404e-06, | |
| "loss": 0.1293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12847205996513367, | |
| "step": 3455, | |
| "valid_targets_mean": 3752.8, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 6.091549295774648, | |
| "grad_norm": 0.5229100820577318, | |
| "learning_rate": 2.0254878376423883e-06, | |
| "loss": 0.1456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14487887918949127, | |
| "step": 3460, | |
| "valid_targets_mean": 4234.0, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 6.100352112676056, | |
| "grad_norm": 0.48784863029679126, | |
| "learning_rate": 1.9871585404275117e-06, | |
| "loss": 0.134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17177344858646393, | |
| "step": 3465, | |
| "valid_targets_mean": 5540.1, | |
| "valid_targets_min": 618 | |
| }, | |
| { | |
| "epoch": 6.109154929577465, | |
| "grad_norm": 0.4662073141607394, | |
| "learning_rate": 1.949176411950577e-06, | |
| "loss": 0.1336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12477079778909683, | |
| "step": 3470, | |
| "valid_targets_mean": 5932.0, | |
| "valid_targets_min": 2366 | |
| }, | |
| { | |
| "epoch": 6.117957746478873, | |
| "grad_norm": 0.46870014183364367, | |
| "learning_rate": 1.911542184256421e-06, | |
| "loss": 0.1378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13209542632102966, | |
| "step": 3475, | |
| "valid_targets_mean": 5480.9, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 6.126760563380282, | |
| "grad_norm": 0.4684610277168845, | |
| "learning_rate": 1.874256582684646e-06, | |
| "loss": 0.1445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11796166002750397, | |
| "step": 3480, | |
| "valid_targets_mean": 5042.1, | |
| "valid_targets_min": 1814 | |
| }, | |
| { | |
| "epoch": 6.13556338028169, | |
| "grad_norm": 0.6580579425390192, | |
| "learning_rate": 1.8373203258556472e-06, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1560782492160797, | |
| "step": 3485, | |
| "valid_targets_mean": 5288.4, | |
| "valid_targets_min": 634 | |
| }, | |
| { | |
| "epoch": 6.144366197183099, | |
| "grad_norm": 0.5451515601145017, | |
| "learning_rate": 1.8007341256567578e-06, | |
| "loss": 0.1371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1439949870109558, | |
| "step": 3490, | |
| "valid_targets_mean": 4689.0, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 6.153169014084507, | |
| "grad_norm": 0.5143304918273679, | |
| "learning_rate": 1.7644986872285286e-06, | |
| "loss": 0.1481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1766376793384552, | |
| "step": 3495, | |
| "valid_targets_mean": 4941.1, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 6.161971830985916, | |
| "grad_norm": 0.6537369360205026, | |
| "learning_rate": 1.7286147089511418e-06, | |
| "loss": 0.1397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1368342787027359, | |
| "step": 3500, | |
| "valid_targets_mean": 5094.8, | |
| "valid_targets_min": 1081 | |
| }, | |
| { | |
| "epoch": 6.170774647887324, | |
| "grad_norm": 0.4967970020439909, | |
| "learning_rate": 1.6930828824309387e-06, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12366166710853577, | |
| "step": 3505, | |
| "valid_targets_mean": 4611.1, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 6.179577464788732, | |
| "grad_norm": 0.5331430010613298, | |
| "learning_rate": 1.6579038924871005e-06, | |
| "loss": 0.15, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11595480144023895, | |
| "step": 3510, | |
| "valid_targets_mean": 4079.6, | |
| "valid_targets_min": 376 | |
| }, | |
| { | |
| "epoch": 6.188380281690141, | |
| "grad_norm": 0.5158992482621985, | |
| "learning_rate": 1.623078417138455e-06, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14745491743087769, | |
| "step": 3515, | |
| "valid_targets_mean": 4656.3, | |
| "valid_targets_min": 263 | |
| }, | |
| { | |
| "epoch": 6.197183098591549, | |
| "grad_norm": 0.5381463562638122, | |
| "learning_rate": 1.5886071275903913e-06, | |
| "loss": 0.1424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11239135265350342, | |
| "step": 3520, | |
| "valid_targets_mean": 3814.6, | |
| "valid_targets_min": 410 | |
| }, | |
| { | |
| "epoch": 6.205985915492958, | |
| "grad_norm": 0.482554449623009, | |
| "learning_rate": 1.5544906882219347e-06, | |
| "loss": 0.1425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1223248541355133, | |
| "step": 3525, | |
| "valid_targets_mean": 4788.8, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 6.214788732394366, | |
| "grad_norm": 0.52014874467926, | |
| "learning_rate": 1.5207297565729429e-06, | |
| "loss": 0.1371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1221373900771141, | |
| "step": 3530, | |
| "valid_targets_mean": 4462.4, | |
| "valid_targets_min": 650 | |
| }, | |
| { | |
| "epoch": 6.223591549295775, | |
| "grad_norm": 0.4722887298029329, | |
| "learning_rate": 1.4873249833314351e-06, | |
| "loss": 0.1478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14757949113845825, | |
| "step": 3535, | |
| "valid_targets_mean": 5234.2, | |
| "valid_targets_min": 2073 | |
| }, | |
| { | |
| "epoch": 6.232394366197183, | |
| "grad_norm": 0.5126862671026297, | |
| "learning_rate": 1.454277012321037e-06, | |
| "loss": 0.1443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13863685727119446, | |
| "step": 3540, | |
| "valid_targets_mean": 4859.9, | |
| "valid_targets_min": 1642 | |
| }, | |
| { | |
| "epoch": 6.241197183098592, | |
| "grad_norm": 0.5624056529537277, | |
| "learning_rate": 1.4215864804885838e-06, | |
| "loss": 0.1664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1549038141965866, | |
| "step": 3545, | |
| "valid_targets_mean": 3855.4, | |
| "valid_targets_min": 635 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 0.522087590740538, | |
| "learning_rate": 1.3892540178918456e-06, | |
| "loss": 0.1426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18053975701332092, | |
| "step": 3550, | |
| "valid_targets_mean": 5159.5, | |
| "valid_targets_min": 1679 | |
| }, | |
| { | |
| "epoch": 6.258802816901408, | |
| "grad_norm": 0.5614259323490514, | |
| "learning_rate": 1.3572802476873737e-06, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1442645788192749, | |
| "step": 3555, | |
| "valid_targets_mean": 5067.6, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 6.267605633802817, | |
| "grad_norm": 0.44031232889224864, | |
| "learning_rate": 1.3256657861185063e-06, | |
| "loss": 0.1557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1345158815383911, | |
| "step": 3560, | |
| "valid_targets_mean": 6142.8, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 6.276408450704225, | |
| "grad_norm": 0.544707135090562, | |
| "learning_rate": 1.2944112425034704e-06, | |
| "loss": 0.1339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15208196640014648, | |
| "step": 3565, | |
| "valid_targets_mean": 4324.5, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 6.285211267605634, | |
| "grad_norm": 0.44342906540681853, | |
| "learning_rate": 1.26351721922366e-06, | |
| "loss": 0.1301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11831603944301605, | |
| "step": 3570, | |
| "valid_targets_mean": 5400.0, | |
| "valid_targets_min": 415 | |
| }, | |
| { | |
| "epoch": 6.294014084507042, | |
| "grad_norm": 0.4908968702741274, | |
| "learning_rate": 1.2329843117120066e-06, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15591813623905182, | |
| "step": 3575, | |
| "valid_targets_mean": 5032.2, | |
| "valid_targets_min": 2769 | |
| }, | |
| { | |
| "epoch": 6.302816901408451, | |
| "grad_norm": 0.49619765652968867, | |
| "learning_rate": 1.2028131084415206e-06, | |
| "loss": 0.1558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12985055148601532, | |
| "step": 3580, | |
| "valid_targets_mean": 5426.8, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 6.311619718309859, | |
| "grad_norm": 0.534731464626865, | |
| "learning_rate": 1.1730041909139377e-06, | |
| "loss": 0.1479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17519858479499817, | |
| "step": 3585, | |
| "valid_targets_mean": 4754.6, | |
| "valid_targets_min": 510 | |
| }, | |
| { | |
| "epoch": 6.320422535211268, | |
| "grad_norm": 0.5925832263610169, | |
| "learning_rate": 1.1435581336485102e-06, | |
| "loss": 0.1435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1586245745420456, | |
| "step": 3590, | |
| "valid_targets_mean": 5664.7, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 6.329225352112676, | |
| "grad_norm": 0.4897546865514776, | |
| "learning_rate": 1.1144755041709399e-06, | |
| "loss": 0.1393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13721773028373718, | |
| "step": 3595, | |
| "valid_targets_mean": 5064.9, | |
| "valid_targets_min": 385 | |
| }, | |
| { | |
| "epoch": 6.338028169014084, | |
| "grad_norm": 0.47399735269066057, | |
| "learning_rate": 1.0857568630024472e-06, | |
| "loss": 0.1359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13113580644130707, | |
| "step": 3600, | |
| "valid_targets_mean": 5085.2, | |
| "valid_targets_min": 1663 | |
| }, | |
| { | |
| "epoch": 6.346830985915493, | |
| "grad_norm": 0.5446614644389745, | |
| "learning_rate": 1.0574027636489537e-06, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10861470550298691, | |
| "step": 3605, | |
| "valid_targets_mean": 3904.8, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 6.355633802816901, | |
| "grad_norm": 0.5012432041096462, | |
| "learning_rate": 1.029413752590418e-06, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14453452825546265, | |
| "step": 3610, | |
| "valid_targets_mean": 4877.2, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 6.36443661971831, | |
| "grad_norm": 0.547033965362819, | |
| "learning_rate": 1.001790369270308e-06, | |
| "loss": 0.1411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15618237853050232, | |
| "step": 3615, | |
| "valid_targets_mean": 4698.3, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 6.373239436619718, | |
| "grad_norm": 0.4911974096483243, | |
| "learning_rate": 9.745331460851947e-07, | |
| "loss": 0.1368, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1374177634716034, | |
| "step": 3620, | |
| "valid_targets_mean": 4769.1, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 6.382042253521127, | |
| "grad_norm": 0.5994612757842834, | |
| "learning_rate": 9.476426083745104e-07, | |
| "loss": 0.1238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13156624138355255, | |
| "step": 3625, | |
| "valid_targets_mean": 3961.8, | |
| "valid_targets_min": 831 | |
| }, | |
| { | |
| "epoch": 6.390845070422535, | |
| "grad_norm": 0.5323107024004411, | |
| "learning_rate": 9.211192744103958e-07, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13586384057998657, | |
| "step": 3630, | |
| "valid_targets_mean": 4101.6, | |
| "valid_targets_min": 689 | |
| }, | |
| { | |
| "epoch": 6.399647887323944, | |
| "grad_norm": 0.8458496135404939, | |
| "learning_rate": 8.949636553877439e-07, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12035247683525085, | |
| "step": 3635, | |
| "valid_targets_mean": 3417.3, | |
| "valid_targets_min": 468 | |
| }, | |
| { | |
| "epoch": 6.408450704225352, | |
| "grad_norm": 0.46231251680681773, | |
| "learning_rate": 8.69176255414308e-07, | |
| "loss": 0.1404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12341925501823425, | |
| "step": 3640, | |
| "valid_targets_mean": 4592.4, | |
| "valid_targets_min": 644 | |
| }, | |
| { | |
| "epoch": 6.417253521126761, | |
| "grad_norm": 0.5191565162106292, | |
| "learning_rate": 8.437575715010293e-07, | |
| "loss": 0.137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13376471400260925, | |
| "step": 3645, | |
| "valid_targets_mean": 4782.5, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 6.426056338028169, | |
| "grad_norm": 0.559035838942885, | |
| "learning_rate": 8.187080935524205e-07, | |
| "loss": 0.1604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15134574472904205, | |
| "step": 3650, | |
| "valid_targets_mean": 3934.8, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 6.434859154929577, | |
| "grad_norm": 0.5093648074508976, | |
| "learning_rate": 7.940283043571462e-07, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15099284052848816, | |
| "step": 3655, | |
| "valid_targets_mean": 4482.9, | |
| "valid_targets_min": 430 | |
| }, | |
| { | |
| "epoch": 6.443661971830986, | |
| "grad_norm": 0.5506769070148886, | |
| "learning_rate": 7.697186795787059e-07, | |
| "loss": 0.1345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12886866927146912, | |
| "step": 3660, | |
| "valid_targets_mean": 5144.4, | |
| "valid_targets_min": 1695 | |
| }, | |
| { | |
| "epoch": 6.452464788732394, | |
| "grad_norm": 0.467576149334022, | |
| "learning_rate": 7.457796877462776e-07, | |
| "loss": 0.1384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13769331574440002, | |
| "step": 3665, | |
| "valid_targets_mean": 5470.7, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 6.461267605633803, | |
| "grad_norm": 0.43349774181137446, | |
| "learning_rate": 7.222117902456815e-07, | |
| "loss": 0.1565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13145123422145844, | |
| "step": 3670, | |
| "valid_targets_mean": 6050.0, | |
| "valid_targets_min": 1843 | |
| }, | |
| { | |
| "epoch": 6.470070422535211, | |
| "grad_norm": 0.5130538434107541, | |
| "learning_rate": 6.990154413104799e-07, | |
| "loss": 0.1419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1344628632068634, | |
| "step": 3675, | |
| "valid_targets_mean": 4779.1, | |
| "valid_targets_min": 1618 | |
| }, | |
| { | |
| "epoch": 6.47887323943662, | |
| "grad_norm": 0.5180553031664631, | |
| "learning_rate": 6.761910880132406e-07, | |
| "loss": 0.1239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13148555159568787, | |
| "step": 3680, | |
| "valid_targets_mean": 4473.4, | |
| "valid_targets_min": 2223 | |
| }, | |
| { | |
| "epoch": 6.487676056338028, | |
| "grad_norm": 0.5128653791259178, | |
| "learning_rate": 6.537391702568973e-07, | |
| "loss": 0.132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12966038286685944, | |
| "step": 3685, | |
| "valid_targets_mean": 4934.4, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 6.496478873239437, | |
| "grad_norm": 0.43686692483414613, | |
| "learning_rate": 6.316601207662953e-07, | |
| "loss": 0.1256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10755065083503723, | |
| "step": 3690, | |
| "valid_targets_mean": 5577.5, | |
| "valid_targets_min": 2336 | |
| }, | |
| { | |
| "epoch": 6.505281690140845, | |
| "grad_norm": 0.5901482581057765, | |
| "learning_rate": 6.099543650798345e-07, | |
| "loss": 0.1317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1625135987997055, | |
| "step": 3695, | |
| "valid_targets_mean": 3733.6, | |
| "valid_targets_min": 566 | |
| }, | |
| { | |
| "epoch": 6.514084507042254, | |
| "grad_norm": 0.5184458383275362, | |
| "learning_rate": 5.886223215412745e-07, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12808898091316223, | |
| "step": 3700, | |
| "valid_targets_mean": 4517.6, | |
| "valid_targets_min": 489 | |
| }, | |
| { | |
| "epoch": 6.522887323943662, | |
| "grad_norm": 0.4569147690193747, | |
| "learning_rate": 5.676644012916654e-07, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1251828372478485, | |
| "step": 3705, | |
| "valid_targets_mean": 5500.9, | |
| "valid_targets_min": 601 | |
| }, | |
| { | |
| "epoch": 6.53169014084507, | |
| "grad_norm": 0.5955739695612043, | |
| "learning_rate": 5.47081008261443e-07, | |
| "loss": 0.1298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12023209780454636, | |
| "step": 3710, | |
| "valid_targets_mean": 3920.7, | |
| "valid_targets_min": 1563 | |
| }, | |
| { | |
| "epoch": 6.540492957746479, | |
| "grad_norm": 0.4236285487518285, | |
| "learning_rate": 5.268725391626106e-07, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13967207074165344, | |
| "step": 3715, | |
| "valid_targets_mean": 6803.4, | |
| "valid_targets_min": 1750 | |
| }, | |
| { | |
| "epoch": 6.549295774647887, | |
| "grad_norm": 0.4997947939300797, | |
| "learning_rate": 5.070393834811227e-07, | |
| "loss": 0.1256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12099511176347733, | |
| "step": 3720, | |
| "valid_targets_mean": 4376.5, | |
| "valid_targets_min": 268 | |
| }, | |
| { | |
| "epoch": 6.558098591549296, | |
| "grad_norm": 0.5949079454309374, | |
| "learning_rate": 4.875819234693669e-07, | |
| "loss": 0.1518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15520477294921875, | |
| "step": 3725, | |
| "valid_targets_mean": 3592.7, | |
| "valid_targets_min": 422 | |
| }, | |
| { | |
| "epoch": 6.566901408450704, | |
| "grad_norm": 0.5158239896401939, | |
| "learning_rate": 4.6850053413879384e-07, | |
| "loss": 0.1452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12767250835895538, | |
| "step": 3730, | |
| "valid_targets_mean": 4828.2, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 6.575704225352113, | |
| "grad_norm": 5.204110368915318, | |
| "learning_rate": 4.497955832526946e-07, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13973121345043182, | |
| "step": 3735, | |
| "valid_targets_mean": 5128.5, | |
| "valid_targets_min": 781 | |
| }, | |
| { | |
| "epoch": 6.584507042253521, | |
| "grad_norm": 0.49661962315449965, | |
| "learning_rate": 4.314674313191147e-07, | |
| "loss": 0.1384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1290832906961441, | |
| "step": 3740, | |
| "valid_targets_mean": 4996.8, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 6.59330985915493, | |
| "grad_norm": 0.49913963336876854, | |
| "learning_rate": 4.1351643158389135e-07, | |
| "loss": 0.1281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1183624118566513, | |
| "step": 3745, | |
| "valid_targets_mean": 5135.5, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 6.602112676056338, | |
| "grad_norm": 0.5055846621241453, | |
| "learning_rate": 3.9594293002386486e-07, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14708563685417175, | |
| "step": 3750, | |
| "valid_targets_mean": 5182.0, | |
| "valid_targets_min": 2863 | |
| }, | |
| { | |
| "epoch": 6.610915492957746, | |
| "grad_norm": 0.5679686924808536, | |
| "learning_rate": 3.7874726534019803e-07, | |
| "loss": 0.1353, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16401495039463043, | |
| "step": 3755, | |
| "valid_targets_mean": 4951.6, | |
| "valid_targets_min": 1618 | |
| }, | |
| { | |
| "epoch": 6.619718309859155, | |
| "grad_norm": 0.5042182472061159, | |
| "learning_rate": 3.6192976895185197e-07, | |
| "loss": 0.1434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15072056651115417, | |
| "step": 3760, | |
| "valid_targets_mean": 5545.9, | |
| "valid_targets_min": 502 | |
| }, | |
| { | |
| "epoch": 6.628521126760563, | |
| "grad_norm": 0.4478310619510605, | |
| "learning_rate": 3.454907649892003e-07, | |
| "loss": 0.13, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1209946870803833, | |
| "step": 3765, | |
| "valid_targets_mean": 5408.1, | |
| "valid_targets_min": 2117 | |
| }, | |
| { | |
| "epoch": 6.637323943661972, | |
| "grad_norm": 0.48280439303594297, | |
| "learning_rate": 3.294305702877765e-07, | |
| "loss": 0.1548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12625285983085632, | |
| "step": 3770, | |
| "valid_targets_mean": 4874.4, | |
| "valid_targets_min": 316 | |
| }, | |
| { | |
| "epoch": 6.64612676056338, | |
| "grad_norm": 0.5214789503066944, | |
| "learning_rate": 3.137494943821717e-07, | |
| "loss": 0.1313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12771427631378174, | |
| "step": 3775, | |
| "valid_targets_mean": 4606.7, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 6.654929577464789, | |
| "grad_norm": 0.5161473232450198, | |
| "learning_rate": 2.984478395000712e-07, | |
| "loss": 0.1403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1472235918045044, | |
| "step": 3780, | |
| "valid_targets_mean": 4458.8, | |
| "valid_targets_min": 595 | |
| }, | |
| { | |
| "epoch": 6.663732394366197, | |
| "grad_norm": 0.5112762299613887, | |
| "learning_rate": 2.835259005564184e-07, | |
| "loss": 0.1504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14102263748645782, | |
| "step": 3785, | |
| "valid_targets_mean": 5401.2, | |
| "valid_targets_min": 746 | |
| }, | |
| { | |
| "epoch": 6.672535211267606, | |
| "grad_norm": 0.5402917808091763, | |
| "learning_rate": 2.689839651477466e-07, | |
| "loss": 0.1391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11662067472934723, | |
| "step": 3790, | |
| "valid_targets_mean": 4540.5, | |
| "valid_targets_min": 754 | |
| }, | |
| { | |
| "epoch": 6.681338028169014, | |
| "grad_norm": 0.5537853286173946, | |
| "learning_rate": 2.5482231354662766e-07, | |
| "loss": 0.1295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1329900324344635, | |
| "step": 3795, | |
| "valid_targets_mean": 4086.2, | |
| "valid_targets_min": 1771 | |
| }, | |
| { | |
| "epoch": 6.690140845070422, | |
| "grad_norm": 0.5132774232279969, | |
| "learning_rate": 2.410412186962674e-07, | |
| "loss": 0.1569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13712632656097412, | |
| "step": 3800, | |
| "valid_targets_mean": 4599.0, | |
| "valid_targets_min": 672 | |
| }, | |
| { | |
| "epoch": 6.698943661971831, | |
| "grad_norm": 0.482088257189529, | |
| "learning_rate": 2.2764094620524758e-07, | |
| "loss": 0.1441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12420330196619034, | |
| "step": 3805, | |
| "valid_targets_mean": 4648.1, | |
| "valid_targets_min": 1498 | |
| }, | |
| { | |
| "epoch": 6.707746478873239, | |
| "grad_norm": 0.5243304652599475, | |
| "learning_rate": 2.1462175434241006e-07, | |
| "loss": 0.1537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12946471571922302, | |
| "step": 3810, | |
| "valid_targets_mean": 4619.9, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 6.716549295774648, | |
| "grad_norm": 0.5515239554295401, | |
| "learning_rate": 2.0198389403187634e-07, | |
| "loss": 0.1312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12040004879236221, | |
| "step": 3815, | |
| "valid_targets_mean": 3953.8, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 6.725352112676056, | |
| "grad_norm": 0.4972142040426553, | |
| "learning_rate": 1.897276088482114e-07, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13801251351833344, | |
| "step": 3820, | |
| "valid_targets_mean": 5580.2, | |
| "valid_targets_min": 2296 | |
| }, | |
| { | |
| "epoch": 6.734154929577465, | |
| "grad_norm": 0.5871031160476039, | |
| "learning_rate": 1.778531350117274e-07, | |
| "loss": 0.1279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13027629256248474, | |
| "step": 3825, | |
| "valid_targets_mean": 3938.6, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 6.742957746478873, | |
| "grad_norm": 0.6190380850772422, | |
| "learning_rate": 1.6636070138393634e-07, | |
| "loss": 0.132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1213836818933487, | |
| "step": 3830, | |
| "valid_targets_mean": 4684.8, | |
| "valid_targets_min": 1961 | |
| }, | |
| { | |
| "epoch": 6.751760563380282, | |
| "grad_norm": 0.4826258894557253, | |
| "learning_rate": 1.5525052946313123e-07, | |
| "loss": 0.1388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13808231055736542, | |
| "step": 3835, | |
| "valid_targets_mean": 5236.9, | |
| "valid_targets_min": 2342 | |
| }, | |
| { | |
| "epoch": 6.76056338028169, | |
| "grad_norm": 0.478219529514962, | |
| "learning_rate": 1.4452283338012518e-07, | |
| "loss": 0.1494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1022307425737381, | |
| "step": 3840, | |
| "valid_targets_mean": 4580.9, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 6.769366197183099, | |
| "grad_norm": 0.5641956405150934, | |
| "learning_rate": 1.3417781989411904e-07, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16835173964500427, | |
| "step": 3845, | |
| "valid_targets_mean": 4964.6, | |
| "valid_targets_min": 611 | |
| }, | |
| { | |
| "epoch": 6.778169014084507, | |
| "grad_norm": 0.47629348757396694, | |
| "learning_rate": 1.242156883887202e-07, | |
| "loss": 0.142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1282704770565033, | |
| "step": 3850, | |
| "valid_targets_mean": 5001.7, | |
| "valid_targets_min": 769 | |
| }, | |
| { | |
| "epoch": 6.786971830985916, | |
| "grad_norm": 0.5554173824443297, | |
| "learning_rate": 1.1463663086809018e-07, | |
| "loss": 0.1372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1403650939464569, | |
| "step": 3855, | |
| "valid_targets_mean": 4853.1, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 6.795774647887324, | |
| "grad_norm": 0.472728953774368, | |
| "learning_rate": 1.0544083195326293e-07, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16951721906661987, | |
| "step": 3860, | |
| "valid_targets_mean": 6472.5, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 6.804577464788732, | |
| "grad_norm": 0.4689170758503489, | |
| "learning_rate": 9.662846887856792e-08, | |
| "loss": 0.1376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10850979387760162, | |
| "step": 3865, | |
| "valid_targets_mean": 4721.9, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 6.813380281690141, | |
| "grad_norm": 0.5120769305717948, | |
| "learning_rate": 8.819971148822159e-08, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18803714215755463, | |
| "step": 3870, | |
| "valid_targets_mean": 5475.3, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 6.822183098591549, | |
| "grad_norm": 0.5807638234515028, | |
| "learning_rate": 8.015472223305676e-08, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13857172429561615, | |
| "step": 3875, | |
| "valid_targets_mean": 4163.2, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 6.830985915492958, | |
| "grad_norm": 0.44413053809396497, | |
| "learning_rate": 7.249365616738502e-08, | |
| "loss": 0.1358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1431388258934021, | |
| "step": 3880, | |
| "valid_targets_mean": 6079.1, | |
| "valid_targets_min": 1884 | |
| }, | |
| { | |
| "epoch": 6.839788732394366, | |
| "grad_norm": 0.5053019371730413, | |
| "learning_rate": 6.521666094601475e-08, | |
| "loss": 0.1646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11904206871986389, | |
| "step": 3885, | |
| "valid_targets_mean": 4538.6, | |
| "valid_targets_min": 1744 | |
| }, | |
| { | |
| "epoch": 6.848591549295775, | |
| "grad_norm": 0.4504386243811128, | |
| "learning_rate": 5.832387682140228e-08, | |
| "loss": 0.1352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1264674812555313, | |
| "step": 3890, | |
| "valid_targets_mean": 5569.6, | |
| "valid_targets_min": 1996 | |
| }, | |
| { | |
| "epoch": 6.857394366197183, | |
| "grad_norm": 0.4406752046660607, | |
| "learning_rate": 5.181543664094735e-08, | |
| "loss": 0.1506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16318558156490326, | |
| "step": 3895, | |
| "valid_targets_mean": 6695.7, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 6.866197183098592, | |
| "grad_norm": 0.5404471459503121, | |
| "learning_rate": 4.569146584443518e-08, | |
| "loss": 0.1341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15698353946208954, | |
| "step": 3900, | |
| "valid_targets_mean": 4885.8, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 6.875, | |
| "grad_norm": 0.4491427488541899, | |
| "learning_rate": 3.995208246161619e-08, | |
| "loss": 0.148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14439737796783447, | |
| "step": 3905, | |
| "valid_targets_mean": 5856.3, | |
| "valid_targets_min": 1364 | |
| }, | |
| { | |
| "epoch": 6.883802816901408, | |
| "grad_norm": 0.5442810464187087, | |
| "learning_rate": 3.4597397109936704e-08, | |
| "loss": 0.1416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13218745589256287, | |
| "step": 3910, | |
| "valid_targets_mean": 4103.6, | |
| "valid_targets_min": 834 | |
| }, | |
| { | |
| "epoch": 6.892605633802817, | |
| "grad_norm": 0.5314395931439294, | |
| "learning_rate": 2.962751299240285e-08, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15859127044677734, | |
| "step": 3915, | |
| "valid_targets_mean": 4381.8, | |
| "valid_targets_min": 431 | |
| }, | |
| { | |
| "epoch": 6.901408450704225, | |
| "grad_norm": 0.5481858371783543, | |
| "learning_rate": 2.5042525895586645e-08, | |
| "loss": 0.1238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14879918098449707, | |
| "step": 3920, | |
| "valid_targets_mean": 4848.9, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 6.910211267605634, | |
| "grad_norm": 0.5024406787906156, | |
| "learning_rate": 2.0842524187789647e-08, | |
| "loss": 0.1281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12088795006275177, | |
| "step": 3925, | |
| "valid_targets_mean": 4772.4, | |
| "valid_targets_min": 529 | |
| }, | |
| { | |
| "epoch": 6.919014084507042, | |
| "grad_norm": 0.5019230140344024, | |
| "learning_rate": 1.7027588817335462e-08, | |
| "loss": 0.1484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1242571622133255, | |
| "step": 3930, | |
| "valid_targets_mean": 4930.0, | |
| "valid_targets_min": 560 | |
| }, | |
| { | |
| "epoch": 6.927816901408451, | |
| "grad_norm": 0.5802204218020438, | |
| "learning_rate": 1.3597793311004304e-08, | |
| "loss": 0.142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12173550575971603, | |
| "step": 3935, | |
| "valid_targets_mean": 4195.8, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 6.936619718309859, | |
| "grad_norm": 0.550502191121643, | |
| "learning_rate": 1.0553203772627474e-08, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15253986418247223, | |
| "step": 3940, | |
| "valid_targets_mean": 4248.6, | |
| "valid_targets_min": 591 | |
| }, | |
| { | |
| "epoch": 6.945422535211268, | |
| "grad_norm": 0.5801532415632528, | |
| "learning_rate": 7.89387888180171e-09, | |
| "loss": 0.1375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13552480936050415, | |
| "step": 3945, | |
| "valid_targets_mean": 3476.6, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 6.954225352112676, | |
| "grad_norm": 0.4527142784019784, | |
| "learning_rate": 5.61986989276786e-09, | |
| "loss": 0.1509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19037893414497375, | |
| "step": 3950, | |
| "valid_targets_mean": 6103.2, | |
| "valid_targets_min": 2214 | |
| }, | |
| { | |
| "epoch": 6.963028169014084, | |
| "grad_norm": 0.5734887260088123, | |
| "learning_rate": 3.7312206334116915e-09, | |
| "loss": 0.1297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12395994365215302, | |
| "step": 3955, | |
| "valid_targets_mean": 3680.4, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 6.971830985915493, | |
| "grad_norm": 0.5020804020464924, | |
| "learning_rate": 2.227967504433437e-09, | |
| "loss": 0.1501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1413247287273407, | |
| "step": 3960, | |
| "valid_targets_mean": 4620.6, | |
| "valid_targets_min": 389 | |
| }, | |
| { | |
| "epoch": 6.980633802816901, | |
| "grad_norm": 0.542152115833304, | |
| "learning_rate": 1.1101394786350306e-09, | |
| "loss": 0.133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1374783217906952, | |
| "step": 3965, | |
| "valid_targets_mean": 3837.2, | |
| "valid_targets_min": 1914 | |
| }, | |
| { | |
| "epoch": 6.98943661971831, | |
| "grad_norm": 0.5131902934085243, | |
| "learning_rate": 3.777581003627795e-10, | |
| "loss": 0.1392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13894666731357574, | |
| "step": 3970, | |
| "valid_targets_mean": 5099.8, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 6.998239436619718, | |
| "grad_norm": 0.4711959001764335, | |
| "learning_rate": 3.0837485098800245e-11, | |
| "loss": 0.1479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1285422146320343, | |
| "step": 3975, | |
| "valid_targets_mean": 5568.9, | |
| "valid_targets_min": 1908 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14995810389518738, | |
| "step": 3976, | |
| "total_flos": 1254621303865344.0, | |
| "train_loss": 0.1876786600655953, | |
| "train_runtime": 21484.049, | |
| "train_samples_per_second": 2.956, | |
| "train_steps_per_second": 0.185, | |
| "valid_targets_mean": 5724.8, | |
| "valid_targets_min": 613 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3976, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1254621303865344.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |