Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent/g1_timeout_e1_gpt_long_tacc with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent/g1_timeout_e1_gpt_long_tacc with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DCAgent/g1_timeout_e1_gpt_long_tacc")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DCAgent/g1_timeout_e1_gpt_long_tacc")
model = AutoModelForCausalLM.from_pretrained("DCAgent/g1_timeout_e1_gpt_long_tacc")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use DCAgent/g1_timeout_e1_gpt_long_tacc with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DCAgent/g1_timeout_e1_gpt_long_tacc"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/g1_timeout_e1_gpt_long_tacc",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/DCAgent/g1_timeout_e1_gpt_long_tacc
- SGLang
How to use DCAgent/g1_timeout_e1_gpt_long_tacc with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "DCAgent/g1_timeout_e1_gpt_long_tacc" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/g1_timeout_e1_gpt_long_tacc",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DCAgent/g1_timeout_e1_gpt_long_tacc" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/g1_timeout_e1_gpt_long_tacc",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use DCAgent/g1_timeout_e1_gpt_long_tacc with Docker Model Runner:
docker model run hf.co/DCAgent/g1_timeout_e1_gpt_long_tacc
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 4347, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008051529790660225, | |
| "grad_norm": 12.6211720137527, | |
| "learning_rate": 3.6781609195402303e-07, | |
| "loss": 0.8023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.8141472339630127, | |
| "step": 5, | |
| "valid_targets_mean": 6454.3, | |
| "valid_targets_min": 2941 | |
| }, | |
| { | |
| "epoch": 0.01610305958132045, | |
| "grad_norm": 13.535375108314145, | |
| "learning_rate": 8.275862068965518e-07, | |
| "loss": 0.779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7463928461074829, | |
| "step": 10, | |
| "valid_targets_mean": 6041.3, | |
| "valid_targets_min": 3050 | |
| }, | |
| { | |
| "epoch": 0.024154589371980676, | |
| "grad_norm": 11.267948292741378, | |
| "learning_rate": 1.2873563218390806e-06, | |
| "loss": 0.7929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7566848993301392, | |
| "step": 15, | |
| "valid_targets_mean": 6337.3, | |
| "valid_targets_min": 3318 | |
| }, | |
| { | |
| "epoch": 0.0322061191626409, | |
| "grad_norm": 9.384853818135273, | |
| "learning_rate": 1.7471264367816093e-06, | |
| "loss": 0.7322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7497715353965759, | |
| "step": 20, | |
| "valid_targets_mean": 6536.2, | |
| "valid_targets_min": 3943 | |
| }, | |
| { | |
| "epoch": 0.040257648953301126, | |
| "grad_norm": 6.508928535304328, | |
| "learning_rate": 2.206896551724138e-06, | |
| "loss": 0.6848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6616116762161255, | |
| "step": 25, | |
| "valid_targets_mean": 6144.0, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 0.04830917874396135, | |
| "grad_norm": 4.939706725980711, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.6527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6172756552696228, | |
| "step": 30, | |
| "valid_targets_mean": 5658.6, | |
| "valid_targets_min": 3195 | |
| }, | |
| { | |
| "epoch": 0.05636070853462158, | |
| "grad_norm": 2.5910555463911815, | |
| "learning_rate": 3.1264367816091956e-06, | |
| "loss": 0.626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6274504065513611, | |
| "step": 35, | |
| "valid_targets_mean": 5992.8, | |
| "valid_targets_min": 3207 | |
| }, | |
| { | |
| "epoch": 0.0644122383252818, | |
| "grad_norm": 1.7689592025893621, | |
| "learning_rate": 3.5862068965517243e-06, | |
| "loss": 0.576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6012336015701294, | |
| "step": 40, | |
| "valid_targets_mean": 5498.4, | |
| "valid_targets_min": 2385 | |
| }, | |
| { | |
| "epoch": 0.07246376811594203, | |
| "grad_norm": 1.3230501227984852, | |
| "learning_rate": 4.0459770114942535e-06, | |
| "loss": 0.5463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5329886078834534, | |
| "step": 45, | |
| "valid_targets_mean": 6101.3, | |
| "valid_targets_min": 2571 | |
| }, | |
| { | |
| "epoch": 0.08051529790660225, | |
| "grad_norm": 1.1549301298631403, | |
| "learning_rate": 4.505747126436782e-06, | |
| "loss": 0.5354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5498359203338623, | |
| "step": 50, | |
| "valid_targets_mean": 5591.6, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 0.08856682769726248, | |
| "grad_norm": 0.9148612268441723, | |
| "learning_rate": 4.965517241379311e-06, | |
| "loss": 0.5173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5667314529418945, | |
| "step": 55, | |
| "valid_targets_mean": 7175.1, | |
| "valid_targets_min": 3309 | |
| }, | |
| { | |
| "epoch": 0.0966183574879227, | |
| "grad_norm": 0.9051707439274824, | |
| "learning_rate": 5.42528735632184e-06, | |
| "loss": 0.4999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49746763706207275, | |
| "step": 60, | |
| "valid_targets_mean": 5700.9, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 0.10466988727858294, | |
| "grad_norm": 0.7105966724914392, | |
| "learning_rate": 5.8850574712643685e-06, | |
| "loss": 0.4905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49637746810913086, | |
| "step": 65, | |
| "valid_targets_mean": 6462.1, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 0.11272141706924316, | |
| "grad_norm": 0.6112534858050221, | |
| "learning_rate": 6.344827586206898e-06, | |
| "loss": 0.483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44268643856048584, | |
| "step": 70, | |
| "valid_targets_mean": 6671.4, | |
| "valid_targets_min": 2978 | |
| }, | |
| { | |
| "epoch": 0.12077294685990338, | |
| "grad_norm": 0.6428768043018936, | |
| "learning_rate": 6.804597701149426e-06, | |
| "loss": 0.4662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4915180206298828, | |
| "step": 75, | |
| "valid_targets_mean": 5441.8, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 0.1288244766505636, | |
| "grad_norm": 0.9646038113879557, | |
| "learning_rate": 7.264367816091955e-06, | |
| "loss": 0.4314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43008822202682495, | |
| "step": 80, | |
| "valid_targets_mean": 5118.4, | |
| "valid_targets_min": 2205 | |
| }, | |
| { | |
| "epoch": 0.13687600644122383, | |
| "grad_norm": 1.4574457159646115, | |
| "learning_rate": 7.724137931034483e-06, | |
| "loss": 0.615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6628851890563965, | |
| "step": 85, | |
| "valid_targets_mean": 2431.5, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 0.14492753623188406, | |
| "grad_norm": 1.041883232166695, | |
| "learning_rate": 8.183908045977013e-06, | |
| "loss": 0.701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6815399527549744, | |
| "step": 90, | |
| "valid_targets_mean": 3045.8, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 0.1529790660225443, | |
| "grad_norm": 0.8494250273002256, | |
| "learning_rate": 8.643678160919541e-06, | |
| "loss": 0.6405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6369081735610962, | |
| "step": 95, | |
| "valid_targets_mean": 3644.5, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 0.1610305958132045, | |
| "grad_norm": 0.8637751062370636, | |
| "learning_rate": 9.10344827586207e-06, | |
| "loss": 0.6437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6281415224075317, | |
| "step": 100, | |
| "valid_targets_mean": 2985.6, | |
| "valid_targets_min": 1353 | |
| }, | |
| { | |
| "epoch": 0.16908212560386474, | |
| "grad_norm": 0.7659577551413557, | |
| "learning_rate": 9.563218390804598e-06, | |
| "loss": 0.5845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5499722957611084, | |
| "step": 105, | |
| "valid_targets_mean": 4235.5, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 0.17713365539452497, | |
| "grad_norm": 0.8658771361706771, | |
| "learning_rate": 1.0022988505747126e-05, | |
| "loss": 0.6017, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6165111660957336, | |
| "step": 110, | |
| "valid_targets_mean": 3142.8, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 0.7434733707474204, | |
| "learning_rate": 1.0482758620689658e-05, | |
| "loss": 0.5981, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5669258832931519, | |
| "step": 115, | |
| "valid_targets_mean": 3279.6, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 0.1932367149758454, | |
| "grad_norm": 0.8722216577271487, | |
| "learning_rate": 1.0942528735632186e-05, | |
| "loss": 0.6172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6591456532478333, | |
| "step": 120, | |
| "valid_targets_mean": 2693.6, | |
| "valid_targets_min": 1497 | |
| }, | |
| { | |
| "epoch": 0.20128824476650564, | |
| "grad_norm": 0.8310182578313449, | |
| "learning_rate": 1.1402298850574713e-05, | |
| "loss": 0.5898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5488319396972656, | |
| "step": 125, | |
| "valid_targets_mean": 2785.3, | |
| "valid_targets_min": 1343 | |
| }, | |
| { | |
| "epoch": 0.20933977455716588, | |
| "grad_norm": 0.9235409692277453, | |
| "learning_rate": 1.1862068965517241e-05, | |
| "loss": 0.6038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6315065622329712, | |
| "step": 130, | |
| "valid_targets_mean": 2680.9, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 0.21739130434782608, | |
| "grad_norm": 0.7693874973200193, | |
| "learning_rate": 1.2321839080459773e-05, | |
| "loss": 0.5719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5175089836120605, | |
| "step": 135, | |
| "valid_targets_mean": 3356.8, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 0.22544283413848631, | |
| "grad_norm": 0.926054709365038, | |
| "learning_rate": 1.2781609195402301e-05, | |
| "loss": 0.5767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.59300696849823, | |
| "step": 140, | |
| "valid_targets_mean": 2648.8, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 0.23349436392914655, | |
| "grad_norm": 0.7436423550718153, | |
| "learning_rate": 1.324137931034483e-05, | |
| "loss": 0.5838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5333613157272339, | |
| "step": 145, | |
| "valid_targets_mean": 3610.2, | |
| "valid_targets_min": 1346 | |
| }, | |
| { | |
| "epoch": 0.24154589371980675, | |
| "grad_norm": 0.8771301167213914, | |
| "learning_rate": 1.3701149425287356e-05, | |
| "loss": 0.5807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5605872869491577, | |
| "step": 150, | |
| "valid_targets_mean": 2815.4, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 0.249597423510467, | |
| "grad_norm": 0.7265522281955346, | |
| "learning_rate": 1.4160919540229888e-05, | |
| "loss": 0.5597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5334085822105408, | |
| "step": 155, | |
| "valid_targets_mean": 3536.7, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 0.2576489533011272, | |
| "grad_norm": 0.8637169240692523, | |
| "learning_rate": 1.4620689655172416e-05, | |
| "loss": 0.5512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5399806499481201, | |
| "step": 160, | |
| "valid_targets_mean": 2561.4, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 0.26570048309178745, | |
| "grad_norm": 0.7684987940501175, | |
| "learning_rate": 1.5080459770114944e-05, | |
| "loss": 0.5561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5099571943283081, | |
| "step": 165, | |
| "valid_targets_mean": 2889.4, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 0.27375201288244766, | |
| "grad_norm": 1.2443263629609338, | |
| "learning_rate": 1.5540229885057473e-05, | |
| "loss": 0.5758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5990105271339417, | |
| "step": 170, | |
| "valid_targets_mean": 2557.4, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 0.28180354267310787, | |
| "grad_norm": 0.7025661086648686, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.5516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5607724785804749, | |
| "step": 175, | |
| "valid_targets_mean": 3530.8, | |
| "valid_targets_min": 1290 | |
| }, | |
| { | |
| "epoch": 0.2898550724637681, | |
| "grad_norm": 0.9299035150917279, | |
| "learning_rate": 1.645977011494253e-05, | |
| "loss": 0.5504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5684704184532166, | |
| "step": 180, | |
| "valid_targets_mean": 2579.7, | |
| "valid_targets_min": 1229 | |
| }, | |
| { | |
| "epoch": 0.29790660225442833, | |
| "grad_norm": 0.9029683337889093, | |
| "learning_rate": 1.691954022988506e-05, | |
| "loss": 0.5549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5909307599067688, | |
| "step": 185, | |
| "valid_targets_mean": 3024.5, | |
| "valid_targets_min": 1126 | |
| }, | |
| { | |
| "epoch": 0.3059581320450886, | |
| "grad_norm": 0.7196257441971349, | |
| "learning_rate": 1.7379310344827586e-05, | |
| "loss": 0.533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5063704252243042, | |
| "step": 190, | |
| "valid_targets_mean": 3523.4, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 0.3140096618357488, | |
| "grad_norm": 0.7255462834778044, | |
| "learning_rate": 1.7839080459770116e-05, | |
| "loss": 0.5285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5233041644096375, | |
| "step": 195, | |
| "valid_targets_mean": 3317.8, | |
| "valid_targets_min": 1389 | |
| }, | |
| { | |
| "epoch": 0.322061191626409, | |
| "grad_norm": 0.8908272457952328, | |
| "learning_rate": 1.8298850574712646e-05, | |
| "loss": 0.566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5723013877868652, | |
| "step": 200, | |
| "valid_targets_mean": 2728.8, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 0.33011272141706927, | |
| "grad_norm": 0.7237478954756121, | |
| "learning_rate": 1.8758620689655173e-05, | |
| "loss": 0.5403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.488622784614563, | |
| "step": 205, | |
| "valid_targets_mean": 3474.4, | |
| "valid_targets_min": 1520 | |
| }, | |
| { | |
| "epoch": 0.33816425120772947, | |
| "grad_norm": 0.9731721468625133, | |
| "learning_rate": 1.9218390804597703e-05, | |
| "loss": 0.5479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5853530168533325, | |
| "step": 210, | |
| "valid_targets_mean": 2857.8, | |
| "valid_targets_min": 1154 | |
| }, | |
| { | |
| "epoch": 0.3462157809983897, | |
| "grad_norm": 0.8620329842750755, | |
| "learning_rate": 1.9678160919540233e-05, | |
| "loss": 0.5582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5213043689727783, | |
| "step": 215, | |
| "valid_targets_mean": 2419.3, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 0.35426731078904994, | |
| "grad_norm": 0.9094077963820549, | |
| "learning_rate": 2.013793103448276e-05, | |
| "loss": 0.5365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5487880706787109, | |
| "step": 220, | |
| "valid_targets_mean": 2817.8, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 0.36231884057971014, | |
| "grad_norm": 0.7944146685277702, | |
| "learning_rate": 2.059770114942529e-05, | |
| "loss": 0.5477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5611993074417114, | |
| "step": 225, | |
| "valid_targets_mean": 2906.6, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 0.8404791965077937, | |
| "learning_rate": 2.1057471264367816e-05, | |
| "loss": 0.5326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5319218635559082, | |
| "step": 230, | |
| "valid_targets_mean": 2699.4, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 0.3784219001610306, | |
| "grad_norm": 0.7398954624086636, | |
| "learning_rate": 2.1517241379310346e-05, | |
| "loss": 0.5158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5021366477012634, | |
| "step": 235, | |
| "valid_targets_mean": 3474.1, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 0.3864734299516908, | |
| "grad_norm": 0.6565950720543745, | |
| "learning_rate": 2.1977011494252873e-05, | |
| "loss": 0.516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4567714333534241, | |
| "step": 240, | |
| "valid_targets_mean": 4942.6, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 0.394524959742351, | |
| "grad_norm": 0.7843837006160241, | |
| "learning_rate": 2.2436781609195406e-05, | |
| "loss": 0.5148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.525688886642456, | |
| "step": 245, | |
| "valid_targets_mean": 3203.9, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 0.4025764895330113, | |
| "grad_norm": 0.786413171985749, | |
| "learning_rate": 2.2896551724137933e-05, | |
| "loss": 0.4953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5001181364059448, | |
| "step": 250, | |
| "valid_targets_mean": 2741.4, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 0.4106280193236715, | |
| "grad_norm": 0.9340749614199906, | |
| "learning_rate": 2.3356321839080463e-05, | |
| "loss": 0.5043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5208650827407837, | |
| "step": 255, | |
| "valid_targets_mean": 2505.7, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 0.41867954911433175, | |
| "grad_norm": 0.7974854363515583, | |
| "learning_rate": 2.381609195402299e-05, | |
| "loss": 0.4952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4734256863594055, | |
| "step": 260, | |
| "valid_targets_mean": 2908.2, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 0.42673107890499196, | |
| "grad_norm": 0.7083832787574131, | |
| "learning_rate": 2.427586206896552e-05, | |
| "loss": 0.4563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4569528102874756, | |
| "step": 265, | |
| "valid_targets_mean": 3492.0, | |
| "valid_targets_min": 1549 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 0.8471307752430234, | |
| "learning_rate": 2.4735632183908046e-05, | |
| "loss": 0.4805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4980763792991638, | |
| "step": 270, | |
| "valid_targets_mean": 2432.0, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 0.4428341384863124, | |
| "grad_norm": 0.7581722015778103, | |
| "learning_rate": 2.5195402298850576e-05, | |
| "loss": 0.4698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4556068778038025, | |
| "step": 275, | |
| "valid_targets_mean": 3069.9, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 0.45088566827697263, | |
| "grad_norm": 0.7647100815494069, | |
| "learning_rate": 2.5655172413793103e-05, | |
| "loss": 0.5029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5297585725784302, | |
| "step": 280, | |
| "valid_targets_mean": 3644.1, | |
| "valid_targets_min": 1308 | |
| }, | |
| { | |
| "epoch": 0.45893719806763283, | |
| "grad_norm": 0.6888367368546122, | |
| "learning_rate": 2.6114942528735636e-05, | |
| "loss": 0.4635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4325258135795593, | |
| "step": 285, | |
| "valid_targets_mean": 3538.7, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 0.4669887278582931, | |
| "grad_norm": 0.7918334936431798, | |
| "learning_rate": 2.6574712643678166e-05, | |
| "loss": 0.4829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48796749114990234, | |
| "step": 290, | |
| "valid_targets_mean": 2789.2, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 0.4750402576489533, | |
| "grad_norm": 0.8297680238669458, | |
| "learning_rate": 2.7034482758620693e-05, | |
| "loss": 0.5134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47267746925354004, | |
| "step": 295, | |
| "valid_targets_mean": 2663.7, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 0.4830917874396135, | |
| "grad_norm": 0.6633379830429746, | |
| "learning_rate": 2.749425287356322e-05, | |
| "loss": 0.4589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49964243173599243, | |
| "step": 300, | |
| "valid_targets_mean": 3865.0, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 0.49114331723027377, | |
| "grad_norm": 0.6267750181705374, | |
| "learning_rate": 2.795402298850575e-05, | |
| "loss": 0.4686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44949406385421753, | |
| "step": 305, | |
| "valid_targets_mean": 4117.5, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 0.499194847020934, | |
| "grad_norm": 0.705425101438482, | |
| "learning_rate": 2.8413793103448276e-05, | |
| "loss": 0.4443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4142606258392334, | |
| "step": 310, | |
| "valid_targets_mean": 3565.3, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 0.5072463768115942, | |
| "grad_norm": 0.6719556250206123, | |
| "learning_rate": 2.8873563218390806e-05, | |
| "loss": 0.4748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46530526876449585, | |
| "step": 315, | |
| "valid_targets_mean": 3507.9, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 0.5152979066022544, | |
| "grad_norm": 0.7129427212893642, | |
| "learning_rate": 2.9333333333333333e-05, | |
| "loss": 0.4742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4753148555755615, | |
| "step": 320, | |
| "valid_targets_mean": 3822.9, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 0.5233494363929146, | |
| "grad_norm": 0.7453050849315828, | |
| "learning_rate": 2.9793103448275866e-05, | |
| "loss": 0.4602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4605864882469177, | |
| "step": 325, | |
| "valid_targets_mean": 2932.9, | |
| "valid_targets_min": 1398 | |
| }, | |
| { | |
| "epoch": 0.5314009661835749, | |
| "grad_norm": 0.6416359264690352, | |
| "learning_rate": 3.0252873563218396e-05, | |
| "loss": 0.4427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3964742124080658, | |
| "step": 330, | |
| "valid_targets_mean": 3473.2, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 0.5394524959742351, | |
| "grad_norm": 0.8091118683339827, | |
| "learning_rate": 3.071264367816092e-05, | |
| "loss": 0.4865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5222635269165039, | |
| "step": 335, | |
| "valid_targets_mean": 2707.5, | |
| "valid_targets_min": 1453 | |
| }, | |
| { | |
| "epoch": 0.5475040257648953, | |
| "grad_norm": 0.6475514263706704, | |
| "learning_rate": 3.117241379310345e-05, | |
| "loss": 0.468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4403412640094757, | |
| "step": 340, | |
| "valid_targets_mean": 3770.1, | |
| "valid_targets_min": 1554 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 0.6905557475577746, | |
| "learning_rate": 3.1632183908045976e-05, | |
| "loss": 0.4429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42495399713516235, | |
| "step": 345, | |
| "valid_targets_mean": 3687.9, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 0.5636070853462157, | |
| "grad_norm": 0.7104740968387137, | |
| "learning_rate": 3.2091954022988506e-05, | |
| "loss": 0.455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43538206815719604, | |
| "step": 350, | |
| "valid_targets_mean": 3580.0, | |
| "valid_targets_min": 1778 | |
| }, | |
| { | |
| "epoch": 0.571658615136876, | |
| "grad_norm": 0.7499873582780294, | |
| "learning_rate": 3.2551724137931036e-05, | |
| "loss": 0.471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42534762620925903, | |
| "step": 355, | |
| "valid_targets_mean": 3154.2, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 0.6642445186064259, | |
| "learning_rate": 3.3011494252873566e-05, | |
| "loss": 0.4456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45597341656684875, | |
| "step": 360, | |
| "valid_targets_mean": 3442.1, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 0.5877616747181964, | |
| "grad_norm": 0.9240276056077943, | |
| "learning_rate": 3.3471264367816096e-05, | |
| "loss": 0.4582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46805498003959656, | |
| "step": 365, | |
| "valid_targets_mean": 2991.2, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 0.5958132045088567, | |
| "grad_norm": 0.7060945620088765, | |
| "learning_rate": 3.3931034482758626e-05, | |
| "loss": 0.4711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42430639266967773, | |
| "step": 370, | |
| "valid_targets_mean": 3584.1, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 0.6038647342995169, | |
| "grad_norm": 0.6951756292278192, | |
| "learning_rate": 3.4390804597701156e-05, | |
| "loss": 0.435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4317897856235504, | |
| "step": 375, | |
| "valid_targets_mean": 3283.6, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 0.6119162640901772, | |
| "grad_norm": 0.6929947538123722, | |
| "learning_rate": 3.485057471264368e-05, | |
| "loss": 0.4654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44339442253112793, | |
| "step": 380, | |
| "valid_targets_mean": 3267.1, | |
| "valid_targets_min": 1469 | |
| }, | |
| { | |
| "epoch": 0.6199677938808373, | |
| "grad_norm": 0.59684922350695, | |
| "learning_rate": 3.531034482758621e-05, | |
| "loss": 0.4385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38249504566192627, | |
| "step": 385, | |
| "valid_targets_mean": 4220.8, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 0.6280193236714976, | |
| "grad_norm": 0.6361797000665275, | |
| "learning_rate": 3.577011494252874e-05, | |
| "loss": 0.4499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42138925194740295, | |
| "step": 390, | |
| "valid_targets_mean": 3403.3, | |
| "valid_targets_min": 1315 | |
| }, | |
| { | |
| "epoch": 0.6360708534621579, | |
| "grad_norm": 0.7553402257839422, | |
| "learning_rate": 3.622988505747126e-05, | |
| "loss": 0.4251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.415188193321228, | |
| "step": 395, | |
| "valid_targets_mean": 3994.0, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 0.644122383252818, | |
| "grad_norm": 0.7840399240091517, | |
| "learning_rate": 3.668965517241379e-05, | |
| "loss": 0.4666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4885357618331909, | |
| "step": 400, | |
| "valid_targets_mean": 2728.6, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 0.6521739130434783, | |
| "grad_norm": 0.7737252334457005, | |
| "learning_rate": 3.714942528735633e-05, | |
| "loss": 0.4453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4366183876991272, | |
| "step": 405, | |
| "valid_targets_mean": 2805.7, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 0.6602254428341385, | |
| "grad_norm": 0.8020067156942912, | |
| "learning_rate": 3.760919540229885e-05, | |
| "loss": 0.4458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4633807837963104, | |
| "step": 410, | |
| "valid_targets_mean": 3165.5, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 0.6682769726247987, | |
| "grad_norm": 0.7674549160676873, | |
| "learning_rate": 3.806896551724138e-05, | |
| "loss": 0.4431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4483141601085663, | |
| "step": 415, | |
| "valid_targets_mean": 2954.1, | |
| "valid_targets_min": 1158 | |
| }, | |
| { | |
| "epoch": 0.6763285024154589, | |
| "grad_norm": 0.6203648387560737, | |
| "learning_rate": 3.852873563218391e-05, | |
| "loss": 0.4338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4143407642841339, | |
| "step": 420, | |
| "valid_targets_mean": 4536.0, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 0.6843800322061192, | |
| "grad_norm": 0.6999255524103093, | |
| "learning_rate": 3.898850574712644e-05, | |
| "loss": 0.4453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41560059785842896, | |
| "step": 425, | |
| "valid_targets_mean": 3440.9, | |
| "valid_targets_min": 1237 | |
| }, | |
| { | |
| "epoch": 0.6924315619967794, | |
| "grad_norm": 0.7359356732077772, | |
| "learning_rate": 3.9448275862068966e-05, | |
| "loss": 0.4852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4889700412750244, | |
| "step": 430, | |
| "valid_targets_mean": 3381.9, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 0.7004830917874396, | |
| "grad_norm": 0.6434530063256044, | |
| "learning_rate": 3.9908045977011496e-05, | |
| "loss": 0.438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43669044971466064, | |
| "step": 435, | |
| "valid_targets_mean": 4124.1, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 0.7085346215780999, | |
| "grad_norm": 0.7259157801334376, | |
| "learning_rate": 3.9999896813789735e-05, | |
| "loss": 0.4362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46412330865859985, | |
| "step": 440, | |
| "valid_targets_mean": 3666.5, | |
| "valid_targets_min": 1144 | |
| }, | |
| { | |
| "epoch": 0.71658615136876, | |
| "grad_norm": 0.6920837839026965, | |
| "learning_rate": 3.999947762163533e-05, | |
| "loss": 0.4364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4421723783016205, | |
| "step": 445, | |
| "valid_targets_mean": 3813.6, | |
| "valid_targets_min": 1150 | |
| }, | |
| { | |
| "epoch": 0.7246376811594203, | |
| "grad_norm": 0.8229582070019114, | |
| "learning_rate": 3.999873598115203e-05, | |
| "loss": 0.4244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4543113112449646, | |
| "step": 450, | |
| "valid_targets_mean": 3184.7, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 0.7326892109500805, | |
| "grad_norm": 0.6618965870642886, | |
| "learning_rate": 3.999767190429718e-05, | |
| "loss": 0.4354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4174274504184723, | |
| "step": 455, | |
| "valid_targets_mean": 3734.6, | |
| "valid_targets_min": 1689 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 0.7350636189187817, | |
| "learning_rate": 3.99962854082267e-05, | |
| "loss": 0.4358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45041102170944214, | |
| "step": 460, | |
| "valid_targets_mean": 3706.6, | |
| "valid_targets_min": 1537 | |
| }, | |
| { | |
| "epoch": 0.748792270531401, | |
| "grad_norm": 0.6527924396728313, | |
| "learning_rate": 3.9994576515294864e-05, | |
| "loss": 0.4562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44863760471343994, | |
| "step": 465, | |
| "valid_targets_mean": 3998.4, | |
| "valid_targets_min": 1261 | |
| }, | |
| { | |
| "epoch": 0.7568438003220612, | |
| "grad_norm": 0.7162099025470566, | |
| "learning_rate": 3.999254525305386e-05, | |
| "loss": 0.3562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30241087079048157, | |
| "step": 470, | |
| "valid_targets_mean": 6379.5, | |
| "valid_targets_min": 3100 | |
| }, | |
| { | |
| "epoch": 0.7648953301127214, | |
| "grad_norm": 0.5518921640398805, | |
| "learning_rate": 3.999019165425341e-05, | |
| "loss": 0.2954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.271371066570282, | |
| "step": 475, | |
| "valid_targets_mean": 5043.8, | |
| "valid_targets_min": 2708 | |
| }, | |
| { | |
| "epoch": 0.7729468599033816, | |
| "grad_norm": 0.5572299520035154, | |
| "learning_rate": 3.99875157568402e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2725101113319397, | |
| "step": 480, | |
| "valid_targets_mean": 5717.3, | |
| "valid_targets_min": 3191 | |
| }, | |
| { | |
| "epoch": 0.7809983896940419, | |
| "grad_norm": 0.6593727425698053, | |
| "learning_rate": 3.998451760395729e-05, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33532363176345825, | |
| "step": 485, | |
| "valid_targets_mean": 6198.8, | |
| "valid_targets_min": 2599 | |
| }, | |
| { | |
| "epoch": 0.789049919484702, | |
| "grad_norm": 0.4904371382366237, | |
| "learning_rate": 3.99811972439434e-05, | |
| "loss": 0.2597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27875158190727234, | |
| "step": 490, | |
| "valid_targets_mean": 5880.9, | |
| "valid_targets_min": 3427 | |
| }, | |
| { | |
| "epoch": 0.7971014492753623, | |
| "grad_norm": 0.5384641711313225, | |
| "learning_rate": 3.997755473033218e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27927276492118835, | |
| "step": 495, | |
| "valid_targets_mean": 5429.1, | |
| "valid_targets_min": 3331 | |
| }, | |
| { | |
| "epoch": 0.8051529790660226, | |
| "grad_norm": 0.4872366354078386, | |
| "learning_rate": 3.997359012185127e-05, | |
| "loss": 0.2644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2796809673309326, | |
| "step": 500, | |
| "valid_targets_mean": 5318.9, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 0.8132045088566827, | |
| "grad_norm": 0.5513399943173102, | |
| "learning_rate": 3.996930348242141e-05, | |
| "loss": 0.3046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34563466906547546, | |
| "step": 505, | |
| "valid_targets_mean": 6334.5, | |
| "valid_targets_min": 3200 | |
| }, | |
| { | |
| "epoch": 0.821256038647343, | |
| "grad_norm": 0.5178713450708176, | |
| "learning_rate": 3.996469488115539e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2517154812812805, | |
| "step": 510, | |
| "valid_targets_mean": 5575.2, | |
| "valid_targets_min": 2982 | |
| }, | |
| { | |
| "epoch": 0.8293075684380032, | |
| "grad_norm": 0.5354106021483718, | |
| "learning_rate": 3.995976439235694e-05, | |
| "loss": 0.2742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2508094310760498, | |
| "step": 515, | |
| "valid_targets_mean": 5045.2, | |
| "valid_targets_min": 2662 | |
| }, | |
| { | |
| "epoch": 0.8373590982286635, | |
| "grad_norm": 0.5043478969730439, | |
| "learning_rate": 3.995451209551953e-05, | |
| "loss": 0.2601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23723244667053223, | |
| "step": 520, | |
| "valid_targets_mean": 5929.9, | |
| "valid_targets_min": 3640 | |
| }, | |
| { | |
| "epoch": 0.8454106280193237, | |
| "grad_norm": 0.4644833157640728, | |
| "learning_rate": 3.994893807532509e-05, | |
| "loss": 0.2754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2522507905960083, | |
| "step": 525, | |
| "valid_targets_mean": 5560.6, | |
| "valid_targets_min": 3146 | |
| }, | |
| { | |
| "epoch": 0.8534621578099839, | |
| "grad_norm": 0.5528859754209329, | |
| "learning_rate": 3.994304242164265e-05, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2732771635055542, | |
| "step": 530, | |
| "valid_targets_mean": 5438.9, | |
| "valid_targets_min": 3095 | |
| }, | |
| { | |
| "epoch": 0.8615136876006442, | |
| "grad_norm": 0.475067141876231, | |
| "learning_rate": 3.9936825229526855e-05, | |
| "loss": 0.2612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2705361843109131, | |
| "step": 535, | |
| "valid_targets_mean": 6153.6, | |
| "valid_targets_min": 3325 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 0.4480740353625752, | |
| "learning_rate": 3.9930286599216506e-05, | |
| "loss": 0.2628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25481677055358887, | |
| "step": 540, | |
| "valid_targets_mean": 6176.7, | |
| "valid_targets_min": 3690 | |
| }, | |
| { | |
| "epoch": 0.8776167471819646, | |
| "grad_norm": 0.5197397102755734, | |
| "learning_rate": 3.9923426636132866e-05, | |
| "loss": 0.2514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23781773447990417, | |
| "step": 545, | |
| "valid_targets_mean": 5253.8, | |
| "valid_targets_min": 2926 | |
| }, | |
| { | |
| "epoch": 0.8856682769726248, | |
| "grad_norm": 0.4625888856613296, | |
| "learning_rate": 3.991624545087801e-05, | |
| "loss": 0.2515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22309911251068115, | |
| "step": 550, | |
| "valid_targets_mean": 6097.0, | |
| "valid_targets_min": 3707 | |
| }, | |
| { | |
| "epoch": 0.893719806763285, | |
| "grad_norm": 0.4887112175957848, | |
| "learning_rate": 3.9908743159233016e-05, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25249046087265015, | |
| "step": 555, | |
| "valid_targets_mean": 5931.1, | |
| "valid_targets_min": 3697 | |
| }, | |
| { | |
| "epoch": 0.9017713365539453, | |
| "grad_norm": 0.4459880666225944, | |
| "learning_rate": 3.990091988215612e-05, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24061977863311768, | |
| "step": 560, | |
| "valid_targets_mean": 6370.6, | |
| "valid_targets_min": 3665 | |
| }, | |
| { | |
| "epoch": 0.9098228663446055, | |
| "grad_norm": 0.5441022062942001, | |
| "learning_rate": 3.989277574578074e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26900017261505127, | |
| "step": 565, | |
| "valid_targets_mean": 6119.8, | |
| "valid_targets_min": 2561 | |
| }, | |
| { | |
| "epoch": 0.9178743961352657, | |
| "grad_norm": 0.5238796115702521, | |
| "learning_rate": 3.9884310881413473e-05, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24351592361927032, | |
| "step": 570, | |
| "valid_targets_mean": 5352.8, | |
| "valid_targets_min": 3838 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 0.4638163386116075, | |
| "learning_rate": 3.987552542553194e-05, | |
| "loss": 0.2427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24837109446525574, | |
| "step": 575, | |
| "valid_targets_mean": 6064.1, | |
| "valid_targets_min": 2800 | |
| }, | |
| { | |
| "epoch": 0.9339774557165862, | |
| "grad_norm": 0.4296037881388161, | |
| "learning_rate": 3.9866419519782636e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21541734039783478, | |
| "step": 580, | |
| "valid_targets_mean": 6725.1, | |
| "valid_targets_min": 4014 | |
| }, | |
| { | |
| "epoch": 0.9420289855072463, | |
| "grad_norm": 0.5667563203419782, | |
| "learning_rate": 3.985699331097858e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25082963705062866, | |
| "step": 585, | |
| "valid_targets_mean": 5340.4, | |
| "valid_targets_min": 3261 | |
| }, | |
| { | |
| "epoch": 0.9500805152979066, | |
| "grad_norm": 0.44699582805642185, | |
| "learning_rate": 3.984724695109702e-05, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24760377407073975, | |
| "step": 590, | |
| "valid_targets_mean": 6194.7, | |
| "valid_targets_min": 3579 | |
| }, | |
| { | |
| "epoch": 0.9581320450885669, | |
| "grad_norm": 0.4523900544730103, | |
| "learning_rate": 3.983718059727693e-05, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22146061062812805, | |
| "step": 595, | |
| "valid_targets_mean": 6527.8, | |
| "valid_targets_min": 2379 | |
| }, | |
| { | |
| "epoch": 0.966183574879227, | |
| "grad_norm": 0.39826806360509687, | |
| "learning_rate": 3.9826794411816495e-05, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23311802744865417, | |
| "step": 600, | |
| "valid_targets_mean": 6887.7, | |
| "valid_targets_min": 3569 | |
| }, | |
| { | |
| "epoch": 0.9742351046698873, | |
| "grad_norm": 0.6229816324333505, | |
| "learning_rate": 3.981608856217049e-05, | |
| "loss": 0.2314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21930500864982605, | |
| "step": 605, | |
| "valid_targets_mean": 5398.8, | |
| "valid_targets_min": 2580 | |
| }, | |
| { | |
| "epoch": 0.9822866344605475, | |
| "grad_norm": 0.4999823141609161, | |
| "learning_rate": 3.980506322094761e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2858148217201233, | |
| "step": 610, | |
| "valid_targets_mean": 6136.1, | |
| "valid_targets_min": 2610 | |
| }, | |
| { | |
| "epoch": 0.9903381642512077, | |
| "grad_norm": 0.46445064808991693, | |
| "learning_rate": 3.979371856590762e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21342137455940247, | |
| "step": 615, | |
| "valid_targets_mean": 5957.4, | |
| "valid_targets_min": 2562 | |
| }, | |
| { | |
| "epoch": 0.998389694041868, | |
| "grad_norm": 0.5766730357055111, | |
| "learning_rate": 3.978205477995856e-05, | |
| "loss": 0.2373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23376914858818054, | |
| "step": 620, | |
| "valid_targets_mean": 5860.3, | |
| "valid_targets_min": 2833 | |
| }, | |
| { | |
| "epoch": 1.0064412238325282, | |
| "grad_norm": 0.5381872293931007, | |
| "learning_rate": 3.9770072051153754e-05, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38717740774154663, | |
| "step": 625, | |
| "valid_targets_mean": 5898.7, | |
| "valid_targets_min": 2769 | |
| }, | |
| { | |
| "epoch": 1.0144927536231885, | |
| "grad_norm": 0.5254280438649158, | |
| "learning_rate": 3.9757770572688786e-05, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3415398895740509, | |
| "step": 630, | |
| "valid_targets_mean": 6068.6, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 1.0225442834138487, | |
| "grad_norm": 0.48064107372040804, | |
| "learning_rate": 3.9745150542898405e-05, | |
| "loss": 0.3506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36373406648635864, | |
| "step": 635, | |
| "valid_targets_mean": 6519.8, | |
| "valid_targets_min": 3314 | |
| }, | |
| { | |
| "epoch": 1.0305958132045088, | |
| "grad_norm": 0.4867942399417131, | |
| "learning_rate": 3.97322121652533e-05, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33237212896347046, | |
| "step": 640, | |
| "valid_targets_mean": 6028.6, | |
| "valid_targets_min": 2671 | |
| }, | |
| { | |
| "epoch": 1.038647342995169, | |
| "grad_norm": 0.5535759217279507, | |
| "learning_rate": 3.971895564835683e-05, | |
| "loss": 0.3405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36408573389053345, | |
| "step": 645, | |
| "valid_targets_mean": 6266.9, | |
| "valid_targets_min": 469 | |
| }, | |
| { | |
| "epoch": 1.0466988727858293, | |
| "grad_norm": 0.5234567604492989, | |
| "learning_rate": 3.970538120594166e-05, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30862703919410706, | |
| "step": 650, | |
| "valid_targets_mean": 5759.1, | |
| "valid_targets_min": 2568 | |
| }, | |
| { | |
| "epoch": 1.0547504025764896, | |
| "grad_norm": 0.5104944272693731, | |
| "learning_rate": 3.9691489056866324e-05, | |
| "loss": 0.335, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31290531158447266, | |
| "step": 655, | |
| "valid_targets_mean": 5530.2, | |
| "valid_targets_min": 611 | |
| }, | |
| { | |
| "epoch": 1.0628019323671498, | |
| "grad_norm": 0.4962198988376796, | |
| "learning_rate": 3.9677279425111684e-05, | |
| "loss": 0.3172, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.292328417301178, | |
| "step": 660, | |
| "valid_targets_mean": 5696.9, | |
| "valid_targets_min": 2392 | |
| }, | |
| { | |
| "epoch": 1.07085346215781, | |
| "grad_norm": 0.4883167094213102, | |
| "learning_rate": 3.9662752539777314e-05, | |
| "loss": 0.3178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3085409700870514, | |
| "step": 665, | |
| "valid_targets_mean": 5683.9, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 1.0789049919484701, | |
| "grad_norm": 0.45277731991583026, | |
| "learning_rate": 3.9647908635077845e-05, | |
| "loss": 0.3094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27562692761421204, | |
| "step": 670, | |
| "valid_targets_mean": 6331.4, | |
| "valid_targets_min": 3356 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 0.5167803974967058, | |
| "learning_rate": 3.963274795033913e-05, | |
| "loss": 0.3027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28318291902542114, | |
| "step": 675, | |
| "valid_targets_mean": 5326.1, | |
| "valid_targets_min": 1676 | |
| }, | |
| { | |
| "epoch": 1.0950080515297906, | |
| "grad_norm": 0.5095041080696596, | |
| "learning_rate": 3.9617270729994436e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3311567008495331, | |
| "step": 680, | |
| "valid_targets_mean": 6448.9, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 1.103059581320451, | |
| "grad_norm": 0.49987418312419335, | |
| "learning_rate": 3.960147722358046e-05, | |
| "loss": 0.3109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3058398962020874, | |
| "step": 685, | |
| "valid_targets_mean": 5910.2, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.4553139112077016, | |
| "learning_rate": 3.958536768573335e-05, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3590342402458191, | |
| "step": 690, | |
| "valid_targets_mean": 7141.1, | |
| "valid_targets_min": 3571 | |
| }, | |
| { | |
| "epoch": 1.1191626409017714, | |
| "grad_norm": 0.4708599542937082, | |
| "learning_rate": 3.956894237618456e-05, | |
| "loss": 0.3048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32140040397644043, | |
| "step": 695, | |
| "valid_targets_mean": 6492.9, | |
| "valid_targets_min": 3446 | |
| }, | |
| { | |
| "epoch": 1.1272141706924317, | |
| "grad_norm": 0.48610528051988894, | |
| "learning_rate": 3.955220155975669e-05, | |
| "loss": 0.2973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26136165857315063, | |
| "step": 700, | |
| "valid_targets_mean": 5447.6, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 1.1352657004830917, | |
| "grad_norm": 0.7998551023631064, | |
| "learning_rate": 3.9535145506359206e-05, | |
| "loss": 0.4179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4753114581108093, | |
| "step": 705, | |
| "valid_targets_mean": 3657.8, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 1.143317230273752, | |
| "grad_norm": 0.8628326452111007, | |
| "learning_rate": 3.951777449098408e-05, | |
| "loss": 0.5288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5633917450904846, | |
| "step": 710, | |
| "valid_targets_mean": 2673.1, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 1.1513687600644122, | |
| "grad_norm": 0.8324627405108382, | |
| "learning_rate": 3.9500088793701387e-05, | |
| "loss": 0.4929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4774799644947052, | |
| "step": 715, | |
| "valid_targets_mean": 3209.7, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 1.1594202898550725, | |
| "grad_norm": 0.802960087915467, | |
| "learning_rate": 3.948208869965473e-05, | |
| "loss": 0.5032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5248502492904663, | |
| "step": 720, | |
| "valid_targets_mean": 3054.8, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 1.1674718196457328, | |
| "grad_norm": 0.7752102610736108, | |
| "learning_rate": 3.946377449905672e-05, | |
| "loss": 0.4698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45278412103652954, | |
| "step": 725, | |
| "valid_targets_mean": 2978.8, | |
| "valid_targets_min": 1175 | |
| }, | |
| { | |
| "epoch": 1.1755233494363928, | |
| "grad_norm": 0.7774619112330354, | |
| "learning_rate": 3.9445146487184226e-05, | |
| "loss": 0.4626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4363817572593689, | |
| "step": 730, | |
| "valid_targets_mean": 2849.6, | |
| "valid_targets_min": 1295 | |
| }, | |
| { | |
| "epoch": 1.183574879227053, | |
| "grad_norm": 1.1474142413076773, | |
| "learning_rate": 3.942620496437366e-05, | |
| "loss": 0.4836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.519567608833313, | |
| "step": 735, | |
| "valid_targets_mean": 2615.8, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 1.1916264090177133, | |
| "grad_norm": 0.7140245197333289, | |
| "learning_rate": 3.940695023601612e-05, | |
| "loss": 0.482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.49396196007728577, | |
| "step": 740, | |
| "valid_targets_mean": 3296.4, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 1.1996779388083736, | |
| "grad_norm": 0.6546808722676869, | |
| "learning_rate": 3.938738261255247e-05, | |
| "loss": 0.4933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46171700954437256, | |
| "step": 745, | |
| "valid_targets_mean": 3968.6, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 1.2077294685990339, | |
| "grad_norm": 0.8090731054477709, | |
| "learning_rate": 3.9367502409468315e-05, | |
| "loss": 0.4818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5525240898132324, | |
| "step": 750, | |
| "valid_targets_mean": 3078.5, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 1.2157809983896941, | |
| "grad_norm": 0.6725814967283812, | |
| "learning_rate": 3.934730994728893e-05, | |
| "loss": 0.4887, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44579121470451355, | |
| "step": 755, | |
| "valid_targets_mean": 3417.5, | |
| "valid_targets_min": 1252 | |
| }, | |
| { | |
| "epoch": 1.2238325281803544, | |
| "grad_norm": 0.7197375797510343, | |
| "learning_rate": 3.932680555157413e-05, | |
| "loss": 0.4645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5104143619537354, | |
| "step": 760, | |
| "valid_targets_mean": 3427.6, | |
| "valid_targets_min": 1218 | |
| }, | |
| { | |
| "epoch": 1.2318840579710144, | |
| "grad_norm": 0.8543242804532502, | |
| "learning_rate": 3.9305989552912936e-05, | |
| "loss": 0.4923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5740408897399902, | |
| "step": 765, | |
| "valid_targets_mean": 2875.2, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 1.2399355877616747, | |
| "grad_norm": 0.832686925614947, | |
| "learning_rate": 3.928486228691831e-05, | |
| "loss": 0.4789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.498767614364624, | |
| "step": 770, | |
| "valid_targets_mean": 2933.1, | |
| "valid_targets_min": 1485 | |
| }, | |
| { | |
| "epoch": 1.247987117552335, | |
| "grad_norm": 0.8162275317686818, | |
| "learning_rate": 3.926342409422175e-05, | |
| "loss": 0.474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5068527460098267, | |
| "step": 775, | |
| "valid_targets_mean": 2699.5, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 1.2560386473429952, | |
| "grad_norm": 0.6180614081605837, | |
| "learning_rate": 3.924167532046773e-05, | |
| "loss": 0.4594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.426825612783432, | |
| "step": 780, | |
| "valid_targets_mean": 3932.2, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 1.2640901771336555, | |
| "grad_norm": 0.8309975887016753, | |
| "learning_rate": 3.9219616316308215e-05, | |
| "loss": 0.475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48292607069015503, | |
| "step": 785, | |
| "valid_targets_mean": 2747.4, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 1.2721417069243155, | |
| "grad_norm": 0.829396143340556, | |
| "learning_rate": 3.919724743739694e-05, | |
| "loss": 0.4728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4963272511959076, | |
| "step": 790, | |
| "valid_targets_mean": 3029.8, | |
| "valid_targets_min": 1355 | |
| }, | |
| { | |
| "epoch": 1.2801932367149758, | |
| "grad_norm": 0.7797044579353001, | |
| "learning_rate": 3.91745690443837e-05, | |
| "loss": 0.471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4795590937137604, | |
| "step": 795, | |
| "valid_targets_mean": 3227.2, | |
| "valid_targets_min": 1427 | |
| }, | |
| { | |
| "epoch": 1.288244766505636, | |
| "grad_norm": 0.7154505490752525, | |
| "learning_rate": 3.915158150290855e-05, | |
| "loss": 0.4652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4680216312408447, | |
| "step": 800, | |
| "valid_targets_mean": 3224.4, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 1.2962962962962963, | |
| "grad_norm": 0.6366798215464128, | |
| "learning_rate": 3.912828518359588e-05, | |
| "loss": 0.4689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4367047846317291, | |
| "step": 805, | |
| "valid_targets_mean": 4021.2, | |
| "valid_targets_min": 1118 | |
| }, | |
| { | |
| "epoch": 1.3043478260869565, | |
| "grad_norm": 0.7245476673199482, | |
| "learning_rate": 3.910468046204846e-05, | |
| "loss": 0.4691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41532838344573975, | |
| "step": 810, | |
| "valid_targets_mean": 3145.9, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 1.3123993558776168, | |
| "grad_norm": 0.965081597376756, | |
| "learning_rate": 3.908076771884139e-05, | |
| "loss": 0.4461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47352737188339233, | |
| "step": 815, | |
| "valid_targets_mean": 2583.8, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 1.320450885668277, | |
| "grad_norm": 0.7363364439864812, | |
| "learning_rate": 3.905654733951595e-05, | |
| "loss": 0.4784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4524630904197693, | |
| "step": 820, | |
| "valid_targets_mean": 3109.6, | |
| "valid_targets_min": 1268 | |
| }, | |
| { | |
| "epoch": 1.3285024154589373, | |
| "grad_norm": 0.7636753657586481, | |
| "learning_rate": 3.9032019714573366e-05, | |
| "loss": 0.4786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4579421281814575, | |
| "step": 825, | |
| "valid_targets_mean": 2516.6, | |
| "valid_targets_min": 1159 | |
| }, | |
| { | |
| "epoch": 1.3365539452495974, | |
| "grad_norm": 0.6680520600678994, | |
| "learning_rate": 3.9007185239468554e-05, | |
| "loss": 0.456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4591274857521057, | |
| "step": 830, | |
| "valid_targets_mean": 4016.8, | |
| "valid_targets_min": 1642 | |
| }, | |
| { | |
| "epoch": 1.3446054750402576, | |
| "grad_norm": 0.7001951095419365, | |
| "learning_rate": 3.8982044314603725e-05, | |
| "loss": 0.4926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4835224151611328, | |
| "step": 835, | |
| "valid_targets_mean": 3258.7, | |
| "valid_targets_min": 1158 | |
| }, | |
| { | |
| "epoch": 1.3526570048309179, | |
| "grad_norm": 0.6740616815716441, | |
| "learning_rate": 3.8956597345321927e-05, | |
| "loss": 0.4595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4707792401313782, | |
| "step": 840, | |
| "valid_targets_mean": 3767.5, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 1.3607085346215781, | |
| "grad_norm": 0.678578864364496, | |
| "learning_rate": 3.893084474190051e-05, | |
| "loss": 0.4685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4893713593482971, | |
| "step": 845, | |
| "valid_targets_mean": 3205.4, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 1.3687600644122382, | |
| "grad_norm": 0.7052919421243395, | |
| "learning_rate": 3.890478691954452e-05, | |
| "loss": 0.465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4288814067840576, | |
| "step": 850, | |
| "valid_targets_mean": 3383.2, | |
| "valid_targets_min": 1706 | |
| }, | |
| { | |
| "epoch": 1.3768115942028984, | |
| "grad_norm": 0.7065538252025207, | |
| "learning_rate": 3.8878424298379996e-05, | |
| "loss": 0.4327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43889719247817993, | |
| "step": 855, | |
| "valid_targets_mean": 2897.8, | |
| "valid_targets_min": 1102 | |
| }, | |
| { | |
| "epoch": 1.3848631239935587, | |
| "grad_norm": 0.826059371011481, | |
| "learning_rate": 3.885175730344718e-05, | |
| "loss": 0.4279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.489162802696228, | |
| "step": 860, | |
| "valid_targets_mean": 2376.7, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 1.392914653784219, | |
| "grad_norm": 0.6534195388501567, | |
| "learning_rate": 3.882478636469372e-05, | |
| "loss": 0.4147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40982362627983093, | |
| "step": 865, | |
| "valid_targets_mean": 3869.4, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 1.4009661835748792, | |
| "grad_norm": 0.5643024903852305, | |
| "learning_rate": 3.879751191696766e-05, | |
| "loss": 0.4128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3592322766780853, | |
| "step": 870, | |
| "valid_targets_mean": 4852.6, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 1.4090177133655395, | |
| "grad_norm": 0.8046362197064678, | |
| "learning_rate": 3.8769934400010506e-05, | |
| "loss": 0.4132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42762547731399536, | |
| "step": 875, | |
| "valid_targets_mean": 3675.9, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 1.4170692431561998, | |
| "grad_norm": 0.6344687303861078, | |
| "learning_rate": 3.8742054258450085e-05, | |
| "loss": 0.4197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3801061809062958, | |
| "step": 880, | |
| "valid_targets_mean": 3460.8, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 1.42512077294686, | |
| "grad_norm": 0.6323525098555549, | |
| "learning_rate": 3.871387194179338e-05, | |
| "loss": 0.3821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3990853428840637, | |
| "step": 885, | |
| "valid_targets_mean": 4104.0, | |
| "valid_targets_min": 1569 | |
| }, | |
| { | |
| "epoch": 1.43317230273752, | |
| "grad_norm": 0.7366585526555239, | |
| "learning_rate": 3.868538790441931e-05, | |
| "loss": 0.3944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39089301228523254, | |
| "step": 890, | |
| "valid_targets_mean": 3000.4, | |
| "valid_targets_min": 1169 | |
| }, | |
| { | |
| "epoch": 1.4412238325281803, | |
| "grad_norm": 0.6980083863459992, | |
| "learning_rate": 3.865660260557138e-05, | |
| "loss": 0.3997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45632731914520264, | |
| "step": 895, | |
| "valid_targets_mean": 3155.8, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 1.4492753623188406, | |
| "grad_norm": 0.7473196990397595, | |
| "learning_rate": 3.8627516509350286e-05, | |
| "loss": 0.4058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4090888500213623, | |
| "step": 900, | |
| "valid_targets_mean": 2876.1, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 1.4573268921095008, | |
| "grad_norm": 0.7252851362258242, | |
| "learning_rate": 3.859813008470644e-05, | |
| "loss": 0.4055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4397547245025635, | |
| "step": 905, | |
| "valid_targets_mean": 3099.4, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 1.465378421900161, | |
| "grad_norm": 0.8116656536711668, | |
| "learning_rate": 3.856844380543239e-05, | |
| "loss": 0.3953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4384375810623169, | |
| "step": 910, | |
| "valid_targets_mean": 2574.1, | |
| "valid_targets_min": 1351 | |
| }, | |
| { | |
| "epoch": 1.4734299516908211, | |
| "grad_norm": 0.7311144238819602, | |
| "learning_rate": 3.8538458150155186e-05, | |
| "loss": 0.4362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4589492678642273, | |
| "step": 915, | |
| "valid_targets_mean": 3223.3, | |
| "valid_targets_min": 1314 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 0.6970970758853244, | |
| "learning_rate": 3.850817360232869e-05, | |
| "loss": 0.3792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39102041721343994, | |
| "step": 920, | |
| "valid_targets_mean": 3132.0, | |
| "valid_targets_min": 1272 | |
| }, | |
| { | |
| "epoch": 1.4895330112721417, | |
| "grad_norm": 0.6439455681571239, | |
| "learning_rate": 3.8477590650225735e-05, | |
| "loss": 0.4058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3645486533641815, | |
| "step": 925, | |
| "valid_targets_mean": 3755.3, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 1.497584541062802, | |
| "grad_norm": 0.7469424687646696, | |
| "learning_rate": 3.8446709786930305e-05, | |
| "loss": 0.3823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36221805214881897, | |
| "step": 930, | |
| "valid_targets_mean": 3029.9, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 1.5056360708534622, | |
| "grad_norm": 0.7074858488388046, | |
| "learning_rate": 3.841553151032953e-05, | |
| "loss": 0.3914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38697734475135803, | |
| "step": 935, | |
| "valid_targets_mean": 3930.0, | |
| "valid_targets_min": 1734 | |
| }, | |
| { | |
| "epoch": 1.5136876006441224, | |
| "grad_norm": 0.6408672423143533, | |
| "learning_rate": 3.8384056323105695e-05, | |
| "loss": 0.4006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45191776752471924, | |
| "step": 940, | |
| "valid_targets_mean": 3632.3, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 1.5217391304347827, | |
| "grad_norm": 0.6999987480130047, | |
| "learning_rate": 3.835228473272814e-05, | |
| "loss": 0.3926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40401631593704224, | |
| "step": 945, | |
| "valid_targets_mean": 3690.8, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 1.529790660225443, | |
| "grad_norm": 0.7300521054387631, | |
| "learning_rate": 3.832021725144506e-05, | |
| "loss": 0.3856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40716779232025146, | |
| "step": 950, | |
| "valid_targets_mean": 2800.6, | |
| "valid_targets_min": 485 | |
| }, | |
| { | |
| "epoch": 1.537842190016103, | |
| "grad_norm": 0.7731778429667089, | |
| "learning_rate": 3.828785439627523e-05, | |
| "loss": 0.3912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43859434127807617, | |
| "step": 955, | |
| "valid_targets_mean": 3744.1, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 1.5458937198067633, | |
| "grad_norm": 0.6705224246744239, | |
| "learning_rate": 3.825519668899972e-05, | |
| "loss": 0.4118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3778587877750397, | |
| "step": 960, | |
| "valid_targets_mean": 3365.1, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 1.5539452495974235, | |
| "grad_norm": 0.7645910195884874, | |
| "learning_rate": 3.8222244656153444e-05, | |
| "loss": 0.3801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3704504668712616, | |
| "step": 965, | |
| "valid_targets_mean": 3473.1, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 1.5619967793880838, | |
| "grad_norm": 0.7395785345258863, | |
| "learning_rate": 3.818899882901666e-05, | |
| "loss": 0.3848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4030625820159912, | |
| "step": 970, | |
| "valid_targets_mean": 2840.8, | |
| "valid_targets_min": 1494 | |
| }, | |
| { | |
| "epoch": 1.5700483091787438, | |
| "grad_norm": 0.736229535025106, | |
| "learning_rate": 3.815545974360644e-05, | |
| "loss": 0.4024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4085889160633087, | |
| "step": 975, | |
| "valid_targets_mean": 2815.1, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 1.578099838969404, | |
| "grad_norm": 0.7854099739280379, | |
| "learning_rate": 3.812162794066802e-05, | |
| "loss": 0.3743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.379982054233551, | |
| "step": 980, | |
| "valid_targets_mean": 3228.2, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 1.5861513687600644, | |
| "grad_norm": 0.7315662602632311, | |
| "learning_rate": 3.8087503965666057e-05, | |
| "loss": 0.3898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40219753980636597, | |
| "step": 985, | |
| "valid_targets_mean": 3174.3, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 1.5942028985507246, | |
| "grad_norm": 0.6694592764443967, | |
| "learning_rate": 3.805308836877586e-05, | |
| "loss": 0.4112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37781035900115967, | |
| "step": 990, | |
| "valid_targets_mean": 3438.4, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 1.6022544283413849, | |
| "grad_norm": 0.7386035260971374, | |
| "learning_rate": 3.80183817048745e-05, | |
| "loss": 0.3702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38043975830078125, | |
| "step": 995, | |
| "valid_targets_mean": 2587.6, | |
| "valid_targets_min": 1218 | |
| }, | |
| { | |
| "epoch": 1.6103059581320451, | |
| "grad_norm": 0.7294675876286597, | |
| "learning_rate": 3.7983384533531894e-05, | |
| "loss": 0.395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3978623151779175, | |
| "step": 1000, | |
| "valid_targets_mean": 2969.8, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 1.6183574879227054, | |
| "grad_norm": 0.6439654018799925, | |
| "learning_rate": 3.7948097419001736e-05, | |
| "loss": 0.3842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.402493953704834, | |
| "step": 1005, | |
| "valid_targets_mean": 3475.1, | |
| "valid_targets_min": 1128 | |
| }, | |
| { | |
| "epoch": 1.6264090177133657, | |
| "grad_norm": 0.7469049629085611, | |
| "learning_rate": 3.7912520930212445e-05, | |
| "loss": 0.378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3579831123352051, | |
| "step": 1010, | |
| "valid_targets_mean": 2925.9, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 1.634460547504026, | |
| "grad_norm": 0.6828460239332308, | |
| "learning_rate": 3.7876655640757974e-05, | |
| "loss": 0.3646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37845996022224426, | |
| "step": 1015, | |
| "valid_targets_mean": 3369.0, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 1.642512077294686, | |
| "grad_norm": 0.916703135376311, | |
| "learning_rate": 3.784050212888857e-05, | |
| "loss": 0.3908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3890652358531952, | |
| "step": 1020, | |
| "valid_targets_mean": 3307.7, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 1.6505636070853462, | |
| "grad_norm": 0.8453007862430895, | |
| "learning_rate": 3.780406097750141e-05, | |
| "loss": 0.3892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.393949419260025, | |
| "step": 1025, | |
| "valid_targets_mean": 2625.4, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 1.6586151368760065, | |
| "grad_norm": 0.7030261980183728, | |
| "learning_rate": 3.776733277413127e-05, | |
| "loss": 0.3771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3959161043167114, | |
| "step": 1030, | |
| "valid_targets_mean": 3028.0, | |
| "valid_targets_min": 1602 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.6424805669240223, | |
| "learning_rate": 3.7730318110941004e-05, | |
| "loss": 0.3843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3867754340171814, | |
| "step": 1035, | |
| "valid_targets_mean": 3680.5, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 1.6747181964573268, | |
| "grad_norm": 0.6723162629033781, | |
| "learning_rate": 3.7693017584712013e-05, | |
| "loss": 0.3793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3691941499710083, | |
| "step": 1040, | |
| "valid_targets_mean": 3472.8, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 1.682769726247987, | |
| "grad_norm": 0.66473478421512, | |
| "learning_rate": 3.765543179683462e-05, | |
| "loss": 0.3855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37193185091018677, | |
| "step": 1045, | |
| "valid_targets_mean": 3390.6, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 1.6908212560386473, | |
| "grad_norm": 0.6827138169001735, | |
| "learning_rate": 3.7617561353298395e-05, | |
| "loss": 0.4096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.407214492559433, | |
| "step": 1050, | |
| "valid_targets_mean": 3282.8, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 1.6988727858293076, | |
| "grad_norm": 0.6054569389155301, | |
| "learning_rate": 3.7579406864682327e-05, | |
| "loss": 0.3876, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39312368631362915, | |
| "step": 1055, | |
| "valid_targets_mean": 4544.4, | |
| "valid_targets_min": 1616 | |
| }, | |
| { | |
| "epoch": 1.7069243156199678, | |
| "grad_norm": 0.6376030347219761, | |
| "learning_rate": 3.7540968946145036e-05, | |
| "loss": 0.3712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37451237440109253, | |
| "step": 1060, | |
| "valid_targets_mean": 3384.4, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 1.714975845410628, | |
| "grad_norm": 0.6211030350353202, | |
| "learning_rate": 3.750224821741486e-05, | |
| "loss": 0.3837, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38833314180374146, | |
| "step": 1065, | |
| "valid_targets_mean": 4230.6, | |
| "valid_targets_min": 1229 | |
| }, | |
| { | |
| "epoch": 1.7230273752012883, | |
| "grad_norm": 1.3513202092043939, | |
| "learning_rate": 3.7463245302779795e-05, | |
| "loss": 0.3654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3714137673377991, | |
| "step": 1070, | |
| "valid_targets_mean": 3359.7, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 1.7310789049919486, | |
| "grad_norm": 0.6120247412950662, | |
| "learning_rate": 3.742396083107751e-05, | |
| "loss": 0.384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36947697401046753, | |
| "step": 1075, | |
| "valid_targets_mean": 4327.3, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 0.6000696270867342, | |
| "learning_rate": 3.7384395435685166e-05, | |
| "loss": 0.3725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33209776878356934, | |
| "step": 1080, | |
| "valid_targets_mean": 3812.0, | |
| "valid_targets_min": 1142 | |
| }, | |
| { | |
| "epoch": 1.747181964573269, | |
| "grad_norm": 0.8080033004353664, | |
| "learning_rate": 3.7344549754509196e-05, | |
| "loss": 0.3952, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4237212538719177, | |
| "step": 1085, | |
| "valid_targets_mean": 2473.6, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 1.7552334943639292, | |
| "grad_norm": 0.5805164381467139, | |
| "learning_rate": 3.7304424429975046e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3186456561088562, | |
| "step": 1090, | |
| "valid_targets_mean": 7490.2, | |
| "valid_targets_min": 2721 | |
| }, | |
| { | |
| "epoch": 1.7632850241545892, | |
| "grad_norm": 0.4969614961756743, | |
| "learning_rate": 3.726402010901681e-05, | |
| "loss": 0.2399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25411802530288696, | |
| "step": 1095, | |
| "valid_targets_mean": 6344.0, | |
| "valid_targets_min": 3073 | |
| }, | |
| { | |
| "epoch": 1.7713365539452495, | |
| "grad_norm": 0.5036039262445117, | |
| "learning_rate": 3.722333744306678e-05, | |
| "loss": 0.2202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2208937406539917, | |
| "step": 1100, | |
| "valid_targets_mean": 5062.1, | |
| "valid_targets_min": 2985 | |
| }, | |
| { | |
| "epoch": 1.7793880837359097, | |
| "grad_norm": 0.47890617580766465, | |
| "learning_rate": 3.7182377088044984e-05, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27038395404815674, | |
| "step": 1105, | |
| "valid_targets_mean": 5613.6, | |
| "valid_targets_min": 2620 | |
| }, | |
| { | |
| "epoch": 1.78743961352657, | |
| "grad_norm": 1.0317443624660205, | |
| "learning_rate": 3.7141139704348576e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22091877460479736, | |
| "step": 1110, | |
| "valid_targets_mean": 6100.1, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 1.7954911433172303, | |
| "grad_norm": 0.4528546728089874, | |
| "learning_rate": 3.7099625956841175e-05, | |
| "loss": 0.2163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19431838393211365, | |
| "step": 1115, | |
| "valid_targets_mean": 5711.6, | |
| "valid_targets_min": 3761 | |
| }, | |
| { | |
| "epoch": 1.8035426731078905, | |
| "grad_norm": 0.48395965253856643, | |
| "learning_rate": 3.70578365148422e-05, | |
| "loss": 0.2224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23526781797409058, | |
| "step": 1120, | |
| "valid_targets_mean": 5863.1, | |
| "valid_targets_min": 3178 | |
| }, | |
| { | |
| "epoch": 1.8115942028985508, | |
| "grad_norm": 0.40906986287532093, | |
| "learning_rate": 3.701577205211604e-05, | |
| "loss": 0.2471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22184664011001587, | |
| "step": 1125, | |
| "valid_targets_mean": 6090.8, | |
| "valid_targets_min": 2870 | |
| }, | |
| { | |
| "epoch": 1.819645732689211, | |
| "grad_norm": 0.4479703485506722, | |
| "learning_rate": 3.697343324686119e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2089478075504303, | |
| "step": 1130, | |
| "valid_targets_mean": 5746.5, | |
| "valid_targets_min": 3334 | |
| }, | |
| { | |
| "epoch": 1.8276972624798713, | |
| "grad_norm": 0.5767247918497662, | |
| "learning_rate": 3.693082078169933e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23523643612861633, | |
| "step": 1135, | |
| "valid_targets_mean": 5464.9, | |
| "valid_targets_min": 3204 | |
| }, | |
| { | |
| "epoch": 1.8357487922705316, | |
| "grad_norm": 0.47967598737718, | |
| "learning_rate": 3.68879353436643e-05, | |
| "loss": 0.2249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2230021208524704, | |
| "step": 1140, | |
| "valid_targets_mean": 5526.6, | |
| "valid_targets_min": 2793 | |
| }, | |
| { | |
| "epoch": 1.8438003220611916, | |
| "grad_norm": 0.4741249695731208, | |
| "learning_rate": 3.684477762419108e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24306999146938324, | |
| "step": 1145, | |
| "valid_targets_mean": 6074.9, | |
| "valid_targets_min": 3285 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 0.48153493651658874, | |
| "learning_rate": 3.6801348319104546e-05, | |
| "loss": 0.2082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2044723629951477, | |
| "step": 1150, | |
| "valid_targets_mean": 5260.6, | |
| "valid_targets_min": 2292 | |
| }, | |
| { | |
| "epoch": 1.8599033816425121, | |
| "grad_norm": 0.584632590412221, | |
| "learning_rate": 3.675764812860833e-05, | |
| "loss": 0.2258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.224099799990654, | |
| "step": 1155, | |
| "valid_targets_mean": 5752.9, | |
| "valid_targets_min": 4204 | |
| }, | |
| { | |
| "epoch": 1.8679549114331722, | |
| "grad_norm": 0.5420753765458437, | |
| "learning_rate": 3.671367775727353e-05, | |
| "loss": 0.2301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21649691462516785, | |
| "step": 1160, | |
| "valid_targets_mean": 4836.7, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 1.8760064412238324, | |
| "grad_norm": 0.4760661908355776, | |
| "learning_rate": 3.666943791402726e-05, | |
| "loss": 0.2199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24728357791900635, | |
| "step": 1165, | |
| "valid_targets_mean": 6021.0, | |
| "valid_targets_min": 4439 | |
| }, | |
| { | |
| "epoch": 1.8840579710144927, | |
| "grad_norm": 0.6720080745282876, | |
| "learning_rate": 3.662492931214137e-05, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21869316697120667, | |
| "step": 1170, | |
| "valid_targets_mean": 5661.9, | |
| "valid_targets_min": 3607 | |
| }, | |
| { | |
| "epoch": 1.892109500805153, | |
| "grad_norm": 0.4890482756646293, | |
| "learning_rate": 3.6580152669220784e-05, | |
| "loss": 0.2143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22565293312072754, | |
| "step": 1175, | |
| "valid_targets_mean": 5420.4, | |
| "valid_targets_min": 3078 | |
| }, | |
| { | |
| "epoch": 1.9001610305958132, | |
| "grad_norm": 0.5189964339664618, | |
| "learning_rate": 3.6535108707192053e-05, | |
| "loss": 0.2131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20920491218566895, | |
| "step": 1180, | |
| "valid_targets_mean": 5750.4, | |
| "valid_targets_min": 2491 | |
| }, | |
| { | |
| "epoch": 1.9082125603864735, | |
| "grad_norm": 0.4214170728597196, | |
| "learning_rate": 3.648979815229167e-05, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20261229574680328, | |
| "step": 1185, | |
| "valid_targets_mean": 5996.6, | |
| "valid_targets_min": 4004 | |
| }, | |
| { | |
| "epoch": 1.9162640901771337, | |
| "grad_norm": 0.43481442428355693, | |
| "learning_rate": 3.644422173505433e-05, | |
| "loss": 0.2163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20599091053009033, | |
| "step": 1190, | |
| "valid_targets_mean": 6200.9, | |
| "valid_targets_min": 3148 | |
| }, | |
| { | |
| "epoch": 1.924315619967794, | |
| "grad_norm": 0.45638204853677106, | |
| "learning_rate": 3.639838019030123e-05, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1937769055366516, | |
| "step": 1195, | |
| "valid_targets_mean": 6472.3, | |
| "valid_targets_min": 3581 | |
| }, | |
| { | |
| "epoch": 1.9323671497584543, | |
| "grad_norm": 0.4186145666771615, | |
| "learning_rate": 3.635227425712812e-05, | |
| "loss": 0.2278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2861325144767761, | |
| "step": 1200, | |
| "valid_targets_mean": 7532.8, | |
| "valid_targets_min": 3206 | |
| }, | |
| { | |
| "epoch": 1.9404186795491143, | |
| "grad_norm": 0.5308258730995378, | |
| "learning_rate": 3.6305904678893504e-05, | |
| "loss": 0.2245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2531401515007019, | |
| "step": 1205, | |
| "valid_targets_mean": 5952.9, | |
| "valid_targets_min": 3304 | |
| }, | |
| { | |
| "epoch": 1.9484702093397746, | |
| "grad_norm": 0.46680263986526405, | |
| "learning_rate": 3.6259272203206535e-05, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21227112412452698, | |
| "step": 1210, | |
| "valid_targets_mean": 5735.1, | |
| "valid_targets_min": 3368 | |
| }, | |
| { | |
| "epoch": 1.9565217391304348, | |
| "grad_norm": 0.491355991243715, | |
| "learning_rate": 3.621237758191505e-05, | |
| "loss": 0.214, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2222747504711151, | |
| "step": 1215, | |
| "valid_targets_mean": 5506.7, | |
| "valid_targets_min": 2908 | |
| }, | |
| { | |
| "epoch": 1.9645732689210949, | |
| "grad_norm": 0.4862497243022364, | |
| "learning_rate": 3.616522157109342e-05, | |
| "loss": 0.2174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2260490208864212, | |
| "step": 1220, | |
| "valid_targets_mean": 5777.9, | |
| "valid_targets_min": 3451 | |
| }, | |
| { | |
| "epoch": 1.9726247987117551, | |
| "grad_norm": 0.4051875398639865, | |
| "learning_rate": 3.6117804931030324e-05, | |
| "loss": 0.2057, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19149649143218994, | |
| "step": 1225, | |
| "valid_targets_mean": 6045.9, | |
| "valid_targets_min": 2967 | |
| }, | |
| { | |
| "epoch": 1.9806763285024154, | |
| "grad_norm": 0.48575560614945534, | |
| "learning_rate": 3.607012842621657e-05, | |
| "loss": 0.2131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21349817514419556, | |
| "step": 1230, | |
| "valid_targets_mean": 6113.4, | |
| "valid_targets_min": 3912 | |
| }, | |
| { | |
| "epoch": 1.9887278582930756, | |
| "grad_norm": 0.4334111032895171, | |
| "learning_rate": 3.602219282533269e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1997205913066864, | |
| "step": 1235, | |
| "valid_targets_mean": 5837.0, | |
| "valid_targets_min": 3528 | |
| }, | |
| { | |
| "epoch": 1.996779388083736, | |
| "grad_norm": 0.4425270232705891, | |
| "learning_rate": 3.597399890123659e-05, | |
| "loss": 0.2078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22543010115623474, | |
| "step": 1240, | |
| "valid_targets_mean": 6356.6, | |
| "valid_targets_min": 3725 | |
| }, | |
| { | |
| "epoch": 2.004830917874396, | |
| "grad_norm": 0.49848016805395046, | |
| "learning_rate": 3.5925547430951094e-05, | |
| "loss": 0.2709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30809903144836426, | |
| "step": 1245, | |
| "valid_targets_mean": 6803.5, | |
| "valid_targets_min": 3040 | |
| }, | |
| { | |
| "epoch": 2.0128824476650564, | |
| "grad_norm": 0.5893052782286002, | |
| "learning_rate": 3.587683919565136e-05, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27029603719711304, | |
| "step": 1250, | |
| "valid_targets_mean": 5952.2, | |
| "valid_targets_min": 3694 | |
| }, | |
| { | |
| "epoch": 2.0209339774557167, | |
| "grad_norm": 0.5557870949459839, | |
| "learning_rate": 3.582787498065237e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3327678442001343, | |
| "step": 1255, | |
| "valid_targets_mean": 5764.1, | |
| "valid_targets_min": 2130 | |
| }, | |
| { | |
| "epoch": 2.028985507246377, | |
| "grad_norm": 0.4584733838358168, | |
| "learning_rate": 3.577865557539621e-05, | |
| "loss": 0.2879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30011194944381714, | |
| "step": 1260, | |
| "valid_targets_mean": 6953.2, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 2.037037037037037, | |
| "grad_norm": 1.64628767564844, | |
| "learning_rate": 3.572918177343935e-05, | |
| "loss": 0.2943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30400946736335754, | |
| "step": 1265, | |
| "valid_targets_mean": 6372.9, | |
| "valid_targets_min": 2633 | |
| }, | |
| { | |
| "epoch": 2.0450885668276975, | |
| "grad_norm": 0.47363097381646996, | |
| "learning_rate": 3.567945437243987e-05, | |
| "loss": 0.3, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3237455487251282, | |
| "step": 1270, | |
| "valid_targets_mean": 6667.6, | |
| "valid_targets_min": 3230 | |
| }, | |
| { | |
| "epoch": 2.0531400966183573, | |
| "grad_norm": 0.6121579597876319, | |
| "learning_rate": 3.5629474174144564e-05, | |
| "loss": 0.2976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3143073320388794, | |
| "step": 1275, | |
| "valid_targets_mean": 5952.1, | |
| "valid_targets_min": 3816 | |
| }, | |
| { | |
| "epoch": 2.0611916264090175, | |
| "grad_norm": 0.5122726834409544, | |
| "learning_rate": 3.5579241984376065e-05, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2792460322380066, | |
| "step": 1280, | |
| "valid_targets_mean": 6181.7, | |
| "valid_targets_min": 1726 | |
| }, | |
| { | |
| "epoch": 2.069243156199678, | |
| "grad_norm": 0.5279318998363034, | |
| "learning_rate": 3.5528758613019804e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636644244194031, | |
| "step": 1285, | |
| "valid_targets_mean": 5198.9, | |
| "valid_targets_min": 1161 | |
| }, | |
| { | |
| "epoch": 2.077294685990338, | |
| "grad_norm": 0.4637806256747439, | |
| "learning_rate": 3.547802487401097e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2678183317184448, | |
| "step": 1290, | |
| "valid_targets_mean": 5936.2, | |
| "valid_targets_min": 2526 | |
| }, | |
| { | |
| "epoch": 2.0853462157809983, | |
| "grad_norm": 0.44332589750107415, | |
| "learning_rate": 3.54270415853214e-05, | |
| "loss": 0.2707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2518324851989746, | |
| "step": 1295, | |
| "valid_targets_mean": 6524.1, | |
| "valid_targets_min": 3439 | |
| }, | |
| { | |
| "epoch": 2.0933977455716586, | |
| "grad_norm": 0.5070450536840013, | |
| "learning_rate": 3.537580956894638e-05, | |
| "loss": 0.278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27724772691726685, | |
| "step": 1300, | |
| "valid_targets_mean": 6404.5, | |
| "valid_targets_min": 2689 | |
| }, | |
| { | |
| "epoch": 2.101449275362319, | |
| "grad_norm": 0.6584385593174998, | |
| "learning_rate": 3.532432965089138e-05, | |
| "loss": 0.286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2892034649848938, | |
| "step": 1305, | |
| "valid_targets_mean": 5932.1, | |
| "valid_targets_min": 2432 | |
| }, | |
| { | |
| "epoch": 2.109500805152979, | |
| "grad_norm": 0.4944188614557695, | |
| "learning_rate": 3.527260266115876e-05, | |
| "loss": 0.2888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32951074838638306, | |
| "step": 1310, | |
| "valid_targets_mean": 6782.6, | |
| "valid_targets_min": 3784 | |
| }, | |
| { | |
| "epoch": 2.1175523349436394, | |
| "grad_norm": 0.4980617413022126, | |
| "learning_rate": 3.522062943373438e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2504119575023651, | |
| "step": 1315, | |
| "valid_targets_mean": 6065.6, | |
| "valid_targets_min": 3311 | |
| }, | |
| { | |
| "epoch": 2.1256038647342996, | |
| "grad_norm": 0.5112205537054673, | |
| "learning_rate": 3.516841080657413e-05, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2678036689758301, | |
| "step": 1320, | |
| "valid_targets_mean": 4948.5, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 2.13365539452496, | |
| "grad_norm": 0.9291687075910791, | |
| "learning_rate": 3.511594762159046e-05, | |
| "loss": 0.3325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42405465245246887, | |
| "step": 1325, | |
| "valid_targets_mean": 2751.3, | |
| "valid_targets_min": 1151 | |
| }, | |
| { | |
| "epoch": 2.14170692431562, | |
| "grad_norm": 0.8796559274131959, | |
| "learning_rate": 3.506324072463878e-05, | |
| "loss": 0.4501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4509429931640625, | |
| "step": 1330, | |
| "valid_targets_mean": 2481.8, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 2.14975845410628, | |
| "grad_norm": 0.7338052792870016, | |
| "learning_rate": 3.5010290965503826e-05, | |
| "loss": 0.4473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42610806226730347, | |
| "step": 1335, | |
| "valid_targets_mean": 3228.8, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 2.1578099838969402, | |
| "grad_norm": 0.9099556688067492, | |
| "learning_rate": 3.495709919788597e-05, | |
| "loss": 0.4364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4756355881690979, | |
| "step": 1340, | |
| "valid_targets_mean": 3290.7, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 2.1658615136876005, | |
| "grad_norm": 0.7280295738870289, | |
| "learning_rate": 3.490366627938742e-05, | |
| "loss": 0.4281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4415823817253113, | |
| "step": 1345, | |
| "valid_targets_mean": 3735.3, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 0.8552032601411625, | |
| "learning_rate": 3.484999307149846e-05, | |
| "loss": 0.4093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4188133478164673, | |
| "step": 1350, | |
| "valid_targets_mean": 2806.2, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 2.181964573268921, | |
| "grad_norm": 0.7699287073880586, | |
| "learning_rate": 3.47960804395835e-05, | |
| "loss": 0.4144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38652926683425903, | |
| "step": 1355, | |
| "valid_targets_mean": 2498.2, | |
| "valid_targets_min": 970 | |
| }, | |
| { | |
| "epoch": 2.1900161030595813, | |
| "grad_norm": 0.6914816395930312, | |
| "learning_rate": 3.474192925286714e-05, | |
| "loss": 0.4313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39671099185943604, | |
| "step": 1360, | |
| "valid_targets_mean": 3014.8, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 2.1980676328502415, | |
| "grad_norm": 0.8257531389708893, | |
| "learning_rate": 3.468754038442017e-05, | |
| "loss": 0.4392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4281843304634094, | |
| "step": 1365, | |
| "valid_targets_mean": 2247.2, | |
| "valid_targets_min": 1125 | |
| }, | |
| { | |
| "epoch": 2.206119162640902, | |
| "grad_norm": 0.7268372945505573, | |
| "learning_rate": 3.463291471114548e-05, | |
| "loss": 0.4095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4062803387641907, | |
| "step": 1370, | |
| "valid_targets_mean": 3460.9, | |
| "valid_targets_min": 1652 | |
| }, | |
| { | |
| "epoch": 2.214170692431562, | |
| "grad_norm": 0.8070422587353386, | |
| "learning_rate": 3.4578053113763936e-05, | |
| "loss": 0.4513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4336710572242737, | |
| "step": 1375, | |
| "valid_targets_mean": 2609.9, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.67186958813965, | |
| "learning_rate": 3.452295647680014e-05, | |
| "loss": 0.4001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40693116188049316, | |
| "step": 1380, | |
| "valid_targets_mean": 3187.6, | |
| "valid_targets_min": 1303 | |
| }, | |
| { | |
| "epoch": 2.2302737520128826, | |
| "grad_norm": 0.7010798669064308, | |
| "learning_rate": 3.4467625688568245e-05, | |
| "loss": 0.4279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40231195092201233, | |
| "step": 1385, | |
| "valid_targets_mean": 3980.8, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 2.238325281803543, | |
| "grad_norm": 0.9239851489046476, | |
| "learning_rate": 3.4412061641157546e-05, | |
| "loss": 0.4443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.434467613697052, | |
| "step": 1390, | |
| "valid_targets_mean": 2712.9, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 2.246376811594203, | |
| "grad_norm": 0.7442921098581603, | |
| "learning_rate": 3.435626523041815e-05, | |
| "loss": 0.4188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4101715683937073, | |
| "step": 1395, | |
| "valid_targets_mean": 3055.6, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 2.2544283413848634, | |
| "grad_norm": 0.7796509221772335, | |
| "learning_rate": 3.430023735594653e-05, | |
| "loss": 0.4248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43132680654525757, | |
| "step": 1400, | |
| "valid_targets_mean": 3022.4, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 2.262479871175523, | |
| "grad_norm": 0.8845650683341554, | |
| "learning_rate": 3.4243978921071005e-05, | |
| "loss": 0.4115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4295615553855896, | |
| "step": 1405, | |
| "valid_targets_mean": 2932.4, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 2.2705314009661834, | |
| "grad_norm": 0.7654694935136631, | |
| "learning_rate": 3.418749083283719e-05, | |
| "loss": 0.4139, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41558438539505005, | |
| "step": 1410, | |
| "valid_targets_mean": 2893.2, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 2.2785829307568437, | |
| "grad_norm": 0.7620855439193758, | |
| "learning_rate": 3.413077400199334e-05, | |
| "loss": 0.4227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3939434885978699, | |
| "step": 1415, | |
| "valid_targets_mean": 2413.7, | |
| "valid_targets_min": 1044 | |
| }, | |
| { | |
| "epoch": 2.286634460547504, | |
| "grad_norm": 0.6909091800489532, | |
| "learning_rate": 3.407382934297571e-05, | |
| "loss": 0.4174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3776891231536865, | |
| "step": 1420, | |
| "valid_targets_mean": 3168.1, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 2.2946859903381642, | |
| "grad_norm": 0.7978903184807417, | |
| "learning_rate": 3.4016657773893785e-05, | |
| "loss": 0.422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.46548742055892944, | |
| "step": 1425, | |
| "valid_targets_mean": 3087.7, | |
| "valid_targets_min": 1699 | |
| }, | |
| { | |
| "epoch": 2.3027375201288245, | |
| "grad_norm": 0.8248703939493464, | |
| "learning_rate": 3.3959260216515495e-05, | |
| "loss": 0.4219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41521650552749634, | |
| "step": 1430, | |
| "valid_targets_mean": 2646.4, | |
| "valid_targets_min": 1509 | |
| }, | |
| { | |
| "epoch": 2.3107890499194848, | |
| "grad_norm": 0.7311952260051283, | |
| "learning_rate": 3.3901637596252325e-05, | |
| "loss": 0.3865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36918574571609497, | |
| "step": 1435, | |
| "valid_targets_mean": 2969.6, | |
| "valid_targets_min": 1251 | |
| }, | |
| { | |
| "epoch": 2.318840579710145, | |
| "grad_norm": 0.7976719684765547, | |
| "learning_rate": 3.384379084214443e-05, | |
| "loss": 0.432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4152054190635681, | |
| "step": 1440, | |
| "valid_targets_mean": 3140.2, | |
| "valid_targets_min": 1161 | |
| }, | |
| { | |
| "epoch": 2.3268921095008053, | |
| "grad_norm": 0.7964318060668145, | |
| "learning_rate": 3.378572088684562e-05, | |
| "loss": 0.4245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45041656494140625, | |
| "step": 1445, | |
| "valid_targets_mean": 2786.8, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 2.3349436392914655, | |
| "grad_norm": 0.779177762219085, | |
| "learning_rate": 3.372742866660836e-05, | |
| "loss": 0.4063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42900824546813965, | |
| "step": 1450, | |
| "valid_targets_mean": 3073.4, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 2.342995169082126, | |
| "grad_norm": 0.8252841045839578, | |
| "learning_rate": 3.3668915121268636e-05, | |
| "loss": 0.4356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42234331369400024, | |
| "step": 1455, | |
| "valid_targets_mean": 2718.1, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 2.3510466988727856, | |
| "grad_norm": 0.7559789239050331, | |
| "learning_rate": 3.361018119423085e-05, | |
| "loss": 0.4107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3902217745780945, | |
| "step": 1460, | |
| "valid_targets_mean": 3051.3, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 2.359098228663446, | |
| "grad_norm": 0.7118532524697113, | |
| "learning_rate": 3.3551227832452555e-05, | |
| "loss": 0.4167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4386996924877167, | |
| "step": 1465, | |
| "valid_targets_mean": 3645.1, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 2.367149758454106, | |
| "grad_norm": 0.7314007089106499, | |
| "learning_rate": 3.3492055986429235e-05, | |
| "loss": 0.4215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3893301486968994, | |
| "step": 1470, | |
| "valid_targets_mean": 3020.9, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 2.3752012882447664, | |
| "grad_norm": 0.7883519271357176, | |
| "learning_rate": 3.3432666610178936e-05, | |
| "loss": 0.3829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3778621554374695, | |
| "step": 1475, | |
| "valid_targets_mean": 2636.9, | |
| "valid_targets_min": 632 | |
| }, | |
| { | |
| "epoch": 2.3832528180354267, | |
| "grad_norm": 0.5950705396292623, | |
| "learning_rate": 3.3373060661226944e-05, | |
| "loss": 0.3704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3533982038497925, | |
| "step": 1480, | |
| "valid_targets_mean": 4681.1, | |
| "valid_targets_min": 1703 | |
| }, | |
| { | |
| "epoch": 2.391304347826087, | |
| "grad_norm": 0.6284697509500937, | |
| "learning_rate": 3.331323910059027e-05, | |
| "loss": 0.3813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36771711707115173, | |
| "step": 1485, | |
| "valid_targets_mean": 4288.2, | |
| "valid_targets_min": 349 | |
| }, | |
| { | |
| "epoch": 2.399355877616747, | |
| "grad_norm": 0.6241002825603361, | |
| "learning_rate": 3.3253202892762244e-05, | |
| "loss": 0.3774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35911619663238525, | |
| "step": 1490, | |
| "valid_targets_mean": 4145.6, | |
| "valid_targets_min": 1505 | |
| }, | |
| { | |
| "epoch": 2.4074074074074074, | |
| "grad_norm": 0.7195975803486768, | |
| "learning_rate": 3.319295300569686e-05, | |
| "loss": 0.3541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3680953085422516, | |
| "step": 1495, | |
| "valid_targets_mean": 3169.7, | |
| "valid_targets_min": 1125 | |
| }, | |
| { | |
| "epoch": 2.4154589371980677, | |
| "grad_norm": 0.6678894669137365, | |
| "learning_rate": 3.3132490410793294e-05, | |
| "loss": 0.3807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40425896644592285, | |
| "step": 1500, | |
| "valid_targets_mean": 3801.9, | |
| "valid_targets_min": 1805 | |
| }, | |
| { | |
| "epoch": 2.423510466988728, | |
| "grad_norm": 0.67381554336476, | |
| "learning_rate": 3.3071816082880115e-05, | |
| "loss": 0.3364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3536141514778137, | |
| "step": 1505, | |
| "valid_targets_mean": 3517.1, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 2.4315619967793882, | |
| "grad_norm": 0.5844843611773384, | |
| "learning_rate": 3.3010931000199674e-05, | |
| "loss": 0.3513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3546409606933594, | |
| "step": 1510, | |
| "valid_targets_mean": 4191.2, | |
| "valid_targets_min": 2286 | |
| }, | |
| { | |
| "epoch": 2.4396135265700485, | |
| "grad_norm": 0.6086706004595346, | |
| "learning_rate": 3.2949836144392256e-05, | |
| "loss": 0.3428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32763075828552246, | |
| "step": 1515, | |
| "valid_targets_mean": 4189.8, | |
| "valid_targets_min": 1568 | |
| }, | |
| { | |
| "epoch": 2.4476650563607087, | |
| "grad_norm": 0.8158802293533742, | |
| "learning_rate": 3.28885325004803e-05, | |
| "loss": 0.3678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35021084547042847, | |
| "step": 1520, | |
| "valid_targets_mean": 3081.1, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 2.455716586151369, | |
| "grad_norm": 0.6124714609883762, | |
| "learning_rate": 3.282702105685251e-05, | |
| "loss": 0.3543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33643293380737305, | |
| "step": 1525, | |
| "valid_targets_mean": 3844.6, | |
| "valid_targets_min": 1318 | |
| }, | |
| { | |
| "epoch": 2.463768115942029, | |
| "grad_norm": 0.6419535517935, | |
| "learning_rate": 3.2765302805247885e-05, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.324346661567688, | |
| "step": 1530, | |
| "valid_targets_mean": 3268.8, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 2.471819645732689, | |
| "grad_norm": 0.707763033567243, | |
| "learning_rate": 3.270337874073977e-05, | |
| "loss": 0.3842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39264971017837524, | |
| "step": 1535, | |
| "valid_targets_mean": 3251.4, | |
| "valid_targets_min": 1576 | |
| }, | |
| { | |
| "epoch": 2.4798711755233493, | |
| "grad_norm": 0.6517860581223903, | |
| "learning_rate": 3.264124986171981e-05, | |
| "loss": 0.3503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30974501371383667, | |
| "step": 1540, | |
| "valid_targets_mean": 3231.7, | |
| "valid_targets_min": 1471 | |
| }, | |
| { | |
| "epoch": 2.4879227053140096, | |
| "grad_norm": 0.6861932870077228, | |
| "learning_rate": 3.2578917169881816e-05, | |
| "loss": 0.3679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3639856278896332, | |
| "step": 1545, | |
| "valid_targets_mean": 3158.2, | |
| "valid_targets_min": 373 | |
| }, | |
| { | |
| "epoch": 2.49597423510467, | |
| "grad_norm": 0.5800378877703327, | |
| "learning_rate": 3.2516381670205665e-05, | |
| "loss": 0.3418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33077770471572876, | |
| "step": 1550, | |
| "valid_targets_mean": 4750.0, | |
| "valid_targets_min": 1270 | |
| }, | |
| { | |
| "epoch": 2.50402576489533, | |
| "grad_norm": 0.6790879537056581, | |
| "learning_rate": 3.245364437094105e-05, | |
| "loss": 0.3436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36615103483200073, | |
| "step": 1555, | |
| "valid_targets_mean": 3425.2, | |
| "valid_targets_min": 1034 | |
| }, | |
| { | |
| "epoch": 2.5120772946859904, | |
| "grad_norm": 0.7532708248786624, | |
| "learning_rate": 3.239070628359126e-05, | |
| "loss": 0.3473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3348079025745392, | |
| "step": 1560, | |
| "valid_targets_mean": 2773.3, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 2.5201288244766507, | |
| "grad_norm": 0.6602490038924721, | |
| "learning_rate": 3.232756842289685e-05, | |
| "loss": 0.3609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3299705982208252, | |
| "step": 1565, | |
| "valid_targets_mean": 4059.8, | |
| "valid_targets_min": 1416 | |
| }, | |
| { | |
| "epoch": 2.528180354267311, | |
| "grad_norm": 0.635105782473463, | |
| "learning_rate": 3.2264231806819286e-05, | |
| "loss": 0.3443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3414621353149414, | |
| "step": 1570, | |
| "valid_targets_mean": 3796.5, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 2.536231884057971, | |
| "grad_norm": 0.6634194373750147, | |
| "learning_rate": 3.220069745652456e-05, | |
| "loss": 0.3435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3682880997657776, | |
| "step": 1575, | |
| "valid_targets_mean": 3359.2, | |
| "valid_targets_min": 1401 | |
| }, | |
| { | |
| "epoch": 2.544283413848631, | |
| "grad_norm": 0.6748832386881813, | |
| "learning_rate": 3.213696639636666e-05, | |
| "loss": 0.3803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37244677543640137, | |
| "step": 1580, | |
| "valid_targets_mean": 3234.9, | |
| "valid_targets_min": 1543 | |
| }, | |
| { | |
| "epoch": 2.5523349436392913, | |
| "grad_norm": 0.6147417333490566, | |
| "learning_rate": 3.207303965387114e-05, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2844334840774536, | |
| "step": 1585, | |
| "valid_targets_mean": 3613.4, | |
| "valid_targets_min": 1535 | |
| }, | |
| { | |
| "epoch": 2.5603864734299515, | |
| "grad_norm": 0.6644115534295828, | |
| "learning_rate": 3.200891825971846e-05, | |
| "loss": 0.341, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34174948930740356, | |
| "step": 1590, | |
| "valid_targets_mean": 3520.8, | |
| "valid_targets_min": 1561 | |
| }, | |
| { | |
| "epoch": 2.5684380032206118, | |
| "grad_norm": 0.6492076929001203, | |
| "learning_rate": 3.194460324772746e-05, | |
| "loss": 0.3575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34135136008262634, | |
| "step": 1595, | |
| "valid_targets_mean": 3454.2, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 2.576489533011272, | |
| "grad_norm": 0.6262401028348537, | |
| "learning_rate": 3.188009565483861e-05, | |
| "loss": 0.3389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37102776765823364, | |
| "step": 1600, | |
| "valid_targets_mean": 3877.2, | |
| "valid_targets_min": 1400 | |
| }, | |
| { | |
| "epoch": 2.5845410628019323, | |
| "grad_norm": 0.6690368914483794, | |
| "learning_rate": 3.1815396521097376e-05, | |
| "loss": 0.3443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30660367012023926, | |
| "step": 1605, | |
| "valid_targets_mean": 3103.3, | |
| "valid_targets_min": 1327 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 0.6782490130071587, | |
| "learning_rate": 3.1750506889637366e-05, | |
| "loss": 0.3738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3212292492389679, | |
| "step": 1610, | |
| "valid_targets_mean": 3301.8, | |
| "valid_targets_min": 1530 | |
| }, | |
| { | |
| "epoch": 2.600644122383253, | |
| "grad_norm": 0.6136219154267422, | |
| "learning_rate": 3.1685427806663574e-05, | |
| "loss": 0.3319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3197278678417206, | |
| "step": 1615, | |
| "valid_targets_mean": 3827.2, | |
| "valid_targets_min": 1604 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 0.7590245604099114, | |
| "learning_rate": 3.1620160321435475e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41536277532577515, | |
| "step": 1620, | |
| "valid_targets_mean": 3155.0, | |
| "valid_targets_min": 1301 | |
| }, | |
| { | |
| "epoch": 2.6167471819645733, | |
| "grad_norm": 0.6668069970888741, | |
| "learning_rate": 3.155470548625014e-05, | |
| "loss": 0.3414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34977424144744873, | |
| "step": 1625, | |
| "valid_targets_mean": 3495.8, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 2.6247987117552336, | |
| "grad_norm": 0.6443781380528575, | |
| "learning_rate": 3.1489064356425235e-05, | |
| "loss": 0.3452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.348181813955307, | |
| "step": 1630, | |
| "valid_targets_mean": 3598.6, | |
| "valid_targets_min": 1663 | |
| }, | |
| { | |
| "epoch": 2.632850241545894, | |
| "grad_norm": 0.6477230031259141, | |
| "learning_rate": 3.142323799028204e-05, | |
| "loss": 0.3225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34765568375587463, | |
| "step": 1635, | |
| "valid_targets_mean": 3781.5, | |
| "valid_targets_min": 1585 | |
| }, | |
| { | |
| "epoch": 2.640901771336554, | |
| "grad_norm": 0.6757898090196188, | |
| "learning_rate": 3.135722744912836e-05, | |
| "loss": 0.3501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38952842354774475, | |
| "step": 1640, | |
| "valid_targets_mean": 4125.3, | |
| "valid_targets_min": 1262 | |
| }, | |
| { | |
| "epoch": 2.6489533011272144, | |
| "grad_norm": 0.6699916964095252, | |
| "learning_rate": 3.129103379724143e-05, | |
| "loss": 0.3481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37032926082611084, | |
| "step": 1645, | |
| "valid_targets_mean": 3489.9, | |
| "valid_targets_min": 1525 | |
| }, | |
| { | |
| "epoch": 2.6570048309178746, | |
| "grad_norm": 0.639493421870476, | |
| "learning_rate": 3.122465810185075e-05, | |
| "loss": 0.3367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33100080490112305, | |
| "step": 1650, | |
| "valid_targets_mean": 3673.7, | |
| "valid_targets_min": 1354 | |
| }, | |
| { | |
| "epoch": 2.6650563607085345, | |
| "grad_norm": 0.5871950095800859, | |
| "learning_rate": 3.1158101433120863e-05, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34115904569625854, | |
| "step": 1655, | |
| "valid_targets_mean": 4327.2, | |
| "valid_targets_min": 1228 | |
| }, | |
| { | |
| "epoch": 2.6731078904991947, | |
| "grad_norm": 0.6666026792346776, | |
| "learning_rate": 3.1091364864134136e-05, | |
| "loss": 0.3438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39104804396629333, | |
| "step": 1660, | |
| "valid_targets_mean": 3398.6, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 2.681159420289855, | |
| "grad_norm": 0.6426036994837567, | |
| "learning_rate": 3.102444947087342e-05, | |
| "loss": 0.3465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34018710255622864, | |
| "step": 1665, | |
| "valid_targets_mean": 3618.1, | |
| "valid_targets_min": 1665 | |
| }, | |
| { | |
| "epoch": 2.6892109500805152, | |
| "grad_norm": 0.7200889886232774, | |
| "learning_rate": 3.0957356332204745e-05, | |
| "loss": 0.3624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4483473300933838, | |
| "step": 1670, | |
| "valid_targets_mean": 3554.4, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 2.6972624798711755, | |
| "grad_norm": 0.6807458182499385, | |
| "learning_rate": 3.089008652985989e-05, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31604230403900146, | |
| "step": 1675, | |
| "valid_targets_mean": 3257.0, | |
| "valid_targets_min": 1286 | |
| }, | |
| { | |
| "epoch": 2.7053140096618358, | |
| "grad_norm": 0.7052899877260982, | |
| "learning_rate": 3.082264114841892e-05, | |
| "loss": 0.3374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3132331371307373, | |
| "step": 1680, | |
| "valid_targets_mean": 2975.9, | |
| "valid_targets_min": 1470 | |
| }, | |
| { | |
| "epoch": 2.713365539452496, | |
| "grad_norm": 0.6354810637538505, | |
| "learning_rate": 3.07550212752928e-05, | |
| "loss": 0.3442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3128201365470886, | |
| "step": 1685, | |
| "valid_targets_mean": 3791.3, | |
| "valid_targets_min": 1211 | |
| }, | |
| { | |
| "epoch": 2.7214170692431563, | |
| "grad_norm": 0.5906889884547679, | |
| "learning_rate": 3.068722800070574e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2894616723060608, | |
| "step": 1690, | |
| "valid_targets_mean": 3958.0, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 2.7294685990338166, | |
| "grad_norm": 0.643609035593482, | |
| "learning_rate": 3.0619262417677695e-05, | |
| "loss": 0.3489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34073030948638916, | |
| "step": 1695, | |
| "valid_targets_mean": 3950.0, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 2.7375201288244764, | |
| "grad_norm": 0.7282369023618399, | |
| "learning_rate": 3.055112562200673e-05, | |
| "loss": 0.3436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3716239631175995, | |
| "step": 1700, | |
| "valid_targets_mean": 3736.1, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 2.7455716586151366, | |
| "grad_norm": 0.7474926314066599, | |
| "learning_rate": 3.0482818712251318e-05, | |
| "loss": 0.3402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35656508803367615, | |
| "step": 1705, | |
| "valid_targets_mean": 3098.4, | |
| "valid_targets_min": 1529 | |
| }, | |
| { | |
| "epoch": 2.753623188405797, | |
| "grad_norm": 0.5882343918140231, | |
| "learning_rate": 3.0414342789712675e-05, | |
| "loss": 0.303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22480350732803345, | |
| "step": 1710, | |
| "valid_targets_mean": 6031.0, | |
| "valid_targets_min": 1980 | |
| }, | |
| { | |
| "epoch": 2.761674718196457, | |
| "grad_norm": 0.7860705058373512, | |
| "learning_rate": 3.034569895841699e-05, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21748200058937073, | |
| "step": 1715, | |
| "valid_targets_mean": 5359.5, | |
| "valid_targets_min": 3428 | |
| }, | |
| { | |
| "epoch": 2.7697262479871174, | |
| "grad_norm": 0.46882998249925517, | |
| "learning_rate": 3.0276888325097583e-05, | |
| "loss": 0.2014, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.211460679769516, | |
| "step": 1720, | |
| "valid_targets_mean": 5453.4, | |
| "valid_targets_min": 2625 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 0.5396421101555618, | |
| "learning_rate": 3.020791199917713e-05, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22912117838859558, | |
| "step": 1725, | |
| "valid_targets_mean": 6407.3, | |
| "valid_targets_min": 4226 | |
| }, | |
| { | |
| "epoch": 2.785829307568438, | |
| "grad_norm": 0.44331928629182393, | |
| "learning_rate": 3.0138771092749722e-05, | |
| "loss": 0.2118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19119247794151306, | |
| "step": 1730, | |
| "valid_targets_mean": 6119.8, | |
| "valid_targets_min": 2891 | |
| }, | |
| { | |
| "epoch": 2.793880837359098, | |
| "grad_norm": 0.4549848502323358, | |
| "learning_rate": 3.006946672056297e-05, | |
| "loss": 0.2003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18082711100578308, | |
| "step": 1735, | |
| "valid_targets_mean": 5309.0, | |
| "valid_targets_min": 3122 | |
| }, | |
| { | |
| "epoch": 2.8019323671497585, | |
| "grad_norm": 0.5088412843130042, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.1904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18192598223686218, | |
| "step": 1740, | |
| "valid_targets_mean": 5206.8, | |
| "valid_targets_min": 3536 | |
| }, | |
| { | |
| "epoch": 2.8099838969404187, | |
| "grad_norm": 0.4533466677445726, | |
| "learning_rate": 2.993037205106147e-05, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22841483354568481, | |
| "step": 1745, | |
| "valid_targets_mean": 6110.2, | |
| "valid_targets_min": 3433 | |
| }, | |
| { | |
| "epoch": 2.818035426731079, | |
| "grad_norm": 0.42286915041535666, | |
| "learning_rate": 2.9860583996347495e-05, | |
| "loss": 0.2238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16705384850502014, | |
| "step": 1750, | |
| "valid_targets_mean": 5580.4, | |
| "valid_targets_min": 2346 | |
| }, | |
| { | |
| "epoch": 2.8260869565217392, | |
| "grad_norm": 0.46308406661472623, | |
| "learning_rate": 2.9790636961039524e-05, | |
| "loss": 0.2044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20052894949913025, | |
| "step": 1755, | |
| "valid_targets_mean": 5809.4, | |
| "valid_targets_min": 2808 | |
| }, | |
| { | |
| "epoch": 2.8341384863123995, | |
| "grad_norm": 0.4973033226604493, | |
| "learning_rate": 2.9720532072882268e-05, | |
| "loss": 0.2035, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19987952709197998, | |
| "step": 1760, | |
| "valid_targets_mean": 5466.4, | |
| "valid_targets_min": 1878 | |
| }, | |
| { | |
| "epoch": 2.8421900161030598, | |
| "grad_norm": 0.49364706625593213, | |
| "learning_rate": 2.965027046216544e-05, | |
| "loss": 0.2072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24134349822998047, | |
| "step": 1765, | |
| "valid_targets_mean": 6193.4, | |
| "valid_targets_min": 2985 | |
| }, | |
| { | |
| "epoch": 2.85024154589372, | |
| "grad_norm": 0.44774305114424784, | |
| "learning_rate": 2.9579853261705573e-05, | |
| "loss": 0.1939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1811220645904541, | |
| "step": 1770, | |
| "valid_targets_mean": 5857.2, | |
| "valid_targets_min": 3369 | |
| }, | |
| { | |
| "epoch": 2.8582930756843803, | |
| "grad_norm": 0.5084367346859436, | |
| "learning_rate": 2.950928160682775e-05, | |
| "loss": 0.1998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16717293858528137, | |
| "step": 1775, | |
| "valid_targets_mean": 4669.6, | |
| "valid_targets_min": 2984 | |
| }, | |
| { | |
| "epoch": 2.86634460547504, | |
| "grad_norm": 0.5258355536138191, | |
| "learning_rate": 2.943855663534731e-05, | |
| "loss": 0.2085, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21475860476493835, | |
| "step": 1780, | |
| "valid_targets_mean": 5739.6, | |
| "valid_targets_min": 3163 | |
| }, | |
| { | |
| "epoch": 2.8743961352657004, | |
| "grad_norm": 0.624047290830488, | |
| "learning_rate": 2.9367679487551473e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17102691531181335, | |
| "step": 1785, | |
| "valid_targets_mean": 5951.2, | |
| "valid_targets_min": 2774 | |
| }, | |
| { | |
| "epoch": 2.8824476650563606, | |
| "grad_norm": 0.48242204698030056, | |
| "learning_rate": 2.929665130618098e-05, | |
| "loss": 0.2024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20739901065826416, | |
| "step": 1790, | |
| "valid_targets_mean": 5372.6, | |
| "valid_targets_min": 2692 | |
| }, | |
| { | |
| "epoch": 2.890499194847021, | |
| "grad_norm": 0.4326608763924011, | |
| "learning_rate": 2.9225473236411655e-05, | |
| "loss": 0.1911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17670348286628723, | |
| "step": 1795, | |
| "valid_targets_mean": 5611.9, | |
| "valid_targets_min": 3302 | |
| }, | |
| { | |
| "epoch": 2.898550724637681, | |
| "grad_norm": 0.4674400578356549, | |
| "learning_rate": 2.915414642583596e-05, | |
| "loss": 0.1939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.192458838224411, | |
| "step": 1800, | |
| "valid_targets_mean": 5552.4, | |
| "valid_targets_min": 3435 | |
| }, | |
| { | |
| "epoch": 2.9066022544283414, | |
| "grad_norm": 0.46590415009442393, | |
| "learning_rate": 2.9082672024444485e-05, | |
| "loss": 0.2019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20876693725585938, | |
| "step": 1805, | |
| "valid_targets_mean": 5709.1, | |
| "valid_targets_min": 3713 | |
| }, | |
| { | |
| "epoch": 2.9146537842190017, | |
| "grad_norm": 0.55521296635976, | |
| "learning_rate": 2.901105118460737e-05, | |
| "loss": 0.1949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20165735483169556, | |
| "step": 1810, | |
| "valid_targets_mean": 5800.1, | |
| "valid_targets_min": 3378 | |
| }, | |
| { | |
| "epoch": 2.922705314009662, | |
| "grad_norm": 0.44671622462161964, | |
| "learning_rate": 2.8939285061055807e-05, | |
| "loss": 0.1933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20931756496429443, | |
| "step": 1815, | |
| "valid_targets_mean": 5923.8, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 2.930756843800322, | |
| "grad_norm": 0.45724838211116503, | |
| "learning_rate": 2.8867374810863325e-05, | |
| "loss": 0.1886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18910741806030273, | |
| "step": 1820, | |
| "valid_targets_mean": 5651.0, | |
| "valid_targets_min": 3365 | |
| }, | |
| { | |
| "epoch": 2.938808373590982, | |
| "grad_norm": 0.45924176560852137, | |
| "learning_rate": 2.8795321593427227e-05, | |
| "loss": 0.2101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2153066247701645, | |
| "step": 1825, | |
| "valid_targets_mean": 6766.6, | |
| "valid_targets_min": 3372 | |
| }, | |
| { | |
| "epoch": 2.9468599033816423, | |
| "grad_norm": 0.4602179071328878, | |
| "learning_rate": 2.8723126570449813e-05, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17615215480327606, | |
| "step": 1830, | |
| "valid_targets_mean": 5546.4, | |
| "valid_targets_min": 3374 | |
| }, | |
| { | |
| "epoch": 2.9549114331723025, | |
| "grad_norm": 0.4757535168677279, | |
| "learning_rate": 2.8650790905919724e-05, | |
| "loss": 0.1903, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18606898188591003, | |
| "step": 1835, | |
| "valid_targets_mean": 5870.6, | |
| "valid_targets_min": 3626 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 0.4550197511497243, | |
| "learning_rate": 2.8578315766093133e-05, | |
| "loss": 0.1957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20954301953315735, | |
| "step": 1840, | |
| "valid_targets_mean": 5978.6, | |
| "valid_targets_min": 3695 | |
| }, | |
| { | |
| "epoch": 2.971014492753623, | |
| "grad_norm": 0.48815632704334255, | |
| "learning_rate": 2.850570231947493e-05, | |
| "loss": 0.1927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20100757479667664, | |
| "step": 1845, | |
| "valid_targets_mean": 5347.4, | |
| "valid_targets_min": 3463 | |
| }, | |
| { | |
| "epoch": 2.9790660225442833, | |
| "grad_norm": 0.45140508763909315, | |
| "learning_rate": 2.8432951736799933e-05, | |
| "loss": 0.1867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1854209005832672, | |
| "step": 1850, | |
| "valid_targets_mean": 5264.8, | |
| "valid_targets_min": 3132 | |
| }, | |
| { | |
| "epoch": 2.9871175523349436, | |
| "grad_norm": 0.47657839693030885, | |
| "learning_rate": 2.8360065191013967e-05, | |
| "loss": 0.2063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1967577189207077, | |
| "step": 1855, | |
| "valid_targets_mean": 5723.9, | |
| "valid_targets_min": 3017 | |
| }, | |
| { | |
| "epoch": 2.995169082125604, | |
| "grad_norm": 0.4892874249873267, | |
| "learning_rate": 2.8287043857254957e-05, | |
| "loss": 0.1822, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1871507167816162, | |
| "step": 1860, | |
| "valid_targets_mean": 6460.0, | |
| "valid_targets_min": 2935 | |
| }, | |
| { | |
| "epoch": 3.003220611916264, | |
| "grad_norm": 0.5527497946640381, | |
| "learning_rate": 2.8213888912834026e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.258331298828125, | |
| "step": 1865, | |
| "valid_targets_mean": 5212.3, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 3.0112721417069244, | |
| "grad_norm": 0.5830002451186027, | |
| "learning_rate": 2.814060153721644e-05, | |
| "loss": 0.2892, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2999268174171448, | |
| "step": 1870, | |
| "valid_targets_mean": 6001.2, | |
| "valid_targets_min": 433 | |
| }, | |
| { | |
| "epoch": 3.0193236714975846, | |
| "grad_norm": 0.4500521340884674, | |
| "learning_rate": 2.8067182912002663e-05, | |
| "loss": 0.2623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2809280753135681, | |
| "step": 1875, | |
| "valid_targets_mean": 6695.0, | |
| "valid_targets_min": 3606 | |
| }, | |
| { | |
| "epoch": 3.027375201288245, | |
| "grad_norm": 0.4652104269522271, | |
| "learning_rate": 2.7993634220909254e-05, | |
| "loss": 0.2699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22316525876522064, | |
| "step": 1880, | |
| "valid_targets_mean": 5460.7, | |
| "valid_targets_min": 284 | |
| }, | |
| { | |
| "epoch": 3.035426731078905, | |
| "grad_norm": 0.46090061714203895, | |
| "learning_rate": 2.7919956649749826e-05, | |
| "loss": 0.2702, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2710314095020294, | |
| "step": 1885, | |
| "valid_targets_mean": 6763.2, | |
| "valid_targets_min": 3435 | |
| }, | |
| { | |
| "epoch": 3.0434782608695654, | |
| "grad_norm": 0.4611138651345071, | |
| "learning_rate": 2.784615138641588e-05, | |
| "loss": 0.2755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.242034912109375, | |
| "step": 1890, | |
| "valid_targets_mean": 6750.6, | |
| "valid_targets_min": 3554 | |
| }, | |
| { | |
| "epoch": 3.0515297906602252, | |
| "grad_norm": 0.49645873562039455, | |
| "learning_rate": 2.7772219620857685e-05, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28464362025260925, | |
| "step": 1895, | |
| "valid_targets_mean": 6853.2, | |
| "valid_targets_min": 4715 | |
| }, | |
| { | |
| "epoch": 3.0595813204508855, | |
| "grad_norm": 0.4950622875309601, | |
| "learning_rate": 2.769816254506509e-05, | |
| "loss": 0.2694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2540530562400818, | |
| "step": 1900, | |
| "valid_targets_mean": 5465.6, | |
| "valid_targets_min": 2485 | |
| }, | |
| { | |
| "epoch": 3.0676328502415457, | |
| "grad_norm": 0.4737619492864805, | |
| "learning_rate": 2.76239813530483e-05, | |
| "loss": 0.2639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2536058723926544, | |
| "step": 1905, | |
| "valid_targets_mean": 7040.4, | |
| "valid_targets_min": 3642 | |
| }, | |
| { | |
| "epoch": 3.075684380032206, | |
| "grad_norm": 0.43563346257793917, | |
| "learning_rate": 2.7549677240818628e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25332552194595337, | |
| "step": 1910, | |
| "valid_targets_mean": 6890.4, | |
| "valid_targets_min": 3408 | |
| }, | |
| { | |
| "epoch": 3.0837359098228663, | |
| "grad_norm": 0.5538637159995918, | |
| "learning_rate": 2.7475251406369197e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27943116426467896, | |
| "step": 1915, | |
| "valid_targets_mean": 5696.9, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 3.0917874396135265, | |
| "grad_norm": 0.502755804416658, | |
| "learning_rate": 2.740070504965565e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2628275156021118, | |
| "step": 1920, | |
| "valid_targets_mean": 5780.6, | |
| "valid_targets_min": 1991 | |
| }, | |
| { | |
| "epoch": 3.099838969404187, | |
| "grad_norm": 0.46642134348041764, | |
| "learning_rate": 2.7326039372576782e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26721107959747314, | |
| "step": 1925, | |
| "valid_targets_mean": 5684.6, | |
| "valid_targets_min": 2267 | |
| }, | |
| { | |
| "epoch": 3.107890499194847, | |
| "grad_norm": 0.4761056162555204, | |
| "learning_rate": 2.7251255578955186e-05, | |
| "loss": 0.2604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24499773979187012, | |
| "step": 1930, | |
| "valid_targets_mean": 6097.4, | |
| "valid_targets_min": 536 | |
| }, | |
| { | |
| "epoch": 3.1159420289855073, | |
| "grad_norm": 0.4574904350207186, | |
| "learning_rate": 2.7176354874517805e-05, | |
| "loss": 0.2795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2528035044670105, | |
| "step": 1935, | |
| "valid_targets_mean": 5954.2, | |
| "valid_targets_min": 3071 | |
| }, | |
| { | |
| "epoch": 3.1239935587761676, | |
| "grad_norm": 0.5257344524597956, | |
| "learning_rate": 2.7101338466876542e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25541383028030396, | |
| "step": 1940, | |
| "valid_targets_mean": 5115.2, | |
| "valid_targets_min": 2196 | |
| }, | |
| { | |
| "epoch": 3.132045088566828, | |
| "grad_norm": 0.9148021670271776, | |
| "learning_rate": 2.702620756550874e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4044179320335388, | |
| "step": 1945, | |
| "valid_targets_mean": 2789.6, | |
| "valid_targets_min": 1009 | |
| }, | |
| { | |
| "epoch": 3.140096618357488, | |
| "grad_norm": 0.8933463865740449, | |
| "learning_rate": 2.6950963381737728e-05, | |
| "loss": 0.4008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43537747859954834, | |
| "step": 1950, | |
| "valid_targets_mean": 3042.1, | |
| "valid_targets_min": 1721 | |
| }, | |
| { | |
| "epoch": 3.148148148148148, | |
| "grad_norm": 0.7586492212375334, | |
| "learning_rate": 2.687560712871325e-05, | |
| "loss": 0.4051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40004706382751465, | |
| "step": 1955, | |
| "valid_targets_mean": 3377.2, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 3.156199677938808, | |
| "grad_norm": 0.8200189711325815, | |
| "learning_rate": 2.6800140021391933e-05, | |
| "loss": 0.3918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42270979285240173, | |
| "step": 1960, | |
| "valid_targets_mean": 3314.4, | |
| "valid_targets_min": 1619 | |
| }, | |
| { | |
| "epoch": 3.1642512077294684, | |
| "grad_norm": 0.7884155012819621, | |
| "learning_rate": 2.6724563276517697e-05, | |
| "loss": 0.3936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36296790838241577, | |
| "step": 1965, | |
| "valid_targets_mean": 3112.6, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 3.1723027375201287, | |
| "grad_norm": 0.6764869788258896, | |
| "learning_rate": 2.6648878112602115e-05, | |
| "loss": 0.3728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35345786809921265, | |
| "step": 1970, | |
| "valid_targets_mean": 3572.9, | |
| "valid_targets_min": 1413 | |
| }, | |
| { | |
| "epoch": 3.180354267310789, | |
| "grad_norm": 0.7225388514917793, | |
| "learning_rate": 2.6573085749904784e-05, | |
| "loss": 0.3774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39635008573532104, | |
| "step": 1975, | |
| "valid_targets_mean": 3774.3, | |
| "valid_targets_min": 1338 | |
| }, | |
| { | |
| "epoch": 3.1884057971014492, | |
| "grad_norm": 0.8231423436666249, | |
| "learning_rate": 2.6497187410413676e-05, | |
| "loss": 0.3828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3863440752029419, | |
| "step": 1980, | |
| "valid_targets_mean": 2317.6, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 3.1964573268921095, | |
| "grad_norm": 0.7285170435595408, | |
| "learning_rate": 2.642118431782537e-05, | |
| "loss": 0.3874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3951292335987091, | |
| "step": 1985, | |
| "valid_targets_mean": 3185.3, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 3.2045088566827697, | |
| "grad_norm": 0.7017080619872773, | |
| "learning_rate": 2.6345077697525394e-05, | |
| "loss": 0.3695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.347123384475708, | |
| "step": 1990, | |
| "valid_targets_mean": 3075.4, | |
| "valid_targets_min": 1376 | |
| }, | |
| { | |
| "epoch": 3.21256038647343, | |
| "grad_norm": 0.8231794679729749, | |
| "learning_rate": 2.6268868776568416e-05, | |
| "loss": 0.4001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35584574937820435, | |
| "step": 1995, | |
| "valid_targets_mean": 2823.5, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 3.2206119162640903, | |
| "grad_norm": 0.7683051537663695, | |
| "learning_rate": 2.619255878365849e-05, | |
| "loss": 0.3626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33899423480033875, | |
| "step": 2000, | |
| "valid_targets_mean": 3121.4, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 3.2286634460547505, | |
| "grad_norm": 1.0228623510526746, | |
| "learning_rate": 2.6116148949129237e-05, | |
| "loss": 0.3926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3965111970901489, | |
| "step": 2005, | |
| "valid_targets_mean": 3859.0, | |
| "valid_targets_min": 1457 | |
| }, | |
| { | |
| "epoch": 3.236714975845411, | |
| "grad_norm": 0.8910998328193267, | |
| "learning_rate": 2.603964050492401e-05, | |
| "loss": 0.3947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4256768822669983, | |
| "step": 2010, | |
| "valid_targets_mean": 2689.9, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 3.244766505636071, | |
| "grad_norm": 0.7345012383668745, | |
| "learning_rate": 2.5963034684576024e-05, | |
| "loss": 0.3855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3804903030395508, | |
| "step": 2015, | |
| "valid_targets_mean": 3814.0, | |
| "valid_targets_min": 1182 | |
| }, | |
| { | |
| "epoch": 3.2528180354267313, | |
| "grad_norm": 0.7678805707006796, | |
| "learning_rate": 2.5886332723188484e-05, | |
| "loss": 0.3777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36300283670425415, | |
| "step": 2020, | |
| "valid_targets_mean": 3139.2, | |
| "valid_targets_min": 999 | |
| }, | |
| { | |
| "epoch": 3.260869565217391, | |
| "grad_norm": 0.8103254411230125, | |
| "learning_rate": 2.5809535857414637e-05, | |
| "loss": 0.3704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3861234784126282, | |
| "step": 2025, | |
| "valid_targets_mean": 3032.6, | |
| "valid_targets_min": 1346 | |
| }, | |
| { | |
| "epoch": 3.2689210950080514, | |
| "grad_norm": 0.8485120319760017, | |
| "learning_rate": 2.573264532543788e-05, | |
| "loss": 0.3716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.399763822555542, | |
| "step": 2030, | |
| "valid_targets_mean": 2960.4, | |
| "valid_targets_min": 1292 | |
| }, | |
| { | |
| "epoch": 3.2769726247987117, | |
| "grad_norm": 0.7941028816750529, | |
| "learning_rate": 2.5655662366951778e-05, | |
| "loss": 0.3836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38156741857528687, | |
| "step": 2035, | |
| "valid_targets_mean": 3008.5, | |
| "valid_targets_min": 1031 | |
| }, | |
| { | |
| "epoch": 3.285024154589372, | |
| "grad_norm": 0.7517179718878059, | |
| "learning_rate": 2.557858822314007e-05, | |
| "loss": 0.3784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3794868588447571, | |
| "step": 2040, | |
| "valid_targets_mean": 2940.0, | |
| "valid_targets_min": 1404 | |
| }, | |
| { | |
| "epoch": 3.293075684380032, | |
| "grad_norm": 0.7474671950243801, | |
| "learning_rate": 2.5501424136656635e-05, | |
| "loss": 0.3627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3578556180000305, | |
| "step": 2045, | |
| "valid_targets_mean": 2994.5, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 3.3011272141706924, | |
| "grad_norm": 0.8073459873948605, | |
| "learning_rate": 2.5424171351605518e-05, | |
| "loss": 0.3917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35137873888015747, | |
| "step": 2050, | |
| "valid_targets_mean": 2981.6, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 3.3091787439613527, | |
| "grad_norm": 0.7363545984797931, | |
| "learning_rate": 2.5346831113520827e-05, | |
| "loss": 0.3558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33045268058776855, | |
| "step": 2055, | |
| "valid_targets_mean": 2965.2, | |
| "valid_targets_min": 1119 | |
| }, | |
| { | |
| "epoch": 3.317230273752013, | |
| "grad_norm": 0.8551622649502074, | |
| "learning_rate": 2.526940466934664e-05, | |
| "loss": 0.3791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4106612801551819, | |
| "step": 2060, | |
| "valid_targets_mean": 2819.8, | |
| "valid_targets_min": 1638 | |
| }, | |
| { | |
| "epoch": 3.325281803542673, | |
| "grad_norm": 0.8197438218947848, | |
| "learning_rate": 2.5191893267416964e-05, | |
| "loss": 0.3761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35730135440826416, | |
| "step": 2065, | |
| "valid_targets_mean": 3237.6, | |
| "valid_targets_min": 1255 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.8675787777451592, | |
| "learning_rate": 2.5114298157435526e-05, | |
| "loss": 0.3704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3833295404911041, | |
| "step": 2070, | |
| "valid_targets_mean": 3031.8, | |
| "valid_targets_min": 1244 | |
| }, | |
| { | |
| "epoch": 3.3413848631239937, | |
| "grad_norm": 0.7989238817393547, | |
| "learning_rate": 2.503662059045568e-05, | |
| "loss": 0.3959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39606934785842896, | |
| "step": 2075, | |
| "valid_targets_mean": 2751.6, | |
| "valid_targets_min": 1349 | |
| }, | |
| { | |
| "epoch": 3.3494363929146536, | |
| "grad_norm": 0.7649931205299405, | |
| "learning_rate": 2.4958861818860217e-05, | |
| "loss": 0.3756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3948572874069214, | |
| "step": 2080, | |
| "valid_targets_mean": 2998.9, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 3.357487922705314, | |
| "grad_norm": 0.6957699032853977, | |
| "learning_rate": 2.488102309634119e-05, | |
| "loss": 0.3733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36002251505851746, | |
| "step": 2085, | |
| "valid_targets_mean": 3551.8, | |
| "valid_targets_min": 1310 | |
| }, | |
| { | |
| "epoch": 3.365539452495974, | |
| "grad_norm": 0.8172302341279781, | |
| "learning_rate": 2.480310567787967e-05, | |
| "loss": 0.3891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3608362078666687, | |
| "step": 2090, | |
| "valid_targets_mean": 2457.2, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 3.3735909822866343, | |
| "grad_norm": 0.6603911880036989, | |
| "learning_rate": 2.4725110819725542e-05, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34838154911994934, | |
| "step": 2095, | |
| "valid_targets_mean": 4338.5, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 3.3816425120772946, | |
| "grad_norm": 0.7013111479678409, | |
| "learning_rate": 2.464703977937723e-05, | |
| "loss": 0.339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3159921169281006, | |
| "step": 2100, | |
| "valid_targets_mean": 3443.4, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 3.389694041867955, | |
| "grad_norm": 0.7271690127494458, | |
| "learning_rate": 2.456889381556144e-05, | |
| "loss": 0.3431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3470936417579651, | |
| "step": 2105, | |
| "valid_targets_mean": 3482.4, | |
| "valid_targets_min": 1451 | |
| }, | |
| { | |
| "epoch": 3.397745571658615, | |
| "grad_norm": 0.6846909480553407, | |
| "learning_rate": 2.449067418821285e-05, | |
| "loss": 0.3453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3760640025138855, | |
| "step": 2110, | |
| "valid_targets_mean": 3535.4, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 3.4057971014492754, | |
| "grad_norm": 0.6498807742334415, | |
| "learning_rate": 2.4412382158453807e-05, | |
| "loss": 0.3209, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31845319271087646, | |
| "step": 2115, | |
| "valid_targets_mean": 3810.8, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 3.4138486312399356, | |
| "grad_norm": 0.7280328109327153, | |
| "learning_rate": 2.4334018988573983e-05, | |
| "loss": 0.3395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33589398860931396, | |
| "step": 2120, | |
| "valid_targets_mean": 3309.2, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 3.421900161030596, | |
| "grad_norm": 0.5931265791786551, | |
| "learning_rate": 2.425558594201004e-05, | |
| "loss": 0.3142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26291149854660034, | |
| "step": 2125, | |
| "valid_targets_mean": 4325.1, | |
| "valid_targets_min": 1575 | |
| }, | |
| { | |
| "epoch": 3.429951690821256, | |
| "grad_norm": 0.7111318648584927, | |
| "learning_rate": 2.417708428332525e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34463876485824585, | |
| "step": 2130, | |
| "valid_targets_mean": 3129.6, | |
| "valid_targets_min": 1545 | |
| }, | |
| { | |
| "epoch": 3.4380032206119164, | |
| "grad_norm": 0.6250487281091418, | |
| "learning_rate": 2.4098515278189097e-05, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3047589063644409, | |
| "step": 2135, | |
| "valid_targets_mean": 4161.9, | |
| "valid_targets_min": 1557 | |
| }, | |
| { | |
| "epoch": 3.4460547504025767, | |
| "grad_norm": 0.626198180719724, | |
| "learning_rate": 2.4019880193356902e-05, | |
| "loss": 0.3293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3424261808395386, | |
| "step": 2140, | |
| "valid_targets_mean": 3786.2, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 3.454106280193237, | |
| "grad_norm": 0.6155081722038326, | |
| "learning_rate": 2.3941180296649348e-05, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31505197286605835, | |
| "step": 2145, | |
| "valid_targets_mean": 4313.6, | |
| "valid_targets_min": 1130 | |
| }, | |
| { | |
| "epoch": 3.4621578099838968, | |
| "grad_norm": 0.6956667846707119, | |
| "learning_rate": 2.3862416856932087e-05, | |
| "loss": 0.3203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32704436779022217, | |
| "step": 2150, | |
| "valid_targets_mean": 3571.6, | |
| "valid_targets_min": 1459 | |
| }, | |
| { | |
| "epoch": 3.470209339774557, | |
| "grad_norm": 0.7061438709341965, | |
| "learning_rate": 2.378359114409527e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3402191996574402, | |
| "step": 2155, | |
| "valid_targets_mean": 3576.9, | |
| "valid_targets_min": 1717 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 0.6733367985439651, | |
| "learning_rate": 2.370470442903306e-05, | |
| "loss": 0.3334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29829442501068115, | |
| "step": 2160, | |
| "valid_targets_mean": 3445.9, | |
| "valid_targets_min": 1340 | |
| }, | |
| { | |
| "epoch": 3.4863123993558776, | |
| "grad_norm": 0.7199769182800366, | |
| "learning_rate": 2.362575798362315e-05, | |
| "loss": 0.3245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3266036808490753, | |
| "step": 2165, | |
| "valid_targets_mean": 3381.9, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 3.494363929146538, | |
| "grad_norm": 0.762977409147056, | |
| "learning_rate": 2.3546753080706242e-05, | |
| "loss": 0.3162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32624316215515137, | |
| "step": 2170, | |
| "valid_targets_mean": 3398.4, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 3.502415458937198, | |
| "grad_norm": 0.7356969210292927, | |
| "learning_rate": 2.346769099406557e-05, | |
| "loss": 0.3061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3401956260204315, | |
| "step": 2175, | |
| "valid_targets_mean": 3281.4, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 3.5104669887278583, | |
| "grad_norm": 0.6884173861250173, | |
| "learning_rate": 2.33885729984063e-05, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33478766679763794, | |
| "step": 2180, | |
| "valid_targets_mean": 3259.2, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 3.5185185185185186, | |
| "grad_norm": 0.6833192926719988, | |
| "learning_rate": 2.3309400369335033e-05, | |
| "loss": 0.3293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29600557684898376, | |
| "step": 2185, | |
| "valid_targets_mean": 3833.4, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 3.526570048309179, | |
| "grad_norm": 0.6951855497662062, | |
| "learning_rate": 2.3230174383339196e-05, | |
| "loss": 0.3115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3087206780910492, | |
| "step": 2190, | |
| "valid_targets_mean": 3652.0, | |
| "valid_targets_min": 1088 | |
| }, | |
| { | |
| "epoch": 3.534621578099839, | |
| "grad_norm": 0.6613560062519789, | |
| "learning_rate": 2.3150896317766505e-05, | |
| "loss": 0.306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2983492612838745, | |
| "step": 2195, | |
| "valid_targets_mean": 3762.8, | |
| "valid_targets_min": 1512 | |
| }, | |
| { | |
| "epoch": 3.542673107890499, | |
| "grad_norm": 0.68550903113913, | |
| "learning_rate": 2.3071567450804325e-05, | |
| "loss": 0.3477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35058295726776123, | |
| "step": 2200, | |
| "valid_targets_mean": 3278.4, | |
| "valid_targets_min": 1405 | |
| }, | |
| { | |
| "epoch": 3.550724637681159, | |
| "grad_norm": 0.8603392885960655, | |
| "learning_rate": 2.299218906145909e-05, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34910333156585693, | |
| "step": 2205, | |
| "valid_targets_mean": 2441.1, | |
| "valid_targets_min": 1235 | |
| }, | |
| { | |
| "epoch": 3.5587761674718195, | |
| "grad_norm": 0.6801260592469724, | |
| "learning_rate": 2.2912762429535684e-05, | |
| "loss": 0.2986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2981488108634949, | |
| "step": 2210, | |
| "valid_targets_mean": 3300.4, | |
| "valid_targets_min": 1317 | |
| }, | |
| { | |
| "epoch": 3.5668276972624797, | |
| "grad_norm": 0.7979499575329786, | |
| "learning_rate": 2.2833288835616784e-05, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3507813513278961, | |
| "step": 2215, | |
| "valid_targets_mean": 3180.9, | |
| "valid_targets_min": 1377 | |
| }, | |
| { | |
| "epoch": 3.57487922705314, | |
| "grad_norm": 0.7589237518218688, | |
| "learning_rate": 2.2753769561042235e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2893664240837097, | |
| "step": 2220, | |
| "valid_targets_mean": 2736.6, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 3.5829307568438002, | |
| "grad_norm": 0.7842484155012712, | |
| "learning_rate": 2.2674205887888386e-05, | |
| "loss": 0.3236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34598594903945923, | |
| "step": 2225, | |
| "valid_targets_mean": 2817.1, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 3.5909822866344605, | |
| "grad_norm": 0.7553985888978503, | |
| "learning_rate": 2.259459909894742e-05, | |
| "loss": 0.3366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3397219777107239, | |
| "step": 2230, | |
| "valid_targets_mean": 3038.3, | |
| "valid_targets_min": 1150 | |
| }, | |
| { | |
| "epoch": 3.5990338164251208, | |
| "grad_norm": 0.6631101299213469, | |
| "learning_rate": 2.2514950477706657e-05, | |
| "loss": 0.301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2631319761276245, | |
| "step": 2235, | |
| "valid_targets_mean": 3378.8, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 3.607085346215781, | |
| "grad_norm": 0.6917879399712983, | |
| "learning_rate": 2.2435261308327875e-05, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2921825051307678, | |
| "step": 2240, | |
| "valid_targets_mean": 3375.1, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 3.6151368760064413, | |
| "grad_norm": 0.6646988908267994, | |
| "learning_rate": 2.2355532875626612e-05, | |
| "loss": 0.3218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28387004137039185, | |
| "step": 2245, | |
| "valid_targets_mean": 3651.2, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 3.6231884057971016, | |
| "grad_norm": 0.7566641430033721, | |
| "learning_rate": 2.2275766465051444e-05, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3653978705406189, | |
| "step": 2250, | |
| "valid_targets_mean": 3000.1, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 3.631239935587762, | |
| "grad_norm": 0.640762301083058, | |
| "learning_rate": 2.2195963362663236e-05, | |
| "loss": 0.2918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2885698080062866, | |
| "step": 2255, | |
| "valid_targets_mean": 4016.1, | |
| "valid_targets_min": 1710 | |
| }, | |
| { | |
| "epoch": 3.639291465378422, | |
| "grad_norm": 0.7312405714684358, | |
| "learning_rate": 2.211612485511446e-05, | |
| "loss": 0.3109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3239280581474304, | |
| "step": 2260, | |
| "valid_targets_mean": 3113.6, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 3.6473429951690823, | |
| "grad_norm": 0.6797552789086924, | |
| "learning_rate": 2.2036252229628392e-05, | |
| "loss": 0.3208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30023393034935, | |
| "step": 2265, | |
| "valid_targets_mean": 3113.1, | |
| "valid_targets_min": 1421 | |
| }, | |
| { | |
| "epoch": 3.6553945249597426, | |
| "grad_norm": 0.7033890984196507, | |
| "learning_rate": 2.19563467739784e-05, | |
| "loss": 0.3128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33096373081207275, | |
| "step": 2270, | |
| "valid_targets_mean": 3861.4, | |
| "valid_targets_min": 1570 | |
| }, | |
| { | |
| "epoch": 3.6634460547504024, | |
| "grad_norm": 0.693299418919331, | |
| "learning_rate": 2.1876409776467165e-05, | |
| "loss": 0.3152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31812596321105957, | |
| "step": 2275, | |
| "valid_targets_mean": 3224.8, | |
| "valid_targets_min": 1644 | |
| }, | |
| { | |
| "epoch": 3.6714975845410627, | |
| "grad_norm": 0.7050830766977225, | |
| "learning_rate": 2.1796442525905923e-05, | |
| "loss": 0.3029, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2968364953994751, | |
| "step": 2280, | |
| "valid_targets_mean": 3072.1, | |
| "valid_targets_min": 1239 | |
| }, | |
| { | |
| "epoch": 3.679549114331723, | |
| "grad_norm": 0.7092053234510587, | |
| "learning_rate": 2.171644631159366e-05, | |
| "loss": 0.3251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33621740341186523, | |
| "step": 2285, | |
| "valid_targets_mean": 3552.6, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 3.687600644122383, | |
| "grad_norm": 0.7327779149808457, | |
| "learning_rate": 2.163642242329633e-05, | |
| "loss": 0.3097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3615042567253113, | |
| "step": 2290, | |
| "valid_targets_mean": 3199.9, | |
| "valid_targets_min": 1402 | |
| }, | |
| { | |
| "epoch": 3.6956521739130435, | |
| "grad_norm": 0.6747000049468731, | |
| "learning_rate": 2.1556372151226097e-05, | |
| "loss": 0.3444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30473870038986206, | |
| "step": 2295, | |
| "valid_targets_mean": 3625.8, | |
| "valid_targets_min": 1668 | |
| }, | |
| { | |
| "epoch": 3.7037037037037037, | |
| "grad_norm": 0.6905439473556321, | |
| "learning_rate": 2.1476296786020502e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3153340816497803, | |
| "step": 2300, | |
| "valid_targets_mean": 3234.1, | |
| "valid_targets_min": 1476 | |
| }, | |
| { | |
| "epoch": 3.711755233494364, | |
| "grad_norm": 0.757410046151389, | |
| "learning_rate": 2.139619761872163e-05, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32494598627090454, | |
| "step": 2305, | |
| "valid_targets_mean": 3108.1, | |
| "valid_targets_min": 1329 | |
| }, | |
| { | |
| "epoch": 3.7198067632850242, | |
| "grad_norm": 0.6333814485490104, | |
| "learning_rate": 2.1316075940755363e-05, | |
| "loss": 0.3101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30045998096466064, | |
| "step": 2310, | |
| "valid_targets_mean": 4108.0, | |
| "valid_targets_min": 1439 | |
| }, | |
| { | |
| "epoch": 3.7278582930756845, | |
| "grad_norm": 0.8569424628003814, | |
| "learning_rate": 2.1235933043910488e-05, | |
| "loss": 0.3102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3410085439682007, | |
| "step": 2315, | |
| "valid_targets_mean": 2746.4, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 3.7359098228663448, | |
| "grad_norm": 0.7003250678607069, | |
| "learning_rate": 2.1155770220317918e-05, | |
| "loss": 0.3087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29915452003479004, | |
| "step": 2320, | |
| "valid_targets_mean": 3250.2, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 3.7439613526570046, | |
| "grad_norm": 0.731064940807403, | |
| "learning_rate": 2.107558876242983e-05, | |
| "loss": 0.3152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.330859899520874, | |
| "step": 2325, | |
| "valid_targets_mean": 3688.2, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 3.752012882447665, | |
| "grad_norm": 0.6231472245606579, | |
| "learning_rate": 2.0995389962998845e-05, | |
| "loss": 0.301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20000654458999634, | |
| "step": 2330, | |
| "valid_targets_mean": 4773.8, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 3.760064412238325, | |
| "grad_norm": 1.1067173824667762, | |
| "learning_rate": 2.091517511505719e-05, | |
| "loss": 0.2102, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1921842396259308, | |
| "step": 2335, | |
| "valid_targets_mean": 5335.0, | |
| "valid_targets_min": 2383 | |
| }, | |
| { | |
| "epoch": 3.7681159420289854, | |
| "grad_norm": 0.4630108390914341, | |
| "learning_rate": 2.0834945511895816e-05, | |
| "loss": 0.1852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1726524531841278, | |
| "step": 2340, | |
| "valid_targets_mean": 5678.3, | |
| "valid_targets_min": 2928 | |
| }, | |
| { | |
| "epoch": 3.7761674718196456, | |
| "grad_norm": 0.5122162205986636, | |
| "learning_rate": 2.0754702447043585e-05, | |
| "loss": 0.1906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21546904742717743, | |
| "step": 2345, | |
| "valid_targets_mean": 6460.8, | |
| "valid_targets_min": 2335 | |
| }, | |
| { | |
| "epoch": 3.784219001610306, | |
| "grad_norm": 0.43207305254475, | |
| "learning_rate": 2.0674447214246394e-05, | |
| "loss": 0.2006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15075618028640747, | |
| "step": 2350, | |
| "valid_targets_mean": 6145.8, | |
| "valid_targets_min": 3540 | |
| }, | |
| { | |
| "epoch": 3.792270531400966, | |
| "grad_norm": 0.4774154606281994, | |
| "learning_rate": 2.059418110744633e-05, | |
| "loss": 0.1845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17323094606399536, | |
| "step": 2355, | |
| "valid_targets_mean": 5129.8, | |
| "valid_targets_min": 3650 | |
| }, | |
| { | |
| "epoch": 3.8003220611916264, | |
| "grad_norm": 0.46983051174721563, | |
| "learning_rate": 2.0513905420760798e-05, | |
| "loss": 0.1738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1917821168899536, | |
| "step": 2360, | |
| "valid_targets_mean": 6283.1, | |
| "valid_targets_min": 3367 | |
| }, | |
| { | |
| "epoch": 3.8083735909822867, | |
| "grad_norm": 0.4902336033194191, | |
| "learning_rate": 2.043362144846164e-05, | |
| "loss": 0.1973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25147587060928345, | |
| "step": 2365, | |
| "valid_targets_mean": 5965.1, | |
| "valid_targets_min": 3021 | |
| }, | |
| { | |
| "epoch": 3.816425120772947, | |
| "grad_norm": 0.42280694124833545, | |
| "learning_rate": 2.035333048495431e-05, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20615729689598083, | |
| "step": 2370, | |
| "valid_targets_mean": 7093.3, | |
| "valid_targets_min": 2985 | |
| }, | |
| { | |
| "epoch": 3.824476650563607, | |
| "grad_norm": 0.4889232418853953, | |
| "learning_rate": 2.0273033824756964e-05, | |
| "loss": 0.1792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2106115221977234, | |
| "step": 2375, | |
| "valid_targets_mean": 5944.9, | |
| "valid_targets_min": 3130 | |
| }, | |
| { | |
| "epoch": 3.8325281803542675, | |
| "grad_norm": 0.4486166262691516, | |
| "learning_rate": 2.0192732762479616e-05, | |
| "loss": 0.1848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20369920134544373, | |
| "step": 2380, | |
| "valid_targets_mean": 6768.1, | |
| "valid_targets_min": 2346 | |
| }, | |
| { | |
| "epoch": 3.8405797101449277, | |
| "grad_norm": 0.5052893828675203, | |
| "learning_rate": 2.011242859280325e-05, | |
| "loss": 0.18, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20773877203464508, | |
| "step": 2385, | |
| "valid_targets_mean": 5635.4, | |
| "valid_targets_min": 3421 | |
| }, | |
| { | |
| "epoch": 3.848631239935588, | |
| "grad_norm": 0.552069432523759, | |
| "learning_rate": 2.0032122610458947e-05, | |
| "loss": 0.1882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16113221645355225, | |
| "step": 2390, | |
| "valid_targets_mean": 4650.4, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 3.8566827697262482, | |
| "grad_norm": 0.524046615447888, | |
| "learning_rate": 1.9951816110207004e-05, | |
| "loss": 0.1839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20094186067581177, | |
| "step": 2395, | |
| "valid_targets_mean": 5519.9, | |
| "valid_targets_min": 3054 | |
| }, | |
| { | |
| "epoch": 3.864734299516908, | |
| "grad_norm": 0.48693523721557375, | |
| "learning_rate": 1.9871510386816103e-05, | |
| "loss": 0.1811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18589508533477783, | |
| "step": 2400, | |
| "valid_targets_mean": 6197.9, | |
| "valid_targets_min": 3424 | |
| }, | |
| { | |
| "epoch": 3.8727858293075683, | |
| "grad_norm": 0.49674650842025453, | |
| "learning_rate": 1.979120673504235e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16874763369560242, | |
| "step": 2405, | |
| "valid_targets_mean": 5672.4, | |
| "valid_targets_min": 3860 | |
| }, | |
| { | |
| "epoch": 3.8808373590982286, | |
| "grad_norm": 0.5935011533211295, | |
| "learning_rate": 1.9710906449608498e-05, | |
| "loss": 0.1782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16884219646453857, | |
| "step": 2410, | |
| "valid_targets_mean": 5892.7, | |
| "valid_targets_min": 2898 | |
| }, | |
| { | |
| "epoch": 3.888888888888889, | |
| "grad_norm": 0.4678758596506749, | |
| "learning_rate": 1.9630610825182992e-05, | |
| "loss": 0.1789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1843390017747879, | |
| "step": 2415, | |
| "valid_targets_mean": 5762.8, | |
| "valid_targets_min": 3670 | |
| }, | |
| { | |
| "epoch": 3.896940418679549, | |
| "grad_norm": 0.466433159436347, | |
| "learning_rate": 1.955032115635915e-05, | |
| "loss": 0.1733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16634991765022278, | |
| "step": 2420, | |
| "valid_targets_mean": 6079.9, | |
| "valid_targets_min": 3135 | |
| }, | |
| { | |
| "epoch": 3.9049919484702094, | |
| "grad_norm": 0.4703450305531825, | |
| "learning_rate": 1.9470038737634257e-05, | |
| "loss": 0.1817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18430741131305695, | |
| "step": 2425, | |
| "valid_targets_mean": 5954.4, | |
| "valid_targets_min": 2502 | |
| }, | |
| { | |
| "epoch": 3.9130434782608696, | |
| "grad_norm": 0.4988164707549782, | |
| "learning_rate": 1.9389764863388706e-05, | |
| "loss": 0.1794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18756403028964996, | |
| "step": 2430, | |
| "valid_targets_mean": 5311.7, | |
| "valid_targets_min": 3278 | |
| }, | |
| { | |
| "epoch": 3.92109500805153, | |
| "grad_norm": 0.4630341885874764, | |
| "learning_rate": 1.9309500827865136e-05, | |
| "loss": 0.1745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15896305441856384, | |
| "step": 2435, | |
| "valid_targets_mean": 5995.9, | |
| "valid_targets_min": 2819 | |
| }, | |
| { | |
| "epoch": 3.92914653784219, | |
| "grad_norm": 0.4913791539120621, | |
| "learning_rate": 1.9229247925147553e-05, | |
| "loss": 0.1751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16495642066001892, | |
| "step": 2440, | |
| "valid_targets_mean": 5220.8, | |
| "valid_targets_min": 3358 | |
| }, | |
| { | |
| "epoch": 3.9371980676328504, | |
| "grad_norm": 0.46392543060132874, | |
| "learning_rate": 1.9149007449140462e-05, | |
| "loss": 0.19, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17253006994724274, | |
| "step": 2445, | |
| "valid_targets_mean": 4946.7, | |
| "valid_targets_min": 2665 | |
| }, | |
| { | |
| "epoch": 3.9452495974235102, | |
| "grad_norm": 0.4222579875698941, | |
| "learning_rate": 1.906878069354804e-05, | |
| "loss": 0.189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16640131175518036, | |
| "step": 2450, | |
| "valid_targets_mean": 6109.9, | |
| "valid_targets_min": 2762 | |
| }, | |
| { | |
| "epoch": 3.9533011272141705, | |
| "grad_norm": 0.5936694712109022, | |
| "learning_rate": 1.898856895185322e-05, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1677541732788086, | |
| "step": 2455, | |
| "valid_targets_mean": 5266.1, | |
| "valid_targets_min": 3731 | |
| }, | |
| { | |
| "epoch": 3.9613526570048307, | |
| "grad_norm": 0.5089530035696851, | |
| "learning_rate": 1.8908373517296888e-05, | |
| "loss": 0.1739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17881962656974792, | |
| "step": 2460, | |
| "valid_targets_mean": 6246.1, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 3.969404186795491, | |
| "grad_norm": 0.4163908705213259, | |
| "learning_rate": 1.882819568285701e-05, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16383564472198486, | |
| "step": 2465, | |
| "valid_targets_mean": 6126.9, | |
| "valid_targets_min": 3345 | |
| }, | |
| { | |
| "epoch": 3.9774557165861513, | |
| "grad_norm": 0.5352174387805062, | |
| "learning_rate": 1.874803674122778e-05, | |
| "loss": 0.1716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1686192899942398, | |
| "step": 2470, | |
| "valid_targets_mean": 5409.5, | |
| "valid_targets_min": 2785 | |
| }, | |
| { | |
| "epoch": 3.9855072463768115, | |
| "grad_norm": 0.5174379593558874, | |
| "learning_rate": 1.8667897984798804e-05, | |
| "loss": 0.1864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20292040705680847, | |
| "step": 2475, | |
| "valid_targets_mean": 6526.0, | |
| "valid_targets_min": 3724 | |
| }, | |
| { | |
| "epoch": 3.993558776167472, | |
| "grad_norm": 0.46174047001815965, | |
| "learning_rate": 1.858778070563422e-05, | |
| "loss": 0.1668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1856258511543274, | |
| "step": 2480, | |
| "valid_targets_mean": 5751.2, | |
| "valid_targets_min": 3683 | |
| }, | |
| { | |
| "epoch": 4.001610305958132, | |
| "grad_norm": 0.486458689377258, | |
| "learning_rate": 1.8507686195451918e-05, | |
| "loss": 0.1927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22269584238529205, | |
| "step": 2485, | |
| "valid_targets_mean": 6743.1, | |
| "valid_targets_min": 2494 | |
| }, | |
| { | |
| "epoch": 4.009661835748792, | |
| "grad_norm": 0.49067053864038795, | |
| "learning_rate": 1.8427615745602667e-05, | |
| "loss": 0.2628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24867583811283112, | |
| "step": 2490, | |
| "valid_targets_mean": 6679.0, | |
| "valid_targets_min": 2802 | |
| }, | |
| { | |
| "epoch": 4.017713365539453, | |
| "grad_norm": 0.48082700217324775, | |
| "learning_rate": 1.834757064704933e-05, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2587515413761139, | |
| "step": 2495, | |
| "valid_targets_mean": 6534.6, | |
| "valid_targets_min": 2100 | |
| }, | |
| { | |
| "epoch": 4.025764895330113, | |
| "grad_norm": 0.463238335944571, | |
| "learning_rate": 1.826755219034603e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24451354146003723, | |
| "step": 2500, | |
| "valid_targets_mean": 6190.5, | |
| "valid_targets_min": 2430 | |
| }, | |
| { | |
| "epoch": 4.033816425120773, | |
| "grad_norm": 0.47115946029794054, | |
| "learning_rate": 1.818756166561733e-05, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2353125512599945, | |
| "step": 2505, | |
| "valid_targets_mean": 6432.6, | |
| "valid_targets_min": 1969 | |
| }, | |
| { | |
| "epoch": 4.041867954911433, | |
| "grad_norm": 0.5556347094263723, | |
| "learning_rate": 1.8107600362537473e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25839778780937195, | |
| "step": 2510, | |
| "valid_targets_mean": 4996.2, | |
| "valid_targets_min": 695 | |
| }, | |
| { | |
| "epoch": 4.049919484702094, | |
| "grad_norm": 0.4828357803593343, | |
| "learning_rate": 1.8027669570309572e-05, | |
| "loss": 0.2532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28702059388160706, | |
| "step": 2515, | |
| "valid_targets_mean": 7438.4, | |
| "valid_targets_min": 3650 | |
| }, | |
| { | |
| "epoch": 4.057971014492754, | |
| "grad_norm": 0.4732689439450728, | |
| "learning_rate": 1.7947770577644787e-05, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24621464312076569, | |
| "step": 2520, | |
| "valid_targets_mean": 5771.1, | |
| "valid_targets_min": 2778 | |
| }, | |
| { | |
| "epoch": 4.066022544283414, | |
| "grad_norm": 0.51704075854913, | |
| "learning_rate": 1.786790467274161e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27112066745758057, | |
| "step": 2525, | |
| "valid_targets_mean": 6732.4, | |
| "valid_targets_min": 880 | |
| }, | |
| { | |
| "epoch": 4.074074074074074, | |
| "grad_norm": 0.4888484890427802, | |
| "learning_rate": 1.778807314326505e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2849244475364685, | |
| "step": 2530, | |
| "valid_targets_mean": 6322.0, | |
| "valid_targets_min": 2822 | |
| }, | |
| { | |
| "epoch": 4.082125603864735, | |
| "grad_norm": 0.5118382404505906, | |
| "learning_rate": 1.7708277276325886e-05, | |
| "loss": 0.2316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2283010631799698, | |
| "step": 2535, | |
| "valid_targets_mean": 6036.7, | |
| "valid_targets_min": 1334 | |
| }, | |
| { | |
| "epoch": 4.090177133655395, | |
| "grad_norm": 0.47873074069899385, | |
| "learning_rate": 1.762851835845992e-05, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20961812138557434, | |
| "step": 2540, | |
| "valid_targets_mean": 5966.9, | |
| "valid_targets_min": 3109 | |
| }, | |
| { | |
| "epoch": 4.098228663446055, | |
| "grad_norm": 0.5079150029764018, | |
| "learning_rate": 1.754879767560723e-05, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23400455713272095, | |
| "step": 2545, | |
| "valid_targets_mean": 5874.7, | |
| "valid_targets_min": 2080 | |
| }, | |
| { | |
| "epoch": 4.106280193236715, | |
| "grad_norm": 0.4709937790052201, | |
| "learning_rate": 1.746911651309144e-05, | |
| "loss": 0.2479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23563805222511292, | |
| "step": 2550, | |
| "valid_targets_mean": 5984.3, | |
| "valid_targets_min": 2278 | |
| }, | |
| { | |
| "epoch": 4.114331723027375, | |
| "grad_norm": 0.5582460016862327, | |
| "learning_rate": 1.7389476155598974e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261229932308197, | |
| "step": 2555, | |
| "valid_targets_mean": 4817.7, | |
| "valid_targets_min": 1991 | |
| }, | |
| { | |
| "epoch": 4.122383252818035, | |
| "grad_norm": 0.4613472981940905, | |
| "learning_rate": 1.7309877887158388e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22766956686973572, | |
| "step": 2560, | |
| "valid_targets_mean": 6135.9, | |
| "valid_targets_min": 1736 | |
| }, | |
| { | |
| "epoch": 4.130434782608695, | |
| "grad_norm": 0.5809580379347714, | |
| "learning_rate": 1.723032299111964e-05, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2500440180301666, | |
| "step": 2565, | |
| "valid_targets_mean": 5818.6, | |
| "valid_targets_min": 2083 | |
| }, | |
| { | |
| "epoch": 4.138486312399356, | |
| "grad_norm": 0.9151207208782325, | |
| "learning_rate": 1.7150812750133382e-05, | |
| "loss": 0.3601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3996202051639557, | |
| "step": 2570, | |
| "valid_targets_mean": 3210.2, | |
| "valid_targets_min": 1526 | |
| }, | |
| { | |
| "epoch": 4.146537842190016, | |
| "grad_norm": 0.8283372169743392, | |
| "learning_rate": 1.707134844613032e-05, | |
| "loss": 0.3757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34221383929252625, | |
| "step": 2575, | |
| "valid_targets_mean": 2571.2, | |
| "valid_targets_min": 1220 | |
| }, | |
| { | |
| "epoch": 4.154589371980676, | |
| "grad_norm": 0.7100117027673157, | |
| "learning_rate": 1.699193136030052e-05, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34208422899246216, | |
| "step": 2580, | |
| "valid_targets_mean": 3851.6, | |
| "valid_targets_min": 1610 | |
| }, | |
| { | |
| "epoch": 4.162640901771336, | |
| "grad_norm": 0.7684487763500514, | |
| "learning_rate": 1.6912562773072765e-05, | |
| "loss": 0.3701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3207693099975586, | |
| "step": 2585, | |
| "valid_targets_mean": 3207.0, | |
| "valid_targets_min": 1232 | |
| }, | |
| { | |
| "epoch": 4.170692431561997, | |
| "grad_norm": 0.8557363619696622, | |
| "learning_rate": 1.6833243964093877e-05, | |
| "loss": 0.3431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3694392144680023, | |
| "step": 2590, | |
| "valid_targets_mean": 2929.4, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 4.178743961352657, | |
| "grad_norm": 0.8161354731224225, | |
| "learning_rate": 1.6753976212208137e-05, | |
| "loss": 0.3345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3618141710758209, | |
| "step": 2595, | |
| "valid_targets_mean": 3058.9, | |
| "valid_targets_min": 1415 | |
| }, | |
| { | |
| "epoch": 4.186795491143317, | |
| "grad_norm": 0.7945413438871889, | |
| "learning_rate": 1.667476079543664e-05, | |
| "loss": 0.3499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3769470155239105, | |
| "step": 2600, | |
| "valid_targets_mean": 3233.4, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 4.194847020933977, | |
| "grad_norm": 0.7506914826900978, | |
| "learning_rate": 1.659559899095667e-05, | |
| "loss": 0.3485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3336166739463806, | |
| "step": 2605, | |
| "valid_targets_mean": 3071.4, | |
| "valid_targets_min": 1422 | |
| }, | |
| { | |
| "epoch": 4.202898550724638, | |
| "grad_norm": 0.8680474470245183, | |
| "learning_rate": 1.651649207508114e-05, | |
| "loss": 0.3423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3558621406555176, | |
| "step": 2610, | |
| "valid_targets_mean": 2435.2, | |
| "valid_targets_min": 1299 | |
| }, | |
| { | |
| "epoch": 4.210950080515298, | |
| "grad_norm": 0.850673248513953, | |
| "learning_rate": 1.643744132323801e-05, | |
| "loss": 0.3612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3782421946525574, | |
| "step": 2615, | |
| "valid_targets_mean": 2803.0, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 4.219001610305958, | |
| "grad_norm": 0.9623854143221985, | |
| "learning_rate": 1.6358448009949714e-05, | |
| "loss": 0.3289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3601483106613159, | |
| "step": 2620, | |
| "valid_targets_mean": 2628.6, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 4.2270531400966185, | |
| "grad_norm": 0.7298752703256921, | |
| "learning_rate": 1.6279513408812603e-05, | |
| "loss": 0.3494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3530423641204834, | |
| "step": 2625, | |
| "valid_targets_mean": 3935.0, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 4.235104669887279, | |
| "grad_norm": 0.7922927416603631, | |
| "learning_rate": 1.620063879247643e-05, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33408689498901367, | |
| "step": 2630, | |
| "valid_targets_mean": 2861.8, | |
| "valid_targets_min": 1352 | |
| }, | |
| { | |
| "epoch": 4.243156199677939, | |
| "grad_norm": 0.8225640040752034, | |
| "learning_rate": 1.6121825432623827e-05, | |
| "loss": 0.3569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34123361110687256, | |
| "step": 2635, | |
| "valid_targets_mean": 2913.5, | |
| "valid_targets_min": 1603 | |
| }, | |
| { | |
| "epoch": 4.251207729468599, | |
| "grad_norm": 0.862014730987376, | |
| "learning_rate": 1.6043074599949785e-05, | |
| "loss": 0.3476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35428357124328613, | |
| "step": 2640, | |
| "valid_targets_mean": 2604.4, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 4.2592592592592595, | |
| "grad_norm": 0.8556648106440016, | |
| "learning_rate": 1.5964387564141192e-05, | |
| "loss": 0.3331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.339069128036499, | |
| "step": 2645, | |
| "valid_targets_mean": 3359.8, | |
| "valid_targets_min": 1454 | |
| }, | |
| { | |
| "epoch": 4.26731078904992, | |
| "grad_norm": 0.849595092751884, | |
| "learning_rate": 1.588576559385635e-05, | |
| "loss": 0.3321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3184316158294678, | |
| "step": 2650, | |
| "valid_targets_mean": 2601.7, | |
| "valid_targets_min": 1398 | |
| }, | |
| { | |
| "epoch": 4.27536231884058, | |
| "grad_norm": 0.7834684766024429, | |
| "learning_rate": 1.5807209956704505e-05, | |
| "loss": 0.3522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305340111255646, | |
| "step": 2655, | |
| "valid_targets_mean": 3171.8, | |
| "valid_targets_min": 1293 | |
| }, | |
| { | |
| "epoch": 4.28341384863124, | |
| "grad_norm": 0.8658423398271352, | |
| "learning_rate": 1.5728721919225428e-05, | |
| "loss": 0.3447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35852861404418945, | |
| "step": 2660, | |
| "valid_targets_mean": 3561.2, | |
| "valid_targets_min": 995 | |
| }, | |
| { | |
| "epoch": 4.291465378421901, | |
| "grad_norm": 0.7999552824406122, | |
| "learning_rate": 1.5650302746869004e-05, | |
| "loss": 0.3338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3435271978378296, | |
| "step": 2665, | |
| "valid_targets_mean": 3375.6, | |
| "valid_targets_min": 1027 | |
| }, | |
| { | |
| "epoch": 4.29951690821256, | |
| "grad_norm": 0.8649636583204158, | |
| "learning_rate": 1.5571953703974813e-05, | |
| "loss": 0.3578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37238696217536926, | |
| "step": 2670, | |
| "valid_targets_mean": 2952.2, | |
| "valid_targets_min": 1455 | |
| }, | |
| { | |
| "epoch": 4.30756843800322, | |
| "grad_norm": 0.8745862341667207, | |
| "learning_rate": 1.5493676053751747e-05, | |
| "loss": 0.328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36845505237579346, | |
| "step": 2675, | |
| "valid_targets_mean": 3117.5, | |
| "valid_targets_min": 1411 | |
| }, | |
| { | |
| "epoch": 4.3156199677938805, | |
| "grad_norm": 0.9341881778545614, | |
| "learning_rate": 1.5415471058257638e-05, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37140941619873047, | |
| "step": 2680, | |
| "valid_targets_mean": 2730.5, | |
| "valid_targets_min": 1549 | |
| }, | |
| { | |
| "epoch": 4.323671497584541, | |
| "grad_norm": 0.9405657665431416, | |
| "learning_rate": 1.533733997837893e-05, | |
| "loss": 0.3553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3740704655647278, | |
| "step": 2685, | |
| "valid_targets_mean": 2825.3, | |
| "valid_targets_min": 1180 | |
| }, | |
| { | |
| "epoch": 4.331723027375201, | |
| "grad_norm": 0.7710353538662376, | |
| "learning_rate": 1.5259284073810333e-05, | |
| "loss": 0.3317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32604238390922546, | |
| "step": 2690, | |
| "valid_targets_mean": 3510.8, | |
| "valid_targets_min": 1347 | |
| }, | |
| { | |
| "epoch": 4.339774557165861, | |
| "grad_norm": 0.781426537140954, | |
| "learning_rate": 1.5181304603034513e-05, | |
| "loss": 0.3603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37608543038368225, | |
| "step": 2695, | |
| "valid_targets_mean": 3150.6, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 0.7237047762120449, | |
| "learning_rate": 1.5103402823301819e-05, | |
| "loss": 0.34, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33800530433654785, | |
| "step": 2700, | |
| "valid_targets_mean": 3118.8, | |
| "valid_targets_min": 1146 | |
| }, | |
| { | |
| "epoch": 4.355877616747182, | |
| "grad_norm": 0.7630896900796991, | |
| "learning_rate": 1.5025579990609973e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3303137421607971, | |
| "step": 2705, | |
| "valid_targets_mean": 3119.9, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 4.363929146537842, | |
| "grad_norm": 0.8601788048234646, | |
| "learning_rate": 1.4947837359683882e-05, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3552572727203369, | |
| "step": 2710, | |
| "valid_targets_mean": 2656.8, | |
| "valid_targets_min": 1224 | |
| }, | |
| { | |
| "epoch": 4.371980676328502, | |
| "grad_norm": 0.7820969944556317, | |
| "learning_rate": 1.487017618395534e-05, | |
| "loss": 0.316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.309645414352417, | |
| "step": 2715, | |
| "valid_targets_mean": 3481.2, | |
| "valid_targets_min": 1468 | |
| }, | |
| { | |
| "epoch": 4.3800322061191626, | |
| "grad_norm": 0.7637150424395313, | |
| "learning_rate": 1.479259771554288e-05, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34639042615890503, | |
| "step": 2720, | |
| "valid_targets_mean": 3636.6, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 4.388083735909823, | |
| "grad_norm": 0.6908525532816608, | |
| "learning_rate": 1.4715103205231545e-05, | |
| "loss": 0.3107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.327004075050354, | |
| "step": 2725, | |
| "valid_targets_mean": 4069.1, | |
| "valid_targets_min": 1625 | |
| }, | |
| { | |
| "epoch": 4.396135265700483, | |
| "grad_norm": 0.6634760061573296, | |
| "learning_rate": 1.463769390245273e-05, | |
| "loss": 0.3152, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30868253111839294, | |
| "step": 2730, | |
| "valid_targets_mean": 4105.0, | |
| "valid_targets_min": 1173 | |
| }, | |
| { | |
| "epoch": 4.404186795491143, | |
| "grad_norm": 0.6687824762796025, | |
| "learning_rate": 1.4560371055264052e-05, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3181726336479187, | |
| "step": 2735, | |
| "valid_targets_mean": 4012.6, | |
| "valid_targets_min": 2398 | |
| }, | |
| { | |
| "epoch": 4.412238325281804, | |
| "grad_norm": 0.7648132029162774, | |
| "learning_rate": 1.448313591032922e-05, | |
| "loss": 0.3089, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30312180519104004, | |
| "step": 2740, | |
| "valid_targets_mean": 3146.2, | |
| "valid_targets_min": 1247 | |
| }, | |
| { | |
| "epoch": 4.420289855072464, | |
| "grad_norm": 0.6458192190255184, | |
| "learning_rate": 1.4405989712897923e-05, | |
| "loss": 0.3033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30585235357284546, | |
| "step": 2745, | |
| "valid_targets_mean": 4329.4, | |
| "valid_targets_min": 1125 | |
| }, | |
| { | |
| "epoch": 4.428341384863124, | |
| "grad_norm": 0.6550907118099679, | |
| "learning_rate": 1.4328933706785782e-05, | |
| "loss": 0.277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2688300609588623, | |
| "step": 2750, | |
| "valid_targets_mean": 3632.9, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 4.436392914653784, | |
| "grad_norm": 0.7080323021524921, | |
| "learning_rate": 1.4251969134354247e-05, | |
| "loss": 0.297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29432380199432373, | |
| "step": 2755, | |
| "valid_targets_mean": 3553.6, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 0.7541689777697117, | |
| "learning_rate": 1.4175097236490627e-05, | |
| "loss": 0.2966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3119310438632965, | |
| "step": 2760, | |
| "valid_targets_mean": 3095.5, | |
| "valid_targets_min": 1793 | |
| }, | |
| { | |
| "epoch": 4.452495974235105, | |
| "grad_norm": 0.7223929430673088, | |
| "learning_rate": 1.409831925258805e-05, | |
| "loss": 0.3023, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2738376259803772, | |
| "step": 2765, | |
| "valid_targets_mean": 3082.8, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 4.460547504025765, | |
| "grad_norm": 0.7341114060579149, | |
| "learning_rate": 1.4021636420525466e-05, | |
| "loss": 0.2945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2992332875728607, | |
| "step": 2770, | |
| "valid_targets_mean": 2982.9, | |
| "valid_targets_min": 1490 | |
| }, | |
| { | |
| "epoch": 4.468599033816425, | |
| "grad_norm": 0.7930767785136432, | |
| "learning_rate": 1.3945049976647726e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3382141590118408, | |
| "step": 2775, | |
| "valid_targets_mean": 3224.7, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 4.476650563607086, | |
| "grad_norm": 0.7051589780334897, | |
| "learning_rate": 1.3868561155745628e-05, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.302938848733902, | |
| "step": 2780, | |
| "valid_targets_mean": 4591.2, | |
| "valid_targets_min": 1895 | |
| }, | |
| { | |
| "epoch": 4.484702093397746, | |
| "grad_norm": 0.776963321434189, | |
| "learning_rate": 1.3792171191036001e-05, | |
| "loss": 0.2936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32361382246017456, | |
| "step": 2785, | |
| "valid_targets_mean": 3124.8, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 4.492753623188406, | |
| "grad_norm": 0.8112701248746712, | |
| "learning_rate": 1.3715881314141835e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2923651337623596, | |
| "step": 2790, | |
| "valid_targets_mean": 2744.4, | |
| "valid_targets_min": 1399 | |
| }, | |
| { | |
| "epoch": 4.500805152979066, | |
| "grad_norm": 0.7331706558443012, | |
| "learning_rate": 1.3639692755072429e-05, | |
| "loss": 0.2814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3007197380065918, | |
| "step": 2795, | |
| "valid_targets_mean": 3716.5, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 4.508856682769727, | |
| "grad_norm": 0.647744700371841, | |
| "learning_rate": 1.3563606742203548e-05, | |
| "loss": 0.2993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2892110347747803, | |
| "step": 2800, | |
| "valid_targets_mean": 4289.1, | |
| "valid_targets_min": 1433 | |
| }, | |
| { | |
| "epoch": 4.516908212560386, | |
| "grad_norm": 0.9421850377075999, | |
| "learning_rate": 1.3487624502257598e-05, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3276273310184479, | |
| "step": 2805, | |
| "valid_targets_mean": 2789.2, | |
| "valid_targets_min": 1322 | |
| }, | |
| { | |
| "epoch": 4.524959742351046, | |
| "grad_norm": 0.7188700197789585, | |
| "learning_rate": 1.3411747260283905e-05, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2845657765865326, | |
| "step": 2810, | |
| "valid_targets_mean": 3436.0, | |
| "valid_targets_min": 1213 | |
| }, | |
| { | |
| "epoch": 4.533011272141707, | |
| "grad_norm": 0.7781869831067078, | |
| "learning_rate": 1.333597623963892e-05, | |
| "loss": 0.2833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29683226346969604, | |
| "step": 2815, | |
| "valid_targets_mean": 2580.9, | |
| "valid_targets_min": 1280 | |
| }, | |
| { | |
| "epoch": 4.541062801932367, | |
| "grad_norm": 0.7012527244821464, | |
| "learning_rate": 1.3260312661966487e-05, | |
| "loss": 0.3125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30411043763160706, | |
| "step": 2820, | |
| "valid_targets_mean": 4231.2, | |
| "valid_targets_min": 1381 | |
| }, | |
| { | |
| "epoch": 4.549114331723027, | |
| "grad_norm": 0.6896904130191109, | |
| "learning_rate": 1.3184757747178187e-05, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29390496015548706, | |
| "step": 2825, | |
| "valid_targets_mean": 3503.9, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 4.557165861513687, | |
| "grad_norm": 0.696983130694775, | |
| "learning_rate": 1.3109312713433642e-05, | |
| "loss": 0.2834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31736573576927185, | |
| "step": 2830, | |
| "valid_targets_mean": 3569.1, | |
| "valid_targets_min": 1245 | |
| }, | |
| { | |
| "epoch": 4.565217391304348, | |
| "grad_norm": 0.7902589844756526, | |
| "learning_rate": 1.3033978777120861e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30584877729415894, | |
| "step": 2835, | |
| "valid_targets_mean": 2826.0, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 4.573268921095008, | |
| "grad_norm": 0.7274888924433361, | |
| "learning_rate": 1.2958757152836671e-05, | |
| "loss": 0.2881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25177866220474243, | |
| "step": 2840, | |
| "valid_targets_mean": 3450.6, | |
| "valid_targets_min": 1486 | |
| }, | |
| { | |
| "epoch": 4.581320450885668, | |
| "grad_norm": 0.725180749478288, | |
| "learning_rate": 1.2883649053367106e-05, | |
| "loss": 0.2872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2852326035499573, | |
| "step": 2845, | |
| "valid_targets_mean": 3446.2, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 4.5893719806763285, | |
| "grad_norm": 0.8999719879391306, | |
| "learning_rate": 1.2808655689667846e-05, | |
| "loss": 0.3124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.386888325214386, | |
| "step": 2850, | |
| "valid_targets_mean": 3519.2, | |
| "valid_targets_min": 1327 | |
| }, | |
| { | |
| "epoch": 4.597423510466989, | |
| "grad_norm": 0.7442219129910428, | |
| "learning_rate": 1.2733778270844712e-05, | |
| "loss": 0.2916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3202979862689972, | |
| "step": 2855, | |
| "valid_targets_mean": 3581.2, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 4.605475040257649, | |
| "grad_norm": 0.7961958367210087, | |
| "learning_rate": 1.265901800413416e-05, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.289141982793808, | |
| "step": 2860, | |
| "valid_targets_mean": 3373.2, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 4.613526570048309, | |
| "grad_norm": 0.7594348180961747, | |
| "learning_rate": 1.2584376094883832e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29926833510398865, | |
| "step": 2865, | |
| "valid_targets_mean": 3217.2, | |
| "valid_targets_min": 1432 | |
| }, | |
| { | |
| "epoch": 4.6215780998389695, | |
| "grad_norm": 0.7903300067618602, | |
| "learning_rate": 1.250985374653311e-05, | |
| "loss": 0.2729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2627432942390442, | |
| "step": 2870, | |
| "valid_targets_mean": 2827.8, | |
| "valid_targets_min": 368 | |
| }, | |
| { | |
| "epoch": 4.62962962962963, | |
| "grad_norm": 0.746273737284161, | |
| "learning_rate": 1.2435452160593698e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2513338625431061, | |
| "step": 2875, | |
| "valid_targets_mean": 3353.4, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 4.63768115942029, | |
| "grad_norm": 0.7106900237367672, | |
| "learning_rate": 1.2361172536630288e-05, | |
| "loss": 0.2823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2939052879810333, | |
| "step": 2880, | |
| "valid_targets_mean": 4223.3, | |
| "valid_targets_min": 1746 | |
| }, | |
| { | |
| "epoch": 4.64573268921095, | |
| "grad_norm": 0.6667954441747274, | |
| "learning_rate": 1.2287016072241195e-05, | |
| "loss": 0.3012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2683846652507782, | |
| "step": 2885, | |
| "valid_targets_mean": 3858.1, | |
| "valid_targets_min": 1327 | |
| }, | |
| { | |
| "epoch": 4.6537842190016105, | |
| "grad_norm": 0.9490652913436398, | |
| "learning_rate": 1.221298396303904e-05, | |
| "loss": 0.2801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2613232135772705, | |
| "step": 2890, | |
| "valid_targets_mean": 2620.6, | |
| "valid_targets_min": 1039 | |
| }, | |
| { | |
| "epoch": 4.661835748792271, | |
| "grad_norm": 0.7333762476710057, | |
| "learning_rate": 1.2139077402631495e-05, | |
| "loss": 0.2977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2822628617286682, | |
| "step": 2895, | |
| "valid_targets_mean": 3479.4, | |
| "valid_targets_min": 1238 | |
| }, | |
| { | |
| "epoch": 4.669887278582931, | |
| "grad_norm": 0.6515841944617482, | |
| "learning_rate": 1.2065297582602037e-05, | |
| "loss": 0.2824, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25587737560272217, | |
| "step": 2900, | |
| "valid_targets_mean": 3427.9, | |
| "valid_targets_min": 1281 | |
| }, | |
| { | |
| "epoch": 4.677938808373591, | |
| "grad_norm": 0.8050654547421896, | |
| "learning_rate": 1.199164569249071e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.299319863319397, | |
| "step": 2905, | |
| "valid_targets_mean": 2845.1, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 4.685990338164252, | |
| "grad_norm": 0.6708735466300162, | |
| "learning_rate": 1.191812291977497e-05, | |
| "loss": 0.2809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2744864225387573, | |
| "step": 2910, | |
| "valid_targets_mean": 4298.4, | |
| "valid_targets_min": 1411 | |
| }, | |
| { | |
| "epoch": 4.694041867954912, | |
| "grad_norm": 0.7514813752903359, | |
| "learning_rate": 1.1844730449850546e-05, | |
| "loss": 0.33, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3047374486923218, | |
| "step": 2915, | |
| "valid_targets_mean": 3738.6, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 4.702093397745571, | |
| "grad_norm": 0.8585820063404703, | |
| "learning_rate": 1.1771469466012309e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.263333261013031, | |
| "step": 2920, | |
| "valid_targets_mean": 2371.1, | |
| "valid_targets_min": 1181 | |
| }, | |
| { | |
| "epoch": 4.710144927536232, | |
| "grad_norm": 0.9084047054321588, | |
| "learning_rate": 1.1698341149435196e-05, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29706811904907227, | |
| "step": 2925, | |
| "valid_targets_mean": 3284.2, | |
| "valid_targets_min": 1517 | |
| }, | |
| { | |
| "epoch": 4.718196457326892, | |
| "grad_norm": 0.7335312030781107, | |
| "learning_rate": 1.1625346679155179e-05, | |
| "loss": 0.292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2993118166923523, | |
| "step": 2930, | |
| "valid_targets_mean": 3596.9, | |
| "valid_targets_min": 1119 | |
| }, | |
| { | |
| "epoch": 4.726247987117552, | |
| "grad_norm": 0.7645383662240981, | |
| "learning_rate": 1.1552487232050242e-05, | |
| "loss": 0.2793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27383461594581604, | |
| "step": 2935, | |
| "valid_targets_mean": 3022.3, | |
| "valid_targets_min": 1298 | |
| }, | |
| { | |
| "epoch": 4.734299516908212, | |
| "grad_norm": 0.7045343410442323, | |
| "learning_rate": 1.1479763982821414e-05, | |
| "loss": 0.2943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30053383111953735, | |
| "step": 2940, | |
| "valid_targets_mean": 3824.2, | |
| "valid_targets_min": 1978 | |
| }, | |
| { | |
| "epoch": 4.7423510466988725, | |
| "grad_norm": 0.7631623179378855, | |
| "learning_rate": 1.1407178103973834e-05, | |
| "loss": 0.2866, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2785217761993408, | |
| "step": 2945, | |
| "valid_targets_mean": 3441.6, | |
| "valid_targets_min": 1594 | |
| }, | |
| { | |
| "epoch": 4.750402576489533, | |
| "grad_norm": 0.6749936353133249, | |
| "learning_rate": 1.1334730765797843e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28506165742874146, | |
| "step": 2950, | |
| "valid_targets_mean": 3849.7, | |
| "valid_targets_min": 1650 | |
| }, | |
| { | |
| "epoch": 4.758454106280193, | |
| "grad_norm": 0.737322666574153, | |
| "learning_rate": 1.1262423136350087e-05, | |
| "loss": 0.1971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.161736398935318, | |
| "step": 2955, | |
| "valid_targets_mean": 5966.6, | |
| "valid_targets_min": 3184 | |
| }, | |
| { | |
| "epoch": 4.766505636070853, | |
| "grad_norm": 0.5423893032678805, | |
| "learning_rate": 1.1190256381434738e-05, | |
| "loss": 0.1768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1538485288619995, | |
| "step": 2960, | |
| "valid_targets_mean": 5159.4, | |
| "valid_targets_min": 3589 | |
| }, | |
| { | |
| "epoch": 4.774557165861514, | |
| "grad_norm": 0.5128933204704713, | |
| "learning_rate": 1.1118231664584674e-05, | |
| "loss": 0.1688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17879360914230347, | |
| "step": 2965, | |
| "valid_targets_mean": 6292.2, | |
| "valid_targets_min": 3131 | |
| }, | |
| { | |
| "epoch": 4.782608695652174, | |
| "grad_norm": 0.5098998582752253, | |
| "learning_rate": 1.1046350147042681e-05, | |
| "loss": 0.1995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15875032544136047, | |
| "step": 2970, | |
| "valid_targets_mean": 5304.0, | |
| "valid_targets_min": 3117 | |
| }, | |
| { | |
| "epoch": 4.790660225442834, | |
| "grad_norm": 0.5044753740910969, | |
| "learning_rate": 1.0974612987742807e-05, | |
| "loss": 0.1667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18598923087120056, | |
| "step": 2975, | |
| "valid_targets_mean": 6316.2, | |
| "valid_targets_min": 3455 | |
| }, | |
| { | |
| "epoch": 4.798711755233494, | |
| "grad_norm": 0.4536792935375198, | |
| "learning_rate": 1.0903021343291613e-05, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1645420491695404, | |
| "step": 2980, | |
| "valid_targets_mean": 6636.1, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 4.806763285024155, | |
| "grad_norm": 1.0919493102135451, | |
| "learning_rate": 1.0831576367949555e-05, | |
| "loss": 0.1709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1736430525779724, | |
| "step": 2985, | |
| "valid_targets_mean": 5804.9, | |
| "valid_targets_min": 2579 | |
| }, | |
| { | |
| "epoch": 4.814814814814815, | |
| "grad_norm": 0.44890465909191923, | |
| "learning_rate": 1.0760279213612362e-05, | |
| "loss": 0.2108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23404830694198608, | |
| "step": 2990, | |
| "valid_targets_mean": 6861.9, | |
| "valid_targets_min": 2907 | |
| }, | |
| { | |
| "epoch": 4.822866344605475, | |
| "grad_norm": 0.5343037492936906, | |
| "learning_rate": 1.068913102979248e-05, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1848660707473755, | |
| "step": 2995, | |
| "valid_targets_mean": 6218.1, | |
| "valid_targets_min": 2748 | |
| }, | |
| { | |
| "epoch": 4.830917874396135, | |
| "grad_norm": 0.4771916103447067, | |
| "learning_rate": 1.0618132963600507e-05, | |
| "loss": 0.1722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1645662933588028, | |
| "step": 3000, | |
| "valid_targets_mean": 5813.8, | |
| "valid_targets_min": 3623 | |
| }, | |
| { | |
| "epoch": 4.838969404186796, | |
| "grad_norm": 0.5572712759897953, | |
| "learning_rate": 1.0547286159726743e-05, | |
| "loss": 0.1651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15293219685554504, | |
| "step": 3005, | |
| "valid_targets_mean": 5563.2, | |
| "valid_targets_min": 2843 | |
| }, | |
| { | |
| "epoch": 4.847020933977456, | |
| "grad_norm": 0.5223306827253792, | |
| "learning_rate": 1.047659176042268e-05, | |
| "loss": 0.1855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16464412212371826, | |
| "step": 3010, | |
| "valid_targets_mean": 6044.7, | |
| "valid_targets_min": 3602 | |
| }, | |
| { | |
| "epoch": 4.855072463768116, | |
| "grad_norm": 0.47001363201449686, | |
| "learning_rate": 1.0406050905482647e-05, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.183122456073761, | |
| "step": 3015, | |
| "valid_targets_mean": 6674.1, | |
| "valid_targets_min": 3783 | |
| }, | |
| { | |
| "epoch": 4.8631239935587764, | |
| "grad_norm": 0.5165001888223757, | |
| "learning_rate": 1.033566473222539e-05, | |
| "loss": 0.1701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18210646510124207, | |
| "step": 3020, | |
| "valid_targets_mean": 5845.1, | |
| "valid_targets_min": 2402 | |
| }, | |
| { | |
| "epoch": 4.871175523349437, | |
| "grad_norm": 0.5031892171999971, | |
| "learning_rate": 1.0265434375475744e-05, | |
| "loss": 0.1705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17503511905670166, | |
| "step": 3025, | |
| "valid_targets_mean": 5448.2, | |
| "valid_targets_min": 3720 | |
| }, | |
| { | |
| "epoch": 4.879227053140097, | |
| "grad_norm": 0.49986928711541884, | |
| "learning_rate": 1.0195360967546342e-05, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18064576387405396, | |
| "step": 3030, | |
| "valid_targets_mean": 5979.9, | |
| "valid_targets_min": 2553 | |
| }, | |
| { | |
| "epoch": 4.887278582930757, | |
| "grad_norm": 0.5125587155597235, | |
| "learning_rate": 1.0125445638219369e-05, | |
| "loss": 0.1623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16748838126659393, | |
| "step": 3035, | |
| "valid_targets_mean": 6232.3, | |
| "valid_targets_min": 2860 | |
| }, | |
| { | |
| "epoch": 4.8953301127214175, | |
| "grad_norm": 0.4616045885291346, | |
| "learning_rate": 1.00556895147283e-05, | |
| "loss": 0.1634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.162489652633667, | |
| "step": 3040, | |
| "valid_targets_mean": 5831.9, | |
| "valid_targets_min": 3564 | |
| }, | |
| { | |
| "epoch": 4.903381642512077, | |
| "grad_norm": 0.46322600369785283, | |
| "learning_rate": 9.986093721739793e-06, | |
| "loss": 0.1643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18634885549545288, | |
| "step": 3045, | |
| "valid_targets_mean": 6287.4, | |
| "valid_targets_min": 2925 | |
| }, | |
| { | |
| "epoch": 4.911433172302738, | |
| "grad_norm": 0.46732093070548425, | |
| "learning_rate": 9.916659381335524e-06, | |
| "loss": 0.1657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14257997274398804, | |
| "step": 3050, | |
| "valid_targets_mean": 5975.3, | |
| "valid_targets_min": 2785 | |
| }, | |
| { | |
| "epoch": 4.919484702093397, | |
| "grad_norm": 0.49090695641865617, | |
| "learning_rate": 9.847387612994065e-06, | |
| "loss": 0.1679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1775178611278534, | |
| "step": 3055, | |
| "valid_targets_mean": 5815.7, | |
| "valid_targets_min": 2991 | |
| }, | |
| { | |
| "epoch": 4.927536231884058, | |
| "grad_norm": 0.4936584974779153, | |
| "learning_rate": 9.778279533572894e-06, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16541126370429993, | |
| "step": 3060, | |
| "valid_targets_mean": 6023.0, | |
| "valid_targets_min": 3528 | |
| }, | |
| { | |
| "epoch": 4.935587761674718, | |
| "grad_norm": 0.4843782424726677, | |
| "learning_rate": 9.70933625729035e-06, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17926865816116333, | |
| "step": 3065, | |
| "valid_targets_mean": 6609.0, | |
| "valid_targets_min": 3348 | |
| }, | |
| { | |
| "epoch": 4.943639291465378, | |
| "grad_norm": 0.6642579622564159, | |
| "learning_rate": 9.640558895707681e-06, | |
| "loss": 0.1771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1745370775461197, | |
| "step": 3070, | |
| "valid_targets_mean": 5392.2, | |
| "valid_targets_min": 2988 | |
| }, | |
| { | |
| "epoch": 4.951690821256038, | |
| "grad_norm": 0.5815282039563188, | |
| "learning_rate": 9.571948557711104e-06, | |
| "loss": 0.1592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16913628578186035, | |
| "step": 3075, | |
| "valid_targets_mean": 5687.4, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 4.959742351046699, | |
| "grad_norm": 0.4644634261413639, | |
| "learning_rate": 9.503506349493959e-06, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1633993536233902, | |
| "step": 3080, | |
| "valid_targets_mean": 5669.5, | |
| "valid_targets_min": 2991 | |
| }, | |
| { | |
| "epoch": 4.967793880837359, | |
| "grad_norm": 0.5022295538466749, | |
| "learning_rate": 9.435233374538848e-06, | |
| "loss": 0.1681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15409933030605316, | |
| "step": 3085, | |
| "valid_targets_mean": 5877.1, | |
| "valid_targets_min": 3272 | |
| }, | |
| { | |
| "epoch": 4.975845410628019, | |
| "grad_norm": 0.5467333590771565, | |
| "learning_rate": 9.367130733599863e-06, | |
| "loss": 0.1573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1819993257522583, | |
| "step": 3090, | |
| "valid_targets_mean": 5579.3, | |
| "valid_targets_min": 2743 | |
| }, | |
| { | |
| "epoch": 4.9838969404186795, | |
| "grad_norm": 0.47589633784217944, | |
| "learning_rate": 9.299199524684815e-06, | |
| "loss": 0.1667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16169019043445587, | |
| "step": 3095, | |
| "valid_targets_mean": 6154.2, | |
| "valid_targets_min": 3288 | |
| }, | |
| { | |
| "epoch": 4.99194847020934, | |
| "grad_norm": 0.4987072791545887, | |
| "learning_rate": 9.23144084303756e-06, | |
| "loss": 0.1581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14372682571411133, | |
| "step": 3100, | |
| "valid_targets_mean": 5617.6, | |
| "valid_targets_min": 2206 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5420052690244692, | |
| "learning_rate": 9.163855781120302e-06, | |
| "loss": 0.1723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1929244101047516, | |
| "step": 3105, | |
| "valid_targets_mean": 6658.9, | |
| "valid_targets_min": 2494 | |
| }, | |
| { | |
| "epoch": 5.00805152979066, | |
| "grad_norm": 0.5838464125620735, | |
| "learning_rate": 9.096445428596026e-06, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26280146837234497, | |
| "step": 3110, | |
| "valid_targets_mean": 6454.3, | |
| "valid_targets_min": 2941 | |
| }, | |
| { | |
| "epoch": 5.0161030595813205, | |
| "grad_norm": 0.5544032083369451, | |
| "learning_rate": 9.029210872310884e-06, | |
| "loss": 0.2334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2069273293018341, | |
| "step": 3115, | |
| "valid_targets_mean": 6041.3, | |
| "valid_targets_min": 3050 | |
| }, | |
| { | |
| "epoch": 5.024154589371981, | |
| "grad_norm": 0.5060239859737673, | |
| "learning_rate": 8.962153196276713e-06, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2327526956796646, | |
| "step": 3120, | |
| "valid_targets_mean": 6337.3, | |
| "valid_targets_min": 3318 | |
| }, | |
| { | |
| "epoch": 5.032206119162641, | |
| "grad_norm": 0.5024924584087652, | |
| "learning_rate": 8.895273481653527e-06, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2556541860103607, | |
| "step": 3125, | |
| "valid_targets_mean": 6536.2, | |
| "valid_targets_min": 3943 | |
| }, | |
| { | |
| "epoch": 5.040257648953301, | |
| "grad_norm": 0.5532271429624148, | |
| "learning_rate": 8.828572806732103e-06, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23633992671966553, | |
| "step": 3130, | |
| "valid_targets_mean": 6144.0, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 5.048309178743962, | |
| "grad_norm": 0.5020434087339521, | |
| "learning_rate": 8.76205224691659e-06, | |
| "loss": 0.2349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.220601886510849, | |
| "step": 3135, | |
| "valid_targets_mean": 5658.6, | |
| "valid_targets_min": 3195 | |
| }, | |
| { | |
| "epoch": 5.056360708534622, | |
| "grad_norm": 0.5004024186733302, | |
| "learning_rate": 8.695712874707169e-06, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2531841993331909, | |
| "step": 3140, | |
| "valid_targets_mean": 5992.8, | |
| "valid_targets_min": 3207 | |
| }, | |
| { | |
| "epoch": 5.064412238325282, | |
| "grad_norm": 0.5475537882143925, | |
| "learning_rate": 8.629555759682756e-06, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2494160532951355, | |
| "step": 3145, | |
| "valid_targets_mean": 5498.4, | |
| "valid_targets_min": 2385 | |
| }, | |
| { | |
| "epoch": 5.072463768115942, | |
| "grad_norm": 0.5809278130638802, | |
| "learning_rate": 8.563581968483774e-06, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22677326202392578, | |
| "step": 3150, | |
| "valid_targets_mean": 6101.3, | |
| "valid_targets_min": 2571 | |
| }, | |
| { | |
| "epoch": 5.080515297906603, | |
| "grad_norm": 0.5328987070977548, | |
| "learning_rate": 8.497792564794935e-06, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23735421895980835, | |
| "step": 3155, | |
| "valid_targets_mean": 5591.6, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 5.088566827697263, | |
| "grad_norm": 0.4993503923966659, | |
| "learning_rate": 8.432188609328112e-06, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28377091884613037, | |
| "step": 3160, | |
| "valid_targets_mean": 7175.1, | |
| "valid_targets_min": 3309 | |
| }, | |
| { | |
| "epoch": 5.096618357487923, | |
| "grad_norm": 0.5672470270119984, | |
| "learning_rate": 8.366771159805222e-06, | |
| "loss": 0.2273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22815349698066711, | |
| "step": 3165, | |
| "valid_targets_mean": 5700.9, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 5.1046698872785825, | |
| "grad_norm": 0.5110748498614881, | |
| "learning_rate": 8.301541270941178e-06, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25175783038139343, | |
| "step": 3170, | |
| "valid_targets_mean": 6462.1, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 5.112721417069243, | |
| "grad_norm": 0.5146196376849012, | |
| "learning_rate": 8.236499994426886e-06, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2239510416984558, | |
| "step": 3175, | |
| "valid_targets_mean": 6671.4, | |
| "valid_targets_min": 2978 | |
| }, | |
| { | |
| "epoch": 5.120772946859903, | |
| "grad_norm": 0.5276651861060871, | |
| "learning_rate": 8.171648378912272e-06, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24736565351486206, | |
| "step": 3180, | |
| "valid_targets_mean": 5441.8, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 5.128824476650563, | |
| "grad_norm": 0.5713617059940914, | |
| "learning_rate": 8.1069874699894e-06, | |
| "loss": 0.2113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21684984862804413, | |
| "step": 3185, | |
| "valid_targets_mean": 5118.4, | |
| "valid_targets_min": 2205 | |
| }, | |
| { | |
| "epoch": 5.1368760064412236, | |
| "grad_norm": 0.9446935557315936, | |
| "learning_rate": 8.042518310175607e-06, | |
| "loss": 0.3086, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3164033889770508, | |
| "step": 3190, | |
| "valid_targets_mean": 2431.5, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 5.144927536231884, | |
| "grad_norm": 0.8102422471310984, | |
| "learning_rate": 7.978241938896679e-06, | |
| "loss": 0.3619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3511773347854614, | |
| "step": 3195, | |
| "valid_targets_mean": 3045.8, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 5.152979066022544, | |
| "grad_norm": 0.8229058252512985, | |
| "learning_rate": 7.914159392470118e-06, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3436507284641266, | |
| "step": 3200, | |
| "valid_targets_mean": 3644.5, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 5.161030595813204, | |
| "grad_norm": 0.7911041242061355, | |
| "learning_rate": 7.850271704088396e-06, | |
| "loss": 0.3503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3280431628227234, | |
| "step": 3205, | |
| "valid_targets_mean": 2985.6, | |
| "valid_targets_min": 1353 | |
| }, | |
| { | |
| "epoch": 5.169082125603865, | |
| "grad_norm": 0.7417281880292826, | |
| "learning_rate": 7.786579903802342e-06, | |
| "loss": 0.311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3037336766719818, | |
| "step": 3210, | |
| "valid_targets_mean": 4235.5, | |
| "valid_targets_min": 1153 | |
| }, | |
| { | |
| "epoch": 5.177133655394525, | |
| "grad_norm": 0.8043169878590806, | |
| "learning_rate": 7.723085018504512e-06, | |
| "loss": 0.3114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31899648904800415, | |
| "step": 3215, | |
| "valid_targets_mean": 3142.8, | |
| "valid_targets_min": 1555 | |
| }, | |
| { | |
| "epoch": 5.185185185185185, | |
| "grad_norm": 0.7681103865645257, | |
| "learning_rate": 7.659788071912612e-06, | |
| "loss": 0.3224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3006795644760132, | |
| "step": 3220, | |
| "valid_targets_mean": 3279.6, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 5.193236714975845, | |
| "grad_norm": 0.9279631678919613, | |
| "learning_rate": 7.59669008455304e-06, | |
| "loss": 0.3323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3557708263397217, | |
| "step": 3225, | |
| "valid_targets_mean": 2693.6, | |
| "valid_targets_min": 1497 | |
| }, | |
| { | |
| "epoch": 5.201288244766506, | |
| "grad_norm": 0.8325517861916301, | |
| "learning_rate": 7.533792073744395e-06, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29200559854507446, | |
| "step": 3230, | |
| "valid_targets_mean": 2785.3, | |
| "valid_targets_min": 1343 | |
| }, | |
| { | |
| "epoch": 5.209339774557166, | |
| "grad_norm": 0.9000004067843751, | |
| "learning_rate": 7.471095053581086e-06, | |
| "loss": 0.3322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33962708711624146, | |
| "step": 3235, | |
| "valid_targets_mean": 2680.9, | |
| "valid_targets_min": 1319 | |
| }, | |
| { | |
| "epoch": 5.217391304347826, | |
| "grad_norm": 0.7351616652174657, | |
| "learning_rate": 7.4086000349169864e-06, | |
| "loss": 0.3073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27437877655029297, | |
| "step": 3240, | |
| "valid_targets_mean": 3356.8, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 5.225442834138486, | |
| "grad_norm": 0.8649092507616926, | |
| "learning_rate": 7.346308025349138e-06, | |
| "loss": 0.326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3227480947971344, | |
| "step": 3245, | |
| "valid_targets_mean": 2648.8, | |
| "valid_targets_min": 1013 | |
| }, | |
| { | |
| "epoch": 5.233494363929147, | |
| "grad_norm": 0.7894068022798766, | |
| "learning_rate": 7.2842200292014805e-06, | |
| "loss": 0.3381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2977900505065918, | |
| "step": 3250, | |
| "valid_targets_mean": 3610.2, | |
| "valid_targets_min": 1346 | |
| }, | |
| { | |
| "epoch": 5.241545893719807, | |
| "grad_norm": 0.8348958744102947, | |
| "learning_rate": 7.2223370475086896e-06, | |
| "loss": 0.3291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31349635124206543, | |
| "step": 3255, | |
| "valid_targets_mean": 2815.4, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 5.249597423510467, | |
| "grad_norm": 0.783279282550219, | |
| "learning_rate": 7.160660078000028e-06, | |
| "loss": 0.3223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31384730339050293, | |
| "step": 3260, | |
| "valid_targets_mean": 3536.7, | |
| "valid_targets_min": 1598 | |
| }, | |
| { | |
| "epoch": 5.2576489533011275, | |
| "grad_norm": 0.8474474930539333, | |
| "learning_rate": 7.099190115083259e-06, | |
| "loss": 0.3117, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2967156171798706, | |
| "step": 3265, | |
| "valid_targets_mean": 2561.4, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 5.265700483091788, | |
| "grad_norm": 0.808644026983678, | |
| "learning_rate": 7.037928149828608e-06, | |
| "loss": 0.3127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2750012278556824, | |
| "step": 3270, | |
| "valid_targets_mean": 2889.4, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 5.273752012882448, | |
| "grad_norm": 0.9194627850557411, | |
| "learning_rate": 6.97687516995279e-06, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3373414874076843, | |
| "step": 3275, | |
| "valid_targets_mean": 2557.4, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 5.281803542673108, | |
| "grad_norm": 0.822826899306057, | |
| "learning_rate": 6.916032159803088e-06, | |
| "loss": 0.3148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3265712857246399, | |
| "step": 3280, | |
| "valid_targets_mean": 3530.8, | |
| "valid_targets_min": 1290 | |
| }, | |
| { | |
| "epoch": 5.2898550724637685, | |
| "grad_norm": 1.0801701198259575, | |
| "learning_rate": 6.855400100341458e-06, | |
| "loss": 0.3135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31815803050994873, | |
| "step": 3285, | |
| "valid_targets_mean": 2579.7, | |
| "valid_targets_min": 1229 | |
| }, | |
| { | |
| "epoch": 5.297906602254429, | |
| "grad_norm": 0.9189668274655115, | |
| "learning_rate": 6.794979969128755e-06, | |
| "loss": 0.3284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35337209701538086, | |
| "step": 3290, | |
| "valid_targets_mean": 3024.5, | |
| "valid_targets_min": 1126 | |
| }, | |
| { | |
| "epoch": 5.305958132045088, | |
| "grad_norm": 0.7830019148830716, | |
| "learning_rate": 6.7347727403089325e-06, | |
| "loss": 0.3048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3025501072406769, | |
| "step": 3295, | |
| "valid_targets_mean": 3523.4, | |
| "valid_targets_min": 1277 | |
| }, | |
| { | |
| "epoch": 5.314009661835748, | |
| "grad_norm": 0.7973020737314889, | |
| "learning_rate": 6.674779384593373e-06, | |
| "loss": 0.3047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3119211196899414, | |
| "step": 3300, | |
| "valid_targets_mean": 3317.8, | |
| "valid_targets_min": 1389 | |
| }, | |
| { | |
| "epoch": 5.322061191626409, | |
| "grad_norm": 0.8741442602989578, | |
| "learning_rate": 6.61500086924519e-06, | |
| "loss": 0.3296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32590872049331665, | |
| "step": 3305, | |
| "valid_targets_mean": 2728.8, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 5.330112721417069, | |
| "grad_norm": 0.7121746609134123, | |
| "learning_rate": 6.555438158063683e-06, | |
| "loss": 0.3176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29646340012550354, | |
| "step": 3310, | |
| "valid_targets_mean": 3474.4, | |
| "valid_targets_min": 1520 | |
| }, | |
| { | |
| "epoch": 5.338164251207729, | |
| "grad_norm": 0.8449323115888453, | |
| "learning_rate": 6.4960922113687695e-06, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3494471311569214, | |
| "step": 3315, | |
| "valid_targets_mean": 2857.8, | |
| "valid_targets_min": 1154 | |
| }, | |
| { | |
| "epoch": 5.3462157809983895, | |
| "grad_norm": 0.885312187387555, | |
| "learning_rate": 6.4369639859855115e-06, | |
| "loss": 0.3228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2813508212566376, | |
| "step": 3320, | |
| "valid_targets_mean": 2419.3, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 5.35426731078905, | |
| "grad_norm": 0.9927190814901663, | |
| "learning_rate": 6.378054435228671e-06, | |
| "loss": 0.3254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34994691610336304, | |
| "step": 3325, | |
| "valid_targets_mean": 2817.8, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 5.36231884057971, | |
| "grad_norm": 0.8385621560859085, | |
| "learning_rate": 6.319364508887371e-06, | |
| "loss": 0.3274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3212810456752777, | |
| "step": 3330, | |
| "valid_targets_mean": 2906.6, | |
| "valid_targets_min": 1414 | |
| }, | |
| { | |
| "epoch": 5.37037037037037, | |
| "grad_norm": 0.9132443618507105, | |
| "learning_rate": 6.260895153209763e-06, | |
| "loss": 0.2996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3039951026439667, | |
| "step": 3335, | |
| "valid_targets_mean": 2699.4, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 5.3784219001610305, | |
| "grad_norm": 0.7695295851549366, | |
| "learning_rate": 6.202647310887764e-06, | |
| "loss": 0.2929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2774300277233124, | |
| "step": 3340, | |
| "valid_targets_mean": 3474.1, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 5.386473429951691, | |
| "grad_norm": 0.6668937071959811, | |
| "learning_rate": 6.14462192104188e-06, | |
| "loss": 0.2975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27856311202049255, | |
| "step": 3345, | |
| "valid_targets_mean": 4942.6, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 5.394524959742351, | |
| "grad_norm": 0.8455706950790055, | |
| "learning_rate": 6.086819919206051e-06, | |
| "loss": 0.3022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30070313811302185, | |
| "step": 3350, | |
| "valid_targets_mean": 3203.9, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 5.402576489533011, | |
| "grad_norm": 0.836900372814081, | |
| "learning_rate": 6.029242237312554e-06, | |
| "loss": 0.2896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2704571485519409, | |
| "step": 3355, | |
| "valid_targets_mean": 2741.4, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 5.4106280193236715, | |
| "grad_norm": 1.0931862207345193, | |
| "learning_rate": 5.971889803676996e-06, | |
| "loss": 0.2948, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2953025698661804, | |
| "step": 3360, | |
| "valid_targets_mean": 2505.7, | |
| "valid_targets_min": 1217 | |
| }, | |
| { | |
| "epoch": 5.418679549114332, | |
| "grad_norm": 0.7654117324257275, | |
| "learning_rate": 5.914763542983355e-06, | |
| "loss": 0.2854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26454564929008484, | |
| "step": 3365, | |
| "valid_targets_mean": 2908.2, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 5.426731078904992, | |
| "grad_norm": 0.7330273533335651, | |
| "learning_rate": 5.857864376269051e-06, | |
| "loss": 0.269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26756301522254944, | |
| "step": 3370, | |
| "valid_targets_mean": 3492.0, | |
| "valid_targets_min": 1549 | |
| }, | |
| { | |
| "epoch": 5.434782608695652, | |
| "grad_norm": 0.8733408409541297, | |
| "learning_rate": 5.801193220910108e-06, | |
| "loss": 0.2752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26825398206710815, | |
| "step": 3375, | |
| "valid_targets_mean": 2432.0, | |
| "valid_targets_min": 838 | |
| }, | |
| { | |
| "epoch": 5.442834138486313, | |
| "grad_norm": 0.7169076864712473, | |
| "learning_rate": 5.744750990606356e-06, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24838054180145264, | |
| "step": 3380, | |
| "valid_targets_mean": 3069.9, | |
| "valid_targets_min": 269 | |
| }, | |
| { | |
| "epoch": 5.450885668276973, | |
| "grad_norm": 0.7502178650889314, | |
| "learning_rate": 5.688538595366706e-06, | |
| "loss": 0.2918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31911134719848633, | |
| "step": 3385, | |
| "valid_targets_mean": 3644.1, | |
| "valid_targets_min": 1308 | |
| }, | |
| { | |
| "epoch": 5.458937198067633, | |
| "grad_norm": 1.414216146795669, | |
| "learning_rate": 5.632556941494482e-06, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25184541940689087, | |
| "step": 3390, | |
| "valid_targets_mean": 3538.7, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 5.466988727858293, | |
| "grad_norm": 0.8372271865966336, | |
| "learning_rate": 5.5768069315727895e-06, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27714282274246216, | |
| "step": 3395, | |
| "valid_targets_mean": 2789.2, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 5.475040257648954, | |
| "grad_norm": 0.8184499988808632, | |
| "learning_rate": 5.521289464449975e-06, | |
| "loss": 0.3038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2645953595638275, | |
| "step": 3400, | |
| "valid_targets_mean": 2663.7, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 5.483091787439614, | |
| "grad_norm": 0.7185289726192565, | |
| "learning_rate": 5.46600543522515e-06, | |
| "loss": 0.2736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30967646837234497, | |
| "step": 3405, | |
| "valid_targets_mean": 3865.0, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 5.491143317230274, | |
| "grad_norm": 0.705180498725472, | |
| "learning_rate": 5.410955735233736e-06, | |
| "loss": 0.2819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706761658191681, | |
| "step": 3410, | |
| "valid_targets_mean": 4117.5, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 5.499194847020934, | |
| "grad_norm": 0.6957397872406959, | |
| "learning_rate": 5.3561412520331025e-06, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23413681983947754, | |
| "step": 3415, | |
| "valid_targets_mean": 3565.3, | |
| "valid_targets_min": 333 | |
| }, | |
| { | |
| "epoch": 5.507246376811594, | |
| "grad_norm": 0.7323699706744174, | |
| "learning_rate": 5.30156286938826e-06, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27193206548690796, | |
| "step": 3420, | |
| "valid_targets_mean": 3507.9, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 5.515297906602254, | |
| "grad_norm": 0.7948157490925247, | |
| "learning_rate": 5.24722146725761e-06, | |
| "loss": 0.2912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30066072940826416, | |
| "step": 3425, | |
| "valid_targets_mean": 3822.9, | |
| "valid_targets_min": 428 | |
| }, | |
| { | |
| "epoch": 5.523349436392914, | |
| "grad_norm": 0.7330705322398546, | |
| "learning_rate": 5.193117921778743e-06, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2574806809425354, | |
| "step": 3430, | |
| "valid_targets_mean": 2932.9, | |
| "valid_targets_min": 1398 | |
| }, | |
| { | |
| "epoch": 5.531400966183575, | |
| "grad_norm": 0.737623162777766, | |
| "learning_rate": 5.139253105254336e-06, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2380131632089615, | |
| "step": 3435, | |
| "valid_targets_mean": 3473.2, | |
| "valid_targets_min": 1265 | |
| }, | |
| { | |
| "epoch": 5.539452495974235, | |
| "grad_norm": 0.872809824307193, | |
| "learning_rate": 5.085627886138078e-06, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3099297881126404, | |
| "step": 3440, | |
| "valid_targets_mean": 2707.5, | |
| "valid_targets_min": 1453 | |
| }, | |
| { | |
| "epoch": 5.547504025764895, | |
| "grad_norm": 0.698343107099745, | |
| "learning_rate": 5.032243129020671e-06, | |
| "loss": 0.2838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.263833224773407, | |
| "step": 3445, | |
| "valid_targets_mean": 3770.1, | |
| "valid_targets_min": 1554 | |
| }, | |
| { | |
| "epoch": 5.555555555555555, | |
| "grad_norm": 0.7253845641200191, | |
| "learning_rate": 4.9790996946158695e-06, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2566887438297272, | |
| "step": 3450, | |
| "valid_targets_mean": 3687.9, | |
| "valid_targets_min": 1423 | |
| }, | |
| { | |
| "epoch": 5.563607085346216, | |
| "grad_norm": 0.7712918784806847, | |
| "learning_rate": 4.926198439746641e-06, | |
| "loss": 0.2724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26648885011672974, | |
| "step": 3455, | |
| "valid_targets_mean": 3580.0, | |
| "valid_targets_min": 1778 | |
| }, | |
| { | |
| "epoch": 5.571658615136876, | |
| "grad_norm": 0.856414024782782, | |
| "learning_rate": 4.873540217331325e-06, | |
| "loss": 0.2817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561458945274353, | |
| "step": 3460, | |
| "valid_targets_mean": 3154.2, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 5.579710144927536, | |
| "grad_norm": 0.7683716866413719, | |
| "learning_rate": 4.82112587636989e-06, | |
| "loss": 0.2644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2730039358139038, | |
| "step": 3465, | |
| "valid_targets_mean": 3442.1, | |
| "valid_targets_min": 1222 | |
| }, | |
| { | |
| "epoch": 5.587761674718196, | |
| "grad_norm": 0.8230430968296925, | |
| "learning_rate": 4.768956261930233e-06, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27767133712768555, | |
| "step": 3470, | |
| "valid_targets_mean": 2991.2, | |
| "valid_targets_min": 1344 | |
| }, | |
| { | |
| "epoch": 5.595813204508857, | |
| "grad_norm": 0.8004415970586688, | |
| "learning_rate": 4.717032215134576e-06, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2605861723423004, | |
| "step": 3475, | |
| "valid_targets_mean": 3584.1, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 5.603864734299517, | |
| "grad_norm": 0.7353749562835018, | |
| "learning_rate": 4.66535457314589e-06, | |
| "loss": 0.2576, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24795982241630554, | |
| "step": 3480, | |
| "valid_targets_mean": 3283.6, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 5.611916264090177, | |
| "grad_norm": 0.7934788767299171, | |
| "learning_rate": 4.613924169154406e-06, | |
| "loss": 0.2812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561984658241272, | |
| "step": 3485, | |
| "valid_targets_mean": 3267.1, | |
| "valid_targets_min": 1469 | |
| }, | |
| { | |
| "epoch": 5.6199677938808374, | |
| "grad_norm": 0.6451673243838487, | |
| "learning_rate": 4.5627418323641705e-06, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23117291927337646, | |
| "step": 3490, | |
| "valid_targets_mean": 4220.8, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 5.628019323671498, | |
| "grad_norm": 0.7547471899894796, | |
| "learning_rate": 4.51180838797969e-06, | |
| "loss": 0.2671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534325420856476, | |
| "step": 3495, | |
| "valid_targets_mean": 3403.3, | |
| "valid_targets_min": 1315 | |
| }, | |
| { | |
| "epoch": 5.636070853462158, | |
| "grad_norm": 0.773278558835617, | |
| "learning_rate": 4.461124657192612e-06, | |
| "loss": 0.2602, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26870912313461304, | |
| "step": 3500, | |
| "valid_targets_mean": 3994.0, | |
| "valid_targets_min": 1243 | |
| }, | |
| { | |
| "epoch": 5.644122383252818, | |
| "grad_norm": 0.810555778973838, | |
| "learning_rate": 4.410691457168488e-06, | |
| "loss": 0.2896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28086018562316895, | |
| "step": 3505, | |
| "valid_targets_mean": 2728.6, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 5.6521739130434785, | |
| "grad_norm": 0.8120077352409444, | |
| "learning_rate": 4.3605096010336115e-06, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24294914305210114, | |
| "step": 3510, | |
| "valid_targets_mean": 2805.7, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 5.660225442834139, | |
| "grad_norm": 0.8130325924281421, | |
| "learning_rate": 4.310579897861902e-06, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28100401163101196, | |
| "step": 3515, | |
| "valid_targets_mean": 3165.5, | |
| "valid_targets_min": 1339 | |
| }, | |
| { | |
| "epoch": 5.668276972624799, | |
| "grad_norm": 0.8162870406632708, | |
| "learning_rate": 4.26090315266185e-06, | |
| "loss": 0.2723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26572561264038086, | |
| "step": 3520, | |
| "valid_targets_mean": 2954.1, | |
| "valid_targets_min": 1158 | |
| }, | |
| { | |
| "epoch": 5.676328502415459, | |
| "grad_norm": 0.6514530451354953, | |
| "learning_rate": 4.2114801663635504e-06, | |
| "loss": 0.2687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2698674201965332, | |
| "step": 3525, | |
| "valid_targets_mean": 4536.0, | |
| "valid_targets_min": 1647 | |
| }, | |
| { | |
| "epoch": 5.6843800322061195, | |
| "grad_norm": 0.7583758417148468, | |
| "learning_rate": 4.1623117358057865e-06, | |
| "loss": 0.2696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24608030915260315, | |
| "step": 3530, | |
| "valid_targets_mean": 3440.9, | |
| "valid_targets_min": 1237 | |
| }, | |
| { | |
| "epoch": 5.692431561996779, | |
| "grad_norm": 0.8068612199962942, | |
| "learning_rate": 4.113398653723168e-06, | |
| "loss": 0.3091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30153918266296387, | |
| "step": 3535, | |
| "valid_targets_mean": 3381.9, | |
| "valid_targets_min": 1538 | |
| }, | |
| { | |
| "epoch": 5.70048309178744, | |
| "grad_norm": 0.6753881803646461, | |
| "learning_rate": 4.0647417087333776e-06, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2815188765525818, | |
| "step": 3540, | |
| "valid_targets_mean": 4124.1, | |
| "valid_targets_min": 1524 | |
| }, | |
| { | |
| "epoch": 5.708534621578099, | |
| "grad_norm": 0.7538186575725058, | |
| "learning_rate": 4.0163416853244385e-06, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3041001558303833, | |
| "step": 3545, | |
| "valid_targets_mean": 3666.5, | |
| "valid_targets_min": 1144 | |
| }, | |
| { | |
| "epoch": 5.71658615136876, | |
| "grad_norm": 0.7597861294821214, | |
| "learning_rate": 3.968199363842056e-06, | |
| "loss": 0.2788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28277167677879333, | |
| "step": 3550, | |
| "valid_targets_mean": 3813.6, | |
| "valid_targets_min": 1150 | |
| }, | |
| { | |
| "epoch": 5.72463768115942, | |
| "grad_norm": 0.8692552896889315, | |
| "learning_rate": 3.920315520477065e-06, | |
| "loss": 0.2705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.290031373500824, | |
| "step": 3555, | |
| "valid_targets_mean": 3184.7, | |
| "valid_targets_min": 1064 | |
| }, | |
| { | |
| "epoch": 5.73268921095008, | |
| "grad_norm": 0.725354422712945, | |
| "learning_rate": 3.872690927252891e-06, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2602255642414093, | |
| "step": 3560, | |
| "valid_targets_mean": 3734.6, | |
| "valid_targets_min": 1689 | |
| }, | |
| { | |
| "epoch": 5.7407407407407405, | |
| "grad_norm": 0.7906077868844616, | |
| "learning_rate": 3.825326352013119e-06, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2873994708061218, | |
| "step": 3565, | |
| "valid_targets_mean": 3706.6, | |
| "valid_targets_min": 1537 | |
| }, | |
| { | |
| "epoch": 5.748792270531401, | |
| "grad_norm": 0.7271200769973074, | |
| "learning_rate": 3.7782225584091016e-06, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29486900568008423, | |
| "step": 3570, | |
| "valid_targets_mean": 3998.4, | |
| "valid_targets_min": 1261 | |
| }, | |
| { | |
| "epoch": 5.756843800322061, | |
| "grad_norm": 0.6627264051632484, | |
| "learning_rate": 3.731380305887644e-06, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17932528257369995, | |
| "step": 3575, | |
| "valid_targets_mean": 6379.5, | |
| "valid_targets_min": 3100 | |
| }, | |
| { | |
| "epoch": 5.764895330112721, | |
| "grad_norm": 0.5856709563027361, | |
| "learning_rate": 3.684800349678781e-06, | |
| "loss": 0.1718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15101756155490875, | |
| "step": 3580, | |
| "valid_targets_mean": 5043.8, | |
| "valid_targets_min": 2708 | |
| }, | |
| { | |
| "epoch": 5.7729468599033815, | |
| "grad_norm": 0.5383802419402726, | |
| "learning_rate": 3.638483440783576e-06, | |
| "loss": 0.1566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1619020700454712, | |
| "step": 3585, | |
| "valid_targets_mean": 5717.3, | |
| "valid_targets_min": 3191 | |
| }, | |
| { | |
| "epoch": 5.780998389694042, | |
| "grad_norm": 0.5400164906310356, | |
| "learning_rate": 3.5924303259620307e-06, | |
| "loss": 0.1954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22362828254699707, | |
| "step": 3590, | |
| "valid_targets_mean": 6198.8, | |
| "valid_targets_min": 2599 | |
| }, | |
| { | |
| "epoch": 5.789049919484702, | |
| "grad_norm": 0.49147798358688005, | |
| "learning_rate": 3.546641747721036e-06, | |
| "loss": 0.1541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16704602539539337, | |
| "step": 3595, | |
| "valid_targets_mean": 5880.9, | |
| "valid_targets_min": 3427 | |
| }, | |
| { | |
| "epoch": 5.797101449275362, | |
| "grad_norm": 0.5344086768922587, | |
| "learning_rate": 3.501118444302394e-06, | |
| "loss": 0.1543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16091877222061157, | |
| "step": 3600, | |
| "valid_targets_mean": 5429.1, | |
| "valid_targets_min": 3331 | |
| }, | |
| { | |
| "epoch": 5.805152979066023, | |
| "grad_norm": 0.5393969729072521, | |
| "learning_rate": 3.4558611496709384e-06, | |
| "loss": 0.1611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1679922491312027, | |
| "step": 3605, | |
| "valid_targets_mean": 5318.9, | |
| "valid_targets_min": 3176 | |
| }, | |
| { | |
| "epoch": 5.813204508856683, | |
| "grad_norm": 0.5476815168907639, | |
| "learning_rate": 3.4108705935026685e-06, | |
| "loss": 0.1914, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21377882361412048, | |
| "step": 3610, | |
| "valid_targets_mean": 6334.5, | |
| "valid_targets_min": 3200 | |
| }, | |
| { | |
| "epoch": 5.821256038647343, | |
| "grad_norm": 0.5454273677417246, | |
| "learning_rate": 3.3661475011730206e-06, | |
| "loss": 0.1677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15195472538471222, | |
| "step": 3615, | |
| "valid_targets_mean": 5575.2, | |
| "valid_targets_min": 2982 | |
| }, | |
| { | |
| "epoch": 5.829307568438003, | |
| "grad_norm": 0.487579490900084, | |
| "learning_rate": 3.321692593745147e-06, | |
| "loss": 0.1689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14653947949409485, | |
| "step": 3620, | |
| "valid_targets_mean": 5045.2, | |
| "valid_targets_min": 2662 | |
| }, | |
| { | |
| "epoch": 5.837359098228664, | |
| "grad_norm": 0.46110914845574164, | |
| "learning_rate": 3.2775065879582948e-06, | |
| "loss": 0.1594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14655046164989471, | |
| "step": 3625, | |
| "valid_targets_mean": 5929.9, | |
| "valid_targets_min": 3640 | |
| }, | |
| { | |
| "epoch": 5.845410628019324, | |
| "grad_norm": 0.5371793623726028, | |
| "learning_rate": 3.233590196216263e-06, | |
| "loss": 0.1759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15916280448436737, | |
| "step": 3630, | |
| "valid_targets_mean": 5560.6, | |
| "valid_targets_min": 3146 | |
| }, | |
| { | |
| "epoch": 5.853462157809984, | |
| "grad_norm": 0.5336623676265232, | |
| "learning_rate": 3.1899441265759036e-06, | |
| "loss": 0.1519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17341580986976624, | |
| "step": 3635, | |
| "valid_targets_mean": 5438.9, | |
| "valid_targets_min": 3095 | |
| }, | |
| { | |
| "epoch": 5.861513687600644, | |
| "grad_norm": 0.506978820179007, | |
| "learning_rate": 3.1465690827356955e-06, | |
| "loss": 0.1629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16827458143234253, | |
| "step": 3640, | |
| "valid_targets_mean": 6153.6, | |
| "valid_targets_min": 3325 | |
| }, | |
| { | |
| "epoch": 5.869565217391305, | |
| "grad_norm": 0.48238153810687057, | |
| "learning_rate": 3.103465764024438e-06, | |
| "loss": 0.1636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1539824903011322, | |
| "step": 3645, | |
| "valid_targets_mean": 6176.7, | |
| "valid_targets_min": 3690 | |
| }, | |
| { | |
| "epoch": 5.877616747181965, | |
| "grad_norm": 0.6316647612921683, | |
| "learning_rate": 3.0606348653899288e-06, | |
| "loss": 0.156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14262428879737854, | |
| "step": 3650, | |
| "valid_targets_mean": 5253.8, | |
| "valid_targets_min": 2926 | |
| }, | |
| { | |
| "epoch": 5.885668276972625, | |
| "grad_norm": 0.5087670946379775, | |
| "learning_rate": 3.0180770773877866e-06, | |
| "loss": 0.1575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1334628015756607, | |
| "step": 3655, | |
| "valid_targets_mean": 6097.0, | |
| "valid_targets_min": 3707 | |
| }, | |
| { | |
| "epoch": 5.8937198067632846, | |
| "grad_norm": 0.5000631591042055, | |
| "learning_rate": 2.9757930861703223e-06, | |
| "loss": 0.1567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16042768955230713, | |
| "step": 3660, | |
| "valid_targets_mean": 5931.1, | |
| "valid_targets_min": 3697 | |
| }, | |
| { | |
| "epoch": 5.901771336553946, | |
| "grad_norm": 0.5448493291259932, | |
| "learning_rate": 2.9337835734754504e-06, | |
| "loss": 0.1528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15106025338172913, | |
| "step": 3665, | |
| "valid_targets_mean": 6370.6, | |
| "valid_targets_min": 3665 | |
| }, | |
| { | |
| "epoch": 5.909822866344605, | |
| "grad_norm": 0.4904247806452323, | |
| "learning_rate": 2.892049216615724e-06, | |
| "loss": 0.1669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17938336730003357, | |
| "step": 3670, | |
| "valid_targets_mean": 6119.8, | |
| "valid_targets_min": 2561 | |
| }, | |
| { | |
| "epoch": 5.917874396135265, | |
| "grad_norm": 0.5176054978636475, | |
| "learning_rate": 2.850590688467405e-06, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1513424813747406, | |
| "step": 3675, | |
| "valid_targets_mean": 5352.8, | |
| "valid_targets_min": 3838 | |
| }, | |
| { | |
| "epoch": 5.925925925925926, | |
| "grad_norm": 0.5014302674357601, | |
| "learning_rate": 2.8094086574595934e-06, | |
| "loss": 0.1564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16385960578918457, | |
| "step": 3680, | |
| "valid_targets_mean": 6064.1, | |
| "valid_targets_min": 2800 | |
| }, | |
| { | |
| "epoch": 5.933977455716586, | |
| "grad_norm": 0.4897422253909219, | |
| "learning_rate": 2.768503787563497e-06, | |
| "loss": 0.165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14840051531791687, | |
| "step": 3685, | |
| "valid_targets_mean": 6725.1, | |
| "valid_targets_min": 4014 | |
| }, | |
| { | |
| "epoch": 5.942028985507246, | |
| "grad_norm": 0.527797846005594, | |
| "learning_rate": 2.7278767382816828e-06, | |
| "loss": 0.1707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15732444822788239, | |
| "step": 3690, | |
| "valid_targets_mean": 5340.4, | |
| "valid_targets_min": 3261 | |
| }, | |
| { | |
| "epoch": 5.950080515297906, | |
| "grad_norm": 0.4537750452077363, | |
| "learning_rate": 2.687528164637474e-06, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15537717938423157, | |
| "step": 3695, | |
| "valid_targets_mean": 6194.7, | |
| "valid_targets_min": 3579 | |
| }, | |
| { | |
| "epoch": 5.958132045088567, | |
| "grad_norm": 0.481130982772452, | |
| "learning_rate": 2.647458717164357e-06, | |
| "loss": 0.1524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1415160596370697, | |
| "step": 3700, | |
| "valid_targets_mean": 6527.8, | |
| "valid_targets_min": 2379 | |
| }, | |
| { | |
| "epoch": 5.966183574879227, | |
| "grad_norm": 0.4971167453343456, | |
| "learning_rate": 2.607669041895535e-06, | |
| "loss": 0.1627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16012218594551086, | |
| "step": 3705, | |
| "valid_targets_mean": 6887.7, | |
| "valid_targets_min": 3569 | |
| }, | |
| { | |
| "epoch": 5.974235104669887, | |
| "grad_norm": 0.47404245776286746, | |
| "learning_rate": 2.568159780353476e-06, | |
| "loss": 0.1448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.134698748588562, | |
| "step": 3710, | |
| "valid_targets_mean": 5398.8, | |
| "valid_targets_min": 2580 | |
| }, | |
| { | |
| "epoch": 5.982286634460547, | |
| "grad_norm": 0.5394680793747297, | |
| "learning_rate": 2.5289315695395834e-06, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1926458775997162, | |
| "step": 3715, | |
| "valid_targets_mean": 6136.1, | |
| "valid_targets_min": 2610 | |
| }, | |
| { | |
| "epoch": 5.990338164251208, | |
| "grad_norm": 0.5073583213947698, | |
| "learning_rate": 2.489985041923928e-06, | |
| "loss": 0.1542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1339704990386963, | |
| "step": 3720, | |
| "valid_targets_mean": 5957.4, | |
| "valid_targets_min": 2562 | |
| }, | |
| { | |
| "epoch": 5.998389694041868, | |
| "grad_norm": 0.5506968197613669, | |
| "learning_rate": 2.4513208254350486e-06, | |
| "loss": 0.1552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1555670201778412, | |
| "step": 3725, | |
| "valid_targets_mean": 5860.3, | |
| "valid_targets_min": 2833 | |
| }, | |
| { | |
| "epoch": 6.006441223832528, | |
| "grad_norm": 0.6724494776596883, | |
| "learning_rate": 2.412939543449828e-06, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2651374936103821, | |
| "step": 3730, | |
| "valid_targets_mean": 5898.7, | |
| "valid_targets_min": 2769 | |
| }, | |
| { | |
| "epoch": 6.0144927536231885, | |
| "grad_norm": 0.5836006033084224, | |
| "learning_rate": 2.3748418147834394e-06, | |
| "loss": 0.2381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2323194146156311, | |
| "step": 3735, | |
| "valid_targets_mean": 6068.6, | |
| "valid_targets_min": 2285 | |
| }, | |
| { | |
| "epoch": 6.022544283413849, | |
| "grad_norm": 0.5642017551180925, | |
| "learning_rate": 2.337028253679381e-06, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26346156001091003, | |
| "step": 3740, | |
| "valid_targets_mean": 6519.8, | |
| "valid_targets_min": 3314 | |
| }, | |
| { | |
| "epoch": 6.030595813204509, | |
| "grad_norm": 0.7066259408147071, | |
| "learning_rate": 2.299499469799542e-06, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2269531935453415, | |
| "step": 3745, | |
| "valid_targets_mean": 6028.6, | |
| "valid_targets_min": 2671 | |
| }, | |
| { | |
| "epoch": 6.038647342995169, | |
| "grad_norm": 0.5632095775983418, | |
| "learning_rate": 2.262256068214421e-06, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706316113471985, | |
| "step": 3750, | |
| "valid_targets_mean": 6266.9, | |
| "valid_targets_min": 469 | |
| }, | |
| { | |
| "epoch": 6.0466988727858295, | |
| "grad_norm": 0.5348831735859538, | |
| "learning_rate": 2.2252986493933237e-06, | |
| "loss": 0.2318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21734777092933655, | |
| "step": 3755, | |
| "valid_targets_mean": 5759.1, | |
| "valid_targets_min": 2568 | |
| }, | |
| { | |
| "epoch": 6.05475040257649, | |
| "grad_norm": 0.5569500892452331, | |
| "learning_rate": 2.18862780919471e-06, | |
| "loss": 0.2402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21391534805297852, | |
| "step": 3760, | |
| "valid_targets_mean": 5530.2, | |
| "valid_targets_min": 611 | |
| }, | |
| { | |
| "epoch": 6.06280193236715, | |
| "grad_norm": 0.5382181428705697, | |
| "learning_rate": 2.152244138856585e-06, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20388023555278778, | |
| "step": 3765, | |
| "valid_targets_mean": 5696.9, | |
| "valid_targets_min": 2392 | |
| }, | |
| { | |
| "epoch": 6.07085346215781, | |
| "grad_norm": 0.6463190792326495, | |
| "learning_rate": 2.1161482249869513e-06, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21541935205459595, | |
| "step": 3770, | |
| "valid_targets_mean": 5683.9, | |
| "valid_targets_min": 1285 | |
| }, | |
| { | |
| "epoch": 6.078904991948471, | |
| "grad_norm": 0.4769019487796991, | |
| "learning_rate": 2.080340649554369e-06, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1947912871837616, | |
| "step": 3775, | |
| "valid_targets_mean": 6331.4, | |
| "valid_targets_min": 3356 | |
| }, | |
| { | |
| "epoch": 6.086956521739131, | |
| "grad_norm": 0.5260688671936042, | |
| "learning_rate": 2.044821989878558e-06, | |
| "loss": 0.2162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1972348690032959, | |
| "step": 3780, | |
| "valid_targets_mean": 5326.1, | |
| "valid_targets_min": 1676 | |
| }, | |
| { | |
| "epoch": 6.095008051529791, | |
| "grad_norm": 0.5385684220997624, | |
| "learning_rate": 2.0095928186210956e-06, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24432078003883362, | |
| "step": 3785, | |
| "valid_targets_mean": 6448.9, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 6.1030595813204505, | |
| "grad_norm": 0.5134775076305466, | |
| "learning_rate": 1.974653703776188e-06, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21769499778747559, | |
| "step": 3790, | |
| "valid_targets_mean": 5910.2, | |
| "valid_targets_min": 1417 | |
| }, | |
| { | |
| "epoch": 6.111111111111111, | |
| "grad_norm": 0.500956162314828, | |
| "learning_rate": 1.9400052086615153e-06, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26993608474731445, | |
| "step": 3795, | |
| "valid_targets_mean": 7141.1, | |
| "valid_targets_min": 3571 | |
| }, | |
| { | |
| "epoch": 6.119162640901771, | |
| "grad_norm": 0.5451480445857494, | |
| "learning_rate": 1.9056478919091236e-06, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23594680428504944, | |
| "step": 3800, | |
| "valid_targets_mean": 6492.9, | |
| "valid_targets_min": 3446 | |
| }, | |
| { | |
| "epoch": 6.127214170692431, | |
| "grad_norm": 0.4970674059635784, | |
| "learning_rate": 1.8715823074564587e-06, | |
| "loss": 0.21, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18086868524551392, | |
| "step": 3805, | |
| "valid_targets_mean": 5447.6, | |
| "valid_targets_min": 306 | |
| }, | |
| { | |
| "epoch": 6.1352657004830915, | |
| "grad_norm": 0.8407143099623481, | |
| "learning_rate": 1.837809004537401e-06, | |
| "loss": 0.278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31480419635772705, | |
| "step": 3810, | |
| "valid_targets_mean": 3657.8, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 6.143317230273752, | |
| "grad_norm": 0.9619594700485488, | |
| "learning_rate": 1.8043285276734334e-06, | |
| "loss": 0.3421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3486429452896118, | |
| "step": 3815, | |
| "valid_targets_mean": 2673.1, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 6.151368760064412, | |
| "grad_norm": 0.8500263557004369, | |
| "learning_rate": 1.7711414166648365e-06, | |
| "loss": 0.3224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3212417662143707, | |
| "step": 3820, | |
| "valid_targets_mean": 3209.7, | |
| "valid_targets_min": 1155 | |
| }, | |
| { | |
| "epoch": 6.159420289855072, | |
| "grad_norm": 0.9189308209039743, | |
| "learning_rate": 1.7382482065820138e-06, | |
| "loss": 0.3419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35360682010650635, | |
| "step": 3825, | |
| "valid_targets_mean": 3054.8, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 6.1674718196457325, | |
| "grad_norm": 0.843417341220929, | |
| "learning_rate": 1.7056494277568503e-06, | |
| "loss": 0.3048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28352683782577515, | |
| "step": 3830, | |
| "valid_targets_mean": 2978.8, | |
| "valid_targets_min": 1175 | |
| }, | |
| { | |
| "epoch": 6.175523349436393, | |
| "grad_norm": 0.8163445646433484, | |
| "learning_rate": 1.6733456057741592e-06, | |
| "loss": 0.2972, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2696455121040344, | |
| "step": 3835, | |
| "valid_targets_mean": 2849.6, | |
| "valid_targets_min": 1295 | |
| }, | |
| { | |
| "epoch": 6.183574879227053, | |
| "grad_norm": 0.9881052112221266, | |
| "learning_rate": 1.641337261463216e-06, | |
| "loss": 0.3143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.332567423582077, | |
| "step": 3840, | |
| "valid_targets_mean": 2615.8, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 6.191626409017713, | |
| "grad_norm": 0.8302563038314946, | |
| "learning_rate": 1.6096249108893602e-06, | |
| "loss": 0.3094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33189308643341064, | |
| "step": 3845, | |
| "valid_targets_mean": 3296.4, | |
| "valid_targets_min": 1316 | |
| }, | |
| { | |
| "epoch": 6.199677938808374, | |
| "grad_norm": 0.8685755282410109, | |
| "learning_rate": 1.5782090653456616e-06, | |
| "loss": 0.3143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31094080209732056, | |
| "step": 3850, | |
| "valid_targets_mean": 3968.6, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 6.207729468599034, | |
| "grad_norm": 0.9006410427832183, | |
| "learning_rate": 1.547090231344699e-06, | |
| "loss": 0.3111, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37766462564468384, | |
| "step": 3855, | |
| "valid_targets_mean": 3078.5, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 6.215780998389694, | |
| "grad_norm": 0.8145924540125793, | |
| "learning_rate": 1.5162689106103746e-06, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.283829003572464, | |
| "step": 3860, | |
| "valid_targets_mean": 3417.5, | |
| "valid_targets_min": 1252 | |
| }, | |
| { | |
| "epoch": 6.223832528180354, | |
| "grad_norm": 0.8259786070193814, | |
| "learning_rate": 1.4857456000698366e-06, | |
| "loss": 0.3045, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3424111008644104, | |
| "step": 3865, | |
| "valid_targets_mean": 3427.6, | |
| "valid_targets_min": 1218 | |
| }, | |
| { | |
| "epoch": 6.231884057971015, | |
| "grad_norm": 0.9647531902604716, | |
| "learning_rate": 1.4555207918454662e-06, | |
| "loss": 0.3322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37204068899154663, | |
| "step": 3870, | |
| "valid_targets_mean": 2875.2, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 6.239935587761675, | |
| "grad_norm": 0.878124106513052, | |
| "learning_rate": 1.4255949732469309e-06, | |
| "loss": 0.3147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.326835960149765, | |
| "step": 3875, | |
| "valid_targets_mean": 2933.1, | |
| "valid_targets_min": 1485 | |
| }, | |
| { | |
| "epoch": 6.247987117552335, | |
| "grad_norm": 0.9256224608559495, | |
| "learning_rate": 1.3959686267633488e-06, | |
| "loss": 0.3116, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32315731048583984, | |
| "step": 3880, | |
| "valid_targets_mean": 2699.5, | |
| "valid_targets_min": 1170 | |
| }, | |
| { | |
| "epoch": 6.256038647342995, | |
| "grad_norm": 0.7806676732625024, | |
| "learning_rate": 1.3666422300554905e-06, | |
| "loss": 0.3044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2884427607059479, | |
| "step": 3885, | |
| "valid_targets_mean": 3932.2, | |
| "valid_targets_min": 1108 | |
| }, | |
| { | |
| "epoch": 6.264090177133656, | |
| "grad_norm": 0.8683190375235182, | |
| "learning_rate": 1.3376162559480822e-06, | |
| "loss": 0.3071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3049579858779907, | |
| "step": 3890, | |
| "valid_targets_mean": 2747.4, | |
| "valid_targets_min": 1205 | |
| }, | |
| { | |
| "epoch": 6.272141706924316, | |
| "grad_norm": 0.8710255989708383, | |
| "learning_rate": 1.308891172422193e-06, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33199816942214966, | |
| "step": 3895, | |
| "valid_targets_mean": 3029.8, | |
| "valid_targets_min": 1355 | |
| }, | |
| { | |
| "epoch": 6.280193236714976, | |
| "grad_norm": 0.8477552990968392, | |
| "learning_rate": 1.2804674426076757e-06, | |
| "loss": 0.3069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.325872540473938, | |
| "step": 3900, | |
| "valid_targets_mean": 3227.2, | |
| "valid_targets_min": 1427 | |
| }, | |
| { | |
| "epoch": 6.2882447665056365, | |
| "grad_norm": 0.7965348453218628, | |
| "learning_rate": 1.2523455247757088e-06, | |
| "loss": 0.3048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30131930112838745, | |
| "step": 3905, | |
| "valid_targets_mean": 3224.4, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 6.296296296296296, | |
| "grad_norm": 0.7395568722630161, | |
| "learning_rate": 1.224525872331408e-06, | |
| "loss": 0.3113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3066796660423279, | |
| "step": 3910, | |
| "valid_targets_mean": 4021.2, | |
| "valid_targets_min": 1118 | |
| }, | |
| { | |
| "epoch": 6.304347826086957, | |
| "grad_norm": 0.8162687764572345, | |
| "learning_rate": 1.1970089338065071e-06, | |
| "loss": 0.3041, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2592276334762573, | |
| "step": 3915, | |
| "valid_targets_mean": 3145.9, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 6.312399355877616, | |
| "grad_norm": 0.8944188897503945, | |
| "learning_rate": 1.1697951528521422e-06, | |
| "loss": 0.2928, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3102479875087738, | |
| "step": 3920, | |
| "valid_targets_mean": 2583.8, | |
| "valid_targets_min": 1167 | |
| }, | |
| { | |
| "epoch": 6.320450885668277, | |
| "grad_norm": 0.8873873245958489, | |
| "learning_rate": 1.1428849682316766e-06, | |
| "loss": 0.3162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2901550829410553, | |
| "step": 3925, | |
| "valid_targets_mean": 3109.6, | |
| "valid_targets_min": 1268 | |
| }, | |
| { | |
| "epoch": 6.328502415458937, | |
| "grad_norm": 0.8886136259904959, | |
| "learning_rate": 1.116278813813647e-06, | |
| "loss": 0.3118, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2849825918674469, | |
| "step": 3930, | |
| "valid_targets_mean": 2516.6, | |
| "valid_targets_min": 1159 | |
| }, | |
| { | |
| "epoch": 6.336553945249597, | |
| "grad_norm": 0.8340658998428512, | |
| "learning_rate": 1.08997711856476e-06, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31567925214767456, | |
| "step": 3935, | |
| "valid_targets_mean": 4016.8, | |
| "valid_targets_min": 1642 | |
| }, | |
| { | |
| "epoch": 6.344605475040257, | |
| "grad_norm": 0.8397362085202228, | |
| "learning_rate": 1.0639803065429755e-06, | |
| "loss": 0.3254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3202289938926697, | |
| "step": 3940, | |
| "valid_targets_mean": 3258.7, | |
| "valid_targets_min": 1158 | |
| }, | |
| { | |
| "epoch": 6.352657004830918, | |
| "grad_norm": 0.8392706087036921, | |
| "learning_rate": 1.0382887968906718e-06, | |
| "loss": 0.3012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3209483027458191, | |
| "step": 3945, | |
| "valid_targets_mean": 3767.5, | |
| "valid_targets_min": 1430 | |
| }, | |
| { | |
| "epoch": 6.360708534621578, | |
| "grad_norm": 0.8201489913983286, | |
| "learning_rate": 1.012903003827883e-06, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3404323160648346, | |
| "step": 3950, | |
| "valid_targets_mean": 3205.4, | |
| "valid_targets_min": 977 | |
| }, | |
| { | |
| "epoch": 6.368760064412238, | |
| "grad_norm": 0.811177676991476, | |
| "learning_rate": 9.87823336645628e-07, | |
| "loss": 0.2924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27225881814956665, | |
| "step": 3955, | |
| "valid_targets_mean": 3383.2, | |
| "valid_targets_min": 1706 | |
| }, | |
| { | |
| "epoch": 6.3768115942028984, | |
| "grad_norm": 0.858309663982121, | |
| "learning_rate": 9.630501996993091e-07, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2884863018989563, | |
| "step": 3960, | |
| "valid_targets_mean": 2897.8, | |
| "valid_targets_min": 1102 | |
| }, | |
| { | |
| "epoch": 6.384863123993559, | |
| "grad_norm": 0.9879313892597844, | |
| "learning_rate": 9.385839924021844e-07, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3102876842021942, | |
| "step": 3965, | |
| "valid_targets_mean": 2376.7, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 6.392914653784219, | |
| "grad_norm": 0.8341622182276683, | |
| "learning_rate": 9.144251092189416e-07, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29059672355651855, | |
| "step": 3970, | |
| "valid_targets_mean": 3869.4, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 6.400966183574879, | |
| "grad_norm": 0.6305412459076566, | |
| "learning_rate": 8.905739396593316e-07, | |
| "loss": 0.2888, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2593904137611389, | |
| "step": 3975, | |
| "valid_targets_mean": 4852.6, | |
| "valid_targets_min": 1536 | |
| }, | |
| { | |
| "epoch": 6.4090177133655395, | |
| "grad_norm": 0.8415630056656381, | |
| "learning_rate": 8.670308682718853e-07, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30260005593299866, | |
| "step": 3980, | |
| "valid_targets_mean": 3675.9, | |
| "valid_targets_min": 1260 | |
| }, | |
| { | |
| "epoch": 6.4170692431562, | |
| "grad_norm": 0.7289917763669883, | |
| "learning_rate": 8.437962746377204e-07, | |
| "loss": 0.284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25300300121307373, | |
| "step": 3985, | |
| "valid_targets_mean": 3460.8, | |
| "valid_targets_min": 406 | |
| }, | |
| { | |
| "epoch": 6.42512077294686, | |
| "grad_norm": 0.8183662125493177, | |
| "learning_rate": 8.208705333644129e-07, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27502337098121643, | |
| "step": 3990, | |
| "valid_targets_mean": 4104.0, | |
| "valid_targets_min": 1569 | |
| }, | |
| { | |
| "epoch": 6.43317230273752, | |
| "grad_norm": 0.8025243264792566, | |
| "learning_rate": 7.982540140799688e-07, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2634972333908081, | |
| "step": 3995, | |
| "valid_targets_mean": 3000.4, | |
| "valid_targets_min": 1169 | |
| }, | |
| { | |
| "epoch": 6.4412238325281805, | |
| "grad_norm": 0.8460006631505339, | |
| "learning_rate": 7.759470814268489e-07, | |
| "loss": 0.2736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3205861449241638, | |
| "step": 4000, | |
| "valid_targets_mean": 3155.8, | |
| "valid_targets_min": 1407 | |
| }, | |
| { | |
| "epoch": 6.449275362318841, | |
| "grad_norm": 0.8293418081000622, | |
| "learning_rate": 7.539500950561063e-07, | |
| "loss": 0.2704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.261262983083725, | |
| "step": 4005, | |
| "valid_targets_mean": 2876.1, | |
| "valid_targets_min": 1097 | |
| }, | |
| { | |
| "epoch": 6.457326892109501, | |
| "grad_norm": 0.8476655987165361, | |
| "learning_rate": 7.322634096215831e-07, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2980925440788269, | |
| "step": 4010, | |
| "valid_targets_mean": 3099.4, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 6.465378421900161, | |
| "grad_norm": 0.8876078091742649, | |
| "learning_rate": 7.108873747741807e-07, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28537583351135254, | |
| "step": 4015, | |
| "valid_targets_mean": 2574.1, | |
| "valid_targets_min": 1351 | |
| }, | |
| { | |
| "epoch": 6.473429951690822, | |
| "grad_norm": 0.8957582444852635, | |
| "learning_rate": 6.898223351562405e-07, | |
| "loss": 0.2985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32109585404396057, | |
| "step": 4020, | |
| "valid_targets_mean": 3223.3, | |
| "valid_targets_min": 1314 | |
| }, | |
| { | |
| "epoch": 6.481481481481482, | |
| "grad_norm": 0.764077364410412, | |
| "learning_rate": 6.690686303959748e-07, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2634509801864624, | |
| "step": 4025, | |
| "valid_targets_mean": 3132.0, | |
| "valid_targets_min": 1272 | |
| }, | |
| { | |
| "epoch": 6.489533011272142, | |
| "grad_norm": 0.7443505087806759, | |
| "learning_rate": 6.48626595101991e-07, | |
| "loss": 0.2828, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2569301724433899, | |
| "step": 4030, | |
| "valid_targets_mean": 3755.3, | |
| "valid_targets_min": 1230 | |
| }, | |
| { | |
| "epoch": 6.4975845410628015, | |
| "grad_norm": 0.7930493772415029, | |
| "learning_rate": 6.284965588579028e-07, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24235209822654724, | |
| "step": 4035, | |
| "valid_targets_mean": 3029.9, | |
| "valid_targets_min": 1590 | |
| }, | |
| { | |
| "epoch": 6.505636070853463, | |
| "grad_norm": 0.7138369856169103, | |
| "learning_rate": 6.08678846217019e-07, | |
| "loss": 0.2706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27324530482292175, | |
| "step": 4040, | |
| "valid_targets_mean": 3930.0, | |
| "valid_targets_min": 1734 | |
| }, | |
| { | |
| "epoch": 6.513687600644122, | |
| "grad_norm": 0.7841606768019419, | |
| "learning_rate": 5.891737766970984e-07, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32244470715522766, | |
| "step": 4045, | |
| "valid_targets_mean": 3632.3, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 6.521739130434782, | |
| "grad_norm": 0.761125195078748, | |
| "learning_rate": 5.699816647752077e-07, | |
| "loss": 0.2805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2867166996002197, | |
| "step": 4050, | |
| "valid_targets_mean": 3690.8, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 6.5297906602254425, | |
| "grad_norm": 0.9595718427005088, | |
| "learning_rate": 5.511028198826496e-07, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2752280831336975, | |
| "step": 4055, | |
| "valid_targets_mean": 2800.6, | |
| "valid_targets_min": 485 | |
| }, | |
| { | |
| "epoch": 6.537842190016103, | |
| "grad_norm": 0.7724065995439069, | |
| "learning_rate": 5.32537546399976e-07, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3199930787086487, | |
| "step": 4060, | |
| "valid_targets_mean": 3744.1, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 6.545893719806763, | |
| "grad_norm": 0.7955896348465847, | |
| "learning_rate": 5.142861436520763e-07, | |
| "loss": 0.286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26323774456977844, | |
| "step": 4065, | |
| "valid_targets_mean": 3365.1, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 6.553945249597423, | |
| "grad_norm": 0.7123095433809674, | |
| "learning_rate": 4.963489059033477e-07, | |
| "loss": 0.2572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2584189176559448, | |
| "step": 4070, | |
| "valid_targets_mean": 3473.1, | |
| "valid_targets_min": 1657 | |
| }, | |
| { | |
| "epoch": 6.561996779388084, | |
| "grad_norm": 0.8106421482681555, | |
| "learning_rate": 4.787261223529616e-07, | |
| "loss": 0.2639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26411914825439453, | |
| "step": 4075, | |
| "valid_targets_mean": 2840.8, | |
| "valid_targets_min": 1494 | |
| }, | |
| { | |
| "epoch": 6.570048309178744, | |
| "grad_norm": 0.7944422290785246, | |
| "learning_rate": 4.6141807713019793e-07, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28050708770751953, | |
| "step": 4080, | |
| "valid_targets_mean": 2815.1, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 6.578099838969404, | |
| "grad_norm": 0.7829124652037258, | |
| "learning_rate": 4.444250492898539e-07, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2613917589187622, | |
| "step": 4085, | |
| "valid_targets_mean": 3228.2, | |
| "valid_targets_min": 1394 | |
| }, | |
| { | |
| "epoch": 6.586151368760064, | |
| "grad_norm": 0.785576038884685, | |
| "learning_rate": 4.277473128077625e-07, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2885136008262634, | |
| "step": 4090, | |
| "valid_targets_mean": 3174.3, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 6.594202898550725, | |
| "grad_norm": 0.7531252267510662, | |
| "learning_rate": 4.113851365763544e-07, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2605210542678833, | |
| "step": 4095, | |
| "valid_targets_mean": 3438.4, | |
| "valid_targets_min": 1124 | |
| }, | |
| { | |
| "epoch": 6.602254428341385, | |
| "grad_norm": 0.8483462893898233, | |
| "learning_rate": 3.953387844003431e-07, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24956530332565308, | |
| "step": 4100, | |
| "valid_targets_mean": 2587.6, | |
| "valid_targets_min": 1218 | |
| }, | |
| { | |
| "epoch": 6.610305958132045, | |
| "grad_norm": 0.8250120431362822, | |
| "learning_rate": 3.7960851499245554e-07, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2679558992385864, | |
| "step": 4105, | |
| "valid_targets_mean": 2969.8, | |
| "valid_targets_min": 1309 | |
| }, | |
| { | |
| "epoch": 6.618357487922705, | |
| "grad_norm": 0.9900756494723584, | |
| "learning_rate": 3.6419458196926825e-07, | |
| "loss": 0.2638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.276964396238327, | |
| "step": 4110, | |
| "valid_targets_mean": 3475.1, | |
| "valid_targets_min": 1128 | |
| }, | |
| { | |
| "epoch": 6.626409017713366, | |
| "grad_norm": 0.7872199755262294, | |
| "learning_rate": 3.4909723384712436e-07, | |
| "loss": 0.2567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2374078333377838, | |
| "step": 4115, | |
| "valid_targets_mean": 2925.9, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 6.634460547504026, | |
| "grad_norm": 0.7593732626595731, | |
| "learning_rate": 3.3431671403811207e-07, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2528577148914337, | |
| "step": 4120, | |
| "valid_targets_mean": 3369.0, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 6.642512077294686, | |
| "grad_norm": 0.7712701520481997, | |
| "learning_rate": 3.198532608461524e-07, | |
| "loss": 0.2814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27561864256858826, | |
| "step": 4125, | |
| "valid_targets_mean": 3307.7, | |
| "valid_targets_min": 1172 | |
| }, | |
| { | |
| "epoch": 6.650563607085346, | |
| "grad_norm": 0.8293254928497902, | |
| "learning_rate": 3.0570710746314903e-07, | |
| "loss": 0.2668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2669210433959961, | |
| "step": 4130, | |
| "valid_targets_mean": 2625.4, | |
| "valid_targets_min": 1313 | |
| }, | |
| { | |
| "epoch": 6.658615136876007, | |
| "grad_norm": 0.9102984688640577, | |
| "learning_rate": 2.9187848196524205e-07, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29154983162879944, | |
| "step": 4135, | |
| "valid_targets_mean": 3028.0, | |
| "valid_targets_min": 1602 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 0.7950386744860756, | |
| "learning_rate": 2.7836760730910464e-07, | |
| "loss": 0.2699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2758431136608124, | |
| "step": 4140, | |
| "valid_targets_mean": 3680.5, | |
| "valid_targets_min": 1441 | |
| }, | |
| { | |
| "epoch": 6.674718196457327, | |
| "grad_norm": 0.77773549922398, | |
| "learning_rate": 2.6517470132838117e-07, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2587966322898865, | |
| "step": 4145, | |
| "valid_targets_mean": 3472.8, | |
| "valid_targets_min": 1029 | |
| }, | |
| { | |
| "epoch": 6.6827697262479875, | |
| "grad_norm": 0.8232297238523694, | |
| "learning_rate": 2.522999767301482e-07, | |
| "loss": 0.2689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.251497745513916, | |
| "step": 4150, | |
| "valid_targets_mean": 3390.6, | |
| "valid_targets_min": 1873 | |
| }, | |
| { | |
| "epoch": 6.690821256038648, | |
| "grad_norm": 0.8278987027552264, | |
| "learning_rate": 2.3974364109149886e-07, | |
| "loss": 0.2923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2831830382347107, | |
| "step": 4155, | |
| "valid_targets_mean": 3282.8, | |
| "valid_targets_min": 588 | |
| }, | |
| { | |
| "epoch": 6.698872785829307, | |
| "grad_norm": 0.7184574441839767, | |
| "learning_rate": 2.2750589685619495e-07, | |
| "loss": 0.2751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29470086097717285, | |
| "step": 4160, | |
| "valid_targets_mean": 4544.4, | |
| "valid_targets_min": 1616 | |
| }, | |
| { | |
| "epoch": 6.706924315619968, | |
| "grad_norm": 0.6991872186724033, | |
| "learning_rate": 2.1558694133139823e-07, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2618224620819092, | |
| "step": 4165, | |
| "valid_targets_mean": 3384.4, | |
| "valid_targets_min": 1236 | |
| }, | |
| { | |
| "epoch": 6.714975845410628, | |
| "grad_norm": 0.6730952845085181, | |
| "learning_rate": 2.039869666844929e-07, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2868538200855255, | |
| "step": 4170, | |
| "valid_targets_mean": 4230.6, | |
| "valid_targets_min": 1229 | |
| }, | |
| { | |
| "epoch": 6.723027375201288, | |
| "grad_norm": 0.8142353289352747, | |
| "learning_rate": 1.9270615993998375e-07, | |
| "loss": 0.2628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27020856738090515, | |
| "step": 4175, | |
| "valid_targets_mean": 3359.7, | |
| "valid_targets_min": 1076 | |
| }, | |
| { | |
| "epoch": 6.731078904991948, | |
| "grad_norm": 0.6894366977045292, | |
| "learning_rate": 1.817447029764874e-07, | |
| "loss": 0.2735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2708439230918884, | |
| "step": 4180, | |
| "valid_targets_mean": 4327.3, | |
| "valid_targets_min": 1075 | |
| }, | |
| { | |
| "epoch": 6.739130434782608, | |
| "grad_norm": 0.6974543175965264, | |
| "learning_rate": 1.7110277252379238e-07, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23700222373008728, | |
| "step": 4185, | |
| "valid_targets_mean": 3812.0, | |
| "valid_targets_min": 1142 | |
| }, | |
| { | |
| "epoch": 6.747181964573269, | |
| "grad_norm": 0.902579622282395, | |
| "learning_rate": 1.607805401600149e-07, | |
| "loss": 0.2777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28798216581344604, | |
| "step": 4190, | |
| "valid_targets_mean": 2473.6, | |
| "valid_targets_min": 1208 | |
| }, | |
| { | |
| "epoch": 6.755233494363929, | |
| "grad_norm": 1.1198738028974389, | |
| "learning_rate": 1.5077817230883419e-07, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24988670647144318, | |
| "step": 4195, | |
| "valid_targets_mean": 7490.2, | |
| "valid_targets_min": 2721 | |
| }, | |
| { | |
| "epoch": 6.763285024154589, | |
| "grad_norm": 0.6445241702724612, | |
| "learning_rate": 1.4109583023679706e-07, | |
| "loss": 0.1741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19134119153022766, | |
| "step": 4200, | |
| "valid_targets_mean": 6344.0, | |
| "valid_targets_min": 3073 | |
| }, | |
| { | |
| "epoch": 6.7713365539452495, | |
| "grad_norm": 0.6558044755460182, | |
| "learning_rate": 1.3173367005073545e-07, | |
| "loss": 0.1521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14993247389793396, | |
| "step": 4205, | |
| "valid_targets_mean": 5062.1, | |
| "valid_targets_min": 2985 | |
| }, | |
| { | |
| "epoch": 6.77938808373591, | |
| "grad_norm": 0.6428349765980677, | |
| "learning_rate": 1.2269184269523282e-07, | |
| "loss": 0.181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19283941388130188, | |
| "step": 4210, | |
| "valid_targets_mean": 5613.6, | |
| "valid_targets_min": 2620 | |
| }, | |
| { | |
| "epoch": 6.78743961352657, | |
| "grad_norm": 0.6404100414290556, | |
| "learning_rate": 1.1397049395020842e-07, | |
| "loss": 0.1639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16561368107795715, | |
| "step": 4215, | |
| "valid_targets_mean": 6100.1, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 6.79549114331723, | |
| "grad_norm": 0.5799011128340407, | |
| "learning_rate": 1.0556976442854805e-07, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1338614821434021, | |
| "step": 4220, | |
| "valid_targets_mean": 5711.6, | |
| "valid_targets_min": 3761 | |
| }, | |
| { | |
| "epoch": 6.8035426731078905, | |
| "grad_norm": 0.5831529495090866, | |
| "learning_rate": 9.748978957385025e-08, | |
| "loss": 0.158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16792958974838257, | |
| "step": 4225, | |
| "valid_targets_mean": 5863.1, | |
| "valid_targets_min": 3178 | |
| }, | |
| { | |
| "epoch": 6.811594202898551, | |
| "grad_norm": 0.5618368648500537, | |
| "learning_rate": 8.9730699658237e-08, | |
| "loss": 0.1807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15672558546066284, | |
| "step": 4230, | |
| "valid_targets_mean": 6090.8, | |
| "valid_targets_min": 2870 | |
| }, | |
| { | |
| "epoch": 6.819645732689211, | |
| "grad_norm": 0.5753023449670881, | |
| "learning_rate": 8.229261978025316e-08, | |
| "loss": 0.179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14445531368255615, | |
| "step": 4235, | |
| "valid_targets_mean": 5746.5, | |
| "valid_targets_min": 3334 | |
| }, | |
| { | |
| "epoch": 6.827697262479871, | |
| "grad_norm": 0.6046612188056499, | |
| "learning_rate": 7.517566986285474e-08, | |
| "loss": 0.1688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16920645534992218, | |
| "step": 4240, | |
| "valid_targets_mean": 5464.9, | |
| "valid_targets_min": 3204 | |
| }, | |
| { | |
| "epoch": 6.835748792270532, | |
| "grad_norm": 0.566160926164469, | |
| "learning_rate": 6.837996465146823e-08, | |
| "loss": 0.1581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1559247374534607, | |
| "step": 4245, | |
| "valid_targets_mean": 5526.6, | |
| "valid_targets_min": 2793 | |
| }, | |
| { | |
| "epoch": 6.843800322061192, | |
| "grad_norm": 0.5934842203255924, | |
| "learning_rate": 6.190561371214321e-08, | |
| "loss": 0.1722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17836138606071472, | |
| "step": 4250, | |
| "valid_targets_mean": 6074.9, | |
| "valid_targets_min": 3285 | |
| }, | |
| { | |
| "epoch": 6.851851851851852, | |
| "grad_norm": 0.5989111745239429, | |
| "learning_rate": 5.575272142978927e-08, | |
| "loss": 0.1479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14026236534118652, | |
| "step": 4255, | |
| "valid_targets_mean": 5260.6, | |
| "valid_targets_min": 2292 | |
| }, | |
| { | |
| "epoch": 6.859903381642512, | |
| "grad_norm": 0.5852529999276449, | |
| "learning_rate": 4.992138700649074e-08, | |
| "loss": 0.163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16409310698509216, | |
| "step": 4260, | |
| "valid_targets_mean": 5752.9, | |
| "valid_targets_min": 4204 | |
| }, | |
| { | |
| "epoch": 6.867954911433173, | |
| "grad_norm": 0.6115217698676657, | |
| "learning_rate": 4.4411704459903506e-08, | |
| "loss": 0.1653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1484546661376953, | |
| "step": 4265, | |
| "valid_targets_mean": 4836.7, | |
| "valid_targets_min": 2111 | |
| }, | |
| { | |
| "epoch": 6.876006441223833, | |
| "grad_norm": 0.6316569084859304, | |
| "learning_rate": 3.92237626217451e-08, | |
| "loss": 0.1577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18417194485664368, | |
| "step": 4270, | |
| "valid_targets_mean": 6021.0, | |
| "valid_targets_min": 4439 | |
| }, | |
| { | |
| "epoch": 6.884057971014493, | |
| "grad_norm": 0.5659170506134107, | |
| "learning_rate": 3.435764513635809e-08, | |
| "loss": 0.1585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16125309467315674, | |
| "step": 4275, | |
| "valid_targets_mean": 5661.9, | |
| "valid_targets_min": 3607 | |
| }, | |
| { | |
| "epoch": 6.892109500805153, | |
| "grad_norm": 0.6183161652818728, | |
| "learning_rate": 2.9813430459364465e-08, | |
| "loss": 0.1506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16375675797462463, | |
| "step": 4280, | |
| "valid_targets_mean": 5420.4, | |
| "valid_targets_min": 3078 | |
| }, | |
| { | |
| "epoch": 6.900161030595813, | |
| "grad_norm": 0.598243453584514, | |
| "learning_rate": 2.5591191856397802e-08, | |
| "loss": 0.1538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15643054246902466, | |
| "step": 4285, | |
| "valid_targets_mean": 5750.4, | |
| "valid_targets_min": 2491 | |
| }, | |
| { | |
| "epoch": 6.908212560386474, | |
| "grad_norm": 0.5759364478429377, | |
| "learning_rate": 2.1690997401928593e-08, | |
| "loss": 0.1605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14383944869041443, | |
| "step": 4290, | |
| "valid_targets_mean": 5996.6, | |
| "valid_targets_min": 4004 | |
| }, | |
| { | |
| "epoch": 6.916264090177133, | |
| "grad_norm": 0.5552887684264204, | |
| "learning_rate": 1.811290997815851e-08, | |
| "loss": 0.1586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14651936292648315, | |
| "step": 4295, | |
| "valid_targets_mean": 6200.9, | |
| "valid_targets_min": 3148 | |
| }, | |
| { | |
| "epoch": 6.9243156199677935, | |
| "grad_norm": 0.49927201422387957, | |
| "learning_rate": 1.485698727400564e-08, | |
| "loss": 0.1534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13894528150558472, | |
| "step": 4300, | |
| "valid_targets_mean": 6472.3, | |
| "valid_targets_min": 3581 | |
| }, | |
| { | |
| "epoch": 6.932367149758454, | |
| "grad_norm": 0.6237894773842607, | |
| "learning_rate": 1.1923281784185226e-08, | |
| "loss": 0.1677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22623667120933533, | |
| "step": 4305, | |
| "valid_targets_mean": 7532.8, | |
| "valid_targets_min": 3206 | |
| }, | |
| { | |
| "epoch": 6.940418679549114, | |
| "grad_norm": 0.5964161951339775, | |
| "learning_rate": 9.311840808357009e-09, | |
| "loss": 0.1687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1940787136554718, | |
| "step": 4310, | |
| "valid_targets_mean": 5952.9, | |
| "valid_targets_min": 3304 | |
| }, | |
| { | |
| "epoch": 6.948470209339774, | |
| "grad_norm": 0.5723446720622757, | |
| "learning_rate": 7.022706450354744e-09, | |
| "loss": 0.1532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15612125396728516, | |
| "step": 4315, | |
| "valid_targets_mean": 5735.1, | |
| "valid_targets_min": 3368 | |
| }, | |
| { | |
| "epoch": 6.956521739130435, | |
| "grad_norm": 0.6247370179630106, | |
| "learning_rate": 5.055915617522278e-09, | |
| "loss": 0.1553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16278734803199768, | |
| "step": 4320, | |
| "valid_targets_mean": 5506.7, | |
| "valid_targets_min": 2908 | |
| }, | |
| { | |
| "epoch": 6.964573268921095, | |
| "grad_norm": 0.6002549871414108, | |
| "learning_rate": 3.411500020109593e-09, | |
| "loss": 0.1588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16616703569889069, | |
| "step": 4325, | |
| "valid_targets_mean": 5777.9, | |
| "valid_targets_min": 3451 | |
| }, | |
| { | |
| "epoch": 6.972624798711755, | |
| "grad_norm": 0.4927861563779905, | |
| "learning_rate": 2.08948617075988e-09, | |
| "loss": 0.1497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13573740422725677, | |
| "step": 4330, | |
| "valid_targets_mean": 6045.9, | |
| "valid_targets_min": 2967 | |
| }, | |
| { | |
| "epoch": 6.980676328502415, | |
| "grad_norm": 0.5443175862615581, | |
| "learning_rate": 1.0898953840898786e-09, | |
| "loss": 0.151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1544107049703598, | |
| "step": 4335, | |
| "valid_targets_mean": 6113.4, | |
| "valid_targets_min": 3912 | |
| }, | |
| { | |
| "epoch": 6.988727858293076, | |
| "grad_norm": 0.5346570746956858, | |
| "learning_rate": 4.127437763390418e-10, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1397796869277954, | |
| "step": 4340, | |
| "valid_targets_mean": 5837.0, | |
| "valid_targets_min": 3528 | |
| }, | |
| { | |
| "epoch": 6.996779388083736, | |
| "grad_norm": 0.5722567869143517, | |
| "learning_rate": 5.804226511196831e-11, | |
| "loss": 0.1502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16464683413505554, | |
| "step": 4345, | |
| "valid_targets_mean": 6356.6, | |
| "valid_targets_min": 3725 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18215620517730713, | |
| "step": 4347, | |
| "total_flos": 1416797650550784.0, | |
| "train_loss": 0.3156141554908915, | |
| "train_runtime": 23294.0349, | |
| "train_samples_per_second": 2.983, | |
| "train_steps_per_second": 0.187, | |
| "valid_targets_mean": 6658.9, | |
| "valid_targets_min": 2494 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4347, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1416797650550784.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |