Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent/a1-agenttuning_db with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent/a1-agenttuning_db with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="DCAgent/a1-agenttuning_db")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("DCAgent/a1-agenttuning_db")
model = AutoModelForCausalLM.from_pretrained("DCAgent/a1-agenttuning_db")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use DCAgent/a1-agenttuning_db with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "DCAgent/a1-agenttuning_db"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-agenttuning_db",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/DCAgent/a1-agenttuning_db
- SGLang
How to use DCAgent/a1-agenttuning_db with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "DCAgent/a1-agenttuning_db" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-agenttuning_db",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DCAgent/a1-agenttuning_db" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "DCAgent/a1-agenttuning_db",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use DCAgent/a1-agenttuning_db with Docker Model Runner:
docker model run hf.co/DCAgent/a1-agenttuning_db
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 4375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 41.48267217588327, | |
| "learning_rate": 3.6529680365296803e-07, | |
| "loss": 1.1093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 1.0744223594665527, | |
| "step": 5, | |
| "valid_targets_mean": 5344.6, | |
| "valid_targets_min": 4753 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 42.52472136972466, | |
| "learning_rate": 8.219178082191781e-07, | |
| "loss": 1.0867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 1.1271662712097168, | |
| "step": 10, | |
| "valid_targets_mean": 5144.6, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 36.36384573086545, | |
| "learning_rate": 1.278538812785388e-06, | |
| "loss": 1.0403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.973920464515686, | |
| "step": 15, | |
| "valid_targets_mean": 5821.7, | |
| "valid_targets_min": 4699 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 27.781760950256945, | |
| "learning_rate": 1.7351598173515982e-06, | |
| "loss": 0.9402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.9180818200111389, | |
| "step": 20, | |
| "valid_targets_mean": 5087.6, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 12.355719861374782, | |
| "learning_rate": 2.191780821917808e-06, | |
| "loss": 0.8414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.7690733671188354, | |
| "step": 25, | |
| "valid_targets_mean": 5578.6, | |
| "valid_targets_min": 4697 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 5.787558122095733, | |
| "learning_rate": 2.6484018264840183e-06, | |
| "loss": 0.7263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6984169483184814, | |
| "step": 30, | |
| "valid_targets_mean": 5860.6, | |
| "valid_targets_min": 4661 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 3.229060374203213, | |
| "learning_rate": 3.1050228310502285e-06, | |
| "loss": 0.694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6726365089416504, | |
| "step": 35, | |
| "valid_targets_mean": 5143.1, | |
| "valid_targets_min": 4364 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 2.207626507677227, | |
| "learning_rate": 3.5616438356164386e-06, | |
| "loss": 0.6432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6480756998062134, | |
| "step": 40, | |
| "valid_targets_mean": 5455.8, | |
| "valid_targets_min": 4723 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 1.7410719786322346, | |
| "learning_rate": 4.018264840182649e-06, | |
| "loss": 0.6224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5961447954177856, | |
| "step": 45, | |
| "valid_targets_mean": 5163.5, | |
| "valid_targets_min": 4653 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.2859706452093995, | |
| "learning_rate": 4.4748858447488585e-06, | |
| "loss": 0.6021, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5773907899856567, | |
| "step": 50, | |
| "valid_targets_mean": 6126.1, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 1.0641543008915124, | |
| "learning_rate": 4.931506849315069e-06, | |
| "loss": 0.5795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5590709447860718, | |
| "step": 55, | |
| "valid_targets_mean": 5600.0, | |
| "valid_targets_min": 4520 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.9130219930407264, | |
| "learning_rate": 5.388127853881279e-06, | |
| "loss": 0.5563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.538820207118988, | |
| "step": 60, | |
| "valid_targets_mean": 5120.5, | |
| "valid_targets_min": 4645 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.8072425983577204, | |
| "learning_rate": 5.8447488584474885e-06, | |
| "loss": 0.5388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5388973951339722, | |
| "step": 65, | |
| "valid_targets_mean": 5509.6, | |
| "valid_targets_min": 4190 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.7042491950885484, | |
| "learning_rate": 6.301369863013699e-06, | |
| "loss": 0.5207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5141251087188721, | |
| "step": 70, | |
| "valid_targets_mean": 5484.8, | |
| "valid_targets_min": 4547 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.6856928593070973, | |
| "learning_rate": 6.757990867579909e-06, | |
| "loss": 0.5003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.48507508635520935, | |
| "step": 75, | |
| "valid_targets_mean": 5222.2, | |
| "valid_targets_min": 4540 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.6451077420191259, | |
| "learning_rate": 7.214611872146119e-06, | |
| "loss": 0.4648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4541280269622803, | |
| "step": 80, | |
| "valid_targets_mean": 5086.8, | |
| "valid_targets_min": 4759 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.6395528199729872, | |
| "learning_rate": 7.671232876712329e-06, | |
| "loss": 0.4682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47132256627082825, | |
| "step": 85, | |
| "valid_targets_mean": 5097.5, | |
| "valid_targets_min": 4644 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.5963768078264076, | |
| "learning_rate": 8.127853881278539e-06, | |
| "loss": 0.4426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4383199214935303, | |
| "step": 90, | |
| "valid_targets_mean": 5104.0, | |
| "valid_targets_min": 4467 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 1.050450828316397, | |
| "learning_rate": 8.584474885844748e-06, | |
| "loss": 0.441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4402307868003845, | |
| "step": 95, | |
| "valid_targets_mean": 5362.3, | |
| "valid_targets_min": 4578 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.5422721096483499, | |
| "learning_rate": 9.04109589041096e-06, | |
| "loss": 0.4249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4008721709251404, | |
| "step": 100, | |
| "valid_targets_mean": 5205.2, | |
| "valid_targets_min": 4748 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.5381734091262664, | |
| "learning_rate": 9.49771689497717e-06, | |
| "loss": 0.4043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40058964490890503, | |
| "step": 105, | |
| "valid_targets_mean": 5629.2, | |
| "valid_targets_min": 4405 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.5638734527181037, | |
| "learning_rate": 9.95433789954338e-06, | |
| "loss": 0.3936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3704482316970825, | |
| "step": 110, | |
| "valid_targets_mean": 5082.3, | |
| "valid_targets_min": 4500 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.5802623531574952, | |
| "learning_rate": 1.0410958904109589e-05, | |
| "loss": 0.3818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37275075912475586, | |
| "step": 115, | |
| "valid_targets_mean": 5028.9, | |
| "valid_targets_min": 4339 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.5790409357550212, | |
| "learning_rate": 1.08675799086758e-05, | |
| "loss": 0.3878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37736862897872925, | |
| "step": 120, | |
| "valid_targets_mean": 5552.3, | |
| "valid_targets_min": 4798 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.5503637009399802, | |
| "learning_rate": 1.132420091324201e-05, | |
| "loss": 0.3734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3660987615585327, | |
| "step": 125, | |
| "valid_targets_mean": 4975.8, | |
| "valid_targets_min": 4565 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.570961100711023, | |
| "learning_rate": 1.178082191780822e-05, | |
| "loss": 0.3777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38703659176826477, | |
| "step": 130, | |
| "valid_targets_mean": 5782.8, | |
| "valid_targets_min": 4667 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.9417323725658978, | |
| "learning_rate": 1.223744292237443e-05, | |
| "loss": 0.3652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.352072149515152, | |
| "step": 135, | |
| "valid_targets_mean": 5190.6, | |
| "valid_targets_min": 4736 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.6293383015972439, | |
| "learning_rate": 1.2694063926940641e-05, | |
| "loss": 0.3635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37142515182495117, | |
| "step": 140, | |
| "valid_targets_mean": 5122.1, | |
| "valid_targets_min": 4428 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.5802320067314515, | |
| "learning_rate": 1.3150684931506849e-05, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35850074887275696, | |
| "step": 145, | |
| "valid_targets_mean": 5028.2, | |
| "valid_targets_min": 4484 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.6073320113946218, | |
| "learning_rate": 1.360730593607306e-05, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3670348525047302, | |
| "step": 150, | |
| "valid_targets_mean": 5141.9, | |
| "valid_targets_min": 4888 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.6088108890614897, | |
| "learning_rate": 1.406392694063927e-05, | |
| "loss": 0.3448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33971989154815674, | |
| "step": 155, | |
| "valid_targets_mean": 4916.2, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.6209901065703772, | |
| "learning_rate": 1.4520547945205482e-05, | |
| "loss": 0.3474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34659576416015625, | |
| "step": 160, | |
| "valid_targets_mean": 5073.0, | |
| "valid_targets_min": 4560 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.6014676883165583, | |
| "learning_rate": 1.497716894977169e-05, | |
| "loss": 0.3457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33949244022369385, | |
| "step": 165, | |
| "valid_targets_mean": 5042.2, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.6239447262653012, | |
| "learning_rate": 1.54337899543379e-05, | |
| "loss": 0.3468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34782153367996216, | |
| "step": 170, | |
| "valid_targets_mean": 5183.5, | |
| "valid_targets_min": 4753 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.6548069365108764, | |
| "learning_rate": 1.589041095890411e-05, | |
| "loss": 0.3423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33342432975769043, | |
| "step": 175, | |
| "valid_targets_mean": 5068.6, | |
| "valid_targets_min": 4431 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.5967771751046361, | |
| "learning_rate": 1.634703196347032e-05, | |
| "loss": 0.3334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33005160093307495, | |
| "step": 180, | |
| "valid_targets_mean": 5033.6, | |
| "valid_targets_min": 4378 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.6135754826741349, | |
| "learning_rate": 1.680365296803653e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3495161235332489, | |
| "step": 185, | |
| "valid_targets_mean": 5340.8, | |
| "valid_targets_min": 4548 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.5489684931005692, | |
| "learning_rate": 1.726027397260274e-05, | |
| "loss": 0.3303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33336079120635986, | |
| "step": 190, | |
| "valid_targets_mean": 5938.4, | |
| "valid_targets_min": 4728 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.6748282376959986, | |
| "learning_rate": 1.771689497716895e-05, | |
| "loss": 0.3339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34268665313720703, | |
| "step": 195, | |
| "valid_targets_mean": 5557.3, | |
| "valid_targets_min": 4542 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.664951324661493, | |
| "learning_rate": 1.8173515981735163e-05, | |
| "loss": 0.3263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3339554965496063, | |
| "step": 200, | |
| "valid_targets_mean": 5046.9, | |
| "valid_targets_min": 4331 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.6062415357232775, | |
| "learning_rate": 1.863013698630137e-05, | |
| "loss": 0.3293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3297001123428345, | |
| "step": 205, | |
| "valid_targets_mean": 6081.2, | |
| "valid_targets_min": 4859 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.6307853362000495, | |
| "learning_rate": 1.9086757990867582e-05, | |
| "loss": 0.3318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34066876769065857, | |
| "step": 210, | |
| "valid_targets_mean": 5621.7, | |
| "valid_targets_min": 4453 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.5957249141636137, | |
| "learning_rate": 1.954337899543379e-05, | |
| "loss": 0.3255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.329799085855484, | |
| "step": 215, | |
| "valid_targets_mean": 5076.2, | |
| "valid_targets_min": 4187 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.6500261678623673, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3213668167591095, | |
| "step": 220, | |
| "valid_targets_mean": 5095.1, | |
| "valid_targets_min": 4630 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.6659143977408585, | |
| "learning_rate": 2.045662100456621e-05, | |
| "loss": 0.3224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3278706967830658, | |
| "step": 225, | |
| "valid_targets_mean": 5207.9, | |
| "valid_targets_min": 4612 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.6306846194009973, | |
| "learning_rate": 2.0913242009132424e-05, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3368001878261566, | |
| "step": 230, | |
| "valid_targets_mean": 5141.5, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.6136656613987913, | |
| "learning_rate": 2.1369863013698632e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30407845973968506, | |
| "step": 235, | |
| "valid_targets_mean": 4947.9, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.5882882056289576, | |
| "learning_rate": 2.182648401826484e-05, | |
| "loss": 0.314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30712687969207764, | |
| "step": 240, | |
| "valid_targets_mean": 5277.5, | |
| "valid_targets_min": 4985 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.6792390820549776, | |
| "learning_rate": 2.2283105022831052e-05, | |
| "loss": 0.307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3128761649131775, | |
| "step": 245, | |
| "valid_targets_mean": 5452.2, | |
| "valid_targets_min": 4492 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5952280582180847, | |
| "learning_rate": 2.2739726027397263e-05, | |
| "loss": 0.3196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31811392307281494, | |
| "step": 250, | |
| "valid_targets_mean": 5191.4, | |
| "valid_targets_min": 4659 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.553861122120998, | |
| "learning_rate": 2.3196347031963475e-05, | |
| "loss": 0.3285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34972524642944336, | |
| "step": 255, | |
| "valid_targets_mean": 6231.8, | |
| "valid_targets_min": 4974 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.5976105949318701, | |
| "learning_rate": 2.3652968036529683e-05, | |
| "loss": 0.32, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3123967945575714, | |
| "step": 260, | |
| "valid_targets_mean": 5537.2, | |
| "valid_targets_min": 4520 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.6283023799511579, | |
| "learning_rate": 2.410958904109589e-05, | |
| "loss": 0.3096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.304302841424942, | |
| "step": 265, | |
| "valid_targets_mean": 5917.9, | |
| "valid_targets_min": 4658 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.5755727059026366, | |
| "learning_rate": 2.4566210045662106e-05, | |
| "loss": 0.3155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30874377489089966, | |
| "step": 270, | |
| "valid_targets_mean": 5225.2, | |
| "valid_targets_min": 4551 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.6066646651524692, | |
| "learning_rate": 2.5022831050228314e-05, | |
| "loss": 0.3131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3122427463531494, | |
| "step": 275, | |
| "valid_targets_mean": 5139.6, | |
| "valid_targets_min": 4495 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.6105779746811376, | |
| "learning_rate": 2.547945205479452e-05, | |
| "loss": 0.3088, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3014758825302124, | |
| "step": 280, | |
| "valid_targets_mean": 5128.3, | |
| "valid_targets_min": 4672 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.5993220405983207, | |
| "learning_rate": 2.593607305936073e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3007251024246216, | |
| "step": 285, | |
| "valid_targets_mean": 5872.6, | |
| "valid_targets_min": 4373 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.6043205201545263, | |
| "learning_rate": 2.6392694063926944e-05, | |
| "loss": 0.3137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3071848452091217, | |
| "step": 290, | |
| "valid_targets_mean": 5066.6, | |
| "valid_targets_min": 4557 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.6209107225771124, | |
| "learning_rate": 2.6849315068493153e-05, | |
| "loss": 0.313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2957759499549866, | |
| "step": 295, | |
| "valid_targets_mean": 5574.6, | |
| "valid_targets_min": 4459 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.7010821387332346, | |
| "learning_rate": 2.7305936073059364e-05, | |
| "loss": 0.3054, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.305367648601532, | |
| "step": 300, | |
| "valid_targets_mean": 5012.4, | |
| "valid_targets_min": 4301 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.5914207395944693, | |
| "learning_rate": 2.7762557077625572e-05, | |
| "loss": 0.3072, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3117062449455261, | |
| "step": 305, | |
| "valid_targets_mean": 5830.9, | |
| "valid_targets_min": 4402 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.6574101565082576, | |
| "learning_rate": 2.8219178082191783e-05, | |
| "loss": 0.3051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29925376176834106, | |
| "step": 310, | |
| "valid_targets_mean": 5027.9, | |
| "valid_targets_min": 4557 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.5825922751564784, | |
| "learning_rate": 2.8675799086757995e-05, | |
| "loss": 0.3143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.296475887298584, | |
| "step": 315, | |
| "valid_targets_mean": 5498.8, | |
| "valid_targets_min": 4820 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.5981429387600079, | |
| "learning_rate": 2.9132420091324203e-05, | |
| "loss": 0.303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30361106991767883, | |
| "step": 320, | |
| "valid_targets_mean": 5333.4, | |
| "valid_targets_min": 4812 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.6352501446074462, | |
| "learning_rate": 2.958904109589041e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3079923987388611, | |
| "step": 325, | |
| "valid_targets_mean": 5235.1, | |
| "valid_targets_min": 4411 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.6141412078573231, | |
| "learning_rate": 3.0045662100456626e-05, | |
| "loss": 0.3107, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3160425126552582, | |
| "step": 330, | |
| "valid_targets_mean": 5018.9, | |
| "valid_targets_min": 2945 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.6191319310590888, | |
| "learning_rate": 3.0502283105022834e-05, | |
| "loss": 0.3058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.302371621131897, | |
| "step": 335, | |
| "valid_targets_mean": 5202.3, | |
| "valid_targets_min": 4643 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.5648586517092755, | |
| "learning_rate": 3.0958904109589045e-05, | |
| "loss": 0.3047, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2902504801750183, | |
| "step": 340, | |
| "valid_targets_mean": 5616.7, | |
| "valid_targets_min": 4672 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.6106871850574572, | |
| "learning_rate": 3.141552511415525e-05, | |
| "loss": 0.3019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2895923852920532, | |
| "step": 345, | |
| "valid_targets_mean": 4986.4, | |
| "valid_targets_min": 4533 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.647436092067867, | |
| "learning_rate": 3.187214611872147e-05, | |
| "loss": 0.3114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3162408471107483, | |
| "step": 350, | |
| "valid_targets_mean": 5294.6, | |
| "valid_targets_min": 4369 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.6056025256796437, | |
| "learning_rate": 3.2328767123287676e-05, | |
| "loss": 0.3039, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2989775836467743, | |
| "step": 355, | |
| "valid_targets_mean": 5238.6, | |
| "valid_targets_min": 4875 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.5975718097905182, | |
| "learning_rate": 3.2785388127853884e-05, | |
| "loss": 0.3058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2913365066051483, | |
| "step": 360, | |
| "valid_targets_mean": 5083.4, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.7038548400945549, | |
| "learning_rate": 3.324200913242009e-05, | |
| "loss": 0.3001, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2951727509498596, | |
| "step": 365, | |
| "valid_targets_mean": 5037.6, | |
| "valid_targets_min": 4648 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.5556234656501057, | |
| "learning_rate": 3.369863013698631e-05, | |
| "loss": 0.3006, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29591482877731323, | |
| "step": 370, | |
| "valid_targets_mean": 5071.1, | |
| "valid_targets_min": 4720 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.628123847086434, | |
| "learning_rate": 3.4155251141552515e-05, | |
| "loss": 0.3, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30407440662384033, | |
| "step": 375, | |
| "valid_targets_mean": 5449.8, | |
| "valid_targets_min": 4479 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.6686542851466162, | |
| "learning_rate": 3.461187214611872e-05, | |
| "loss": 0.2943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29530227184295654, | |
| "step": 380, | |
| "valid_targets_mean": 5117.3, | |
| "valid_targets_min": 4442 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.6299620829469124, | |
| "learning_rate": 3.506849315068493e-05, | |
| "loss": 0.3063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29595834016799927, | |
| "step": 385, | |
| "valid_targets_mean": 5537.3, | |
| "valid_targets_min": 4621 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.5689884222530954, | |
| "learning_rate": 3.5525114155251146e-05, | |
| "loss": 0.2959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30083680152893066, | |
| "step": 390, | |
| "valid_targets_mean": 5410.8, | |
| "valid_targets_min": 4318 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.5393422719779996, | |
| "learning_rate": 3.5981735159817354e-05, | |
| "loss": 0.2986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29868602752685547, | |
| "step": 395, | |
| "valid_targets_mean": 5314.3, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.613915509031289, | |
| "learning_rate": 3.643835616438356e-05, | |
| "loss": 0.2971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.297812283039093, | |
| "step": 400, | |
| "valid_targets_mean": 5712.1, | |
| "valid_targets_min": 4784 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.5233061954472192, | |
| "learning_rate": 3.689497716894977e-05, | |
| "loss": 0.3061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29525867104530334, | |
| "step": 405, | |
| "valid_targets_mean": 5602.2, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.5854346601241329, | |
| "learning_rate": 3.7351598173515985e-05, | |
| "loss": 0.2893, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30001023411750793, | |
| "step": 410, | |
| "valid_targets_mean": 5574.3, | |
| "valid_targets_min": 4538 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.6046664345899331, | |
| "learning_rate": 3.780821917808219e-05, | |
| "loss": 0.2959, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2902340292930603, | |
| "step": 415, | |
| "valid_targets_mean": 4981.3, | |
| "valid_targets_min": 4345 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.5241939446578651, | |
| "learning_rate": 3.82648401826484e-05, | |
| "loss": 0.3011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.313001811504364, | |
| "step": 420, | |
| "valid_targets_mean": 5190.8, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.5880769097011542, | |
| "learning_rate": 3.8721461187214615e-05, | |
| "loss": 0.2869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29051679372787476, | |
| "step": 425, | |
| "valid_targets_mean": 5137.5, | |
| "valid_targets_min": 4657 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.5504134246839527, | |
| "learning_rate": 3.9178082191780823e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29594725370407104, | |
| "step": 430, | |
| "valid_targets_mean": 5574.6, | |
| "valid_targets_min": 4710 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.6774366820067329, | |
| "learning_rate": 3.963470319634704e-05, | |
| "loss": 0.295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29221445322036743, | |
| "step": 435, | |
| "valid_targets_mean": 5040.4, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.5818544136111268, | |
| "learning_rate": 3.99999936325009e-05, | |
| "loss": 0.2985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2968166172504425, | |
| "step": 440, | |
| "valid_targets_mean": 5139.3, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.5810487586614415, | |
| "learning_rate": 3.9999770770457856e-05, | |
| "loss": 0.2934, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29999667406082153, | |
| "step": 445, | |
| "valid_targets_mean": 5989.9, | |
| "valid_targets_min": 4530 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.6423517893635512, | |
| "learning_rate": 3.9999229537513936e-05, | |
| "loss": 0.3044, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3244275748729706, | |
| "step": 450, | |
| "valid_targets_mean": 5128.0, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.613550368878516, | |
| "learning_rate": 3.999836994228487e-05, | |
| "loss": 0.2935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3070667088031769, | |
| "step": 455, | |
| "valid_targets_mean": 4993.0, | |
| "valid_targets_min": 4487 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.5972359745175845, | |
| "learning_rate": 3.999719199845432e-05, | |
| "loss": 0.289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2970615029335022, | |
| "step": 460, | |
| "valid_targets_mean": 5119.9, | |
| "valid_targets_min": 4574 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.5818756234855581, | |
| "learning_rate": 3.999569572477366e-05, | |
| "loss": 0.2956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2938227653503418, | |
| "step": 465, | |
| "valid_targets_mean": 5169.8, | |
| "valid_targets_min": 4485 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.61491129339576, | |
| "learning_rate": 3.999388114506166e-05, | |
| "loss": 0.288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2962954044342041, | |
| "step": 470, | |
| "valid_targets_mean": 5511.5, | |
| "valid_targets_min": 4566 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.6432644343015743, | |
| "learning_rate": 3.999174828820413e-05, | |
| "loss": 0.2963, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29986363649368286, | |
| "step": 475, | |
| "valid_targets_mean": 5241.1, | |
| "valid_targets_min": 4777 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.5291835801340822, | |
| "learning_rate": 3.998929718815341e-05, | |
| "loss": 0.3002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29931387305259705, | |
| "step": 480, | |
| "valid_targets_mean": 5244.9, | |
| "valid_targets_min": 4432 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.5539051100335697, | |
| "learning_rate": 3.998652788392792e-05, | |
| "loss": 0.2874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28588584065437317, | |
| "step": 485, | |
| "valid_targets_mean": 5145.2, | |
| "valid_targets_min": 4431 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.5957547083463285, | |
| "learning_rate": 3.9983440419611445e-05, | |
| "loss": 0.2956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2997606098651886, | |
| "step": 490, | |
| "valid_targets_mean": 5269.9, | |
| "valid_targets_min": 4489 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.5135634784827406, | |
| "learning_rate": 3.9980034844352494e-05, | |
| "loss": 0.2941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.288108229637146, | |
| "step": 495, | |
| "valid_targets_mean": 5646.1, | |
| "valid_targets_min": 4747 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.7786071102398887, | |
| "learning_rate": 3.9976311212363495e-05, | |
| "loss": 0.2869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29409587383270264, | |
| "step": 500, | |
| "valid_targets_mean": 5127.2, | |
| "valid_targets_min": 4570 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.5900978552753009, | |
| "learning_rate": 3.997226958291992e-05, | |
| "loss": 0.2918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29185256361961365, | |
| "step": 505, | |
| "valid_targets_mean": 5180.9, | |
| "valid_targets_min": 4856 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.5765291268139495, | |
| "learning_rate": 3.996791002035937e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29964008927345276, | |
| "step": 510, | |
| "valid_targets_mean": 5147.1, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.5345944980435329, | |
| "learning_rate": 3.996323259408055e-05, | |
| "loss": 0.2923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29471057653427124, | |
| "step": 515, | |
| "valid_targets_mean": 5030.1, | |
| "valid_targets_min": 4411 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.5934672620702418, | |
| "learning_rate": 3.995823737854211e-05, | |
| "loss": 0.2904, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2861570715904236, | |
| "step": 520, | |
| "valid_targets_mean": 4959.1, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.5978966468475728, | |
| "learning_rate": 3.9952924453261534e-05, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29697802662849426, | |
| "step": 525, | |
| "valid_targets_mean": 5309.2, | |
| "valid_targets_min": 4293 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.503430763048133, | |
| "learning_rate": 3.994729390281384e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28601276874542236, | |
| "step": 530, | |
| "valid_targets_mean": 5651.4, | |
| "valid_targets_min": 4617 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.5325995595456505, | |
| "learning_rate": 3.994134581683021e-05, | |
| "loss": 0.286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2800452709197998, | |
| "step": 535, | |
| "valid_targets_mean": 5135.8, | |
| "valid_targets_min": 4585 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.5928448012945597, | |
| "learning_rate": 3.9935080289996626e-05, | |
| "loss": 0.2833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2776099741458893, | |
| "step": 540, | |
| "valid_targets_mean": 5109.6, | |
| "valid_targets_min": 4626 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.5531818122324985, | |
| "learning_rate": 3.992849742205228e-05, | |
| "loss": 0.2883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29633668065071106, | |
| "step": 545, | |
| "valid_targets_mean": 5172.6, | |
| "valid_targets_min": 4519 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.4819551724569532, | |
| "learning_rate": 3.9921597317788065e-05, | |
| "loss": 0.2867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2966027855873108, | |
| "step": 550, | |
| "valid_targets_mean": 5180.7, | |
| "valid_targets_min": 4598 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.47304400878096264, | |
| "learning_rate": 3.991438008704486e-05, | |
| "loss": 0.2903, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3065999746322632, | |
| "step": 555, | |
| "valid_targets_mean": 5670.3, | |
| "valid_targets_min": 4262 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.6549257352690704, | |
| "learning_rate": 3.990684584471179e-05, | |
| "loss": 0.2967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2979024350643158, | |
| "step": 560, | |
| "valid_targets_mean": 5180.9, | |
| "valid_targets_min": 4424 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.5134984457990459, | |
| "learning_rate": 3.989899471072441e-05, | |
| "loss": 0.2856, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2805606424808502, | |
| "step": 565, | |
| "valid_targets_mean": 5006.9, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.5395517642708562, | |
| "learning_rate": 3.9890826810062784e-05, | |
| "loss": 0.2993, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29373061656951904, | |
| "step": 570, | |
| "valid_targets_mean": 5275.9, | |
| "valid_targets_min": 4684 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.5494611765837467, | |
| "learning_rate": 3.988234227274949e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28657403588294983, | |
| "step": 575, | |
| "valid_targets_mean": 5887.9, | |
| "valid_targets_min": 4297 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.5233528807579381, | |
| "learning_rate": 3.987354123384757e-05, | |
| "loss": 0.2816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26802366971969604, | |
| "step": 580, | |
| "valid_targets_mean": 4961.1, | |
| "valid_targets_min": 4341 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.5794274396635953, | |
| "learning_rate": 3.9864423833458364e-05, | |
| "loss": 0.2943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29995492100715637, | |
| "step": 585, | |
| "valid_targets_mean": 5014.4, | |
| "valid_targets_min": 4324 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.4942748323031088, | |
| "learning_rate": 3.9854990216719285e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27287155389785767, | |
| "step": 590, | |
| "valid_targets_mean": 5245.6, | |
| "valid_targets_min": 4601 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.5196422262960827, | |
| "learning_rate": 3.98452405338015e-05, | |
| "loss": 0.2843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2753028869628906, | |
| "step": 595, | |
| "valid_targets_mean": 5108.1, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.4849911085198617, | |
| "learning_rate": 3.983517493990756e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27941620349884033, | |
| "step": 600, | |
| "valid_targets_mean": 4982.4, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.500878799328196, | |
| "learning_rate": 3.982479359526892e-05, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2786497175693512, | |
| "step": 605, | |
| "valid_targets_mean": 5163.8, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.48362508947855243, | |
| "learning_rate": 3.981409666514336e-05, | |
| "loss": 0.2926, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2899950444698334, | |
| "step": 610, | |
| "valid_targets_mean": 5158.2, | |
| "valid_targets_min": 4647 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.49249733387119954, | |
| "learning_rate": 3.98030843198124e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2825813293457031, | |
| "step": 615, | |
| "valid_targets_mean": 5084.9, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.46293935420644333, | |
| "learning_rate": 3.979175673457858e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2907191514968872, | |
| "step": 620, | |
| "valid_targets_mean": 6027.4, | |
| "valid_targets_min": 4633 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5160447694156247, | |
| "learning_rate": 3.9780114089762616e-05, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28003865480422974, | |
| "step": 625, | |
| "valid_targets_mean": 5021.4, | |
| "valid_targets_min": 4538 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.5226058804790251, | |
| "learning_rate": 3.976815657070062e-05, | |
| "loss": 0.2765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2695348560810089, | |
| "step": 630, | |
| "valid_targets_mean": 5099.3, | |
| "valid_targets_min": 4455 | |
| }, | |
| { | |
| "epoch": 1.016, | |
| "grad_norm": 0.5317509862631608, | |
| "learning_rate": 3.975588436774107e-05, | |
| "loss": 0.2851, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2909337282180786, | |
| "step": 635, | |
| "valid_targets_mean": 5288.8, | |
| "valid_targets_min": 4728 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.5860693647519873, | |
| "learning_rate": 3.9743297676241826e-05, | |
| "loss": 0.2855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29314106702804565, | |
| "step": 640, | |
| "valid_targets_mean": 5575.3, | |
| "valid_targets_min": 4375 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.4772801569431276, | |
| "learning_rate": 3.9730396696566994e-05, | |
| "loss": 0.2777, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27620768547058105, | |
| "step": 645, | |
| "valid_targets_mean": 5102.0, | |
| "valid_targets_min": 4796 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.5081132137784612, | |
| "learning_rate": 3.971718163408375e-05, | |
| "loss": 0.2848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28528887033462524, | |
| "step": 650, | |
| "valid_targets_mean": 5050.3, | |
| "valid_targets_min": 4506 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 0.48153408227821004, | |
| "learning_rate": 3.9703652699159093e-05, | |
| "loss": 0.2765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26511329412460327, | |
| "step": 655, | |
| "valid_targets_mean": 5047.1, | |
| "valid_targets_min": 4653 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.5154803201881213, | |
| "learning_rate": 3.9689810107156425e-05, | |
| "loss": 0.2845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27684324979782104, | |
| "step": 660, | |
| "valid_targets_mean": 4914.2, | |
| "valid_targets_min": 4405 | |
| }, | |
| { | |
| "epoch": 1.064, | |
| "grad_norm": 0.4872566035516795, | |
| "learning_rate": 3.967565407843222e-05, | |
| "loss": 0.2812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28388112783432007, | |
| "step": 665, | |
| "valid_targets_mean": 5674.2, | |
| "valid_targets_min": 4722 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.4875016426055404, | |
| "learning_rate": 3.966118483833242e-05, | |
| "loss": 0.281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27613145112991333, | |
| "step": 670, | |
| "valid_targets_mean": 5037.3, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.4737390479539686, | |
| "learning_rate": 3.964640261718893e-05, | |
| "loss": 0.2793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27517664432525635, | |
| "step": 675, | |
| "valid_targets_mean": 5489.9, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.5060389357158048, | |
| "learning_rate": 3.963130765031589e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2846803367137909, | |
| "step": 680, | |
| "valid_targets_mean": 5161.4, | |
| "valid_targets_min": 4375 | |
| }, | |
| { | |
| "epoch": 1.096, | |
| "grad_norm": 0.5458461603758565, | |
| "learning_rate": 3.961590017800598e-05, | |
| "loss": 0.275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28260499238967896, | |
| "step": 685, | |
| "valid_targets_mean": 5651.3, | |
| "valid_targets_min": 4774 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.4905501145897204, | |
| "learning_rate": 3.960018044552653e-05, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654094696044922, | |
| "step": 690, | |
| "valid_targets_mean": 5069.3, | |
| "valid_targets_min": 4389 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.48251485205366196, | |
| "learning_rate": 3.9584148703115704e-05, | |
| "loss": 0.2854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2779829502105713, | |
| "step": 695, | |
| "valid_targets_mean": 5273.8, | |
| "valid_targets_min": 4892 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.49082534413871515, | |
| "learning_rate": 3.956780520597842e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27341553568840027, | |
| "step": 700, | |
| "valid_targets_mean": 5519.4, | |
| "valid_targets_min": 4657 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.5024532017659665, | |
| "learning_rate": 3.955115021428236e-05, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28446292877197266, | |
| "step": 705, | |
| "valid_targets_mean": 5229.1, | |
| "valid_targets_min": 4826 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.5118021283301418, | |
| "learning_rate": 3.95341839931538e-05, | |
| "loss": 0.2848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2773263156414032, | |
| "step": 710, | |
| "valid_targets_mean": 5034.5, | |
| "valid_targets_min": 4515 | |
| }, | |
| { | |
| "epoch": 1.144, | |
| "grad_norm": 0.47063572611555277, | |
| "learning_rate": 3.95169068126734e-05, | |
| "loss": 0.2802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2705405354499817, | |
| "step": 715, | |
| "valid_targets_mean": 5516.2, | |
| "valid_targets_min": 4791 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.4661386838885875, | |
| "learning_rate": 3.949931894787187e-05, | |
| "loss": 0.2842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2839936316013336, | |
| "step": 720, | |
| "valid_targets_mean": 5725.4, | |
| "valid_targets_min": 4189 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.45814645188148906, | |
| "learning_rate": 3.948142067872565e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2712182402610779, | |
| "step": 725, | |
| "valid_targets_mean": 4939.9, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.5004084717452577, | |
| "learning_rate": 3.946321229015241e-05, | |
| "loss": 0.2826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.279945969581604, | |
| "step": 730, | |
| "valid_targets_mean": 5309.3, | |
| "valid_targets_min": 4708 | |
| }, | |
| { | |
| "epoch": 1.176, | |
| "grad_norm": 0.47274122869042495, | |
| "learning_rate": 3.944469407200652e-05, | |
| "loss": 0.2794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28295987844467163, | |
| "step": 735, | |
| "valid_targets_mean": 5540.9, | |
| "valid_targets_min": 4545 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.4750741553626001, | |
| "learning_rate": 3.942586631907444e-05, | |
| "loss": 0.2839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27753764390945435, | |
| "step": 740, | |
| "valid_targets_mean": 5191.3, | |
| "valid_targets_min": 4639 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.4538909940104585, | |
| "learning_rate": 3.9406729331070054e-05, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26948511600494385, | |
| "step": 745, | |
| "valid_targets_mean": 5866.4, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.5612766224832479, | |
| "learning_rate": 3.938728341262985e-05, | |
| "loss": 0.2833, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29091164469718933, | |
| "step": 750, | |
| "valid_targets_mean": 5297.4, | |
| "valid_targets_min": 4757 | |
| }, | |
| { | |
| "epoch": 1.208, | |
| "grad_norm": 0.4343034285512249, | |
| "learning_rate": 3.936752887330812e-05, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2773773670196533, | |
| "step": 755, | |
| "valid_targets_mean": 5564.2, | |
| "valid_targets_min": 4746 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.4859321976954645, | |
| "learning_rate": 3.9347466027571975e-05, | |
| "loss": 0.2774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2706279754638672, | |
| "step": 760, | |
| "valid_targets_mean": 5135.0, | |
| "valid_targets_min": 4700 | |
| }, | |
| { | |
| "epoch": 1.224, | |
| "grad_norm": 0.4548221944364064, | |
| "learning_rate": 3.932709519479639e-05, | |
| "loss": 0.2844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2676329016685486, | |
| "step": 765, | |
| "valid_targets_mean": 5511.2, | |
| "valid_targets_min": 4733 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.5140887004571317, | |
| "learning_rate": 3.930641669925911e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29188498854637146, | |
| "step": 770, | |
| "valid_targets_mean": 5134.3, | |
| "valid_targets_min": 4366 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.4821239090314257, | |
| "learning_rate": 3.928543087013546e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2697313129901886, | |
| "step": 775, | |
| "valid_targets_mean": 5184.4, | |
| "valid_targets_min": 4482 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.4506386450660479, | |
| "learning_rate": 3.926413804149315e-05, | |
| "loss": 0.2819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28536325693130493, | |
| "step": 780, | |
| "valid_targets_mean": 5145.8, | |
| "valid_targets_min": 4232 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 0.4905935276701051, | |
| "learning_rate": 3.9242538552286894e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2914523184299469, | |
| "step": 785, | |
| "valid_targets_mean": 5185.0, | |
| "valid_targets_min": 4444 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.49133505495054797, | |
| "learning_rate": 3.9220632746353096e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27615886926651, | |
| "step": 790, | |
| "valid_targets_mean": 5116.6, | |
| "valid_targets_min": 4608 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.5273414070466526, | |
| "learning_rate": 3.91984209724043e-05, | |
| "loss": 0.271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26419854164123535, | |
| "step": 795, | |
| "valid_targets_mean": 5008.3, | |
| "valid_targets_min": 4317 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.5023882312706708, | |
| "learning_rate": 3.917590358402369e-05, | |
| "loss": 0.2783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.278087854385376, | |
| "step": 800, | |
| "valid_targets_mean": 5014.5, | |
| "valid_targets_min": 4459 | |
| }, | |
| { | |
| "epoch": 1.288, | |
| "grad_norm": 0.45295208016443633, | |
| "learning_rate": 3.915308093965943e-05, | |
| "loss": 0.2815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2823716998100281, | |
| "step": 805, | |
| "valid_targets_mean": 5831.9, | |
| "valid_targets_min": 4688 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.4736185291933048, | |
| "learning_rate": 3.9129953402618976e-05, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26458173990249634, | |
| "step": 810, | |
| "valid_targets_mean": 5239.7, | |
| "valid_targets_min": 4261 | |
| }, | |
| { | |
| "epoch": 1.304, | |
| "grad_norm": 0.48713145112021844, | |
| "learning_rate": 3.91065213410633e-05, | |
| "loss": 0.2745, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2789565622806549, | |
| "step": 815, | |
| "valid_targets_mean": 5136.8, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.4817585084007966, | |
| "learning_rate": 3.908278512800098e-05, | |
| "loss": 0.2674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2718963027000427, | |
| "step": 820, | |
| "valid_targets_mean": 4982.5, | |
| "valid_targets_min": 4028 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.4500305079689075, | |
| "learning_rate": 3.905874514128235e-05, | |
| "loss": 0.283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.273202121257782, | |
| "step": 825, | |
| "valid_targets_mean": 5248.3, | |
| "valid_targets_min": 4557 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.4403632264670863, | |
| "learning_rate": 3.903440176359338e-05, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2947240471839905, | |
| "step": 830, | |
| "valid_targets_mean": 5509.8, | |
| "valid_targets_min": 4965 | |
| }, | |
| { | |
| "epoch": 1.336, | |
| "grad_norm": 0.49019330158006275, | |
| "learning_rate": 3.90097553824497e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2857062816619873, | |
| "step": 835, | |
| "valid_targets_mean": 5693.3, | |
| "valid_targets_min": 4663 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.47891380639109704, | |
| "learning_rate": 3.8984806390190304e-05, | |
| "loss": 0.2751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2642487585544586, | |
| "step": 840, | |
| "valid_targets_mean": 4965.2, | |
| "valid_targets_min": 4281 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.46782955455367464, | |
| "learning_rate": 3.895955518397141e-05, | |
| "loss": 0.2755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27184945344924927, | |
| "step": 845, | |
| "valid_targets_mean": 5111.6, | |
| "valid_targets_min": 4419 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.48102907917752635, | |
| "learning_rate": 3.893400216576011e-05, | |
| "loss": 0.2768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2891300320625305, | |
| "step": 850, | |
| "valid_targets_mean": 5109.8, | |
| "valid_targets_min": 4424 | |
| }, | |
| { | |
| "epoch": 1.3679999999999999, | |
| "grad_norm": 0.4592128572616621, | |
| "learning_rate": 3.89081477423279e-05, | |
| "loss": 0.271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2600412368774414, | |
| "step": 855, | |
| "valid_targets_mean": 5445.3, | |
| "valid_targets_min": 4633 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.4674441860742552, | |
| "learning_rate": 3.888199232524434e-05, | |
| "loss": 0.275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2726278305053711, | |
| "step": 860, | |
| "valid_targets_mean": 5023.3, | |
| "valid_targets_min": 4565 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 0.44835881965479724, | |
| "learning_rate": 3.8855536330870354e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27754950523376465, | |
| "step": 865, | |
| "valid_targets_mean": 5929.1, | |
| "valid_targets_min": 4265 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 0.4840350530619543, | |
| "learning_rate": 3.882878018035173e-05, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2762763202190399, | |
| "step": 870, | |
| "valid_targets_mean": 5041.6, | |
| "valid_targets_min": 4276 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.45538175963142036, | |
| "learning_rate": 3.880172429961232e-05, | |
| "loss": 0.2754, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2720921039581299, | |
| "step": 875, | |
| "valid_targets_mean": 5064.2, | |
| "valid_targets_min": 4294 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.48851161355273864, | |
| "learning_rate": 3.877436911934733e-05, | |
| "loss": 0.2758, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28127169609069824, | |
| "step": 880, | |
| "valid_targets_mean": 5187.4, | |
| "valid_targets_min": 4716 | |
| }, | |
| { | |
| "epoch": 1.416, | |
| "grad_norm": 0.46534847983356614, | |
| "learning_rate": 3.874671507501641e-05, | |
| "loss": 0.2741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26857540011405945, | |
| "step": 885, | |
| "valid_targets_mean": 5808.9, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.459572781919761, | |
| "learning_rate": 3.871876260683677e-05, | |
| "loss": 0.274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26840341091156006, | |
| "step": 890, | |
| "valid_targets_mean": 5071.9, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.49158121769133095, | |
| "learning_rate": 3.869051215977612e-05, | |
| "loss": 0.2701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2642505168914795, | |
| "step": 895, | |
| "valid_targets_mean": 5139.9, | |
| "valid_targets_min": 4772 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.479286900404796, | |
| "learning_rate": 3.8661964183545634e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28979548811912537, | |
| "step": 900, | |
| "valid_targets_mean": 5245.5, | |
| "valid_targets_min": 4381 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.4517176059024187, | |
| "learning_rate": 3.863311913259276e-05, | |
| "loss": 0.2733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2717403471469879, | |
| "step": 905, | |
| "valid_targets_mean": 5086.3, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.4914086716188088, | |
| "learning_rate": 3.860397746609402e-05, | |
| "loss": 0.2707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27707457542419434, | |
| "step": 910, | |
| "valid_targets_mean": 5526.8, | |
| "valid_targets_min": 4668 | |
| }, | |
| { | |
| "epoch": 1.464, | |
| "grad_norm": 0.4503212365960944, | |
| "learning_rate": 3.857453964794764e-05, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2732169032096863, | |
| "step": 915, | |
| "valid_targets_mean": 5043.6, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.4685792687435555, | |
| "learning_rate": 3.854480614676624e-05, | |
| "loss": 0.2713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26808834075927734, | |
| "step": 920, | |
| "valid_targets_mean": 5040.2, | |
| "valid_targets_min": 4540 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.4658172746385567, | |
| "learning_rate": 3.851477743586932e-05, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2980782687664032, | |
| "step": 925, | |
| "valid_targets_mean": 5602.1, | |
| "valid_targets_min": 4561 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.4549412555828652, | |
| "learning_rate": 3.8484453993275746e-05, | |
| "loss": 0.2764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28826165199279785, | |
| "step": 930, | |
| "valid_targets_mean": 5096.9, | |
| "valid_targets_min": 4582 | |
| }, | |
| { | |
| "epoch": 1.496, | |
| "grad_norm": 0.48447710309700837, | |
| "learning_rate": 3.8453836301696134e-05, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2800939381122589, | |
| "step": 935, | |
| "valid_targets_mean": 5160.6, | |
| "valid_targets_min": 4726 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.46174017848690474, | |
| "learning_rate": 3.842292484852518e-05, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2808574438095093, | |
| "step": 940, | |
| "valid_targets_mean": 5684.6, | |
| "valid_targets_min": 4702 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 0.454157270839013, | |
| "learning_rate": 3.8391720125833875e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2780129313468933, | |
| "step": 945, | |
| "valid_targets_mean": 5352.1, | |
| "valid_targets_min": 4850 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.48444528075953375, | |
| "learning_rate": 3.83602226303617e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27945470809936523, | |
| "step": 950, | |
| "valid_targets_mean": 5163.1, | |
| "valid_targets_min": 4571 | |
| }, | |
| { | |
| "epoch": 1.528, | |
| "grad_norm": 0.43130357138693015, | |
| "learning_rate": 3.83284328635087e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2779189348220825, | |
| "step": 955, | |
| "valid_targets_mean": 5726.4, | |
| "valid_targets_min": 4839 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.4383732529787542, | |
| "learning_rate": 3.829635133132751e-05, | |
| "loss": 0.2801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2892981171607971, | |
| "step": 960, | |
| "valid_targets_mean": 5804.6, | |
| "valid_targets_min": 4564 | |
| }, | |
| { | |
| "epoch": 1.544, | |
| "grad_norm": 0.48569651085302806, | |
| "learning_rate": 3.8263978544515304e-05, | |
| "loss": 0.2694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27060210704803467, | |
| "step": 965, | |
| "valid_targets_mean": 5040.2, | |
| "valid_targets_min": 4727 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.4620556151064429, | |
| "learning_rate": 3.823131501840565e-05, | |
| "loss": 0.2731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2721632719039917, | |
| "step": 970, | |
| "valid_targets_mean": 5093.4, | |
| "valid_targets_min": 4585 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.45350465772280013, | |
| "learning_rate": 3.819836127296032e-05, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2734321355819702, | |
| "step": 975, | |
| "valid_targets_mean": 5689.2, | |
| "valid_targets_min": 4529 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.46806733776249154, | |
| "learning_rate": 3.8165117832761016e-05, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26196902990341187, | |
| "step": 980, | |
| "valid_targets_mean": 4903.1, | |
| "valid_targets_min": 4339 | |
| }, | |
| { | |
| "epoch": 1.576, | |
| "grad_norm": 0.4390767193153199, | |
| "learning_rate": 3.813158522700098e-05, | |
| "loss": 0.274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28208720684051514, | |
| "step": 985, | |
| "valid_targets_mean": 5065.0, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.4370400548035411, | |
| "learning_rate": 3.809776398947665e-05, | |
| "loss": 0.2765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2764721214771271, | |
| "step": 990, | |
| "valid_targets_mean": 5267.0, | |
| "valid_targets_min": 4234 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.4527861963289008, | |
| "learning_rate": 3.806365465857908e-05, | |
| "loss": 0.2747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2703186869621277, | |
| "step": 995, | |
| "valid_targets_mean": 5182.6, | |
| "valid_targets_min": 4387 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.46412685674392057, | |
| "learning_rate": 3.802925777728541e-05, | |
| "loss": 0.272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27285662293434143, | |
| "step": 1000, | |
| "valid_targets_mean": 5115.7, | |
| "valid_targets_min": 4552 | |
| }, | |
| { | |
| "epoch": 1.608, | |
| "grad_norm": 0.4338824126453332, | |
| "learning_rate": 3.799457389315023e-05, | |
| "loss": 0.2751, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2795727849006653, | |
| "step": 1005, | |
| "valid_targets_mean": 5446.8, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.4438503029921287, | |
| "learning_rate": 3.795960355829683e-05, | |
| "loss": 0.2748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28046315908432007, | |
| "step": 1010, | |
| "valid_targets_mean": 5236.4, | |
| "valid_targets_min": 4378 | |
| }, | |
| { | |
| "epoch": 1.624, | |
| "grad_norm": 0.4735034334092632, | |
| "learning_rate": 3.7924347329408444e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2841615080833435, | |
| "step": 1015, | |
| "valid_targets_mean": 5007.6, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.44256920416308937, | |
| "learning_rate": 3.788880576771937e-05, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2712724208831787, | |
| "step": 1020, | |
| "valid_targets_mean": 5175.6, | |
| "valid_targets_min": 4789 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.4144427178562593, | |
| "learning_rate": 3.785297943900605e-05, | |
| "loss": 0.274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2837413549423218, | |
| "step": 1025, | |
| "valid_targets_mean": 6050.4, | |
| "valid_targets_min": 4451 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.42221000849413626, | |
| "learning_rate": 3.7816868913578044e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27200859785079956, | |
| "step": 1030, | |
| "valid_targets_mean": 4994.9, | |
| "valid_targets_min": 4471 | |
| }, | |
| { | |
| "epoch": 1.6560000000000001, | |
| "grad_norm": 0.43193238822089985, | |
| "learning_rate": 3.778047476626897e-05, | |
| "loss": 0.2712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2673565447330475, | |
| "step": 1035, | |
| "valid_targets_mean": 5207.8, | |
| "valid_targets_min": 4657 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.44772575163078215, | |
| "learning_rate": 3.7743797576427335e-05, | |
| "loss": 0.2725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654898166656494, | |
| "step": 1040, | |
| "valid_targets_mean": 4977.7, | |
| "valid_targets_min": 4545 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.44043991753951073, | |
| "learning_rate": 3.770683792790733e-05, | |
| "loss": 0.2722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2779926061630249, | |
| "step": 1045, | |
| "valid_targets_mean": 5257.2, | |
| "valid_targets_min": 4940 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.421639238585126, | |
| "learning_rate": 3.766959640905954e-05, | |
| "loss": 0.2664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2679011821746826, | |
| "step": 1050, | |
| "valid_targets_mean": 5113.9, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 1.688, | |
| "grad_norm": 0.44884261810064296, | |
| "learning_rate": 3.763207361272153e-05, | |
| "loss": 0.2705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27626657485961914, | |
| "step": 1055, | |
| "valid_targets_mean": 5168.1, | |
| "valid_targets_min": 4731 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.4402085365910254, | |
| "learning_rate": 3.759427013620849e-05, | |
| "loss": 0.2744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29398608207702637, | |
| "step": 1060, | |
| "valid_targets_mean": 5220.2, | |
| "valid_targets_min": 4654 | |
| }, | |
| { | |
| "epoch": 1.704, | |
| "grad_norm": 0.4386392172686101, | |
| "learning_rate": 3.755618658130366e-05, | |
| "loss": 0.2713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2678283452987671, | |
| "step": 1065, | |
| "valid_targets_mean": 5127.4, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.41909539417661956, | |
| "learning_rate": 3.751782355424877e-05, | |
| "loss": 0.2742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27132901549339294, | |
| "step": 1070, | |
| "valid_targets_mean": 5497.4, | |
| "valid_targets_min": 4551 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.4177169129432134, | |
| "learning_rate": 3.7479181665734395e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2601410746574402, | |
| "step": 1075, | |
| "valid_targets_mean": 5063.2, | |
| "valid_targets_min": 4500 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.40236892854610734, | |
| "learning_rate": 3.7440261530890213e-05, | |
| "loss": 0.2693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27684485912323, | |
| "step": 1080, | |
| "valid_targets_mean": 6075.2, | |
| "valid_targets_min": 4710 | |
| }, | |
| { | |
| "epoch": 1.736, | |
| "grad_norm": 0.4517896137257272, | |
| "learning_rate": 3.740106376927527e-05, | |
| "loss": 0.2704, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2630888521671295, | |
| "step": 1085, | |
| "valid_targets_mean": 5134.1, | |
| "valid_targets_min": 4673 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.3939404944543937, | |
| "learning_rate": 3.7361589004868035e-05, | |
| "loss": 0.2695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2808040678501129, | |
| "step": 1090, | |
| "valid_targets_mean": 5529.3, | |
| "valid_targets_min": 4318 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.4262648262777727, | |
| "learning_rate": 3.7321837866056535e-05, | |
| "loss": 0.2709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2572194039821625, | |
| "step": 1095, | |
| "valid_targets_mean": 5336.7, | |
| "valid_targets_min": 4278 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.41423674168608615, | |
| "learning_rate": 3.728181098562831e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26673072576522827, | |
| "step": 1100, | |
| "valid_targets_mean": 5220.0, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.4147900545050869, | |
| "learning_rate": 3.7241509000760355e-05, | |
| "loss": 0.2749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27419865131378174, | |
| "step": 1105, | |
| "valid_targets_mean": 5153.2, | |
| "valid_targets_min": 4708 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.3889135706876527, | |
| "learning_rate": 3.720093255300899e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27808883786201477, | |
| "step": 1110, | |
| "valid_targets_mean": 5127.2, | |
| "valid_targets_min": 4584 | |
| }, | |
| { | |
| "epoch": 1.784, | |
| "grad_norm": 0.3995059767959737, | |
| "learning_rate": 3.7160082288299645e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2689400017261505, | |
| "step": 1115, | |
| "valid_targets_mean": 5830.5, | |
| "valid_targets_min": 4715 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.46584914844947084, | |
| "learning_rate": 3.7118958856916534e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2709977626800537, | |
| "step": 1120, | |
| "valid_targets_mean": 5141.9, | |
| "valid_targets_min": 4699 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.41201006727752826, | |
| "learning_rate": 3.707756291349237e-05, | |
| "loss": 0.273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28116050362586975, | |
| "step": 1125, | |
| "valid_targets_mean": 6227.6, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.4178640700207778, | |
| "learning_rate": 3.703589511699787e-05, | |
| "loss": 0.2687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267488956451416, | |
| "step": 1130, | |
| "valid_targets_mean": 5050.4, | |
| "valid_targets_min": 4471 | |
| }, | |
| { | |
| "epoch": 1.8159999999999998, | |
| "grad_norm": 0.4169265408112601, | |
| "learning_rate": 3.6993956130731355e-05, | |
| "loss": 0.2719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27151012420654297, | |
| "step": 1135, | |
| "valid_targets_mean": 5096.7, | |
| "valid_targets_min": 4519 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.42606132227268767, | |
| "learning_rate": 3.6951746622308106e-05, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2671900987625122, | |
| "step": 1140, | |
| "valid_targets_mean": 5170.3, | |
| "valid_targets_min": 4606 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.4733167184268155, | |
| "learning_rate": 3.69092672636498e-05, | |
| "loss": 0.277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26970502734184265, | |
| "step": 1145, | |
| "valid_targets_mean": 5157.2, | |
| "valid_targets_min": 4736 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.4113084408390369, | |
| "learning_rate": 3.686651873097375e-05, | |
| "loss": 0.2798, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608642578125, | |
| "step": 1150, | |
| "valid_targets_mean": 5131.9, | |
| "valid_targets_min": 4036 | |
| }, | |
| { | |
| "epoch": 1.8479999999999999, | |
| "grad_norm": 0.4191900953166415, | |
| "learning_rate": 3.682350170478223e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26852309703826904, | |
| "step": 1155, | |
| "valid_targets_mean": 5472.8, | |
| "valid_targets_min": 4328 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.44547787626468943, | |
| "learning_rate": 3.678021686985153e-05, | |
| "loss": 0.272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27121421694755554, | |
| "step": 1160, | |
| "valid_targets_mean": 4932.2, | |
| "valid_targets_min": 4292 | |
| }, | |
| { | |
| "epoch": 1.8639999999999999, | |
| "grad_norm": 0.4482605656698092, | |
| "learning_rate": 3.6736664915221144e-05, | |
| "loss": 0.2742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26977333426475525, | |
| "step": 1165, | |
| "valid_targets_mean": 5524.2, | |
| "valid_targets_min": 4713 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 0.43989805250686337, | |
| "learning_rate": 3.669284653418278e-05, | |
| "loss": 0.269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2658311128616333, | |
| "step": 1170, | |
| "valid_targets_mean": 5004.9, | |
| "valid_targets_min": 4417 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.4161408710604701, | |
| "learning_rate": 3.6648762424269306e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2672930061817169, | |
| "step": 1175, | |
| "valid_targets_mean": 5251.4, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.44551344756361044, | |
| "learning_rate": 3.660441328724365e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2628483772277832, | |
| "step": 1180, | |
| "valid_targets_mean": 5112.8, | |
| "valid_targets_min": 4428 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 0.5260324772380767, | |
| "learning_rate": 3.655979982908764e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2807210087776184, | |
| "step": 1185, | |
| "valid_targets_mean": 5162.4, | |
| "valid_targets_min": 4580 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.4225557571068519, | |
| "learning_rate": 3.6514922759990756e-05, | |
| "loss": 0.2699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636038064956665, | |
| "step": 1190, | |
| "valid_targets_mean": 5206.2, | |
| "valid_targets_min": 4755 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 0.4319064765414918, | |
| "learning_rate": 3.646978279433883e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2623159885406494, | |
| "step": 1195, | |
| "valid_targets_mean": 5139.4, | |
| "valid_targets_min": 4599 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.45259848227256105, | |
| "learning_rate": 3.6424380650702685e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28122270107269287, | |
| "step": 1200, | |
| "valid_targets_mean": 5277.5, | |
| "valid_targets_min": 4333 | |
| }, | |
| { | |
| "epoch": 1.928, | |
| "grad_norm": 0.3947404156665269, | |
| "learning_rate": 3.637871705182667e-05, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2715410590171814, | |
| "step": 1205, | |
| "valid_targets_mean": 5066.3, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.4020970149153179, | |
| "learning_rate": 3.633279272461717e-05, | |
| "loss": 0.2696, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26953428983688354, | |
| "step": 1210, | |
| "valid_targets_mean": 5559.9, | |
| "valid_targets_min": 4672 | |
| }, | |
| { | |
| "epoch": 1.944, | |
| "grad_norm": 0.4395304716806097, | |
| "learning_rate": 3.628660840013102e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27504533529281616, | |
| "step": 1215, | |
| "valid_targets_mean": 4967.1, | |
| "valid_targets_min": 4073 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.46320679905948864, | |
| "learning_rate": 3.624016481356392e-05, | |
| "loss": 0.2682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2830734848976135, | |
| "step": 1220, | |
| "valid_targets_mean": 5067.8, | |
| "valid_targets_min": 4361 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.4433064825368503, | |
| "learning_rate": 3.619346270423866e-05, | |
| "loss": 0.2711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.272439569234848, | |
| "step": 1225, | |
| "valid_targets_mean": 5023.0, | |
| "valid_targets_min": 4606 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.39182229302420485, | |
| "learning_rate": 3.6146502815593384e-05, | |
| "loss": 0.2699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27151912450790405, | |
| "step": 1230, | |
| "valid_targets_mean": 5832.8, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 1.976, | |
| "grad_norm": 0.43108305897456967, | |
| "learning_rate": 3.609928589516977e-05, | |
| "loss": 0.2699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26756954193115234, | |
| "step": 1235, | |
| "valid_targets_mean": 5175.4, | |
| "valid_targets_min": 4707 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.4481003356477769, | |
| "learning_rate": 3.6051812694601114e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2817588448524475, | |
| "step": 1240, | |
| "valid_targets_mean": 5353.6, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.4370007949299128, | |
| "learning_rate": 3.6004083969600346e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.263859361410141, | |
| "step": 1245, | |
| "valid_targets_mean": 5045.7, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.3916284851658144, | |
| "learning_rate": 3.595610047994804e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2623412609100342, | |
| "step": 1250, | |
| "valid_targets_mean": 5133.8, | |
| "valid_targets_min": 4707 | |
| }, | |
| { | |
| "epoch": 2.008, | |
| "grad_norm": 0.38647150731682806, | |
| "learning_rate": 3.5907862989480285e-05, | |
| "loss": 0.2666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28468072414398193, | |
| "step": 1255, | |
| "valid_targets_mean": 5559.6, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.43975911263786754, | |
| "learning_rate": 3.585937226607656e-05, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26077812910079956, | |
| "step": 1260, | |
| "valid_targets_mean": 5193.1, | |
| "valid_targets_min": 4608 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 0.42709274959865967, | |
| "learning_rate": 3.5810629081647476e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2689046263694763, | |
| "step": 1265, | |
| "valid_targets_mean": 5109.5, | |
| "valid_targets_min": 4377 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.40719432506183706, | |
| "learning_rate": 3.576163421212249e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2627074718475342, | |
| "step": 1270, | |
| "valid_targets_mean": 5707.1, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.4125459776301987, | |
| "learning_rate": 3.5712388437437576e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2600048780441284, | |
| "step": 1275, | |
| "valid_targets_mean": 4904.2, | |
| "valid_targets_min": 4101 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.4382164997268334, | |
| "learning_rate": 3.566289254152283e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2644914984703064, | |
| "step": 1280, | |
| "valid_targets_mean": 4860.1, | |
| "valid_targets_min": 2945 | |
| }, | |
| { | |
| "epoch": 2.056, | |
| "grad_norm": 0.43585199294057764, | |
| "learning_rate": 3.56131473122899e-05, | |
| "loss": 0.2688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2632347345352173, | |
| "step": 1285, | |
| "valid_targets_mean": 5061.4, | |
| "valid_targets_min": 4428 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.39736056949686677, | |
| "learning_rate": 3.556315354161955e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2632041573524475, | |
| "step": 1290, | |
| "valid_targets_mean": 5119.9, | |
| "valid_targets_min": 4538 | |
| }, | |
| { | |
| "epoch": 2.072, | |
| "grad_norm": 0.4463035288537384, | |
| "learning_rate": 3.551291202534899e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27033740282058716, | |
| "step": 1295, | |
| "valid_targets_mean": 5177.2, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.4339344269012719, | |
| "learning_rate": 3.546242356325922e-05, | |
| "loss": 0.2619, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25774192810058594, | |
| "step": 1300, | |
| "valid_targets_mean": 4949.8, | |
| "valid_targets_min": 4342 | |
| }, | |
| { | |
| "epoch": 2.088, | |
| "grad_norm": 0.41051778626745666, | |
| "learning_rate": 3.5411688959062323e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26130008697509766, | |
| "step": 1305, | |
| "valid_targets_mean": 5136.3, | |
| "valid_targets_min": 4515 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.45137195434385025, | |
| "learning_rate": 3.5360709020388625e-05, | |
| "loss": 0.2691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27292606234550476, | |
| "step": 1310, | |
| "valid_targets_mean": 5174.8, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 2.104, | |
| "grad_norm": 0.40285139682283977, | |
| "learning_rate": 3.530948455877388e-05, | |
| "loss": 0.2681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2631682753562927, | |
| "step": 1315, | |
| "valid_targets_mean": 5091.1, | |
| "valid_targets_min": 4506 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.38872503367931965, | |
| "learning_rate": 3.525801638964634e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26407986879348755, | |
| "step": 1320, | |
| "valid_targets_mean": 5063.1, | |
| "valid_targets_min": 4529 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.4357037975811673, | |
| "learning_rate": 3.520630533231376e-05, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26900553703308105, | |
| "step": 1325, | |
| "valid_targets_mean": 5539.9, | |
| "valid_targets_min": 4698 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.39968440055474036, | |
| "learning_rate": 3.5154352209950376e-05, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27105459570884705, | |
| "step": 1330, | |
| "valid_targets_mean": 5280.4, | |
| "valid_targets_min": 4761 | |
| }, | |
| { | |
| "epoch": 2.136, | |
| "grad_norm": 0.42035164866684993, | |
| "learning_rate": 3.510215784958376e-05, | |
| "loss": 0.2683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26866498589515686, | |
| "step": 1335, | |
| "valid_targets_mean": 5041.2, | |
| "valid_targets_min": 4274 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.41997127819029784, | |
| "learning_rate": 3.5049723082081755e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2786520719528198, | |
| "step": 1340, | |
| "valid_targets_mean": 5106.0, | |
| "valid_targets_min": 4556 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 0.44346749692311094, | |
| "learning_rate": 3.49970487421391e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26144152879714966, | |
| "step": 1345, | |
| "valid_targets_mean": 5059.1, | |
| "valid_targets_min": 4187 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.44190008223329036, | |
| "learning_rate": 3.494413566826427e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26917821168899536, | |
| "step": 1350, | |
| "valid_targets_mean": 5100.8, | |
| "valid_targets_min": 4727 | |
| }, | |
| { | |
| "epoch": 2.168, | |
| "grad_norm": 0.4206317172535002, | |
| "learning_rate": 3.489098470276608e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2669784128665924, | |
| "step": 1355, | |
| "valid_targets_mean": 5059.5, | |
| "valid_targets_min": 4635 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.4078014135122523, | |
| "learning_rate": 3.483759669174024e-05, | |
| "loss": 0.2635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.262362003326416, | |
| "step": 1360, | |
| "valid_targets_mean": 5154.9, | |
| "valid_targets_min": 4785 | |
| }, | |
| { | |
| "epoch": 2.184, | |
| "grad_norm": 0.4211878584371934, | |
| "learning_rate": 3.478397248505598e-05, | |
| "loss": 0.2678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2775549292564392, | |
| "step": 1365, | |
| "valid_targets_mean": 5114.8, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.41757099303158707, | |
| "learning_rate": 3.473011293634241e-05, | |
| "loss": 0.2706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27814704179763794, | |
| "step": 1370, | |
| "valid_targets_mean": 5201.4, | |
| "valid_targets_min": 4873 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.3910386856118759, | |
| "learning_rate": 3.467601890297502e-05, | |
| "loss": 0.2633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2525354325771332, | |
| "step": 1375, | |
| "valid_targets_mean": 5483.4, | |
| "valid_targets_min": 4368 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.4001003856489286, | |
| "learning_rate": 3.4621691246061976e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2715739905834198, | |
| "step": 1380, | |
| "valid_targets_mean": 5945.1, | |
| "valid_targets_min": 4381 | |
| }, | |
| { | |
| "epoch": 2.216, | |
| "grad_norm": 0.39993678681694006, | |
| "learning_rate": 3.456713083043046e-05, | |
| "loss": 0.2693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2684268355369568, | |
| "step": 1385, | |
| "valid_targets_mean": 5146.6, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.39230494319633913, | |
| "learning_rate": 3.451233852461285e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27615708112716675, | |
| "step": 1390, | |
| "valid_targets_mean": 5679.9, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 2.232, | |
| "grad_norm": 0.4393404250260886, | |
| "learning_rate": 3.4457315200832935e-05, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26387226581573486, | |
| "step": 1395, | |
| "valid_targets_mean": 5233.7, | |
| "valid_targets_min": 4715 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.6121099626667268, | |
| "learning_rate": 3.440206173499201e-05, | |
| "loss": 0.2706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2748655676841736, | |
| "step": 1400, | |
| "valid_targets_mean": 5164.6, | |
| "valid_targets_min": 4634 | |
| }, | |
| { | |
| "epoch": 2.248, | |
| "grad_norm": 0.4451834147780181, | |
| "learning_rate": 3.4346579006654945e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2720465660095215, | |
| "step": 1405, | |
| "valid_targets_mean": 5134.6, | |
| "valid_targets_min": 4545 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.43431966806565653, | |
| "learning_rate": 3.4290867899036166e-05, | |
| "loss": 0.2664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2673499286174774, | |
| "step": 1410, | |
| "valid_targets_mean": 5219.0, | |
| "valid_targets_min": 4372 | |
| }, | |
| { | |
| "epoch": 2.2640000000000002, | |
| "grad_norm": 0.42640247107585244, | |
| "learning_rate": 3.4234929298985614e-05, | |
| "loss": 0.2635, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25850701332092285, | |
| "step": 1415, | |
| "valid_targets_mean": 5475.9, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.4199548280513426, | |
| "learning_rate": 3.417876409697463e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26264137029647827, | |
| "step": 1420, | |
| "valid_targets_mean": 5411.2, | |
| "valid_targets_min": 4527 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.4190296119076448, | |
| "learning_rate": 3.412237318708175e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26476988196372986, | |
| "step": 1425, | |
| "valid_targets_mean": 5122.2, | |
| "valid_targets_min": 4278 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.40975846288793405, | |
| "learning_rate": 3.4065757466978504e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2588670253753662, | |
| "step": 1430, | |
| "valid_targets_mean": 5126.4, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 2.296, | |
| "grad_norm": 0.41493372695959746, | |
| "learning_rate": 3.400891783791511e-05, | |
| "loss": 0.2688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2684968113899231, | |
| "step": 1435, | |
| "valid_targets_mean": 5287.9, | |
| "valid_targets_min": 4541 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.4185050880233193, | |
| "learning_rate": 3.395185520470614e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27325066924095154, | |
| "step": 1440, | |
| "valid_targets_mean": 5364.9, | |
| "valid_targets_min": 4566 | |
| }, | |
| { | |
| "epoch": 2.312, | |
| "grad_norm": 0.3998150481372783, | |
| "learning_rate": 3.38945704757161e-05, | |
| "loss": 0.2658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2626110911369324, | |
| "step": 1445, | |
| "valid_targets_mean": 5122.1, | |
| "valid_targets_min": 4450 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.4082329218663247, | |
| "learning_rate": 3.383706456284498e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2748129963874817, | |
| "step": 1450, | |
| "valid_targets_mean": 5428.5, | |
| "valid_targets_min": 4066 | |
| }, | |
| { | |
| "epoch": 2.328, | |
| "grad_norm": 0.39503557037169834, | |
| "learning_rate": 3.377933838151374e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24841894209384918, | |
| "step": 1455, | |
| "valid_targets_mean": 5079.7, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.40200626405148376, | |
| "learning_rate": 3.3721392850649714e-05, | |
| "loss": 0.2623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26934224367141724, | |
| "step": 1460, | |
| "valid_targets_mean": 5501.9, | |
| "valid_targets_min": 4320 | |
| }, | |
| { | |
| "epoch": 2.344, | |
| "grad_norm": 0.3860773126071369, | |
| "learning_rate": 3.3663228892672034e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2690867781639099, | |
| "step": 1465, | |
| "valid_targets_mean": 5484.4, | |
| "valid_targets_min": 4585 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.4414094274246389, | |
| "learning_rate": 3.36048474334769e-05, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2631913423538208, | |
| "step": 1470, | |
| "valid_targets_mean": 4990.8, | |
| "valid_targets_min": 4341 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.3978056197368681, | |
| "learning_rate": 3.3546249402422834e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2645632028579712, | |
| "step": 1475, | |
| "valid_targets_mean": 5298.4, | |
| "valid_targets_min": 4748 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.4144030245826016, | |
| "learning_rate": 3.3487435732315944e-05, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28147369623184204, | |
| "step": 1480, | |
| "valid_targets_mean": 5640.7, | |
| "valid_targets_min": 4821 | |
| }, | |
| { | |
| "epoch": 2.376, | |
| "grad_norm": 0.4018638165552282, | |
| "learning_rate": 3.342840735939501e-05, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25910401344299316, | |
| "step": 1485, | |
| "valid_targets_mean": 5168.5, | |
| "valid_targets_min": 4638 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.42369985969985285, | |
| "learning_rate": 3.33691652233166e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27076345682144165, | |
| "step": 1490, | |
| "valid_targets_mean": 5150.6, | |
| "valid_targets_min": 4726 | |
| }, | |
| { | |
| "epoch": 2.392, | |
| "grad_norm": 0.43048045781644, | |
| "learning_rate": 3.330971026714016e-05, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2771048843860626, | |
| "step": 1495, | |
| "valid_targets_mean": 5231.6, | |
| "valid_targets_min": 4909 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.40414276824411277, | |
| "learning_rate": 3.325004343731292e-05, | |
| "loss": 0.2638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26731228828430176, | |
| "step": 1500, | |
| "valid_targets_mean": 5129.8, | |
| "valid_targets_min": 4272 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 0.42875740750724184, | |
| "learning_rate": 3.3190165683654885e-05, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27428919076919556, | |
| "step": 1505, | |
| "valid_targets_mean": 5202.4, | |
| "valid_targets_min": 4562 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.40635370864383735, | |
| "learning_rate": 3.31300779593437e-05, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25647979974746704, | |
| "step": 1510, | |
| "valid_targets_mean": 5014.4, | |
| "valid_targets_min": 4420 | |
| }, | |
| { | |
| "epoch": 2.424, | |
| "grad_norm": 0.4025593138454305, | |
| "learning_rate": 3.306978122089948e-05, | |
| "loss": 0.2642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.263387531042099, | |
| "step": 1515, | |
| "valid_targets_mean": 5246.6, | |
| "valid_targets_min": 4777 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.4178219851932713, | |
| "learning_rate": 3.300927642816957e-05, | |
| "loss": 0.2665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2747010290622711, | |
| "step": 1520, | |
| "valid_targets_mean": 5130.4, | |
| "valid_targets_min": 4488 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.41305559407036047, | |
| "learning_rate": 3.294856454431328e-05, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29068523645401, | |
| "step": 1525, | |
| "valid_targets_mean": 5658.7, | |
| "valid_targets_min": 4557 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.40259518780334536, | |
| "learning_rate": 3.288764653578653e-05, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2760384678840637, | |
| "step": 1530, | |
| "valid_targets_mean": 5133.9, | |
| "valid_targets_min": 4421 | |
| }, | |
| { | |
| "epoch": 2.456, | |
| "grad_norm": 0.4130184850547339, | |
| "learning_rate": 3.2826523372326516e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2531367242336273, | |
| "step": 1535, | |
| "valid_targets_mean": 5048.2, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.37416054018269695, | |
| "learning_rate": 3.276519602693621e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2576143145561218, | |
| "step": 1540, | |
| "valid_targets_mean": 5099.1, | |
| "valid_targets_min": 4673 | |
| }, | |
| { | |
| "epoch": 2.472, | |
| "grad_norm": 0.37538957310372123, | |
| "learning_rate": 3.270366547586892e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2841678261756897, | |
| "step": 1545, | |
| "valid_targets_mean": 5835.3, | |
| "valid_targets_min": 4824 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.4043142323428006, | |
| "learning_rate": 3.2641932698612715e-05, | |
| "loss": 0.2654, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26274266839027405, | |
| "step": 1550, | |
| "valid_targets_mean": 5155.9, | |
| "valid_targets_min": 4572 | |
| }, | |
| { | |
| "epoch": 2.488, | |
| "grad_norm": 0.4206048087697023, | |
| "learning_rate": 3.2579998677874855e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26059937477111816, | |
| "step": 1555, | |
| "valid_targets_mean": 5195.6, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.38838610572888527, | |
| "learning_rate": 3.251786439956614e-05, | |
| "loss": 0.2672, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2727297246456146, | |
| "step": 1560, | |
| "valid_targets_mean": 5554.8, | |
| "valid_targets_min": 4582 | |
| }, | |
| { | |
| "epoch": 2.504, | |
| "grad_norm": 0.45374516978425866, | |
| "learning_rate": 3.2455530852785206e-05, | |
| "loss": 0.2638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25287145376205444, | |
| "step": 1565, | |
| "valid_targets_mean": 5002.6, | |
| "valid_targets_min": 4561 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.3843309037852518, | |
| "learning_rate": 3.239299902980281e-05, | |
| "loss": 0.2645, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2585162818431854, | |
| "step": 1570, | |
| "valid_targets_mean": 5511.2, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.43335701698086815, | |
| "learning_rate": 3.2330269926046e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27319490909576416, | |
| "step": 1575, | |
| "valid_targets_mean": 5192.8, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.3894106584125143, | |
| "learning_rate": 3.2267344540082284e-05, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2645202875137329, | |
| "step": 1580, | |
| "valid_targets_mean": 6043.8, | |
| "valid_targets_min": 4338 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 0.41356926541858297, | |
| "learning_rate": 3.220422387360373e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2693471908569336, | |
| "step": 1585, | |
| "valid_targets_mean": 5589.9, | |
| "valid_targets_min": 4584 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.4079897393413228, | |
| "learning_rate": 3.2140908931411026e-05, | |
| "loss": 0.2658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2815423309803009, | |
| "step": 1590, | |
| "valid_targets_mean": 5253.6, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 2.552, | |
| "grad_norm": 0.3961278663802116, | |
| "learning_rate": 3.207740072139748e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2676053047180176, | |
| "step": 1595, | |
| "valid_targets_mean": 5061.8, | |
| "valid_targets_min": 4495 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.4087540460791139, | |
| "learning_rate": 3.2013700254532996e-05, | |
| "loss": 0.2658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2671356499195099, | |
| "step": 1600, | |
| "valid_targets_mean": 5119.0, | |
| "valid_targets_min": 4647 | |
| }, | |
| { | |
| "epoch": 2.568, | |
| "grad_norm": 0.40855112908780067, | |
| "learning_rate": 3.194980854484794e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2565302550792694, | |
| "step": 1605, | |
| "valid_targets_mean": 5036.5, | |
| "valid_targets_min": 4575 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.3975412882612971, | |
| "learning_rate": 3.188572660941702e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2670160233974457, | |
| "step": 1610, | |
| "valid_targets_mean": 5546.9, | |
| "valid_targets_min": 4665 | |
| }, | |
| { | |
| "epoch": 2.584, | |
| "grad_norm": 0.3802809654943076, | |
| "learning_rate": 3.182145546834311e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2641444206237793, | |
| "step": 1615, | |
| "valid_targets_mean": 5664.9, | |
| "valid_targets_min": 4713 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.39580992250745756, | |
| "learning_rate": 3.1756996144740994e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2824433743953705, | |
| "step": 1620, | |
| "valid_targets_mean": 5904.4, | |
| "valid_targets_min": 4753 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.38104160695763, | |
| "learning_rate": 3.1692349664721074e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2570064663887024, | |
| "step": 1625, | |
| "valid_targets_mean": 5614.2, | |
| "valid_targets_min": 4541 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.43313507589311906, | |
| "learning_rate": 3.1627517057373046e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.253144770860672, | |
| "step": 1630, | |
| "valid_targets_mean": 5069.7, | |
| "valid_targets_min": 4185 | |
| }, | |
| { | |
| "epoch": 2.616, | |
| "grad_norm": 0.3877321287956724, | |
| "learning_rate": 3.156249935474953e-05, | |
| "loss": 0.2604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27623024582862854, | |
| "step": 1635, | |
| "valid_targets_mean": 5730.8, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.3772335768733949, | |
| "learning_rate": 3.1497297591849614e-05, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2648906707763672, | |
| "step": 1640, | |
| "valid_targets_mean": 5381.7, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 2.632, | |
| "grad_norm": 0.4034443686288007, | |
| "learning_rate": 3.143191280660238e-05, | |
| "loss": 0.2623, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2619234621524811, | |
| "step": 1645, | |
| "valid_targets_mean": 5571.5, | |
| "valid_targets_min": 4320 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.39936916955784146, | |
| "learning_rate": 3.1366346039850424e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2666994035243988, | |
| "step": 1650, | |
| "valid_targets_mean": 5306.7, | |
| "valid_targets_min": 4531 | |
| }, | |
| { | |
| "epoch": 2.648, | |
| "grad_norm": 0.38656464508467425, | |
| "learning_rate": 3.130059833533323e-05, | |
| "loss": 0.2585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25914907455444336, | |
| "step": 1655, | |
| "valid_targets_mean": 5179.9, | |
| "valid_targets_min": 4683 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.4153081748281799, | |
| "learning_rate": 3.123467073967059e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2556573152542114, | |
| "step": 1660, | |
| "valid_targets_mean": 5489.3, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 0.39426543262603564, | |
| "learning_rate": 3.116856430234594e-05, | |
| "loss": 0.263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2817539870738983, | |
| "step": 1665, | |
| "valid_targets_mean": 5751.3, | |
| "valid_targets_min": 4565 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.3765620622412519, | |
| "learning_rate": 3.110228007568963e-05, | |
| "loss": 0.2677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25782492756843567, | |
| "step": 1670, | |
| "valid_targets_mean": 5083.5, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.3902893258022378, | |
| "learning_rate": 3.103581911486221e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26476383209228516, | |
| "step": 1675, | |
| "valid_targets_mean": 5053.3, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.39530314746607503, | |
| "learning_rate": 3.0969182477837604e-05, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26915478706359863, | |
| "step": 1680, | |
| "valid_targets_mean": 5652.5, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 2.6959999999999997, | |
| "grad_norm": 0.401945793837044, | |
| "learning_rate": 3.090237122538628e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2647216320037842, | |
| "step": 1685, | |
| "valid_targets_mean": 5648.5, | |
| "valid_targets_min": 4696 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.36805919035303825, | |
| "learning_rate": 3.0835386421058345e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2573416829109192, | |
| "step": 1690, | |
| "valid_targets_mean": 5326.2, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 2.7119999999999997, | |
| "grad_norm": 0.3622284022217115, | |
| "learning_rate": 3.0768229131166664e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2625514566898346, | |
| "step": 1695, | |
| "valid_targets_mean": 5644.1, | |
| "valid_targets_min": 4626 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.39209374002164715, | |
| "learning_rate": 3.070090042476983e-05, | |
| "loss": 0.258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2617104947566986, | |
| "step": 1700, | |
| "valid_targets_mean": 5118.5, | |
| "valid_targets_min": 4449 | |
| }, | |
| { | |
| "epoch": 2.7279999999999998, | |
| "grad_norm": 0.4052065981954741, | |
| "learning_rate": 3.063340137365517e-05, | |
| "loss": 0.2661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25953617691993713, | |
| "step": 1705, | |
| "valid_targets_mean": 4979.4, | |
| "valid_targets_min": 4197 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.3850556897680927, | |
| "learning_rate": 3.0565733052321674e-05, | |
| "loss": 0.2687, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.270086407661438, | |
| "step": 1710, | |
| "valid_targets_mean": 5134.0, | |
| "valid_targets_min": 4523 | |
| }, | |
| { | |
| "epoch": 2.7439999999999998, | |
| "grad_norm": 0.3846499816756663, | |
| "learning_rate": 3.0497896537962924e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2651664614677429, | |
| "step": 1715, | |
| "valid_targets_mean": 5576.8, | |
| "valid_targets_min": 4584 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.3912606894805303, | |
| "learning_rate": 3.042989291044991e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2645653486251831, | |
| "step": 1720, | |
| "valid_targets_mean": 5106.0, | |
| "valid_targets_min": 4740 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.4201847656303527, | |
| "learning_rate": 3.036172325231383e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26078614592552185, | |
| "step": 1725, | |
| "valid_targets_mean": 4930.8, | |
| "valid_targets_min": 4461 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.36880465327806744, | |
| "learning_rate": 3.0293388648728908e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25412940979003906, | |
| "step": 1730, | |
| "valid_targets_mean": 5474.9, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 2.776, | |
| "grad_norm": 0.39829690130888995, | |
| "learning_rate": 3.022489018749508e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608550786972046, | |
| "step": 1735, | |
| "valid_targets_mean": 5073.0, | |
| "valid_targets_min": 4570 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.37972548557171015, | |
| "learning_rate": 3.015622895902068e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2650458812713623, | |
| "step": 1740, | |
| "valid_targets_mean": 5798.2, | |
| "valid_targets_min": 4956 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 0.379287815231503, | |
| "learning_rate": 3.008740605630508e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.255679190158844, | |
| "step": 1745, | |
| "valid_targets_mean": 5012.8, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.40057716983397035, | |
| "learning_rate": 3.0018422574921337e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25868844985961914, | |
| "step": 1750, | |
| "valid_targets_mean": 5096.9, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 2.808, | |
| "grad_norm": 0.3675659738112009, | |
| "learning_rate": 2.9949279612998673e-05, | |
| "loss": 0.2643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2618846595287323, | |
| "step": 1755, | |
| "valid_targets_mean": 5151.0, | |
| "valid_targets_min": 4674 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.38031600988044323, | |
| "learning_rate": 2.9879978271205064e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2578386664390564, | |
| "step": 1760, | |
| "valid_targets_mean": 5080.0, | |
| "valid_targets_min": 4455 | |
| }, | |
| { | |
| "epoch": 2.824, | |
| "grad_norm": 0.41326983054415795, | |
| "learning_rate": 2.9810519652729692e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2599087953567505, | |
| "step": 1765, | |
| "valid_targets_mean": 5081.8, | |
| "valid_targets_min": 4521 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.39150447545652417, | |
| "learning_rate": 2.9740904863265378e-05, | |
| "loss": 0.2588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2668408751487732, | |
| "step": 1770, | |
| "valid_targets_mean": 5743.9, | |
| "valid_targets_min": 4720 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.38048852992842547, | |
| "learning_rate": 2.967113501099097e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2716270089149475, | |
| "step": 1775, | |
| "valid_targets_mean": 5608.3, | |
| "valid_targets_min": 4791 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.4241137020935267, | |
| "learning_rate": 2.9601211206553745e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26419615745544434, | |
| "step": 1780, | |
| "valid_targets_mean": 5044.1, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 2.856, | |
| "grad_norm": 0.3834199034551767, | |
| "learning_rate": 2.9531134563051686e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2629735767841339, | |
| "step": 1785, | |
| "valid_targets_mean": 5593.7, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.37963092411648275, | |
| "learning_rate": 2.946090619601579e-05, | |
| "loss": 0.2589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2598267197608948, | |
| "step": 1790, | |
| "valid_targets_mean": 5047.7, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 2.872, | |
| "grad_norm": 0.4007505182311276, | |
| "learning_rate": 2.9390527223392292e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25906598567962646, | |
| "step": 1795, | |
| "valid_targets_mean": 5123.7, | |
| "valid_targets_min": 4556 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.38399553574523715, | |
| "learning_rate": 2.931999876552488e-05, | |
| "loss": 0.2585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2675209045410156, | |
| "step": 1800, | |
| "valid_targets_mean": 5218.8, | |
| "valid_targets_min": 4739 | |
| }, | |
| { | |
| "epoch": 2.888, | |
| "grad_norm": 0.3691142774605129, | |
| "learning_rate": 2.9249321945136854e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2655124068260193, | |
| "step": 1805, | |
| "valid_targets_mean": 6329.1, | |
| "valid_targets_min": 4036 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.36527548737915194, | |
| "learning_rate": 2.9178497887313257e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561267018318176, | |
| "step": 1810, | |
| "valid_targets_mean": 5584.8, | |
| "valid_targets_min": 4757 | |
| }, | |
| { | |
| "epoch": 2.904, | |
| "grad_norm": 0.37115845833929934, | |
| "learning_rate": 2.9107527719482968e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24974587559700012, | |
| "step": 1815, | |
| "valid_targets_mean": 5108.6, | |
| "valid_targets_min": 4739 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.37174406116175074, | |
| "learning_rate": 2.9036412571400747e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26333725452423096, | |
| "step": 1820, | |
| "valid_targets_mean": 5249.0, | |
| "valid_targets_min": 4756 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.5244548456776246, | |
| "learning_rate": 2.8965153575129255e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545211911201477, | |
| "step": 1825, | |
| "valid_targets_mean": 5060.2, | |
| "valid_targets_min": 4661 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.4268100859000517, | |
| "learning_rate": 2.8893751865021044e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26448148488998413, | |
| "step": 1830, | |
| "valid_targets_mean": 5417.8, | |
| "valid_targets_min": 4369 | |
| }, | |
| { | |
| "epoch": 2.936, | |
| "grad_norm": 0.40297990469391415, | |
| "learning_rate": 2.8822208577700473e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25776785612106323, | |
| "step": 1835, | |
| "valid_targets_mean": 5146.0, | |
| "valid_targets_min": 4699 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.3970984613662667, | |
| "learning_rate": 2.8750524852045642e-05, | |
| "loss": 0.2677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2640109658241272, | |
| "step": 1840, | |
| "valid_targets_mean": 5263.3, | |
| "valid_targets_min": 4790 | |
| }, | |
| { | |
| "epoch": 2.952, | |
| "grad_norm": 0.4223873205548612, | |
| "learning_rate": 2.867870182917024e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2486611306667328, | |
| "step": 1845, | |
| "valid_targets_mean": 5144.0, | |
| "valid_targets_min": 4287 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.4069271921149705, | |
| "learning_rate": 2.8606740652405394e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636624574661255, | |
| "step": 1850, | |
| "valid_targets_mean": 5223.5, | |
| "valid_targets_min": 4624 | |
| }, | |
| { | |
| "epoch": 2.968, | |
| "grad_norm": 0.3866958946638761, | |
| "learning_rate": 2.853464246728147e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2874758839607239, | |
| "step": 1855, | |
| "valid_targets_mean": 5635.8, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.42246902597606817, | |
| "learning_rate": 2.846240842150984e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25802749395370483, | |
| "step": 1860, | |
| "valid_targets_mean": 5212.9, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 2.984, | |
| "grad_norm": 0.4092229473863057, | |
| "learning_rate": 2.839003966496458e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2596660256385803, | |
| "step": 1865, | |
| "valid_targets_mean": 5173.3, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.4042386680061695, | |
| "learning_rate": 2.8317537349664215e-05, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25971055030822754, | |
| "step": 1870, | |
| "valid_targets_mean": 5638.8, | |
| "valid_targets_min": 4709 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4051002242844838, | |
| "learning_rate": 2.824490262975334e-05, | |
| "loss": 0.2588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25733834505081177, | |
| "step": 1875, | |
| "valid_targets_mean": 5091.9, | |
| "valid_targets_min": 4750 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.4017493116761928, | |
| "learning_rate": 2.817213666148427e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2615068852901459, | |
| "step": 1880, | |
| "valid_targets_mean": 5247.6, | |
| "valid_targets_min": 4492 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 0.38743564127808927, | |
| "learning_rate": 2.809924060319862e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26098230481147766, | |
| "step": 1885, | |
| "valid_targets_mean": 5441.4, | |
| "valid_targets_min": 4672 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.41437786108191504, | |
| "learning_rate": 2.802621561530888e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25473156571388245, | |
| "step": 1890, | |
| "valid_targets_mean": 5242.3, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 3.032, | |
| "grad_norm": 0.41757986678811276, | |
| "learning_rate": 2.7953062860279937e-05, | |
| "loss": 0.2581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2574443221092224, | |
| "step": 1895, | |
| "valid_targets_mean": 5013.8, | |
| "valid_targets_min": 4323 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.38808501068179957, | |
| "learning_rate": 2.7879783502610557e-05, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597794830799103, | |
| "step": 1900, | |
| "valid_targets_mean": 5037.2, | |
| "valid_targets_min": 4654 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 0.3918720292260328, | |
| "learning_rate": 2.7806378708814875e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24929499626159668, | |
| "step": 1905, | |
| "valid_targets_mean": 4982.4, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.3998723784672861, | |
| "learning_rate": 2.773284964740379e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26851534843444824, | |
| "step": 1910, | |
| "valid_targets_mean": 5088.8, | |
| "valid_targets_min": 4822 | |
| }, | |
| { | |
| "epoch": 3.064, | |
| "grad_norm": 0.387693240123785, | |
| "learning_rate": 2.7659197488866403e-05, | |
| "loss": 0.2559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24849367141723633, | |
| "step": 1915, | |
| "valid_targets_mean": 5119.2, | |
| "valid_targets_min": 4625 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.3577505658631945, | |
| "learning_rate": 2.7585423405651347e-05, | |
| "loss": 0.2544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2374514639377594, | |
| "step": 1920, | |
| "valid_targets_mean": 5427.0, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.42423136743379575, | |
| "learning_rate": 2.7511528572148153e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2630290389060974, | |
| "step": 1925, | |
| "valid_targets_mean": 5122.5, | |
| "valid_targets_min": 4416 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.39646944134087847, | |
| "learning_rate": 2.7437514164668536e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26025158166885376, | |
| "step": 1930, | |
| "valid_targets_mean": 5186.6, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 0.41169588309698196, | |
| "learning_rate": 2.7363381361427692e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26889950037002563, | |
| "step": 1935, | |
| "valid_targets_mean": 5103.2, | |
| "valid_targets_min": 4468 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.4134380393160243, | |
| "learning_rate": 2.72891313425255e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25800013542175293, | |
| "step": 1940, | |
| "valid_targets_mean": 4990.0, | |
| "valid_targets_min": 4531 | |
| }, | |
| { | |
| "epoch": 3.112, | |
| "grad_norm": 0.4111043631529694, | |
| "learning_rate": 2.7214765289927777e-05, | |
| "loss": 0.2612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2527834475040436, | |
| "step": 1945, | |
| "valid_targets_mean": 5063.6, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.3791328611182782, | |
| "learning_rate": 2.714028438744746e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25852206349372864, | |
| "step": 1950, | |
| "valid_targets_mean": 5192.9, | |
| "valid_targets_min": 4687 | |
| }, | |
| { | |
| "epoch": 3.128, | |
| "grad_norm": 0.3842998254139912, | |
| "learning_rate": 2.706568982072573e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24949491024017334, | |
| "step": 1955, | |
| "valid_targets_mean": 5541.7, | |
| "valid_targets_min": 4366 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.3765569075955108, | |
| "learning_rate": 2.6990982777213174e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2668150067329407, | |
| "step": 1960, | |
| "valid_targets_mean": 5992.1, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 3.144, | |
| "grad_norm": 0.3781268574142044, | |
| "learning_rate": 2.691616444615085e-05, | |
| "loss": 0.2563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2584110498428345, | |
| "step": 1965, | |
| "valid_targets_mean": 5448.6, | |
| "valid_targets_min": 4361 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.40520752367750124, | |
| "learning_rate": 2.6841236018551402e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25754767656326294, | |
| "step": 1970, | |
| "valid_targets_mean": 5062.9, | |
| "valid_targets_min": 4702 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.4267061678706972, | |
| "learning_rate": 2.6766198687180028e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26330867409706116, | |
| "step": 1975, | |
| "valid_targets_mean": 5101.2, | |
| "valid_targets_min": 4247 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.4153497751505656, | |
| "learning_rate": 2.6691053646535564e-05, | |
| "loss": 0.2533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24770234525203705, | |
| "step": 1980, | |
| "valid_targets_mean": 5090.2, | |
| "valid_targets_min": 4699 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.3824326801704376, | |
| "learning_rate": 2.6615802092831446e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2542661726474762, | |
| "step": 1985, | |
| "valid_targets_mean": 5193.1, | |
| "valid_targets_min": 4679 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.3715833773133048, | |
| "learning_rate": 2.6540445223976637e-05, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2473723292350769, | |
| "step": 1990, | |
| "valid_targets_mean": 4982.2, | |
| "valid_targets_min": 4583 | |
| }, | |
| { | |
| "epoch": 3.192, | |
| "grad_norm": 0.40416306823468356, | |
| "learning_rate": 2.6464984239556602e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27596116065979004, | |
| "step": 1995, | |
| "valid_targets_mean": 5217.9, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.39935296965821354, | |
| "learning_rate": 2.63894203408142e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25643453001976013, | |
| "step": 2000, | |
| "valid_targets_mean": 5030.4, | |
| "valid_targets_min": 4381 | |
| }, | |
| { | |
| "epoch": 3.208, | |
| "grad_norm": 0.3992815582088795, | |
| "learning_rate": 2.6313754730630528e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2489120364189148, | |
| "step": 2005, | |
| "valid_targets_mean": 4996.1, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.3717603332443528, | |
| "learning_rate": 2.623798861350582e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.267914742231369, | |
| "step": 2010, | |
| "valid_targets_mean": 5370.2, | |
| "valid_targets_min": 4995 | |
| }, | |
| { | |
| "epoch": 3.224, | |
| "grad_norm": 0.3754010026512321, | |
| "learning_rate": 2.6162123195540247e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2619240880012512, | |
| "step": 2015, | |
| "valid_targets_mean": 5149.1, | |
| "valid_targets_min": 4400 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.36279287870057525, | |
| "learning_rate": 2.6086159684414726e-05, | |
| "loss": 0.2601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27412256598472595, | |
| "step": 2020, | |
| "valid_targets_mean": 5388.9, | |
| "valid_targets_min": 4605 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.36951304903885596, | |
| "learning_rate": 2.6010099289371694e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27248796820640564, | |
| "step": 2025, | |
| "valid_targets_mean": 5653.0, | |
| "valid_targets_min": 4755 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.3522171064957345, | |
| "learning_rate": 2.5933943221195844e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24924296140670776, | |
| "step": 2030, | |
| "valid_targets_mean": 5767.5, | |
| "valid_targets_min": 4760 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.3741280341950422, | |
| "learning_rate": 2.5857692692194884e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25600665807724, | |
| "step": 2035, | |
| "valid_targets_mean": 5357.2, | |
| "valid_targets_min": 4459 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.41158494509032223, | |
| "learning_rate": 2.5781348916180195e-05, | |
| "loss": 0.258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2568986415863037, | |
| "step": 2040, | |
| "valid_targets_mean": 5082.5, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 3.2720000000000002, | |
| "grad_norm": 0.39987003522639153, | |
| "learning_rate": 2.570491310844755e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2685532569885254, | |
| "step": 2045, | |
| "valid_targets_mean": 5198.1, | |
| "valid_targets_min": 4746 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.3635384020730392, | |
| "learning_rate": 2.562838648575774e-05, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25414279103279114, | |
| "step": 2050, | |
| "valid_targets_mean": 5434.4, | |
| "valid_targets_min": 4341 | |
| }, | |
| { | |
| "epoch": 3.288, | |
| "grad_norm": 0.3762746168452621, | |
| "learning_rate": 2.5551770266317224e-05, | |
| "loss": 0.2559, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2556823790073395, | |
| "step": 2055, | |
| "valid_targets_mean": 5134.6, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.3545230826477199, | |
| "learning_rate": 2.5475065669758713e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2530892491340637, | |
| "step": 2060, | |
| "valid_targets_mean": 5176.4, | |
| "valid_targets_min": 4720 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 0.41644538422972593, | |
| "learning_rate": 2.5398273917121786e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2598080635070801, | |
| "step": 2065, | |
| "valid_targets_mean": 5162.8, | |
| "valid_targets_min": 4822 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.35751551656554953, | |
| "learning_rate": 2.532139623083342e-05, | |
| "loss": 0.2558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25548699498176575, | |
| "step": 2070, | |
| "valid_targets_mean": 5641.9, | |
| "valid_targets_min": 4481 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.3550176472557679, | |
| "learning_rate": 2.5244433834688552e-05, | |
| "loss": 0.2594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25415748357772827, | |
| "step": 2075, | |
| "valid_targets_mean": 5365.8, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.37575538785363166, | |
| "learning_rate": 2.5167387953830602e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25079482793807983, | |
| "step": 2080, | |
| "valid_targets_mean": 5118.4, | |
| "valid_targets_min": 4439 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.38844697932576017, | |
| "learning_rate": 2.5090259814731946e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2594192326068878, | |
| "step": 2085, | |
| "valid_targets_mean": 5582.4, | |
| "valid_targets_min": 4666 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.40074363971269034, | |
| "learning_rate": 2.5013050645174414e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26167863607406616, | |
| "step": 2090, | |
| "valid_targets_mean": 5266.3, | |
| "valid_targets_min": 4721 | |
| }, | |
| { | |
| "epoch": 3.352, | |
| "grad_norm": 0.40974985031879735, | |
| "learning_rate": 2.4935761674229735e-05, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25476735830307007, | |
| "step": 2095, | |
| "valid_targets_mean": 5031.2, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.35604816076575696, | |
| "learning_rate": 2.4858394132239982e-05, | |
| "loss": 0.2642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26181378960609436, | |
| "step": 2100, | |
| "valid_targets_mean": 6251.6, | |
| "valid_targets_min": 4583 | |
| }, | |
| { | |
| "epoch": 3.368, | |
| "grad_norm": 0.42789983477774807, | |
| "learning_rate": 2.4780949250797964e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2587730586528778, | |
| "step": 2105, | |
| "valid_targets_mean": 5059.0, | |
| "valid_targets_min": 4534 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.412929475237948, | |
| "learning_rate": 2.4703428262727656e-05, | |
| "loss": 0.2517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25499147176742554, | |
| "step": 2110, | |
| "valid_targets_mean": 5591.4, | |
| "valid_targets_min": 4071 | |
| }, | |
| { | |
| "epoch": 3.384, | |
| "grad_norm": 0.3822408621280421, | |
| "learning_rate": 2.4625832402064525e-05, | |
| "loss": 0.2588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2681821584701538, | |
| "step": 2115, | |
| "valid_targets_mean": 5152.5, | |
| "valid_targets_min": 4576 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.38423442990725043, | |
| "learning_rate": 2.454816290403595e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2522526979446411, | |
| "step": 2120, | |
| "valid_targets_mean": 5629.2, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.4012552611203462, | |
| "learning_rate": 2.4470421005041492e-05, | |
| "loss": 0.2591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.255180299282074, | |
| "step": 2125, | |
| "valid_targets_mean": 5106.1, | |
| "valid_targets_min": 4340 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.3679852789386794, | |
| "learning_rate": 2.4392607942633263e-05, | |
| "loss": 0.2587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25126224756240845, | |
| "step": 2130, | |
| "valid_targets_mean": 5241.3, | |
| "valid_targets_min": 4604 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.3769603372606563, | |
| "learning_rate": 2.43147249554962e-05, | |
| "loss": 0.2583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25547105073928833, | |
| "step": 2135, | |
| "valid_targets_mean": 5038.0, | |
| "valid_targets_min": 4450 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.4010496936888782, | |
| "learning_rate": 2.423677328342835e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26036322116851807, | |
| "step": 2140, | |
| "valid_targets_mean": 5094.1, | |
| "valid_targets_min": 4487 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 0.37469274783275686, | |
| "learning_rate": 2.415875416732113e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544146478176117, | |
| "step": 2145, | |
| "valid_targets_mean": 5419.3, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.37793793984259566, | |
| "learning_rate": 2.4080668849139603e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24439139664173126, | |
| "step": 2150, | |
| "valid_targets_mean": 5038.1, | |
| "valid_targets_min": 4658 | |
| }, | |
| { | |
| "epoch": 3.448, | |
| "grad_norm": 0.3598968665052068, | |
| "learning_rate": 2.4002518571902665e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2562156319618225, | |
| "step": 2155, | |
| "valid_targets_mean": 5411.3, | |
| "valid_targets_min": 4589 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.3994259051200495, | |
| "learning_rate": 2.392430457966328e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2530721127986908, | |
| "step": 2160, | |
| "valid_targets_mean": 5216.0, | |
| "valid_targets_min": 4780 | |
| }, | |
| { | |
| "epoch": 3.464, | |
| "grad_norm": 0.37571999528126965, | |
| "learning_rate": 2.3846028117488686e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2571840286254883, | |
| "step": 2165, | |
| "valid_targets_mean": 5118.2, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.36131457650206183, | |
| "learning_rate": 2.3767690431440533e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2436513602733612, | |
| "step": 2170, | |
| "valid_targets_mean": 5932.6, | |
| "valid_targets_min": 4426 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.3850192212688054, | |
| "learning_rate": 2.368929276855512e-05, | |
| "loss": 0.2563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25989893078804016, | |
| "step": 2175, | |
| "valid_targets_mean": 5236.9, | |
| "valid_targets_min": 4555 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.4035535368632327, | |
| "learning_rate": 2.361083637682347e-05, | |
| "loss": 0.2605, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2781639099121094, | |
| "step": 2180, | |
| "valid_targets_mean": 5540.8, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.38337014400716235, | |
| "learning_rate": 2.3532322505171502e-05, | |
| "loss": 0.2523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2474091500043869, | |
| "step": 2185, | |
| "valid_targets_mean": 5893.3, | |
| "valid_targets_min": 4621 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.3983636228987262, | |
| "learning_rate": 2.3453752403440147e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2601785659790039, | |
| "step": 2190, | |
| "valid_targets_mean": 5188.9, | |
| "valid_targets_min": 4221 | |
| }, | |
| { | |
| "epoch": 3.512, | |
| "grad_norm": 0.3699591296410164, | |
| "learning_rate": 2.337512732236545e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2584289014339447, | |
| "step": 2195, | |
| "valid_targets_mean": 5542.6, | |
| "valid_targets_min": 4347 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.4158656694817678, | |
| "learning_rate": 2.3296448513558628e-05, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2636766731739044, | |
| "step": 2200, | |
| "valid_targets_mean": 5294.4, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 3.528, | |
| "grad_norm": 0.3630122133657765, | |
| "learning_rate": 2.321771722948622e-05, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25399672985076904, | |
| "step": 2205, | |
| "valid_targets_mean": 5671.2, | |
| "valid_targets_min": 4750 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.40662672863827737, | |
| "learning_rate": 2.3138934723450074e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2578379511833191, | |
| "step": 2210, | |
| "valid_targets_mean": 5070.2, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 3.544, | |
| "grad_norm": 0.3725056075748149, | |
| "learning_rate": 2.306010224956744e-05, | |
| "loss": 0.25, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2570652663707733, | |
| "step": 2215, | |
| "valid_targets_mean": 5506.8, | |
| "valid_targets_min": 4310 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.3637467404388262, | |
| "learning_rate": 2.2981221062750986e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24861541390419006, | |
| "step": 2220, | |
| "valid_targets_mean": 5082.7, | |
| "valid_targets_min": 4669 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.35203719167870795, | |
| "learning_rate": 2.290229241868882e-05, | |
| "loss": 0.2599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2622869610786438, | |
| "step": 2225, | |
| "valid_targets_mean": 6176.3, | |
| "valid_targets_min": 4756 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.3717736150436042, | |
| "learning_rate": 2.282331757382454e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24464790523052216, | |
| "step": 2230, | |
| "valid_targets_mean": 5028.2, | |
| "valid_targets_min": 4432 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 0.3673281100556043, | |
| "learning_rate": 2.2744297785337155e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25710946321487427, | |
| "step": 2235, | |
| "valid_targets_mean": 5477.9, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.36613763913846853, | |
| "learning_rate": 2.2665234311121155e-05, | |
| "loss": 0.2601, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25341567397117615, | |
| "step": 2240, | |
| "valid_targets_mean": 5003.9, | |
| "valid_targets_min": 4083 | |
| }, | |
| { | |
| "epoch": 3.592, | |
| "grad_norm": 0.37892523030563763, | |
| "learning_rate": 2.258612840976645e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25544115900993347, | |
| "step": 2245, | |
| "valid_targets_mean": 5216.2, | |
| "valid_targets_min": 4771 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.33820665473089867, | |
| "learning_rate": 2.2506981340538315e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2539617121219635, | |
| "step": 2250, | |
| "valid_targets_mean": 5914.1, | |
| "valid_targets_min": 4582 | |
| }, | |
| { | |
| "epoch": 3.608, | |
| "grad_norm": 0.3818152402562447, | |
| "learning_rate": 2.2427794363357384e-05, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2526627779006958, | |
| "step": 2255, | |
| "valid_targets_mean": 5122.5, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.37534297890618695, | |
| "learning_rate": 2.2348568738779566e-05, | |
| "loss": 0.2534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2573014199733734, | |
| "step": 2260, | |
| "valid_targets_mean": 5240.1, | |
| "valid_targets_min": 4865 | |
| }, | |
| { | |
| "epoch": 3.624, | |
| "grad_norm": 0.3717550712377671, | |
| "learning_rate": 2.2269305727975993e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25530850887298584, | |
| "step": 2265, | |
| "valid_targets_mean": 5115.8, | |
| "valid_targets_min": 4402 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.37308145674301035, | |
| "learning_rate": 2.2190006592712927e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24942773580551147, | |
| "step": 2270, | |
| "valid_targets_mean": 5033.2, | |
| "valid_targets_min": 4615 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.37632941596277086, | |
| "learning_rate": 2.2110672595331698e-05, | |
| "loss": 0.2592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2606221139431, | |
| "step": 2275, | |
| "valid_targets_mean": 5627.9, | |
| "valid_targets_min": 4635 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.3835628746396792, | |
| "learning_rate": 2.2031304998728587e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25760626792907715, | |
| "step": 2280, | |
| "valid_targets_mean": 5164.8, | |
| "valid_targets_min": 4467 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 0.3844427936707026, | |
| "learning_rate": 2.1951905066334737e-05, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25517892837524414, | |
| "step": 2285, | |
| "valid_targets_mean": 5144.4, | |
| "valid_targets_min": 4552 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.3898581775367082, | |
| "learning_rate": 2.1872474062096046e-05, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25308695435523987, | |
| "step": 2290, | |
| "valid_targets_mean": 5123.2, | |
| "valid_targets_min": 4698 | |
| }, | |
| { | |
| "epoch": 3.672, | |
| "grad_norm": 0.38692852197100364, | |
| "learning_rate": 2.179301325045301e-05, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25178107619285583, | |
| "step": 2295, | |
| "valid_targets_mean": 5061.7, | |
| "valid_targets_min": 4388 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.3666022121554562, | |
| "learning_rate": 2.1713523896320647e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25144487619400024, | |
| "step": 2300, | |
| "valid_targets_mean": 5175.2, | |
| "valid_targets_min": 4599 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 0.3636310742793429, | |
| "learning_rate": 2.163400726506832e-05, | |
| "loss": 0.2546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26339221000671387, | |
| "step": 2305, | |
| "valid_targets_mean": 6045.6, | |
| "valid_targets_min": 4545 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.37805134637695786, | |
| "learning_rate": 2.155446462249961e-05, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2537267804145813, | |
| "step": 2310, | |
| "valid_targets_mean": 5200.2, | |
| "valid_targets_min": 4581 | |
| }, | |
| { | |
| "epoch": 3.7039999999999997, | |
| "grad_norm": 0.35706148261006204, | |
| "learning_rate": 2.147489723483217e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2490694224834442, | |
| "step": 2315, | |
| "valid_targets_mean": 5528.3, | |
| "valid_targets_min": 4729 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.41949956996462584, | |
| "learning_rate": 2.139530636867757e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2546956539154053, | |
| "step": 2320, | |
| "valid_targets_mean": 4994.5, | |
| "valid_targets_min": 4515 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.35714755729643927, | |
| "learning_rate": 2.1315693291021114e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25141674280166626, | |
| "step": 2325, | |
| "valid_targets_mean": 5574.7, | |
| "valid_targets_min": 4572 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.3716922418046691, | |
| "learning_rate": 2.1236059269201686e-05, | |
| "loss": 0.2592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.251057505607605, | |
| "step": 2330, | |
| "valid_targets_mean": 5262.8, | |
| "valid_targets_min": 4541 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 0.3740571828651018, | |
| "learning_rate": 2.1156405570891584e-05, | |
| "loss": 0.2572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25300735235214233, | |
| "step": 2335, | |
| "valid_targets_mean": 5224.2, | |
| "valid_targets_min": 4671 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.3731152468480955, | |
| "learning_rate": 2.1076733464076322e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24885761737823486, | |
| "step": 2340, | |
| "valid_targets_mean": 5041.9, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 3.752, | |
| "grad_norm": 0.3599972684261803, | |
| "learning_rate": 2.0997044217034462e-05, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.256458580493927, | |
| "step": 2345, | |
| "valid_targets_mean": 5047.6, | |
| "valid_targets_min": 4558 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.3790801144533084, | |
| "learning_rate": 2.0917339098317405e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24056321382522583, | |
| "step": 2350, | |
| "valid_targets_mean": 4953.9, | |
| "valid_targets_min": 4256 | |
| }, | |
| { | |
| "epoch": 3.768, | |
| "grad_norm": 0.3439413197020935, | |
| "learning_rate": 2.083761937672922e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24720294773578644, | |
| "step": 2355, | |
| "valid_targets_mean": 5482.6, | |
| "valid_targets_min": 4664 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.37051295467258905, | |
| "learning_rate": 2.0757886321306433e-05, | |
| "loss": 0.2523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25234872102737427, | |
| "step": 2360, | |
| "valid_targets_mean": 5112.2, | |
| "valid_targets_min": 4565 | |
| }, | |
| { | |
| "epoch": 3.784, | |
| "grad_norm": 0.38194134638096117, | |
| "learning_rate": 2.0678141201297827e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25368550419807434, | |
| "step": 2365, | |
| "valid_targets_mean": 5128.2, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.3640337122913282, | |
| "learning_rate": 2.059838528614423e-05, | |
| "loss": 0.2531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25246694684028625, | |
| "step": 2370, | |
| "valid_targets_mean": 5522.0, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.353343318421452, | |
| "learning_rate": 2.0518619845458322e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.269356906414032, | |
| "step": 2375, | |
| "valid_targets_mean": 6610.2, | |
| "valid_targets_min": 4754 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.3524978628062369, | |
| "learning_rate": 2.0438846149004426e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25523629784584045, | |
| "step": 2380, | |
| "valid_targets_mean": 5577.1, | |
| "valid_targets_min": 4333 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 0.37127931435883843, | |
| "learning_rate": 2.0359065466678268e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24259832501411438, | |
| "step": 2385, | |
| "valid_targets_mean": 5156.1, | |
| "valid_targets_min": 4709 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.3537455247329018, | |
| "learning_rate": 2.0279279068486795e-05, | |
| "loss": 0.257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2595163583755493, | |
| "step": 2390, | |
| "valid_targets_mean": 6122.4, | |
| "valid_targets_min": 4782 | |
| }, | |
| { | |
| "epoch": 3.832, | |
| "grad_norm": 0.3592647364231637, | |
| "learning_rate": 2.019948822452794e-05, | |
| "loss": 0.2628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26666826009750366, | |
| "step": 2395, | |
| "valid_targets_mean": 5075.2, | |
| "valid_targets_min": 2945 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.36126350997277096, | |
| "learning_rate": 2.0119694204970393e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26632314920425415, | |
| "step": 2400, | |
| "valid_targets_mean": 5981.2, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 3.848, | |
| "grad_norm": 0.357631414773473, | |
| "learning_rate": 2.0039898280033414e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2618151903152466, | |
| "step": 2405, | |
| "valid_targets_mean": 5088.5, | |
| "valid_targets_min": 4420 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.36024167671314916, | |
| "learning_rate": 1.9960101719966592e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25251504778862, | |
| "step": 2410, | |
| "valid_targets_mean": 5085.9, | |
| "valid_targets_min": 4301 | |
| }, | |
| { | |
| "epoch": 3.864, | |
| "grad_norm": 0.35245180229285206, | |
| "learning_rate": 1.9880305795029617e-05, | |
| "loss": 0.2568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25374269485473633, | |
| "step": 2415, | |
| "valid_targets_mean": 5473.1, | |
| "valid_targets_min": 4727 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.3434369021878532, | |
| "learning_rate": 1.980051177547207e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25266820192337036, | |
| "step": 2420, | |
| "valid_targets_mean": 5553.6, | |
| "valid_targets_min": 4440 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.38415422271914135, | |
| "learning_rate": 1.9720720931513212e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2593597173690796, | |
| "step": 2425, | |
| "valid_targets_mean": 5028.2, | |
| "valid_targets_min": 4447 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.36428105561050517, | |
| "learning_rate": 1.9640934533321735e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25206393003463745, | |
| "step": 2430, | |
| "valid_targets_mean": 5190.6, | |
| "valid_targets_min": 4445 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 0.3617774067964773, | |
| "learning_rate": 1.9561153850995577e-05, | |
| "loss": 0.2567, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25121819972991943, | |
| "step": 2435, | |
| "valid_targets_mean": 5399.6, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.3569439893007088, | |
| "learning_rate": 1.948138015454168e-05, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534139156341553, | |
| "step": 2440, | |
| "valid_targets_mean": 5732.9, | |
| "valid_targets_min": 4564 | |
| }, | |
| { | |
| "epoch": 3.912, | |
| "grad_norm": 0.3862341342973294, | |
| "learning_rate": 1.9401614713855775e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2558533847332001, | |
| "step": 2445, | |
| "valid_targets_mean": 5132.8, | |
| "valid_targets_min": 4629 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.3704093359982995, | |
| "learning_rate": 1.932185879870218e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25796228647232056, | |
| "step": 2450, | |
| "valid_targets_mean": 5501.5, | |
| "valid_targets_min": 4668 | |
| }, | |
| { | |
| "epoch": 3.928, | |
| "grad_norm": 0.3827166007563897, | |
| "learning_rate": 1.924211367869357e-05, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2528879642486572, | |
| "step": 2455, | |
| "valid_targets_mean": 5125.6, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.34629847627107746, | |
| "learning_rate": 1.9162380623270783e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2563566267490387, | |
| "step": 2460, | |
| "valid_targets_mean": 5984.0, | |
| "valid_targets_min": 4784 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 0.36996853747490954, | |
| "learning_rate": 1.90826609016826e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2555483281612396, | |
| "step": 2465, | |
| "valid_targets_mean": 5221.9, | |
| "valid_targets_min": 4527 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 0.4007199387477561, | |
| "learning_rate": 1.9002955782965548e-05, | |
| "loss": 0.2603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2574692666530609, | |
| "step": 2470, | |
| "valid_targets_mean": 5201.2, | |
| "valid_targets_min": 4372 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.39049357653608796, | |
| "learning_rate": 1.8923266535923688e-05, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24995353817939758, | |
| "step": 2475, | |
| "valid_targets_mean": 5060.3, | |
| "valid_targets_min": 4389 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.3638144643751539, | |
| "learning_rate": 1.8843594429108426e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.262857049703598, | |
| "step": 2480, | |
| "valid_targets_mean": 5568.6, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.3743115306712722, | |
| "learning_rate": 1.8763940730798324e-05, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2510233223438263, | |
| "step": 2485, | |
| "valid_targets_mean": 5212.4, | |
| "valid_targets_min": 4756 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.40050424018995884, | |
| "learning_rate": 1.8684306708978896e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24612918496131897, | |
| "step": 2490, | |
| "valid_targets_mean": 5454.7, | |
| "valid_targets_min": 4523 | |
| }, | |
| { | |
| "epoch": 3.992, | |
| "grad_norm": 0.38072501997911723, | |
| "learning_rate": 1.8604693631322433e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25215938687324524, | |
| "step": 2495, | |
| "valid_targets_mean": 5146.9, | |
| "valid_targets_min": 4848 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.3653618743799516, | |
| "learning_rate": 1.852510276516783e-05, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2501947581768036, | |
| "step": 2500, | |
| "valid_targets_mean": 5100.6, | |
| "valid_targets_min": 4232 | |
| }, | |
| { | |
| "epoch": 4.008, | |
| "grad_norm": 0.3674275159617866, | |
| "learning_rate": 1.8445535377500393e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24039268493652344, | |
| "step": 2505, | |
| "valid_targets_mean": 5040.7, | |
| "valid_targets_min": 4372 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.3984001853892099, | |
| "learning_rate": 1.8365992734931686e-05, | |
| "loss": 0.2505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24805349111557007, | |
| "step": 2510, | |
| "valid_targets_mean": 5123.1, | |
| "valid_targets_min": 4400 | |
| }, | |
| { | |
| "epoch": 4.024, | |
| "grad_norm": 0.37379693753225374, | |
| "learning_rate": 1.8286476103679356e-05, | |
| "loss": 0.2517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24470211565494537, | |
| "step": 2515, | |
| "valid_targets_mean": 5760.0, | |
| "valid_targets_min": 3921 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.3643092937180645, | |
| "learning_rate": 1.8206986749546992e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23670101165771484, | |
| "step": 2520, | |
| "valid_targets_mean": 5016.5, | |
| "valid_targets_min": 4342 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.3617254335673591, | |
| "learning_rate": 1.8127525937903957e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24808432161808014, | |
| "step": 2525, | |
| "valid_targets_mean": 5198.0, | |
| "valid_targets_min": 4521 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.3739815788598893, | |
| "learning_rate": 1.8048094933665262e-05, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2635418772697449, | |
| "step": 2530, | |
| "valid_targets_mean": 5518.4, | |
| "valid_targets_min": 4661 | |
| }, | |
| { | |
| "epoch": 4.056, | |
| "grad_norm": 0.33522975956704837, | |
| "learning_rate": 1.7968695001271416e-05, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24991494417190552, | |
| "step": 2535, | |
| "valid_targets_mean": 6062.5, | |
| "valid_targets_min": 4231 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.3643054002725916, | |
| "learning_rate": 1.7889327404668316e-05, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24297180771827698, | |
| "step": 2540, | |
| "valid_targets_mean": 5105.7, | |
| "valid_targets_min": 4754 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 0.3597944106935309, | |
| "learning_rate": 1.7809993407287083e-05, | |
| "loss": 0.2485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24304506182670593, | |
| "step": 2545, | |
| "valid_targets_mean": 5149.6, | |
| "valid_targets_min": 4320 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.37124777774495743, | |
| "learning_rate": 1.7730694272024018e-05, | |
| "loss": 0.2579, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2780216932296753, | |
| "step": 2550, | |
| "valid_targets_mean": 5798.8, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 4.088, | |
| "grad_norm": 0.3891104276119331, | |
| "learning_rate": 1.765143126122044e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25578761100769043, | |
| "step": 2555, | |
| "valid_targets_mean": 5110.8, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.39442400645265036, | |
| "learning_rate": 1.7572205636642622e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24166031181812286, | |
| "step": 2560, | |
| "valid_targets_mean": 5538.1, | |
| "valid_targets_min": 4452 | |
| }, | |
| { | |
| "epoch": 4.104, | |
| "grad_norm": 0.3712879335572006, | |
| "learning_rate": 1.749301865946169e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24312445521354675, | |
| "step": 2565, | |
| "valid_targets_mean": 4941.2, | |
| "valid_targets_min": 3618 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.3699730312633202, | |
| "learning_rate": 1.7413871590233557e-05, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2448056936264038, | |
| "step": 2570, | |
| "valid_targets_mean": 5061.8, | |
| "valid_targets_min": 4347 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.3753839537904247, | |
| "learning_rate": 1.7334765688878848e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24360007047653198, | |
| "step": 2575, | |
| "valid_targets_mean": 5038.7, | |
| "valid_targets_min": 4276 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.3458855511038516, | |
| "learning_rate": 1.7255702214662852e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2510986328125, | |
| "step": 2580, | |
| "valid_targets_mean": 5695.3, | |
| "valid_targets_min": 4734 | |
| }, | |
| { | |
| "epoch": 4.136, | |
| "grad_norm": 0.4009078109285487, | |
| "learning_rate": 1.7176682426175468e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24612480401992798, | |
| "step": 2585, | |
| "valid_targets_mean": 4887.0, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.4180542841916532, | |
| "learning_rate": 1.709770758131118e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26033952832221985, | |
| "step": 2590, | |
| "valid_targets_mean": 5548.2, | |
| "valid_targets_min": 4138 | |
| }, | |
| { | |
| "epoch": 4.152, | |
| "grad_norm": 0.37650458482743576, | |
| "learning_rate": 1.7018778937249017e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2396523654460907, | |
| "step": 2595, | |
| "valid_targets_mean": 4995.2, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.38316494496153264, | |
| "learning_rate": 1.6939897750432562e-05, | |
| "loss": 0.2557, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2509874701499939, | |
| "step": 2600, | |
| "valid_targets_mean": 5123.1, | |
| "valid_targets_min": 4534 | |
| }, | |
| { | |
| "epoch": 4.168, | |
| "grad_norm": 0.390779481109788, | |
| "learning_rate": 1.6861065276549933e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24917100369930267, | |
| "step": 2605, | |
| "valid_targets_mean": 5157.4, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.36204301742542894, | |
| "learning_rate": 1.6782282770513788e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25009480118751526, | |
| "step": 2610, | |
| "valid_targets_mean": 5232.2, | |
| "valid_targets_min": 4821 | |
| }, | |
| { | |
| "epoch": 4.184, | |
| "grad_norm": 0.3661428265786247, | |
| "learning_rate": 1.6703551486441382e-05, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24723048508167267, | |
| "step": 2615, | |
| "valid_targets_mean": 5026.6, | |
| "valid_targets_min": 4645 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.3737660178982299, | |
| "learning_rate": 1.6624872677634565e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2518741488456726, | |
| "step": 2620, | |
| "valid_targets_mean": 5140.2, | |
| "valid_targets_min": 4538 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.3757557509659822, | |
| "learning_rate": 1.654624759655986e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2518763840198517, | |
| "step": 2625, | |
| "valid_targets_mean": 5238.9, | |
| "valid_targets_min": 3933 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.37465284133414084, | |
| "learning_rate": 1.64676774948285e-05, | |
| "loss": 0.2558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2534467279911041, | |
| "step": 2630, | |
| "valid_targets_mean": 5158.1, | |
| "valid_targets_min": 4793 | |
| }, | |
| { | |
| "epoch": 4.216, | |
| "grad_norm": 0.37095833952452023, | |
| "learning_rate": 1.6389163623176536e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25161969661712646, | |
| "step": 2635, | |
| "valid_targets_mean": 5093.1, | |
| "valid_targets_min": 4294 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.401093428313045, | |
| "learning_rate": 1.6310707231444884e-05, | |
| "loss": 0.2507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25549280643463135, | |
| "step": 2640, | |
| "valid_targets_mean": 5065.9, | |
| "valid_targets_min": 4293 | |
| }, | |
| { | |
| "epoch": 4.232, | |
| "grad_norm": 0.37997992187416707, | |
| "learning_rate": 1.623230956855947e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26234710216522217, | |
| "step": 2645, | |
| "valid_targets_mean": 5177.1, | |
| "valid_targets_min": 4232 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.34017943727148453, | |
| "learning_rate": 1.6153971882511324e-05, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2495141476392746, | |
| "step": 2650, | |
| "valid_targets_mean": 5650.7, | |
| "valid_targets_min": 4700 | |
| }, | |
| { | |
| "epoch": 4.248, | |
| "grad_norm": 0.39665708914975917, | |
| "learning_rate": 1.6075695420336724e-05, | |
| "loss": 0.2486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2638532221317291, | |
| "step": 2655, | |
| "valid_targets_mean": 5165.9, | |
| "valid_targets_min": 4663 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.36148497532136165, | |
| "learning_rate": 1.5997481428097338e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2541789412498474, | |
| "step": 2660, | |
| "valid_targets_mean": 5041.2, | |
| "valid_targets_min": 4375 | |
| }, | |
| { | |
| "epoch": 4.264, | |
| "grad_norm": 0.36494426514951306, | |
| "learning_rate": 1.5919331150860396e-05, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24574601650238037, | |
| "step": 2665, | |
| "valid_targets_mean": 5168.0, | |
| "valid_targets_min": 4828 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.3588265162690351, | |
| "learning_rate": 1.5841245832678873e-05, | |
| "loss": 0.2486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.243943989276886, | |
| "step": 2670, | |
| "valid_targets_mean": 5049.2, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.39406153962690926, | |
| "learning_rate": 1.576322671657166e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24794012308120728, | |
| "step": 2675, | |
| "valid_targets_mean": 5037.5, | |
| "valid_targets_min": 4423 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.3567560076380587, | |
| "learning_rate": 1.5685275044503804e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2508270740509033, | |
| "step": 2680, | |
| "valid_targets_mean": 5535.0, | |
| "valid_targets_min": 4622 | |
| }, | |
| { | |
| "epoch": 4.296, | |
| "grad_norm": 0.38108296561457766, | |
| "learning_rate": 1.560739205736674e-05, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23806720972061157, | |
| "step": 2685, | |
| "valid_targets_mean": 5053.8, | |
| "valid_targets_min": 4324 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.38637854332405347, | |
| "learning_rate": 1.552957899495851e-05, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25444719195365906, | |
| "step": 2690, | |
| "valid_targets_mean": 5083.1, | |
| "valid_targets_min": 4408 | |
| }, | |
| { | |
| "epoch": 4.312, | |
| "grad_norm": 0.36594678774593287, | |
| "learning_rate": 1.5451837095964054e-05, | |
| "loss": 0.2546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2571535110473633, | |
| "step": 2695, | |
| "valid_targets_mean": 5465.6, | |
| "valid_targets_min": 4515 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.38890074026939525, | |
| "learning_rate": 1.5374167597935478e-05, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25096768140792847, | |
| "step": 2700, | |
| "valid_targets_mean": 5076.8, | |
| "valid_targets_min": 4553 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 0.3647162275058187, | |
| "learning_rate": 1.5296571737272354e-05, | |
| "loss": 0.2558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597350478172302, | |
| "step": 2705, | |
| "valid_targets_mean": 5046.0, | |
| "valid_targets_min": 4552 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.36780294361652843, | |
| "learning_rate": 1.5219050749202037e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2397717535495758, | |
| "step": 2710, | |
| "valid_targets_mean": 4986.4, | |
| "valid_targets_min": 4104 | |
| }, | |
| { | |
| "epoch": 4.344, | |
| "grad_norm": 0.37897908375491746, | |
| "learning_rate": 1.5141605867760021e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24867841601371765, | |
| "step": 2715, | |
| "valid_targets_mean": 5127.8, | |
| "valid_targets_min": 4667 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.35097869256384745, | |
| "learning_rate": 1.5064238325770267e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23782838881015778, | |
| "step": 2720, | |
| "valid_targets_mean": 5456.8, | |
| "valid_targets_min": 4663 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.3625868632556459, | |
| "learning_rate": 1.498694935482559e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23967579007148743, | |
| "step": 2725, | |
| "valid_targets_mean": 4955.2, | |
| "valid_targets_min": 4340 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.35778688721934254, | |
| "learning_rate": 1.4909740185268056e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2430783212184906, | |
| "step": 2730, | |
| "valid_targets_mean": 5722.8, | |
| "valid_targets_min": 4354 | |
| }, | |
| { | |
| "epoch": 4.376, | |
| "grad_norm": 0.3420303387599777, | |
| "learning_rate": 1.4832612046169408e-05, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24597465991973877, | |
| "step": 2735, | |
| "valid_targets_mean": 5415.5, | |
| "valid_targets_min": 4529 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.379980978799852, | |
| "learning_rate": 1.4755566165311455e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.254502534866333, | |
| "step": 2740, | |
| "valid_targets_mean": 5123.9, | |
| "valid_targets_min": 4442 | |
| }, | |
| { | |
| "epoch": 4.392, | |
| "grad_norm": 0.3655270640481768, | |
| "learning_rate": 1.4678603769166591e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24838915467262268, | |
| "step": 2745, | |
| "valid_targets_mean": 5224.4, | |
| "valid_targets_min": 4637 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.3562134768789893, | |
| "learning_rate": 1.4601726082878226e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.248890221118927, | |
| "step": 2750, | |
| "valid_targets_mean": 5545.4, | |
| "valid_targets_min": 4197 | |
| }, | |
| { | |
| "epoch": 4.408, | |
| "grad_norm": 0.3777659198035779, | |
| "learning_rate": 1.4524934330241292e-05, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2561601400375366, | |
| "step": 2755, | |
| "valid_targets_mean": 5127.5, | |
| "valid_targets_min": 4754 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.36083880794157014, | |
| "learning_rate": 1.4448229733682784e-05, | |
| "loss": 0.2494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25734955072402954, | |
| "step": 2760, | |
| "valid_targets_mean": 5322.5, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 4.424, | |
| "grad_norm": 0.39815767930353957, | |
| "learning_rate": 1.4371613514242264e-05, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23867246508598328, | |
| "step": 2765, | |
| "valid_targets_mean": 5037.8, | |
| "valid_targets_min": 4256 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.3957872659287014, | |
| "learning_rate": 1.4295086891552457e-05, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24358974397182465, | |
| "step": 2770, | |
| "valid_targets_mean": 4995.7, | |
| "valid_targets_min": 4574 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.3767128131944061, | |
| "learning_rate": 1.4218651083819811e-05, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545109987258911, | |
| "step": 2775, | |
| "valid_targets_mean": 5181.6, | |
| "valid_targets_min": 4736 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.3658345303965923, | |
| "learning_rate": 1.4142307307805125e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23972110450267792, | |
| "step": 2780, | |
| "valid_targets_mean": 5108.4, | |
| "valid_targets_min": 4605 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 0.36976513836678376, | |
| "learning_rate": 1.406605677880416e-05, | |
| "loss": 0.2514, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.251924991607666, | |
| "step": 2785, | |
| "valid_targets_mean": 5104.1, | |
| "valid_targets_min": 4541 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.3665288219452047, | |
| "learning_rate": 1.3989900710628313e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25707289576530457, | |
| "step": 2790, | |
| "valid_targets_mean": 5085.7, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 4.4719999999999995, | |
| "grad_norm": 0.3696395400603922, | |
| "learning_rate": 1.3913840315585279e-05, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24852848052978516, | |
| "step": 2795, | |
| "valid_targets_mean": 5238.6, | |
| "valid_targets_min": 4765 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.37541409632857764, | |
| "learning_rate": 1.3837876804459765e-05, | |
| "loss": 0.2529, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.260326623916626, | |
| "step": 2800, | |
| "valid_targets_mean": 5039.6, | |
| "valid_targets_min": 4379 | |
| }, | |
| { | |
| "epoch": 4.4879999999999995, | |
| "grad_norm": 0.37958739603840547, | |
| "learning_rate": 1.3762011386494191e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2580680847167969, | |
| "step": 2805, | |
| "valid_targets_mean": 5114.9, | |
| "valid_targets_min": 4366 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.3681981814884332, | |
| "learning_rate": 1.3686245269369485e-05, | |
| "loss": 0.251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25740164518356323, | |
| "step": 2810, | |
| "valid_targets_mean": 5527.8, | |
| "valid_targets_min": 4487 | |
| }, | |
| { | |
| "epoch": 4.504, | |
| "grad_norm": 0.35031423770677733, | |
| "learning_rate": 1.3610579659185809e-05, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25285714864730835, | |
| "step": 2815, | |
| "valid_targets_mean": 5575.2, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.3364928633051772, | |
| "learning_rate": 1.35350157604434e-05, | |
| "loss": 0.251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2474539875984192, | |
| "step": 2820, | |
| "valid_targets_mean": 5504.3, | |
| "valid_targets_min": 4373 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.35945978220706415, | |
| "learning_rate": 1.345955477602337e-05, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24169385433197021, | |
| "step": 2825, | |
| "valid_targets_mean": 5073.8, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.3546711082830975, | |
| "learning_rate": 1.3384197907168561e-05, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2409333884716034, | |
| "step": 2830, | |
| "valid_targets_mean": 5182.0, | |
| "valid_targets_min": 4599 | |
| }, | |
| { | |
| "epoch": 4.536, | |
| "grad_norm": 0.3677741797211739, | |
| "learning_rate": 1.3308946353464438e-05, | |
| "loss": 0.2479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2668917775154114, | |
| "step": 2835, | |
| "valid_targets_mean": 5635.5, | |
| "valid_targets_min": 4749 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.3509309685190188, | |
| "learning_rate": 1.3233801312819979e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2638437747955322, | |
| "step": 2840, | |
| "valid_targets_mean": 5560.4, | |
| "valid_targets_min": 4676 | |
| }, | |
| { | |
| "epoch": 4.552, | |
| "grad_norm": 0.36052061265939983, | |
| "learning_rate": 1.3158763981448606e-05, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24797263741493225, | |
| "step": 2845, | |
| "valid_targets_mean": 5147.5, | |
| "valid_targets_min": 4529 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.3450487792567613, | |
| "learning_rate": 1.3083835553849148e-05, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507861256599426, | |
| "step": 2850, | |
| "valid_targets_mean": 5500.6, | |
| "valid_targets_min": 4628 | |
| }, | |
| { | |
| "epoch": 4.568, | |
| "grad_norm": 0.38966352006884225, | |
| "learning_rate": 1.3009017222786828e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2384726107120514, | |
| "step": 2855, | |
| "valid_targets_mean": 4955.9, | |
| "valid_targets_min": 4417 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.3874662249121658, | |
| "learning_rate": 1.2934310179274269e-05, | |
| "loss": 0.2561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2642170190811157, | |
| "step": 2860, | |
| "valid_targets_mean": 5188.8, | |
| "valid_targets_min": 4512 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 0.35786442693332965, | |
| "learning_rate": 1.2859715612552541e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2611735463142395, | |
| "step": 2865, | |
| "valid_targets_mean": 6122.6, | |
| "valid_targets_min": 4358 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.3901133627507907, | |
| "learning_rate": 1.278523471007223e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25248226523399353, | |
| "step": 2870, | |
| "valid_targets_mean": 5174.9, | |
| "valid_targets_min": 4582 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.38835505155327604, | |
| "learning_rate": 1.271086865747451e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25744742155075073, | |
| "step": 2875, | |
| "valid_targets_mean": 5499.4, | |
| "valid_targets_min": 4531 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.40854598177277596, | |
| "learning_rate": 1.2636618638572316e-05, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2559804916381836, | |
| "step": 2880, | |
| "valid_targets_mean": 5295.2, | |
| "valid_targets_min": 4799 | |
| }, | |
| { | |
| "epoch": 4.616, | |
| "grad_norm": 0.36105386024551667, | |
| "learning_rate": 1.2562485835331466e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.242630273103714, | |
| "step": 2885, | |
| "valid_targets_mean": 5018.2, | |
| "valid_targets_min": 3803 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.347517818153188, | |
| "learning_rate": 1.2488471427851852e-05, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24531060457229614, | |
| "step": 2890, | |
| "valid_targets_mean": 5582.7, | |
| "valid_targets_min": 4729 | |
| }, | |
| { | |
| "epoch": 4.632, | |
| "grad_norm": 0.35753509143077106, | |
| "learning_rate": 1.241457659434866e-05, | |
| "loss": 0.2538, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2478037327528, | |
| "step": 2895, | |
| "valid_targets_mean": 5173.8, | |
| "valid_targets_min": 4389 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.35564624437636333, | |
| "learning_rate": 1.2340802511133605e-05, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24675898253917694, | |
| "step": 2900, | |
| "valid_targets_mean": 5512.3, | |
| "valid_targets_min": 4756 | |
| }, | |
| { | |
| "epoch": 4.648, | |
| "grad_norm": 0.3673097150878072, | |
| "learning_rate": 1.2267150352596216e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26329073309898376, | |
| "step": 2905, | |
| "valid_targets_mean": 5201.6, | |
| "valid_targets_min": 4700 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.38729187506887586, | |
| "learning_rate": 1.2193621291185132e-05, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2563297152519226, | |
| "step": 2910, | |
| "valid_targets_mean": 5596.0, | |
| "valid_targets_min": 4646 | |
| }, | |
| { | |
| "epoch": 4.664, | |
| "grad_norm": 0.36759125115398456, | |
| "learning_rate": 1.2120216497389446e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25492551922798157, | |
| "step": 2915, | |
| "valid_targets_mean": 5092.6, | |
| "valid_targets_min": 4157 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.3799159123506485, | |
| "learning_rate": 1.2046937139720068e-05, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2514094114303589, | |
| "step": 2920, | |
| "valid_targets_mean": 5197.9, | |
| "valid_targets_min": 4482 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.34680853396757405, | |
| "learning_rate": 1.1973784384691121e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2551521062850952, | |
| "step": 2925, | |
| "valid_targets_mean": 5342.2, | |
| "valid_targets_min": 4478 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.35461306421565264, | |
| "learning_rate": 1.1900759396801382e-05, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2462252974510193, | |
| "step": 2930, | |
| "valid_targets_mean": 5679.6, | |
| "valid_targets_min": 4796 | |
| }, | |
| { | |
| "epoch": 4.696, | |
| "grad_norm": 0.3805389614888485, | |
| "learning_rate": 1.1827863338515741e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24924349784851074, | |
| "step": 2935, | |
| "valid_targets_mean": 5083.1, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.372926336910229, | |
| "learning_rate": 1.1755097370246669e-05, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2541443109512329, | |
| "step": 2940, | |
| "valid_targets_mean": 5173.8, | |
| "valid_targets_min": 4688 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 0.3894375901264325, | |
| "learning_rate": 1.1682462650335791e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24354255199432373, | |
| "step": 2945, | |
| "valid_targets_mean": 5028.4, | |
| "valid_targets_min": 4431 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.3616243366966302, | |
| "learning_rate": 1.1609960335035423e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24789413809776306, | |
| "step": 2950, | |
| "valid_targets_mean": 5456.2, | |
| "valid_targets_min": 4509 | |
| }, | |
| { | |
| "epoch": 4.728, | |
| "grad_norm": 0.3657182015907511, | |
| "learning_rate": 1.1537591578490165e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2519040107727051, | |
| "step": 2955, | |
| "valid_targets_mean": 5144.8, | |
| "valid_targets_min": 4623 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.37753401792547003, | |
| "learning_rate": 1.146535753271853e-05, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23880131542682648, | |
| "step": 2960, | |
| "valid_targets_mean": 5017.1, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 4.744, | |
| "grad_norm": 0.36976714227040597, | |
| "learning_rate": 1.139325934759461e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25853389501571655, | |
| "step": 2965, | |
| "valid_targets_mean": 5092.8, | |
| "valid_targets_min": 4520 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.35637109514832777, | |
| "learning_rate": 1.1321298170829768e-05, | |
| "loss": 0.2526, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2502405047416687, | |
| "step": 2970, | |
| "valid_targets_mean": 5645.9, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.39493191072831646, | |
| "learning_rate": 1.1249475147954363e-05, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2531408667564392, | |
| "step": 2975, | |
| "valid_targets_mean": 5173.4, | |
| "valid_targets_min": 4402 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.3797306543810631, | |
| "learning_rate": 1.1177791422299528e-05, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24825051426887512, | |
| "step": 2980, | |
| "valid_targets_mean": 4983.6, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 4.776, | |
| "grad_norm": 0.3415100305618845, | |
| "learning_rate": 1.1106248134978959e-05, | |
| "loss": 0.2485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24582664668560028, | |
| "step": 2985, | |
| "valid_targets_mean": 5666.1, | |
| "valid_targets_min": 4538 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.3704553398705258, | |
| "learning_rate": 1.1034846424870744e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24741065502166748, | |
| "step": 2990, | |
| "valid_targets_mean": 5040.5, | |
| "valid_targets_min": 4289 | |
| }, | |
| { | |
| "epoch": 4.792, | |
| "grad_norm": 0.36511159486023675, | |
| "learning_rate": 1.0963587428599256e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25997665524482727, | |
| "step": 2995, | |
| "valid_targets_mean": 5197.4, | |
| "valid_targets_min": 4497 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.3486998656528999, | |
| "learning_rate": 1.089247228051704e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24655069410800934, | |
| "step": 3000, | |
| "valid_targets_mean": 5216.8, | |
| "valid_targets_min": 4810 | |
| }, | |
| { | |
| "epoch": 4.808, | |
| "grad_norm": 0.3603350144816226, | |
| "learning_rate": 1.0821502112686753e-05, | |
| "loss": 0.2482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24212342500686646, | |
| "step": 3005, | |
| "valid_targets_mean": 4996.8, | |
| "valid_targets_min": 4368 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.35237048393593023, | |
| "learning_rate": 1.0750678054863158e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2500638961791992, | |
| "step": 3010, | |
| "valid_targets_mean": 5096.9, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 4.824, | |
| "grad_norm": 0.3660593202923096, | |
| "learning_rate": 1.0680001234475127e-05, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24405664205551147, | |
| "step": 3015, | |
| "valid_targets_mean": 5117.1, | |
| "valid_targets_min": 4562 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.3585600940847136, | |
| "learning_rate": 1.0609472776607715e-05, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24507729709148407, | |
| "step": 3020, | |
| "valid_targets_mean": 5642.1, | |
| "valid_targets_min": 4500 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.351073824053857, | |
| "learning_rate": 1.0539093803984217e-05, | |
| "loss": 0.2533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26090019941329956, | |
| "step": 3025, | |
| "valid_targets_mean": 5538.4, | |
| "valid_targets_min": 4557 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.3396292332962187, | |
| "learning_rate": 1.046886543694832e-05, | |
| "loss": 0.2532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24989022314548492, | |
| "step": 3030, | |
| "valid_targets_mean": 5718.6, | |
| "valid_targets_min": 4756 | |
| }, | |
| { | |
| "epoch": 4.856, | |
| "grad_norm": 0.36519513505545914, | |
| "learning_rate": 1.0398788793446263e-05, | |
| "loss": 0.2478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2581470012664795, | |
| "step": 3035, | |
| "valid_targets_mean": 5705.0, | |
| "valid_targets_min": 4648 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.36320902338898536, | |
| "learning_rate": 1.0328864989009037e-05, | |
| "loss": 0.248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24631816148757935, | |
| "step": 3040, | |
| "valid_targets_mean": 5354.4, | |
| "valid_targets_min": 4190 | |
| }, | |
| { | |
| "epoch": 4.872, | |
| "grad_norm": 0.3634351264408527, | |
| "learning_rate": 1.0259095136734634e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2558499276638031, | |
| "step": 3045, | |
| "valid_targets_mean": 5254.2, | |
| "valid_targets_min": 4704 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.33586414030593026, | |
| "learning_rate": 1.0189480347270311e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2411791831254959, | |
| "step": 3050, | |
| "valid_targets_mean": 5652.8, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 4.888, | |
| "grad_norm": 0.36332479232412296, | |
| "learning_rate": 1.0120021728794938e-05, | |
| "loss": 0.2507, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24667370319366455, | |
| "step": 3055, | |
| "valid_targets_mean": 5252.9, | |
| "valid_targets_min": 4588 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.3665412586661285, | |
| "learning_rate": 1.0050720387001334e-05, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.253898024559021, | |
| "step": 3060, | |
| "valid_targets_mean": 5067.9, | |
| "valid_targets_min": 4655 | |
| }, | |
| { | |
| "epoch": 4.904, | |
| "grad_norm": 0.3794451616664997, | |
| "learning_rate": 9.981577425078672e-06, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2505333423614502, | |
| "step": 3065, | |
| "valid_targets_mean": 5057.8, | |
| "valid_targets_min": 4398 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.3388760898802902, | |
| "learning_rate": 9.912593943694924e-06, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2554622292518616, | |
| "step": 3070, | |
| "valid_targets_mean": 5468.9, | |
| "valid_targets_min": 4607 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.3766011797046395, | |
| "learning_rate": 9.843771040979328e-06, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2522331476211548, | |
| "step": 3075, | |
| "valid_targets_mean": 5093.4, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.34290246075172426, | |
| "learning_rate": 9.775109812504922e-06, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24595046043395996, | |
| "step": 3080, | |
| "valid_targets_mean": 5161.0, | |
| "valid_targets_min": 4310 | |
| }, | |
| { | |
| "epoch": 4.936, | |
| "grad_norm": 0.3665534168296289, | |
| "learning_rate": 9.706611351271088e-06, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2467697113752365, | |
| "step": 3085, | |
| "valid_targets_mean": 5631.9, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.3915322359672945, | |
| "learning_rate": 9.638276747686169e-06, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2523011863231659, | |
| "step": 3090, | |
| "valid_targets_mean": 5617.9, | |
| "valid_targets_min": 4761 | |
| }, | |
| { | |
| "epoch": 4.952, | |
| "grad_norm": 0.37619778569218115, | |
| "learning_rate": 9.570107089550091e-06, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25879889726638794, | |
| "step": 3095, | |
| "valid_targets_mean": 5186.6, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.3957599525375665, | |
| "learning_rate": 9.502103462037074e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25049468874931335, | |
| "step": 3100, | |
| "valid_targets_mean": 5171.3, | |
| "valid_targets_min": 4584 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 0.34187939305017395, | |
| "learning_rate": 9.434266947678326e-06, | |
| "loss": 0.2519, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2448119968175888, | |
| "step": 3105, | |
| "valid_targets_mean": 5553.6, | |
| "valid_targets_min": 4573 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 0.3655287248579373, | |
| "learning_rate": 9.366598626344836e-06, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24400463700294495, | |
| "step": 3110, | |
| "valid_targets_mean": 5046.4, | |
| "valid_targets_min": 4649 | |
| }, | |
| { | |
| "epoch": 4.984, | |
| "grad_norm": 0.3759540433579007, | |
| "learning_rate": 9.299099575230172e-06, | |
| "loss": 0.258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26158541440963745, | |
| "step": 3115, | |
| "valid_targets_mean": 5335.1, | |
| "valid_targets_min": 4728 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.36851443015362717, | |
| "learning_rate": 9.231770868833334e-06, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24376040697097778, | |
| "step": 3120, | |
| "valid_targets_mean": 5105.3, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.35658356587151596, | |
| "learning_rate": 9.164613578941652e-06, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25490039587020874, | |
| "step": 3125, | |
| "valid_targets_mean": 5186.2, | |
| "valid_targets_min": 4229 | |
| }, | |
| { | |
| "epoch": 5.008, | |
| "grad_norm": 0.35637390972212063, | |
| "learning_rate": 9.097628774613732e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24802757799625397, | |
| "step": 3130, | |
| "valid_targets_mean": 5560.1, | |
| "valid_targets_min": 4738 | |
| }, | |
| { | |
| "epoch": 5.016, | |
| "grad_norm": 0.3512047033204037, | |
| "learning_rate": 9.030817522162403e-06, | |
| "loss": 0.2479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2461925595998764, | |
| "step": 3135, | |
| "valid_targets_mean": 5202.4, | |
| "valid_targets_min": 4495 | |
| }, | |
| { | |
| "epoch": 5.024, | |
| "grad_norm": 0.3686295472397922, | |
| "learning_rate": 8.964180885137797e-06, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23188906908035278, | |
| "step": 3140, | |
| "valid_targets_mean": 5431.4, | |
| "valid_targets_min": 4612 | |
| }, | |
| { | |
| "epoch": 5.032, | |
| "grad_norm": 0.36145367200793777, | |
| "learning_rate": 8.897719924310375e-06, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25033625960350037, | |
| "step": 3145, | |
| "valid_targets_mean": 5119.1, | |
| "valid_targets_min": 4411 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.36253281609711935, | |
| "learning_rate": 8.831435697654068e-06, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24015460908412933, | |
| "step": 3150, | |
| "valid_targets_mean": 5134.5, | |
| "valid_targets_min": 4748 | |
| }, | |
| { | |
| "epoch": 5.048, | |
| "grad_norm": 0.3819173385002267, | |
| "learning_rate": 8.765329260329413e-06, | |
| "loss": 0.251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24555765092372894, | |
| "step": 3155, | |
| "valid_targets_mean": 5100.3, | |
| "valid_targets_min": 4585 | |
| }, | |
| { | |
| "epoch": 5.056, | |
| "grad_norm": 0.3697577467080201, | |
| "learning_rate": 8.699401664666774e-06, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24489490687847137, | |
| "step": 3160, | |
| "valid_targets_mean": 5113.9, | |
| "valid_targets_min": 4320 | |
| }, | |
| { | |
| "epoch": 5.064, | |
| "grad_norm": 0.35031100080478067, | |
| "learning_rate": 8.633653960149579e-06, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24456676840782166, | |
| "step": 3165, | |
| "valid_targets_mean": 5625.6, | |
| "valid_targets_min": 4729 | |
| }, | |
| { | |
| "epoch": 5.072, | |
| "grad_norm": 0.34136974583224045, | |
| "learning_rate": 8.56808719339762e-06, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24351713061332703, | |
| "step": 3170, | |
| "valid_targets_mean": 5800.5, | |
| "valid_targets_min": 4334 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 0.3536470726668268, | |
| "learning_rate": 8.502702408150391e-06, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24576084315776825, | |
| "step": 3175, | |
| "valid_targets_mean": 5106.3, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 5.088, | |
| "grad_norm": 0.3732892984030791, | |
| "learning_rate": 8.43750064525047e-06, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25150924921035767, | |
| "step": 3180, | |
| "valid_targets_mean": 5155.6, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 5.096, | |
| "grad_norm": 0.36063734738283204, | |
| "learning_rate": 8.372482942626952e-06, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24317416548728943, | |
| "step": 3185, | |
| "valid_targets_mean": 5186.1, | |
| "valid_targets_min": 4489 | |
| }, | |
| { | |
| "epoch": 5.104, | |
| "grad_norm": 0.3217356043679006, | |
| "learning_rate": 8.307650335278927e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25135815143585205, | |
| "step": 3190, | |
| "valid_targets_mean": 6082.1, | |
| "valid_targets_min": 4635 | |
| }, | |
| { | |
| "epoch": 5.112, | |
| "grad_norm": 0.35191873044838295, | |
| "learning_rate": 8.243003855259015e-06, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25053954124450684, | |
| "step": 3195, | |
| "valid_targets_mean": 6087.2, | |
| "valid_targets_min": 4662 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.3731620171976401, | |
| "learning_rate": 8.178544531656897e-06, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25866037607192993, | |
| "step": 3200, | |
| "valid_targets_mean": 5588.0, | |
| "valid_targets_min": 4281 | |
| }, | |
| { | |
| "epoch": 5.128, | |
| "grad_norm": 0.36010456243446953, | |
| "learning_rate": 8.11427339058299e-06, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24169722199440002, | |
| "step": 3205, | |
| "valid_targets_mean": 5505.9, | |
| "valid_targets_min": 4442 | |
| }, | |
| { | |
| "epoch": 5.136, | |
| "grad_norm": 0.32549134648466177, | |
| "learning_rate": 8.050191455152072e-06, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24478399753570557, | |
| "step": 3210, | |
| "valid_targets_mean": 6021.2, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 5.144, | |
| "grad_norm": 0.36116927922119546, | |
| "learning_rate": 7.986299745467013e-06, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2374686300754547, | |
| "step": 3215, | |
| "valid_targets_mean": 5052.1, | |
| "valid_targets_min": 4348 | |
| }, | |
| { | |
| "epoch": 5.152, | |
| "grad_norm": 0.38699787728293894, | |
| "learning_rate": 7.922599278602524e-06, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24092377722263336, | |
| "step": 3220, | |
| "valid_targets_mean": 4995.0, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.35248527563891274, | |
| "learning_rate": 7.859091068588987e-06, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24180716276168823, | |
| "step": 3225, | |
| "valid_targets_mean": 5098.1, | |
| "valid_targets_min": 4543 | |
| }, | |
| { | |
| "epoch": 5.168, | |
| "grad_norm": 0.34824308281846117, | |
| "learning_rate": 7.795776126396284e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24726392328739166, | |
| "step": 3230, | |
| "valid_targets_mean": 5248.6, | |
| "valid_targets_min": 4938 | |
| }, | |
| { | |
| "epoch": 5.176, | |
| "grad_norm": 0.36928779921971205, | |
| "learning_rate": 7.732655459917726e-06, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24958401918411255, | |
| "step": 3235, | |
| "valid_targets_mean": 5566.1, | |
| "valid_targets_min": 4526 | |
| }, | |
| { | |
| "epoch": 5.184, | |
| "grad_norm": 0.3475361106030053, | |
| "learning_rate": 7.669730073954005e-06, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.247992604970932, | |
| "step": 3240, | |
| "valid_targets_mean": 5545.1, | |
| "valid_targets_min": 4655 | |
| }, | |
| { | |
| "epoch": 5.192, | |
| "grad_norm": 0.3490925262897297, | |
| "learning_rate": 7.607000970197194e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2480398416519165, | |
| "step": 3245, | |
| "valid_targets_mean": 5570.4, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.33694793296226905, | |
| "learning_rate": 7.544469147214797e-06, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24119962751865387, | |
| "step": 3250, | |
| "valid_targets_mean": 5650.4, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 5.208, | |
| "grad_norm": 0.3801170813160556, | |
| "learning_rate": 7.482135600433868e-06, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24771934747695923, | |
| "step": 3255, | |
| "valid_targets_mean": 5002.3, | |
| "valid_targets_min": 4232 | |
| }, | |
| { | |
| "epoch": 5.216, | |
| "grad_norm": 0.3807483849747572, | |
| "learning_rate": 7.420001322125156e-06, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24436020851135254, | |
| "step": 3260, | |
| "valid_targets_mean": 5127.6, | |
| "valid_targets_min": 4482 | |
| }, | |
| { | |
| "epoch": 5.224, | |
| "grad_norm": 0.3785624013626151, | |
| "learning_rate": 7.3580673013872946e-06, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24474889039993286, | |
| "step": 3265, | |
| "valid_targets_mean": 5070.6, | |
| "valid_targets_min": 4670 | |
| }, | |
| { | |
| "epoch": 5.232, | |
| "grad_norm": 0.34055180351481185, | |
| "learning_rate": 7.2963345241310904e-06, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23886609077453613, | |
| "step": 3270, | |
| "valid_targets_mean": 5491.1, | |
| "valid_targets_min": 4687 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 0.3689089037758627, | |
| "learning_rate": 7.234803973063797e-06, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24341528117656708, | |
| "step": 3275, | |
| "valid_targets_mean": 5431.1, | |
| "valid_targets_min": 4229 | |
| }, | |
| { | |
| "epoch": 5.248, | |
| "grad_norm": 0.42000603181582424, | |
| "learning_rate": 7.173476627673492e-06, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26888588070869446, | |
| "step": 3280, | |
| "valid_targets_mean": 5070.3, | |
| "valid_targets_min": 4600 | |
| }, | |
| { | |
| "epoch": 5.256, | |
| "grad_norm": 0.3825889243289451, | |
| "learning_rate": 7.112353464213477e-06, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25423097610473633, | |
| "step": 3285, | |
| "valid_targets_mean": 5192.5, | |
| "valid_targets_min": 4378 | |
| }, | |
| { | |
| "epoch": 5.264, | |
| "grad_norm": 0.3514727875029311, | |
| "learning_rate": 7.051435455686735e-06, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2716108560562134, | |
| "step": 3290, | |
| "valid_targets_mean": 6296.8, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 5.272, | |
| "grad_norm": 0.3674283081817138, | |
| "learning_rate": 6.990723571830438e-06, | |
| "loss": 0.2488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25290143489837646, | |
| "step": 3295, | |
| "valid_targets_mean": 5190.8, | |
| "valid_targets_min": 4461 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.3412710742936682, | |
| "learning_rate": 6.93021877910052e-06, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24233779311180115, | |
| "step": 3300, | |
| "valid_targets_mean": 5554.9, | |
| "valid_targets_min": 4559 | |
| }, | |
| { | |
| "epoch": 5.288, | |
| "grad_norm": 0.3617922314986243, | |
| "learning_rate": 6.8699220406562985e-06, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2520451545715332, | |
| "step": 3305, | |
| "valid_targets_mean": 5575.8, | |
| "valid_targets_min": 5039 | |
| }, | |
| { | |
| "epoch": 5.296, | |
| "grad_norm": 0.39665772654091425, | |
| "learning_rate": 6.809834316345117e-06, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2421257048845291, | |
| "step": 3310, | |
| "valid_targets_mean": 5006.2, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 5.304, | |
| "grad_norm": 0.3536266701966106, | |
| "learning_rate": 6.749956562687083e-06, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2458074390888214, | |
| "step": 3315, | |
| "valid_targets_mean": 5504.6, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 5.312, | |
| "grad_norm": 0.3608808047734682, | |
| "learning_rate": 6.690289732859841e-06, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2392977625131607, | |
| "step": 3320, | |
| "valid_targets_mean": 5592.0, | |
| "valid_targets_min": 4721 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.36770951015478714, | |
| "learning_rate": 6.630834776683403e-06, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24873575568199158, | |
| "step": 3325, | |
| "valid_targets_mean": 5232.1, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 5.328, | |
| "grad_norm": 0.34430773670451037, | |
| "learning_rate": 6.571592640605e-06, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23626628518104553, | |
| "step": 3330, | |
| "valid_targets_mean": 5588.4, | |
| "valid_targets_min": 4488 | |
| }, | |
| { | |
| "epoch": 5.336, | |
| "grad_norm": 0.35426079324387627, | |
| "learning_rate": 6.512564267684061e-06, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2386239469051361, | |
| "step": 3335, | |
| "valid_targets_mean": 5359.0, | |
| "valid_targets_min": 4471 | |
| }, | |
| { | |
| "epoch": 5.344, | |
| "grad_norm": 0.3910365165223664, | |
| "learning_rate": 6.453750597577167e-06, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23908179998397827, | |
| "step": 3340, | |
| "valid_targets_mean": 5127.4, | |
| "valid_targets_min": 4492 | |
| }, | |
| { | |
| "epoch": 5.352, | |
| "grad_norm": 0.3704397436334283, | |
| "learning_rate": 6.395152566523106e-06, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2505912780761719, | |
| "step": 3345, | |
| "valid_targets_mean": 5307.0, | |
| "valid_targets_min": 4738 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.35824410807669194, | |
| "learning_rate": 6.336771107327966e-06, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24422509968280792, | |
| "step": 3350, | |
| "valid_targets_mean": 5106.2, | |
| "valid_targets_min": 4813 | |
| }, | |
| { | |
| "epoch": 5.368, | |
| "grad_norm": 0.3572261642162335, | |
| "learning_rate": 6.278607149350289e-06, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24168741703033447, | |
| "step": 3355, | |
| "valid_targets_mean": 5162.4, | |
| "valid_targets_min": 4437 | |
| }, | |
| { | |
| "epoch": 5.376, | |
| "grad_norm": 0.3717673632915534, | |
| "learning_rate": 6.220661618486268e-06, | |
| "loss": 0.2505, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2463008463382721, | |
| "step": 3360, | |
| "valid_targets_mean": 5170.6, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 5.384, | |
| "grad_norm": 0.3535061853628362, | |
| "learning_rate": 6.162935437155024e-06, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25261732935905457, | |
| "step": 3365, | |
| "valid_targets_mean": 5175.5, | |
| "valid_targets_min": 4635 | |
| }, | |
| { | |
| "epoch": 5.392, | |
| "grad_norm": 0.3531779977838204, | |
| "learning_rate": 6.105429524283901e-06, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2539016902446747, | |
| "step": 3370, | |
| "valid_targets_mean": 5536.6, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.34797145386230505, | |
| "learning_rate": 6.04814479529386e-06, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24314157664775848, | |
| "step": 3375, | |
| "valid_targets_mean": 5203.5, | |
| "valid_targets_min": 4731 | |
| }, | |
| { | |
| "epoch": 5.408, | |
| "grad_norm": 0.3572672840956105, | |
| "learning_rate": 5.991082162084889e-06, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25978779792785645, | |
| "step": 3380, | |
| "valid_targets_mean": 5297.6, | |
| "valid_targets_min": 4728 | |
| }, | |
| { | |
| "epoch": 5.416, | |
| "grad_norm": 0.3594231558139834, | |
| "learning_rate": 5.934242533021499e-06, | |
| "loss": 0.2416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24100100994110107, | |
| "step": 3385, | |
| "valid_targets_mean": 5039.4, | |
| "valid_targets_min": 4668 | |
| }, | |
| { | |
| "epoch": 5.424, | |
| "grad_norm": 0.3659680908213878, | |
| "learning_rate": 5.877626812918258e-06, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24617427587509155, | |
| "step": 3390, | |
| "valid_targets_mean": 5138.4, | |
| "valid_targets_min": 4777 | |
| }, | |
| { | |
| "epoch": 5.432, | |
| "grad_norm": 0.37056684786670985, | |
| "learning_rate": 5.821235903025378e-06, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23872539401054382, | |
| "step": 3395, | |
| "valid_targets_mean": 5618.4, | |
| "valid_targets_min": 4740 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.3420761114252738, | |
| "learning_rate": 5.765070701014391e-06, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26014673709869385, | |
| "step": 3400, | |
| "valid_targets_mean": 5761.1, | |
| "valid_targets_min": 4684 | |
| }, | |
| { | |
| "epoch": 5.448, | |
| "grad_norm": 0.35759193794850297, | |
| "learning_rate": 5.709132100963841e-06, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2500964105129242, | |
| "step": 3405, | |
| "valid_targets_mean": 5168.1, | |
| "valid_targets_min": 4711 | |
| }, | |
| { | |
| "epoch": 5.456, | |
| "grad_norm": 0.3724753287035692, | |
| "learning_rate": 5.653420993345062e-06, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24207034707069397, | |
| "step": 3410, | |
| "valid_targets_mean": 5131.9, | |
| "valid_targets_min": 4237 | |
| }, | |
| { | |
| "epoch": 5.464, | |
| "grad_norm": 0.3588079071899479, | |
| "learning_rate": 5.597938265007994e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2477402687072754, | |
| "step": 3415, | |
| "valid_targets_mean": 5441.4, | |
| "valid_targets_min": 4647 | |
| }, | |
| { | |
| "epoch": 5.4719999999999995, | |
| "grad_norm": 0.37352496321576995, | |
| "learning_rate": 5.542684799167069e-06, | |
| "loss": 0.2486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24922166764736176, | |
| "step": 3420, | |
| "valid_targets_mean": 5124.5, | |
| "valid_targets_min": 4405 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.34978406887337793, | |
| "learning_rate": 5.487661475387152e-06, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24213308095932007, | |
| "step": 3425, | |
| "valid_targets_mean": 5630.8, | |
| "valid_targets_min": 4737 | |
| }, | |
| { | |
| "epoch": 5.4879999999999995, | |
| "grad_norm": 0.3364163078242646, | |
| "learning_rate": 5.432869169569541e-06, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2347775250673294, | |
| "step": 3430, | |
| "valid_targets_mean": 5624.2, | |
| "valid_targets_min": 4424 | |
| }, | |
| { | |
| "epoch": 5.496, | |
| "grad_norm": 0.3787812323630855, | |
| "learning_rate": 5.378308753938024e-06, | |
| "loss": 0.2443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24613071978092194, | |
| "step": 3435, | |
| "valid_targets_mean": 5052.2, | |
| "valid_targets_min": 4455 | |
| }, | |
| { | |
| "epoch": 5.504, | |
| "grad_norm": 0.33998668223647627, | |
| "learning_rate": 5.323981097024986e-06, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2438172996044159, | |
| "step": 3440, | |
| "valid_targets_mean": 5635.9, | |
| "valid_targets_min": 4439 | |
| }, | |
| { | |
| "epoch": 5.5120000000000005, | |
| "grad_norm": 0.3504777949136209, | |
| "learning_rate": 5.269887063657595e-06, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24032580852508545, | |
| "step": 3445, | |
| "valid_targets_mean": 5383.1, | |
| "valid_targets_min": 4492 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.36573702977248956, | |
| "learning_rate": 5.216027514944027e-06, | |
| "loss": 0.2416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24648946523666382, | |
| "step": 3450, | |
| "valid_targets_mean": 5496.6, | |
| "valid_targets_min": 4765 | |
| }, | |
| { | |
| "epoch": 5.5280000000000005, | |
| "grad_norm": 0.3820249800264497, | |
| "learning_rate": 5.162403308259767e-06, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.242186039686203, | |
| "step": 3455, | |
| "valid_targets_mean": 5080.5, | |
| "valid_targets_min": 4722 | |
| }, | |
| { | |
| "epoch": 5.536, | |
| "grad_norm": 0.3545141339001876, | |
| "learning_rate": 5.109015297233935e-06, | |
| "loss": 0.2475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24215048551559448, | |
| "step": 3460, | |
| "valid_targets_mean": 5125.0, | |
| "valid_targets_min": 4821 | |
| }, | |
| { | |
| "epoch": 5.5440000000000005, | |
| "grad_norm": 0.37222019502883513, | |
| "learning_rate": 5.055864331735736e-06, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25402164459228516, | |
| "step": 3465, | |
| "valid_targets_mean": 5044.2, | |
| "valid_targets_min": 4526 | |
| }, | |
| { | |
| "epoch": 5.552, | |
| "grad_norm": 0.3638914453898955, | |
| "learning_rate": 5.002951257860909e-06, | |
| "loss": 0.2488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2532857060432434, | |
| "step": 3470, | |
| "valid_targets_mean": 5253.7, | |
| "valid_targets_min": 4609 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.3733482836956291, | |
| "learning_rate": 4.950276917918256e-06, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24047890305519104, | |
| "step": 3475, | |
| "valid_targets_mean": 4884.6, | |
| "valid_targets_min": 4071 | |
| }, | |
| { | |
| "epoch": 5.568, | |
| "grad_norm": 0.34418268293353826, | |
| "learning_rate": 4.8978421504162385e-06, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24415336549282074, | |
| "step": 3480, | |
| "valid_targets_mean": 5568.5, | |
| "valid_targets_min": 4492 | |
| }, | |
| { | |
| "epoch": 5.576, | |
| "grad_norm": 0.34815829402844567, | |
| "learning_rate": 4.845647790049634e-06, | |
| "loss": 0.2449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24107426404953003, | |
| "step": 3485, | |
| "valid_targets_mean": 5236.5, | |
| "valid_targets_min": 4489 | |
| }, | |
| { | |
| "epoch": 5.584, | |
| "grad_norm": 0.35826417257938115, | |
| "learning_rate": 4.793694667686244e-06, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2487502247095108, | |
| "step": 3490, | |
| "valid_targets_mean": 5331.8, | |
| "valid_targets_min": 4451 | |
| }, | |
| { | |
| "epoch": 5.592, | |
| "grad_norm": 0.3669626461029233, | |
| "learning_rate": 4.741983610353664e-06, | |
| "loss": 0.2486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2452990710735321, | |
| "step": 3495, | |
| "valid_targets_mean": 5145.4, | |
| "valid_targets_min": 4645 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.3485161961836912, | |
| "learning_rate": 4.690515441226122e-06, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25180765986442566, | |
| "step": 3500, | |
| "valid_targets_mean": 5502.6, | |
| "valid_targets_min": 3921 | |
| }, | |
| { | |
| "epoch": 5.608, | |
| "grad_norm": 0.37271420915918807, | |
| "learning_rate": 4.639290979611379e-06, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2505072057247162, | |
| "step": 3505, | |
| "valid_targets_mean": 5213.9, | |
| "valid_targets_min": 4653 | |
| }, | |
| { | |
| "epoch": 5.616, | |
| "grad_norm": 0.3425809565716745, | |
| "learning_rate": 4.588311040937683e-06, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24288851022720337, | |
| "step": 3510, | |
| "valid_targets_mean": 5552.4, | |
| "valid_targets_min": 4588 | |
| }, | |
| { | |
| "epoch": 5.624, | |
| "grad_norm": 0.34474132724461426, | |
| "learning_rate": 4.537576436740783e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2476564347743988, | |
| "step": 3515, | |
| "valid_targets_mean": 5571.6, | |
| "valid_targets_min": 4428 | |
| }, | |
| { | |
| "epoch": 5.632, | |
| "grad_norm": 0.35920187811191173, | |
| "learning_rate": 4.487087974651016e-06, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2592478394508362, | |
| "step": 3520, | |
| "valid_targets_mean": 5532.0, | |
| "valid_targets_min": 4648 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.36505985443389877, | |
| "learning_rate": 4.436846458380455e-06, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24498780071735382, | |
| "step": 3525, | |
| "valid_targets_mean": 5047.8, | |
| "valid_targets_min": 4450 | |
| }, | |
| { | |
| "epoch": 5.648, | |
| "grad_norm": 0.36552895747463016, | |
| "learning_rate": 4.386852687710104e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2510492503643036, | |
| "step": 3530, | |
| "valid_targets_mean": 5090.1, | |
| "valid_targets_min": 4464 | |
| }, | |
| { | |
| "epoch": 5.656, | |
| "grad_norm": 0.36391596418610395, | |
| "learning_rate": 4.337107458477177e-06, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25247281789779663, | |
| "step": 3535, | |
| "valid_targets_mean": 5158.0, | |
| "valid_targets_min": 4558 | |
| }, | |
| { | |
| "epoch": 5.664, | |
| "grad_norm": 0.3491444465096234, | |
| "learning_rate": 4.287611562562422e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2435702532529831, | |
| "step": 3540, | |
| "valid_targets_mean": 5179.9, | |
| "valid_targets_min": 4605 | |
| }, | |
| { | |
| "epoch": 5.672, | |
| "grad_norm": 0.3712453676836491, | |
| "learning_rate": 4.238365787877516e-06, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25702357292175293, | |
| "step": 3545, | |
| "valid_targets_mean": 5317.6, | |
| "valid_targets_min": 4318 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.3622086481641909, | |
| "learning_rate": 4.189370918352531e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24513322114944458, | |
| "step": 3550, | |
| "valid_targets_mean": 5125.0, | |
| "valid_targets_min": 4523 | |
| }, | |
| { | |
| "epoch": 5.688, | |
| "grad_norm": 0.35988915936320826, | |
| "learning_rate": 4.140627733923439e-06, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2519613802433014, | |
| "step": 3555, | |
| "valid_targets_mean": 5256.2, | |
| "valid_targets_min": 4749 | |
| }, | |
| { | |
| "epoch": 5.696, | |
| "grad_norm": 0.32218488991411665, | |
| "learning_rate": 4.092137010519712e-06, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2488650679588318, | |
| "step": 3560, | |
| "valid_targets_mean": 5925.8, | |
| "valid_targets_min": 4246 | |
| }, | |
| { | |
| "epoch": 5.704, | |
| "grad_norm": 0.37199882441190113, | |
| "learning_rate": 4.043899520051964e-06, | |
| "loss": 0.2527, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2566066384315491, | |
| "step": 3565, | |
| "valid_targets_mean": 5072.3, | |
| "valid_targets_min": 4483 | |
| }, | |
| { | |
| "epoch": 5.712, | |
| "grad_norm": 0.3679180386152946, | |
| "learning_rate": 3.995916030399658e-06, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24378645420074463, | |
| "step": 3570, | |
| "valid_targets_mean": 5116.8, | |
| "valid_targets_min": 4673 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.3484547278273523, | |
| "learning_rate": 3.948187305398892e-06, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2514280676841736, | |
| "step": 3575, | |
| "valid_targets_mean": 5207.4, | |
| "valid_targets_min": 4424 | |
| }, | |
| { | |
| "epoch": 5.728, | |
| "grad_norm": 0.36939425956733496, | |
| "learning_rate": 3.90071410483023e-06, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24703919887542725, | |
| "step": 3580, | |
| "valid_targets_mean": 5180.6, | |
| "valid_targets_min": 4638 | |
| }, | |
| { | |
| "epoch": 5.736, | |
| "grad_norm": 0.3778378516152843, | |
| "learning_rate": 3.853497184406623e-06, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24298296868801117, | |
| "step": 3585, | |
| "valid_targets_mean": 5094.5, | |
| "valid_targets_min": 4687 | |
| }, | |
| { | |
| "epoch": 5.744, | |
| "grad_norm": 0.3174132198866183, | |
| "learning_rate": 3.80653729576135e-06, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24780933558940887, | |
| "step": 3590, | |
| "valid_targets_mean": 6135.4, | |
| "valid_targets_min": 4733 | |
| }, | |
| { | |
| "epoch": 5.752, | |
| "grad_norm": 0.3600726972366865, | |
| "learning_rate": 3.7598351864360872e-06, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2484760880470276, | |
| "step": 3595, | |
| "valid_targets_mean": 5467.4, | |
| "valid_targets_min": 4477 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.3737726351098238, | |
| "learning_rate": 3.713391599868985e-06, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24499288201332092, | |
| "step": 3600, | |
| "valid_targets_mean": 4818.0, | |
| "valid_targets_min": 3917 | |
| }, | |
| { | |
| "epoch": 5.768, | |
| "grad_norm": 0.375223170923613, | |
| "learning_rate": 3.6672072753828424e-06, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24441808462142944, | |
| "step": 3605, | |
| "valid_targets_mean": 4987.3, | |
| "valid_targets_min": 4516 | |
| }, | |
| { | |
| "epoch": 5.776, | |
| "grad_norm": 0.39371736706395477, | |
| "learning_rate": 3.6212829481733368e-06, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2518998384475708, | |
| "step": 3610, | |
| "valid_targets_mean": 5282.2, | |
| "valid_targets_min": 4672 | |
| }, | |
| { | |
| "epoch": 5.784, | |
| "grad_norm": 0.34062083099159174, | |
| "learning_rate": 3.575619349297317e-06, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24139930307865143, | |
| "step": 3615, | |
| "valid_targets_mean": 5604.4, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 5.792, | |
| "grad_norm": 0.36823562257166453, | |
| "learning_rate": 3.5302172056611682e-06, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.252723753452301, | |
| "step": 3620, | |
| "valid_targets_mean": 4984.4, | |
| "valid_targets_min": 4556 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.35711891093759784, | |
| "learning_rate": 3.485077240009247e-06, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23670487105846405, | |
| "step": 3625, | |
| "valid_targets_mean": 5138.1, | |
| "valid_targets_min": 4398 | |
| }, | |
| { | |
| "epoch": 5.808, | |
| "grad_norm": 0.3638055579323492, | |
| "learning_rate": 3.4402001709123643e-06, | |
| "loss": 0.249, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24808424711227417, | |
| "step": 3630, | |
| "valid_targets_mean": 5140.2, | |
| "valid_targets_min": 4736 | |
| }, | |
| { | |
| "epoch": 5.816, | |
| "grad_norm": 0.3473257710993411, | |
| "learning_rate": 3.3955867127563515e-06, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24211251735687256, | |
| "step": 3635, | |
| "valid_targets_mean": 5288.0, | |
| "valid_targets_min": 4406 | |
| }, | |
| { | |
| "epoch": 5.824, | |
| "grad_norm": 0.33424894079819717, | |
| "learning_rate": 3.351237575730695e-06, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24368932843208313, | |
| "step": 3640, | |
| "valid_targets_mean": 5867.6, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 5.832, | |
| "grad_norm": 0.34364394364730977, | |
| "learning_rate": 3.307153465817219e-06, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25161224603652954, | |
| "step": 3645, | |
| "valid_targets_mean": 5262.4, | |
| "valid_targets_min": 4735 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.3472906870529899, | |
| "learning_rate": 3.263335084778856e-06, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24716094136238098, | |
| "step": 3650, | |
| "valid_targets_mean": 5630.8, | |
| "valid_targets_min": 4577 | |
| }, | |
| { | |
| "epoch": 5.848, | |
| "grad_norm": 0.3292266528350295, | |
| "learning_rate": 3.2197831301484816e-06, | |
| "loss": 0.2453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24710077047348022, | |
| "step": 3655, | |
| "valid_targets_mean": 5807.1, | |
| "valid_targets_min": 4636 | |
| }, | |
| { | |
| "epoch": 5.856, | |
| "grad_norm": 0.3699242339444154, | |
| "learning_rate": 3.1764982952177805e-06, | |
| "loss": 0.2455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24612200260162354, | |
| "step": 3660, | |
| "valid_targets_mean": 5063.1, | |
| "valid_targets_min": 3935 | |
| }, | |
| { | |
| "epoch": 5.864, | |
| "grad_norm": 0.34153204922576014, | |
| "learning_rate": 3.1334812690262507e-06, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24460744857788086, | |
| "step": 3665, | |
| "valid_targets_mean": 5188.6, | |
| "valid_targets_min": 4685 | |
| }, | |
| { | |
| "epoch": 5.872, | |
| "grad_norm": 0.35603507228511666, | |
| "learning_rate": 3.0907327363502084e-06, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24622929096221924, | |
| "step": 3670, | |
| "valid_targets_mean": 5200.5, | |
| "valid_targets_min": 4432 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.36248146070846143, | |
| "learning_rate": 3.0482533776918987e-06, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2563058137893677, | |
| "step": 3675, | |
| "valid_targets_mean": 5146.4, | |
| "valid_targets_min": 4479 | |
| }, | |
| { | |
| "epoch": 5.888, | |
| "grad_norm": 0.3652911247289699, | |
| "learning_rate": 3.0060438692686533e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24566249549388885, | |
| "step": 3680, | |
| "valid_targets_mean": 5067.0, | |
| "valid_targets_min": 4519 | |
| }, | |
| { | |
| "epoch": 5.896, | |
| "grad_norm": 0.36001676897003176, | |
| "learning_rate": 2.964104883002139e-06, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2391037493944168, | |
| "step": 3685, | |
| "valid_targets_mean": 5101.6, | |
| "valid_targets_min": 4377 | |
| }, | |
| { | |
| "epoch": 5.904, | |
| "grad_norm": 0.34447274221329893, | |
| "learning_rate": 2.9224370865076457e-06, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2415609359741211, | |
| "step": 3690, | |
| "valid_targets_mean": 5142.4, | |
| "valid_targets_min": 4571 | |
| }, | |
| { | |
| "epoch": 5.912, | |
| "grad_norm": 0.33673555343229716, | |
| "learning_rate": 2.8810411430834716e-06, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24423715472221375, | |
| "step": 3695, | |
| "valid_targets_mean": 5456.4, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.3601349116050932, | |
| "learning_rate": 2.8399177117003595e-06, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24411669373512268, | |
| "step": 3700, | |
| "valid_targets_mean": 5523.4, | |
| "valid_targets_min": 4553 | |
| }, | |
| { | |
| "epoch": 5.928, | |
| "grad_norm": 0.36638792620159516, | |
| "learning_rate": 2.7990674469910085e-06, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24168094992637634, | |
| "step": 3705, | |
| "valid_targets_mean": 5015.6, | |
| "valid_targets_min": 4369 | |
| }, | |
| { | |
| "epoch": 5.936, | |
| "grad_norm": 0.33472011218091163, | |
| "learning_rate": 2.7584909992396515e-06, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24189507961273193, | |
| "step": 3710, | |
| "valid_targets_mean": 5597.9, | |
| "valid_targets_min": 4777 | |
| }, | |
| { | |
| "epoch": 5.944, | |
| "grad_norm": 0.36838061663548655, | |
| "learning_rate": 2.7181890143716995e-06, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24981597065925598, | |
| "step": 3715, | |
| "valid_targets_mean": 5125.8, | |
| "valid_targets_min": 4542 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.32093465670691634, | |
| "learning_rate": 2.6781621339434717e-06, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24284622073173523, | |
| "step": 3720, | |
| "valid_targets_mean": 5974.9, | |
| "valid_targets_min": 4533 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.3723372003325761, | |
| "learning_rate": 2.638410995131966e-06, | |
| "loss": 0.2511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24876484274864197, | |
| "step": 3725, | |
| "valid_targets_mean": 5117.8, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.3657154228542804, | |
| "learning_rate": 2.5989362307247313e-06, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24263575673103333, | |
| "step": 3730, | |
| "valid_targets_mean": 4969.6, | |
| "valid_targets_min": 2945 | |
| }, | |
| { | |
| "epoch": 5.976, | |
| "grad_norm": 0.348590295822325, | |
| "learning_rate": 2.5597384691097847e-06, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24658943712711334, | |
| "step": 3735, | |
| "valid_targets_mean": 5683.6, | |
| "valid_targets_min": 4806 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 0.36062456047183067, | |
| "learning_rate": 2.520818334265611e-06, | |
| "loss": 0.2478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25034987926483154, | |
| "step": 3740, | |
| "valid_targets_mean": 5174.6, | |
| "valid_targets_min": 4606 | |
| }, | |
| { | |
| "epoch": 5.992, | |
| "grad_norm": 0.34334210812355265, | |
| "learning_rate": 2.482176445751232e-06, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24606841802597046, | |
| "step": 3745, | |
| "valid_targets_mean": 5457.1, | |
| "valid_targets_min": 4231 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.3712197172849216, | |
| "learning_rate": 2.4438134186963415e-06, | |
| "loss": 0.2471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24733126163482666, | |
| "step": 3750, | |
| "valid_targets_mean": 5144.3, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 6.008, | |
| "grad_norm": 0.3631775960462028, | |
| "learning_rate": 2.4057298637915105e-06, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23758924007415771, | |
| "step": 3755, | |
| "valid_targets_mean": 5179.2, | |
| "valid_targets_min": 4456 | |
| }, | |
| { | |
| "epoch": 6.016, | |
| "grad_norm": 0.36557834347026136, | |
| "learning_rate": 2.3679263872784717e-06, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24914216995239258, | |
| "step": 3760, | |
| "valid_targets_mean": 5288.6, | |
| "valid_targets_min": 4364 | |
| }, | |
| { | |
| "epoch": 6.024, | |
| "grad_norm": 0.3501854118417253, | |
| "learning_rate": 2.330403590940471e-06, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24144430458545685, | |
| "step": 3765, | |
| "valid_targets_mean": 4979.5, | |
| "valid_targets_min": 4138 | |
| }, | |
| { | |
| "epoch": 6.032, | |
| "grad_norm": 0.3607100430338566, | |
| "learning_rate": 2.2931620720926717e-06, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24601085484027863, | |
| "step": 3770, | |
| "valid_targets_mean": 5113.8, | |
| "valid_targets_min": 4345 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.34075273299213926, | |
| "learning_rate": 2.256202423572669e-06, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24778611958026886, | |
| "step": 3775, | |
| "valid_targets_mean": 6020.8, | |
| "valid_targets_min": 4734 | |
| }, | |
| { | |
| "epoch": 6.048, | |
| "grad_norm": 0.3371605517676569, | |
| "learning_rate": 2.219525233731035e-06, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25823408365249634, | |
| "step": 3780, | |
| "valid_targets_mean": 6105.9, | |
| "valid_targets_min": 4699 | |
| }, | |
| { | |
| "epoch": 6.056, | |
| "grad_norm": 0.363468771063016, | |
| "learning_rate": 2.183131086421961e-06, | |
| "loss": 0.2427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2409779280424118, | |
| "step": 3785, | |
| "valid_targets_mean": 5294.0, | |
| "valid_targets_min": 4554 | |
| }, | |
| { | |
| "epoch": 6.064, | |
| "grad_norm": 0.3577696642677877, | |
| "learning_rate": 2.1470205609939533e-06, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23581229150295258, | |
| "step": 3790, | |
| "valid_targets_mean": 5075.6, | |
| "valid_targets_min": 4728 | |
| }, | |
| { | |
| "epoch": 6.072, | |
| "grad_norm": 0.355158615904837, | |
| "learning_rate": 2.1111942322806335e-06, | |
| "loss": 0.2478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24120250344276428, | |
| "step": 3795, | |
| "valid_targets_mean": 5128.6, | |
| "valid_targets_min": 4763 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 0.3569451194379194, | |
| "learning_rate": 2.0756526705915635e-06, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24524953961372375, | |
| "step": 3800, | |
| "valid_targets_mean": 5054.7, | |
| "valid_targets_min": 4612 | |
| }, | |
| { | |
| "epoch": 6.088, | |
| "grad_norm": 0.378860376188784, | |
| "learning_rate": 2.0403964417031764e-06, | |
| "loss": 0.2369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2396886646747589, | |
| "step": 3805, | |
| "valid_targets_mean": 5074.7, | |
| "valid_targets_min": 4283 | |
| }, | |
| { | |
| "epoch": 6.096, | |
| "grad_norm": 0.3525497879900845, | |
| "learning_rate": 2.0054261068497773e-06, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2329699695110321, | |
| "step": 3810, | |
| "valid_targets_mean": 4965.2, | |
| "valid_targets_min": 4411 | |
| }, | |
| { | |
| "epoch": 6.104, | |
| "grad_norm": 0.3490159642821698, | |
| "learning_rate": 1.9707422227145922e-06, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23966556787490845, | |
| "step": 3815, | |
| "valid_targets_mean": 5461.9, | |
| "valid_targets_min": 4402 | |
| }, | |
| { | |
| "epoch": 6.112, | |
| "grad_norm": 0.35226650285947453, | |
| "learning_rate": 1.936345341420924e-06, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24397072196006775, | |
| "step": 3820, | |
| "valid_targets_mean": 5241.6, | |
| "valid_targets_min": 4633 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 0.3831412236793039, | |
| "learning_rate": 1.9022360105233507e-06, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24569252133369446, | |
| "step": 3825, | |
| "valid_targets_mean": 5123.7, | |
| "valid_targets_min": 4619 | |
| }, | |
| { | |
| "epoch": 6.128, | |
| "grad_norm": 0.358324906924862, | |
| "learning_rate": 1.8684147729990188e-06, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2378392517566681, | |
| "step": 3830, | |
| "valid_targets_mean": 5096.0, | |
| "valid_targets_min": 4546 | |
| }, | |
| { | |
| "epoch": 6.136, | |
| "grad_norm": 0.3855314069373412, | |
| "learning_rate": 1.8348821672389893e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24311140179634094, | |
| "step": 3835, | |
| "valid_targets_mean": 5606.6, | |
| "valid_targets_min": 4874 | |
| }, | |
| { | |
| "epoch": 6.144, | |
| "grad_norm": 0.35975512476455945, | |
| "learning_rate": 1.8016387270396784e-06, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24314913153648376, | |
| "step": 3840, | |
| "valid_targets_mean": 5164.4, | |
| "valid_targets_min": 4449 | |
| }, | |
| { | |
| "epoch": 6.152, | |
| "grad_norm": 0.35043217980698815, | |
| "learning_rate": 1.7686849815943486e-06, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2315068244934082, | |
| "step": 3845, | |
| "valid_targets_mean": 5124.4, | |
| "valid_targets_min": 4590 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.33173541170906284, | |
| "learning_rate": 1.7360214554847e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2421301305294037, | |
| "step": 3850, | |
| "valid_targets_mean": 6034.3, | |
| "valid_targets_min": 4566 | |
| }, | |
| { | |
| "epoch": 6.168, | |
| "grad_norm": 0.37369649472009386, | |
| "learning_rate": 1.703648668672495e-06, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25358569622039795, | |
| "step": 3855, | |
| "valid_targets_mean": 5154.9, | |
| "valid_targets_min": 4715 | |
| }, | |
| { | |
| "epoch": 6.176, | |
| "grad_norm": 0.3501240418748732, | |
| "learning_rate": 1.6715671364913077e-06, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24450397491455078, | |
| "step": 3860, | |
| "valid_targets_mean": 5378.2, | |
| "valid_targets_min": 4561 | |
| }, | |
| { | |
| "epoch": 6.184, | |
| "grad_norm": 0.35065770438579946, | |
| "learning_rate": 1.6397773696383091e-06, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23744799196720123, | |
| "step": 3865, | |
| "valid_targets_mean": 5581.3, | |
| "valid_targets_min": 4866 | |
| }, | |
| { | |
| "epoch": 6.192, | |
| "grad_norm": 0.3440431874107462, | |
| "learning_rate": 1.6082798741661321e-06, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24355855584144592, | |
| "step": 3870, | |
| "valid_targets_mean": 5196.7, | |
| "valid_targets_min": 4769 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 0.3440937011159273, | |
| "learning_rate": 1.5770751514748273e-06, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2479947954416275, | |
| "step": 3875, | |
| "valid_targets_mean": 5460.2, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 6.208, | |
| "grad_norm": 0.3664360021812792, | |
| "learning_rate": 1.5461636983038686e-06, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24719715118408203, | |
| "step": 3880, | |
| "valid_targets_mean": 5045.1, | |
| "valid_targets_min": 4292 | |
| }, | |
| { | |
| "epoch": 6.216, | |
| "grad_norm": 0.34886744674712405, | |
| "learning_rate": 1.5155460067242578e-06, | |
| "loss": 0.2408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2426534742116928, | |
| "step": 3885, | |
| "valid_targets_mean": 5570.1, | |
| "valid_targets_min": 4331 | |
| }, | |
| { | |
| "epoch": 6.224, | |
| "grad_norm": 0.3541792509776254, | |
| "learning_rate": 1.4852225641306816e-06, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24025562405586243, | |
| "step": 3890, | |
| "valid_targets_mean": 5051.6, | |
| "valid_targets_min": 4341 | |
| }, | |
| { | |
| "epoch": 6.232, | |
| "grad_norm": 0.35753270673514387, | |
| "learning_rate": 1.4551938532337607e-06, | |
| "loss": 0.2484, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2486327737569809, | |
| "step": 3895, | |
| "valid_targets_mean": 5092.7, | |
| "valid_targets_min": 4451 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.3368870472742755, | |
| "learning_rate": 1.4254603520523614e-06, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24108079075813293, | |
| "step": 3900, | |
| "valid_targets_mean": 5666.6, | |
| "valid_targets_min": 4655 | |
| }, | |
| { | |
| "epoch": 6.248, | |
| "grad_norm": 0.3614828603472403, | |
| "learning_rate": 1.3960225339059875e-06, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24041937291622162, | |
| "step": 3905, | |
| "valid_targets_mean": 5196.4, | |
| "valid_targets_min": 4537 | |
| }, | |
| { | |
| "epoch": 6.256, | |
| "grad_norm": 0.3331706996664771, | |
| "learning_rate": 1.3668808674072409e-06, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2347329705953598, | |
| "step": 3910, | |
| "valid_targets_mean": 5935.8, | |
| "valid_targets_min": 4317 | |
| }, | |
| { | |
| "epoch": 6.264, | |
| "grad_norm": 0.3842231759819104, | |
| "learning_rate": 1.338035816454375e-06, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24102315306663513, | |
| "step": 3915, | |
| "valid_targets_mean": 4995.5, | |
| "valid_targets_min": 4157 | |
| }, | |
| { | |
| "epoch": 6.272, | |
| "grad_norm": 0.3591286292732204, | |
| "learning_rate": 1.3094878402238887e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2383185476064682, | |
| "step": 3920, | |
| "valid_targets_mean": 5088.0, | |
| "valid_targets_min": 4280 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 0.32160738326836574, | |
| "learning_rate": 1.2812373931632371e-06, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23567475378513336, | |
| "step": 3925, | |
| "valid_targets_mean": 5883.0, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 6.288, | |
| "grad_norm": 0.36135558075925855, | |
| "learning_rate": 1.2532849249835932e-06, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24971652030944824, | |
| "step": 3930, | |
| "valid_targets_mean": 5218.2, | |
| "valid_targets_min": 4340 | |
| }, | |
| { | |
| "epoch": 6.296, | |
| "grad_norm": 0.3536877104222755, | |
| "learning_rate": 1.2256308806526774e-06, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23529192805290222, | |
| "step": 3935, | |
| "valid_targets_mean": 5077.1, | |
| "valid_targets_min": 4594 | |
| }, | |
| { | |
| "epoch": 6.304, | |
| "grad_norm": 0.3901503986810758, | |
| "learning_rate": 1.1982757003876855e-06, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25148630142211914, | |
| "step": 3940, | |
| "valid_targets_mean": 5079.5, | |
| "valid_targets_min": 4652 | |
| }, | |
| { | |
| "epoch": 6.312, | |
| "grad_norm": 0.364936138384861, | |
| "learning_rate": 1.1712198196482793e-06, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2538650631904602, | |
| "step": 3945, | |
| "valid_targets_mean": 5064.6, | |
| "valid_targets_min": 4579 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 0.3360740953154378, | |
| "learning_rate": 1.1444636691296518e-06, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23852398991584778, | |
| "step": 3950, | |
| "valid_targets_mean": 5530.2, | |
| "valid_targets_min": 4372 | |
| }, | |
| { | |
| "epoch": 6.328, | |
| "grad_norm": 0.32046067837491743, | |
| "learning_rate": 1.11800767475567e-06, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24686802923679352, | |
| "step": 3955, | |
| "valid_targets_mean": 6066.1, | |
| "valid_targets_min": 4718 | |
| }, | |
| { | |
| "epoch": 6.336, | |
| "grad_norm": 0.3582905264471923, | |
| "learning_rate": 1.0918522576721014e-06, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24235400557518005, | |
| "step": 3960, | |
| "valid_targets_mean": 5160.4, | |
| "valid_targets_min": 4658 | |
| }, | |
| { | |
| "epoch": 6.344, | |
| "grad_norm": 0.32867894929163155, | |
| "learning_rate": 1.0659978342399003e-06, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24433989822864532, | |
| "step": 3965, | |
| "valid_targets_mean": 6078.9, | |
| "valid_targets_min": 4831 | |
| }, | |
| { | |
| "epoch": 6.352, | |
| "grad_norm": 0.3453342058468073, | |
| "learning_rate": 1.0404448160285897e-06, | |
| "loss": 0.2387, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24090495705604553, | |
| "step": 3970, | |
| "valid_targets_mean": 5697.3, | |
| "valid_targets_min": 4633 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 0.3600198892334908, | |
| "learning_rate": 1.0151936098097015e-06, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24847207963466644, | |
| "step": 3975, | |
| "valid_targets_mean": 5298.3, | |
| "valid_targets_min": 4744 | |
| }, | |
| { | |
| "epoch": 6.368, | |
| "grad_norm": 0.36204138962740917, | |
| "learning_rate": 9.902446175503089e-07, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.246487095952034, | |
| "step": 3980, | |
| "valid_targets_mean": 5077.8, | |
| "valid_targets_min": 4197 | |
| }, | |
| { | |
| "epoch": 6.376, | |
| "grad_norm": 0.3373658345713561, | |
| "learning_rate": 9.655982364066197e-07, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2348678857088089, | |
| "step": 3985, | |
| "valid_targets_mean": 5465.8, | |
| "valid_targets_min": 4572 | |
| }, | |
| { | |
| "epoch": 6.384, | |
| "grad_norm": 0.36083592733785025, | |
| "learning_rate": 9.412548587176595e-07, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2487880438566208, | |
| "step": 3990, | |
| "valid_targets_mean": 5041.9, | |
| "valid_targets_min": 4477 | |
| }, | |
| { | |
| "epoch": 6.392, | |
| "grad_norm": 0.34885619240044524, | |
| "learning_rate": 9.172148719990237e-07, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24451696872711182, | |
| "step": 3995, | |
| "valid_targets_mean": 5104.6, | |
| "valid_targets_min": 4305 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.33010148899529074, | |
| "learning_rate": 8.934786589367106e-07, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2330082654953003, | |
| "step": 4000, | |
| "valid_targets_mean": 5518.8, | |
| "valid_targets_min": 4651 | |
| }, | |
| { | |
| "epoch": 6.408, | |
| "grad_norm": 0.35193322431608964, | |
| "learning_rate": 8.700465973810246e-07, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24522963166236877, | |
| "step": 4005, | |
| "valid_targets_mean": 5373.2, | |
| "valid_targets_min": 4743 | |
| }, | |
| { | |
| "epoch": 6.416, | |
| "grad_norm": 0.3516968130910934, | |
| "learning_rate": 8.469190603405719e-07, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24739448726177216, | |
| "step": 4010, | |
| "valid_targets_mean": 5115.8, | |
| "valid_targets_min": 4584 | |
| }, | |
| { | |
| "epoch": 6.424, | |
| "grad_norm": 0.3595270858719937, | |
| "learning_rate": 8.240964159763121e-07, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23763275146484375, | |
| "step": 4015, | |
| "valid_targets_mean": 5177.1, | |
| "valid_targets_min": 4380 | |
| }, | |
| { | |
| "epoch": 6.432, | |
| "grad_norm": 0.3604951505138093, | |
| "learning_rate": 8.015790275957003e-07, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24399161338806152, | |
| "step": 4020, | |
| "valid_targets_mean": 5131.8, | |
| "valid_targets_min": 4637 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 0.3457133862177745, | |
| "learning_rate": 7.793672536469077e-07, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2410757839679718, | |
| "step": 4025, | |
| "valid_targets_mean": 5168.4, | |
| "valid_targets_min": 4609 | |
| }, | |
| { | |
| "epoch": 6.448, | |
| "grad_norm": 0.3637150867231195, | |
| "learning_rate": 7.574614477131081e-07, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24976620078086853, | |
| "step": 4030, | |
| "valid_targets_mean": 5229.0, | |
| "valid_targets_min": 4583 | |
| }, | |
| { | |
| "epoch": 6.456, | |
| "grad_norm": 0.34551969538766464, | |
| "learning_rate": 7.358619585068583e-07, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23369210958480835, | |
| "step": 4035, | |
| "valid_targets_mean": 5449.6, | |
| "valid_targets_min": 4596 | |
| }, | |
| { | |
| "epoch": 6.464, | |
| "grad_norm": 0.36467862199973183, | |
| "learning_rate": 7.145691298645419e-07, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2411804050207138, | |
| "step": 4040, | |
| "valid_targets_mean": 5008.4, | |
| "valid_targets_min": 4424 | |
| }, | |
| { | |
| "epoch": 6.4719999999999995, | |
| "grad_norm": 0.34658540686884814, | |
| "learning_rate": 6.935833007408965e-07, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24360360205173492, | |
| "step": 4045, | |
| "valid_targets_mean": 5497.1, | |
| "valid_targets_min": 4729 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.3698570207092477, | |
| "learning_rate": 6.729048052036136e-07, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23895549774169922, | |
| "step": 4050, | |
| "valid_targets_mean": 5080.3, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 6.4879999999999995, | |
| "grad_norm": 0.3495793458194073, | |
| "learning_rate": 6.52533972428031e-07, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24023422598838806, | |
| "step": 4055, | |
| "valid_targets_mean": 5418.1, | |
| "valid_targets_min": 4364 | |
| }, | |
| { | |
| "epoch": 6.496, | |
| "grad_norm": 0.3585312634089415, | |
| "learning_rate": 6.324711266918826e-07, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24402227997779846, | |
| "step": 4060, | |
| "valid_targets_mean": 5059.8, | |
| "valid_targets_min": 4419 | |
| }, | |
| { | |
| "epoch": 6.504, | |
| "grad_norm": 0.35067996751857816, | |
| "learning_rate": 6.127165873701457e-07, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25074195861816406, | |
| "step": 4065, | |
| "valid_targets_mean": 5164.2, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 6.5120000000000005, | |
| "grad_norm": 0.3345114175916874, | |
| "learning_rate": 5.932706689299461e-07, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2492753267288208, | |
| "step": 4070, | |
| "valid_targets_mean": 5768.8, | |
| "valid_targets_min": 4513 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "grad_norm": 0.34944919933770396, | |
| "learning_rate": 5.741336809255615e-07, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2288595587015152, | |
| "step": 4075, | |
| "valid_targets_mean": 4963.2, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 6.5280000000000005, | |
| "grad_norm": 0.3497099288207844, | |
| "learning_rate": 5.553059279934902e-07, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2530469596385956, | |
| "step": 4080, | |
| "valid_targets_mean": 5466.3, | |
| "valid_targets_min": 4463 | |
| }, | |
| { | |
| "epoch": 6.536, | |
| "grad_norm": 0.3645388140010008, | |
| "learning_rate": 5.36787709847597e-07, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2429083287715912, | |
| "step": 4085, | |
| "valid_targets_mean": 5479.3, | |
| "valid_targets_min": 4190 | |
| }, | |
| { | |
| "epoch": 6.5440000000000005, | |
| "grad_norm": 0.338672296974109, | |
| "learning_rate": 5.185793212743529e-07, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24177274107933044, | |
| "step": 4090, | |
| "valid_targets_mean": 5147.0, | |
| "valid_targets_min": 4247 | |
| }, | |
| { | |
| "epoch": 6.552, | |
| "grad_norm": 0.35111078017353203, | |
| "learning_rate": 5.006810521281335e-07, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2353895753622055, | |
| "step": 4095, | |
| "valid_targets_mean": 4912.9, | |
| "valid_targets_min": 4274 | |
| }, | |
| { | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 0.36656211181927273, | |
| "learning_rate": 4.830931873266065e-07, | |
| "loss": 0.2488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24475005269050598, | |
| "step": 4100, | |
| "valid_targets_mean": 5128.2, | |
| "valid_targets_min": 4709 | |
| }, | |
| { | |
| "epoch": 6.568, | |
| "grad_norm": 0.3461906980006675, | |
| "learning_rate": 4.658160068462025e-07, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24054673314094543, | |
| "step": 4105, | |
| "valid_targets_mean": 5602.5, | |
| "valid_targets_min": 4500 | |
| }, | |
| { | |
| "epoch": 6.576, | |
| "grad_norm": 0.3660022467448017, | |
| "learning_rate": 4.488497857176466e-07, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26431626081466675, | |
| "step": 4110, | |
| "valid_targets_mean": 5764.4, | |
| "valid_targets_min": 4747 | |
| }, | |
| { | |
| "epoch": 6.584, | |
| "grad_norm": 0.34116388644769263, | |
| "learning_rate": 4.321947940215898e-07, | |
| "loss": 0.2383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23708701133728027, | |
| "step": 4115, | |
| "valid_targets_mean": 5133.5, | |
| "valid_targets_min": 4665 | |
| }, | |
| { | |
| "epoch": 6.592, | |
| "grad_norm": 0.3601865300406288, | |
| "learning_rate": 4.1585129688430425e-07, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25522279739379883, | |
| "step": 4120, | |
| "valid_targets_mean": 5626.6, | |
| "valid_targets_min": 4523 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 0.3643181411525989, | |
| "learning_rate": 3.998195544734706e-07, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23358450829982758, | |
| "step": 4125, | |
| "valid_targets_mean": 5137.9, | |
| "valid_targets_min": 4308 | |
| }, | |
| { | |
| "epoch": 6.608, | |
| "grad_norm": 0.3700220566692827, | |
| "learning_rate": 3.840998219940284e-07, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2382754683494568, | |
| "step": 4130, | |
| "valid_targets_mean": 5096.4, | |
| "valid_targets_min": 4378 | |
| }, | |
| { | |
| "epoch": 6.616, | |
| "grad_norm": 0.34787421454326867, | |
| "learning_rate": 3.6869234968411214e-07, | |
| "loss": 0.2478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24739213287830353, | |
| "step": 4135, | |
| "valid_targets_mean": 5477.6, | |
| "valid_targets_min": 4707 | |
| }, | |
| { | |
| "epoch": 6.624, | |
| "grad_norm": 0.3482419862767242, | |
| "learning_rate": 3.5359738281107504e-07, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23362408578395844, | |
| "step": 4140, | |
| "valid_targets_mean": 5129.3, | |
| "valid_targets_min": 4640 | |
| }, | |
| { | |
| "epoch": 6.632, | |
| "grad_norm": 0.3439078609252227, | |
| "learning_rate": 3.38815161667585e-07, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22988522052764893, | |
| "step": 4145, | |
| "valid_targets_mean": 5195.3, | |
| "valid_targets_min": 4612 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 0.34320298300416757, | |
| "learning_rate": 3.24345921567788e-07, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22935551404953003, | |
| "step": 4150, | |
| "valid_targets_mean": 4987.1, | |
| "valid_targets_min": 4531 | |
| }, | |
| { | |
| "epoch": 6.648, | |
| "grad_norm": 0.33730705224771984, | |
| "learning_rate": 3.101898928435754e-07, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24337095022201538, | |
| "step": 4155, | |
| "valid_targets_mean": 5533.1, | |
| "valid_targets_min": 4297 | |
| }, | |
| { | |
| "epoch": 6.656, | |
| "grad_norm": 0.35350990315252356, | |
| "learning_rate": 2.9634730084091343e-07, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24109163880348206, | |
| "step": 4160, | |
| "valid_targets_mean": 5427.9, | |
| "valid_targets_min": 4101 | |
| }, | |
| { | |
| "epoch": 6.664, | |
| "grad_norm": 0.3321729850517723, | |
| "learning_rate": 2.8281836591624865e-07, | |
| "loss": 0.2397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23898422718048096, | |
| "step": 4165, | |
| "valid_targets_mean": 5538.8, | |
| "valid_targets_min": 4083 | |
| }, | |
| { | |
| "epoch": 6.672, | |
| "grad_norm": 0.3676878246031953, | |
| "learning_rate": 2.6960330343301033e-07, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24417418241500854, | |
| "step": 4170, | |
| "valid_targets_mean": 5235.1, | |
| "valid_targets_min": 4601 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 0.3430751434122029, | |
| "learning_rate": 2.5670232375817784e-07, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2486555278301239, | |
| "step": 4175, | |
| "valid_targets_mean": 5605.6, | |
| "valid_targets_min": 4518 | |
| }, | |
| { | |
| "epoch": 6.688, | |
| "grad_norm": 0.3750848867365519, | |
| "learning_rate": 2.441156322589322e-07, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24382588267326355, | |
| "step": 4180, | |
| "valid_targets_mean": 5038.2, | |
| "valid_targets_min": 4347 | |
| }, | |
| { | |
| "epoch": 6.696, | |
| "grad_norm": 0.3219480398870412, | |
| "learning_rate": 2.318434292993832e-07, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24035072326660156, | |
| "step": 4185, | |
| "valid_targets_mean": 6278.6, | |
| "valid_targets_min": 4802 | |
| }, | |
| { | |
| "epoch": 6.704, | |
| "grad_norm": 0.3555339629229889, | |
| "learning_rate": 2.1988591023738514e-07, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24318590760231018, | |
| "step": 4190, | |
| "valid_targets_mean": 5149.9, | |
| "valid_targets_min": 4611 | |
| }, | |
| { | |
| "epoch": 6.712, | |
| "grad_norm": 0.35724399218766556, | |
| "learning_rate": 2.0824326542142835e-07, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24663877487182617, | |
| "step": 4195, | |
| "valid_targets_mean": 5062.2, | |
| "valid_targets_min": 4629 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 0.35170477816726814, | |
| "learning_rate": 1.9691568018759931e-07, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24812597036361694, | |
| "step": 4200, | |
| "valid_targets_mean": 5280.4, | |
| "valid_targets_min": 4903 | |
| }, | |
| { | |
| "epoch": 6.728, | |
| "grad_norm": 0.35062775430087, | |
| "learning_rate": 1.8590333485664525e-07, | |
| "loss": 0.2431, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24701952934265137, | |
| "step": 4205, | |
| "valid_targets_mean": 5022.6, | |
| "valid_targets_min": 3315 | |
| }, | |
| { | |
| "epoch": 6.736, | |
| "grad_norm": 0.363692610940855, | |
| "learning_rate": 1.752064047310853e-07, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23704981803894043, | |
| "step": 4210, | |
| "valid_targets_mean": 4956.4, | |
| "valid_targets_min": 4189 | |
| }, | |
| { | |
| "epoch": 6.744, | |
| "grad_norm": 0.36259224632105397, | |
| "learning_rate": 1.6482506009243949e-07, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25554490089416504, | |
| "step": 4215, | |
| "valid_targets_mean": 5175.2, | |
| "valid_targets_min": 4253 | |
| }, | |
| { | |
| "epoch": 6.752, | |
| "grad_norm": 0.3516333931937418, | |
| "learning_rate": 1.5475946619850192e-07, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2464475780725479, | |
| "step": 4220, | |
| "valid_targets_mean": 5230.2, | |
| "valid_targets_min": 4431 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 0.3656307156561673, | |
| "learning_rate": 1.4500978328071845e-07, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24912934005260468, | |
| "step": 4225, | |
| "valid_targets_mean": 5228.9, | |
| "valid_targets_min": 4695 | |
| }, | |
| { | |
| "epoch": 6.768, | |
| "grad_norm": 0.340055043847797, | |
| "learning_rate": 1.3557616654163775e-07, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23946282267570496, | |
| "step": 4230, | |
| "valid_targets_mean": 5120.0, | |
| "valid_targets_min": 4604 | |
| }, | |
| { | |
| "epoch": 6.776, | |
| "grad_norm": 0.35797490947423816, | |
| "learning_rate": 1.264587661524308e-07, | |
| "loss": 0.2485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24109522998332977, | |
| "step": 4235, | |
| "valid_targets_mean": 5133.6, | |
| "valid_targets_min": 4324 | |
| }, | |
| { | |
| "epoch": 6.784, | |
| "grad_norm": 0.3491794445151668, | |
| "learning_rate": 1.1765772725051084e-07, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24150952696800232, | |
| "step": 4240, | |
| "valid_targets_mean": 5379.4, | |
| "valid_targets_min": 4510 | |
| }, | |
| { | |
| "epoch": 6.792, | |
| "grad_norm": 0.36242140720159416, | |
| "learning_rate": 1.0917318993721726e-07, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24523484706878662, | |
| "step": 4245, | |
| "valid_targets_mean": 5590.1, | |
| "valid_targets_min": 4602 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.3730470799999599, | |
| "learning_rate": 1.0100528927558861e-07, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.251546710729599, | |
| "step": 4250, | |
| "valid_targets_mean": 5147.8, | |
| "valid_targets_min": 4727 | |
| }, | |
| { | |
| "epoch": 6.808, | |
| "grad_norm": 0.3589579191155185, | |
| "learning_rate": 9.31541552882087e-08, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24574515223503113, | |
| "step": 4255, | |
| "valid_targets_mean": 5281.3, | |
| "valid_targets_min": 4743 | |
| }, | |
| { | |
| "epoch": 6.816, | |
| "grad_norm": 0.3572016175915687, | |
| "learning_rate": 8.561991295514161e-08, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25972914695739746, | |
| "step": 4260, | |
| "valid_targets_mean": 5223.8, | |
| "valid_targets_min": 4364 | |
| }, | |
| { | |
| "epoch": 6.824, | |
| "grad_norm": 0.3546121448821959, | |
| "learning_rate": 7.840268221193548e-08, | |
| "loss": 0.2434, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23891566693782806, | |
| "step": 4265, | |
| "valid_targets_mean": 5108.1, | |
| "valid_targets_min": 4593 | |
| }, | |
| { | |
| "epoch": 6.832, | |
| "grad_norm": 0.3692774207834368, | |
| "learning_rate": 7.150257794772186e-08, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24659819900989532, | |
| "step": 4270, | |
| "valid_targets_mean": 5193.8, | |
| "valid_targets_min": 4524 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "grad_norm": 0.35774686843107567, | |
| "learning_rate": 6.491971000337938e-08, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25011658668518066, | |
| "step": 4275, | |
| "valid_targets_mean": 5219.7, | |
| "valid_targets_min": 4615 | |
| }, | |
| { | |
| "epoch": 6.848, | |
| "grad_norm": 0.36440298812103605, | |
| "learning_rate": 5.8654183169788435e-08, | |
| "loss": 0.2453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24157004058361053, | |
| "step": 4280, | |
| "valid_targets_mean": 5122.5, | |
| "valid_targets_min": 4568 | |
| }, | |
| { | |
| "epoch": 6.856, | |
| "grad_norm": 0.358062536846215, | |
| "learning_rate": 5.270609718616593e-08, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2477453649044037, | |
| "step": 4285, | |
| "valid_targets_mean": 5172.8, | |
| "valid_targets_min": 4725 | |
| }, | |
| { | |
| "epoch": 6.864, | |
| "grad_norm": 0.34398406293364436, | |
| "learning_rate": 4.70755467384687e-08, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2514168620109558, | |
| "step": 4290, | |
| "valid_targets_mean": 5591.8, | |
| "valid_targets_min": 4653 | |
| }, | |
| { | |
| "epoch": 6.872, | |
| "grad_norm": 0.3601884533900536, | |
| "learning_rate": 4.176262145789478e-08, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25023889541625977, | |
| "step": 4295, | |
| "valid_targets_mean": 5246.7, | |
| "valid_targets_min": 4306 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 0.3533032916491212, | |
| "learning_rate": 3.676740591945782e-08, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22928789258003235, | |
| "step": 4300, | |
| "valid_targets_mean": 4828.4, | |
| "valid_targets_min": 4368 | |
| }, | |
| { | |
| "epoch": 6.888, | |
| "grad_norm": 0.3550738687279205, | |
| "learning_rate": 3.208997964062821e-08, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24948222935199738, | |
| "step": 4305, | |
| "valid_targets_mean": 5126.3, | |
| "valid_targets_min": 4480 | |
| }, | |
| { | |
| "epoch": 6.896, | |
| "grad_norm": 0.33493343986519614, | |
| "learning_rate": 2.773041708008295e-08, | |
| "loss": 0.241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2384391725063324, | |
| "step": 4310, | |
| "valid_targets_mean": 5455.4, | |
| "valid_targets_min": 4405 | |
| }, | |
| { | |
| "epoch": 6.904, | |
| "grad_norm": 0.32900035961325086, | |
| "learning_rate": 2.3688787636511057e-08, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24105510115623474, | |
| "step": 4315, | |
| "valid_targets_mean": 5483.4, | |
| "valid_targets_min": 4580 | |
| }, | |
| { | |
| "epoch": 6.912, | |
| "grad_norm": 0.3591967844466533, | |
| "learning_rate": 1.9965155647507782e-08, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24230828881263733, | |
| "step": 4320, | |
| "valid_targets_mean": 5194.5, | |
| "valid_targets_min": 4628 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "grad_norm": 0.3569634070016483, | |
| "learning_rate": 1.655958038855765e-08, | |
| "loss": 0.2394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24538391828536987, | |
| "step": 4325, | |
| "valid_targets_mean": 5197.4, | |
| "valid_targets_min": 4453 | |
| }, | |
| { | |
| "epoch": 6.928, | |
| "grad_norm": 0.3232711655439292, | |
| "learning_rate": 1.3472116072084096e-08, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24799248576164246, | |
| "step": 4330, | |
| "valid_targets_mean": 5822.6, | |
| "valid_targets_min": 4804 | |
| }, | |
| { | |
| "epoch": 6.936, | |
| "grad_norm": 0.3513670756912916, | |
| "learning_rate": 1.0702811846590167e-08, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24240067601203918, | |
| "step": 4335, | |
| "valid_targets_mean": 5218.8, | |
| "valid_targets_min": 4558 | |
| }, | |
| { | |
| "epoch": 6.944, | |
| "grad_norm": 0.32256648301402285, | |
| "learning_rate": 8.251711795876916e-09, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23891803622245789, | |
| "step": 4340, | |
| "valid_targets_mean": 5829.8, | |
| "valid_targets_min": 4493 | |
| }, | |
| { | |
| "epoch": 6.952, | |
| "grad_norm": 0.34778979015315653, | |
| "learning_rate": 6.1188549383373044e-09, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24039101600646973, | |
| "step": 4345, | |
| "valid_targets_mean": 5670.2, | |
| "valid_targets_min": 4688 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 0.3469604996257864, | |
| "learning_rate": 4.304275226338916e-09, | |
| "loss": 0.2416, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24126708507537842, | |
| "step": 4350, | |
| "valid_targets_mean": 4949.8, | |
| "valid_targets_min": 4502 | |
| }, | |
| { | |
| "epoch": 6.968, | |
| "grad_norm": 0.33796825541290015, | |
| "learning_rate": 2.8080015456799503e-09, | |
| "loss": 0.2413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24856774508953094, | |
| "step": 4355, | |
| "valid_targets_mean": 5665.3, | |
| "valid_targets_min": 4370 | |
| }, | |
| { | |
| "epoch": 6.976, | |
| "grad_norm": 0.35793467252792954, | |
| "learning_rate": 1.6300577151340257e-09, | |
| "loss": 0.2439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24193130433559418, | |
| "step": 4360, | |
| "valid_targets_mean": 5116.7, | |
| "valid_targets_min": 4657 | |
| }, | |
| { | |
| "epoch": 6.984, | |
| "grad_norm": 0.356179707426998, | |
| "learning_rate": 7.70462486070489e-10, | |
| "loss": 0.2376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23189060389995575, | |
| "step": 4365, | |
| "valid_targets_mean": 5205.8, | |
| "valid_targets_min": 4623 | |
| }, | |
| { | |
| "epoch": 6.992, | |
| "grad_norm": 0.3251678311682762, | |
| "learning_rate": 2.2922954214799065e-10, | |
| "loss": 0.2473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23791056871414185, | |
| "step": 4370, | |
| "valid_targets_mean": 5491.1, | |
| "valid_targets_min": 4387 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.3669476771540812, | |
| "learning_rate": 6.367499107984288e-12, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23579511046409607, | |
| "step": 4375, | |
| "valid_targets_mean": 5026.7, | |
| "valid_targets_min": 4563 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23579511046409607, | |
| "step": 4375, | |
| "total_flos": 1774371357458432.0, | |
| "train_loss": 0.271993322290693, | |
| "train_runtime": 19852.4616, | |
| "train_samples_per_second": 3.526, | |
| "train_steps_per_second": 0.22, | |
| "valid_targets_mean": 5026.7, | |
| "valid_targets_min": 4563 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4375, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1774371357458432.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |